| field | value | date |
|---|---|---|
| author | James Taylor <user234683@users.noreply.github.com> | 2019-08-09 22:01:04 -0700 |
| committer | James Taylor <user234683@users.noreply.github.com> | 2019-08-09 22:01:04 -0700 |
| commit | 2e75c6d9603f8a5edf6495f8d4fb3115a67d823c (patch) | |
| tree | 8fb2d1bec2cf0e50c5fce6bc718f755485419db0 | |
| parent | cc9283ad5332f59a69a91d9d0fab299779de513c (diff) | |
| parent | adc40bc760345a23678a01f27d7697dfd3811914 (diff) | |
Merge flask framework and other stuff from master
172 files changed, 54285 insertions, 2763 deletions
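The bulk of this merge vendors the Click 7.0 package under `python/click/`. As a quick orientation to the API surface that `python/click/__init__.py` re-exports below, here is a minimal, hypothetical usage sketch; the command, option, and argument names are invented and are not part of the commit:

```python
# Hypothetical usage of the vendored Click API; `hello`, `--count`,
# and NAME are illustration only.
import click

@click.command()
@click.option('--count', default=1, help='Number of greetings.')
@click.argument('name')
def hello(count, name):
    """Greet NAME a total of COUNT times."""
    for _ in range(count):
        click.echo('Hello, %s!' % name)

if __name__ == '__main__':
    hello()
```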
diff --git a/http_errors.py b/http_errors.py deleted file mode 100644 index bd8fbb4..0000000 --- a/http_errors.py +++ /dev/null @@ -1,16 +0,0 @@ -class Code2xx(Exception): - pass -class Code200(Code2xx): - pass - -class Error4xx(Exception): - pass -class Error404(Error4xx): - pass - -class Error5xx(Exception): - pass -class Error500(Error5xx): - pass -class Error502(Error5xx): - pass diff --git a/python/click/__init__.py b/python/click/__init__.py new file mode 100644 index 0000000..d3c3366 --- /dev/null +++ b/python/click/__init__.py @@ -0,0 +1,97 @@ +# -*- coding: utf-8 -*- +""" +click +~~~~~ + +Click is a simple Python module inspired by the stdlib optparse to make +writing command line scripts fun. Unlike other modules, it's based +around a simple API that does not come with too much magic and is +composable. + +:copyright: © 2014 by the Pallets team. +:license: BSD, see LICENSE.rst for more details. +""" + +# Core classes +from .core import Context, BaseCommand, Command, MultiCommand, Group, \ + CommandCollection, Parameter, Option, Argument + +# Globals +from .globals import get_current_context + +# Decorators +from .decorators import pass_context, pass_obj, make_pass_decorator, \ + command, group, argument, option, confirmation_option, \ + password_option, version_option, help_option + +# Types +from .types import ParamType, File, Path, Choice, IntRange, Tuple, \ + DateTime, STRING, INT, FLOAT, BOOL, UUID, UNPROCESSED, FloatRange + +# Utilities +from .utils import echo, get_binary_stream, get_text_stream, open_file, \ + format_filename, get_app_dir, get_os_args + +# Terminal functions +from .termui import prompt, confirm, get_terminal_size, echo_via_pager, \ + progressbar, clear, style, unstyle, secho, edit, launch, getchar, \ + pause + +# Exceptions +from .exceptions import ClickException, UsageError, BadParameter, \ + FileError, Abort, NoSuchOption, BadOptionUsage, BadArgumentUsage, \ + MissingParameter + +# Formatting +from .formatting import HelpFormatter, wrap_text + +# Parsing +from .parser import OptionParser + + +__all__ = [ + # Core classes + 'Context', 'BaseCommand', 'Command', 'MultiCommand', 'Group', + 'CommandCollection', 'Parameter', 'Option', 'Argument', + + # Globals + 'get_current_context', + + # Decorators + 'pass_context', 'pass_obj', 'make_pass_decorator', 'command', 'group', + 'argument', 'option', 'confirmation_option', 'password_option', + 'version_option', 'help_option', + + # Types + 'ParamType', 'File', 'Path', 'Choice', 'IntRange', 'Tuple', + 'DateTime', 'STRING', 'INT', 'FLOAT', 'BOOL', 'UUID', 'UNPROCESSED', + 'FloatRange', + + # Utilities + 'echo', 'get_binary_stream', 'get_text_stream', 'open_file', + 'format_filename', 'get_app_dir', 'get_os_args', + + # Terminal functions + 'prompt', 'confirm', 'get_terminal_size', 'echo_via_pager', + 'progressbar', 'clear', 'style', 'unstyle', 'secho', 'edit', 'launch', + 'getchar', 'pause', + + # Exceptions + 'ClickException', 'UsageError', 'BadParameter', 'FileError', + 'Abort', 'NoSuchOption', 'BadOptionUsage', 'BadArgumentUsage', + 'MissingParameter', + + # Formatting + 'HelpFormatter', 'wrap_text', + + # Parsing + 'OptionParser', +] + + +# Controls if click should emit the warning about the use of unicode +# literals. 
+disable_unicode_literals_warning = False + + +__version__ = '7.0' diff --git a/python/click/_bashcomplete.py b/python/click/_bashcomplete.py new file mode 100644 index 0000000..a5f1084 --- /dev/null +++ b/python/click/_bashcomplete.py @@ -0,0 +1,293 @@ +import copy +import os +import re + +from .utils import echo +from .parser import split_arg_string +from .core import MultiCommand, Option, Argument +from .types import Choice + +try: + from collections import abc +except ImportError: + import collections as abc + +WORDBREAK = '=' + +# Note, only BASH version 4.4 and later have the nosort option. +COMPLETION_SCRIPT_BASH = ''' +%(complete_func)s() { + local IFS=$'\n' + COMPREPLY=( $( env COMP_WORDS="${COMP_WORDS[*]}" \\ + COMP_CWORD=$COMP_CWORD \\ + %(autocomplete_var)s=complete $1 ) ) + return 0 +} + +%(complete_func)setup() { + local COMPLETION_OPTIONS="" + local BASH_VERSION_ARR=(${BASH_VERSION//./ }) + # Only BASH version 4.4 and later have the nosort option. + if [ ${BASH_VERSION_ARR[0]} -gt 4 ] || ([ ${BASH_VERSION_ARR[0]} -eq 4 ] && [ ${BASH_VERSION_ARR[1]} -ge 4 ]); then + COMPLETION_OPTIONS="-o nosort" + fi + + complete $COMPLETION_OPTIONS -F %(complete_func)s %(script_names)s +} + +%(complete_func)setup +''' + +COMPLETION_SCRIPT_ZSH = ''' +%(complete_func)s() { + local -a completions + local -a completions_with_descriptions + local -a response + response=("${(@f)$( env COMP_WORDS=\"${words[*]}\" \\ + COMP_CWORD=$((CURRENT-1)) \\ + %(autocomplete_var)s=\"complete_zsh\" \\ + %(script_names)s )}") + + for key descr in ${(kv)response}; do + if [[ "$descr" == "_" ]]; then + completions+=("$key") + else + completions_with_descriptions+=("$key":"$descr") + fi + done + + if [ -n "$completions_with_descriptions" ]; then + _describe -V unsorted completions_with_descriptions -U -Q + fi + + if [ -n "$completions" ]; then + compadd -U -V unsorted -Q -a completions + fi + compstate[insert]="automenu" +} + +compdef %(complete_func)s %(script_names)s +''' + +_invalid_ident_char_re = re.compile(r'[^a-zA-Z0-9_]') + + +def get_completion_script(prog_name, complete_var, shell): + cf_name = _invalid_ident_char_re.sub('', prog_name.replace('-', '_')) + script = COMPLETION_SCRIPT_ZSH if shell == 'zsh' else COMPLETION_SCRIPT_BASH + return (script % { + 'complete_func': '_%s_completion' % cf_name, + 'script_names': prog_name, + 'autocomplete_var': complete_var, + }).strip() + ';' + + +def resolve_ctx(cli, prog_name, args): + """ + Parse into a hierarchy of contexts. Contexts are connected through the parent variable. + :param cli: command definition + :param prog_name: the program that is running + :param args: full list of args + :return: the final context/command parsed + """ + ctx = cli.make_context(prog_name, args, resilient_parsing=True) + args = ctx.protected_args + ctx.args + while args: + if isinstance(ctx.command, MultiCommand): + if not ctx.command.chain: + cmd_name, cmd, args = ctx.command.resolve_command(ctx, args) + if cmd is None: + return ctx + ctx = cmd.make_context(cmd_name, args, parent=ctx, + resilient_parsing=True) + args = ctx.protected_args + ctx.args + else: + # Walk chained subcommand contexts saving the last one. 
+ while args: + cmd_name, cmd, args = ctx.command.resolve_command(ctx, args) + if cmd is None: + return ctx + sub_ctx = cmd.make_context(cmd_name, args, parent=ctx, + allow_extra_args=True, + allow_interspersed_args=False, + resilient_parsing=True) + args = sub_ctx.args + ctx = sub_ctx + args = sub_ctx.protected_args + sub_ctx.args + else: + break + return ctx + + +def start_of_option(param_str): + """ + :param param_str: param_str to check + :return: whether or not this is the start of an option declaration (i.e. starts "-" or "--") + """ + return param_str and param_str[:1] == '-' + + +def is_incomplete_option(all_args, cmd_param): + """ + :param all_args: the full original list of args supplied + :param cmd_param: the current command paramter + :return: whether or not the last option declaration (i.e. starts "-" or "--") is incomplete and + corresponds to this cmd_param. In other words whether this cmd_param option can still accept + values + """ + if not isinstance(cmd_param, Option): + return False + if cmd_param.is_flag: + return False + last_option = None + for index, arg_str in enumerate(reversed([arg for arg in all_args if arg != WORDBREAK])): + if index + 1 > cmd_param.nargs: + break + if start_of_option(arg_str): + last_option = arg_str + + return True if last_option and last_option in cmd_param.opts else False + + +def is_incomplete_argument(current_params, cmd_param): + """ + :param current_params: the current params and values for this argument as already entered + :param cmd_param: the current command parameter + :return: whether or not the last argument is incomplete and corresponds to this cmd_param. In + other words whether or not the this cmd_param argument can still accept values + """ + if not isinstance(cmd_param, Argument): + return False + current_param_values = current_params[cmd_param.name] + if current_param_values is None: + return True + if cmd_param.nargs == -1: + return True + if isinstance(current_param_values, abc.Iterable) \ + and cmd_param.nargs > 1 and len(current_param_values) < cmd_param.nargs: + return True + return False + + +def get_user_autocompletions(ctx, args, incomplete, cmd_param): + """ + :param ctx: context associated with the parsed command + :param args: full list of args + :param incomplete: the incomplete text to autocomplete + :param cmd_param: command definition + :return: all the possible user-specified completions for the param + """ + results = [] + if isinstance(cmd_param.type, Choice): + # Choices don't support descriptions. + results = [(c, None) + for c in cmd_param.type.choices if str(c).startswith(incomplete)] + elif cmd_param.autocompletion is not None: + dynamic_completions = cmd_param.autocompletion(ctx=ctx, + args=args, + incomplete=incomplete) + results = [c if isinstance(c, tuple) else (c, None) + for c in dynamic_completions] + return results + + +def get_visible_commands_starting_with(ctx, starts_with): + """ + :param ctx: context associated with the parsed command + :starts_with: string that visible commands must start with. + :return: all visible (not hidden) commands that start with starts_with. + """ + for c in ctx.command.list_commands(ctx): + if c.startswith(starts_with): + command = ctx.command.get_command(ctx, c) + if not command.hidden: + yield command + + +def add_subcommand_completions(ctx, incomplete, completions_out): + # Add subcommand completions. 
+ if isinstance(ctx.command, MultiCommand): + completions_out.extend( + [(c.name, c.get_short_help_str()) for c in get_visible_commands_starting_with(ctx, incomplete)]) + + # Walk up the context list and add any other completion possibilities from chained commands + while ctx.parent is not None: + ctx = ctx.parent + if isinstance(ctx.command, MultiCommand) and ctx.command.chain: + remaining_commands = [c for c in get_visible_commands_starting_with(ctx, incomplete) + if c.name not in ctx.protected_args] + completions_out.extend([(c.name, c.get_short_help_str()) for c in remaining_commands]) + + +def get_choices(cli, prog_name, args, incomplete): + """ + :param cli: command definition + :param prog_name: the program that is running + :param args: full list of args + :param incomplete: the incomplete text to autocomplete + :return: all the possible completions for the incomplete + """ + all_args = copy.deepcopy(args) + + ctx = resolve_ctx(cli, prog_name, args) + if ctx is None: + return [] + + # In newer versions of bash long opts with '='s are partitioned, but it's easier to parse + # without the '=' + if start_of_option(incomplete) and WORDBREAK in incomplete: + partition_incomplete = incomplete.partition(WORDBREAK) + all_args.append(partition_incomplete[0]) + incomplete = partition_incomplete[2] + elif incomplete == WORDBREAK: + incomplete = '' + + completions = [] + if start_of_option(incomplete): + # completions for partial options + for param in ctx.command.params: + if isinstance(param, Option) and not param.hidden: + param_opts = [param_opt for param_opt in param.opts + + param.secondary_opts if param_opt not in all_args or param.multiple] + completions.extend([(o, param.help) for o in param_opts if o.startswith(incomplete)]) + return completions + # completion for option values from user supplied values + for param in ctx.command.params: + if is_incomplete_option(all_args, param): + return get_user_autocompletions(ctx, all_args, incomplete, param) + # completion for argument values from user supplied values + for param in ctx.command.params: + if is_incomplete_argument(ctx.params, param): + return get_user_autocompletions(ctx, all_args, incomplete, param) + + add_subcommand_completions(ctx, incomplete, completions) + # Sort before returning so that proper ordering can be enforced in custom types. + return sorted(completions) + + +def do_complete(cli, prog_name, include_descriptions): + cwords = split_arg_string(os.environ['COMP_WORDS']) + cword = int(os.environ['COMP_CWORD']) + args = cwords[1:cword] + try: + incomplete = cwords[cword] + except IndexError: + incomplete = '' + + for item in get_choices(cli, prog_name, args, incomplete): + echo(item[0]) + if include_descriptions: + # ZSH has trouble dealing with empty array parameters when returned from commands, so use a well defined character '_' to indicate no description is present. 
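Aside on the protocol that `get_choices` and `do_complete` implement: the generated shell function (see `COMPLETION_SCRIPT_BASH` above) re-invokes the program with the partial command line passed through environment variables. A rough, assumed simulation of one round trip; the program name `cli` and subcommand `build` are invented:

```python
# Rough simulation of the completion handshake. The shell normally
# sets these variables before re-running the program:
import os

os.environ['COMP_WORDS'] = 'cli bu'  # the whole partial command line
os.environ['COMP_CWORD'] = '1'       # index of the word under the cursor
# Re-running `cli` with _CLI_COMPLETE=complete set would then print one
# candidate per line (e.g. `build`) for the shell to turn into COMPREPLY.
```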
+ echo(item[1] if item[1] else '_') + + return True + + +def bashcomplete(cli, prog_name, complete_var, complete_instr): + if complete_instr.startswith('source'): + shell = 'zsh' if complete_instr == 'source_zsh' else 'bash' + echo(get_completion_script(prog_name, complete_var, shell)) + return True + elif complete_instr == 'complete' or complete_instr == 'complete_zsh': + return do_complete(cli, prog_name, complete_instr == 'complete_zsh') + return False diff --git a/python/click/_compat.py b/python/click/_compat.py new file mode 100644 index 0000000..937e230 --- /dev/null +++ b/python/click/_compat.py @@ -0,0 +1,703 @@ +import re +import io +import os +import sys +import codecs +from weakref import WeakKeyDictionary + + +PY2 = sys.version_info[0] == 2 +CYGWIN = sys.platform.startswith('cygwin') +# Determine local App Engine environment, per Google's own suggestion +APP_ENGINE = ('APPENGINE_RUNTIME' in os.environ and + 'Development/' in os.environ['SERVER_SOFTWARE']) +WIN = sys.platform.startswith('win') and not APP_ENGINE +DEFAULT_COLUMNS = 80 + + +_ansi_re = re.compile(r'\033\[((?:\d|;)*)([a-zA-Z])') + + +def get_filesystem_encoding(): + return sys.getfilesystemencoding() or sys.getdefaultencoding() + + +def _make_text_stream(stream, encoding, errors, + force_readable=False, force_writable=False): + if encoding is None: + encoding = get_best_encoding(stream) + if errors is None: + errors = 'replace' + return _NonClosingTextIOWrapper(stream, encoding, errors, + line_buffering=True, + force_readable=force_readable, + force_writable=force_writable) + + +def is_ascii_encoding(encoding): + """Checks if a given encoding is ascii.""" + try: + return codecs.lookup(encoding).name == 'ascii' + except LookupError: + return False + + +def get_best_encoding(stream): + """Returns the default stream encoding if not found.""" + rv = getattr(stream, 'encoding', None) or sys.getdefaultencoding() + if is_ascii_encoding(rv): + return 'utf-8' + return rv + + +class _NonClosingTextIOWrapper(io.TextIOWrapper): + + def __init__(self, stream, encoding, errors, + force_readable=False, force_writable=False, **extra): + self._stream = stream = _FixupStream(stream, force_readable, + force_writable) + io.TextIOWrapper.__init__(self, stream, encoding, errors, **extra) + + # The io module is a place where the Python 3 text behavior + # was forced upon Python 2, so we need to unbreak + # it to look like Python 2. + if PY2: + def write(self, x): + if isinstance(x, str) or is_bytes(x): + try: + self.flush() + except Exception: + pass + return self.buffer.write(str(x)) + return io.TextIOWrapper.write(self, x) + + def writelines(self, lines): + for line in lines: + self.write(line) + + def __del__(self): + try: + self.detach() + except Exception: + pass + + def isatty(self): + # https://bitbucket.org/pypy/pypy/issue/1803 + return self._stream.isatty() + + +class _FixupStream(object): + """The new io interface needs more from streams than streams + traditionally implement. As such, this fix-up code is necessary in + some circumstances. + + The forcing of readable and writable flags are there because some tools + put badly patched objects on sys (one such offender are certain version + of jupyter notebook). 
+ """ + + def __init__(self, stream, force_readable=False, force_writable=False): + self._stream = stream + self._force_readable = force_readable + self._force_writable = force_writable + + def __getattr__(self, name): + return getattr(self._stream, name) + + def read1(self, size): + f = getattr(self._stream, 'read1', None) + if f is not None: + return f(size) + # We only dispatch to readline instead of read in Python 2 as we + # do not want cause problems with the different implementation + # of line buffering. + if PY2: + return self._stream.readline(size) + return self._stream.read(size) + + def readable(self): + if self._force_readable: + return True + x = getattr(self._stream, 'readable', None) + if x is not None: + return x() + try: + self._stream.read(0) + except Exception: + return False + return True + + def writable(self): + if self._force_writable: + return True + x = getattr(self._stream, 'writable', None) + if x is not None: + return x() + try: + self._stream.write('') + except Exception: + try: + self._stream.write(b'') + except Exception: + return False + return True + + def seekable(self): + x = getattr(self._stream, 'seekable', None) + if x is not None: + return x() + try: + self._stream.seek(self._stream.tell()) + except Exception: + return False + return True + + +if PY2: + text_type = unicode + bytes = str + raw_input = raw_input + string_types = (str, unicode) + int_types = (int, long) + iteritems = lambda x: x.iteritems() + range_type = xrange + + def is_bytes(x): + return isinstance(x, (buffer, bytearray)) + + _identifier_re = re.compile(r'^[a-zA-Z_][a-zA-Z0-9_]*$') + + # For Windows, we need to force stdout/stdin/stderr to binary if it's + # fetched for that. This obviously is not the most correct way to do + # it as it changes global state. Unfortunately, there does not seem to + # be a clear better way to do it as just reopening the file in binary + # mode does not change anything. + # + # An option would be to do what Python 3 does and to open the file as + # binary only, patch it back to the system, and then use a wrapper + # stream that converts newlines. It's not quite clear what's the + # correct option here. + # + # This code also lives in _winconsole for the fallback to the console + # emulation stream. + # + # There are also Windows environments where the `msvcrt` module is not + # available (which is why we use try-catch instead of the WIN variable + # here), such as the Google App Engine development server on Windows. In + # those cases there is just nothing we can do. 
+ def set_binary_mode(f): + return f + + try: + import msvcrt + except ImportError: + pass + else: + def set_binary_mode(f): + try: + fileno = f.fileno() + except Exception: + pass + else: + msvcrt.setmode(fileno, os.O_BINARY) + return f + + try: + import fcntl + except ImportError: + pass + else: + def set_binary_mode(f): + try: + fileno = f.fileno() + except Exception: + pass + else: + flags = fcntl.fcntl(fileno, fcntl.F_GETFL) + fcntl.fcntl(fileno, fcntl.F_SETFL, flags & ~os.O_NONBLOCK) + return f + + def isidentifier(x): + return _identifier_re.search(x) is not None + + def get_binary_stdin(): + return set_binary_mode(sys.stdin) + + def get_binary_stdout(): + _wrap_std_stream('stdout') + return set_binary_mode(sys.stdout) + + def get_binary_stderr(): + _wrap_std_stream('stderr') + return set_binary_mode(sys.stderr) + + def get_text_stdin(encoding=None, errors=None): + rv = _get_windows_console_stream(sys.stdin, encoding, errors) + if rv is not None: + return rv + return _make_text_stream(sys.stdin, encoding, errors, + force_readable=True) + + def get_text_stdout(encoding=None, errors=None): + _wrap_std_stream('stdout') + rv = _get_windows_console_stream(sys.stdout, encoding, errors) + if rv is not None: + return rv + return _make_text_stream(sys.stdout, encoding, errors, + force_writable=True) + + def get_text_stderr(encoding=None, errors=None): + _wrap_std_stream('stderr') + rv = _get_windows_console_stream(sys.stderr, encoding, errors) + if rv is not None: + return rv + return _make_text_stream(sys.stderr, encoding, errors, + force_writable=True) + + def filename_to_ui(value): + if isinstance(value, bytes): + value = value.decode(get_filesystem_encoding(), 'replace') + return value +else: + import io + text_type = str + raw_input = input + string_types = (str,) + int_types = (int,) + range_type = range + isidentifier = lambda x: x.isidentifier() + iteritems = lambda x: iter(x.items()) + + def is_bytes(x): + return isinstance(x, (bytes, memoryview, bytearray)) + + def _is_binary_reader(stream, default=False): + try: + return isinstance(stream.read(0), bytes) + except Exception: + return default + # This happens in some cases where the stream was already + # closed. In this case, we assume the default. + + def _is_binary_writer(stream, default=False): + try: + stream.write(b'') + except Exception: + try: + stream.write('') + return False + except Exception: + pass + return default + return True + + def _find_binary_reader(stream): + # We need to figure out if the given stream is already binary. + # This can happen because the official docs recommend detaching + # the streams to get binary streams. Some code might do this, so + # we need to deal with this case explicitly. + if _is_binary_reader(stream, False): + return stream + + buf = getattr(stream, 'buffer', None) + + # Same situation here; this time we assume that the buffer is + # actually binary in case it's closed. + if buf is not None and _is_binary_reader(buf, True): + return buf + + def _find_binary_writer(stream): + # We need to figure out if the given stream is already binary. + # This can happen because the official docs recommend detatching + # the streams to get binary streams. Some code might do this, so + # we need to deal with this case explicitly. + if _is_binary_writer(stream, False): + return stream + + buf = getattr(stream, 'buffer', None) + + # Same situation here; this time we assume that the buffer is + # actually binary in case it's closed. 
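Aside: on a standard CPython 3 console, the binary stream that `_find_binary_reader` and `_find_binary_writer` locate is simply the `buffer` attribute of the text stream. A small illustration:

```python
# The text stream wraps a binary buffer; raw bytes go through the latter.
import sys

binary_out = getattr(sys.stdout, 'buffer', None)
if binary_out is not None:
    binary_out.write(b'raw bytes\n')
    binary_out.flush()
```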
+ if buf is not None and _is_binary_writer(buf, True): + return buf + + def _stream_is_misconfigured(stream): + """A stream is misconfigured if its encoding is ASCII.""" + # If the stream does not have an encoding set, we assume it's set + # to ASCII. This appears to happen in certain unittest + # environments. It's not quite clear what the correct behavior is + # but this at least will force Click to recover somehow. + return is_ascii_encoding(getattr(stream, 'encoding', None) or 'ascii') + + def _is_compatible_text_stream(stream, encoding, errors): + stream_encoding = getattr(stream, 'encoding', None) + stream_errors = getattr(stream, 'errors', None) + + # Perfect match. + if stream_encoding == encoding and stream_errors == errors: + return True + + # Otherwise, it's only a compatible stream if we did not ask for + # an encoding. + if encoding is None: + return stream_encoding is not None + + return False + + def _force_correct_text_reader(text_reader, encoding, errors, + force_readable=False): + if _is_binary_reader(text_reader, False): + binary_reader = text_reader + else: + # If there is no target encoding set, we need to verify that the + # reader is not actually misconfigured. + if encoding is None and not _stream_is_misconfigured(text_reader): + return text_reader + + if _is_compatible_text_stream(text_reader, encoding, errors): + return text_reader + + # If the reader has no encoding, we try to find the underlying + # binary reader for it. If that fails because the environment is + # misconfigured, we silently go with the same reader because this + # is too common to happen. In that case, mojibake is better than + # exceptions. + binary_reader = _find_binary_reader(text_reader) + if binary_reader is None: + return text_reader + + # At this point, we default the errors to replace instead of strict + # because nobody handles those errors anyways and at this point + # we're so fundamentally fucked that nothing can repair it. + if errors is None: + errors = 'replace' + return _make_text_stream(binary_reader, encoding, errors, + force_readable=force_readable) + + def _force_correct_text_writer(text_writer, encoding, errors, + force_writable=False): + if _is_binary_writer(text_writer, False): + binary_writer = text_writer + else: + # If there is no target encoding set, we need to verify that the + # writer is not actually misconfigured. + if encoding is None and not _stream_is_misconfigured(text_writer): + return text_writer + + if _is_compatible_text_stream(text_writer, encoding, errors): + return text_writer + + # If the writer has no encoding, we try to find the underlying + # binary writer for it. If that fails because the environment is + # misconfigured, we silently go with the same writer because this + # is too common to happen. In that case, mojibake is better than + # exceptions. + binary_writer = _find_binary_writer(text_writer) + if binary_writer is None: + return text_writer + + # At this point, we default the errors to replace instead of strict + # because nobody handles those errors anyways and at this point + # we're so fundamentally fucked that nothing can repair it. 
+ if errors is None: + errors = 'replace' + return _make_text_stream(binary_writer, encoding, errors, + force_writable=force_writable) + + def get_binary_stdin(): + reader = _find_binary_reader(sys.stdin) + if reader is None: + raise RuntimeError('Was not able to determine binary ' + 'stream for sys.stdin.') + return reader + + def get_binary_stdout(): + writer = _find_binary_writer(sys.stdout) + if writer is None: + raise RuntimeError('Was not able to determine binary ' + 'stream for sys.stdout.') + return writer + + def get_binary_stderr(): + writer = _find_binary_writer(sys.stderr) + if writer is None: + raise RuntimeError('Was not able to determine binary ' + 'stream for sys.stderr.') + return writer + + def get_text_stdin(encoding=None, errors=None): + rv = _get_windows_console_stream(sys.stdin, encoding, errors) + if rv is not None: + return rv + return _force_correct_text_reader(sys.stdin, encoding, errors, + force_readable=True) + + def get_text_stdout(encoding=None, errors=None): + rv = _get_windows_console_stream(sys.stdout, encoding, errors) + if rv is not None: + return rv + return _force_correct_text_writer(sys.stdout, encoding, errors, + force_writable=True) + + def get_text_stderr(encoding=None, errors=None): + rv = _get_windows_console_stream(sys.stderr, encoding, errors) + if rv is not None: + return rv + return _force_correct_text_writer(sys.stderr, encoding, errors, + force_writable=True) + + def filename_to_ui(value): + if isinstance(value, bytes): + value = value.decode(get_filesystem_encoding(), 'replace') + else: + value = value.encode('utf-8', 'surrogateescape') \ + .decode('utf-8', 'replace') + return value + + +def get_streerror(e, default=None): + if hasattr(e, 'strerror'): + msg = e.strerror + else: + if default is not None: + msg = default + else: + msg = str(e) + if isinstance(msg, bytes): + msg = msg.decode('utf-8', 'replace') + return msg + + +def open_stream(filename, mode='r', encoding=None, errors='strict', + atomic=False): + # Standard streams first. These are simple because they don't need + # special handling for the atomic flag. It's entirely ignored. + if filename == '-': + if any(m in mode for m in ['w', 'a', 'x']): + if 'b' in mode: + return get_binary_stdout(), False + return get_text_stdout(encoding=encoding, errors=errors), False + if 'b' in mode: + return get_binary_stdin(), False + return get_text_stdin(encoding=encoding, errors=errors), False + + # Non-atomic writes directly go out through the regular open functions. + if not atomic: + if encoding is None: + return open(filename, mode), True + return io.open(filename, mode, encoding=encoding, errors=errors), True + + # Some usability stuff for atomic writes + if 'a' in mode: + raise ValueError( + 'Appending to an existing file is not supported, because that ' + 'would involve an expensive `copy`-operation to a temporary ' + 'file. Open the file in normal `w`-mode and copy explicitly ' + 'if that\'s what you\'re after.' + ) + if 'x' in mode: + raise ValueError('Use the `overwrite`-parameter instead.') + if 'w' not in mode: + raise ValueError('Atomic writes only make sense with `w`-mode.') + + # Atomic writes are more complicated. They work by opening a file + # as a proxy in the same folder and then using the fdopen + # functionality to wrap it in a Python file. Then we wrap it in an + # atomic file that moves the file over on close. 
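The atomic-write scheme described in that comment is the classic same-directory temp file plus rename. A minimal standalone sketch, with an assumed helper name and Python 3 only, not the implementation that follows:

```python
import os
import tempfile

def atomic_write(path, data):
    # Create the temp file in the target directory so the final rename
    # stays on one filesystem and therefore stays atomic.
    fd, tmp = tempfile.mkstemp(dir=os.path.dirname(path) or '.',
                               prefix='.__atomic-write')
    try:
        with os.fdopen(fd, 'w') as f:
            f.write(data)
        os.replace(tmp, path)  # atomic replace on POSIX and modern Windows
    except BaseException:
        os.unlink(tmp)
        raise
```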
+ import tempfile + fd, tmp_filename = tempfile.mkstemp(dir=os.path.dirname(filename), + prefix='.__atomic-write') + + if encoding is not None: + f = io.open(fd, mode, encoding=encoding, errors=errors) + else: + f = os.fdopen(fd, mode) + + return _AtomicFile(f, tmp_filename, os.path.realpath(filename)), True + + +# Used in a destructor call, needs extra protection from interpreter cleanup. +if hasattr(os, 'replace'): + _replace = os.replace + _can_replace = True +else: + _replace = os.rename + _can_replace = not WIN + + +class _AtomicFile(object): + + def __init__(self, f, tmp_filename, real_filename): + self._f = f + self._tmp_filename = tmp_filename + self._real_filename = real_filename + self.closed = False + + @property + def name(self): + return self._real_filename + + def close(self, delete=False): + if self.closed: + return + self._f.close() + if not _can_replace: + try: + os.remove(self._real_filename) + except OSError: + pass + _replace(self._tmp_filename, self._real_filename) + self.closed = True + + def __getattr__(self, name): + return getattr(self._f, name) + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, tb): + self.close(delete=exc_type is not None) + + def __repr__(self): + return repr(self._f) + + +auto_wrap_for_ansi = None +colorama = None +get_winterm_size = None + + +def strip_ansi(value): + return _ansi_re.sub('', value) + + +def should_strip_ansi(stream=None, color=None): + if color is None: + if stream is None: + stream = sys.stdin + return not isatty(stream) + return not color + + +# If we're on Windows, we provide transparent integration through +# colorama. This will make ANSI colors through the echo function +# work automatically. +if WIN: + # Windows has a smaller terminal + DEFAULT_COLUMNS = 79 + + from ._winconsole import _get_windows_console_stream, _wrap_std_stream + + def _get_argv_encoding(): + import locale + return locale.getpreferredencoding() + + if PY2: + def raw_input(prompt=''): + sys.stderr.flush() + if prompt: + stdout = _default_text_stdout() + stdout.write(prompt) + stdin = _default_text_stdin() + return stdin.readline().rstrip('\r\n') + + try: + import colorama + except ImportError: + pass + else: + _ansi_stream_wrappers = WeakKeyDictionary() + + def auto_wrap_for_ansi(stream, color=None): + """This function wraps a stream so that calls through colorama + are issued to the win32 console API to recolor on demand. It + also ensures to reset the colors if a write call is interrupted + to not destroy the console afterwards. 
+ """ + try: + cached = _ansi_stream_wrappers.get(stream) + except Exception: + cached = None + if cached is not None: + return cached + strip = should_strip_ansi(stream, color) + ansi_wrapper = colorama.AnsiToWin32(stream, strip=strip) + rv = ansi_wrapper.stream + _write = rv.write + + def _safe_write(s): + try: + return _write(s) + except: + ansi_wrapper.reset_all() + raise + + rv.write = _safe_write + try: + _ansi_stream_wrappers[stream] = rv + except Exception: + pass + return rv + + def get_winterm_size(): + win = colorama.win32.GetConsoleScreenBufferInfo( + colorama.win32.STDOUT).srWindow + return win.Right - win.Left, win.Bottom - win.Top +else: + def _get_argv_encoding(): + return getattr(sys.stdin, 'encoding', None) or get_filesystem_encoding() + + _get_windows_console_stream = lambda *x: None + _wrap_std_stream = lambda *x: None + + +def term_len(x): + return len(strip_ansi(x)) + + +def isatty(stream): + try: + return stream.isatty() + except Exception: + return False + + +def _make_cached_stream_func(src_func, wrapper_func): + cache = WeakKeyDictionary() + def func(): + stream = src_func() + try: + rv = cache.get(stream) + except Exception: + rv = None + if rv is not None: + return rv + rv = wrapper_func() + try: + stream = src_func() # In case wrapper_func() modified the stream + cache[stream] = rv + except Exception: + pass + return rv + return func + + +_default_text_stdin = _make_cached_stream_func( + lambda: sys.stdin, get_text_stdin) +_default_text_stdout = _make_cached_stream_func( + lambda: sys.stdout, get_text_stdout) +_default_text_stderr = _make_cached_stream_func( + lambda: sys.stderr, get_text_stderr) + + +binary_streams = { + 'stdin': get_binary_stdin, + 'stdout': get_binary_stdout, + 'stderr': get_binary_stderr, +} + +text_streams = { + 'stdin': get_text_stdin, + 'stdout': get_text_stdout, + 'stderr': get_text_stderr, +} diff --git a/python/click/_termui_impl.py b/python/click/_termui_impl.py new file mode 100644 index 0000000..00a8e5e --- /dev/null +++ b/python/click/_termui_impl.py @@ -0,0 +1,621 @@ +# -*- coding: utf-8 -*- +""" +click._termui_impl +~~~~~~~~~~~~~~~~~~ + +This module contains implementations for the termui module. To keep the +import time of Click down, some infrequently used functionality is +placed in this module and only imported as needed. + +:copyright: © 2014 by the Pallets team. +:license: BSD, see LICENSE.rst for more details. 
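One more aside on `strip_ansi` above: it removes ANSI CSI escape sequences with the `_ansi_re` pattern. A self-contained demonstration using the same regex:

```python
import re

_ansi_re = re.compile(r'\033\[((?:\d|;)*)([a-zA-Z])')  # as in _compat.py

def strip_ansi(value):
    return _ansi_re.sub('', value)

assert strip_ansi('\033[31mred\033[0m') == 'red'
```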
+""" + +import os +import sys +import time +import math +import contextlib +from ._compat import _default_text_stdout, range_type, PY2, isatty, \ + open_stream, strip_ansi, term_len, get_best_encoding, WIN, int_types, \ + CYGWIN +from .utils import echo +from .exceptions import ClickException + + +if os.name == 'nt': + BEFORE_BAR = '\r' + AFTER_BAR = '\n' +else: + BEFORE_BAR = '\r\033[?25l' + AFTER_BAR = '\033[?25h\n' + + +def _length_hint(obj): + """Returns the length hint of an object.""" + try: + return len(obj) + except (AttributeError, TypeError): + try: + get_hint = type(obj).__length_hint__ + except AttributeError: + return None + try: + hint = get_hint(obj) + except TypeError: + return None + if hint is NotImplemented or \ + not isinstance(hint, int_types) or \ + hint < 0: + return None + return hint + + +class ProgressBar(object): + + def __init__(self, iterable, length=None, fill_char='#', empty_char=' ', + bar_template='%(bar)s', info_sep=' ', show_eta=True, + show_percent=None, show_pos=False, item_show_func=None, + label=None, file=None, color=None, width=30): + self.fill_char = fill_char + self.empty_char = empty_char + self.bar_template = bar_template + self.info_sep = info_sep + self.show_eta = show_eta + self.show_percent = show_percent + self.show_pos = show_pos + self.item_show_func = item_show_func + self.label = label or '' + if file is None: + file = _default_text_stdout() + self.file = file + self.color = color + self.width = width + self.autowidth = width == 0 + + if length is None: + length = _length_hint(iterable) + if iterable is None: + if length is None: + raise TypeError('iterable or length is required') + iterable = range_type(length) + self.iter = iter(iterable) + self.length = length + self.length_known = length is not None + self.pos = 0 + self.avg = [] + self.start = self.last_eta = time.time() + self.eta_known = False + self.finished = False + self.max_width = None + self.entered = False + self.current_item = None + self.is_hidden = not isatty(self.file) + self._last_line = None + self.short_limit = 0.5 + + def __enter__(self): + self.entered = True + self.render_progress() + return self + + def __exit__(self, exc_type, exc_value, tb): + self.render_finish() + + def __iter__(self): + if not self.entered: + raise RuntimeError('You need to use progress bars in a with block.') + self.render_progress() + return self.generator() + + def is_fast(self): + return time.time() - self.start <= self.short_limit + + def render_finish(self): + if self.is_hidden or self.is_fast(): + return + self.file.write(AFTER_BAR) + self.file.flush() + + @property + def pct(self): + if self.finished: + return 1.0 + return min(self.pos / (float(self.length) or 1), 1.0) + + @property + def time_per_iteration(self): + if not self.avg: + return 0.0 + return sum(self.avg) / float(len(self.avg)) + + @property + def eta(self): + if self.length_known and not self.finished: + return self.time_per_iteration * (self.length - self.pos) + return 0.0 + + def format_eta(self): + if self.eta_known: + t = int(self.eta) + seconds = t % 60 + t //= 60 + minutes = t % 60 + t //= 60 + hours = t % 24 + t //= 24 + if t > 0: + days = t + return '%dd %02d:%02d:%02d' % (days, hours, minutes, seconds) + else: + return '%02d:%02d:%02d' % (hours, minutes, seconds) + return '' + + def format_pos(self): + pos = str(self.pos) + if self.length_known: + pos += '/%s' % self.length + return pos + + def format_pct(self): + return ('% 4d%%' % int(self.pct * 100))[1:] + + def format_bar(self): + if self.length_known: + 
bar_length = int(self.pct * self.width) + bar = self.fill_char * bar_length + bar += self.empty_char * (self.width - bar_length) + elif self.finished: + bar = self.fill_char * self.width + else: + bar = list(self.empty_char * (self.width or 1)) + if self.time_per_iteration != 0: + bar[int((math.cos(self.pos * self.time_per_iteration) + / 2.0 + 0.5) * self.width)] = self.fill_char + bar = ''.join(bar) + return bar + + def format_progress_line(self): + show_percent = self.show_percent + + info_bits = [] + if self.length_known and show_percent is None: + show_percent = not self.show_pos + + if self.show_pos: + info_bits.append(self.format_pos()) + if show_percent: + info_bits.append(self.format_pct()) + if self.show_eta and self.eta_known and not self.finished: + info_bits.append(self.format_eta()) + if self.item_show_func is not None: + item_info = self.item_show_func(self.current_item) + if item_info is not None: + info_bits.append(item_info) + + return (self.bar_template % { + 'label': self.label, + 'bar': self.format_bar(), + 'info': self.info_sep.join(info_bits) + }).rstrip() + + def render_progress(self): + from .termui import get_terminal_size + + if self.is_hidden: + return + + buf = [] + # Update width in case the terminal has been resized + if self.autowidth: + old_width = self.width + self.width = 0 + clutter_length = term_len(self.format_progress_line()) + new_width = max(0, get_terminal_size()[0] - clutter_length) + if new_width < old_width: + buf.append(BEFORE_BAR) + buf.append(' ' * self.max_width) + self.max_width = new_width + self.width = new_width + + clear_width = self.width + if self.max_width is not None: + clear_width = self.max_width + + buf.append(BEFORE_BAR) + line = self.format_progress_line() + line_len = term_len(line) + if self.max_width is None or self.max_width < line_len: + self.max_width = line_len + + buf.append(line) + buf.append(' ' * (clear_width - line_len)) + line = ''.join(buf) + # Render the line only if it changed. + + if line != self._last_line and not self.is_fast(): + self._last_line = line + echo(line, file=self.file, color=self.color, nl=False) + self.file.flush() + + def make_step(self, n_steps): + self.pos += n_steps + if self.length_known and self.pos >= self.length: + self.finished = True + + if (time.time() - self.last_eta) < 1.0: + return + + self.last_eta = time.time() + + # self.avg is a rolling list of length <= 7 of steps where steps are + # defined as time elapsed divided by the total progress through + # self.length. + if self.pos: + step = (time.time() - self.start) / self.pos + else: + step = time.time() - self.start + + self.avg = self.avg[-6:] + [step] + + self.eta_known = self.length_known + + def update(self, n_steps): + self.make_step(n_steps) + self.render_progress() + + def finish(self): + self.eta_known = 0 + self.current_item = None + self.finished = True + + def generator(self): + """ + Returns a generator which yields the items added to the bar during + construction, and updates the progress bar *after* the yielded block + returns. 
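Usage aside for the `ProgressBar` machinery: it is normally reached through the public `click.progressbar` helper, inside a `with` block as the `__iter__` guard above requires. A hedged sketch:

```python
import time
import click

# The bar advances after each loop body returns, matching the
# generator() behavior described above.
with click.progressbar(range(100), label='Processing') as bar:
    for _ in bar:
        time.sleep(0.01)
```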
+ """ + if not self.entered: + raise RuntimeError('You need to use progress bars in a with block.') + + if self.is_hidden: + for rv in self.iter: + yield rv + else: + for rv in self.iter: + self.current_item = rv + yield rv + self.update(1) + self.finish() + self.render_progress() + + +def pager(generator, color=None): + """Decide what method to use for paging through text.""" + stdout = _default_text_stdout() + if not isatty(sys.stdin) or not isatty(stdout): + return _nullpager(stdout, generator, color) + pager_cmd = (os.environ.get('PAGER', None) or '').strip() + if pager_cmd: + if WIN: + return _tempfilepager(generator, pager_cmd, color) + return _pipepager(generator, pager_cmd, color) + if os.environ.get('TERM') in ('dumb', 'emacs'): + return _nullpager(stdout, generator, color) + if WIN or sys.platform.startswith('os2'): + return _tempfilepager(generator, 'more <', color) + if hasattr(os, 'system') and os.system('(less) 2>/dev/null') == 0: + return _pipepager(generator, 'less', color) + + import tempfile + fd, filename = tempfile.mkstemp() + os.close(fd) + try: + if hasattr(os, 'system') and os.system('more "%s"' % filename) == 0: + return _pipepager(generator, 'more', color) + return _nullpager(stdout, generator, color) + finally: + os.unlink(filename) + + +def _pipepager(generator, cmd, color): + """Page through text by feeding it to another program. Invoking a + pager through this might support colors. + """ + import subprocess + env = dict(os.environ) + + # If we're piping to less we might support colors under the + # condition that + cmd_detail = cmd.rsplit('/', 1)[-1].split() + if color is None and cmd_detail[0] == 'less': + less_flags = os.environ.get('LESS', '') + ' '.join(cmd_detail[1:]) + if not less_flags: + env['LESS'] = '-R' + color = True + elif 'r' in less_flags or 'R' in less_flags: + color = True + + c = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, + env=env) + encoding = get_best_encoding(c.stdin) + try: + for text in generator: + if not color: + text = strip_ansi(text) + + c.stdin.write(text.encode(encoding, 'replace')) + except (IOError, KeyboardInterrupt): + pass + else: + c.stdin.close() + + # Less doesn't respect ^C, but catches it for its own UI purposes (aborting + # search or other commands inside less). + # + # That means when the user hits ^C, the parent process (click) terminates, + # but less is still alive, paging the output and messing up the terminal. + # + # If the user wants to make the pager exit on ^C, they should set + # `LESS='-K'`. It's not our decision to make. + while True: + try: + c.wait() + except KeyboardInterrupt: + pass + else: + break + + +def _tempfilepager(generator, cmd, color): + """Page through text by invoking a program on a temporary file.""" + import tempfile + filename = tempfile.mktemp() + # TODO: This never terminates if the passed generator never terminates. + text = "".join(generator) + if not color: + text = strip_ansi(text) + encoding = get_best_encoding(sys.stdout) + with open_stream(filename, 'wb')[0] as f: + f.write(text.encode(encoding)) + try: + os.system(cmd + ' "' + filename + '"') + finally: + os.unlink(filename) + + +def _nullpager(stream, generator, color): + """Simply print unformatted text. 
This is the ultimate fallback.""" + for text in generator: + if not color: + text = strip_ansi(text) + stream.write(text) + + +class Editor(object): + + def __init__(self, editor=None, env=None, require_save=True, + extension='.txt'): + self.editor = editor + self.env = env + self.require_save = require_save + self.extension = extension + + def get_editor(self): + if self.editor is not None: + return self.editor + for key in 'VISUAL', 'EDITOR': + rv = os.environ.get(key) + if rv: + return rv + if WIN: + return 'notepad' + for editor in 'vim', 'nano': + if os.system('which %s >/dev/null 2>&1' % editor) == 0: + return editor + return 'vi' + + def edit_file(self, filename): + import subprocess + editor = self.get_editor() + if self.env: + environ = os.environ.copy() + environ.update(self.env) + else: + environ = None + try: + c = subprocess.Popen('%s "%s"' % (editor, filename), + env=environ, shell=True) + exit_code = c.wait() + if exit_code != 0: + raise ClickException('%s: Editing failed!' % editor) + except OSError as e: + raise ClickException('%s: Editing failed: %s' % (editor, e)) + + def edit(self, text): + import tempfile + + text = text or '' + if text and not text.endswith('\n'): + text += '\n' + + fd, name = tempfile.mkstemp(prefix='editor-', suffix=self.extension) + try: + if WIN: + encoding = 'utf-8-sig' + text = text.replace('\n', '\r\n') + else: + encoding = 'utf-8' + text = text.encode(encoding) + + f = os.fdopen(fd, 'wb') + f.write(text) + f.close() + timestamp = os.path.getmtime(name) + + self.edit_file(name) + + if self.require_save \ + and os.path.getmtime(name) == timestamp: + return None + + f = open(name, 'rb') + try: + rv = f.read() + finally: + f.close() + return rv.decode('utf-8-sig').replace('\r\n', '\n') + finally: + os.unlink(name) + + +def open_url(url, wait=False, locate=False): + import subprocess + + def _unquote_file(url): + try: + import urllib + except ImportError: + import urllib + if url.startswith('file://'): + url = urllib.unquote(url[7:]) + return url + + if sys.platform == 'darwin': + args = ['open'] + if wait: + args.append('-W') + if locate: + args.append('-R') + args.append(_unquote_file(url)) + null = open('/dev/null', 'w') + try: + return subprocess.Popen(args, stderr=null).wait() + finally: + null.close() + elif WIN: + if locate: + url = _unquote_file(url) + args = 'explorer /select,"%s"' % _unquote_file( + url.replace('"', '')) + else: + args = 'start %s "" "%s"' % ( + wait and '/WAIT' or '', url.replace('"', '')) + return os.system(args) + elif CYGWIN: + if locate: + url = _unquote_file(url) + args = 'cygstart "%s"' % (os.path.dirname(url).replace('"', '')) + else: + args = 'cygstart %s "%s"' % ( + wait and '-w' or '', url.replace('"', '')) + return os.system(args) + + try: + if locate: + url = os.path.dirname(_unquote_file(url)) or '.' + else: + url = _unquote_file(url) + c = subprocess.Popen(['xdg-open', url]) + if wait: + return c.wait() + return 0 + except OSError: + if url.startswith(('http://', 'https://')) and not locate and not wait: + import webbrowser + webbrowser.open(url) + return 0 + return 1 + + +def _translate_ch_to_exc(ch): + if ch == u'\x03': + raise KeyboardInterrupt() + if ch == u'\x04' and not WIN: # Unix-like, Ctrl+D + raise EOFError() + if ch == u'\x1a' and WIN: # Windows, Ctrl+Z + raise EOFError() + + +if WIN: + import msvcrt + + @contextlib.contextmanager + def raw_terminal(): + yield + + def getchar(echo): + # The function `getch` will return a bytes object corresponding to + # the pressed character. 
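Stepping back to the `Editor` class above: it backs the public `click.edit` helper, which returns `None` when `require_save` is in effect and the user quits without saving. A hedged usage sketch; the marker text is invented:

```python
import click

MARKER = '# Everything below this line is ignored.\n'
text = click.edit('\n\n' + MARKER)
if text is not None:                 # None: editor closed without saving
    message = text.split(MARKER, 1)[0].rstrip('\n')
    click.echo('Got message: %s' % message)
```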
Since Windows 10 build 1803, it will also + # return \x00 when called a second time after pressing a regular key. + # + # `getwch` does not share this probably-bugged behavior. Moreover, it + # returns a Unicode object by default, which is what we want. + # + # Either of these functions will return \x00 or \xe0 to indicate + # a special key, and you need to call the same function again to get + # the "rest" of the code. The fun part is that \u00e0 is + # "latin small letter a with grave", so if you type that on a French + # keyboard, you _also_ get a \xe0. + # E.g., consider the Up arrow. This returns \xe0 and then \x48. The + # resulting Unicode string reads as "a with grave" + "capital H". + # This is indistinguishable from when the user actually types + # "a with grave" and then "capital H". + # + # When \xe0 is returned, we assume it's part of a special-key sequence + # and call `getwch` again, but that means that when the user types + # the \u00e0 character, `getchar` doesn't return until a second + # character is typed. + # The alternative is returning immediately, but that would mess up + # cross-platform handling of arrow keys and others that start with + # \xe0. Another option is using `getch`, but then we can't reliably + # read non-ASCII characters, because return values of `getch` are + # limited to the current 8-bit codepage. + # + # Anyway, Click doesn't claim to do this Right(tm), and using `getwch` + # is doing the right thing in more situations than with `getch`. + if echo: + func = msvcrt.getwche + else: + func = msvcrt.getwch + + rv = func() + if rv in (u'\x00', u'\xe0'): + # \x00 and \xe0 are control characters that indicate special key, + # see above. + rv += func() + _translate_ch_to_exc(rv) + return rv +else: + import tty + import termios + + @contextlib.contextmanager + def raw_terminal(): + if not isatty(sys.stdin): + f = open('/dev/tty') + fd = f.fileno() + else: + fd = sys.stdin.fileno() + f = None + try: + old_settings = termios.tcgetattr(fd) + try: + tty.setraw(fd) + yield fd + finally: + termios.tcsetattr(fd, termios.TCSADRAIN, old_settings) + sys.stdout.flush() + if f is not None: + f.close() + except termios.error: + pass + + def getchar(echo): + with raw_terminal() as fd: + ch = os.read(fd, 32) + ch = ch.decode(get_best_encoding(sys.stdin), 'replace') + if echo and isatty(sys.stdout): + sys.stdout.write(ch) + _translate_ch_to_exc(ch) + return ch diff --git a/python/click/_textwrap.py b/python/click/_textwrap.py new file mode 100644 index 0000000..7e77603 --- /dev/null +++ b/python/click/_textwrap.py @@ -0,0 +1,38 @@ +import textwrap +from contextlib import contextmanager + + +class TextWrapper(textwrap.TextWrapper): + + def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width): + space_left = max(width - cur_len, 1) + + if self.break_long_words: + last = reversed_chunks[-1] + cut = last[:space_left] + res = last[space_left:] + cur_line.append(cut) + reversed_chunks[-1] = res + elif not cur_line: + cur_line.append(reversed_chunks.pop()) + + @contextmanager + def extra_indent(self, indent): + old_initial_indent = self.initial_indent + old_subsequent_indent = self.subsequent_indent + self.initial_indent += indent + self.subsequent_indent += indent + try: + yield + finally: + self.initial_indent = old_initial_indent + self.subsequent_indent = old_subsequent_indent + + def indent_only(self, text): + rv = [] + for idx, line in enumerate(text.splitlines()): + indent = self.initial_indent + if idx > 0: + indent = self.subsequent_indent + 
rv.append(indent + line) + return '\n'.join(rv) diff --git a/python/click/_unicodefun.py b/python/click/_unicodefun.py new file mode 100644 index 0000000..620edff --- /dev/null +++ b/python/click/_unicodefun.py @@ -0,0 +1,125 @@ +import os +import sys +import codecs + +from ._compat import PY2 + + +# If someone wants to vendor click, we want to ensure the +# correct package is discovered. Ideally we could use a +# relative import here but unfortunately Python does not +# support that. +click = sys.modules[__name__.rsplit('.', 1)[0]] + + +def _find_unicode_literals_frame(): + import __future__ + if not hasattr(sys, '_getframe'): # not all Python implementations have it + return 0 + frm = sys._getframe(1) + idx = 1 + while frm is not None: + if frm.f_globals.get('__name__', '').startswith('click.'): + frm = frm.f_back + idx += 1 + elif frm.f_code.co_flags & __future__.unicode_literals.compiler_flag: + return idx + else: + break + return 0 + + +def _check_for_unicode_literals(): + if not __debug__: + return + if not PY2 or click.disable_unicode_literals_warning: + return + bad_frame = _find_unicode_literals_frame() + if bad_frame <= 0: + return + from warnings import warn + warn(Warning('Click detected the use of the unicode_literals ' + '__future__ import. This is heavily discouraged ' + 'because it can introduce subtle bugs in your ' + 'code. You should instead use explicit u"" literals ' + 'for your unicode strings. For more information see ' + 'https://click.palletsprojects.com/python3/'), + stacklevel=bad_frame) + + +def _verify_python3_env(): + """Ensures that the environment is good for unicode on Python 3.""" + if PY2: + return + try: + import locale + fs_enc = codecs.lookup(locale.getpreferredencoding()).name + except Exception: + fs_enc = 'ascii' + if fs_enc != 'ascii': + return + + extra = '' + if os.name == 'posix': + import subprocess + try: + rv = subprocess.Popen(['locale', '-a'], stdout=subprocess.PIPE, + stderr=subprocess.PIPE).communicate()[0] + except OSError: + rv = b'' + good_locales = set() + has_c_utf8 = False + + # Make sure we're operating on text here. + if isinstance(rv, bytes): + rv = rv.decode('ascii', 'replace') + + for line in rv.splitlines(): + locale = line.strip() + if locale.lower().endswith(('.utf-8', '.utf8')): + good_locales.add(locale) + if locale.lower() in ('c.utf8', 'c.utf-8'): + has_c_utf8 = True + + extra += '\n\n' + if not good_locales: + extra += ( + 'Additional information: on this system no suitable UTF-8\n' + 'locales were discovered. This most likely requires resolving\n' + 'by reconfiguring the locale system.' + ) + elif has_c_utf8: + extra += ( + 'This system supports the C.UTF-8 locale which is recommended.\n' + 'You might be able to resolve your issue by exporting the\n' + 'following environment variables:\n\n' + ' export LC_ALL=C.UTF-8\n' + ' export LANG=C.UTF-8' + ) + else: + extra += ( + 'This system lists a couple of UTF-8 supporting locales that\n' + 'you can pick from. The following suitable locales were\n' + 'discovered: %s' + ) % ', '.join(sorted(good_locales)) + + bad_locale = None + for locale in os.environ.get('LC_ALL'), os.environ.get('LANG'): + if locale and locale.lower().endswith(('.utf-8', '.utf8')): + bad_locale = locale + if locale is not None: + break + if bad_locale is not None: + extra += ( + '\n\nClick discovered that you exported a UTF-8 locale\n' + 'but the locale system could not pick up from it because\n' + 'it does not exist. 
The exported locale is "%s" but it\n' + 'is not supported' + ) % bad_locale + + raise RuntimeError( + 'Click will abort further execution because Python 3 was' + ' configured to use ASCII as encoding for the environment.' + ' Consult https://click.palletsprojects.com/en/7.x/python3/ for' + ' mitigation steps.' + extra + ) diff --git a/python/click/_winconsole.py b/python/click/_winconsole.py new file mode 100644 index 0000000..bbb080d --- /dev/null +++ b/python/click/_winconsole.py @@ -0,0 +1,307 @@ +# -*- coding: utf-8 -*- +# This module is based on the excellent work by Adam Bartoš who +# provided a lot of what went into the implementation here in +# the discussion to issue1602 in the Python bug tracker. +# +# There are some general differences in regards to how this works +# compared to the original patches as we do not need to patch +# the entire interpreter but just work in our little world of +# echo and prmopt. + +import io +import os +import sys +import zlib +import time +import ctypes +import msvcrt +from ._compat import _NonClosingTextIOWrapper, text_type, PY2 +from ctypes import byref, POINTER, c_int, c_char, c_char_p, \ + c_void_p, py_object, c_ssize_t, c_ulong, windll, WINFUNCTYPE +try: + from ctypes import pythonapi + PyObject_GetBuffer = pythonapi.PyObject_GetBuffer + PyBuffer_Release = pythonapi.PyBuffer_Release +except ImportError: + pythonapi = None +from ctypes.wintypes import LPWSTR, LPCWSTR + + +c_ssize_p = POINTER(c_ssize_t) + +kernel32 = windll.kernel32 +GetStdHandle = kernel32.GetStdHandle +ReadConsoleW = kernel32.ReadConsoleW +WriteConsoleW = kernel32.WriteConsoleW +GetLastError = kernel32.GetLastError +GetCommandLineW = WINFUNCTYPE(LPWSTR)( + ('GetCommandLineW', windll.kernel32)) +CommandLineToArgvW = WINFUNCTYPE( + POINTER(LPWSTR), LPCWSTR, POINTER(c_int))( + ('CommandLineToArgvW', windll.shell32)) + + +STDIN_HANDLE = GetStdHandle(-10) +STDOUT_HANDLE = GetStdHandle(-11) +STDERR_HANDLE = GetStdHandle(-12) + + +PyBUF_SIMPLE = 0 +PyBUF_WRITABLE = 1 + +ERROR_SUCCESS = 0 +ERROR_NOT_ENOUGH_MEMORY = 8 +ERROR_OPERATION_ABORTED = 995 + +STDIN_FILENO = 0 +STDOUT_FILENO = 1 +STDERR_FILENO = 2 + +EOF = b'\x1a' +MAX_BYTES_WRITTEN = 32767 + + +class Py_buffer(ctypes.Structure): + _fields_ = [ + ('buf', c_void_p), + ('obj', py_object), + ('len', c_ssize_t), + ('itemsize', c_ssize_t), + ('readonly', c_int), + ('ndim', c_int), + ('format', c_char_p), + ('shape', c_ssize_p), + ('strides', c_ssize_p), + ('suboffsets', c_ssize_p), + ('internal', c_void_p) + ] + + if PY2: + _fields_.insert(-1, ('smalltable', c_ssize_t * 2)) + + +# On PyPy we cannot get buffers so our ability to operate here is +# serverly limited. 
+if pythonapi is None: + get_buffer = None +else: + def get_buffer(obj, writable=False): + buf = Py_buffer() + flags = PyBUF_WRITABLE if writable else PyBUF_SIMPLE + PyObject_GetBuffer(py_object(obj), byref(buf), flags) + try: + buffer_type = c_char * buf.len + return buffer_type.from_address(buf.buf) + finally: + PyBuffer_Release(byref(buf)) + + +class _WindowsConsoleRawIOBase(io.RawIOBase): + + def __init__(self, handle): + self.handle = handle + + def isatty(self): + io.RawIOBase.isatty(self) + return True + + +class _WindowsConsoleReader(_WindowsConsoleRawIOBase): + + def readable(self): + return True + + def readinto(self, b): + bytes_to_be_read = len(b) + if not bytes_to_be_read: + return 0 + elif bytes_to_be_read % 2: + raise ValueError('cannot read odd number of bytes from ' + 'UTF-16-LE encoded console') + + buffer = get_buffer(b, writable=True) + code_units_to_be_read = bytes_to_be_read // 2 + code_units_read = c_ulong() + + rv = ReadConsoleW(self.handle, buffer, code_units_to_be_read, + byref(code_units_read), None) + if GetLastError() == ERROR_OPERATION_ABORTED: + # wait for KeyboardInterrupt + time.sleep(0.1) + if not rv: + raise OSError('Windows error: %s' % GetLastError()) + + if buffer[0] == EOF: + return 0 + return 2 * code_units_read.value + + +class _WindowsConsoleWriter(_WindowsConsoleRawIOBase): + + def writable(self): + return True + + @staticmethod + def _get_error_message(errno): + if errno == ERROR_SUCCESS: + return 'ERROR_SUCCESS' + elif errno == ERROR_NOT_ENOUGH_MEMORY: + return 'ERROR_NOT_ENOUGH_MEMORY' + return 'Windows error %s' % errno + + def write(self, b): + bytes_to_be_written = len(b) + buf = get_buffer(b) + code_units_to_be_written = min(bytes_to_be_written, + MAX_BYTES_WRITTEN) // 2 + code_units_written = c_ulong() + + WriteConsoleW(self.handle, buf, code_units_to_be_written, + byref(code_units_written), None) + bytes_written = 2 * code_units_written.value + + if bytes_written == 0 and bytes_to_be_written > 0: + raise OSError(self._get_error_message(GetLastError())) + return bytes_written + + +class ConsoleStream(object): + + def __init__(self, text_stream, byte_stream): + self._text_stream = text_stream + self.buffer = byte_stream + + @property + def name(self): + return self.buffer.name + + def write(self, x): + if isinstance(x, text_type): + return self._text_stream.write(x) + try: + self.flush() + except Exception: + pass + return self.buffer.write(x) + + def writelines(self, lines): + for line in lines: + self.write(line) + + def __getattr__(self, name): + return getattr(self._text_stream, name) + + def isatty(self): + return self.buffer.isatty() + + def __repr__(self): + return '<ConsoleStream name=%r encoding=%r>' % ( + self.name, + self.encoding, + ) + + +class WindowsChunkedWriter(object): + """ + Wraps a stream (such as stdout), acting as a transparent proxy for all + attribute access apart from method 'write()' which we wrap to write in + limited chunks due to a Windows limitation on binary console streams. + """ + def __init__(self, wrapped): + # double-underscore everything to prevent clashes with names of + # attributes on the wrapped stream object. 
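# Aside (illustration only, not part of the diff): the double underscore
# here relies on Python's name mangling, so the attribute is stored as
# _WindowsChunkedWriter__wrapped and cannot clash with anything proxied
# from the wrapped stream through __getattr__. A self-contained demo:
class _MangleDemo(object):
    def __init__(self):
        self.__wrapped = 'x'  # actually stored as _MangleDemo__wrapped

assert '_MangleDemo__wrapped' in vars(_MangleDemo())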
+ self.__wrapped = wrapped + + def __getattr__(self, name): + return getattr(self.__wrapped, name) + + def write(self, text): + total_to_write = len(text) + written = 0 + + while written < total_to_write: + to_write = min(total_to_write - written, MAX_BYTES_WRITTEN) + self.__wrapped.write(text[written:written+to_write]) + written += to_write + + +_wrapped_std_streams = set() + + +def _wrap_std_stream(name): + # Python 2 & Windows 7 and below + if PY2 and sys.getwindowsversion()[:2] <= (6, 1) and name not in _wrapped_std_streams: + setattr(sys, name, WindowsChunkedWriter(getattr(sys, name))) + _wrapped_std_streams.add(name) + + +def _get_text_stdin(buffer_stream): + text_stream = _NonClosingTextIOWrapper( + io.BufferedReader(_WindowsConsoleReader(STDIN_HANDLE)), + 'utf-16-le', 'strict', line_buffering=True) + return ConsoleStream(text_stream, buffer_stream) + + +def _get_text_stdout(buffer_stream): + text_stream = _NonClosingTextIOWrapper( + io.BufferedWriter(_WindowsConsoleWriter(STDOUT_HANDLE)), + 'utf-16-le', 'strict', line_buffering=True) + return ConsoleStream(text_stream, buffer_stream) + + +def _get_text_stderr(buffer_stream): + text_stream = _NonClosingTextIOWrapper( + io.BufferedWriter(_WindowsConsoleWriter(STDERR_HANDLE)), + 'utf-16-le', 'strict', line_buffering=True) + return ConsoleStream(text_stream, buffer_stream) + + +if PY2: + def _hash_py_argv(): + return zlib.crc32('\x00'.join(sys.argv[1:])) + + _initial_argv_hash = _hash_py_argv() + + def _get_windows_argv(): + argc = c_int(0) + argv_unicode = CommandLineToArgvW(GetCommandLineW(), byref(argc)) + argv = [argv_unicode[i] for i in range(0, argc.value)] + + if not hasattr(sys, 'frozen'): + argv = argv[1:] + while len(argv) > 0: + arg = argv[0] + if not arg.startswith('-') or arg == '-': + break + argv = argv[1:] + if arg.startswith(('-c', '-m')): + break + + return argv[1:] + + +_stream_factories = { + 0: _get_text_stdin, + 1: _get_text_stdout, + 2: _get_text_stderr, +} + + +def _get_windows_console_stream(f, encoding, errors): + if get_buffer is not None and \ + encoding in ('utf-16-le', None) \ + and errors in ('strict', None) and \ + hasattr(f, 'isatty') and f.isatty(): + func = _stream_factories.get(f.fileno()) + if func is not None: + if not PY2: + f = getattr(f, 'buffer', None) + if f is None: + return None + else: + # If we are on Python 2 we need to set the stream that we + # deal with to binary mode as otherwise the exercise if a + # bit moot. The same problems apply as for + # get_binary_stdin and friends from _compat. 
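# Aside (hedged sketch with a made-up helper name, not vendored code):
# stripped of the Windows details, the dispatch above amounts to "if the
# stream is an interactive console, look up a replacement factory by
# fileno(), otherwise leave the stream alone":
def _pick_console_factory(stream, factories):
    # Pipes and regular files fall through to None so the caller keeps
    # the stream it already had.
    if hasattr(stream, 'isatty') and stream.isatty():
        return factories.get(stream.fileno())
    return None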
+ msvcrt.setmode(f.fileno(), os.O_BINARY) + return func(f) diff --git a/python/click/core.py b/python/click/core.py new file mode 100644 index 0000000..7a1e342 --- /dev/null +++ b/python/click/core.py @@ -0,0 +1,1856 @@ +import errno +import inspect +import os +import sys +from contextlib import contextmanager +from itertools import repeat +from functools import update_wrapper + +from .types import convert_type, IntRange, BOOL +from .utils import PacifyFlushWrapper, make_str, make_default_short_help, \ + echo, get_os_args +from .exceptions import ClickException, UsageError, BadParameter, Abort, \ + MissingParameter, Exit +from .termui import prompt, confirm, style +from .formatting import HelpFormatter, join_options +from .parser import OptionParser, split_opt +from .globals import push_context, pop_context + +from ._compat import PY2, isidentifier, iteritems, string_types +from ._unicodefun import _check_for_unicode_literals, _verify_python3_env + + +_missing = object() + + +SUBCOMMAND_METAVAR = 'COMMAND [ARGS]...' +SUBCOMMANDS_METAVAR = 'COMMAND1 [ARGS]... [COMMAND2 [ARGS]...]...' + +DEPRECATED_HELP_NOTICE = ' (DEPRECATED)' +DEPRECATED_INVOKE_NOTICE = 'DeprecationWarning: ' + \ + 'The command %(name)s is deprecated.' + + +def _maybe_show_deprecated_notice(cmd): + if cmd.deprecated: + echo(style(DEPRECATED_INVOKE_NOTICE % {'name': cmd.name}, fg='red'), err=True) + + +def fast_exit(code): + """Exit without garbage collection, this speeds up exit by about 10ms for + things like bash completion. + """ + sys.stdout.flush() + sys.stderr.flush() + os._exit(code) + + +def _bashcomplete(cmd, prog_name, complete_var=None): + """Internal handler for the bash completion support.""" + if complete_var is None: + complete_var = '_%s_COMPLETE' % (prog_name.replace('-', '_')).upper() + complete_instr = os.environ.get(complete_var) + if not complete_instr: + return + + from ._bashcomplete import bashcomplete + if bashcomplete(cmd, prog_name, complete_var, complete_instr): + fast_exit(1) + + +def _check_multicommand(base_command, cmd_name, cmd, register=False): + if not base_command.chain or not isinstance(cmd, MultiCommand): + return + if register: + hint = 'It is not possible to add multi commands as children to ' \ + 'another multi command that is in chain mode' + else: + hint = 'Found a multi command as subcommand to a multi command ' \ + 'that is in chain mode. This is not supported' + raise RuntimeError('%s. Command "%s" is set to chain and "%s" was ' + 'added as subcommand but it in itself is a ' + 'multi command. ("%s" is a %s within a chained ' + '%s named "%s").' % ( + hint, base_command.name, cmd_name, + cmd_name, cmd.__class__.__name__, + base_command.__class__.__name__, + base_command.name)) + + +def batch(iterable, batch_size): + return list(zip(*repeat(iter(iterable), batch_size))) + + +def invoke_param_callback(callback, ctx, param, value): + code = getattr(callback, '__code__', None) + args = getattr(code, 'co_argcount', 3) + + if args < 3: + # This will become a warning in Click 3.0: + from warnings import warn + warn(Warning('Invoked legacy parameter callback "%s". The new ' + 'signature for such callbacks starting with ' + 'click 2.0 is (ctx, param, value).' + % callback), stacklevel=3) + return callback(ctx, value) + return callback(ctx, param, value) + + +@contextmanager +def augment_usage_errors(ctx, param=None): + """Context manager that attaches extra information to exceptions that + fly. 
+ """ + try: + yield + except BadParameter as e: + if e.ctx is None: + e.ctx = ctx + if param is not None and e.param is None: + e.param = param + raise + except UsageError as e: + if e.ctx is None: + e.ctx = ctx + raise + + +def iter_params_for_processing(invocation_order, declaration_order): + """Given a sequence of parameters in the order as should be considered + for processing and an iterable of parameters that exist, this returns + a list in the correct order as they should be processed. + """ + def sort_key(item): + try: + idx = invocation_order.index(item) + except ValueError: + idx = float('inf') + return (not item.is_eager, idx) + + return sorted(declaration_order, key=sort_key) + + +class Context(object): + """The context is a special internal object that holds state relevant + for the script execution at every single level. It's normally invisible + to commands unless they opt-in to getting access to it. + + The context is useful as it can pass internal objects around and can + control special execution features such as reading data from + environment variables. + + A context can be used as context manager in which case it will call + :meth:`close` on teardown. + + .. versionadded:: 2.0 + Added the `resilient_parsing`, `help_option_names`, + `token_normalize_func` parameters. + + .. versionadded:: 3.0 + Added the `allow_extra_args` and `allow_interspersed_args` + parameters. + + .. versionadded:: 4.0 + Added the `color`, `ignore_unknown_options`, and + `max_content_width` parameters. + + :param command: the command class for this context. + :param parent: the parent context. + :param info_name: the info name for this invocation. Generally this + is the most descriptive name for the script or + command. For the toplevel script it is usually + the name of the script, for commands below it it's + the name of the script. + :param obj: an arbitrary object of user data. + :param auto_envvar_prefix: the prefix to use for automatic environment + variables. If this is `None` then reading + from environment variables is disabled. This + does not affect manually set environment + variables which are always read. + :param default_map: a dictionary (like object) with default values + for parameters. + :param terminal_width: the width of the terminal. The default is + inherit from parent context. If no context + defines the terminal width then auto + detection will be applied. + :param max_content_width: the maximum width for content rendered by + Click (this currently only affects help + pages). This defaults to 80 characters if + not overridden. In other words: even if the + terminal is larger than that, Click will not + format things wider than 80 characters by + default. In addition to that, formatters might + add some safety mapping on the right. + :param resilient_parsing: if this flag is enabled then Click will + parse without any interactivity or callback + invocation. Default values will also be + ignored. This is useful for implementing + things such as completion support. + :param allow_extra_args: if this is set to `True` then extra arguments + at the end will not raise an error and will be + kept on the context. The default is to inherit + from the command. + :param allow_interspersed_args: if this is set to `False` then options + and arguments cannot be mixed. The + default is to inherit from the command. + :param ignore_unknown_options: instructs click to ignore options it does + not know and keeps them for later + processing. 
+ :param help_option_names: optionally a list of strings that define how + the default help parameter is named. The + default is ``['--help']``. + :param token_normalize_func: an optional function that is used to + normalize tokens (options, choices, + etc.). This for instance can be used to + implement case insensitive behavior. + :param color: controls if the terminal supports ANSI colors or not. The + default is autodetection. This is only needed if ANSI + codes are used in texts that Click prints which is by + default not the case. This for instance would affect + help output. + """ + + def __init__(self, command, parent=None, info_name=None, obj=None, + auto_envvar_prefix=None, default_map=None, + terminal_width=None, max_content_width=None, + resilient_parsing=False, allow_extra_args=None, + allow_interspersed_args=None, + ignore_unknown_options=None, help_option_names=None, + token_normalize_func=None, color=None): + #: the parent context or `None` if none exists. + self.parent = parent + #: the :class:`Command` for this context. + self.command = command + #: the descriptive information name + self.info_name = info_name + #: the parsed parameters except if the value is hidden in which + #: case it's not remembered. + self.params = {} + #: the leftover arguments. + self.args = [] + #: protected arguments. These are arguments that are prepended + #: to `args` when certain parsing scenarios are encountered but + #: must be never propagated to another arguments. This is used + #: to implement nested parsing. + self.protected_args = [] + if obj is None and parent is not None: + obj = parent.obj + #: the user object stored. + self.obj = obj + self._meta = getattr(parent, 'meta', {}) + + #: A dictionary (-like object) with defaults for parameters. + if default_map is None \ + and parent is not None \ + and parent.default_map is not None: + default_map = parent.default_map.get(info_name) + self.default_map = default_map + + #: This flag indicates if a subcommand is going to be executed. A + #: group callback can use this information to figure out if it's + #: being executed directly or because the execution flow passes + #: onwards to a subcommand. By default it's None, but it can be + #: the name of the subcommand to execute. + #: + #: If chaining is enabled this will be set to ``'*'`` in case + #: any commands are executed. It is however not possible to + #: figure out which ones. If you require this knowledge you + #: should use a :func:`resultcallback`. + self.invoked_subcommand = None + + if terminal_width is None and parent is not None: + terminal_width = parent.terminal_width + #: The width of the terminal (None is autodetection). + self.terminal_width = terminal_width + + if max_content_width is None and parent is not None: + max_content_width = parent.max_content_width + #: The maximum width of formatted content (None implies a sensible + #: default which is 80 for most things). + self.max_content_width = max_content_width + + if allow_extra_args is None: + allow_extra_args = command.allow_extra_args + #: Indicates if the context allows extra args or if it should + #: fail on parsing. + #: + #: .. versionadded:: 3.0 + self.allow_extra_args = allow_extra_args + + if allow_interspersed_args is None: + allow_interspersed_args = command.allow_interspersed_args + #: Indicates if the context allows mixing of arguments and + #: options or not. + #: + #: .. 
versionadded:: 3.0 + self.allow_interspersed_args = allow_interspersed_args + + if ignore_unknown_options is None: + ignore_unknown_options = command.ignore_unknown_options + #: Instructs click to ignore options that a command does not + #: understand and will store it on the context for later + #: processing. This is primarily useful for situations where you + #: want to call into external programs. Generally this pattern is + #: strongly discouraged because it's not possibly to losslessly + #: forward all arguments. + #: + #: .. versionadded:: 4.0 + self.ignore_unknown_options = ignore_unknown_options + + if help_option_names is None: + if parent is not None: + help_option_names = parent.help_option_names + else: + help_option_names = ['--help'] + + #: The names for the help options. + self.help_option_names = help_option_names + + if token_normalize_func is None and parent is not None: + token_normalize_func = parent.token_normalize_func + + #: An optional normalization function for tokens. This is + #: options, choices, commands etc. + self.token_normalize_func = token_normalize_func + + #: Indicates if resilient parsing is enabled. In that case Click + #: will do its best to not cause any failures and default values + #: will be ignored. Useful for completion. + self.resilient_parsing = resilient_parsing + + # If there is no envvar prefix yet, but the parent has one and + # the command on this level has a name, we can expand the envvar + # prefix automatically. + if auto_envvar_prefix is None: + if parent is not None \ + and parent.auto_envvar_prefix is not None and \ + self.info_name is not None: + auto_envvar_prefix = '%s_%s' % (parent.auto_envvar_prefix, + self.info_name.upper()) + else: + auto_envvar_prefix = auto_envvar_prefix.upper() + self.auto_envvar_prefix = auto_envvar_prefix + + if color is None and parent is not None: + color = parent.color + + #: Controls if styling output is wanted or not. + self.color = color + + self._close_callbacks = [] + self._depth = 0 + + def __enter__(self): + self._depth += 1 + push_context(self) + return self + + def __exit__(self, exc_type, exc_value, tb): + self._depth -= 1 + if self._depth == 0: + self.close() + pop_context() + + @contextmanager + def scope(self, cleanup=True): + """This helper method can be used with the context object to promote + it to the current thread local (see :func:`get_current_context`). + The default behavior of this is to invoke the cleanup functions which + can be disabled by setting `cleanup` to `False`. The cleanup + functions are typically used for things such as closing file handles. + + If the cleanup is intended the context object can also be directly + used as a context manager. + + Example usage:: + + with ctx.scope(): + assert get_current_context() is ctx + + This is equivalent:: + + with ctx: + assert get_current_context() is ctx + + .. versionadded:: 5.0 + + :param cleanup: controls if the cleanup functions should be run or + not. The default is to run these functions. In + some situations the context only wants to be + temporarily pushed in which case this can be disabled. + Nested pushes automatically defer the cleanup. + """ + if not cleanup: + self._depth += 1 + try: + with self as rv: + yield rv + finally: + if not cleanup: + self._depth -= 1 + + @property + def meta(self): + """This is a dictionary which is shared with all the contexts + that are nested. It exists so that click utilities can store some + state here if they need to. 
It is however the responsibility of + that code to manage this dictionary well. + + The keys are supposed to be unique dotted strings. For instance + module paths are a good choice for it. What is stored in there is + irrelevant for the operation of click. However what is important is + that code that places data here adheres to the general semantics of + the system. + + Example usage:: + + LANG_KEY = __name__ + '.lang' + + def set_language(value): + ctx = get_current_context() + ctx.meta[LANG_KEY] = value + + def get_language(): + return get_current_context().meta.get(LANG_KEY, 'en_US') + + .. versionadded:: 5.0 + """ + return self._meta + + def make_formatter(self): + """Creates the formatter for the help and usage output.""" + return HelpFormatter(width=self.terminal_width, + max_width=self.max_content_width) + + def call_on_close(self, f): + """This decorator remembers a function as callback that should be + executed when the context tears down. This is most useful to bind + resource handling to the script execution. For instance, file objects + opened by the :class:`File` type will register their close callbacks + here. + + :param f: the function to execute on teardown. + """ + self._close_callbacks.append(f) + return f + + def close(self): + """Invokes all close callbacks.""" + for cb in self._close_callbacks: + cb() + self._close_callbacks = [] + + @property + def command_path(self): + """The computed command path. This is used for the ``usage`` + information on the help page. It's automatically created by + combining the info names of the chain of contexts to the root. + """ + rv = '' + if self.info_name is not None: + rv = self.info_name + if self.parent is not None: + rv = self.parent.command_path + ' ' + rv + return rv.lstrip() + + def find_root(self): + """Finds the outermost context.""" + node = self + while node.parent is not None: + node = node.parent + return node + + def find_object(self, object_type): + """Finds the closest object of a given type.""" + node = self + while node is not None: + if isinstance(node.obj, object_type): + return node.obj + node = node.parent + + def ensure_object(self, object_type): + """Like :meth:`find_object` but sets the innermost object to a + new instance of `object_type` if it does not exist. + """ + rv = self.find_object(object_type) + if rv is None: + self.obj = rv = object_type() + return rv + + def lookup_default(self, name): + """Looks up the default for a parameter name. This by default + looks into the :attr:`default_map` if available. + """ + if self.default_map is not None: + rv = self.default_map.get(name) + if callable(rv): + rv = rv() + return rv + + def fail(self, message): + """Aborts the execution of the program with a specific error + message. + + :param message: the error message to fail with. + """ + raise UsageError(message, self) + + def abort(self): + """Aborts the script.""" + raise Abort() + + def exit(self, code=0): + """Exits the application with a given exit code.""" + raise Exit(code) + + def get_usage(self): + """Helper method to get formatted usage string for the current + context and command. + """ + return self.command.get_usage(self) + + def get_help(self): + """Helper method to get formatted help page for the current + context and command. + """ + return self.command.get_help(self) + + def invoke(*args, **kwargs): + """Invokes a command callback in exactly the way it expects. There + are two ways to invoke this method: + + 1. 
the first argument can be a callback and all other arguments and + keyword arguments are forwarded directly to the function. + 2. the first argument is a click command object. In that case all + arguments are forwarded as well but proper click parameters + (options and click arguments) must be keyword arguments and Click + will fill in defaults. + + Note that before Click 3.2 keyword arguments were not properly filled + in against the intention of this code and no context was created. For + more information about this change and why it was done in a bugfix + release see :ref:`upgrade-to-3.2`. + """ + self, callback = args[:2] + ctx = self + + # It's also possible to invoke another command which might or + # might not have a callback. In that case we also fill + # in defaults and make a new context for this command. + if isinstance(callback, Command): + other_cmd = callback + callback = other_cmd.callback + ctx = Context(other_cmd, info_name=other_cmd.name, parent=self) + if callback is None: + raise TypeError('The given command does not have a ' + 'callback that can be invoked.') + + for param in other_cmd.params: + if param.name not in kwargs and param.expose_value: + kwargs[param.name] = param.get_default(ctx) + + args = args[2:] + with augment_usage_errors(self): + with ctx: + return callback(*args, **kwargs) + + def forward(*args, **kwargs): + """Similar to :meth:`invoke` but fills in default keyword + arguments from the current context if the other command expects + it. This cannot invoke callbacks directly, only other commands. + """ + self, cmd = args[:2] + + # It's also possible to invoke another command which might or + # might not have a callback. + if not isinstance(cmd, Command): + raise TypeError('Callback is not a command.') + + for param in self.params: + if param not in kwargs: + kwargs[param] = self.params[param] + + return self.invoke(cmd, **kwargs) + + +class BaseCommand(object): + """The base command implements the minimal API contract of commands. + Most code will never use this as it does not implement a lot of useful + functionality but it can act as the direct subclass of alternative + parsing methods that do not depend on the Click parser. + + For instance, this can be used to bridge Click and other systems like + argparse or docopt. + + Because base commands do not implement a lot of the API that other + parts of Click take for granted, they are not supported for all + operations. For instance, they cannot be used with the decorators + usually and they have no built-in callback system. + + .. versionchanged:: 2.0 + Added the `context_settings` parameter. + + :param name: the name of the command to use unless a group overrides it. + :param context_settings: an optional dictionary with defaults that are + passed to the context object. + """ + #: the default for the :attr:`Context.allow_extra_args` flag. + allow_extra_args = False + #: the default for the :attr:`Context.allow_interspersed_args` flag. + allow_interspersed_args = True + #: the default for the :attr:`Context.ignore_unknown_options` flag. + ignore_unknown_options = False + + def __init__(self, name, context_settings=None): + #: the name the command thinks it has. Upon registering a command + #: on a :class:`Group` the group will default the command name + #: with this information. You should instead use the + #: :class:`Context`\'s :attr:`~Context.info_name` attribute. 
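# Aside (usage sketch with made-up names, not part of the diff): this API
# also lets commands be assembled programmatically, without the decorators:
import click

def _greet():
    click.echo('hello')

_greet_cmd = click.Command('greet', callback=_greet)
# _greet_cmd.main([], standalone_mode=False) would print 'hello' and
# return None instead of exiting the interpreter.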
+        self.name = name + if context_settings is None: + context_settings = {} + #: an optional dictionary with defaults passed to the context. + self.context_settings = context_settings + + def get_usage(self, ctx): + raise NotImplementedError('Base commands cannot get usage') + + def get_help(self, ctx): + raise NotImplementedError('Base commands cannot get help') + + def make_context(self, info_name, args, parent=None, **extra): + """This function when given an info name and arguments will kick + off the parsing and create a new :class:`Context`. It does not + invoke the actual command callback though. + + :param info_name: the info name for this invocation. Generally this + is the most descriptive name for the script or + command. For the toplevel script it's usually + the name of the script, for commands below it it's + the name of the script. + :param args: the arguments to parse as list of strings. + :param parent: the parent context if available. + :param extra: extra keyword arguments forwarded to the context + constructor. + """ + for key, value in iteritems(self.context_settings): + if key not in extra: + extra[key] = value + ctx = Context(self, info_name=info_name, parent=parent, **extra) + with ctx.scope(cleanup=False): + self.parse_args(ctx, args) + return ctx + + def parse_args(self, ctx, args): + """Given a context and a list of arguments this creates the parser + and parses the arguments, then modifies the context as necessary. + This is automatically invoked by :meth:`make_context`. + """ + raise NotImplementedError('Base commands do not know how to parse ' + 'arguments.') + + def invoke(self, ctx): + """Given a context, this invokes the command. The default + implementation is raising a not implemented error. + """ + raise NotImplementedError('Base commands are not invokable by default') + + def main(self, args=None, prog_name=None, complete_var=None, + standalone_mode=True, **extra): + """This is the way to invoke a script with all the bells and + whistles as a command line application. This will always terminate + the application after a call. If this is not wanted, ``SystemExit`` + needs to be caught. + + This method is also available by directly calling the instance of + a :class:`Command`. + + .. versionadded:: 3.0 + Added the `standalone_mode` flag to control the standalone mode. + + :param args: the arguments that should be used for parsing. If not + provided, ``sys.argv[1:]`` is used. + :param prog_name: the program name that should be used. By default + the program name is constructed by taking the file + name from ``sys.argv[0]``. + :param complete_var: the environment variable that controls the + bash completion support. The default is + ``"_<prog_name>_COMPLETE"`` with prog_name in + uppercase. + :param standalone_mode: the default behavior is to invoke the script + in standalone mode. Click will then + handle exceptions and convert them into + error messages and the function will never + return but shut down the interpreter. If + this is set to `False` they will be + propagated to the caller and the return + value of this function is the return value + of :meth:`invoke`. + :param extra: extra keyword arguments are forwarded to the context + constructor. See :class:`Context` for more information. + """ + # If we are in Python 3, we will verify that the environment is + # sane at this point or reject further execution to avoid a + # broken script. 
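# Aside (hedged illustration of the standalone_mode contract documented
# above; the command name is made up):
import click

@click.command()
def _demo_cli():
    click.echo('ran')

# Standalone mode always ends in SystemExit:
#     _demo_cli.main([])                      # prints 'ran', exits with 0
# Non-standalone mode hands invoke()'s return value back to the caller:
#     assert _demo_cli.main([], standalone_mode=False) is None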
+ if not PY2: + _verify_python3_env() + else: + _check_for_unicode_literals() + + if args is None: + args = get_os_args() + else: + args = list(args) + + if prog_name is None: + prog_name = make_str(os.path.basename( + sys.argv and sys.argv[0] or __file__)) + + # Hook for the Bash completion. This only activates if the Bash + # completion is actually enabled, otherwise this is quite a fast + # noop. + _bashcomplete(self, prog_name, complete_var) + + try: + try: + with self.make_context(prog_name, args, **extra) as ctx: + rv = self.invoke(ctx) + if not standalone_mode: + return rv + # it's not safe to `ctx.exit(rv)` here! + # note that `rv` may actually contain data like "1" which + # has obvious effects + # more subtle case: `rv=[None, None]` can come out of + # chained commands which all returned `None` -- so it's not + # even always obvious that `rv` indicates success/failure + # by its truthiness/falsiness + ctx.exit() + except (EOFError, KeyboardInterrupt): + echo(file=sys.stderr) + raise Abort() + except ClickException as e: + if not standalone_mode: + raise + e.show() + sys.exit(e.exit_code) + except IOError as e: + if e.errno == errno.EPIPE: + sys.stdout = PacifyFlushWrapper(sys.stdout) + sys.stderr = PacifyFlushWrapper(sys.stderr) + sys.exit(1) + else: + raise + except Exit as e: + if standalone_mode: + sys.exit(e.exit_code) + else: + # in non-standalone mode, return the exit code + # note that this is only reached if `self.invoke` above raises + # an Exit explicitly -- thus bypassing the check there which + # would return its result + # the results of non-standalone execution may therefore be + # somewhat ambiguous: if there are codepaths which lead to + # `ctx.exit(1)` and to `return 1`, the caller won't be able to + # tell the difference between the two + return e.exit_code + except Abort: + if not standalone_mode: + raise + echo('Aborted!', file=sys.stderr) + sys.exit(1) + + def __call__(self, *args, **kwargs): + """Alias for :meth:`main`.""" + return self.main(*args, **kwargs) + + +class Command(BaseCommand): + """Commands are the basic building block of command line interfaces in + Click. A basic command handles command line parsing and might dispatch + more parsing to commands nested below it. + + .. versionchanged:: 2.0 + Added the `context_settings` parameter. + + :param name: the name of the command to use unless a group overrides it. + :param context_settings: an optional dictionary with defaults that are + passed to the context object. + :param callback: the callback to invoke. This is optional. + :param params: the parameters to register with this command. This can + be either :class:`Option` or :class:`Argument` objects. + :param help: the help string to use for this command. + :param epilog: like the help string but it's printed at the end of the + help page after everything else. + :param short_help: the short help to use for this command. This is + shown on the command listing of the parent command. + :param add_help_option: by default each command registers a ``--help`` + option. This can be disabled by this parameter. + :param hidden: hide this command from help outputs. + + :param deprecated: issues a message indicating that + the command is deprecated. 
+ """ + + def __init__(self, name, context_settings=None, callback=None, + params=None, help=None, epilog=None, short_help=None, + options_metavar='[OPTIONS]', add_help_option=True, + hidden=False, deprecated=False): + BaseCommand.__init__(self, name, context_settings) + #: the callback to execute when the command fires. This might be + #: `None` in which case nothing happens. + self.callback = callback + #: the list of parameters for this command in the order they + #: should show up in the help page and execute. Eager parameters + #: will automatically be handled before non eager ones. + self.params = params or [] + # if a form feed (page break) is found in the help text, truncate help + # text to the content preceding the first form feed + if help and '\f' in help: + help = help.split('\f', 1)[0] + self.help = help + self.epilog = epilog + self.options_metavar = options_metavar + self.short_help = short_help + self.add_help_option = add_help_option + self.hidden = hidden + self.deprecated = deprecated + + def get_usage(self, ctx): + formatter = ctx.make_formatter() + self.format_usage(ctx, formatter) + return formatter.getvalue().rstrip('\n') + + def get_params(self, ctx): + rv = self.params + help_option = self.get_help_option(ctx) + if help_option is not None: + rv = rv + [help_option] + return rv + + def format_usage(self, ctx, formatter): + """Writes the usage line into the formatter.""" + pieces = self.collect_usage_pieces(ctx) + formatter.write_usage(ctx.command_path, ' '.join(pieces)) + + def collect_usage_pieces(self, ctx): + """Returns all the pieces that go into the usage line and returns + it as a list of strings. + """ + rv = [self.options_metavar] + for param in self.get_params(ctx): + rv.extend(param.get_usage_pieces(ctx)) + return rv + + def get_help_option_names(self, ctx): + """Returns the names for the help option.""" + all_names = set(ctx.help_option_names) + for param in self.params: + all_names.difference_update(param.opts) + all_names.difference_update(param.secondary_opts) + return all_names + + def get_help_option(self, ctx): + """Returns the help option object.""" + help_options = self.get_help_option_names(ctx) + if not help_options or not self.add_help_option: + return + + def show_help(ctx, param, value): + if value and not ctx.resilient_parsing: + echo(ctx.get_help(), color=ctx.color) + ctx.exit() + return Option(help_options, is_flag=True, + is_eager=True, expose_value=False, + callback=show_help, + help='Show this message and exit.') + + def make_parser(self, ctx): + """Creates the underlying option parser for this command.""" + parser = OptionParser(ctx) + for param in self.get_params(ctx): + param.add_to_parser(parser, ctx) + return parser + + def get_help(self, ctx): + """Formats the help into a string and returns it. This creates a + formatter and will call into the following formatting methods: + """ + formatter = ctx.make_formatter() + self.format_help(ctx, formatter) + return formatter.getvalue().rstrip('\n') + + def get_short_help_str(self, limit=45): + """Gets short help for the command or makes it by shortening the long help string.""" + return self.short_help or self.help and make_default_short_help(self.help, limit) or '' + + def format_help(self, ctx, formatter): + """Writes the help into the formatter if it exists. 
+ + This calls into the following methods: + + - :meth:`format_usage` + - :meth:`format_help_text` + - :meth:`format_options` + - :meth:`format_epilog` + """ + self.format_usage(ctx, formatter) + self.format_help_text(ctx, formatter) + self.format_options(ctx, formatter) + self.format_epilog(ctx, formatter) + + def format_help_text(self, ctx, formatter): + """Writes the help text to the formatter if it exists.""" + if self.help: + formatter.write_paragraph() + with formatter.indentation(): + help_text = self.help + if self.deprecated: + help_text += DEPRECATED_HELP_NOTICE + formatter.write_text(help_text) + elif self.deprecated: + formatter.write_paragraph() + with formatter.indentation(): + formatter.write_text(DEPRECATED_HELP_NOTICE) + + def format_options(self, ctx, formatter): + """Writes all the options into the formatter if they exist.""" + opts = [] + for param in self.get_params(ctx): + rv = param.get_help_record(ctx) + if rv is not None: + opts.append(rv) + + if opts: + with formatter.section('Options'): + formatter.write_dl(opts) + + def format_epilog(self, ctx, formatter): + """Writes the epilog into the formatter if it exists.""" + if self.epilog: + formatter.write_paragraph() + with formatter.indentation(): + formatter.write_text(self.epilog) + + def parse_args(self, ctx, args): + parser = self.make_parser(ctx) + opts, args, param_order = parser.parse_args(args=args) + + for param in iter_params_for_processing( + param_order, self.get_params(ctx)): + value, args = param.handle_parse_result(ctx, opts, args) + + if args and not ctx.allow_extra_args and not ctx.resilient_parsing: + ctx.fail('Got unexpected extra argument%s (%s)' + % (len(args) != 1 and 's' or '', + ' '.join(map(make_str, args)))) + + ctx.args = args + return args + + def invoke(self, ctx): + """Given a context, this invokes the attached callback (if it exists) + in the right way. + """ + _maybe_show_deprecated_notice(self) + if self.callback is not None: + return ctx.invoke(self.callback, **ctx.params) + + +class MultiCommand(Command): + """A multi command is the basic implementation of a command that + dispatches to subcommands. The most common version is the + :class:`Group`. + + :param invoke_without_command: this controls how the multi command itself + is invoked. By default it's only invoked + if a subcommand is provided. + :param no_args_is_help: this controls what happens if no arguments are + provided. This option is enabled by default if + `invoke_without_command` is disabled or disabled + if it's enabled. If enabled this will add + ``--help`` as argument if no arguments are + passed. + :param subcommand_metavar: the string that is used in the documentation + to indicate the subcommand place. + :param chain: if this is set to `True` chaining of multiple subcommands + is enabled. This restricts the form of commands in that + they cannot have optional arguments but it allows + multiple commands to be chained together. + :param result_callback: the result callback to attach to this multi + command. 
+ """ + allow_extra_args = True + allow_interspersed_args = False + + def __init__(self, name=None, invoke_without_command=False, + no_args_is_help=None, subcommand_metavar=None, + chain=False, result_callback=None, **attrs): + Command.__init__(self, name, **attrs) + if no_args_is_help is None: + no_args_is_help = not invoke_without_command + self.no_args_is_help = no_args_is_help + self.invoke_without_command = invoke_without_command + if subcommand_metavar is None: + if chain: + subcommand_metavar = SUBCOMMANDS_METAVAR + else: + subcommand_metavar = SUBCOMMAND_METAVAR + self.subcommand_metavar = subcommand_metavar + self.chain = chain + #: The result callback that is stored. This can be set or + #: overridden with the :func:`resultcallback` decorator. + self.result_callback = result_callback + + if self.chain: + for param in self.params: + if isinstance(param, Argument) and not param.required: + raise RuntimeError('Multi commands in chain mode cannot ' + 'have optional arguments.') + + def collect_usage_pieces(self, ctx): + rv = Command.collect_usage_pieces(self, ctx) + rv.append(self.subcommand_metavar) + return rv + + def format_options(self, ctx, formatter): + Command.format_options(self, ctx, formatter) + self.format_commands(ctx, formatter) + + def resultcallback(self, replace=False): + """Adds a result callback to the chain command. By default if a + result callback is already registered this will chain them but + this can be disabled with the `replace` parameter. The result + callback is invoked with the return value of the subcommand + (or the list of return values from all subcommands if chaining + is enabled) as well as the parameters as they would be passed + to the main callback. + + Example:: + + @click.group() + @click.option('-i', '--input', default=23) + def cli(input): + return 42 + + @cli.resultcallback() + def process_result(result, input): + return result + input + + .. versionadded:: 3.0 + + :param replace: if set to `True` an already existing result + callback will be removed. + """ + def decorator(f): + old_callback = self.result_callback + if old_callback is None or replace: + self.result_callback = f + return f + def function(__value, *args, **kwargs): + return f(old_callback(__value, *args, **kwargs), + *args, **kwargs) + self.result_callback = rv = update_wrapper(function, f) + return rv + return decorator + + def format_commands(self, ctx, formatter): + """Extra format methods for multi methods that adds all the commands + after the options. + """ + commands = [] + for subcommand in self.list_commands(ctx): + cmd = self.get_command(ctx, subcommand) + # What is this, the tool lied about a command. 
Ignore it + if cmd is None: + continue + if cmd.hidden: + continue + + commands.append((subcommand, cmd)) + + # allow for 3 times the default spacing + if len(commands): + limit = formatter.width - 6 - max(len(cmd[0]) for cmd in commands) + + rows = [] + for subcommand, cmd in commands: + help = cmd.get_short_help_str(limit) + rows.append((subcommand, help)) + + if rows: + with formatter.section('Commands'): + formatter.write_dl(rows) + + def parse_args(self, ctx, args): + if not args and self.no_args_is_help and not ctx.resilient_parsing: + echo(ctx.get_help(), color=ctx.color) + ctx.exit() + + rest = Command.parse_args(self, ctx, args) + if self.chain: + ctx.protected_args = rest + ctx.args = [] + elif rest: + ctx.protected_args, ctx.args = rest[:1], rest[1:] + + return ctx.args + + def invoke(self, ctx): + def _process_result(value): + if self.result_callback is not None: + value = ctx.invoke(self.result_callback, value, + **ctx.params) + return value + + if not ctx.protected_args: + # If we are invoked without command the chain flag controls + # how this happens. If we are not in chain mode, the return + # value here is the return value of the command. + # If however we are in chain mode, the return value is the + # return value of the result processor invoked with an empty + # list (which means that no subcommand actually was executed). + if self.invoke_without_command: + if not self.chain: + return Command.invoke(self, ctx) + with ctx: + Command.invoke(self, ctx) + return _process_result([]) + ctx.fail('Missing command.') + + # Fetch args back out + args = ctx.protected_args + ctx.args + ctx.args = [] + ctx.protected_args = [] + + # If we're not in chain mode, we only allow the invocation of a + # single command but we also inform the current context about the + # name of the command to invoke. + if not self.chain: + # Make sure the context is entered so we do not clean up + # resources until the result processor has worked. + with ctx: + cmd_name, cmd, args = self.resolve_command(ctx, args) + ctx.invoked_subcommand = cmd_name + Command.invoke(self, ctx) + sub_ctx = cmd.make_context(cmd_name, args, parent=ctx) + with sub_ctx: + return _process_result(sub_ctx.command.invoke(sub_ctx)) + + # In chain mode we create the contexts step by step, but after the + # base command has been invoked. Because at that point we do not + # know the subcommands yet, the invoked subcommand attribute is + # set to ``*`` to inform the command that subcommands are executed + # but nothing else. + with ctx: + ctx.invoked_subcommand = args and '*' or None + Command.invoke(self, ctx) + + # Otherwise we make every single context and invoke them in a + # chain. In that case the return value to the result processor + # is the list of all invoked subcommand's results. + contexts = [] + while args: + cmd_name, cmd, args = self.resolve_command(ctx, args) + sub_ctx = cmd.make_context(cmd_name, args, parent=ctx, + allow_extra_args=True, + allow_interspersed_args=False) + contexts.append(sub_ctx) + args, sub_ctx.args = sub_ctx.args, [] + + rv = [] + for sub_ctx in contexts: + with sub_ctx: + rv.append(sub_ctx.command.invoke(sub_ctx)) + return _process_result(rv) + + def resolve_command(self, ctx, args): + cmd_name = make_str(args[0]) + original_cmd_name = cmd_name + + # Get the command + cmd = self.get_command(ctx, cmd_name) + + # If we can't find the command but there is a normalization + # function available, we try with that one. 
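# Aside (usage sketch with made-up names): token_normalize_func is the
# user-facing hook behind this retry, e.g. for case-insensitive commands:
import click

@click.group(context_settings={'token_normalize_func': lambda t: t.lower()})
def _cli():
    pass

@_cli.command()
def sync():
    click.echo('synced')

# Given `SYNC`, the first lookup fails, the token is normalized to `sync`,
# and the retry above resolves it.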
+ if cmd is None and ctx.token_normalize_func is not None: + cmd_name = ctx.token_normalize_func(cmd_name) + cmd = self.get_command(ctx, cmd_name) + + # If we don't find the command we want to show an error message + # to the user that it was not provided. However, there is + # something else we should do: if the first argument looks like + # an option we want to kick off parsing again for arguments to + # resolve things like --help which now should go to the main + # place. + if cmd is None and not ctx.resilient_parsing: + if split_opt(cmd_name)[0]: + self.parse_args(ctx, ctx.args) + ctx.fail('No such command "%s".' % original_cmd_name) + + return cmd_name, cmd, args[1:] + + def get_command(self, ctx, cmd_name): + """Given a context and a command name, this returns a + :class:`Command` object if it exists or returns `None`. + """ + raise NotImplementedError() + + def list_commands(self, ctx): + """Returns a list of subcommand names in the order they should + appear. + """ + return [] + + +class Group(MultiCommand): + """A group allows a command to have subcommands attached. This is the + most common way to implement nesting in Click. + + :param commands: a dictionary of commands. + """ + + def __init__(self, name=None, commands=None, **attrs): + MultiCommand.__init__(self, name, **attrs) + #: the registered subcommands by their exported names. + self.commands = commands or {} + + def add_command(self, cmd, name=None): + """Registers another :class:`Command` with this group. If the name + is not provided, the name of the command is used. + """ + name = name or cmd.name + if name is None: + raise TypeError('Command has no name.') + _check_multicommand(self, name, cmd, register=True) + self.commands[name] = cmd + + def command(self, *args, **kwargs): + """A shortcut decorator for declaring and attaching a command to + the group. This takes the same arguments as :func:`command` but + immediately registers the created command with this instance by + calling into :meth:`add_command`. + """ + def decorator(f): + cmd = command(*args, **kwargs)(f) + self.add_command(cmd) + return cmd + return decorator + + def group(self, *args, **kwargs): + """A shortcut decorator for declaring and attaching a group to + the group. This takes the same arguments as :func:`group` but + immediately registers the created command with this instance by + calling into :meth:`add_command`. + """ + def decorator(f): + cmd = group(*args, **kwargs)(f) + self.add_command(cmd) + return cmd + return decorator + + def get_command(self, ctx, cmd_name): + return self.commands.get(cmd_name) + + def list_commands(self, ctx): + return sorted(self.commands) + + +class CommandCollection(MultiCommand): + """A command collection is a multi command that merges multiple multi + commands together into one. This is a straightforward implementation + that accepts a list of different multi commands as sources and + provides all the commands for each of them. + """ + + def __init__(self, name=None, sources=None, **attrs): + MultiCommand.__init__(self, name, **attrs) + #: The list of registered multi commands. 
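# Aside (usage sketch with made-up names): a CommandCollection presents
# several groups as a single CLI by consulting each source in order:
import click

@click.group()
def _base():
    pass

@_base.command()
def build():
    click.echo('building')

@click.group()
def _extras():
    pass

@_extras.command()
def lint():
    click.echo('linting')

_combined = click.CommandCollection(sources=[_base, _extras])
# _combined resolves both `build` and `lint`; its list_commands() merges
# and sorts the names from every source.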
+ self.sources = sources or [] + + def add_source(self, multi_cmd): + """Adds a new multi command to the chain dispatcher.""" + self.sources.append(multi_cmd) + + def get_command(self, ctx, cmd_name): + for source in self.sources: + rv = source.get_command(ctx, cmd_name) + if rv is not None: + if self.chain: + _check_multicommand(self, cmd_name, rv) + return rv + + def list_commands(self, ctx): + rv = set() + for source in self.sources: + rv.update(source.list_commands(ctx)) + return sorted(rv) + + +class Parameter(object): + r"""A parameter to a command comes in two versions: they are either + :class:`Option`\s or :class:`Argument`\s. Other subclasses are currently + not supported by design as some of the internals for parsing are + intentionally not finalized. + + Some settings are supported by both options and arguments. + + .. versionchanged:: 2.0 + Changed signature for parameter callback to also be passed the + parameter. In Click 2.0, the old callback format will still work, + but it will raise a warning to give you change to migrate the + code easier. + + :param param_decls: the parameter declarations for this option or + argument. This is a list of flags or argument + names. + :param type: the type that should be used. Either a :class:`ParamType` + or a Python type. The later is converted into the former + automatically if supported. + :param required: controls if this is optional or not. + :param default: the default value if omitted. This can also be a callable, + in which case it's invoked when the default is needed + without any arguments. + :param callback: a callback that should be executed after the parameter + was matched. This is called as ``fn(ctx, param, + value)`` and needs to return the value. Before Click + 2.0, the signature was ``(ctx, value)``. + :param nargs: the number of arguments to match. If not ``1`` the return + value is a tuple instead of single value. The default for + nargs is ``1`` (except if the type is a tuple, then it's + the arity of the tuple). + :param metavar: how the value is represented in the help page. + :param expose_value: if this is `True` then the value is passed onwards + to the command callback and stored on the context, + otherwise it's skipped. + :param is_eager: eager values are processed before non eager ones. This + should not be set for arguments or it will inverse the + order of processing. + :param envvar: a string or list of strings that are environment variables + that should be checked. + """ + param_type_name = 'parameter' + + def __init__(self, param_decls=None, type=None, required=False, + default=None, callback=None, nargs=None, metavar=None, + expose_value=True, is_eager=False, envvar=None, + autocompletion=None): + self.name, self.opts, self.secondary_opts = \ + self._parse_decls(param_decls or (), expose_value) + + self.type = convert_type(type, default) + + # Default nargs to what the type tells us if we have that + # information available. + if nargs is None: + if self.type.is_composite: + nargs = self.type.arity + else: + nargs = 1 + + self.required = required + self.callback = callback + self.nargs = nargs + self.multiple = False + self.expose_value = expose_value + self.default = default + self.is_eager = is_eager + self.metavar = metavar + self.envvar = envvar + self.autocompletion = autocompletion + + @property + def human_readable_name(self): + """Returns the human readable name of this parameter. This is the + same as the name for options, but the metavar for arguments. 
+ """ + return self.name + + def make_metavar(self): + if self.metavar is not None: + return self.metavar + metavar = self.type.get_metavar(self) + if metavar is None: + metavar = self.type.name.upper() + if self.nargs != 1: + metavar += '...' + return metavar + + def get_default(self, ctx): + """Given a context variable this calculates the default value.""" + # Otherwise go with the regular default. + if callable(self.default): + rv = self.default() + else: + rv = self.default + return self.type_cast_value(ctx, rv) + + def add_to_parser(self, parser, ctx): + pass + + def consume_value(self, ctx, opts): + value = opts.get(self.name) + if value is None: + value = self.value_from_envvar(ctx) + if value is None: + value = ctx.lookup_default(self.name) + return value + + def type_cast_value(self, ctx, value): + """Given a value this runs it properly through the type system. + This automatically handles things like `nargs` and `multiple` as + well as composite types. + """ + if self.type.is_composite: + if self.nargs <= 1: + raise TypeError('Attempted to invoke composite type ' + 'but nargs has been set to %s. This is ' + 'not supported; nargs needs to be set to ' + 'a fixed value > 1.' % self.nargs) + if self.multiple: + return tuple(self.type(x or (), self, ctx) for x in value or ()) + return self.type(value or (), self, ctx) + + def _convert(value, level): + if level == 0: + return self.type(value, self, ctx) + return tuple(_convert(x, level - 1) for x in value or ()) + return _convert(value, (self.nargs != 1) + bool(self.multiple)) + + def process_value(self, ctx, value): + """Given a value and context this runs the logic to convert the + value as necessary. + """ + # If the value we were given is None we do nothing. This way + # code that calls this can easily figure out if something was + # not provided. Otherwise it would be converted into an empty + # tuple for multiple invocations which is inconvenient. 
+ if value is not None: + return self.type_cast_value(ctx, value) + + def value_is_missing(self, value): + if value is None: + return True + if (self.nargs != 1 or self.multiple) and value == (): + return True + return False + + def full_process_value(self, ctx, value): + value = self.process_value(ctx, value) + + if value is None and not ctx.resilient_parsing: + value = self.get_default(ctx) + + if self.required and self.value_is_missing(value): + raise MissingParameter(ctx=ctx, param=self) + + return value + + def resolve_envvar_value(self, ctx): + if self.envvar is None: + return + if isinstance(self.envvar, (tuple, list)): + for envvar in self.envvar: + rv = os.environ.get(envvar) + if rv is not None: + return rv + else: + return os.environ.get(self.envvar) + + def value_from_envvar(self, ctx): + rv = self.resolve_envvar_value(ctx) + if rv is not None and self.nargs != 1: + rv = self.type.split_envvar_value(rv) + return rv + + def handle_parse_result(self, ctx, opts, args): + with augment_usage_errors(ctx, param=self): + value = self.consume_value(ctx, opts) + try: + value = self.full_process_value(ctx, value) + except Exception: + if not ctx.resilient_parsing: + raise + value = None + if self.callback is not None: + try: + value = invoke_param_callback( + self.callback, ctx, self, value) + except Exception: + if not ctx.resilient_parsing: + raise + + if self.expose_value: + ctx.params[self.name] = value + return value, args + + def get_help_record(self, ctx): + pass + + def get_usage_pieces(self, ctx): + return [] + + def get_error_hint(self, ctx): + """Get a stringified version of the param for use in error messages to + indicate which param caused the error. + """ + hint_list = self.opts or [self.human_readable_name] + return ' / '.join('"%s"' % x for x in hint_list) + + +class Option(Parameter): + """Options are usually optional values on the command line and + have some extra features that arguments don't have. + + All other parameters are passed onwards to the parameter constructor. + + :param show_default: controls if the default value should be shown on the + help page. Normally, defaults are not shown. If this + value is a string, it shows the string instead of the + value. This is particularly useful for dynamic options. + :param show_envvar: controls if an environment variable should be shown on + the help page. Normally, environment variables + are not shown. + :param prompt: if set to `True` or a non empty string then the user will be + prompted for input. If set to `True` the prompt will be the + option name capitalized. + :param confirmation_prompt: if set then the value will need to be confirmed + if it was prompted for. + :param hide_input: if this is `True` then the input on the prompt will be + hidden from the user. This is useful for password + input. + :param is_flag: forces this option to act as a flag. The default is + auto detection. + :param flag_value: which value should be used for this flag if it's + enabled. This is set to a boolean automatically if + the option string contains a slash to mark two options. + :param multiple: if this is set to `True` then the argument is accepted + multiple times and recorded. This is similar to ``nargs`` + in how it works but supports arbitrary number of + arguments. + :param count: this flag makes an option increment an integer. + :param allow_from_autoenv: if this is enabled then the value of this + parameter will be pulled from an environment + variable in case a prefix is defined on the + context. 
+ :param help: the help string. + :param hidden: hide this option from help outputs. + """ + param_type_name = 'option' + + def __init__(self, param_decls=None, show_default=False, + prompt=False, confirmation_prompt=False, + hide_input=False, is_flag=None, flag_value=None, + multiple=False, count=False, allow_from_autoenv=True, + type=None, help=None, hidden=False, show_choices=True, + show_envvar=False, **attrs): + default_is_missing = attrs.get('default', _missing) is _missing + Parameter.__init__(self, param_decls, type=type, **attrs) + + if prompt is True: + prompt_text = self.name.replace('_', ' ').capitalize() + elif prompt is False: + prompt_text = None + else: + prompt_text = prompt + self.prompt = prompt_text + self.confirmation_prompt = confirmation_prompt + self.hide_input = hide_input + self.hidden = hidden + + # Flags + if is_flag is None: + if flag_value is not None: + is_flag = True + else: + is_flag = bool(self.secondary_opts) + if is_flag and default_is_missing: + self.default = False + if flag_value is None: + flag_value = not self.default + self.is_flag = is_flag + self.flag_value = flag_value + if self.is_flag and isinstance(self.flag_value, bool) \ + and type is None: + self.type = BOOL + self.is_bool_flag = True + else: + self.is_bool_flag = False + + # Counting + self.count = count + if count: + if type is None: + self.type = IntRange(min=0) + if default_is_missing: + self.default = 0 + + self.multiple = multiple + self.allow_from_autoenv = allow_from_autoenv + self.help = help + self.show_default = show_default + self.show_choices = show_choices + self.show_envvar = show_envvar + + # Sanity check for stuff we don't support + if __debug__: + if self.nargs < 0: + raise TypeError('Options cannot have nargs < 0') + if self.prompt and self.is_flag and not self.is_bool_flag: + raise TypeError('Cannot prompt for flags that are not bools.') + if not self.is_bool_flag and self.secondary_opts: + raise TypeError('Got secondary option for non boolean flag.') + if self.is_bool_flag and self.hide_input \ + and self.prompt is not None: + raise TypeError('Hidden input does not work with boolean ' + 'flag prompts.') + if self.count: + if self.multiple: + raise TypeError('Options cannot be multiple and count ' + 'at the same time.') + elif self.is_flag: + raise TypeError('Options cannot be count and flags at ' + 'the same time.') + + def _parse_decls(self, decls, expose_value): + opts = [] + secondary_opts = [] + name = None + possible_names = [] + + for decl in decls: + if isidentifier(decl): + if name is not None: + raise TypeError('Name defined twice') + name = decl + else: + split_char = decl[:1] == '/' and ';' or '/' + if split_char in decl: + first, second = decl.split(split_char, 1) + first = first.rstrip() + if first: + possible_names.append(split_opt(first)) + opts.append(first) + second = second.lstrip() + if second: + secondary_opts.append(second.lstrip()) + else: + possible_names.append(split_opt(decl)) + opts.append(decl) + + if name is None and possible_names: + possible_names.sort(key=lambda x: -len(x[0])) # group long options first + name = possible_names[0][1].replace('-', '_').lower() + if not isidentifier(name): + name = None + + if name is None: + if not expose_value: + return None, opts, secondary_opts + raise TypeError('Could not determine name for option') + + if not opts and not secondary_opts: + raise TypeError('No options defined but a name was passed (%s). ' + 'Did you mean to declare an argument instead ' + 'of an option?' 
% name)
+
+        return name, opts, secondary_opts
+
+    def add_to_parser(self, parser, ctx):
+        kwargs = {
+            'dest': self.name,
+            'nargs': self.nargs,
+            'obj': self,
+        }
+
+        if self.multiple:
+            action = 'append'
+        elif self.count:
+            action = 'count'
+        else:
+            action = 'store'
+
+        if self.is_flag:
+            kwargs.pop('nargs', None)
+            if self.is_bool_flag and self.secondary_opts:
+                parser.add_option(self.opts, action=action + '_const',
+                                  const=True, **kwargs)
+                parser.add_option(self.secondary_opts, action=action +
+                                  '_const', const=False, **kwargs)
+            else:
+                parser.add_option(self.opts, action=action + '_const',
+                                  const=self.flag_value,
+                                  **kwargs)
+        else:
+            kwargs['action'] = action
+            parser.add_option(self.opts, **kwargs)
+
+    def get_help_record(self, ctx):
+        if self.hidden:
+            return
+        any_prefix_is_slash = []
+
+        def _write_opts(opts):
+            rv, any_slashes = join_options(opts)
+            if any_slashes:
+                any_prefix_is_slash[:] = [True]
+            if not self.is_flag and not self.count:
+                rv += ' ' + self.make_metavar()
+            return rv
+
+        rv = [_write_opts(self.opts)]
+        if self.secondary_opts:
+            rv.append(_write_opts(self.secondary_opts))
+
+        help = self.help or ''
+        extra = []
+        if self.show_envvar:
+            envvar = self.envvar
+            if envvar is None:
+                if self.allow_from_autoenv and \
+                   ctx.auto_envvar_prefix is not None:
+                    envvar = '%s_%s' % (ctx.auto_envvar_prefix, self.name.upper())
+            if envvar is not None:
+                extra.append('env var: %s' % (
+                    ', '.join('%s' % d for d in envvar)
+                    if isinstance(envvar, (list, tuple))
+                    else envvar, ))
+        if self.default is not None and self.show_default:
+            if isinstance(self.show_default, string_types):
+                default_string = '({})'.format(self.show_default)
+            elif isinstance(self.default, (list, tuple)):
+                default_string = ', '.join('%s' % d for d in self.default)
+            elif inspect.isfunction(self.default):
+                default_string = "(dynamic)"
+            else:
+                default_string = self.default
+            extra.append('default: {}'.format(default_string))
+
+        if self.required:
+            extra.append('required')
+        if extra:
+            help = '%s[%s]' % (help and help + ' ' or '', '; '.join(extra))
+
+        return ((any_prefix_is_slash and '; ' or ' / ').join(rv), help)
+
+    def get_default(self, ctx):
+        # If we're a non-boolean flag, our default is more complex because
+        # we need to look at all flags in the same group to figure out
+        # if we're the default one, in which case we return the flag
+        # value as default.
+        if self.is_flag and not self.is_bool_flag:
+            for param in ctx.command.params:
+                if param.name == self.name and param.default:
+                    return param.flag_value
+            return None
+        return Parameter.get_default(self, ctx)
+
+    def prompt_for_value(self, ctx):
+        """This is an alternative flow that can be activated in the full
+        value processing if a value does not exist. It will prompt the
+        user until a valid value exists and then returns the processed
+        value as result.
+        """
+        # Calculate the default before prompting anything to be stable.
+        default = self.get_default(ctx)
+
+        # If this is a prompt for a flag we need to handle this
+        # differently.
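+        # (A prompt is triggered e.g. by @click.option('--name',
+        # prompt=True) when no value was supplied on the command line;
+        # boolean flags ask a yes/no confirm() question instead of
+        # reading free-form input.)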
+ if self.is_bool_flag: + return confirm(self.prompt, default) + + return prompt(self.prompt, default=default, type=self.type, + hide_input=self.hide_input, show_choices=self.show_choices, + confirmation_prompt=self.confirmation_prompt, + value_proc=lambda x: self.process_value(ctx, x)) + + def resolve_envvar_value(self, ctx): + rv = Parameter.resolve_envvar_value(self, ctx) + if rv is not None: + return rv + if self.allow_from_autoenv and \ + ctx.auto_envvar_prefix is not None: + envvar = '%s_%s' % (ctx.auto_envvar_prefix, self.name.upper()) + return os.environ.get(envvar) + + def value_from_envvar(self, ctx): + rv = self.resolve_envvar_value(ctx) + if rv is None: + return None + value_depth = (self.nargs != 1) + bool(self.multiple) + if value_depth > 0 and rv is not None: + rv = self.type.split_envvar_value(rv) + if self.multiple and self.nargs != 1: + rv = batch(rv, self.nargs) + return rv + + def full_process_value(self, ctx, value): + if value is None and self.prompt is not None \ + and not ctx.resilient_parsing: + return self.prompt_for_value(ctx) + return Parameter.full_process_value(self, ctx, value) + + +class Argument(Parameter): + """Arguments are positional parameters to a command. They generally + provide fewer features than options but can have infinite ``nargs`` + and are required by default. + + All parameters are passed onwards to the parameter constructor. + """ + param_type_name = 'argument' + + def __init__(self, param_decls, required=None, **attrs): + if required is None: + if attrs.get('default') is not None: + required = False + else: + required = attrs.get('nargs', 1) > 0 + Parameter.__init__(self, param_decls, required=required, **attrs) + if self.default is not None and self.nargs < 0: + raise TypeError('nargs=-1 in combination with a default value ' + 'is not supported.') + + @property + def human_readable_name(self): + if self.metavar is not None: + return self.metavar + return self.name.upper() + + def make_metavar(self): + if self.metavar is not None: + return self.metavar + var = self.type.get_metavar(self) + if not var: + var = self.name.upper() + if not self.required: + var = '[%s]' % var + if self.nargs != 1: + var += '...' + return var + + def _parse_decls(self, decls, expose_value): + if not decls: + if not expose_value: + return None, [], [] + raise TypeError('Could not determine name for argument') + if len(decls) == 1: + name = arg = decls[0] + name = name.replace('-', '_').lower() + else: + raise TypeError('Arguments take exactly one ' + 'parameter declaration, got %d' % len(decls)) + return name, [arg], [] + + def get_usage_pieces(self, ctx): + return [self.make_metavar()] + + def get_error_hint(self, ctx): + return '"%s"' % self.make_metavar() + + def add_to_parser(self, parser, ctx): + parser.add_argument(dest=self.name, nargs=self.nargs, + obj=self) + + +# Circular dependency between decorators and core +from .decorators import command, group diff --git a/python/click/decorators.py b/python/click/decorators.py new file mode 100644 index 0000000..c57c530 --- /dev/null +++ b/python/click/decorators.py @@ -0,0 +1,311 @@ +import sys +import inspect + +from functools import update_wrapper + +from ._compat import iteritems +from ._unicodefun import _check_for_unicode_literals +from .utils import echo +from .globals import get_current_context + + +def pass_context(f): + """Marks a callback as wanting to receive the current context + object as first argument. 
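+
+    A minimal usage sketch (the command name and body are illustrative)::
+
+        @click.command()
+        @click.pass_context
+        def report(ctx):
+            click.echo(ctx.info_name)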
+ """ + def new_func(*args, **kwargs): + return f(get_current_context(), *args, **kwargs) + return update_wrapper(new_func, f) + + +def pass_obj(f): + """Similar to :func:`pass_context`, but only pass the object on the + context onwards (:attr:`Context.obj`). This is useful if that object + represents the state of a nested system. + """ + def new_func(*args, **kwargs): + return f(get_current_context().obj, *args, **kwargs) + return update_wrapper(new_func, f) + + +def make_pass_decorator(object_type, ensure=False): + """Given an object type this creates a decorator that will work + similar to :func:`pass_obj` but instead of passing the object of the + current context, it will find the innermost context of type + :func:`object_type`. + + This generates a decorator that works roughly like this:: + + from functools import update_wrapper + + def decorator(f): + @pass_context + def new_func(ctx, *args, **kwargs): + obj = ctx.find_object(object_type) + return ctx.invoke(f, obj, *args, **kwargs) + return update_wrapper(new_func, f) + return decorator + + :param object_type: the type of the object to pass. + :param ensure: if set to `True`, a new object will be created and + remembered on the context if it's not there yet. + """ + def decorator(f): + def new_func(*args, **kwargs): + ctx = get_current_context() + if ensure: + obj = ctx.ensure_object(object_type) + else: + obj = ctx.find_object(object_type) + if obj is None: + raise RuntimeError('Managed to invoke callback without a ' + 'context object of type %r existing' + % object_type.__name__) + return ctx.invoke(f, obj, *args, **kwargs) + return update_wrapper(new_func, f) + return decorator + + +def _make_command(f, name, attrs, cls): + if isinstance(f, Command): + raise TypeError('Attempted to convert a callback into a ' + 'command twice.') + try: + params = f.__click_params__ + params.reverse() + del f.__click_params__ + except AttributeError: + params = [] + help = attrs.get('help') + if help is None: + help = inspect.getdoc(f) + if isinstance(help, bytes): + help = help.decode('utf-8') + else: + help = inspect.cleandoc(help) + attrs['help'] = help + _check_for_unicode_literals() + return cls(name=name or f.__name__.lower().replace('_', '-'), + callback=f, params=params, **attrs) + + +def command(name=None, cls=None, **attrs): + r"""Creates a new :class:`Command` and uses the decorated function as + callback. This will also automatically attach all decorated + :func:`option`\s and :func:`argument`\s as parameters to the command. + + The name of the command defaults to the name of the function. If you + want to change that, you can pass the intended name as the first + argument. + + All keyword arguments are forwarded to the underlying command class. + + Once decorated the function turns into a :class:`Command` instance + that can be invoked as a command line utility or be attached to a + command :class:`Group`. + + :param name: the name of the command. This defaults to the function + name with underscores replaced by dashes. + :param cls: the command class to instantiate. This defaults to + :class:`Command`. + """ + if cls is None: + cls = Command + def decorator(f): + cmd = _make_command(f, name, attrs, cls) + cmd.__doc__ = f.__doc__ + return cmd + return decorator + + +def group(name=None, **attrs): + """Creates a new :class:`Group` with a function as callback. This + works otherwise the same as :func:`command` just that the `cls` + parameter is set to :class:`Group`. 
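+
+    A minimal sketch (names are illustrative)::
+
+        @click.group()
+        def cli():
+            pass
+
+        @cli.command()
+        def build():
+            pass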
+ """ + attrs.setdefault('cls', Group) + return command(name, **attrs) + + +def _param_memo(f, param): + if isinstance(f, Command): + f.params.append(param) + else: + if not hasattr(f, '__click_params__'): + f.__click_params__ = [] + f.__click_params__.append(param) + + +def argument(*param_decls, **attrs): + """Attaches an argument to the command. All positional arguments are + passed as parameter declarations to :class:`Argument`; all keyword + arguments are forwarded unchanged (except ``cls``). + This is equivalent to creating an :class:`Argument` instance manually + and attaching it to the :attr:`Command.params` list. + + :param cls: the argument class to instantiate. This defaults to + :class:`Argument`. + """ + def decorator(f): + ArgumentClass = attrs.pop('cls', Argument) + _param_memo(f, ArgumentClass(param_decls, **attrs)) + return f + return decorator + + +def option(*param_decls, **attrs): + """Attaches an option to the command. All positional arguments are + passed as parameter declarations to :class:`Option`; all keyword + arguments are forwarded unchanged (except ``cls``). + This is equivalent to creating an :class:`Option` instance manually + and attaching it to the :attr:`Command.params` list. + + :param cls: the option class to instantiate. This defaults to + :class:`Option`. + """ + def decorator(f): + # Issue 926, copy attrs, so pre-defined options can re-use the same cls= + option_attrs = attrs.copy() + + if 'help' in option_attrs: + option_attrs['help'] = inspect.cleandoc(option_attrs['help']) + OptionClass = option_attrs.pop('cls', Option) + _param_memo(f, OptionClass(param_decls, **option_attrs)) + return f + return decorator + + +def confirmation_option(*param_decls, **attrs): + """Shortcut for confirmation prompts that can be ignored by passing + ``--yes`` as parameter. + + This is equivalent to decorating a function with :func:`option` with + the following parameters:: + + def callback(ctx, param, value): + if not value: + ctx.abort() + + @click.command() + @click.option('--yes', is_flag=True, callback=callback, + expose_value=False, prompt='Do you want to continue?') + def dropdb(): + pass + """ + def decorator(f): + def callback(ctx, param, value): + if not value: + ctx.abort() + attrs.setdefault('is_flag', True) + attrs.setdefault('callback', callback) + attrs.setdefault('expose_value', False) + attrs.setdefault('prompt', 'Do you want to continue?') + attrs.setdefault('help', 'Confirm the action without prompting.') + return option(*(param_decls or ('--yes',)), **attrs)(f) + return decorator + + +def password_option(*param_decls, **attrs): + """Shortcut for password prompts. + + This is equivalent to decorating a function with :func:`option` with + the following parameters:: + + @click.command() + @click.option('--password', prompt=True, confirmation_prompt=True, + hide_input=True) + def changeadmin(password): + pass + """ + def decorator(f): + attrs.setdefault('prompt', True) + attrs.setdefault('confirmation_prompt', True) + attrs.setdefault('hide_input', True) + return option(*(param_decls or ('--password',)), **attrs)(f) + return decorator + + +def version_option(version=None, *param_decls, **attrs): + """Adds a ``--version`` option which immediately ends the program + printing out the version number. This is implemented as an eager + option that prints the version and exits the program in the callback. + + :param version: the version number to show. If not provided Click + attempts an auto discovery via setuptools. 
+ :param prog_name: the name of the program (defaults to autodetection) + :param message: custom message to show instead of the default + (``'%(prog)s, version %(version)s'``) + :param others: everything else is forwarded to :func:`option`. + """ + if version is None: + if hasattr(sys, '_getframe'): + module = sys._getframe(1).f_globals.get('__name__') + else: + module = '' + + def decorator(f): + prog_name = attrs.pop('prog_name', None) + message = attrs.pop('message', '%(prog)s, version %(version)s') + + def callback(ctx, param, value): + if not value or ctx.resilient_parsing: + return + prog = prog_name + if prog is None: + prog = ctx.find_root().info_name + ver = version + if ver is None: + try: + import pkg_resources + except ImportError: + pass + else: + for dist in pkg_resources.working_set: + scripts = dist.get_entry_map().get('console_scripts') or {} + for script_name, entry_point in iteritems(scripts): + if entry_point.module_name == module: + ver = dist.version + break + if ver is None: + raise RuntimeError('Could not determine version') + echo(message % { + 'prog': prog, + 'version': ver, + }, color=ctx.color) + ctx.exit() + + attrs.setdefault('is_flag', True) + attrs.setdefault('expose_value', False) + attrs.setdefault('is_eager', True) + attrs.setdefault('help', 'Show the version and exit.') + attrs['callback'] = callback + return option(*(param_decls or ('--version',)), **attrs)(f) + return decorator + + +def help_option(*param_decls, **attrs): + """Adds a ``--help`` option which immediately ends the program + printing out the help page. This is usually unnecessary to add as + this is added by default to all commands unless suppressed. + + Like :func:`version_option`, this is implemented as eager option that + prints in the callback and exits. + + All arguments are forwarded to :func:`option`. 
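+
+    A minimal sketch that also exposes ``-h`` (the short alias is
+    illustrative)::
+
+        @click.command()
+        @click.help_option('--help', '-h')
+        def cli():
+            pass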
+ """ + def decorator(f): + def callback(ctx, param, value): + if value and not ctx.resilient_parsing: + echo(ctx.get_help(), color=ctx.color) + ctx.exit() + attrs.setdefault('is_flag', True) + attrs.setdefault('expose_value', False) + attrs.setdefault('help', 'Show this message and exit.') + attrs.setdefault('is_eager', True) + attrs['callback'] = callback + return option(*(param_decls or ('--help',)), **attrs)(f) + return decorator + + +# Circular dependencies between core and decorators +from .core import Command, Group, Argument, Option diff --git a/python/click/exceptions.py b/python/click/exceptions.py new file mode 100644 index 0000000..6fa1765 --- /dev/null +++ b/python/click/exceptions.py @@ -0,0 +1,235 @@ +from ._compat import PY2, filename_to_ui, get_text_stderr +from .utils import echo + + +def _join_param_hints(param_hint): + if isinstance(param_hint, (tuple, list)): + return ' / '.join('"%s"' % x for x in param_hint) + return param_hint + + +class ClickException(Exception): + """An exception that Click can handle and show to the user.""" + + #: The exit code for this exception + exit_code = 1 + + def __init__(self, message): + ctor_msg = message + if PY2: + if ctor_msg is not None: + ctor_msg = ctor_msg.encode('utf-8') + Exception.__init__(self, ctor_msg) + self.message = message + + def format_message(self): + return self.message + + def __str__(self): + return self.message + + if PY2: + __unicode__ = __str__ + + def __str__(self): + return self.message.encode('utf-8') + + def show(self, file=None): + if file is None: + file = get_text_stderr() + echo('Error: %s' % self.format_message(), file=file) + + +class UsageError(ClickException): + """An internal exception that signals a usage error. This typically + aborts any further handling. + + :param message: the error message to display. + :param ctx: optionally the context that caused this error. Click will + fill in the context automatically in some situations. + """ + exit_code = 2 + + def __init__(self, message, ctx=None): + ClickException.__init__(self, message) + self.ctx = ctx + self.cmd = self.ctx and self.ctx.command or None + + def show(self, file=None): + if file is None: + file = get_text_stderr() + color = None + hint = '' + if (self.cmd is not None and + self.cmd.get_help_option(self.ctx) is not None): + hint = ('Try "%s %s" for help.\n' + % (self.ctx.command_path, self.ctx.help_option_names[0])) + if self.ctx is not None: + color = self.ctx.color + echo(self.ctx.get_usage() + '\n%s' % hint, file=file, color=color) + echo('Error: %s' % self.format_message(), file=file, color=color) + + +class BadParameter(UsageError): + """An exception that formats out a standardized error message for a + bad parameter. This is useful when thrown from a callback or type as + Click will attach contextual information to it (for instance, which + parameter it is). + + .. versionadded:: 2.0 + + :param param: the parameter object that caused this error. This can + be left out, and Click will attach this info itself + if possible. + :param param_hint: a string that shows up as parameter name. This + can be used as alternative to `param` in cases + where custom validation should happen. If it is + a string it's used as such, if it's a list then + each item is quoted and separated. 
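+
+    A minimal sketch of raising it from a callback (the names and the
+    range check are illustrative)::
+
+        def validate_port(ctx, param, value):
+            if not 0 < value < 65536:
+                raise BadParameter('port must be between 1 and 65535')
+            return value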
+ """ + + def __init__(self, message, ctx=None, param=None, + param_hint=None): + UsageError.__init__(self, message, ctx) + self.param = param + self.param_hint = param_hint + + def format_message(self): + if self.param_hint is not None: + param_hint = self.param_hint + elif self.param is not None: + param_hint = self.param.get_error_hint(self.ctx) + else: + return 'Invalid value: %s' % self.message + param_hint = _join_param_hints(param_hint) + + return 'Invalid value for %s: %s' % (param_hint, self.message) + + +class MissingParameter(BadParameter): + """Raised if click required an option or argument but it was not + provided when invoking the script. + + .. versionadded:: 4.0 + + :param param_type: a string that indicates the type of the parameter. + The default is to inherit the parameter type from + the given `param`. Valid values are ``'parameter'``, + ``'option'`` or ``'argument'``. + """ + + def __init__(self, message=None, ctx=None, param=None, + param_hint=None, param_type=None): + BadParameter.__init__(self, message, ctx, param, param_hint) + self.param_type = param_type + + def format_message(self): + if self.param_hint is not None: + param_hint = self.param_hint + elif self.param is not None: + param_hint = self.param.get_error_hint(self.ctx) + else: + param_hint = None + param_hint = _join_param_hints(param_hint) + + param_type = self.param_type + if param_type is None and self.param is not None: + param_type = self.param.param_type_name + + msg = self.message + if self.param is not None: + msg_extra = self.param.type.get_missing_message(self.param) + if msg_extra: + if msg: + msg += '. ' + msg_extra + else: + msg = msg_extra + + return 'Missing %s%s%s%s' % ( + param_type, + param_hint and ' %s' % param_hint or '', + msg and '. ' or '.', + msg or '', + ) + + +class NoSuchOption(UsageError): + """Raised if click attempted to handle an option that does not + exist. + + .. versionadded:: 4.0 + """ + + def __init__(self, option_name, message=None, possibilities=None, + ctx=None): + if message is None: + message = 'no such option: %s' % option_name + UsageError.__init__(self, message, ctx) + self.option_name = option_name + self.possibilities = possibilities + + def format_message(self): + bits = [self.message] + if self.possibilities: + if len(self.possibilities) == 1: + bits.append('Did you mean %s?' % self.possibilities[0]) + else: + possibilities = sorted(self.possibilities) + bits.append('(Possible options: %s)' % ', '.join(possibilities)) + return ' '.join(bits) + + +class BadOptionUsage(UsageError): + """Raised if an option is generally supplied but the use of the option + was incorrect. This is for instance raised if the number of arguments + for an option is not correct. + + .. versionadded:: 4.0 + + :param option_name: the name of the option being used incorrectly. + """ + + def __init__(self, option_name, message, ctx=None): + UsageError.__init__(self, message, ctx) + self.option_name = option_name + + +class BadArgumentUsage(UsageError): + """Raised if an argument is generally supplied but the use of the argument + was incorrect. This is for instance raised if the number of values + for an argument is not correct. + + .. 
versionadded:: 6.0 + """ + + def __init__(self, message, ctx=None): + UsageError.__init__(self, message, ctx) + + +class FileError(ClickException): + """Raised if a file cannot be opened.""" + + def __init__(self, filename, hint=None): + ui_filename = filename_to_ui(filename) + if hint is None: + hint = 'unknown error' + ClickException.__init__(self, hint) + self.ui_filename = ui_filename + self.filename = filename + + def format_message(self): + return 'Could not open file %s: %s' % (self.ui_filename, self.message) + + +class Abort(RuntimeError): + """An internal signalling exception that signals Click to abort.""" + + +class Exit(RuntimeError): + """An exception that indicates that the application should exit with some + status code. + + :param code: the status code to exit with. + """ + def __init__(self, code=0): + self.exit_code = code diff --git a/python/click/formatting.py b/python/click/formatting.py new file mode 100644 index 0000000..a3d6a4d --- /dev/null +++ b/python/click/formatting.py @@ -0,0 +1,256 @@ +from contextlib import contextmanager +from .termui import get_terminal_size +from .parser import split_opt +from ._compat import term_len + + +# Can force a width. This is used by the test system +FORCED_WIDTH = None + + +def measure_table(rows): + widths = {} + for row in rows: + for idx, col in enumerate(row): + widths[idx] = max(widths.get(idx, 0), term_len(col)) + return tuple(y for x, y in sorted(widths.items())) + + +def iter_rows(rows, col_count): + for row in rows: + row = tuple(row) + yield row + ('',) * (col_count - len(row)) + + +def wrap_text(text, width=78, initial_indent='', subsequent_indent='', + preserve_paragraphs=False): + """A helper function that intelligently wraps text. By default, it + assumes that it operates on a single paragraph of text but if the + `preserve_paragraphs` parameter is provided it will intelligently + handle paragraphs (defined by two empty lines). + + If paragraphs are handled, a paragraph can be prefixed with an empty + line containing the ``\\b`` character (``\\x08``) to indicate that + no rewrapping should happen in that block. + + :param text: the text that should be rewrapped. + :param width: the maximum width for the text. + :param initial_indent: the initial indent that should be placed on the + first line as a string. + :param subsequent_indent: the indent string that should be placed on + each consecutive line. + :param preserve_paragraphs: if this flag is set then the wrapping will + intelligently handle paragraphs. + """ + from ._textwrap import TextWrapper + text = text.expandtabs() + wrapper = TextWrapper(width, initial_indent=initial_indent, + subsequent_indent=subsequent_indent, + replace_whitespace=False) + if not preserve_paragraphs: + return wrapper.fill(text) + + p = [] + buf = [] + indent = None + + def _flush_par(): + if not buf: + return + if buf[0].strip() == '\b': + p.append((indent or 0, True, '\n'.join(buf[1:]))) + else: + p.append((indent or 0, False, ' '.join(buf))) + del buf[:] + + for line in text.splitlines(): + if not line: + _flush_par() + indent = None + else: + if indent is None: + orig_len = term_len(line) + line = line.lstrip() + indent = orig_len - term_len(line) + buf.append(line) + _flush_par() + + rv = [] + for indent, raw, text in p: + with wrapper.extra_indent(' ' * indent): + if raw: + rv.append(wrapper.indent_only(text)) + else: + rv.append(wrapper.fill(text)) + + return '\n\n'.join(rv) + + +class HelpFormatter(object): + """This class helps with formatting text-based help pages. 
It's + usually just needed for very special internal cases, but it's also + exposed so that developers can write their own fancy outputs. + + At present, it always writes into memory. + + :param indent_increment: the additional increment for each level. + :param width: the width for the text. This defaults to the terminal + width clamped to a maximum of 78. + """ + + def __init__(self, indent_increment=2, width=None, max_width=None): + self.indent_increment = indent_increment + if max_width is None: + max_width = 80 + if width is None: + width = FORCED_WIDTH + if width is None: + width = max(min(get_terminal_size()[0], max_width) - 2, 50) + self.width = width + self.current_indent = 0 + self.buffer = [] + + def write(self, string): + """Writes a unicode string into the internal buffer.""" + self.buffer.append(string) + + def indent(self): + """Increases the indentation.""" + self.current_indent += self.indent_increment + + def dedent(self): + """Decreases the indentation.""" + self.current_indent -= self.indent_increment + + def write_usage(self, prog, args='', prefix='Usage: '): + """Writes a usage line into the buffer. + + :param prog: the program name. + :param args: whitespace separated list of arguments. + :param prefix: the prefix for the first line. + """ + usage_prefix = '%*s%s ' % (self.current_indent, prefix, prog) + text_width = self.width - self.current_indent + + if text_width >= (term_len(usage_prefix) + 20): + # The arguments will fit to the right of the prefix. + indent = ' ' * term_len(usage_prefix) + self.write(wrap_text(args, text_width, + initial_indent=usage_prefix, + subsequent_indent=indent)) + else: + # The prefix is too long, put the arguments on the next line. + self.write(usage_prefix) + self.write('\n') + indent = ' ' * (max(self.current_indent, term_len(prefix)) + 4) + self.write(wrap_text(args, text_width, + initial_indent=indent, + subsequent_indent=indent)) + + self.write('\n') + + def write_heading(self, heading): + """Writes a heading into the buffer.""" + self.write('%*s%s:\n' % (self.current_indent, '', heading)) + + def write_paragraph(self): + """Writes a paragraph into the buffer.""" + if self.buffer: + self.write('\n') + + def write_text(self, text): + """Writes re-indented text into the buffer. This rewraps and + preserves paragraphs. + """ + text_width = max(self.width - self.current_indent, 11) + indent = ' ' * self.current_indent + self.write(wrap_text(text, text_width, + initial_indent=indent, + subsequent_indent=indent, + preserve_paragraphs=True)) + self.write('\n') + + def write_dl(self, rows, col_max=30, col_spacing=2): + """Writes a definition list into the buffer. This is how options + and commands are usually formatted. + + :param rows: a list of two item tuples for the terms and values. + :param col_max: the maximum width of the first column. + :param col_spacing: the number of spaces between the first and + second column. 
+        """
+        rows = list(rows)
+        widths = measure_table(rows)
+        if len(widths) != 2:
+            raise TypeError('Expected two columns for definition list')
+
+        first_col = min(widths[0], col_max) + col_spacing
+
+        for first, second in iter_rows(rows, len(widths)):
+            self.write('%*s%s' % (self.current_indent, '', first))
+            if not second:
+                self.write('\n')
+                continue
+            if term_len(first) <= first_col - col_spacing:
+                self.write(' ' * (first_col - term_len(first)))
+            else:
+                self.write('\n')
+                self.write(' ' * (first_col + self.current_indent))
+
+            text_width = max(self.width - first_col - 2, 10)
+            lines = iter(wrap_text(second, text_width).splitlines())
+            if lines:
+                self.write(next(lines) + '\n')
+                for line in lines:
+                    self.write('%*s%s\n' % (
+                        first_col + self.current_indent, '', line))
+            else:
+                self.write('\n')
+
+    @contextmanager
+    def section(self, name):
+        """Helpful context manager that writes a paragraph, a heading,
+        and the indents.
+
+        :param name: the section name that is written as heading.
+        """
+        self.write_paragraph()
+        self.write_heading(name)
+        self.indent()
+        try:
+            yield
+        finally:
+            self.dedent()
+
+    @contextmanager
+    def indentation(self):
+        """A context manager that increases the indentation."""
+        self.indent()
+        try:
+            yield
+        finally:
+            self.dedent()
+
+    def getvalue(self):
+        """Returns the buffer contents."""
+        return ''.join(self.buffer)
+
+
+def join_options(options):
+    """Given a list of option strings this joins them in the most appropriate
+    way and returns them in the form ``(formatted_string,
+    any_prefix_is_slash)`` where the second item in the tuple is a flag that
+    indicates if any of the option prefixes was a slash.
+    """
+    rv = []
+    any_prefix_is_slash = False
+    for opt in options:
+        prefix = split_opt(opt)[0]
+        if prefix == '/':
+            any_prefix_is_slash = True
+        rv.append((len(prefix), opt))
+
+    rv.sort(key=lambda x: x[0])
+
+    rv = ', '.join(x[1] for x in rv)
+    return rv, any_prefix_is_slash
diff --git a/python/click/globals.py b/python/click/globals.py
new file mode 100644
index 0000000..843b594
--- /dev/null
+++ b/python/click/globals.py
@@ -0,0 +1,48 @@
+from threading import local
+
+
+_local = local()
+
+
+def get_current_context(silent=False):
+    """Returns the current click context. This can be used as a way to
+    access the current context object from anywhere. This is a more implicit
+    alternative to the :func:`pass_context` decorator. This function is
+    primarily useful for helpers such as :func:`echo` which might be
+    interested in changing its behavior based on the current context.
+
+    To push the current context, :meth:`Context.scope` can be used.
+
+    .. versionadded:: 5.0
+
+    :param silent: if set to `True` the return value is `None` if no context
+                   is available. The default behavior is to raise a
+                   :exc:`RuntimeError`.
+    """
+    try:
+        return getattr(_local, 'stack')[-1]
+    except (AttributeError, IndexError):
+        if not silent:
+            raise RuntimeError('There is no active click context.')
+
+
+def push_context(ctx):
+    """Pushes a new context to the current stack."""
+    _local.__dict__.setdefault('stack', []).append(ctx)
+
+
+def pop_context():
+    """Removes the top level from the stack."""
+    _local.stack.pop()
+
+
+def resolve_color_default(color=None):
+    """Internal helper to get the default value of the color flag. If a
+    value is passed it's returned unchanged, otherwise it's looked up from
+    the current context.
+ """ + if color is not None: + return color + ctx = get_current_context(silent=True) + if ctx is not None: + return ctx.color diff --git a/python/click/parser.py b/python/click/parser.py new file mode 100644 index 0000000..1c3ae9c --- /dev/null +++ b/python/click/parser.py @@ -0,0 +1,427 @@ +# -*- coding: utf-8 -*- +""" +click.parser +~~~~~~~~~~~~ + +This module started out as largely a copy paste from the stdlib's +optparse module with the features removed that we do not need from +optparse because we implement them in Click on a higher level (for +instance type handling, help formatting and a lot more). + +The plan is to remove more and more from here over time. + +The reason this is a different module and not optparse from the stdlib +is that there are differences in 2.x and 3.x about the error messages +generated and optparse in the stdlib uses gettext for no good reason +and might cause us issues. +""" + +import re +from collections import deque +from .exceptions import UsageError, NoSuchOption, BadOptionUsage, \ + BadArgumentUsage + + +def _unpack_args(args, nargs_spec): + """Given an iterable of arguments and an iterable of nargs specifications, + it returns a tuple with all the unpacked arguments at the first index + and all remaining arguments as the second. + + The nargs specification is the number of arguments that should be consumed + or `-1` to indicate that this position should eat up all the remainders. + + Missing items are filled with `None`. + """ + args = deque(args) + nargs_spec = deque(nargs_spec) + rv = [] + spos = None + + def _fetch(c): + try: + if spos is None: + return c.popleft() + else: + return c.pop() + except IndexError: + return None + + while nargs_spec: + nargs = _fetch(nargs_spec) + if nargs == 1: + rv.append(_fetch(args)) + elif nargs > 1: + x = [_fetch(args) for _ in range(nargs)] + # If we're reversed, we're pulling in the arguments in reverse, + # so we need to turn them around. + if spos is not None: + x.reverse() + rv.append(tuple(x)) + elif nargs < 0: + if spos is not None: + raise TypeError('Cannot have two nargs < 0') + spos = len(rv) + rv.append(None) + + # spos is the position of the wildcard (star). If it's not `None`, + # we fill it with the remainder. 
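+    # For example (a sketch): _unpack_args(['a', 'b', 'c'], [1, -1])
+    # returns (('a', ('b', 'c')), []) -- 'a' fills the nargs=1 slot and
+    # the nargs=-1 wildcard consumes the rest.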
+ if spos is not None: + rv[spos] = tuple(args) + args = [] + rv[spos + 1:] = reversed(rv[spos + 1:]) + + return tuple(rv), list(args) + + +def _error_opt_args(nargs, opt): + if nargs == 1: + raise BadOptionUsage(opt, '%s option requires an argument' % opt) + raise BadOptionUsage(opt, '%s option requires %d arguments' % (opt, nargs)) + + +def split_opt(opt): + first = opt[:1] + if first.isalnum(): + return '', opt + if opt[1:2] == first: + return opt[:2], opt[2:] + return first, opt[1:] + + +def normalize_opt(opt, ctx): + if ctx is None or ctx.token_normalize_func is None: + return opt + prefix, opt = split_opt(opt) + return prefix + ctx.token_normalize_func(opt) + + +def split_arg_string(string): + """Given an argument string this attempts to split it into small parts.""" + rv = [] + for match in re.finditer(r"('([^'\\]*(?:\\.[^'\\]*)*)'" + r'|"([^"\\]*(?:\\.[^"\\]*)*)"' + r'|\S+)\s*', string, re.S): + arg = match.group().strip() + if arg[:1] == arg[-1:] and arg[:1] in '"\'': + arg = arg[1:-1].encode('ascii', 'backslashreplace') \ + .decode('unicode-escape') + try: + arg = type(string)(arg) + except UnicodeError: + pass + rv.append(arg) + return rv + + +class Option(object): + + def __init__(self, opts, dest, action=None, nargs=1, const=None, obj=None): + self._short_opts = [] + self._long_opts = [] + self.prefixes = set() + + for opt in opts: + prefix, value = split_opt(opt) + if not prefix: + raise ValueError('Invalid start character for option (%s)' + % opt) + self.prefixes.add(prefix[0]) + if len(prefix) == 1 and len(value) == 1: + self._short_opts.append(opt) + else: + self._long_opts.append(opt) + self.prefixes.add(prefix) + + if action is None: + action = 'store' + + self.dest = dest + self.action = action + self.nargs = nargs + self.const = const + self.obj = obj + + @property + def takes_value(self): + return self.action in ('store', 'append') + + def process(self, value, state): + if self.action == 'store': + state.opts[self.dest] = value + elif self.action == 'store_const': + state.opts[self.dest] = self.const + elif self.action == 'append': + state.opts.setdefault(self.dest, []).append(value) + elif self.action == 'append_const': + state.opts.setdefault(self.dest, []).append(self.const) + elif self.action == 'count': + state.opts[self.dest] = state.opts.get(self.dest, 0) + 1 + else: + raise ValueError('unknown action %r' % self.action) + state.order.append(self.obj) + + +class Argument(object): + + def __init__(self, dest, nargs=1, obj=None): + self.dest = dest + self.nargs = nargs + self.obj = obj + + def process(self, value, state): + if self.nargs > 1: + holes = sum(1 for x in value if x is None) + if holes == len(value): + value = None + elif holes != 0: + raise BadArgumentUsage('argument %s takes %d values' + % (self.dest, self.nargs)) + state.opts[self.dest] = value + state.order.append(self.obj) + + +class ParsingState(object): + + def __init__(self, rargs): + self.opts = {} + self.largs = [] + self.rargs = rargs + self.order = [] + + +class OptionParser(object): + """The option parser is an internal class that is ultimately used to + parse options and arguments. It's modelled after optparse and brings + a similar but vastly simplified API. It should generally not be used + directly as the high level Click classes wrap it for you. + + It's not nearly as extensible as optparse or argparse as it does not + implement features that are implemented on a higher level (such as + types or defaults). 
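+
+    A minimal usage sketch (the option and value are illustrative)::
+
+        parser = OptionParser()
+        parser.add_option(['--name'], dest='name')
+        opts, largs, order = parser.parse_args(['--name', 'click'])
+        # opts == {'name': 'click'}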
+
+    :param ctx: optionally the :class:`~click.Context` that this parser
+                belongs to.
+    """
+
+    def __init__(self, ctx=None):
+        #: The :class:`~click.Context` for this parser. This might be
+        #: `None` for some advanced use cases.
+        self.ctx = ctx
+        #: This controls how the parser deals with interspersed arguments.
+        #: If this is set to `False`, the parser will stop on the first
+        #: non-option. Click uses this to implement nested subcommands
+        #: safely.
+        self.allow_interspersed_args = True
+        #: This tells the parser how to deal with unknown options. By
+        #: default it will error out (which is sensible), but there is a
+        #: second mode where it will ignore them and continue processing
+        #: after shifting all the unknown options into the resulting args.
+        self.ignore_unknown_options = False
+        if ctx is not None:
+            self.allow_interspersed_args = ctx.allow_interspersed_args
+            self.ignore_unknown_options = ctx.ignore_unknown_options
+        self._short_opt = {}
+        self._long_opt = {}
+        self._opt_prefixes = set(['-', '--'])
+        self._args = []
+
+    def add_option(self, opts, dest, action=None, nargs=1, const=None,
+                   obj=None):
+        """Adds a new option named `dest` to the parser. The destination
+        is not inferred (unlike with optparse) and needs to be explicitly
+        provided. Action can be any of ``store``, ``store_const``,
+        ``append``, ``append_const`` or ``count``.
+
+        The `obj` can be used to identify the option in the order list
+        that is returned from the parser.
+        """
+        if obj is None:
+            obj = dest
+        opts = [normalize_opt(opt, self.ctx) for opt in opts]
+        option = Option(opts, dest, action=action, nargs=nargs,
+                        const=const, obj=obj)
+        self._opt_prefixes.update(option.prefixes)
+        for opt in option._short_opts:
+            self._short_opt[opt] = option
+        for opt in option._long_opts:
+            self._long_opt[opt] = option
+
+    def add_argument(self, dest, nargs=1, obj=None):
+        """Adds a positional argument named `dest` to the parser.
+
+        The `obj` can be used to identify the option in the order list
+        that is returned from the parser.
+        """
+        if obj is None:
+            obj = dest
+        self._args.append(Argument(dest=dest, nargs=nargs, obj=obj))
+
+    def parse_args(self, args):
+        """Parses positional arguments and returns ``(values, args, order)``
+        for the parsed options and arguments as well as the leftover
+        arguments if there are any. The order is a list of objects as they
+        appear on the command line. If arguments appear multiple times they
+        will be memorized multiple times as well.
+        """
+        state = ParsingState(args)
+        try:
+            self._process_args_for_options(state)
+            self._process_args_for_args(state)
+        except UsageError:
+            if self.ctx is None or not self.ctx.resilient_parsing:
+                raise
+        return state.opts, state.largs, state.order
+
+    def _process_args_for_args(self, state):
+        pargs, args = _unpack_args(state.largs + state.rargs,
+                                   [x.nargs for x in self._args])
+
+        for idx, arg in enumerate(self._args):
+            arg.process(pargs[idx], state)
+
+        state.largs = args
+        state.rargs = []
+
+    def _process_args_for_options(self, state):
+        while state.rargs:
+            arg = state.rargs.pop(0)
+            arglen = len(arg)
+            # Double dashes are always handled explicitly regardless of
+            # which prefixes are valid.
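+            # e.g. in "cli --verbose -- --not-an-option" everything after
+            # the bare "--" is passed through as positional arguments.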
+ if arg == '--': + return + elif arg[:1] in self._opt_prefixes and arglen > 1: + self._process_opts(arg, state) + elif self.allow_interspersed_args: + state.largs.append(arg) + else: + state.rargs.insert(0, arg) + return + + # Say this is the original argument list: + # [arg0, arg1, ..., arg(i-1), arg(i), arg(i+1), ..., arg(N-1)] + # ^ + # (we are about to process arg(i)). + # + # Then rargs is [arg(i), ..., arg(N-1)] and largs is a *subset* of + # [arg0, ..., arg(i-1)] (any options and their arguments will have + # been removed from largs). + # + # The while loop will usually consume 1 or more arguments per pass. + # If it consumes 1 (eg. arg is an option that takes no arguments), + # then after _process_arg() is done the situation is: + # + # largs = subset of [arg0, ..., arg(i)] + # rargs = [arg(i+1), ..., arg(N-1)] + # + # If allow_interspersed_args is false, largs will always be + # *empty* -- still a subset of [arg0, ..., arg(i-1)], but + # not a very interesting subset! + + def _match_long_opt(self, opt, explicit_value, state): + if opt not in self._long_opt: + possibilities = [word for word in self._long_opt + if word.startswith(opt)] + raise NoSuchOption(opt, possibilities=possibilities, ctx=self.ctx) + + option = self._long_opt[opt] + if option.takes_value: + # At this point it's safe to modify rargs by injecting the + # explicit value, because no exception is raised in this + # branch. This means that the inserted value will be fully + # consumed. + if explicit_value is not None: + state.rargs.insert(0, explicit_value) + + nargs = option.nargs + if len(state.rargs) < nargs: + _error_opt_args(nargs, opt) + elif nargs == 1: + value = state.rargs.pop(0) + else: + value = tuple(state.rargs[:nargs]) + del state.rargs[:nargs] + + elif explicit_value is not None: + raise BadOptionUsage(opt, '%s option does not take a value' % opt) + + else: + value = None + + option.process(value, state) + + def _match_short_opt(self, arg, state): + stop = False + i = 1 + prefix = arg[0] + unknown_options = [] + + for ch in arg[1:]: + opt = normalize_opt(prefix + ch, self.ctx) + option = self._short_opt.get(opt) + i += 1 + + if not option: + if self.ignore_unknown_options: + unknown_options.append(ch) + continue + raise NoSuchOption(opt, ctx=self.ctx) + if option.takes_value: + # Any characters left in arg? Pretend they're the + # next arg, and stop consuming characters of arg. + if i < len(arg): + state.rargs.insert(0, arg[i:]) + stop = True + + nargs = option.nargs + if len(state.rargs) < nargs: + _error_opt_args(nargs, opt) + elif nargs == 1: + value = state.rargs.pop(0) + else: + value = tuple(state.rargs[:nargs]) + del state.rargs[:nargs] + + else: + value = None + + option.process(value, state) + + if stop: + break + + # If we got any unknown options we re-combinate the string of the + # remaining options and re-attach the prefix, then report that + # to the state as new larg. This way there is basic combinatorics + # that can be achieved while still ignoring unknown arguments. + if self.ignore_unknown_options and unknown_options: + state.largs.append(prefix + ''.join(unknown_options)) + + def _process_opts(self, arg, state): + explicit_value = None + # Long option handling happens in two parts. The first part is + # supporting explicitly attached values. In any case, we will try + # to long match the option first. 
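+        # e.g. "--name=click" is split into the long option "--name" and
+        # the explicit value "click" before matching.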
+ if '=' in arg: + long_opt, explicit_value = arg.split('=', 1) + else: + long_opt = arg + norm_long_opt = normalize_opt(long_opt, self.ctx) + + # At this point we will match the (assumed) long option through + # the long option matching code. Note that this allows options + # like "-foo" to be matched as long options. + try: + self._match_long_opt(norm_long_opt, explicit_value, state) + except NoSuchOption: + # At this point the long option matching failed, and we need + # to try with short options. However there is a special rule + # which says, that if we have a two character options prefix + # (applies to "--foo" for instance), we do not dispatch to the + # short option code and will instead raise the no option + # error. + if arg[:2] not in self._opt_prefixes: + return self._match_short_opt(arg, state) + if not self.ignore_unknown_options: + raise + state.largs.append(arg) diff --git a/python/click/termui.py b/python/click/termui.py new file mode 100644 index 0000000..bf9a3aa --- /dev/null +++ b/python/click/termui.py @@ -0,0 +1,606 @@ +import os +import sys +import struct +import inspect +import itertools + +from ._compat import raw_input, text_type, string_types, \ + isatty, strip_ansi, get_winterm_size, DEFAULT_COLUMNS, WIN +from .utils import echo +from .exceptions import Abort, UsageError +from .types import convert_type, Choice, Path +from .globals import resolve_color_default + + +# The prompt functions to use. The doc tools currently override these +# functions to customize how they work. +visible_prompt_func = raw_input + +_ansi_colors = { + 'black': 30, + 'red': 31, + 'green': 32, + 'yellow': 33, + 'blue': 34, + 'magenta': 35, + 'cyan': 36, + 'white': 37, + 'reset': 39, + 'bright_black': 90, + 'bright_red': 91, + 'bright_green': 92, + 'bright_yellow': 93, + 'bright_blue': 94, + 'bright_magenta': 95, + 'bright_cyan': 96, + 'bright_white': 97, +} +_ansi_reset_all = '\033[0m' + + +def hidden_prompt_func(prompt): + import getpass + return getpass.getpass(prompt) + + +def _build_prompt(text, suffix, show_default=False, default=None, show_choices=True, type=None): + prompt = text + if type is not None and show_choices and isinstance(type, Choice): + prompt += ' (' + ", ".join(map(str, type.choices)) + ')' + if default is not None and show_default: + prompt = '%s [%s]' % (prompt, default) + return prompt + suffix + + +def prompt(text, default=None, hide_input=False, confirmation_prompt=False, + type=None, value_proc=None, prompt_suffix=': ', show_default=True, + err=False, show_choices=True): + """Prompts a user for input. This is a convenience function that can + be used to prompt a user for input later. + + If the user aborts the input by sending a interrupt signal, this + function will catch it and raise a :exc:`Abort` exception. + + .. versionadded:: 7.0 + Added the show_choices parameter. + + .. versionadded:: 6.0 + Added unicode support for cmd.exe on Windows. + + .. versionadded:: 4.0 + Added the `err` parameter. + + :param text: the text to show for the prompt. + :param default: the default value to use if no input happens. If this + is not given it will prompt until it's aborted. + :param hide_input: if this is set to true then the input value will + be hidden. + :param confirmation_prompt: asks for confirmation for the value. + :param type: the type to use to check the value against. + :param value_proc: if this parameter is provided it's a function that + is invoked instead of the type conversion to + convert a value. 
+ :param prompt_suffix: a suffix that should be added to the prompt. + :param show_default: shows or hides the default value in the prompt. + :param err: if set to true the file defaults to ``stderr`` instead of + ``stdout``, the same as with echo. + :param show_choices: Show or hide choices if the passed type is a Choice. + For example if type is a Choice of either day or week, + show_choices is true and text is "Group by" then the + prompt will be "Group by (day, week): ". + """ + result = None + + def prompt_func(text): + f = hide_input and hidden_prompt_func or visible_prompt_func + try: + # Write the prompt separately so that we get nice + # coloring through colorama on Windows + echo(text, nl=False, err=err) + return f('') + except (KeyboardInterrupt, EOFError): + # getpass doesn't print a newline if the user aborts input with ^C. + # Allegedly this behavior is inherited from getpass(3). + # A doc bug has been filed at https://bugs.python.org/issue24711 + if hide_input: + echo(None, err=err) + raise Abort() + + if value_proc is None: + value_proc = convert_type(type, default) + + prompt = _build_prompt(text, prompt_suffix, show_default, default, show_choices, type) + + while 1: + while 1: + value = prompt_func(prompt) + if value: + break + elif default is not None: + if isinstance(value_proc, Path): + # validate Path default value(exists, dir_okay etc.) + value = default + break + return default + try: + result = value_proc(value) + except UsageError as e: + echo('Error: %s' % e.message, err=err) + continue + if not confirmation_prompt: + return result + while 1: + value2 = prompt_func('Repeat for confirmation: ') + if value2: + break + if value == value2: + return result + echo('Error: the two entered values do not match', err=err) + + +def confirm(text, default=False, abort=False, prompt_suffix=': ', + show_default=True, err=False): + """Prompts for confirmation (yes/no question). + + If the user aborts the input by sending a interrupt signal this + function will catch it and raise a :exc:`Abort` exception. + + .. versionadded:: 4.0 + Added the `err` parameter. + + :param text: the question to ask. + :param default: the default for the prompt. + :param abort: if this is set to `True` a negative answer aborts the + exception by raising :exc:`Abort`. + :param prompt_suffix: a suffix that should be added to the prompt. + :param show_default: shows or hides the default value in the prompt. + :param err: if set to true the file defaults to ``stderr`` instead of + ``stdout``, the same as with echo. + """ + prompt = _build_prompt(text, prompt_suffix, show_default, + default and 'Y/n' or 'y/N') + while 1: + try: + # Write the prompt separately so that we get nice + # coloring through colorama on Windows + echo(prompt, nl=False, err=err) + value = visible_prompt_func('').lower().strip() + except (KeyboardInterrupt, EOFError): + raise Abort() + if value in ('y', 'yes'): + rv = True + elif value in ('n', 'no'): + rv = False + elif value == '': + rv = default + else: + echo('Error: invalid input', err=err) + continue + break + if abort and not rv: + raise Abort() + return rv + + +def get_terminal_size(): + """Returns the current size of the terminal as tuple in the form + ``(width, height)`` in columns and rows. 
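+
+    For example::
+
+        width, height = get_terminal_size()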
+ """ + # If shutil has get_terminal_size() (Python 3.3 and later) use that + if sys.version_info >= (3, 3): + import shutil + shutil_get_terminal_size = getattr(shutil, 'get_terminal_size', None) + if shutil_get_terminal_size: + sz = shutil_get_terminal_size() + return sz.columns, sz.lines + + # We provide a sensible default for get_winterm_size() when being invoked + # inside a subprocess. Without this, it would not provide a useful input. + if get_winterm_size is not None: + size = get_winterm_size() + if size == (0, 0): + return (79, 24) + else: + return size + + def ioctl_gwinsz(fd): + try: + import fcntl + import termios + cr = struct.unpack( + 'hh', fcntl.ioctl(fd, termios.TIOCGWINSZ, '1234')) + except Exception: + return + return cr + + cr = ioctl_gwinsz(0) or ioctl_gwinsz(1) or ioctl_gwinsz(2) + if not cr: + try: + fd = os.open(os.ctermid(), os.O_RDONLY) + try: + cr = ioctl_gwinsz(fd) + finally: + os.close(fd) + except Exception: + pass + if not cr or not cr[0] or not cr[1]: + cr = (os.environ.get('LINES', 25), + os.environ.get('COLUMNS', DEFAULT_COLUMNS)) + return int(cr[1]), int(cr[0]) + + +def echo_via_pager(text_or_generator, color=None): + """This function takes a text and shows it via an environment specific + pager on stdout. + + .. versionchanged:: 3.0 + Added the `color` flag. + + :param text_or_generator: the text to page, or alternatively, a + generator emitting the text to page. + :param color: controls if the pager supports ANSI colors or not. The + default is autodetection. + """ + color = resolve_color_default(color) + + if inspect.isgeneratorfunction(text_or_generator): + i = text_or_generator() + elif isinstance(text_or_generator, string_types): + i = [text_or_generator] + else: + i = iter(text_or_generator) + + # convert every element of i to a text type if necessary + text_generator = (el if isinstance(el, string_types) else text_type(el) + for el in i) + + from ._termui_impl import pager + return pager(itertools.chain(text_generator, "\n"), color) + + +def progressbar(iterable=None, length=None, label=None, show_eta=True, + show_percent=None, show_pos=False, + item_show_func=None, fill_char='#', empty_char='-', + bar_template='%(label)s [%(bar)s] %(info)s', + info_sep=' ', width=36, file=None, color=None): + """This function creates an iterable context manager that can be used + to iterate over something while showing a progress bar. It will + either iterate over the `iterable` or `length` items (that are counted + up). While iteration happens, this function will print a rendered + progress bar to the given `file` (defaults to stdout) and will attempt + to calculate remaining time and more. By default, this progress bar + will not be rendered if the file is not a terminal. + + The context manager creates the progress bar. When the context + manager is entered the progress bar is already displayed. With every + iteration over the progress bar, the iterable passed to the bar is + advanced and the bar is updated. When the context manager exits, + a newline is printed and the progress bar is finalized on screen. + + No printing must happen or the progress bar will be unintentionally + destroyed. + + Example usage:: + + with progressbar(items) as bar: + for item in bar: + do_something_with(item) + + Alternatively, if no iterable is specified, one can manually update the + progress bar through the `update()` method instead of directly + iterating over the progress bar. 
The update method accepts the number + of steps to increment the bar with:: + + with progressbar(length=chunks.total_bytes) as bar: + for chunk in chunks: + process_chunk(chunk) + bar.update(chunks.bytes) + + .. versionadded:: 2.0 + + .. versionadded:: 4.0 + Added the `color` parameter. Added a `update` method to the + progressbar object. + + :param iterable: an iterable to iterate over. If not provided the length + is required. + :param length: the number of items to iterate over. By default the + progressbar will attempt to ask the iterator about its + length, which might or might not work. If an iterable is + also provided this parameter can be used to override the + length. If an iterable is not provided the progress bar + will iterate over a range of that length. + :param label: the label to show next to the progress bar. + :param show_eta: enables or disables the estimated time display. This is + automatically disabled if the length cannot be + determined. + :param show_percent: enables or disables the percentage display. The + default is `True` if the iterable has a length or + `False` if not. + :param show_pos: enables or disables the absolute position display. The + default is `False`. + :param item_show_func: a function called with the current item which + can return a string to show the current item + next to the progress bar. Note that the current + item can be `None`! + :param fill_char: the character to use to show the filled part of the + progress bar. + :param empty_char: the character to use to show the non-filled part of + the progress bar. + :param bar_template: the format string to use as template for the bar. + The parameters in it are ``label`` for the label, + ``bar`` for the progress bar and ``info`` for the + info section. + :param info_sep: the separator between multiple info items (eta etc.) + :param width: the width of the progress bar in characters, 0 means full + terminal width + :param file: the file to write to. If this is not a terminal then + only the label is printed. + :param color: controls if the terminal supports ANSI colors or not. The + default is autodetection. This is only needed if ANSI + codes are included anywhere in the progress bar output + which is not the case by default. + """ + from ._termui_impl import ProgressBar + color = resolve_color_default(color) + return ProgressBar(iterable=iterable, length=length, show_eta=show_eta, + show_percent=show_percent, show_pos=show_pos, + item_show_func=item_show_func, fill_char=fill_char, + empty_char=empty_char, bar_template=bar_template, + info_sep=info_sep, file=file, label=label, + width=width, color=color) + + +def clear(): + """Clears the terminal screen. This will have the effect of clearing + the whole visible space of the terminal and moving the cursor to the + top left. This does not do anything if not connected to a terminal. + + .. versionadded:: 2.0 + """ + if not isatty(sys.stdout): + return + # If we're on Windows and we don't have colorama available, then we + # clear the screen by shelling out. Otherwise we can use an escape + # sequence. + if WIN: + os.system('cls') + else: + sys.stdout.write('\033[2J\033[1;1H') + + +def style(text, fg=None, bg=None, bold=None, dim=None, underline=None, + blink=None, reverse=None, reset=True): + """Styles a text with ANSI styles and returns the new string. By + default the styling is self contained which means that at the end + of the string a reset code is issued. This can be prevented by + passing ``reset=False``. 
+ + Examples:: + + click.echo(click.style('Hello World!', fg='green')) + click.echo(click.style('ATTENTION!', blink=True)) + click.echo(click.style('Some things', reverse=True, fg='cyan')) + + Supported color names: + + * ``black`` (might be a gray) + * ``red`` + * ``green`` + * ``yellow`` (might be an orange) + * ``blue`` + * ``magenta`` + * ``cyan`` + * ``white`` (might be light gray) + * ``bright_black`` + * ``bright_red`` + * ``bright_green`` + * ``bright_yellow`` + * ``bright_blue`` + * ``bright_magenta`` + * ``bright_cyan`` + * ``bright_white`` + * ``reset`` (reset the color code only) + + .. versionadded:: 2.0 + + .. versionadded:: 7.0 + Added support for bright colors. + + :param text: the string to style with ansi codes. + :param fg: if provided this will become the foreground color. + :param bg: if provided this will become the background color. + :param bold: if provided this will enable or disable bold mode. + :param dim: if provided this will enable or disable dim mode. This is + badly supported. + :param underline: if provided this will enable or disable underline. + :param blink: if provided this will enable or disable blinking. + :param reverse: if provided this will enable or disable inverse + rendering (foreground becomes background and the + other way round). + :param reset: by default a reset-all code is added at the end of the + string which means that styles do not carry over. This + can be disabled to compose styles. + """ + bits = [] + if fg: + try: + bits.append('\033[%dm' % (_ansi_colors[fg])) + except KeyError: + raise TypeError('Unknown color %r' % fg) + if bg: + try: + bits.append('\033[%dm' % (_ansi_colors[bg] + 10)) + except KeyError: + raise TypeError('Unknown color %r' % bg) + if bold is not None: + bits.append('\033[%dm' % (1 if bold else 22)) + if dim is not None: + bits.append('\033[%dm' % (2 if dim else 22)) + if underline is not None: + bits.append('\033[%dm' % (4 if underline else 24)) + if blink is not None: + bits.append('\033[%dm' % (5 if blink else 25)) + if reverse is not None: + bits.append('\033[%dm' % (7 if reverse else 27)) + bits.append(text) + if reset: + bits.append(_ansi_reset_all) + return ''.join(bits) + + +def unstyle(text): + """Removes ANSI styling information from a string. Usually it's not + necessary to use this function as Click's echo function will + automatically remove styling if necessary. + + .. versionadded:: 2.0 + + :param text: the text to remove style information from. + """ + return strip_ansi(text) + + +def secho(message=None, file=None, nl=True, err=False, color=None, **styles): + """This function combines :func:`echo` and :func:`style` into one + call. As such the following two calls are the same:: + + click.secho('Hello World!', fg='green') + click.echo(click.style('Hello World!', fg='green')) + + All keyword arguments are forwarded to the underlying functions + depending on which one they go with. + + .. versionadded:: 2.0 + """ + if message is not None: + message = style(message, **styles) + return echo(message, file=file, nl=nl, err=err, color=color) + + +def edit(text=None, editor=None, env=None, require_save=True, + extension='.txt', filename=None): + r"""Edits the given text in the defined editor. If an editor is given + (should be the full path to the executable but the regular operating + system search path is used for finding the executable) it overrides + the detected editor. Optionally, some environment variables can be + used. If the editor is closed without changes, `None` is returned. 
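A short sketch of how ``style``, ``secho`` and ``unstyle`` fit together, assuming the Click 7.0 behavior documented above (the messages are placeholders)::

    import click

    # style() only builds the string; echo() writes it out
    click.echo(click.style('error', fg='red', bold=True) + ': disk full')

    # secho() collapses the two calls into one
    click.secho('All checks passed', fg='green')

    # reset=False omits the trailing reset code so styles can be composed
    prefix = click.style('==> ', fg='cyan', reset=False)

    # unstyle() strips the ANSI codes back out
    assert click.unstyle(click.style('plain', fg='blue')) == 'plain'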
In case a file is edited directly the return value is always `None` and
+    `require_save` and `extension` are ignored.
+
+    If the editor cannot be opened a :exc:`UsageError` is raised.
+
+    Note for Windows: to simplify cross-platform usage, the newlines are
+    automatically converted from POSIX to Windows and vice versa. As such,
+    the message here will have ``\n`` as newline markers.
+
+    :param text: the text to edit.
+    :param editor: optionally the editor to use. Defaults to automatic
+                   detection.
+    :param env: environment variables to forward to the editor.
+    :param require_save: if this is true, then not saving in the editor
+                         will make the return value become `None`.
+    :param extension: the extension to tell the editor about. This defaults
+                      to `.txt` but changing this might change syntax
+                      highlighting.
+    :param filename: if provided it will edit this file instead of the
+                     provided text contents. It will not use a temporary
+                     file as an indirection in that case.
+    """
+    from ._termui_impl import Editor
+    editor = Editor(editor=editor, env=env, require_save=require_save,
+                    extension=extension)
+    if filename is None:
+        return editor.edit(text)
+    editor.edit_file(filename)
+
+
+def launch(url, wait=False, locate=False):
+    """This function launches the given URL (or filename) in the default
+    viewer application for this file type. If this is an executable, it
+    might launch the executable in a new session. The return value is
+    the exit code of the launched application. Usually, ``0`` indicates
+    success.
+
+    Examples::
+
+        click.launch('https://click.palletsprojects.com/')
+        click.launch('/my/downloaded/file', locate=True)
+
+    .. versionadded:: 2.0
+
+    :param url: URL or filename of the thing to launch.
+    :param wait: waits for the program to stop.
+    :param locate: if this is set to `True` then instead of launching the
+                   application associated with the URL it will attempt to
+                   launch a file manager with the file located. This
+                   might have weird effects if the URL does not point to
+                   the filesystem.
+    """
+    from ._termui_impl import open_url
+    return open_url(url, wait=wait, locate=locate)
+
+
+# If this is provided, getchar() calls into this instead. This is used
+# for unittesting purposes.
+_getchar = None
+
+
+def getchar(echo=False):
+    """Fetches a single character from the terminal and returns it. This
+    will always return a unicode character and under certain rare
+    circumstances this might return more than one character. The
+    situations in which more than one character is returned are when for
+    whatever reason multiple characters end up in the terminal buffer or
+    standard input was not actually a terminal.
+
+    Note that this will always read from the terminal, even if something
+    is piped into the standard input.
+
+    Note for Windows: in rare cases when typing non-ASCII characters, this
+    function might wait for a second character and then return both at once.
+    This is because certain Unicode characters look like special-key markers.
+
+    .. versionadded:: 2.0
+
+    :param echo: if set to `True`, the character read will also show up on
+                 the terminal. The default is to not show it.
+    """
+    f = _getchar
+    if f is None:
+        from ._termui_impl import getchar as f
+    return f(echo)
+
+
+def raw_terminal():
+    from ._termui_impl import raw_terminal as f
+    return f()
+
+
+def pause(info='Press any key to continue ...', err=False):
+    """This command stops execution and waits for the user to press any
+    key to continue. This is similar to the Windows batch "pause"
+    command. If the program is not run through a terminal, this command
+    will instead do nothing.
+
+    .. versionadded:: 2.0
+
+    .. versionadded:: 4.0
+       Added the `err` parameter.
+
+    :param info: the info string to print before pausing.
+    :param err: if set to true the message goes to ``stderr`` instead of
+                ``stdout``, the same as with echo.
+    """
+    if not isatty(sys.stdin) or not isatty(sys.stdout):
+        return
+    try:
+        if info:
+            echo(info, nl=False, err=err)
+        try:
+            getchar()
+        except (KeyboardInterrupt, EOFError):
+            pass
+    finally:
+        if info:
+            echo(err=err)
diff --git a/python/click/testing.py b/python/click/testing.py
new file mode 100644
index 0000000..1b2924e
--- /dev/null
+++ b/python/click/testing.py
@@ -0,0 +1,374 @@
+import os
+import sys
+import shutil
+import tempfile
+import contextlib
+import shlex
+
+from ._compat import iteritems, PY2, string_types
+
+
+# If someone wants to vendor click, we want to ensure the
+# correct package is discovered. Ideally we could use a
+# relative import here but unfortunately Python does not
+# support that.
+clickpkg = sys.modules[__name__.rsplit('.', 1)[0]]
+
+
+if PY2:
+    from cStringIO import StringIO
+else:
+    import io
+    from ._compat import _find_binary_reader
+
+
+class EchoingStdin(object):
+
+    def __init__(self, input, output):
+        self._input = input
+        self._output = output
+
+    def __getattr__(self, x):
+        return getattr(self._input, x)
+
+    def _echo(self, rv):
+        self._output.write(rv)
+        return rv
+
+    def read(self, n=-1):
+        return self._echo(self._input.read(n))
+
+    def readline(self, n=-1):
+        return self._echo(self._input.readline(n))
+
+    def readlines(self):
+        return [self._echo(x) for x in self._input.readlines()]
+
+    def __iter__(self):
+        return iter(self._echo(x) for x in self._input)
+
+    def __repr__(self):
+        return repr(self._input)
+
+
+def make_input_stream(input, charset):
+    # Is already an input stream.
+    if hasattr(input, 'read'):
+        if PY2:
+            return input
+        rv = _find_binary_reader(input)
+        if rv is not None:
+            return rv
+        raise TypeError('Could not find binary reader for input stream.')
+
+    if input is None:
+        input = b''
+    elif not isinstance(input, bytes):
+        input = input.encode(charset)
+    if PY2:
+        return StringIO(input)
+    return io.BytesIO(input)
+
+
+class Result(object):
+    """Holds the captured result of an invoked CLI script."""
+
+    def __init__(self, runner, stdout_bytes, stderr_bytes, exit_code,
+                 exception, exc_info=None):
+        #: The runner that created the result
+        self.runner = runner
+        #: The standard output as bytes.
+        self.stdout_bytes = stdout_bytes
+        #: The standard error as bytes, or falsy if not available
+        self.stderr_bytes = stderr_bytes
+        #: The exit code as integer.
+        self.exit_code = exit_code
+        #: The exception that happened if one did.
+        self.exception = exception
+        #: The traceback
+        self.exc_info = exc_info
+
+    @property
+    def output(self):
+        """The (standard) output as unicode string."""
+        return self.stdout
+
+    @property
+    def stdout(self):
+        """The standard output as unicode string."""
+        return self.stdout_bytes.decode(self.runner.charset, 'replace') \
+            .replace('\r\n', '\n')
+
+    @property
+    def stderr(self):
+        """The standard error as unicode string."""
+        if not self.stderr_bytes:
+            raise ValueError("stderr not separately captured")
+        return self.stderr_bytes.decode(self.runner.charset, 'replace') \
+            .replace('\r\n', '\n')
+
+    def __repr__(self):
+        return '<%s %s>' % (
+            type(self).__name__,
+            self.exception and repr(self.exception) or 'okay',
+        )
+
+
+class CliRunner(object):
+    """The CLI runner provides functionality to invoke a Click command line
+    script for unittesting purposes in an isolated environment. This only
+    works in single-threaded systems without any concurrency as it changes the
+    global interpreter state.
+
+    :param charset: the character set for the input and output data. This is
+                    UTF-8 by default and should not be changed currently as
+                    the reporting to Click only works in Python 2 properly.
+    :param env: a dictionary with environment variables for overriding.
+    :param echo_stdin: if this is set to `True`, then reading from stdin writes
+                       to stdout. This is useful for showing examples in
+                       some circumstances. Note that regular prompts
+                       will automatically echo the input.
+    :param mix_stderr: if this is set to `False`, then stdout and stderr are
+                       preserved as independent streams. This is useful for
+                       Unix-philosophy apps that have predictable stdout and
+                       noisy stderr, such that each may be measured
+                       independently.
+    """
+
+    def __init__(self, charset=None, env=None, echo_stdin=False,
+                 mix_stderr=True):
+        if charset is None:
+            charset = 'utf-8'
+        self.charset = charset
+        self.env = env or {}
+        self.echo_stdin = echo_stdin
+        self.mix_stderr = mix_stderr
+
+    def get_default_prog_name(self, cli):
+        """Given a command object it will return the default program name
+        for it. The default is the `name` attribute or ``"root"`` if not
+        set.
+        """
+        return cli.name or 'root'
+
+    def make_env(self, overrides=None):
+        """Returns the environment overrides for invoking a script."""
+        rv = dict(self.env)
+        if overrides:
+            rv.update(overrides)
+        return rv
+
+    @contextlib.contextmanager
+    def isolation(self, input=None, env=None, color=False):
+        """A context manager that sets up the isolation for invoking of a
+        command line tool. This sets up stdin with the given input data
+        and `os.environ` with the overrides from the given dictionary.
+        This also rebinds some internals in Click to be mocked (like the
+        prompt functionality).
+
+        This is automatically done in the :meth:`invoke` method.
+
+        .. versionadded:: 4.0
+           The ``color`` parameter was added.
+
+        :param input: the input stream to put into sys.stdin.
+        :param env: the environment overrides as dictionary.
+        :param color: whether the output should contain color codes. The
+                      application can still override this explicitly.
+ """ + input = make_input_stream(input, self.charset) + + old_stdin = sys.stdin + old_stdout = sys.stdout + old_stderr = sys.stderr + old_forced_width = clickpkg.formatting.FORCED_WIDTH + clickpkg.formatting.FORCED_WIDTH = 80 + + env = self.make_env(env) + + if PY2: + bytes_output = StringIO() + if self.echo_stdin: + input = EchoingStdin(input, bytes_output) + sys.stdout = bytes_output + if not self.mix_stderr: + bytes_error = StringIO() + sys.stderr = bytes_error + else: + bytes_output = io.BytesIO() + if self.echo_stdin: + input = EchoingStdin(input, bytes_output) + input = io.TextIOWrapper(input, encoding=self.charset) + sys.stdout = io.TextIOWrapper( + bytes_output, encoding=self.charset) + if not self.mix_stderr: + bytes_error = io.BytesIO() + sys.stderr = io.TextIOWrapper( + bytes_error, encoding=self.charset) + + if self.mix_stderr: + sys.stderr = sys.stdout + + sys.stdin = input + + def visible_input(prompt=None): + sys.stdout.write(prompt or '') + val = input.readline().rstrip('\r\n') + sys.stdout.write(val + '\n') + sys.stdout.flush() + return val + + def hidden_input(prompt=None): + sys.stdout.write((prompt or '') + '\n') + sys.stdout.flush() + return input.readline().rstrip('\r\n') + + def _getchar(echo): + char = sys.stdin.read(1) + if echo: + sys.stdout.write(char) + sys.stdout.flush() + return char + + default_color = color + + def should_strip_ansi(stream=None, color=None): + if color is None: + return not default_color + return not color + + old_visible_prompt_func = clickpkg.termui.visible_prompt_func + old_hidden_prompt_func = clickpkg.termui.hidden_prompt_func + old__getchar_func = clickpkg.termui._getchar + old_should_strip_ansi = clickpkg.utils.should_strip_ansi + clickpkg.termui.visible_prompt_func = visible_input + clickpkg.termui.hidden_prompt_func = hidden_input + clickpkg.termui._getchar = _getchar + clickpkg.utils.should_strip_ansi = should_strip_ansi + + old_env = {} + try: + for key, value in iteritems(env): + old_env[key] = os.environ.get(key) + if value is None: + try: + del os.environ[key] + except Exception: + pass + else: + os.environ[key] = value + yield (bytes_output, not self.mix_stderr and bytes_error) + finally: + for key, value in iteritems(old_env): + if value is None: + try: + del os.environ[key] + except Exception: + pass + else: + os.environ[key] = value + sys.stdout = old_stdout + sys.stderr = old_stderr + sys.stdin = old_stdin + clickpkg.termui.visible_prompt_func = old_visible_prompt_func + clickpkg.termui.hidden_prompt_func = old_hidden_prompt_func + clickpkg.termui._getchar = old__getchar_func + clickpkg.utils.should_strip_ansi = old_should_strip_ansi + clickpkg.formatting.FORCED_WIDTH = old_forced_width + + def invoke(self, cli, args=None, input=None, env=None, + catch_exceptions=True, color=False, mix_stderr=False, **extra): + """Invokes a command in an isolated environment. The arguments are + forwarded directly to the command line script, the `extra` keyword + arguments are passed to the :meth:`~clickpkg.Command.main` function of + the command. + + This returns a :class:`Result` object. + + .. versionadded:: 3.0 + The ``catch_exceptions`` parameter was added. + + .. versionchanged:: 3.0 + The result object now has an `exc_info` attribute with the + traceback if available. + + .. versionadded:: 4.0 + The ``color`` parameter was added. + + :param cli: the command to invoke + :param args: the arguments to invoke. It may be given as an iterable + or a string. When given as string it will be interpreted + as a Unix shell command. 
More details at + :func:`shlex.split`. + :param input: the input data for `sys.stdin`. + :param env: the environment overrides. + :param catch_exceptions: Whether to catch any other exceptions than + ``SystemExit``. + :param extra: the keyword arguments to pass to :meth:`main`. + :param color: whether the output should contain color codes. The + application can still override this explicitly. + """ + exc_info = None + with self.isolation(input=input, env=env, color=color) as outstreams: + exception = None + exit_code = 0 + + if isinstance(args, string_types): + args = shlex.split(args) + + try: + prog_name = extra.pop("prog_name") + except KeyError: + prog_name = self.get_default_prog_name(cli) + + try: + cli.main(args=args or (), prog_name=prog_name, **extra) + except SystemExit as e: + exc_info = sys.exc_info() + exit_code = e.code + if exit_code is None: + exit_code = 0 + + if exit_code != 0: + exception = e + + if not isinstance(exit_code, int): + sys.stdout.write(str(exit_code)) + sys.stdout.write('\n') + exit_code = 1 + + except Exception as e: + if not catch_exceptions: + raise + exception = e + exit_code = 1 + exc_info = sys.exc_info() + finally: + sys.stdout.flush() + stdout = outstreams[0].getvalue() + stderr = outstreams[1] and outstreams[1].getvalue() + + return Result(runner=self, + stdout_bytes=stdout, + stderr_bytes=stderr, + exit_code=exit_code, + exception=exception, + exc_info=exc_info) + + @contextlib.contextmanager + def isolated_filesystem(self): + """A context manager that creates a temporary folder and changes + the current working directory to it for isolated filesystem tests. + """ + cwd = os.getcwd() + t = tempfile.mkdtemp() + os.chdir(t) + try: + yield t + finally: + os.chdir(cwd) + try: + shutil.rmtree(t) + except (OSError, IOError): + pass diff --git a/python/click/types.py b/python/click/types.py new file mode 100644 index 0000000..1f88032 --- /dev/null +++ b/python/click/types.py @@ -0,0 +1,668 @@ +import os +import stat +from datetime import datetime + +from ._compat import open_stream, text_type, filename_to_ui, \ + get_filesystem_encoding, get_streerror, _get_argv_encoding, PY2 +from .exceptions import BadParameter +from .utils import safecall, LazyFile + + +class ParamType(object): + """Helper for converting values through types. The following is + necessary for a valid type: + + * it needs a name + * it needs to pass through None unchanged + * it needs to convert from a string + * it needs to convert its result type through unchanged + (eg: needs to be idempotent) + * it needs to be able to deal with param and context being `None`. + This can be the case when the object is used with prompt + inputs. + """ + is_composite = False + + #: the descriptive name of this type + name = None + + #: if a list of this type is expected and the value is pulled from a + #: string environment variable, this is what splits it up. `None` + #: means any whitespace. For all parameters the general rule is that + #: whitespace splits them up. The exception are paths and files which + #: are split by ``os.path.pathsep`` by default (":" on Unix and ";" on + #: Windows). + envvar_list_splitter = None + + def __call__(self, value, param=None, ctx=None): + if value is not None: + return self.convert(value, param, ctx) + + def get_metavar(self, param): + """Returns the metavar default for this param if it provides one.""" + + def get_missing_message(self, param): + """Optionally might return extra information about a missing + parameter. + + .. 
versionadded:: 2.0 + """ + + def convert(self, value, param, ctx): + """Converts the value. This is not invoked for values that are + `None` (the missing value). + """ + return value + + def split_envvar_value(self, rv): + """Given a value from an environment variable this splits it up + into small chunks depending on the defined envvar list splitter. + + If the splitter is set to `None`, which means that whitespace splits, + then leading and trailing whitespace is ignored. Otherwise, leading + and trailing splitters usually lead to empty items being included. + """ + return (rv or '').split(self.envvar_list_splitter) + + def fail(self, message, param=None, ctx=None): + """Helper method to fail with an invalid value message.""" + raise BadParameter(message, ctx=ctx, param=param) + + +class CompositeParamType(ParamType): + is_composite = True + + @property + def arity(self): + raise NotImplementedError() + + +class FuncParamType(ParamType): + + def __init__(self, func): + self.name = func.__name__ + self.func = func + + def convert(self, value, param, ctx): + try: + return self.func(value) + except ValueError: + try: + value = text_type(value) + except UnicodeError: + value = str(value).decode('utf-8', 'replace') + self.fail(value, param, ctx) + + +class UnprocessedParamType(ParamType): + name = 'text' + + def convert(self, value, param, ctx): + return value + + def __repr__(self): + return 'UNPROCESSED' + + +class StringParamType(ParamType): + name = 'text' + + def convert(self, value, param, ctx): + if isinstance(value, bytes): + enc = _get_argv_encoding() + try: + value = value.decode(enc) + except UnicodeError: + fs_enc = get_filesystem_encoding() + if fs_enc != enc: + try: + value = value.decode(fs_enc) + except UnicodeError: + value = value.decode('utf-8', 'replace') + return value + return value + + def __repr__(self): + return 'STRING' + + +class Choice(ParamType): + """The choice type allows a value to be checked against a fixed set + of supported values. All of these values have to be strings. + + You should only pass a list or tuple of choices. Other iterables + (like generators) may lead to surprising results. + + See :ref:`choice-opts` for an example. + + :param case_sensitive: Set to false to make choices case + insensitive. Defaults to true. + """ + + name = 'choice' + + def __init__(self, choices, case_sensitive=True): + self.choices = choices + self.case_sensitive = case_sensitive + + def get_metavar(self, param): + return '[%s]' % '|'.join(self.choices) + + def get_missing_message(self, param): + return 'Choose from:\n\t%s.' % ',\n\t'.join(self.choices) + + def convert(self, value, param, ctx): + # Exact match + if value in self.choices: + return value + + # Match through normalization and case sensitivity + # first do token_normalize_func, then lowercase + # preserve original `value` to produce an accurate message in + # `self.fail` + normed_value = value + normed_choices = self.choices + + if ctx is not None and \ + ctx.token_normalize_func is not None: + normed_value = ctx.token_normalize_func(value) + normed_choices = [ctx.token_normalize_func(choice) for choice in + self.choices] + + if not self.case_sensitive: + normed_value = normed_value.lower() + normed_choices = [choice.lower() for choice in normed_choices] + + if normed_value in normed_choices: + return normed_value + + self.fail('invalid choice: %s. 
(choose from %s)' % + (value, ', '.join(self.choices)), param, ctx) + + def __repr__(self): + return 'Choice(%r)' % list(self.choices) + + +class DateTime(ParamType): + """The DateTime type converts date strings into `datetime` objects. + + The format strings which are checked are configurable, but default to some + common (non-timezone aware) ISO 8601 formats. + + When specifying *DateTime* formats, you should only pass a list or a tuple. + Other iterables, like generators, may lead to surprising results. + + The format strings are processed using ``datetime.strptime``, and this + consequently defines the format strings which are allowed. + + Parsing is tried using each format, in order, and the first format which + parses successfully is used. + + :param formats: A list or tuple of date format strings, in the order in + which they should be tried. Defaults to + ``'%Y-%m-%d'``, ``'%Y-%m-%dT%H:%M:%S'``, + ``'%Y-%m-%d %H:%M:%S'``. + """ + name = 'datetime' + + def __init__(self, formats=None): + self.formats = formats or [ + '%Y-%m-%d', + '%Y-%m-%dT%H:%M:%S', + '%Y-%m-%d %H:%M:%S' + ] + + def get_metavar(self, param): + return '[{}]'.format('|'.join(self.formats)) + + def _try_to_convert_date(self, value, format): + try: + return datetime.strptime(value, format) + except ValueError: + return None + + def convert(self, value, param, ctx): + # Exact match + for format in self.formats: + dtime = self._try_to_convert_date(value, format) + if dtime: + return dtime + + self.fail( + 'invalid datetime format: {}. (choose from {})'.format( + value, ', '.join(self.formats))) + + def __repr__(self): + return 'DateTime' + + +class IntParamType(ParamType): + name = 'integer' + + def convert(self, value, param, ctx): + try: + return int(value) + except (ValueError, UnicodeError): + self.fail('%s is not a valid integer' % value, param, ctx) + + def __repr__(self): + return 'INT' + + +class IntRange(IntParamType): + """A parameter that works similar to :data:`click.INT` but restricts + the value to fit into a range. The default behavior is to fail if the + value falls outside the range, but it can also be silently clamped + between the two edges. + + See :ref:`ranges` for an example. + """ + name = 'integer range' + + def __init__(self, min=None, max=None, clamp=False): + self.min = min + self.max = max + self.clamp = clamp + + def convert(self, value, param, ctx): + rv = IntParamType.convert(self, value, param, ctx) + if self.clamp: + if self.min is not None and rv < self.min: + return self.min + if self.max is not None and rv > self.max: + return self.max + if self.min is not None and rv < self.min or \ + self.max is not None and rv > self.max: + if self.min is None: + self.fail('%s is bigger than the maximum valid value ' + '%s.' % (rv, self.max), param, ctx) + elif self.max is None: + self.fail('%s is smaller than the minimum valid value ' + '%s.' % (rv, self.min), param, ctx) + else: + self.fail('%s is not in the valid range of %s to %s.' + % (rv, self.min, self.max), param, ctx) + return rv + + def __repr__(self): + return 'IntRange(%r, %r)' % (self.min, self.max) + + +class FloatParamType(ParamType): + name = 'float' + + def convert(self, value, param, ctx): + try: + return float(value) + except (UnicodeError, ValueError): + self.fail('%s is not a valid floating point value' % + value, param, ctx) + + def __repr__(self): + return 'FLOAT' + + +class FloatRange(FloatParamType): + """A parameter that works similar to :data:`click.FLOAT` but restricts + the value to fit into a range. 
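The types above are usually attached to options rather than instantiated directly; a small sketch assuming the Click 7.0 behavior documented here (the ``report`` command and its option names are illustrative only)::

    import click

    @click.command()
    @click.option('--shade', type=click.Choice(['light', 'dark'],
                                               case_sensitive=False))
    @click.option('--workers', type=click.IntRange(1, 8, clamp=True))
    @click.option('--since', type=click.DateTime(formats=['%Y-%m-%d']))
    def report(shade, workers, since):
        # e.g. --shade DARK --workers 99 --since 2019-08-01 arrives here
        # as ('dark', 8, datetime.datetime(2019, 8, 1, 0, 0))
        click.echo('%r %r %r' % (shade, workers, since))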
The default behavior is to fail if the + value falls outside the range, but it can also be silently clamped + between the two edges. + + See :ref:`ranges` for an example. + """ + name = 'float range' + + def __init__(self, min=None, max=None, clamp=False): + self.min = min + self.max = max + self.clamp = clamp + + def convert(self, value, param, ctx): + rv = FloatParamType.convert(self, value, param, ctx) + if self.clamp: + if self.min is not None and rv < self.min: + return self.min + if self.max is not None and rv > self.max: + return self.max + if self.min is not None and rv < self.min or \ + self.max is not None and rv > self.max: + if self.min is None: + self.fail('%s is bigger than the maximum valid value ' + '%s.' % (rv, self.max), param, ctx) + elif self.max is None: + self.fail('%s is smaller than the minimum valid value ' + '%s.' % (rv, self.min), param, ctx) + else: + self.fail('%s is not in the valid range of %s to %s.' + % (rv, self.min, self.max), param, ctx) + return rv + + def __repr__(self): + return 'FloatRange(%r, %r)' % (self.min, self.max) + + +class BoolParamType(ParamType): + name = 'boolean' + + def convert(self, value, param, ctx): + if isinstance(value, bool): + return bool(value) + value = value.lower() + if value in ('true', 't', '1', 'yes', 'y'): + return True + elif value in ('false', 'f', '0', 'no', 'n'): + return False + self.fail('%s is not a valid boolean' % value, param, ctx) + + def __repr__(self): + return 'BOOL' + + +class UUIDParameterType(ParamType): + name = 'uuid' + + def convert(self, value, param, ctx): + import uuid + try: + if PY2 and isinstance(value, text_type): + value = value.encode('ascii') + return uuid.UUID(value) + except (UnicodeError, ValueError): + self.fail('%s is not a valid UUID value' % value, param, ctx) + + def __repr__(self): + return 'UUID' + + +class File(ParamType): + """Declares a parameter to be a file for reading or writing. The file + is automatically closed once the context tears down (after the command + finished working). + + Files can be opened for reading or writing. The special value ``-`` + indicates stdin or stdout depending on the mode. + + By default, the file is opened for reading text data, but it can also be + opened in binary mode or for writing. The encoding parameter can be used + to force a specific encoding. + + The `lazy` flag controls if the file should be opened immediately or upon + first IO. The default is to be non-lazy for standard input and output + streams as well as files opened for reading, `lazy` otherwise. When opening a + file lazily for reading, it is still opened temporarily for validation, but + will not be held open until first IO. lazy is mainly useful when opening + for writing to avoid creating the file until it is needed. + + Starting with Click 2.0, files can also be opened atomically in which + case all writes go into a separate file in the same folder and upon + completion the file will be moved over to the original location. This + is useful if a file regularly read by other users is modified. + + See :ref:`file-args` for more information. 
+ """ + name = 'filename' + envvar_list_splitter = os.path.pathsep + + def __init__(self, mode='r', encoding=None, errors='strict', lazy=None, + atomic=False): + self.mode = mode + self.encoding = encoding + self.errors = errors + self.lazy = lazy + self.atomic = atomic + + def resolve_lazy_flag(self, value): + if self.lazy is not None: + return self.lazy + if value == '-': + return False + elif 'w' in self.mode: + return True + return False + + def convert(self, value, param, ctx): + try: + if hasattr(value, 'read') or hasattr(value, 'write'): + return value + + lazy = self.resolve_lazy_flag(value) + + if lazy: + f = LazyFile(value, self.mode, self.encoding, self.errors, + atomic=self.atomic) + if ctx is not None: + ctx.call_on_close(f.close_intelligently) + return f + + f, should_close = open_stream(value, self.mode, + self.encoding, self.errors, + atomic=self.atomic) + # If a context is provided, we automatically close the file + # at the end of the context execution (or flush out). If a + # context does not exist, it's the caller's responsibility to + # properly close the file. This for instance happens when the + # type is used with prompts. + if ctx is not None: + if should_close: + ctx.call_on_close(safecall(f.close)) + else: + ctx.call_on_close(safecall(f.flush)) + return f + except (IOError, OSError) as e: + self.fail('Could not open file: %s: %s' % ( + filename_to_ui(value), + get_streerror(e), + ), param, ctx) + + +class Path(ParamType): + """The path type is similar to the :class:`File` type but it performs + different checks. First of all, instead of returning an open file + handle it returns just the filename. Secondly, it can perform various + basic checks about what the file or directory should be. + + .. versionchanged:: 6.0 + `allow_dash` was added. + + :param exists: if set to true, the file or directory needs to exist for + this value to be valid. If this is not required and a + file does indeed not exist, then all further checks are + silently skipped. + :param file_okay: controls if a file is a possible value. + :param dir_okay: controls if a directory is a possible value. + :param writable: if true, a writable check is performed. + :param readable: if true, a readable check is performed. + :param resolve_path: if this is true, then the path is fully resolved + before the value is passed onwards. This means + that it's absolute and symlinks are resolved. It + will not expand a tilde-prefix, as this is + supposed to be done by the shell only. + :param allow_dash: If this is set to `True`, a single dash to indicate + standard streams is permitted. + :param path_type: optionally a string type that should be used to + represent the path. The default is `None` which + means the return value will be either bytes or + unicode depending on what makes most sense given the + input data Click deals with. 
+ """ + envvar_list_splitter = os.path.pathsep + + def __init__(self, exists=False, file_okay=True, dir_okay=True, + writable=False, readable=True, resolve_path=False, + allow_dash=False, path_type=None): + self.exists = exists + self.file_okay = file_okay + self.dir_okay = dir_okay + self.writable = writable + self.readable = readable + self.resolve_path = resolve_path + self.allow_dash = allow_dash + self.type = path_type + + if self.file_okay and not self.dir_okay: + self.name = 'file' + self.path_type = 'File' + elif self.dir_okay and not self.file_okay: + self.name = 'directory' + self.path_type = 'Directory' + else: + self.name = 'path' + self.path_type = 'Path' + + def coerce_path_result(self, rv): + if self.type is not None and not isinstance(rv, self.type): + if self.type is text_type: + rv = rv.decode(get_filesystem_encoding()) + else: + rv = rv.encode(get_filesystem_encoding()) + return rv + + def convert(self, value, param, ctx): + rv = value + + is_dash = self.file_okay and self.allow_dash and rv in (b'-', '-') + + if not is_dash: + if self.resolve_path: + rv = os.path.realpath(rv) + + try: + st = os.stat(rv) + except OSError: + if not self.exists: + return self.coerce_path_result(rv) + self.fail('%s "%s" does not exist.' % ( + self.path_type, + filename_to_ui(value) + ), param, ctx) + + if not self.file_okay and stat.S_ISREG(st.st_mode): + self.fail('%s "%s" is a file.' % ( + self.path_type, + filename_to_ui(value) + ), param, ctx) + if not self.dir_okay and stat.S_ISDIR(st.st_mode): + self.fail('%s "%s" is a directory.' % ( + self.path_type, + filename_to_ui(value) + ), param, ctx) + if self.writable and not os.access(value, os.W_OK): + self.fail('%s "%s" is not writable.' % ( + self.path_type, + filename_to_ui(value) + ), param, ctx) + if self.readable and not os.access(value, os.R_OK): + self.fail('%s "%s" is not readable.' % ( + self.path_type, + filename_to_ui(value) + ), param, ctx) + + return self.coerce_path_result(rv) + + +class Tuple(CompositeParamType): + """The default behavior of Click is to apply a type on a value directly. + This works well in most cases, except for when `nargs` is set to a fixed + count and different types should be used for different items. In this + case the :class:`Tuple` type can be used. This type can only be used + if `nargs` is set to a fixed number. + + For more information see :ref:`tuple-type`. + + This can be selected by using a Python tuple literal as a type. + + :param types: a list of types that should be used for the tuple items. + """ + + def __init__(self, types): + self.types = [convert_type(ty) for ty in types] + + @property + def name(self): + return "<" + " ".join(ty.name for ty in self.types) + ">" + + @property + def arity(self): + return len(self.types) + + def convert(self, value, param, ctx): + if len(value) != len(self.types): + raise TypeError('It would appear that nargs is set to conflict ' + 'with the composite type arity.') + return tuple(ty(x, param, ctx) for ty, x in zip(self.types, value)) + + +def convert_type(ty, default=None): + """Converts a callable or python ty into the most appropriate param + ty. 
+ """ + guessed_type = False + if ty is None and default is not None: + if isinstance(default, tuple): + ty = tuple(map(type, default)) + else: + ty = type(default) + guessed_type = True + + if isinstance(ty, tuple): + return Tuple(ty) + if isinstance(ty, ParamType): + return ty + if ty is text_type or ty is str or ty is None: + return STRING + if ty is int: + return INT + # Booleans are only okay if not guessed. This is done because for + # flags the default value is actually a bit of a lie in that it + # indicates which of the flags is the one we want. See get_default() + # for more information. + if ty is bool and not guessed_type: + return BOOL + if ty is float: + return FLOAT + if guessed_type: + return STRING + + # Catch a common mistake + if __debug__: + try: + if issubclass(ty, ParamType): + raise AssertionError('Attempted to use an uninstantiated ' + 'parameter type (%s).' % ty) + except TypeError: + pass + return FuncParamType(ty) + + +#: A dummy parameter type that just does nothing. From a user's +#: perspective this appears to just be the same as `STRING` but internally +#: no string conversion takes place. This is necessary to achieve the +#: same bytes/unicode behavior on Python 2/3 in situations where you want +#: to not convert argument types. This is usually useful when working +#: with file paths as they can appear in bytes and unicode. +#: +#: For path related uses the :class:`Path` type is a better choice but +#: there are situations where an unprocessed type is useful which is why +#: it is is provided. +#: +#: .. versionadded:: 4.0 +UNPROCESSED = UnprocessedParamType() + +#: A unicode string parameter type which is the implicit default. This +#: can also be selected by using ``str`` as type. +STRING = StringParamType() + +#: An integer parameter. This can also be selected by using ``int`` as +#: type. +INT = IntParamType() + +#: A floating point value parameter. This can also be selected by using +#: ``float`` as type. +FLOAT = FloatParamType() + +#: A boolean parameter. This is the default for boolean flags. This can +#: also be selected by using ``bool`` as a type. +BOOL = BoolParamType() + +#: A UUID parameter. 
+UUID = UUIDParameterType() diff --git a/python/click/utils.py b/python/click/utils.py new file mode 100644 index 0000000..fc84369 --- /dev/null +++ b/python/click/utils.py @@ -0,0 +1,440 @@ +import os +import sys + +from .globals import resolve_color_default + +from ._compat import text_type, open_stream, get_filesystem_encoding, \ + get_streerror, string_types, PY2, binary_streams, text_streams, \ + filename_to_ui, auto_wrap_for_ansi, strip_ansi, should_strip_ansi, \ + _default_text_stdout, _default_text_stderr, is_bytes, WIN + +if not PY2: + from ._compat import _find_binary_writer +elif WIN: + from ._winconsole import _get_windows_argv, \ + _hash_py_argv, _initial_argv_hash + + +echo_native_types = string_types + (bytes, bytearray) + + +def _posixify(name): + return '-'.join(name.split()).lower() + + +def safecall(func): + """Wraps a function so that it swallows exceptions.""" + def wrapper(*args, **kwargs): + try: + return func(*args, **kwargs) + except Exception: + pass + return wrapper + + +def make_str(value): + """Converts a value into a valid string.""" + if isinstance(value, bytes): + try: + return value.decode(get_filesystem_encoding()) + except UnicodeError: + return value.decode('utf-8', 'replace') + return text_type(value) + + +def make_default_short_help(help, max_length=45): + """Return a condensed version of help string.""" + words = help.split() + total_length = 0 + result = [] + done = False + + for word in words: + if word[-1:] == '.': + done = True + new_length = result and 1 + len(word) or len(word) + if total_length + new_length > max_length: + result.append('...') + done = True + else: + if result: + result.append(' ') + result.append(word) + if done: + break + total_length += new_length + + return ''.join(result) + + +class LazyFile(object): + """A lazy file works like a regular file but it does not fully open + the file but it does perform some basic checks early to see if the + filename parameter does make sense. This is useful for safely opening + files for writing. + """ + + def __init__(self, filename, mode='r', encoding=None, errors='strict', + atomic=False): + self.name = filename + self.mode = mode + self.encoding = encoding + self.errors = errors + self.atomic = atomic + + if filename == '-': + self._f, self.should_close = open_stream(filename, mode, + encoding, errors) + else: + if 'r' in mode: + # Open and close the file in case we're opening it for + # reading so that we can catch at least some errors in + # some cases early. + open(filename, mode).close() + self._f = None + self.should_close = True + + def __getattr__(self, name): + return getattr(self.open(), name) + + def __repr__(self): + if self._f is not None: + return repr(self._f) + return '<unopened file %r %s>' % (self.name, self.mode) + + def open(self): + """Opens the file if it's not yet open. This call might fail with + a :exc:`FileError`. Not handling this error will produce an error + that Click shows. + """ + if self._f is not None: + return self._f + try: + rv, self.should_close = open_stream(self.name, self.mode, + self.encoding, + self.errors, + atomic=self.atomic) + except (IOError, OSError) as e: + from .exceptions import FileError + raise FileError(self.name, hint=get_streerror(e)) + self._f = rv + return rv + + def close(self): + """Closes the underlying file, no matter what.""" + if self._f is not None: + self._f.close() + + def close_intelligently(self): + """This function only closes the file if it was opened by the lazy + file wrapper. 
For instance this will never close stdin.
+        """
+        if self.should_close:
+            self.close()
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, tb):
+        self.close_intelligently()
+
+    def __iter__(self):
+        self.open()
+        return iter(self._f)
+
+
+class KeepOpenFile(object):
+
+    def __init__(self, file):
+        self._file = file
+
+    def __getattr__(self, name):
+        return getattr(self._file, name)
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, tb):
+        pass
+
+    def __repr__(self):
+        return repr(self._file)
+
+    def __iter__(self):
+        return iter(self._file)
+
+
+def echo(message=None, file=None, nl=True, err=False, color=None):
+    """Prints a message plus a newline to the given file or stdout. On
+    first sight, this looks like the print function, but it has improved
+    support for handling Unicode and binary data that does not fail no
+    matter how badly configured the system is.
+
+    Primarily it means that you can print binary data as well as Unicode
+    data on both 2.x and 3.x to the given file in the most appropriate way
+    possible. This is a very carefree function in that it will try its
+    best to not fail. As of Click 6.0 this includes support for unicode
+    output on the Windows console.
+
+    In addition to that, if `colorama`_ is installed, the echo function will
+    also support clever handling of ANSI codes. Essentially it will then
+    do the following:
+
+    -   add transparent handling of ANSI color codes on Windows.
+    -   hide ANSI codes automatically if the destination file is not a
+        terminal.
+
+    .. _colorama: https://pypi.org/project/colorama/
+
+    .. versionchanged:: 6.0
+       As of Click 6.0 the echo function will properly support unicode
+       output on the windows console. Note that click does not modify
+       the interpreter in any way which means that `sys.stdout` or the
+       print statement or function will still not provide unicode support.
+
+    .. versionchanged:: 2.0
+       Starting with version 2.0 of Click, the echo function will work
+       with colorama if it's installed.
+
+    .. versionadded:: 3.0
+       The `err` parameter was added.
+
+    .. versionchanged:: 4.0
+       Added the `color` flag.
+
+    :param message: the message to print
+    :param file: the file to write to (defaults to ``stdout``)
+    :param err: if set to true the file defaults to ``stderr`` instead of
+                ``stdout``. This is faster and easier than calling
+                :func:`get_text_stderr` yourself.
+    :param nl: if set to `True` (the default) a newline is printed afterwards.
+    :param color: controls if the terminal supports ANSI colors or not. The
+                  default is autodetection.
+    """
+    if file is None:
+        if err:
+            file = _default_text_stderr()
+        else:
+            file = _default_text_stdout()
+
+    # Convert non bytes/text into the native string type.
+    if message is not None and not isinstance(message, echo_native_types):
+        message = text_type(message)
+
+    if nl:
+        message = message or u''
+        if isinstance(message, text_type):
+            message += u'\n'
+        else:
+            message += b'\n'
+
+    # If there is a message, and we're in Python 3, and the value looks
+    # like bytes, we manually need to find the binary stream and write the
+    # message in there. This is done separately so that most stream
+    # types will work as you would expect. Eg: you can write to StringIO
+    # for other cases.
+    if message and not PY2 and is_bytes(message):
+        binary_file = _find_binary_writer(file)
+        if binary_file is not None:
+            file.flush()
+            binary_file.write(message)
+            binary_file.flush()
+            return
+
+    # ANSI-style support. If there is no message or we are dealing with
+    # bytes nothing is happening. If we are connected to a file we want
+    # to strip colors. If we are on windows we either wrap the stream
+    # to strip the color or we use the colorama support to translate the
+    # ansi codes to API calls.
+    if message and not is_bytes(message):
+        color = resolve_color_default(color)
+        if should_strip_ansi(file, color):
+            message = strip_ansi(message)
+        elif WIN:
+            if auto_wrap_for_ansi is not None:
+                file = auto_wrap_for_ansi(file)
+            elif not color:
+                message = strip_ansi(message)
+
+    if message:
+        file.write(message)
+        file.flush()
+
+
+def get_binary_stream(name):
+    """Returns a system stream for byte processing. This essentially
+    returns the stream from the sys module with the given name but it
+    solves some compatibility issues between different Python versions.
+    Primarily this function is necessary for getting binary streams on
+    Python 3.
+
+    :param name: the name of the stream to open. Valid names are ``'stdin'``,
+                 ``'stdout'`` and ``'stderr'``
+    """
+    opener = binary_streams.get(name)
+    if opener is None:
+        raise TypeError('Unknown standard stream %r' % name)
+    return opener()
+
+
+def get_text_stream(name, encoding=None, errors='strict'):
+    """Returns a system stream for text processing. This usually returns
+    a wrapped stream around a binary stream returned from
+    :func:`get_binary_stream` but it also can take shortcuts on Python 3
+    for already correctly configured streams.
+
+    :param name: the name of the stream to open. Valid names are ``'stdin'``,
+                 ``'stdout'`` and ``'stderr'``
+    :param encoding: overrides the detected default encoding.
+    :param errors: overrides the default error mode.
+    """
+    opener = text_streams.get(name)
+    if opener is None:
+        raise TypeError('Unknown standard stream %r' % name)
+    return opener(encoding, errors)
+
+
+def open_file(filename, mode='r', encoding=None, errors='strict',
+              lazy=False, atomic=False):
+    """This is similar to how the :class:`File` works but for manual
+    usage. Files are opened non lazy by default. This can open regular
+    files as well as stdin/stdout if ``'-'`` is passed.
+
+    If stdin/stdout is returned the stream is wrapped so that the context
+    manager will not close the stream accidentally. This makes it possible
+    to always use the function like this without having to worry about
+    accidentally closing a standard stream::
+
+        with open_file(filename) as f:
+            ...
+
+    .. versionadded:: 3.0
+
+    :param filename: the name of the file to open (or ``'-'`` for stdin/stdout).
+    :param mode: the mode in which to open the file.
+    :param encoding: the encoding to use.
+    :param errors: the error handling for this file.
+    :param lazy: can be flipped to true to open the file lazily.
+    :param atomic: in atomic mode writes go into a temporary file and it's
+                   moved on close.
+    """
+    if lazy:
+        return LazyFile(filename, mode, encoding, errors, atomic=atomic)
+    f, should_close = open_stream(filename, mode, encoding, errors,
+                                  atomic=atomic)
+    if not should_close:
+        f = KeepOpenFile(f)
+    return f
+
+
+def get_os_args():
+    """This returns the argument part of sys.argv in the most appropriate
+    form for processing. What this means is that this return value is in
+    a format that works for Click to process but does not necessarily
+    correspond well to what's actually standard for the interpreter.
+
+    On most environments the return value is ``sys.argv[1:]`` unchanged.
+    However if you are on Windows and running Python 2 the return value
+    will actually be a list of unicode strings instead because the
+    default behavior on that platform otherwise will not be able to
+    carry all possible values that sys.argv can have.
+
+    .. versionadded:: 6.0
+    """
+    # We can only extract the unicode argv if sys.argv has not been
+    # changed since the startup of the application.
+    if PY2 and WIN and _initial_argv_hash == _hash_py_argv():
+        return _get_windows_argv()
+    return sys.argv[1:]
+
+
+def format_filename(filename, shorten=False):
+    """Formats a filename for user display. The main purpose of this
+    function is to ensure that the filename can be displayed at all. This
+    will decode the filename to unicode if necessary in a way that it will
+    not fail. Optionally, it can shorten the filename to not include the
+    full path to the filename.
+
+    :param filename: formats a filename for UI display. This will also convert
+                     the filename into unicode without failing.
+    :param shorten: this optionally shortens the filename to strip off the
+                    path that leads up to it.
+    """
+    if shorten:
+        filename = os.path.basename(filename)
+    return filename_to_ui(filename)
+
+
+def get_app_dir(app_name, roaming=True, force_posix=False):
+    r"""Returns the config folder for the application. The default behavior
+    is to return whatever is most appropriate for the operating system.
+
+    To give you an idea, for an app called ``"Foo Bar"``, something like
+    the following folders could be returned:
+
+    Mac OS X:
+      ``~/Library/Application Support/Foo Bar``
+    Mac OS X (POSIX):
+      ``~/.foo-bar``
+    Unix:
+      ``~/.config/foo-bar``
+    Unix (POSIX):
+      ``~/.foo-bar``
+    Win XP (roaming):
+      ``C:\Documents and Settings\<user>\Local Settings\Application Data\Foo Bar``
+    Win XP (not roaming):
+      ``C:\Documents and Settings\<user>\Application Data\Foo Bar``
+    Win 7 (roaming):
+      ``C:\Users\<user>\AppData\Roaming\Foo Bar``
+    Win 7 (not roaming):
+      ``C:\Users\<user>\AppData\Local\Foo Bar``
+
+    .. versionadded:: 2.0
+
+    :param app_name: the application name. This should be properly capitalized
+                     and can contain whitespace.
+    :param roaming: controls if the folder should be roaming or not on Windows.
+                    Has no effect otherwise.
+    :param force_posix: if this is set to `True` then on any POSIX system the
+                        folder will be stored in the home folder with a leading
+                        dot instead of the XDG config home or darwin's
+                        application support folder.
+    """
+    if WIN:
+        key = roaming and 'APPDATA' or 'LOCALAPPDATA'
+        folder = os.environ.get(key)
+        if folder is None:
+            folder = os.path.expanduser('~')
+        return os.path.join(folder, app_name)
+    if force_posix:
+        return os.path.join(os.path.expanduser('~/.' + _posixify(app_name)))
+    if sys.platform == 'darwin':
+        return os.path.join(os.path.expanduser(
+            '~/Library/Application Support'), app_name)
+    return os.path.join(
+        os.environ.get('XDG_CONFIG_HOME', os.path.expanduser('~/.config')),
+        _posixify(app_name))
+
+
+class PacifyFlushWrapper(object):
+    """This wrapper is used to catch and suppress BrokenPipeErrors resulting
+    from ``.flush()`` being called on broken pipe during the shutdown/final-GC
+    of the Python interpreter. Notably ``.flush()`` is always called on
+    ``sys.stdout`` and ``sys.stderr``. So as to have minimal impact on any
+    other cleanup code, and the case where the underlying file is not a broken
+    pipe, all calls and attributes are proxied.
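Tying the helpers in this file together — a hedged sketch against the vendored Click 7.0 API; the ``'Foo Bar'`` app name and ``config.json`` filename are placeholders::

    import os
    import json
    import click

    # echo() copes with text, bytes and ANSI styling alike
    click.echo(b'\xe2\x9c\x93 raw bytes are fine')
    click.echo(click.style('warning', fg='yellow'), err=True)

    # get_app_dir() picks the per-OS config folder described above
    cfg = os.path.join(click.get_app_dir('Foo Bar'), 'config.json')
    if os.path.exists(cfg):
        with click.open_file(cfg) as f:
            settings = json.load(f)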
+ """ + + def __init__(self, wrapped): + self.wrapped = wrapped + + def flush(self): + try: + self.wrapped.flush() + except IOError as e: + import errno + if e.errno != errno.EPIPE: + raise + + def __getattr__(self, attr): + return getattr(self.wrapped, attr) diff --git a/python/flask/__init__.py b/python/flask/__init__.py new file mode 100644 index 0000000..59f0fff --- /dev/null +++ b/python/flask/__init__.py @@ -0,0 +1,49 @@ +# -*- coding: utf-8 -*- +""" + flask + ~~~~~ + + A microframework based on Werkzeug. It's extensively documented + and follows best practice patterns. + + :copyright: © 2010 by the Pallets team. + :license: BSD, see LICENSE for more details. +""" + +__version__ = '1.0.3' + +# utilities we import from Werkzeug and Jinja2 that are unused +# in the module but are exported as public interface. +from werkzeug.exceptions import abort +from werkzeug.utils import redirect +from jinja2 import Markup, escape + +from .app import Flask, Request, Response +from .config import Config +from .helpers import url_for, flash, send_file, send_from_directory, \ + get_flashed_messages, get_template_attribute, make_response, safe_join, \ + stream_with_context +from .globals import current_app, g, request, session, _request_ctx_stack, \ + _app_ctx_stack +from .ctx import has_request_context, has_app_context, \ + after_this_request, copy_current_request_context +from .blueprints import Blueprint +from .templating import render_template, render_template_string + +# the signals +from .signals import signals_available, template_rendered, request_started, \ + request_finished, got_request_exception, request_tearing_down, \ + appcontext_tearing_down, appcontext_pushed, \ + appcontext_popped, message_flashed, before_render_template + +# We're not exposing the actual json module but a convenient wrapper around +# it. +from . import json + +# This was the only thing that Flask used to export at one point and it had +# a more generic name. +jsonify = json.jsonify + +# backwards compat, goes away in 1.0 +from .sessions import SecureCookieSession as Session +json_available = True diff --git a/python/flask/__main__.py b/python/flask/__main__.py new file mode 100644 index 0000000..4aee654 --- /dev/null +++ b/python/flask/__main__.py @@ -0,0 +1,14 @@ +# -*- coding: utf-8 -*- +""" + flask.__main__ + ~~~~~~~~~~~~~~ + + Alias for flask.run for the command line. + + :copyright: © 2010 by the Pallets team. + :license: BSD, see LICENSE for more details. +""" + +if __name__ == '__main__': + from .cli import main + main(as_module=True) diff --git a/python/flask/_compat.py b/python/flask/_compat.py new file mode 100644 index 0000000..dfbaae9 --- /dev/null +++ b/python/flask/_compat.py @@ -0,0 +1,101 @@ +# -*- coding: utf-8 -*- +""" + flask._compat + ~~~~~~~~~~~~~ + + Some py2/py3 compatibility support based on a stripped down + version of six so we don't have to depend on a specific version + of it. + + :copyright: © 2010 by the Pallets team. + :license: BSD, see LICENSE for more details. 
+""" + +import sys + +PY2 = sys.version_info[0] == 2 +_identity = lambda x: x + + +if not PY2: + text_type = str + string_types = (str,) + integer_types = (int,) + + iterkeys = lambda d: iter(d.keys()) + itervalues = lambda d: iter(d.values()) + iteritems = lambda d: iter(d.items()) + + from inspect import getfullargspec as getargspec + from io import StringIO + import collections.abc as collections_abc + + def reraise(tp, value, tb=None): + if value.__traceback__ is not tb: + raise value.with_traceback(tb) + raise value + + implements_to_string = _identity + +else: + text_type = unicode + string_types = (str, unicode) + integer_types = (int, long) + + iterkeys = lambda d: d.iterkeys() + itervalues = lambda d: d.itervalues() + iteritems = lambda d: d.iteritems() + + from inspect import getargspec + from cStringIO import StringIO + import collections as collections_abc + + exec('def reraise(tp, value, tb=None):\n raise tp, value, tb') + + def implements_to_string(cls): + cls.__unicode__ = cls.__str__ + cls.__str__ = lambda x: x.__unicode__().encode('utf-8') + return cls + + +def with_metaclass(meta, *bases): + """Create a base class with a metaclass.""" + # This requires a bit of explanation: the basic idea is to make a + # dummy metaclass for one level of class instantiation that replaces + # itself with the actual metaclass. + class metaclass(type): + def __new__(cls, name, this_bases, d): + return meta(name, bases, d) + return type.__new__(metaclass, 'temporary_class', (), {}) + + +# Certain versions of pypy have a bug where clearing the exception stack +# breaks the __exit__ function in a very peculiar way. The second level of +# exception blocks is necessary because pypy seems to forget to check if an +# exception happened until the next bytecode instruction? +# +# Relevant PyPy bugfix commit: +# https://bitbucket.org/pypy/pypy/commits/77ecf91c635a287e88e60d8ddb0f4e9df4003301 +# According to ronan on #pypy IRC, it is released in PyPy2 2.3 and later +# versions. +# +# Ubuntu 14.04 has PyPy 2.2.1, which does exhibit this bug. +BROKEN_PYPY_CTXMGR_EXIT = False +if hasattr(sys, 'pypy_version_info'): + class _Mgr(object): + def __enter__(self): + return self + def __exit__(self, *args): + if hasattr(sys, 'exc_clear'): + # Python 3 (PyPy3) doesn't have exc_clear + sys.exc_clear() + try: + try: + with _Mgr(): + raise AssertionError() + except: + raise + except TypeError: + BROKEN_PYPY_CTXMGR_EXIT = True + except AssertionError: + pass diff --git a/python/flask/app.py b/python/flask/app.py new file mode 100644 index 0000000..c570a95 --- /dev/null +++ b/python/flask/app.py @@ -0,0 +1,2334 @@ +# -*- coding: utf-8 -*- +""" + flask.app + ~~~~~~~~~ + + This module implements the central WSGI application object. + + :copyright: © 2010 by the Pallets team. + :license: BSD, see LICENSE for more details. +""" + +import os +import sys +import warnings +from datetime import timedelta +from functools import update_wrapper +from itertools import chain +from threading import Lock + +from werkzeug.datastructures import Headers, ImmutableDict +from werkzeug.exceptions import BadRequest, BadRequestKeyError, HTTPException, \ + InternalServerError, MethodNotAllowed, default_exceptions +from werkzeug.routing import BuildError, Map, RequestRedirect, \ + RoutingException, Rule + +from . 
import cli, json +from ._compat import integer_types, reraise, string_types, text_type +from .config import Config, ConfigAttribute +from .ctx import AppContext, RequestContext, _AppCtxGlobals +from .globals import _request_ctx_stack, g, request, session +from .helpers import ( + _PackageBoundObject, + _endpoint_from_view_func, find_package, get_env, get_debug_flag, + get_flashed_messages, locked_cached_property, url_for, get_load_dotenv +) +from .logging import create_logger +from .sessions import SecureCookieSessionInterface +from .signals import appcontext_tearing_down, got_request_exception, \ + request_finished, request_started, request_tearing_down +from .templating import DispatchingJinjaLoader, Environment, \ + _default_template_ctx_processor +from .wrappers import Request, Response + +# a singleton sentinel value for parameter defaults +_sentinel = object() + + +def _make_timedelta(value): + if not isinstance(value, timedelta): + return timedelta(seconds=value) + return value + + +def setupmethod(f): + """Wraps a method so that it performs a check in debug mode if the + first request was already handled. + """ + def wrapper_func(self, *args, **kwargs): + if self.debug and self._got_first_request: + raise AssertionError('A setup function was called after the ' + 'first request was handled. This usually indicates a bug ' + 'in the application where a module was not imported ' + 'and decorators or other functionality was called too late.\n' + 'To fix this make sure to import all your view modules, ' + 'database models and everything related at a central place ' + 'before the application starts serving requests.') + return f(self, *args, **kwargs) + return update_wrapper(wrapper_func, f) + + +class Flask(_PackageBoundObject): + """The flask object implements a WSGI application and acts as the central + object. It is passed the name of the module or package of the + application. Once it is created it will act as a central registry for + the view functions, the URL rules, template configuration and much more. + + The name of the package is used to resolve resources from inside the + package or the folder the module is contained in depending on if the + package parameter resolves to an actual python package (a folder with + an :file:`__init__.py` file inside) or a standard module (just a ``.py`` file). + + For more information about resource loading, see :func:`open_resource`. + + Usually you create a :class:`Flask` instance in your main module or + in the :file:`__init__.py` file of your package like this:: + + from flask import Flask + app = Flask(__name__) + + .. admonition:: About the First Parameter + + The idea of the first parameter is to give Flask an idea of what + belongs to your application. This name is used to find resources + on the filesystem, can be used by extensions to improve debugging + information and a lot more. + + So it's important what you provide there. If you are using a single + module, `__name__` is always the correct value. If you however are + using a package, it's usually recommended to hardcode the name of + your package there. + + For example if your application is defined in :file:`yourapplication/app.py` + you should create it with one of the two versions below:: + + app = Flask('yourapplication') + app = Flask(__name__.split('.')[0]) + + Why is that? The application will work even with `__name__`, thanks + to how resources are looked up. However it will make debugging more + painful. 
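To make that advice concrete, a minimal sketch of the hardcoded-name style follows; the package name ``yourapplication`` is hypothetical, as in the docstring above::

    # A sketch, assuming app.py lives inside a (hypothetical) package
    # named "yourapplication".
    from flask import Flask

    # Hardcoding the package name keeps resource lookup stable even
    # when this module is executed directly as __main__:
    app = Flask('yourapplication')
    # Equivalent when app.py sits at the top level of the package:
    # app = Flask(__name__.split('.')[0])

    @app.route('/')
    def index():
        return 'Hello World'
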
Certain extensions can make assumptions based on the + import name of your application. For example the Flask-SQLAlchemy + extension will look for the code in your application that triggered + an SQL query in debug mode. If the import name is not properly set + up, that debugging information is lost. (For example it would only + pick up SQL queries in `yourapplication.app` and not + `yourapplication.views.frontend`) + + .. versionadded:: 0.7 + The `static_url_path`, `static_folder`, and `template_folder` + parameters were added. + + .. versionadded:: 0.8 + The `instance_path` and `instance_relative_config` parameters were + added. + + .. versionadded:: 0.11 + The `root_path` parameter was added. + + .. versionadded:: 1.0 + The ``host_matching`` and ``static_host`` parameters were added. + + .. versionadded:: 1.0 + The ``subdomain_matching`` parameter was added. Subdomain + matching needs to be enabled manually now. Setting + :data:`SERVER_NAME` does not implicitly enable it. + + :param import_name: the name of the application package + :param static_url_path: can be used to specify a different path for the + static files on the web. Defaults to the name + of the `static_folder` folder. + :param static_folder: the folder with static files that should be served + at `static_url_path`. Defaults to the ``'static'`` + folder in the root path of the application. + :param static_host: the host to use when adding the static route. + Defaults to None. Required when using ``host_matching=True`` + with a ``static_folder`` configured. + :param host_matching: set ``url_map.host_matching`` attribute. + Defaults to False. + :param subdomain_matching: consider the subdomain relative to + :data:`SERVER_NAME` when matching routes. Defaults to False. + :param template_folder: the folder that contains the templates that should + be used by the application. Defaults to + ``'templates'`` folder in the root path of the + application. + :param instance_path: An alternative instance path for the application. + By default the folder ``'instance'`` next to the + package or module is assumed to be the instance + path. + :param instance_relative_config: if set to ``True`` relative filenames + for loading the config are assumed to + be relative to the instance path instead + of the application root. + :param root_path: Flask by default will automatically calculate the path + to the root of the application. In certain situations + this cannot be achieved (for instance if the package + is a Python 3 namespace package) and needs to be + manually defined. + """ + + #: The class that is used for request objects. See :class:`~flask.Request` + #: for more information. + request_class = Request + + #: The class that is used for response objects. See + #: :class:`~flask.Response` for more information. + response_class = Response + + #: The class that is used for the Jinja environment. + #: + #: .. versionadded:: 0.11 + jinja_environment = Environment + + #: The class that is used for the :data:`~flask.g` instance. + #: + #: Example use cases for a custom class: + #: + #: 1. Store arbitrary attributes on flask.g. + #: 2. Add a property for lazy per-request database connectors. + #: 3. Return None instead of AttributeError on unexpected attributes. + #: 4. Raise exception if an unexpected attr is set, a "controlled" flask.g. + #: + #: In Flask 0.9 this property was called `request_globals_class` but it + #: was changed in 0.10 to :attr:`app_ctx_globals_class` because the + #: flask.g object is now application context scoped. 
+ #: + #: .. versionadded:: 0.10 + app_ctx_globals_class = _AppCtxGlobals + + #: The class that is used for the ``config`` attribute of this app. + #: Defaults to :class:`~flask.Config`. + #: + #: Example use cases for a custom class: + #: + #: 1. Default values for certain config options. + #: 2. Access to config values through attributes in addition to keys. + #: + #: .. versionadded:: 0.11 + config_class = Config + + #: The testing flag. Set this to ``True`` to enable the test mode of + #: Flask extensions (and in the future probably also Flask itself). + #: For example this might activate test helpers that have an + #: additional runtime cost which should not be enabled by default. + #: + #: If this is enabled and PROPAGATE_EXCEPTIONS is not changed from the + #: default it's implicitly enabled. + #: + #: This attribute can also be configured from the config with the + #: ``TESTING`` configuration key. Defaults to ``False``. + testing = ConfigAttribute('TESTING') + + #: If a secret key is set, cryptographic components can use this to + #: sign cookies and other things. Set this to a complex random value + #: when you want to use the secure cookie for instance. + #: + #: This attribute can also be configured from the config with the + #: :data:`SECRET_KEY` configuration key. Defaults to ``None``. + secret_key = ConfigAttribute('SECRET_KEY') + + #: The secure cookie uses this for the name of the session cookie. + #: + #: This attribute can also be configured from the config with the + #: ``SESSION_COOKIE_NAME`` configuration key. Defaults to ``'session'`` + session_cookie_name = ConfigAttribute('SESSION_COOKIE_NAME') + + #: A :class:`~datetime.timedelta` which is used to set the expiration + #: date of a permanent session. The default is 31 days which makes a + #: permanent session survive for roughly one month. + #: + #: This attribute can also be configured from the config with the + #: ``PERMANENT_SESSION_LIFETIME`` configuration key. Defaults to + #: ``timedelta(days=31)`` + permanent_session_lifetime = ConfigAttribute('PERMANENT_SESSION_LIFETIME', + get_converter=_make_timedelta) + + #: A :class:`~datetime.timedelta` which is used as default cache_timeout + #: for the :func:`send_file` functions. The default is 12 hours. + #: + #: This attribute can also be configured from the config with the + #: ``SEND_FILE_MAX_AGE_DEFAULT`` configuration key. This configuration + #: variable can also be set with an integer value used as seconds. + #: Defaults to ``timedelta(hours=12)`` + send_file_max_age_default = ConfigAttribute('SEND_FILE_MAX_AGE_DEFAULT', + get_converter=_make_timedelta) + + #: Enable this if you want to use the X-Sendfile feature. Keep in + #: mind that the server has to support this. This only affects files + #: sent with the :func:`send_file` method. + #: + #: .. versionadded:: 0.2 + #: + #: This attribute can also be configured from the config with the + #: ``USE_X_SENDFILE`` configuration key. Defaults to ``False``. + use_x_sendfile = ConfigAttribute('USE_X_SENDFILE') + + #: The JSON encoder class to use. Defaults to :class:`~flask.json.JSONEncoder`. + #: + #: .. versionadded:: 0.10 + json_encoder = json.JSONEncoder + + #: The JSON decoder class to use. Defaults to :class:`~flask.json.JSONDecoder`. + #: + #: .. versionadded:: 0.10 + json_decoder = json.JSONDecoder + + #: Options that are passed directly to the Jinja2 environment. + jinja_options = ImmutableDict( + extensions=['jinja2.ext.autoescape', 'jinja2.ext.with_'] + ) + + #: Default configuration parameters. 
+ default_config = ImmutableDict({
+ 'ENV': None,
+ 'DEBUG': None,
+ 'TESTING': False,
+ 'PROPAGATE_EXCEPTIONS': None,
+ 'PRESERVE_CONTEXT_ON_EXCEPTION': None,
+ 'SECRET_KEY': None,
+ 'PERMANENT_SESSION_LIFETIME': timedelta(days=31),
+ 'USE_X_SENDFILE': False,
+ 'SERVER_NAME': None,
+ 'APPLICATION_ROOT': '/',
+ 'SESSION_COOKIE_NAME': 'session',
+ 'SESSION_COOKIE_DOMAIN': None,
+ 'SESSION_COOKIE_PATH': None,
+ 'SESSION_COOKIE_HTTPONLY': True,
+ 'SESSION_COOKIE_SECURE': False,
+ 'SESSION_COOKIE_SAMESITE': None,
+ 'SESSION_REFRESH_EACH_REQUEST': True,
+ 'MAX_CONTENT_LENGTH': None,
+ 'SEND_FILE_MAX_AGE_DEFAULT': timedelta(hours=12),
+ 'TRAP_BAD_REQUEST_ERRORS': None,
+ 'TRAP_HTTP_EXCEPTIONS': False,
+ 'EXPLAIN_TEMPLATE_LOADING': False,
+ 'PREFERRED_URL_SCHEME': 'http',
+ 'JSON_AS_ASCII': True,
+ 'JSON_SORT_KEYS': True,
+ 'JSONIFY_PRETTYPRINT_REGULAR': False,
+ 'JSONIFY_MIMETYPE': 'application/json',
+ 'TEMPLATES_AUTO_RELOAD': None,
+ 'MAX_COOKIE_SIZE': 4093,
+ })
+
+ #: The rule object to use for URL rules created. This is used by
+ #: :meth:`add_url_rule`. Defaults to :class:`werkzeug.routing.Rule`.
+ #:
+ #: .. versionadded:: 0.7
+ url_rule_class = Rule
+
+ #: the test client that is used when `test_client` is used.
+ #:
+ #: .. versionadded:: 0.7
+ test_client_class = None
+
+ #: The :class:`~click.testing.CliRunner` subclass, by default
+ #: :class:`~flask.testing.FlaskCliRunner` that is used by
+ #: :meth:`test_cli_runner`. Its ``__init__`` method should take a
+ #: Flask app object as the first argument.
+ #:
+ #: .. versionadded:: 1.0
+ test_cli_runner_class = None
+
+ #: the session interface to use. By default an instance of
+ #: :class:`~flask.sessions.SecureCookieSessionInterface` is used here.
+ #:
+ #: .. versionadded:: 0.8
+ session_interface = SecureCookieSessionInterface()
+
+ # TODO remove the next three attrs when Sphinx :inherited-members: works
+ # https://github.com/sphinx-doc/sphinx/issues/741
+
+ #: The name of the package or module that this app belongs to. Do not
+ #: change this once it is set by the constructor.
+ import_name = None
+
+ #: Location of the template files to be added to the template lookup.
+ #: ``None`` if templates should not be added.
+ template_folder = None
+
+ #: Absolute path to the package on the filesystem. Used to look up
+ #: resources contained in the package.
+ root_path = None
+
+ def __init__(
+ self,
+ import_name,
+ static_url_path=None,
+ static_folder='static',
+ static_host=None,
+ host_matching=False,
+ subdomain_matching=False,
+ template_folder='templates',
+ instance_path=None,
+ instance_relative_config=False,
+ root_path=None
+ ):
+ _PackageBoundObject.__init__(
+ self,
+ import_name,
+ template_folder=template_folder,
+ root_path=root_path
+ )
+
+ if static_url_path is not None:
+ self.static_url_path = static_url_path
+
+ if static_folder is not None:
+ self.static_folder = static_folder
+
+ if instance_path is None:
+ instance_path = self.auto_find_instance_path()
+ elif not os.path.isabs(instance_path):
+ raise ValueError(
+ 'If an instance path is provided it must be absolute.'
+ ' A relative path was given instead.'
+ )
+
+ #: Holds the path to the instance folder.
+ #:
+ #: .. versionadded:: 0.8
+ self.instance_path = instance_path
+
+ #: The configuration dictionary as :class:`Config`. This behaves
+ #: exactly like a regular dictionary but supports additional methods
+ #: to load a config from files.
+ self.config = self.make_config(instance_relative_config)
+
+ #: A dictionary of all view functions registered.
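A brief sketch of how these defaults and the ``ConfigAttribute`` descriptors behave in practice; the concrete values shown are illustrative only::

    from datetime import timedelta
    from flask import Flask

    app = Flask(__name__)

    # Every key in default_config is present from the start.
    assert app.config['SESSION_COOKIE_NAME'] == 'session'

    # ConfigAttribute proxies attribute reads to the config mapping and
    # applies get_converter: an integer number of seconds comes back as
    # a timedelta via _make_timedelta.
    app.config['PERMANENT_SESSION_LIFETIME'] = 3600
    assert app.permanent_session_lifetime == timedelta(seconds=3600)

    # Attribute writes go the other way, into the config dict.
    app.secret_key = 'dev'
    assert app.config['SECRET_KEY'] == 'dev'
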
The keys will + #: be function names which are also used to generate URLs and + #: the values are the function objects themselves. + #: To register a view function, use the :meth:`route` decorator. + self.view_functions = {} + + #: A dictionary of all registered error handlers. The key is ``None`` + #: for error handlers active on the application, otherwise the key is + #: the name of the blueprint. Each key points to another dictionary + #: where the key is the status code of the http exception. The + #: special key ``None`` points to a list of tuples where the first item + #: is the class for the instance check and the second the error handler + #: function. + #: + #: To register an error handler, use the :meth:`errorhandler` + #: decorator. + self.error_handler_spec = {} + + #: A list of functions that are called when :meth:`url_for` raises a + #: :exc:`~werkzeug.routing.BuildError`. Each function registered here + #: is called with `error`, `endpoint` and `values`. If a function + #: returns ``None`` or raises a :exc:`BuildError` the next function is + #: tried. + #: + #: .. versionadded:: 0.9 + self.url_build_error_handlers = [] + + #: A dictionary with lists of functions that will be called at the + #: beginning of each request. The key of the dictionary is the name of + #: the blueprint this function is active for, or ``None`` for all + #: requests. To register a function, use the :meth:`before_request` + #: decorator. + self.before_request_funcs = {} + + #: A list of functions that will be called at the beginning of the + #: first request to this instance. To register a function, use the + #: :meth:`before_first_request` decorator. + #: + #: .. versionadded:: 0.8 + self.before_first_request_funcs = [] + + #: A dictionary with lists of functions that should be called after + #: each request. The key of the dictionary is the name of the blueprint + #: this function is active for, ``None`` for all requests. This can for + #: example be used to close database connections. To register a function + #: here, use the :meth:`after_request` decorator. + self.after_request_funcs = {} + + #: A dictionary with lists of functions that are called after + #: each request, even if an exception has occurred. The key of the + #: dictionary is the name of the blueprint this function is active for, + #: ``None`` for all requests. These functions are not allowed to modify + #: the request, and their return values are ignored. If an exception + #: occurred while processing the request, it gets passed to each + #: teardown_request function. To register a function here, use the + #: :meth:`teardown_request` decorator. + #: + #: .. versionadded:: 0.7 + self.teardown_request_funcs = {} + + #: A list of functions that are called when the application context + #: is destroyed. Since the application context is also torn down + #: if the request ends this is the place to store code that disconnects + #: from databases. + #: + #: .. versionadded:: 0.9 + self.teardown_appcontext_funcs = [] + + #: A dictionary with lists of functions that are called before the + #: :attr:`before_request_funcs` functions. The key of the dictionary is + #: the name of the blueprint this function is active for, or ``None`` + #: for all requests. To register a function, use + #: :meth:`url_value_preprocessor`. + #: + #: .. versionadded:: 0.7 + self.url_value_preprocessors = {} + + #: A dictionary with lists of functions that can be used as URL value + #: preprocessors. 
The key ``None`` here is used for application wide + #: callbacks, otherwise the key is the name of the blueprint. + #: Each of these functions has the chance to modify the dictionary + #: of URL values before they are used as the keyword arguments of the + #: view function. For each function registered this one should also + #: provide a :meth:`url_defaults` function that adds the parameters + #: automatically again that were removed that way. + #: + #: .. versionadded:: 0.7 + self.url_default_functions = {} + + #: A dictionary with list of functions that are called without argument + #: to populate the template context. The key of the dictionary is the + #: name of the blueprint this function is active for, ``None`` for all + #: requests. Each returns a dictionary that the template context is + #: updated with. To register a function here, use the + #: :meth:`context_processor` decorator. + self.template_context_processors = { + None: [_default_template_ctx_processor] + } + + #: A list of shell context processor functions that should be run + #: when a shell context is created. + #: + #: .. versionadded:: 0.11 + self.shell_context_processors = [] + + #: all the attached blueprints in a dictionary by name. Blueprints + #: can be attached multiple times so this dictionary does not tell + #: you how often they got attached. + #: + #: .. versionadded:: 0.7 + self.blueprints = {} + self._blueprint_order = [] + + #: a place where extensions can store application specific state. For + #: example this is where an extension could store database engines and + #: similar things. For backwards compatibility extensions should register + #: themselves like this:: + #: + #: if not hasattr(app, 'extensions'): + #: app.extensions = {} + #: app.extensions['extensionname'] = SomeObject() + #: + #: The key must match the name of the extension module. For example in + #: case of a "Flask-Foo" extension in `flask_foo`, the key would be + #: ``'foo'``. + #: + #: .. versionadded:: 0.7 + self.extensions = {} + + #: The :class:`~werkzeug.routing.Map` for this instance. You can use + #: this to change the routing converters after the class was created + #: but before any routes are connected. Example:: + #: + #: from werkzeug.routing import BaseConverter + #: + #: class ListConverter(BaseConverter): + #: def to_python(self, value): + #: return value.split(',') + #: def to_url(self, values): + #: return ','.join(super(ListConverter, self).to_url(value) + #: for value in values) + #: + #: app = Flask(__name__) + #: app.url_map.converters['list'] = ListConverter + self.url_map = Map() + + self.url_map.host_matching = host_matching + self.subdomain_matching = subdomain_matching + + # tracks internally if the application already handled at least one + # request. + self._got_first_request = False + self._before_request_lock = Lock() + + # Add a static route using the provided static_url_path, static_host, + # and static_folder if there is a configured static_folder. + # Note we do this without checking if static_folder exists. + # For one, it might be created while the server is running (e.g. during + # development). Also, Google App Engine stores static files somewhere + if self.has_static_folder: + assert bool(static_host) == host_matching, 'Invalid static_host/host_matching combination' + self.add_url_rule( + self.static_url_path + '/<path:filename>', + endpoint='static', + host=static_host, + view_func=self.send_static_file + ) + + #: The click command line context for this application. 
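As a follow-up to the ``url_map`` docstring above, a hedged sketch of a route actually consuming that ``list`` converter; the ``/tags/...`` rule and the ``show_tags`` view are illustrative only::

    from flask import Flask
    from werkzeug.routing import BaseConverter

    class ListConverter(BaseConverter):
        def to_python(self, value):
            return value.split(',')

        def to_url(self, values):
            return ','.join(
                super(ListConverter, self).to_url(value)
                for value in values
            )

    app = Flask(__name__)
    app.url_map.converters['list'] = ListConverter

    @app.route('/tags/<list:tags>')
    def show_tags(tags):
        # GET /tags/a,b,c  ->  tags == ['a', 'b', 'c']
        return ', '.join(tags)
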
Commands + #: registered here show up in the :command:`flask` command once the + #: application has been discovered. The default commands are + #: provided by Flask itself and can be overridden. + #: + #: This is an instance of a :class:`click.Group` object. + self.cli = cli.AppGroup(self.name) + + @locked_cached_property + def name(self): + """The name of the application. This is usually the import name + with the difference that it's guessed from the run file if the + import name is main. This name is used as a display name when + Flask needs the name of the application. It can be set and overridden + to change the value. + + .. versionadded:: 0.8 + """ + if self.import_name == '__main__': + fn = getattr(sys.modules['__main__'], '__file__', None) + if fn is None: + return '__main__' + return os.path.splitext(os.path.basename(fn))[0] + return self.import_name + + @property + def propagate_exceptions(self): + """Returns the value of the ``PROPAGATE_EXCEPTIONS`` configuration + value in case it's set, otherwise a sensible default is returned. + + .. versionadded:: 0.7 + """ + rv = self.config['PROPAGATE_EXCEPTIONS'] + if rv is not None: + return rv + return self.testing or self.debug + + @property + def preserve_context_on_exception(self): + """Returns the value of the ``PRESERVE_CONTEXT_ON_EXCEPTION`` + configuration value in case it's set, otherwise a sensible default + is returned. + + .. versionadded:: 0.7 + """ + rv = self.config['PRESERVE_CONTEXT_ON_EXCEPTION'] + if rv is not None: + return rv + return self.debug + + @locked_cached_property + def logger(self): + """The ``'flask.app'`` logger, a standard Python + :class:`~logging.Logger`. + + In debug mode, the logger's :attr:`~logging.Logger.level` will be set + to :data:`~logging.DEBUG`. + + If there are no handlers configured, a default handler will be added. + See :ref:`logging` for more information. + + .. versionchanged:: 1.0 + Behavior was simplified. The logger is always named + ``flask.app``. The level is only set during configuration, it + doesn't check ``app.debug`` each time. Only one format is used, + not different ones depending on ``app.debug``. No handlers are + removed, and a handler is only added if no handlers are already + configured. + + .. versionadded:: 0.3 + """ + return create_logger(self) + + @locked_cached_property + def jinja_env(self): + """The Jinja2 environment used to load templates.""" + return self.create_jinja_environment() + + @property + def got_first_request(self): + """This attribute is set to ``True`` if the application started + handling the first request. + + .. versionadded:: 0.8 + """ + return self._got_first_request + + def make_config(self, instance_relative=False): + """Used to create the config attribute by the Flask constructor. + The `instance_relative` parameter is passed in from the constructor + of Flask (there named `instance_relative_config`) and indicates if + the config should be relative to the instance path or the root path + of the application. + + .. versionadded:: 0.8 + """ + root_path = self.root_path + if instance_relative: + root_path = self.instance_path + defaults = dict(self.default_config) + defaults['ENV'] = get_env() + defaults['DEBUG'] = get_debug_flag() + return self.config_class(root_path, defaults) + + def auto_find_instance_path(self): + """Tries to locate the instance path if it was not provided to the + constructor of the application class. It will basically calculate + the path to a folder named ``instance`` next to your main file or + the package. + + .. 
versionadded:: 0.8 + """ + prefix, package_path = find_package(self.import_name) + if prefix is None: + return os.path.join(package_path, 'instance') + return os.path.join(prefix, 'var', self.name + '-instance') + + def open_instance_resource(self, resource, mode='rb'): + """Opens a resource from the application's instance folder + (:attr:`instance_path`). Otherwise works like + :meth:`open_resource`. Instance resources can also be opened for + writing. + + :param resource: the name of the resource. To access resources within + subfolders use forward slashes as separator. + :param mode: resource file opening mode, default is 'rb'. + """ + return open(os.path.join(self.instance_path, resource), mode) + + def _get_templates_auto_reload(self): + """Reload templates when they are changed. Used by + :meth:`create_jinja_environment`. + + This attribute can be configured with :data:`TEMPLATES_AUTO_RELOAD`. If + not set, it will be enabled in debug mode. + + .. versionadded:: 1.0 + This property was added but the underlying config and behavior + already existed. + """ + rv = self.config['TEMPLATES_AUTO_RELOAD'] + return rv if rv is not None else self.debug + + def _set_templates_auto_reload(self, value): + self.config['TEMPLATES_AUTO_RELOAD'] = value + + templates_auto_reload = property( + _get_templates_auto_reload, _set_templates_auto_reload + ) + del _get_templates_auto_reload, _set_templates_auto_reload + + def create_jinja_environment(self): + """Creates the Jinja2 environment based on :attr:`jinja_options` + and :meth:`select_jinja_autoescape`. Since 0.7 this also adds + the Jinja2 globals and filters after initialization. Override + this function to customize the behavior. + + .. versionadded:: 0.5 + .. versionchanged:: 0.11 + ``Environment.auto_reload`` set in accordance with + ``TEMPLATES_AUTO_RELOAD`` configuration option. + """ + options = dict(self.jinja_options) + + if 'autoescape' not in options: + options['autoescape'] = self.select_jinja_autoescape + + if 'auto_reload' not in options: + options['auto_reload'] = self.templates_auto_reload + + rv = self.jinja_environment(self, **options) + rv.globals.update( + url_for=url_for, + get_flashed_messages=get_flashed_messages, + config=self.config, + # request, session and g are normally added with the + # context processor for efficiency reasons but for imported + # templates we also want the proxies in there. + request=request, + session=session, + g=g + ) + rv.filters['tojson'] = json.tojson_filter + return rv + + def create_global_jinja_loader(self): + """Creates the loader for the Jinja2 environment. Can be used to + override just the loader and keeping the rest unchanged. It's + discouraged to override this function. Instead one should override + the :meth:`jinja_loader` function instead. + + The global loader dispatches between the loaders of the application + and the individual blueprints. + + .. versionadded:: 0.7 + """ + return DispatchingJinjaLoader(self) + + def select_jinja_autoescape(self, filename): + """Returns ``True`` if autoescaping should be active for the given + template name. If no template name is given, returns `True`. + + .. versionadded:: 0.5 + """ + if filename is None: + return True + return filename.endswith(('.html', '.htm', '.xml', '.xhtml')) + + def update_template_context(self, context): + """Update the template context with some commonly used variables. + This injects request, session, config and g into the template + context as well as everything template context processors want + to inject. 
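A short sketch of the two knobs just described, ``select_jinja_autoescape`` and the ``TEMPLATES_AUTO_RELOAD``-backed ``templates_auto_reload`` property::

    from flask import Flask

    app = Flask(__name__)

    # Autoescaping is keyed purely off the template filename suffix.
    assert app.select_jinja_autoescape('page.html') is True
    assert app.select_jinja_autoescape('mail.txt') is False

    # With TEMPLATES_AUTO_RELOAD unset, the property falls back to
    # the debug flag ...
    app.debug = True
    assert app.templates_auto_reload is True

    # ... and an explicit config value takes precedence over it.
    app.config['TEMPLATES_AUTO_RELOAD'] = False
    assert app.templates_auto_reload is False
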
Note that as of Flask 0.6, the original values
+ in the context will not be overridden if a context processor
+ decides to return a value with the same key.
+
+ :param context: the context as a dictionary that is updated in place
+ to add extra variables.
+ """
+ funcs = self.template_context_processors[None]
+ reqctx = _request_ctx_stack.top
+ if reqctx is not None:
+ bp = reqctx.request.blueprint
+ if bp is not None and bp in self.template_context_processors:
+ funcs = chain(funcs, self.template_context_processors[bp])
+ orig_ctx = context.copy()
+ for func in funcs:
+ context.update(func())
+ # make sure the original values win. This makes it easier to
+ # add new variables in context processors without breaking
+ # existing views.
+ context.update(orig_ctx)
+
+ def make_shell_context(self):
+ """Returns the shell context for an interactive shell for this
+ application. This runs all the registered shell context
+ processors.
+
+ .. versionadded:: 0.11
+ """
+ rv = {'app': self, 'g': g}
+ for processor in self.shell_context_processors:
+ rv.update(processor())
+ return rv
+
+ #: What environment the app is running in. Flask and extensions may
+ #: enable behaviors based on the environment, such as enabling debug
+ #: mode. This maps to the :data:`ENV` config key. This is set by the
+ #: :envvar:`FLASK_ENV` environment variable and may not behave as
+ #: expected if set in code.
+ #:
+ #: **Do not enable development when deploying in production.**
+ #:
+ #: Default: ``'production'``
+ env = ConfigAttribute('ENV')
+
+ def _get_debug(self):
+ return self.config['DEBUG']
+
+ def _set_debug(self, value):
+ self.config['DEBUG'] = value
+ self.jinja_env.auto_reload = self.templates_auto_reload
+
+ #: Whether debug mode is enabled. When using ``flask run`` to start
+ #: the development server, an interactive debugger will be shown for
+ #: unhandled exceptions, and the server will be reloaded when code
+ #: changes. This maps to the :data:`DEBUG` config key. This is
+ #: enabled when :attr:`env` is ``'development'`` and is overridden
+ #: by the ``FLASK_DEBUG`` environment variable. It may not behave as
+ #: expected if set in code.
+ #:
+ #: **Do not enable debug mode when deploying in production.**
+ #:
+ #: Default: ``True`` if :attr:`env` is ``'development'``, or
+ #: ``False`` otherwise.
+ debug = property(_get_debug, _set_debug)
+ del _get_debug, _set_debug
+
+ def run(self, host=None, port=None, debug=None,
+ load_dotenv=True, **options):
+ """Runs the application on a local development server.
+
+ Do not use ``run()`` in a production setting. It is not intended to
+ meet security and performance requirements for a production server.
+ Instead, see :ref:`deployment` for WSGI server recommendations.
+
+ If the :attr:`debug` flag is set the server will automatically reload
+ for code changes and show a debugger in case an exception happened.
+
+ If you want to run the application in debug mode, but disable the
+ code execution on the interactive debugger, you can pass
+ ``use_evalex=False`` as parameter. This will keep the debugger's
+ traceback screen active, but disable code execution.
+
+ It is not recommended to use this function for development with
+ automatic reloading as this is badly supported. Instead you should
+ be using the :command:`flask` command line script's ``run`` support.
+
+ .. admonition:: Keep in Mind
+
+ Flask will suppress any server error with a generic error page
+ unless it is in debug mode.
As such to enable just the + interactive debugger without the code reloading, you have to + invoke :meth:`run` with ``debug=True`` and ``use_reloader=False``. + Setting ``use_debugger`` to ``True`` without being in debug mode + won't catch any exceptions because there won't be any to + catch. + + :param host: the hostname to listen on. Set this to ``'0.0.0.0'`` to + have the server available externally as well. Defaults to + ``'127.0.0.1'`` or the host in the ``SERVER_NAME`` config variable + if present. + :param port: the port of the webserver. Defaults to ``5000`` or the + port defined in the ``SERVER_NAME`` config variable if present. + :param debug: if given, enable or disable debug mode. See + :attr:`debug`. + :param load_dotenv: Load the nearest :file:`.env` and :file:`.flaskenv` + files to set environment variables. Will also change the working + directory to the directory containing the first file found. + :param options: the options to be forwarded to the underlying Werkzeug + server. See :func:`werkzeug.serving.run_simple` for more + information. + + .. versionchanged:: 1.0 + If installed, python-dotenv will be used to load environment + variables from :file:`.env` and :file:`.flaskenv` files. + + If set, the :envvar:`FLASK_ENV` and :envvar:`FLASK_DEBUG` + environment variables will override :attr:`env` and + :attr:`debug`. + + Threaded mode is enabled by default. + + .. versionchanged:: 0.10 + The default port is now picked from the ``SERVER_NAME`` + variable. + """ + # Change this into a no-op if the server is invoked from the + # command line. Have a look at cli.py for more information. + if os.environ.get('FLASK_RUN_FROM_CLI') == 'true': + from .debughelpers import explain_ignored_app_run + explain_ignored_app_run() + return + + if get_load_dotenv(load_dotenv): + cli.load_dotenv() + + # if set, let env vars override previous values + if 'FLASK_ENV' in os.environ: + self.env = get_env() + self.debug = get_debug_flag() + elif 'FLASK_DEBUG' in os.environ: + self.debug = get_debug_flag() + + # debug passed to method overrides all other sources + if debug is not None: + self.debug = bool(debug) + + _host = '127.0.0.1' + _port = 5000 + server_name = self.config.get('SERVER_NAME') + sn_host, sn_port = None, None + + if server_name: + sn_host, _, sn_port = server_name.partition(':') + + host = host or sn_host or _host + port = int(port or sn_port or _port) + + options.setdefault('use_reloader', self.debug) + options.setdefault('use_debugger', self.debug) + options.setdefault('threaded', True) + + cli.show_server_banner(self.env, self.debug, self.name, False) + + from werkzeug.serving import run_simple + + try: + run_simple(host, port, self, **options) + finally: + # reset the first request information if the development server + # reset normally. This makes it possible to restart the server + # without reloader and that stuff from an interactive shell. + self._got_first_request = False + + def test_client(self, use_cookies=True, **kwargs): + """Creates a test client for this application. For information + about unit testing head over to :ref:`testing`. + + Note that if you are testing for assertions or exceptions in your + application code, you must set ``app.testing = True`` in order for the + exceptions to propagate to the test client. Otherwise, the exception + will be handled by the application (not visible to the test client) and + the only indication of an AssertionError or other exception will be a + 500 status code response to the test client. 
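A sketch of how host and port fall back in :meth:`run`, per the resolution code above; development use only, as the docstring stresses::

    from flask import Flask

    app = Flask(__name__)
    app.config['SERVER_NAME'] = 'localhost:8080'

    # Explicit arguments win, then SERVER_NAME, then the built-in
    # defaults of 127.0.0.1 and 5000:
    #
    #   app.run()                  -> localhost:8080
    #   app.run(port=9000)         -> localhost:9000
    #   app.run('0.0.0.0', 5000)   -> 0.0.0.0:5000
    if __name__ == '__main__':
        app.run()
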
See the :attr:`testing` + attribute. For example:: + + app.testing = True + client = app.test_client() + + The test client can be used in a ``with`` block to defer the closing down + of the context until the end of the ``with`` block. This is useful if + you want to access the context locals for testing:: + + with app.test_client() as c: + rv = c.get('/?vodka=42') + assert request.args['vodka'] == '42' + + Additionally, you may pass optional keyword arguments that will then + be passed to the application's :attr:`test_client_class` constructor. + For example:: + + from flask.testing import FlaskClient + + class CustomClient(FlaskClient): + def __init__(self, *args, **kwargs): + self._authentication = kwargs.pop("authentication") + super(CustomClient,self).__init__( *args, **kwargs) + + app.test_client_class = CustomClient + client = app.test_client(authentication='Basic ....') + + See :class:`~flask.testing.FlaskClient` for more information. + + .. versionchanged:: 0.4 + added support for ``with`` block usage for the client. + + .. versionadded:: 0.7 + The `use_cookies` parameter was added as well as the ability + to override the client to be used by setting the + :attr:`test_client_class` attribute. + + .. versionchanged:: 0.11 + Added `**kwargs` to support passing additional keyword arguments to + the constructor of :attr:`test_client_class`. + """ + cls = self.test_client_class + if cls is None: + from flask.testing import FlaskClient as cls + return cls(self, self.response_class, use_cookies=use_cookies, **kwargs) + + def test_cli_runner(self, **kwargs): + """Create a CLI runner for testing CLI commands. + See :ref:`testing-cli`. + + Returns an instance of :attr:`test_cli_runner_class`, by default + :class:`~flask.testing.FlaskCliRunner`. The Flask app object is + passed as the first argument. + + .. versionadded:: 1.0 + """ + cls = self.test_cli_runner_class + + if cls is None: + from flask.testing import FlaskCliRunner as cls + + return cls(self, **kwargs) + + def open_session(self, request): + """Creates or opens a new session. Default implementation stores all + session data in a signed cookie. This requires that the + :attr:`secret_key` is set. Instead of overriding this method + we recommend replacing the :class:`session_interface`. + + .. deprecated: 1.0 + Will be removed in 1.1. Use ``session_interface.open_session`` + instead. + + :param request: an instance of :attr:`request_class`. + """ + + warnings.warn(DeprecationWarning( + '"open_session" is deprecated and will be removed in 1.1. Use' + ' "session_interface.open_session" instead.' + )) + return self.session_interface.open_session(self, request) + + def save_session(self, session, response): + """Saves the session if it needs updates. For the default + implementation, check :meth:`open_session`. Instead of overriding this + method we recommend replacing the :class:`session_interface`. + + .. deprecated: 1.0 + Will be removed in 1.1. Use ``session_interface.save_session`` + instead. + + :param session: the session to be saved (a + :class:`~werkzeug.contrib.securecookie.SecureCookie` + object) + :param response: an instance of :attr:`response_class` + """ + + warnings.warn(DeprecationWarning( + '"save_session" is deprecated and will be removed in 1.1. Use' + ' "session_interface.save_session" instead.' + )) + return self.session_interface.save_session(self, session, response) + + def make_null_session(self): + """Creates a new instance of a missing session. 
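The deprecated session hooks above all defer to :attr:`session_interface`, so a hedged sketch of the recommended replacement follows; the logging subclass is purely illustrative::

    from flask import Flask
    from flask.sessions import SecureCookieSessionInterface

    class LoggingSessionInterface(SecureCookieSessionInterface):
        # Illustrative only: observe sessions as they are opened.
        def open_session(self, app, request):
            session = super(LoggingSessionInterface, self) \
                .open_session(app, request)
            app.logger.debug('opened session: %r', session)
            return session

    app = Flask(__name__)
    app.secret_key = 'dev'  # required for the secure cookie
    app.session_interface = LoggingSessionInterface()
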
Instead of overriding + this method we recommend replacing the :class:`session_interface`. + + .. deprecated: 1.0 + Will be removed in 1.1. Use ``session_interface.make_null_session`` + instead. + + .. versionadded:: 0.7 + """ + + warnings.warn(DeprecationWarning( + '"make_null_session" is deprecated and will be removed in 1.1. Use' + ' "session_interface.make_null_session" instead.' + )) + return self.session_interface.make_null_session(self) + + @setupmethod + def register_blueprint(self, blueprint, **options): + """Register a :class:`~flask.Blueprint` on the application. Keyword + arguments passed to this method will override the defaults set on the + blueprint. + + Calls the blueprint's :meth:`~flask.Blueprint.register` method after + recording the blueprint in the application's :attr:`blueprints`. + + :param blueprint: The blueprint to register. + :param url_prefix: Blueprint routes will be prefixed with this. + :param subdomain: Blueprint routes will match on this subdomain. + :param url_defaults: Blueprint routes will use these default values for + view arguments. + :param options: Additional keyword arguments are passed to + :class:`~flask.blueprints.BlueprintSetupState`. They can be + accessed in :meth:`~flask.Blueprint.record` callbacks. + + .. versionadded:: 0.7 + """ + first_registration = False + + if blueprint.name in self.blueprints: + assert self.blueprints[blueprint.name] is blueprint, ( + 'A name collision occurred between blueprints %r and %r. Both' + ' share the same name "%s". Blueprints that are created on the' + ' fly need unique names.' % ( + blueprint, self.blueprints[blueprint.name], blueprint.name + ) + ) + else: + self.blueprints[blueprint.name] = blueprint + self._blueprint_order.append(blueprint) + first_registration = True + + blueprint.register(self, options, first_registration) + + def iter_blueprints(self): + """Iterates over all blueprints by the order they were registered. + + .. versionadded:: 0.11 + """ + return iter(self._blueprint_order) + + @setupmethod + def add_url_rule(self, rule, endpoint=None, view_func=None, + provide_automatic_options=None, **options): + """Connects a URL rule. Works exactly like the :meth:`route` + decorator. If a view_func is provided it will be registered with the + endpoint. + + Basically this example:: + + @app.route('/') + def index(): + pass + + Is equivalent to the following:: + + def index(): + pass + app.add_url_rule('/', 'index', index) + + If the view_func is not provided you will need to connect the endpoint + to a view function like so:: + + app.view_functions['index'] = index + + Internally :meth:`route` invokes :meth:`add_url_rule` so if you want + to customize the behavior via subclassing you only need to change + this method. + + For more information refer to :ref:`url-route-registrations`. + + .. versionchanged:: 0.2 + `view_func` parameter added. + + .. versionchanged:: 0.6 + ``OPTIONS`` is added automatically as method. + + :param rule: the URL rule as string + :param endpoint: the endpoint for the registered URL rule. Flask + itself assumes the name of the view function as + endpoint + :param view_func: the function to call when serving a request to the + provided endpoint + :param provide_automatic_options: controls whether the ``OPTIONS`` + method should be added automatically. This can also be controlled + by setting the ``view_func.provide_automatic_options = False`` + before adding the rule. + :param options: the options to be forwarded to the underlying + :class:`~werkzeug.routing.Rule` object. 
A change + to Werkzeug is handling of method options. methods + is a list of methods this rule should be limited + to (``GET``, ``POST`` etc.). By default a rule + just listens for ``GET`` (and implicitly ``HEAD``). + Starting with Flask 0.6, ``OPTIONS`` is implicitly + added and handled by the standard request handling. + """ + if endpoint is None: + endpoint = _endpoint_from_view_func(view_func) + options['endpoint'] = endpoint + methods = options.pop('methods', None) + + # if the methods are not given and the view_func object knows its + # methods we can use that instead. If neither exists, we go with + # a tuple of only ``GET`` as default. + if methods is None: + methods = getattr(view_func, 'methods', None) or ('GET',) + if isinstance(methods, string_types): + raise TypeError('Allowed methods have to be iterables of strings, ' + 'for example: @app.route(..., methods=["POST"])') + methods = set(item.upper() for item in methods) + + # Methods that should always be added + required_methods = set(getattr(view_func, 'required_methods', ())) + + # starting with Flask 0.8 the view_func object can disable and + # force-enable the automatic options handling. + if provide_automatic_options is None: + provide_automatic_options = getattr(view_func, + 'provide_automatic_options', None) + + if provide_automatic_options is None: + if 'OPTIONS' not in methods: + provide_automatic_options = True + required_methods.add('OPTIONS') + else: + provide_automatic_options = False + + # Add the required methods now. + methods |= required_methods + + rule = self.url_rule_class(rule, methods=methods, **options) + rule.provide_automatic_options = provide_automatic_options + + self.url_map.add(rule) + if view_func is not None: + old_func = self.view_functions.get(endpoint) + if old_func is not None and old_func != view_func: + raise AssertionError('View function mapping is overwriting an ' + 'existing endpoint function: %s' % endpoint) + self.view_functions[endpoint] = view_func + + def route(self, rule, **options): + """A decorator that is used to register a view function for a + given URL rule. This does the same thing as :meth:`add_url_rule` + but is intended for decorator usage:: + + @app.route('/') + def index(): + return 'Hello World' + + For more information refer to :ref:`url-route-registrations`. + + :param rule: the URL rule as string + :param endpoint: the endpoint for the registered URL rule. Flask + itself assumes the name of the view function as + endpoint + :param options: the options to be forwarded to the underlying + :class:`~werkzeug.routing.Rule` object. A change + to Werkzeug is handling of method options. methods + is a list of methods this rule should be limited + to (``GET``, ``POST`` etc.). By default a rule + just listens for ``GET`` (and implicitly ``HEAD``). + Starting with Flask 0.6, ``OPTIONS`` is implicitly + added and handled by the standard request handling. + """ + def decorator(f): + endpoint = options.pop('endpoint', None) + self.add_url_rule(rule, endpoint, f, **options) + return f + return decorator + + @setupmethod + def endpoint(self, endpoint): + """A decorator to register a function as an endpoint. 
+ Example:: + + @app.endpoint('example.endpoint') + def example(): + return "example" + + :param endpoint: the name of the endpoint + """ + def decorator(f): + self.view_functions[endpoint] = f + return f + return decorator + + @staticmethod + def _get_exc_class_and_code(exc_class_or_code): + """Ensure that we register only exceptions as handler keys""" + if isinstance(exc_class_or_code, integer_types): + exc_class = default_exceptions[exc_class_or_code] + else: + exc_class = exc_class_or_code + + assert issubclass(exc_class, Exception) + + if issubclass(exc_class, HTTPException): + return exc_class, exc_class.code + else: + return exc_class, None + + @setupmethod + def errorhandler(self, code_or_exception): + """Register a function to handle errors by code or exception class. + + A decorator that is used to register a function given an + error code. Example:: + + @app.errorhandler(404) + def page_not_found(error): + return 'This page does not exist', 404 + + You can also register handlers for arbitrary exceptions:: + + @app.errorhandler(DatabaseError) + def special_exception_handler(error): + return 'Database connection failed', 500 + + .. versionadded:: 0.7 + Use :meth:`register_error_handler` instead of modifying + :attr:`error_handler_spec` directly, for application wide error + handlers. + + .. versionadded:: 0.7 + One can now additionally also register custom exception types + that do not necessarily have to be a subclass of the + :class:`~werkzeug.exceptions.HTTPException` class. + + :param code_or_exception: the code as integer for the handler, or + an arbitrary exception + """ + def decorator(f): + self._register_error_handler(None, code_or_exception, f) + return f + return decorator + + @setupmethod + def register_error_handler(self, code_or_exception, f): + """Alternative error attach function to the :meth:`errorhandler` + decorator that is more straightforward to use for non decorator + usage. + + .. versionadded:: 0.7 + """ + self._register_error_handler(None, code_or_exception, f) + + @setupmethod + def _register_error_handler(self, key, code_or_exception, f): + """ + :type key: None|str + :type code_or_exception: int|T<=Exception + :type f: callable + """ + if isinstance(code_or_exception, HTTPException): # old broken behavior + raise ValueError( + 'Tried to register a handler for an exception instance {0!r}.' + ' Handlers can only be registered for exception classes or' + ' HTTP error codes.'.format(code_or_exception) + ) + + try: + exc_class, code = self._get_exc_class_and_code(code_or_exception) + except KeyError: + raise KeyError( + "'{0}' is not a recognized HTTP error code. Use a subclass of" + " HTTPException with that code instead.".format(code_or_exception) + ) + + handlers = self.error_handler_spec.setdefault(key, {}).setdefault(code, {}) + handlers[exc_class] = f + + @setupmethod + def template_filter(self, name=None): + """A decorator that is used to register custom template filter. + You can specify a name for the filter, otherwise the function + name will be used. Example:: + + @app.template_filter() + def reverse(s): + return s[::-1] + + :param name: the optional name of the filter, otherwise the + function name will be used. + """ + def decorator(f): + self.add_template_filter(f, name=name) + return f + return decorator + + @setupmethod + def add_template_filter(self, f, name=None): + """Register a custom template filter. Works exactly like the + :meth:`template_filter` decorator. 
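Because the handler lookup walks the exception's MRO (see ``_find_error_handler`` further down), a handler registered for a base exception class also catches its subclasses. A hedged sketch; ``PaymentError`` and the ``/pay`` route are hypothetical::

    from flask import Flask

    app = Flask(__name__)

    class PaymentError(Exception):
        pass

    class CardDeclined(PaymentError):
        pass

    @app.errorhandler(PaymentError)
    def handle_payment_error(error):
        return 'payment failed', 402

    @app.route('/pay')
    def pay():
        # CardDeclined has no handler of its own; the lookup walks the
        # MRO and falls back to the PaymentError handler above.
        raise CardDeclined()
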
+ + :param name: the optional name of the filter, otherwise the + function name will be used. + """ + self.jinja_env.filters[name or f.__name__] = f + + @setupmethod + def template_test(self, name=None): + """A decorator that is used to register custom template test. + You can specify a name for the test, otherwise the function + name will be used. Example:: + + @app.template_test() + def is_prime(n): + if n == 2: + return True + for i in range(2, int(math.ceil(math.sqrt(n))) + 1): + if n % i == 0: + return False + return True + + .. versionadded:: 0.10 + + :param name: the optional name of the test, otherwise the + function name will be used. + """ + def decorator(f): + self.add_template_test(f, name=name) + return f + return decorator + + @setupmethod + def add_template_test(self, f, name=None): + """Register a custom template test. Works exactly like the + :meth:`template_test` decorator. + + .. versionadded:: 0.10 + + :param name: the optional name of the test, otherwise the + function name will be used. + """ + self.jinja_env.tests[name or f.__name__] = f + + @setupmethod + def template_global(self, name=None): + """A decorator that is used to register a custom template global function. + You can specify a name for the global function, otherwise the function + name will be used. Example:: + + @app.template_global() + def double(n): + return 2 * n + + .. versionadded:: 0.10 + + :param name: the optional name of the global function, otherwise the + function name will be used. + """ + def decorator(f): + self.add_template_global(f, name=name) + return f + return decorator + + @setupmethod + def add_template_global(self, f, name=None): + """Register a custom template global function. Works exactly like the + :meth:`template_global` decorator. + + .. versionadded:: 0.10 + + :param name: the optional name of the global function, otherwise the + function name will be used. + """ + self.jinja_env.globals[name or f.__name__] = f + + @setupmethod + def before_request(self, f): + """Registers a function to run before each request. + + For example, this can be used to open a database connection, or to load + the logged in user from the session. + + The function will be called without any arguments. If it returns a + non-None value, the value is handled as if it was the return value from + the view, and further request handling is stopped. + """ + self.before_request_funcs.setdefault(None, []).append(f) + return f + + @setupmethod + def before_first_request(self, f): + """Registers a function to be run before the first request to this + instance of the application. + + The function will be called without any arguments and its return + value is ignored. + + .. versionadded:: 0.8 + """ + self.before_first_request_funcs.append(f) + return f + + @setupmethod + def after_request(self, f): + """Register a function to be run after each request. + + Your function must take one parameter, an instance of + :attr:`response_class` and return a new response object or the + same (see :meth:`process_response`). + + As of Flask 0.7 this function might not be executed at the end of the + request in case an unhandled exception occurred. + """ + self.after_request_funcs.setdefault(None, []).append(f) + return f + + @setupmethod + def teardown_request(self, f): + """Register a function to be run at the end of each request, + regardless of whether there was an exception or not. These functions + are executed when the request context is popped, even if not an + actual request was performed. 
+ + Example:: + + ctx = app.test_request_context() + ctx.push() + ... + ctx.pop() + + When ``ctx.pop()`` is executed in the above example, the teardown + functions are called just before the request context moves from the + stack of active contexts. This becomes relevant if you are using + such constructs in tests. + + Generally teardown functions must take every necessary step to avoid + that they will fail. If they do execute code that might fail they + will have to surround the execution of these code by try/except + statements and log occurring errors. + + When a teardown function was called because of an exception it will + be passed an error object. + + The return values of teardown functions are ignored. + + .. admonition:: Debug Note + + In debug mode Flask will not tear down a request on an exception + immediately. Instead it will keep it alive so that the interactive + debugger can still access it. This behavior can be controlled + by the ``PRESERVE_CONTEXT_ON_EXCEPTION`` configuration variable. + """ + self.teardown_request_funcs.setdefault(None, []).append(f) + return f + + @setupmethod + def teardown_appcontext(self, f): + """Registers a function to be called when the application context + ends. These functions are typically also called when the request + context is popped. + + Example:: + + ctx = app.app_context() + ctx.push() + ... + ctx.pop() + + When ``ctx.pop()`` is executed in the above example, the teardown + functions are called just before the app context moves from the + stack of active contexts. This becomes relevant if you are using + such constructs in tests. + + Since a request context typically also manages an application + context it would also be called when you pop a request context. + + When a teardown function was called because of an unhandled exception + it will be passed an error object. If an :meth:`errorhandler` is + registered, it will handle the exception and the teardown will not + receive it. + + The return values of teardown functions are ignored. + + .. versionadded:: 0.9 + """ + self.teardown_appcontext_funcs.append(f) + return f + + @setupmethod + def context_processor(self, f): + """Registers a template context processor function.""" + self.template_context_processors[None].append(f) + return f + + @setupmethod + def shell_context_processor(self, f): + """Registers a shell context processor function. + + .. versionadded:: 0.11 + """ + self.shell_context_processors.append(f) + return f + + @setupmethod + def url_value_preprocessor(self, f): + """Register a URL value preprocessor function for all view + functions in the application. These functions will be called before the + :meth:`before_request` functions. + + The function can modify the values captured from the matched url before + they are passed to the view. For example, this can be used to pop a + common language code value and place it in ``g`` rather than pass it to + every view. + + The function is passed the endpoint name and values dict. The return + value is ignored. + """ + self.url_value_preprocessors.setdefault(None, []).append(f) + return f + + @setupmethod + def url_defaults(self, f): + """Callback function for URL defaults for all view functions of the + application. It's called with the endpoint and values and should + update the values passed in place. 
+ """ + self.url_default_functions.setdefault(None, []).append(f) + return f + + def _find_error_handler(self, e): + """Return a registered error handler for an exception in this order: + blueprint handler for a specific code, app handler for a specific code, + blueprint handler for an exception class, app handler for an exception + class, or ``None`` if a suitable handler is not found. + """ + exc_class, code = self._get_exc_class_and_code(type(e)) + + for name, c in ( + (request.blueprint, code), (None, code), + (request.blueprint, None), (None, None) + ): + handler_map = self.error_handler_spec.setdefault(name, {}).get(c) + + if not handler_map: + continue + + for cls in exc_class.__mro__: + handler = handler_map.get(cls) + + if handler is not None: + return handler + + def handle_http_exception(self, e): + """Handles an HTTP exception. By default this will invoke the + registered error handlers and fall back to returning the + exception as response. + + .. versionchanged:: 1.0.3 + ``RoutingException``, used internally for actions such as + slash redirects during routing, is not passed to error + handlers. + + .. versionchanged:: 1.0 + Exceptions are looked up by code *and* by MRO, so + ``HTTPExcpetion`` subclasses can be handled with a catch-all + handler for the base ``HTTPException``. + + .. versionadded:: 0.3 + """ + # Proxy exceptions don't have error codes. We want to always return + # those unchanged as errors + if e.code is None: + return e + + # RoutingExceptions are used internally to trigger routing + # actions, such as slash redirects raising RequestRedirect. They + # are not raised or handled in user code. + if isinstance(e, RoutingException): + return e + + handler = self._find_error_handler(e) + if handler is None: + return e + return handler(e) + + def trap_http_exception(self, e): + """Checks if an HTTP exception should be trapped or not. By default + this will return ``False`` for all exceptions except for a bad request + key error if ``TRAP_BAD_REQUEST_ERRORS`` is set to ``True``. It + also returns ``True`` if ``TRAP_HTTP_EXCEPTIONS`` is set to ``True``. + + This is called for all HTTP exceptions raised by a view function. + If it returns ``True`` for any exception the error handler for this + exception is not called and it shows up as regular exception in the + traceback. This is helpful for debugging implicitly raised HTTP + exceptions. + + .. versionchanged:: 1.0 + Bad request errors are not trapped by default in debug mode. + + .. versionadded:: 0.8 + """ + if self.config['TRAP_HTTP_EXCEPTIONS']: + return True + + trap_bad_request = self.config['TRAP_BAD_REQUEST_ERRORS'] + + # if unset, trap key errors in debug mode + if ( + trap_bad_request is None and self.debug + and isinstance(e, BadRequestKeyError) + ): + return True + + if trap_bad_request: + return isinstance(e, BadRequest) + + return False + + def handle_user_exception(self, e): + """This method is called whenever an exception occurs that + should be handled. A special case is :class:`~werkzeug + .exceptions.HTTPException` which is forwarded to the + :meth:`handle_http_exception` method. This function will either + return a response value or reraise the exception with the same + traceback. + + .. versionchanged:: 1.0 + Key errors raised from request data like ``form`` show the + bad key in debug mode rather than a generic bad request + message. + + .. 
+
+    def handle_user_exception(self, e):
+        """This method is called whenever an exception occurs that
+        should be handled. A special case is
+        :class:`~werkzeug.exceptions.HTTPException` which is forwarded to
+        the :meth:`handle_http_exception` method. This function will either
+        return a response value or reraise the exception with the same
+        traceback.
+
+        .. versionchanged:: 1.0
+            Key errors raised from request data like ``form`` show the
+            bad key in debug mode rather than a generic bad request
+            message.
+
+        .. versionadded:: 0.7
+        """
+        exc_type, exc_value, tb = sys.exc_info()
+        assert exc_value is e
+        # ensure not to trash sys.exc_info() at that point in case someone
+        # wants the traceback preserved in handle_http_exception. Of course
+        # we cannot prevent users from trashing it themselves in a custom
+        # trap_http_exception method so that's their fault then.
+
+        if isinstance(e, BadRequestKeyError):
+            if self.debug or self.config["TRAP_BAD_REQUEST_ERRORS"]:
+                # Werkzeug < 0.15 doesn't add the KeyError to the 400
+                # message, add it in manually.
+                description = e.get_description()
+
+                if e.args[0] not in description:
+                    e.description = "KeyError: '{}'".format(*e.args)
+            else:
+                # Werkzeug >= 0.15 does add it, remove it in production
+                e.args = ()
+
+        if isinstance(e, HTTPException) and not self.trap_http_exception(e):
+            return self.handle_http_exception(e)
+
+        handler = self._find_error_handler(e)
+
+        if handler is None:
+            reraise(exc_type, exc_value, tb)
+        return handler(e)
+
+    def handle_exception(self, e):
+        """Default exception handling that kicks in when an exception
+        occurs that is not caught. In debug mode the exception will
+        be re-raised immediately, otherwise it is logged and the handler
+        for a 500 internal server error is used. If no such handler
+        exists, a default 500 internal server error message is displayed.
+
+        .. versionadded:: 0.3
+        """
+        exc_type, exc_value, tb = sys.exc_info()
+
+        got_request_exception.send(self, exception=e)
+        handler = self._find_error_handler(InternalServerError())
+
+        if self.propagate_exceptions:
+            # if we want to repropagate the exception, we can attempt to
+            # raise it with the whole traceback in case we can do that
+            # (the function was actually called from the except part)
+            # otherwise, we just raise the error again
+            if exc_value is e:
+                reraise(exc_type, exc_value, tb)
+            else:
+                raise e
+
+        self.log_exception((exc_type, exc_value, tb))
+        if handler is None:
+            return InternalServerError()
+        return self.finalize_request(handler(e), from_error_handler=True)
+
+    def log_exception(self, exc_info):
+        """Logs an exception. This is called by :meth:`handle_exception`
+        if debugging is disabled and right before the handler is called.
+        The default implementation logs the exception as error on the
+        :attr:`logger`.
+
+        .. versionadded:: 0.8
+        """
+        self.logger.error('Exception on %s [%s]' % (
+            request.path,
+            request.method
+        ), exc_info=exc_info)
+
+    def raise_routing_exception(self, request):
+        """Exceptions that are recorded during routing are reraised with
+        this method. During debug we are not reraising redirect requests
+        for non ``GET``, ``HEAD``, or ``OPTIONS`` requests and we're raising
+        a different error instead to help debug situations.
+
+        :internal:
+        """
+        if not self.debug \
+           or not isinstance(request.routing_exception, RequestRedirect) \
+           or request.method in ('GET', 'HEAD', 'OPTIONS'):
+            raise request.routing_exception
+
+        from .debughelpers import FormDataRoutingRedirect
+        raise FormDataRoutingRedirect(request)
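+    # As noted in handle_exception above, the got_request_exception signal
+    # fires before any handler runs. A minimal sketch of hooking it for
+    # error reporting (the receiver and its logging are assumed, not part
+    # of Flask)::
+    #
+    #     from flask import got_request_exception
+    #
+    #     def report(sender, exception, **extra):
+    #         sender.logger.warning('caught: %r', exception)
+    #
+    #     got_request_exception.connect(report, app)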
+ """ + req = _request_ctx_stack.top.request + if req.routing_exception is not None: + self.raise_routing_exception(req) + rule = req.url_rule + # if we provide automatic options for this URL and the + # request came with the OPTIONS method, reply automatically + if getattr(rule, 'provide_automatic_options', False) \ + and req.method == 'OPTIONS': + return self.make_default_options_response() + # otherwise dispatch to the handler for that endpoint + return self.view_functions[rule.endpoint](**req.view_args) + + def full_dispatch_request(self): + """Dispatches the request and on top of that performs request + pre and postprocessing as well as HTTP exception catching and + error handling. + + .. versionadded:: 0.7 + """ + self.try_trigger_before_first_request_functions() + try: + request_started.send(self) + rv = self.preprocess_request() + if rv is None: + rv = self.dispatch_request() + except Exception as e: + rv = self.handle_user_exception(e) + return self.finalize_request(rv) + + def finalize_request(self, rv, from_error_handler=False): + """Given the return value from a view function this finalizes + the request by converting it into a response and invoking the + postprocessing functions. This is invoked for both normal + request dispatching as well as error handlers. + + Because this means that it might be called as a result of a + failure a special safe mode is available which can be enabled + with the `from_error_handler` flag. If enabled, failures in + response processing will be logged and otherwise ignored. + + :internal: + """ + response = self.make_response(rv) + try: + response = self.process_response(response) + request_finished.send(self, response=response) + except Exception: + if not from_error_handler: + raise + self.logger.exception('Request finalizing failed with an ' + 'error while handling an error') + return response + + def try_trigger_before_first_request_functions(self): + """Called before each request and will ensure that it triggers + the :attr:`before_first_request_funcs` and only exactly once per + application instance (which means process usually). + + :internal: + """ + if self._got_first_request: + return + with self._before_request_lock: + if self._got_first_request: + return + for func in self.before_first_request_funcs: + func() + self._got_first_request = True + + def make_default_options_response(self): + """This method is called to create the default ``OPTIONS`` response. + This can be changed through subclassing to change the default + behavior of ``OPTIONS`` responses. + + .. versionadded:: 0.7 + """ + adapter = _request_ctx_stack.top.url_adapter + if hasattr(adapter, 'allowed_methods'): + methods = adapter.allowed_methods() + else: + # fallback for Werkzeug < 0.7 + methods = [] + try: + adapter.match(method='--') + except MethodNotAllowed as e: + methods = e.valid_methods + except HTTPException as e: + pass + rv = self.response_class() + rv.allow.update(methods) + return rv + + def should_ignore_error(self, error): + """This is called to figure out if an error should be ignored + or not as far as the teardown system is concerned. If this + function returns ``True`` then the teardown handlers will not be + passed the error. + + .. versionadded:: 0.10 + """ + return False + + def make_response(self, rv): + """Convert the return value from a view function to an instance of + :attr:`response_class`. + + :param rv: the return value from the view function. The view function + must return a response. 
+    def make_response(self, rv):
+        """Convert the return value from a view function to an instance of
+        :attr:`response_class`.
+
+        :param rv: the return value from the view function. The view
+            function must return a response. Returning ``None``, or the
+            view ending without returning, is not allowed. The following
+            types are allowed for ``rv``:
+
+            ``str`` (``unicode`` in Python 2)
+                A response object is created with the string encoded to
+                UTF-8 as the body.
+
+            ``bytes`` (``str`` in Python 2)
+                A response object is created with the bytes as the body.
+
+            ``tuple``
+                Either ``(body, status, headers)``, ``(body, status)``, or
+                ``(body, headers)``, where ``body`` is any of the other
+                types allowed here, ``status`` is a string or an integer,
+                and ``headers`` is a dictionary or a list of
+                ``(key, value)`` tuples. If ``body`` is a
+                :attr:`response_class` instance, ``status`` overwrites the
+                existing value and ``headers`` are extended.
+
+            :attr:`response_class`
+                The object is returned unchanged.
+
+            other :class:`~werkzeug.wrappers.Response` class
+                The object is coerced to :attr:`response_class`.
+
+            :func:`callable`
+                The function is called as a WSGI application. The result
+                is used to create a response object.
+
+        .. versionchanged:: 0.9
+           Previously a tuple was interpreted as the arguments for the
+           response object.
+        """
+
+        status = headers = None
+
+        # unpack tuple returns
+        if isinstance(rv, tuple):
+            len_rv = len(rv)
+
+            # a 3-tuple is unpacked directly
+            if len_rv == 3:
+                rv, status, headers = rv
+            # decide if a 2-tuple has status or headers
+            elif len_rv == 2:
+                if isinstance(rv[1], (Headers, dict, tuple, list)):
+                    rv, headers = rv
+                else:
+                    rv, status = rv
+            # other sized tuples are not allowed
+            else:
+                raise TypeError(
+                    'The view function did not return a valid response tuple.'
+                    ' The tuple must have the form (body, status, headers),'
+                    ' (body, status), or (body, headers).'
+                )
+
+        # the body must not be None
+        if rv is None:
+            raise TypeError(
+                'The view function did not return a valid response. The'
+                ' function either returned None or ended without a return'
+                ' statement.'
+            )
+
+        # make sure the body is an instance of the response class
+        if not isinstance(rv, self.response_class):
+            if isinstance(rv, (text_type, bytes, bytearray)):
+                # let the response class set the status and headers instead
+                # of waiting to do it manually, so that the class can handle
+                # any special logic
+                rv = self.response_class(rv, status=status, headers=headers)
+                status = headers = None
+            else:
+                # evaluate a WSGI callable, or coerce a different response
+                # class to the correct type
+                try:
+                    rv = self.response_class.force_type(rv, request.environ)
+                except TypeError as e:
+                    new_error = TypeError(
+                        '{e}\nThe view function did not return a valid'
+                        ' response. The return type must be a string, tuple,'
+                        ' Response instance, or WSGI callable, but it was a'
+                        ' {rv.__class__.__name__}.'.format(e=e, rv=rv)
+                    )
+                    reraise(TypeError, new_error, sys.exc_info()[2])
+
+        # prefer the status if it was provided
+        if status is not None:
+            if isinstance(status, (text_type, bytes, bytearray)):
+                rv.status = status
+            else:
+                rv.status_code = status
+
+        # extend existing headers with provided headers
+        if headers:
+            rv.headers.extend(headers)
+
+        return rv
+
+    def create_url_adapter(self, request):
+        """Creates a URL adapter for the given request. The URL adapter
+        is created at a point where the request context is not yet set
+        up so the request is passed explicitly.
+
+        .. versionadded:: 0.6
+
+        .. versionchanged:: 0.9
+           This can now also be called without a request object when the
+           URL adapter is created for the application context.
+
+        ..
versionchanged:: 1.0 + :data:`SERVER_NAME` no longer implicitly enables subdomain + matching. Use :attr:`subdomain_matching` instead. + """ + if request is not None: + # If subdomain matching is disabled (the default), use the + # default subdomain in all cases. This should be the default + # in Werkzeug but it currently does not have that feature. + subdomain = ((self.url_map.default_subdomain or None) + if not self.subdomain_matching else None) + return self.url_map.bind_to_environ( + request.environ, + server_name=self.config['SERVER_NAME'], + subdomain=subdomain) + # We need at the very least the server name to be set for this + # to work. + if self.config['SERVER_NAME'] is not None: + return self.url_map.bind( + self.config['SERVER_NAME'], + script_name=self.config['APPLICATION_ROOT'], + url_scheme=self.config['PREFERRED_URL_SCHEME']) + + def inject_url_defaults(self, endpoint, values): + """Injects the URL defaults for the given endpoint directly into + the values dictionary passed. This is used internally and + automatically called on URL building. + + .. versionadded:: 0.7 + """ + funcs = self.url_default_functions.get(None, ()) + if '.' in endpoint: + bp = endpoint.rsplit('.', 1)[0] + funcs = chain(funcs, self.url_default_functions.get(bp, ())) + for func in funcs: + func(endpoint, values) + + def handle_url_build_error(self, error, endpoint, values): + """Handle :class:`~werkzeug.routing.BuildError` on :meth:`url_for`. + """ + exc_type, exc_value, tb = sys.exc_info() + for handler in self.url_build_error_handlers: + try: + rv = handler(error, endpoint, values) + if rv is not None: + return rv + except BuildError as e: + # make error available outside except block (py3) + error = e + + # At this point we want to reraise the exception. If the error is + # still the same one we can reraise it with the original traceback, + # otherwise we raise it from here. + if error is exc_value: + reraise(exc_type, exc_value, tb) + raise error + + def preprocess_request(self): + """Called before the request is dispatched. Calls + :attr:`url_value_preprocessors` registered with the app and the + current blueprint (if any). Then calls :attr:`before_request_funcs` + registered with the app and the blueprint. + + If any :meth:`before_request` handler returns a non-None value, the + value is handled as if it was the return value from the view, and + further request handling is stopped. + """ + + bp = _request_ctx_stack.top.request.blueprint + + funcs = self.url_value_preprocessors.get(None, ()) + if bp is not None and bp in self.url_value_preprocessors: + funcs = chain(funcs, self.url_value_preprocessors[bp]) + for func in funcs: + func(request.endpoint, request.view_args) + + funcs = self.before_request_funcs.get(None, ()) + if bp is not None and bp in self.before_request_funcs: + funcs = chain(funcs, self.before_request_funcs[bp]) + for func in funcs: + rv = func() + if rv is not None: + return rv + + def process_response(self, response): + """Can be overridden in order to modify the response object + before it's sent to the WSGI server. By default this will + call all the :meth:`after_request` decorated functions. + + .. versionchanged:: 0.5 + As of Flask 0.5 the functions registered for after request + execution are called in reverse order of registration. + + :param response: a :attr:`response_class` object. + :return: a new response object or the same, has to be an + instance of :attr:`response_class`. 
+ """ + ctx = _request_ctx_stack.top + bp = ctx.request.blueprint + funcs = ctx._after_request_functions + if bp is not None and bp in self.after_request_funcs: + funcs = chain(funcs, reversed(self.after_request_funcs[bp])) + if None in self.after_request_funcs: + funcs = chain(funcs, reversed(self.after_request_funcs[None])) + for handler in funcs: + response = handler(response) + if not self.session_interface.is_null_session(ctx.session): + self.session_interface.save_session(self, ctx.session, response) + return response + + def do_teardown_request(self, exc=_sentinel): + """Called after the request is dispatched and the response is + returned, right before the request context is popped. + + This calls all functions decorated with + :meth:`teardown_request`, and :meth:`Blueprint.teardown_request` + if a blueprint handled the request. Finally, the + :data:`request_tearing_down` signal is sent. + + This is called by + :meth:`RequestContext.pop() <flask.ctx.RequestContext.pop>`, + which may be delayed during testing to maintain access to + resources. + + :param exc: An unhandled exception raised while dispatching the + request. Detected from the current exception information if + not passed. Passed to each teardown function. + + .. versionchanged:: 0.9 + Added the ``exc`` argument. + """ + if exc is _sentinel: + exc = sys.exc_info()[1] + funcs = reversed(self.teardown_request_funcs.get(None, ())) + bp = _request_ctx_stack.top.request.blueprint + if bp is not None and bp in self.teardown_request_funcs: + funcs = chain(funcs, reversed(self.teardown_request_funcs[bp])) + for func in funcs: + func(exc) + request_tearing_down.send(self, exc=exc) + + def do_teardown_appcontext(self, exc=_sentinel): + """Called right before the application context is popped. + + When handling a request, the application context is popped + after the request context. See :meth:`do_teardown_request`. + + This calls all functions decorated with + :meth:`teardown_appcontext`. Then the + :data:`appcontext_tearing_down` signal is sent. + + This is called by + :meth:`AppContext.pop() <flask.ctx.AppContext.pop>`. + + .. versionadded:: 0.9 + """ + if exc is _sentinel: + exc = sys.exc_info()[1] + for func in reversed(self.teardown_appcontext_funcs): + func(exc) + appcontext_tearing_down.send(self, exc=exc) + + def app_context(self): + """Create an :class:`~flask.ctx.AppContext`. Use as a ``with`` + block to push the context, which will make :data:`current_app` + point at this application. + + An application context is automatically pushed by + :meth:`RequestContext.push() <flask.ctx.RequestContext.push>` + when handling a request, and when running a CLI command. Use + this to manually create a context outside of these situations. + + :: + + with app.app_context(): + init_db() + + See :doc:`/appcontext`. + + .. versionadded:: 0.9 + """ + return AppContext(self) + + def request_context(self, environ): + """Create a :class:`~flask.ctx.RequestContext` representing a + WSGI environment. Use a ``with`` block to push the context, + which will make :data:`request` point at this request. + + See :doc:`/reqcontext`. + + Typically you should not call this from your own code. A request + context is automatically pushed by the :meth:`wsgi_app` when + handling a request. Use :meth:`test_request_context` to create + an environment and context instead of this method. 
+ + :param environ: a WSGI environment + """ + return RequestContext(self, environ) + + def test_request_context(self, *args, **kwargs): + """Create a :class:`~flask.ctx.RequestContext` for a WSGI + environment created from the given values. This is mostly useful + during testing, where you may want to run a function that uses + request data without dispatching a full request. + + See :doc:`/reqcontext`. + + Use a ``with`` block to push the context, which will make + :data:`request` point at the request for the created + environment. :: + + with test_request_context(...): + generate_report() + + When using the shell, it may be easier to push and pop the + context manually to avoid indentation. :: + + ctx = app.test_request_context(...) + ctx.push() + ... + ctx.pop() + + Takes the same arguments as Werkzeug's + :class:`~werkzeug.test.EnvironBuilder`, with some defaults from + the application. See the linked Werkzeug docs for most of the + available arguments. Flask-specific behavior is listed here. + + :param path: URL path being requested. + :param base_url: Base URL where the app is being served, which + ``path`` is relative to. If not given, built from + :data:`PREFERRED_URL_SCHEME`, ``subdomain``, + :data:`SERVER_NAME`, and :data:`APPLICATION_ROOT`. + :param subdomain: Subdomain name to append to + :data:`SERVER_NAME`. + :param url_scheme: Scheme to use instead of + :data:`PREFERRED_URL_SCHEME`. + :param data: The request body, either as a string or a dict of + form keys and values. + :param json: If given, this is serialized as JSON and passed as + ``data``. Also defaults ``content_type`` to + ``application/json``. + :param args: other positional arguments passed to + :class:`~werkzeug.test.EnvironBuilder`. + :param kwargs: other keyword arguments passed to + :class:`~werkzeug.test.EnvironBuilder`. + """ + from flask.testing import make_test_environ_builder + + builder = make_test_environ_builder(self, *args, **kwargs) + + try: + return self.request_context(builder.get_environ()) + finally: + builder.close() + + def wsgi_app(self, environ, start_response): + """The actual WSGI application. This is not implemented in + :meth:`__call__` so that middlewares can be applied without + losing a reference to the app object. Instead of doing this:: + + app = MyMiddleware(app) + + It's a better idea to do this instead:: + + app.wsgi_app = MyMiddleware(app.wsgi_app) + + Then you still have the original application object around and + can continue to call methods on it. + + .. versionchanged:: 0.7 + Teardown events for the request and app contexts are called + even if an unhandled error occurs. Other events may not be + called depending on when an error occurs during dispatch. + See :ref:`callbacks-and-errors`. + + :param environ: A WSGI environment. + :param start_response: A callable accepting a status code, + a list of headers, and an optional exception context to + start the response. + """ + ctx = self.request_context(environ) + error = None + try: + try: + ctx.push() + response = self.full_dispatch_request() + except Exception as e: + error = e + response = self.handle_exception(e) + except: + error = sys.exc_info()[1] + raise + return response(environ, start_response) + finally: + if self.should_ignore_error(error): + error = None + ctx.auto_pop(error) + + def __call__(self, environ, start_response): + """The WSGI server calls the Flask application object as the + WSGI application. 
This calls :meth:`wsgi_app`, which can be
+        wrapped to apply middleware."""
+        return self.wsgi_app(environ, start_response)
+
+    def __repr__(self):
+        return '<%s %r>' % (
+            self.__class__.__name__,
+            self.name,
+        )
diff --git a/python/flask/blueprints.py b/python/flask/blueprints.py
new file mode 100644
index 0000000..c2158fe
--- /dev/null
+++ b/python/flask/blueprints.py
@@ -0,0 +1,447 @@
+# -*- coding: utf-8 -*-
+"""
+    flask.blueprints
+    ~~~~~~~~~~~~~~~~
+
+    Blueprints are the recommended way to implement larger or more
+    pluggable applications in Flask 0.7 and later.
+
+    :copyright: © 2010 by the Pallets team.
+    :license: BSD, see LICENSE for more details.
+"""
+from functools import update_wrapper
+
+from .helpers import _PackageBoundObject, _endpoint_from_view_func
+
+
+class BlueprintSetupState(object):
+    """Temporary holder object for registering a blueprint with the
+    application. An instance of this class is created by the
+    :meth:`~flask.Blueprint.make_setup_state` method and later passed
+    to all register callback functions.
+    """
+
+    def __init__(self, blueprint, app, options, first_registration):
+        #: a reference to the current application
+        self.app = app
+
+        #: a reference to the blueprint that created this setup state.
+        self.blueprint = blueprint
+
+        #: a dictionary with all options that were passed to the
+        #: :meth:`~flask.Flask.register_blueprint` method.
+        self.options = options
+
+        #: as blueprints can be registered multiple times with the
+        #: application and not everything wants to be registered
+        #: multiple times on it, this attribute can be used to figure
+        #: out if the blueprint was registered in the past already.
+        self.first_registration = first_registration
+
+        subdomain = self.options.get('subdomain')
+        if subdomain is None:
+            subdomain = self.blueprint.subdomain
+
+        #: The subdomain that the blueprint should be active for, ``None``
+        #: otherwise.
+        self.subdomain = subdomain
+
+        url_prefix = self.options.get('url_prefix')
+        if url_prefix is None:
+            url_prefix = self.blueprint.url_prefix
+        #: The prefix that should be used for all URLs defined on the
+        #: blueprint.
+        self.url_prefix = url_prefix
+
+        #: A dictionary with URL defaults that is added to each and every
+        #: URL that was defined with the blueprint.
+        self.url_defaults = dict(self.blueprint.url_values_defaults)
+        self.url_defaults.update(self.options.get('url_defaults', ()))
+
+    def add_url_rule(self, rule, endpoint=None, view_func=None, **options):
+        """A helper method to register a rule (and optionally a view
+        function) to the application. The endpoint is automatically
+        prefixed with the blueprint's name.
+        """
+        if self.url_prefix is not None:
+            if rule:
+                rule = '/'.join((
+                    self.url_prefix.rstrip('/'), rule.lstrip('/')))
+            else:
+                rule = self.url_prefix
+        options.setdefault('subdomain', self.subdomain)
+        if endpoint is None:
+            endpoint = _endpoint_from_view_func(view_func)
+        defaults = self.url_defaults
+        if 'defaults' in options:
+            defaults = dict(defaults, **options.pop('defaults'))
+        self.app.add_url_rule(rule, '%s.%s' % (self.blueprint.name, endpoint),
+                              view_func, defaults=defaults, **options)
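+# A small usage sketch (not part of this module): a blueprint is created,
+# routes are attached, and registration replays the deferred functions
+# through the setup state above. The names ``bp`` and ``admin`` are
+# illustrative only::
+#
+#     from flask import Blueprint, Flask
+#
+#     bp = Blueprint('admin', __name__, url_prefix='/admin')
+#
+#     @bp.route('/')
+#     def index():
+#         return 'admin index'       # endpoint becomes 'admin.index'
+#
+#     app = Flask(__name__)
+#     app.register_blueprint(bp)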
+
+
+class Blueprint(_PackageBoundObject):
+    """Represents a blueprint. A blueprint is an object that records
+    functions that will be called with the
+    :class:`~flask.blueprints.BlueprintSetupState` later to register
+    functions or other things on the main application. See
+    :ref:`blueprints` for more information.
+
+    .. versionadded:: 0.7
+    """
+
+    warn_on_modifications = False
+    _got_registered_once = False
+
+    #: Blueprint local JSON encoder class to use.
+    #: Set to ``None`` to use the app's :class:`~flask.app.Flask.json_encoder`.
+    json_encoder = None
+    #: Blueprint local JSON decoder class to use.
+    #: Set to ``None`` to use the app's :class:`~flask.app.Flask.json_decoder`.
+    json_decoder = None
+
+    # TODO remove the next three attrs when Sphinx :inherited-members: works
+    # https://github.com/sphinx-doc/sphinx/issues/741
+
+    #: The name of the package or module that this app belongs to. Do not
+    #: change this once it is set by the constructor.
+    import_name = None
+
+    #: Location of the template files to be added to the template lookup.
+    #: ``None`` if templates should not be added.
+    template_folder = None
+
+    #: Absolute path to the package on the filesystem. Used to look up
+    #: resources contained in the package.
+    root_path = None
+
+    def __init__(self, name, import_name, static_folder=None,
+                 static_url_path=None, template_folder=None,
+                 url_prefix=None, subdomain=None, url_defaults=None,
+                 root_path=None):
+        _PackageBoundObject.__init__(self, import_name, template_folder,
+                                     root_path=root_path)
+        self.name = name
+        self.url_prefix = url_prefix
+        self.subdomain = subdomain
+        self.static_folder = static_folder
+        self.static_url_path = static_url_path
+        self.deferred_functions = []
+        if url_defaults is None:
+            url_defaults = {}
+        self.url_values_defaults = url_defaults
+
+    def record(self, func):
+        """Registers a function that is called when the blueprint is
+        registered on the application. This function is called with the
+        state as argument as returned by the :meth:`make_setup_state`
+        method.
+        """
+        if self._got_registered_once and self.warn_on_modifications:
+            from warnings import warn
+            warn(Warning('The blueprint was already registered once '
+                         'but is getting modified now. These changes '
+                         'will not show up.'))
+        self.deferred_functions.append(func)
+
+    def record_once(self, func):
+        """Works like :meth:`record` but wraps the function in another
+        function that will ensure the function is only called once. If the
+        blueprint is registered a second time on the application, the
+        function passed is not called.
+        """
+        def wrapper(state):
+            if state.first_registration:
+                func(state)
+        return self.record(update_wrapper(wrapper, func))
+
+    def make_setup_state(self, app, options, first_registration=False):
+        """Creates an instance of the
+        :class:`~flask.blueprints.BlueprintSetupState` class that is later
+        passed to the register callback functions. Subclasses can override
+        this to return a subclass of the setup state.
+        """
+        return BlueprintSetupState(self, app, options, first_registration)
+
+    def register(self, app, options, first_registration=False):
+        """Called by :meth:`Flask.register_blueprint` to register all
+        views and callbacks registered on the blueprint with the
+        application. Creates a :class:`.BlueprintSetupState` and calls
+        each :meth:`record` callback with it.
+
+        :param app: The application this blueprint is being registered
+            with.
+        :param options: Keyword arguments forwarded from
+            :meth:`~Flask.register_blueprint`.
+        :param first_registration: Whether this is the first time this
+            blueprint has been registered on the application.
+ """ + self._got_registered_once = True + state = self.make_setup_state(app, options, first_registration) + + if self.has_static_folder: + state.add_url_rule( + self.static_url_path + '/<path:filename>', + view_func=self.send_static_file, endpoint='static' + ) + + for deferred in self.deferred_functions: + deferred(state) + + def route(self, rule, **options): + """Like :meth:`Flask.route` but for a blueprint. The endpoint for the + :func:`url_for` function is prefixed with the name of the blueprint. + """ + def decorator(f): + endpoint = options.pop("endpoint", f.__name__) + self.add_url_rule(rule, endpoint, f, **options) + return f + return decorator + + def add_url_rule(self, rule, endpoint=None, view_func=None, **options): + """Like :meth:`Flask.add_url_rule` but for a blueprint. The endpoint for + the :func:`url_for` function is prefixed with the name of the blueprint. + """ + if endpoint: + assert '.' not in endpoint, "Blueprint endpoints should not contain dots" + if view_func and hasattr(view_func, '__name__'): + assert '.' not in view_func.__name__, "Blueprint view function name should not contain dots" + self.record(lambda s: + s.add_url_rule(rule, endpoint, view_func, **options)) + + def endpoint(self, endpoint): + """Like :meth:`Flask.endpoint` but for a blueprint. This does not + prefix the endpoint with the blueprint name, this has to be done + explicitly by the user of this method. If the endpoint is prefixed + with a `.` it will be registered to the current blueprint, otherwise + it's an application independent endpoint. + """ + def decorator(f): + def register_endpoint(state): + state.app.view_functions[endpoint] = f + self.record_once(register_endpoint) + return f + return decorator + + def app_template_filter(self, name=None): + """Register a custom template filter, available application wide. Like + :meth:`Flask.template_filter` but for a blueprint. + + :param name: the optional name of the filter, otherwise the + function name will be used. + """ + def decorator(f): + self.add_app_template_filter(f, name=name) + return f + return decorator + + def add_app_template_filter(self, f, name=None): + """Register a custom template filter, available application wide. Like + :meth:`Flask.add_template_filter` but for a blueprint. Works exactly + like the :meth:`app_template_filter` decorator. + + :param name: the optional name of the filter, otherwise the + function name will be used. + """ + def register_template(state): + state.app.jinja_env.filters[name or f.__name__] = f + self.record_once(register_template) + + def app_template_test(self, name=None): + """Register a custom template test, available application wide. Like + :meth:`Flask.template_test` but for a blueprint. + + .. versionadded:: 0.10 + + :param name: the optional name of the test, otherwise the + function name will be used. + """ + def decorator(f): + self.add_app_template_test(f, name=name) + return f + return decorator + + def add_app_template_test(self, f, name=None): + """Register a custom template test, available application wide. Like + :meth:`Flask.add_template_test` but for a blueprint. Works exactly + like the :meth:`app_template_test` decorator. + + .. versionadded:: 0.10 + + :param name: the optional name of the test, otherwise the + function name will be used. + """ + def register_template(state): + state.app.jinja_env.tests[name or f.__name__] = f + self.record_once(register_template) + + def app_template_global(self, name=None): + """Register a custom template global, available application wide. 
Like + :meth:`Flask.template_global` but for a blueprint. + + .. versionadded:: 0.10 + + :param name: the optional name of the global, otherwise the + function name will be used. + """ + def decorator(f): + self.add_app_template_global(f, name=name) + return f + return decorator + + def add_app_template_global(self, f, name=None): + """Register a custom template global, available application wide. Like + :meth:`Flask.add_template_global` but for a blueprint. Works exactly + like the :meth:`app_template_global` decorator. + + .. versionadded:: 0.10 + + :param name: the optional name of the global, otherwise the + function name will be used. + """ + def register_template(state): + state.app.jinja_env.globals[name or f.__name__] = f + self.record_once(register_template) + + def before_request(self, f): + """Like :meth:`Flask.before_request` but for a blueprint. This function + is only executed before each request that is handled by a function of + that blueprint. + """ + self.record_once(lambda s: s.app.before_request_funcs + .setdefault(self.name, []).append(f)) + return f + + def before_app_request(self, f): + """Like :meth:`Flask.before_request`. Such a function is executed + before each request, even if outside of a blueprint. + """ + self.record_once(lambda s: s.app.before_request_funcs + .setdefault(None, []).append(f)) + return f + + def before_app_first_request(self, f): + """Like :meth:`Flask.before_first_request`. Such a function is + executed before the first request to the application. + """ + self.record_once(lambda s: s.app.before_first_request_funcs.append(f)) + return f + + def after_request(self, f): + """Like :meth:`Flask.after_request` but for a blueprint. This function + is only executed after each request that is handled by a function of + that blueprint. + """ + self.record_once(lambda s: s.app.after_request_funcs + .setdefault(self.name, []).append(f)) + return f + + def after_app_request(self, f): + """Like :meth:`Flask.after_request` but for a blueprint. Such a function + is executed after each request, even if outside of the blueprint. + """ + self.record_once(lambda s: s.app.after_request_funcs + .setdefault(None, []).append(f)) + return f + + def teardown_request(self, f): + """Like :meth:`Flask.teardown_request` but for a blueprint. This + function is only executed when tearing down requests handled by a + function of that blueprint. Teardown request functions are executed + when the request context is popped, even when no actual request was + performed. + """ + self.record_once(lambda s: s.app.teardown_request_funcs + .setdefault(self.name, []).append(f)) + return f + + def teardown_app_request(self, f): + """Like :meth:`Flask.teardown_request` but for a blueprint. Such a + function is executed when tearing down each request, even if outside of + the blueprint. + """ + self.record_once(lambda s: s.app.teardown_request_funcs + .setdefault(None, []).append(f)) + return f + + def context_processor(self, f): + """Like :meth:`Flask.context_processor` but for a blueprint. This + function is only executed for requests handled by a blueprint. + """ + self.record_once(lambda s: s.app.template_context_processors + .setdefault(self.name, []).append(f)) + return f + + def app_context_processor(self, f): + """Like :meth:`Flask.context_processor` but for a blueprint. Such a + function is executed each request, even if outside of the blueprint. 
+ """ + self.record_once(lambda s: s.app.template_context_processors + .setdefault(None, []).append(f)) + return f + + def app_errorhandler(self, code): + """Like :meth:`Flask.errorhandler` but for a blueprint. This + handler is used for all requests, even if outside of the blueprint. + """ + def decorator(f): + self.record_once(lambda s: s.app.errorhandler(code)(f)) + return f + return decorator + + def url_value_preprocessor(self, f): + """Registers a function as URL value preprocessor for this + blueprint. It's called before the view functions are called and + can modify the url values provided. + """ + self.record_once(lambda s: s.app.url_value_preprocessors + .setdefault(self.name, []).append(f)) + return f + + def url_defaults(self, f): + """Callback function for URL defaults for this blueprint. It's called + with the endpoint and values and should update the values passed + in place. + """ + self.record_once(lambda s: s.app.url_default_functions + .setdefault(self.name, []).append(f)) + return f + + def app_url_value_preprocessor(self, f): + """Same as :meth:`url_value_preprocessor` but application wide. + """ + self.record_once(lambda s: s.app.url_value_preprocessors + .setdefault(None, []).append(f)) + return f + + def app_url_defaults(self, f): + """Same as :meth:`url_defaults` but application wide. + """ + self.record_once(lambda s: s.app.url_default_functions + .setdefault(None, []).append(f)) + return f + + def errorhandler(self, code_or_exception): + """Registers an error handler that becomes active for this blueprint + only. Please be aware that routing does not happen local to a + blueprint so an error handler for 404 usually is not handled by + a blueprint unless it is caused inside a view function. Another + special case is the 500 internal server error which is always looked + up from the application. + + Otherwise works as the :meth:`~flask.Flask.errorhandler` decorator + of the :class:`~flask.Flask` object. + """ + def decorator(f): + self.record_once(lambda s: s.app._register_error_handler( + self.name, code_or_exception, f)) + return f + return decorator + + def register_error_handler(self, code_or_exception, f): + """Non-decorator version of the :meth:`errorhandler` error attach + function, akin to the :meth:`~flask.Flask.register_error_handler` + application-wide function of the :class:`~flask.Flask` object but + for error handlers limited to this blueprint. + + .. versionadded:: 0.11 + """ + self.record_once(lambda s: s.app._register_error_handler( + self.name, code_or_exception, f)) diff --git a/python/flask/cli.py b/python/flask/cli.py new file mode 100644 index 0000000..3eb93b3 --- /dev/null +++ b/python/flask/cli.py @@ -0,0 +1,910 @@ +# -*- coding: utf-8 -*- +""" + flask.cli + ~~~~~~~~~ + + A simple command line application to run flask apps. + + :copyright: © 2010 by the Pallets team. + :license: BSD, see LICENSE for more details. +""" + +from __future__ import print_function + +import ast +import inspect +import os +import platform +import re +import ssl +import sys +import traceback +from functools import update_wrapper +from operator import attrgetter +from threading import Lock, Thread + +import click +from werkzeug.utils import import_string + +from . 
import __version__
+from ._compat import getargspec, iteritems, reraise, text_type
+from .globals import current_app
+from .helpers import get_debug_flag, get_env, get_load_dotenv
+
+try:
+    import dotenv
+except ImportError:
+    dotenv = None
+
+
+class NoAppException(click.UsageError):
+    """Raised if an application cannot be found or loaded."""
+
+
+def find_best_app(script_info, module):
+    """Given a module instance this tries to find the best possible
+    application in the module or raises an exception.
+    """
+    from . import Flask
+
+    # Search for the most common names first.
+    for attr_name in ('app', 'application'):
+        app = getattr(module, attr_name, None)
+
+        if isinstance(app, Flask):
+            return app
+
+    # Otherwise find the only object that is a Flask instance.
+    matches = [
+        v for k, v in iteritems(module.__dict__) if isinstance(v, Flask)
+    ]
+
+    if len(matches) == 1:
+        return matches[0]
+    elif len(matches) > 1:
+        raise NoAppException(
+            'Detected multiple Flask applications in module "{module}". Use '
+            '"FLASK_APP={module}:name" to specify the correct '
+            'one.'.format(module=module.__name__)
+        )
+
+    # Search for app factory functions.
+    for attr_name in ('create_app', 'make_app'):
+        app_factory = getattr(module, attr_name, None)
+
+        if inspect.isfunction(app_factory):
+            try:
+                app = call_factory(script_info, app_factory)
+
+                if isinstance(app, Flask):
+                    return app
+            except TypeError:
+                if not _called_with_wrong_args(app_factory):
+                    raise
+                raise NoAppException(
+                    'Detected factory "{factory}" in module "{module}", but '
+                    'could not call it without arguments. Use '
+                    '"FLASK_APP=\'{module}:{factory}(args)\'" to specify '
+                    'arguments.'.format(
+                        factory=attr_name, module=module.__name__
+                    )
+                )
+
+    raise NoAppException(
+        'Failed to find Flask application or factory in module "{module}". '
+        'Use "FLASK_APP={module}:name" to specify one.'.format(
+            module=module.__name__
+        )
+    )
+
+
+def call_factory(script_info, app_factory, arguments=()):
+    """Takes an app factory, a ``script_info`` object and optionally a
+    tuple of arguments. Checks for the existence of a script_info argument
+    and calls the app_factory depending on that and the arguments provided.
+    """
+    args_spec = getargspec(app_factory)
+    arg_names = args_spec.args
+    arg_defaults = args_spec.defaults
+
+    if 'script_info' in arg_names:
+        return app_factory(*arguments, script_info=script_info)
+    elif arguments:
+        return app_factory(*arguments)
+    elif not arguments and len(arg_names) == 1 and arg_defaults is None:
+        return app_factory(script_info)
+
+    return app_factory()
+
+
+def _called_with_wrong_args(factory):
+    """Check whether calling a function raised a ``TypeError`` because
+    the call failed or because something in the factory raised the
+    error.
+
+    :param factory: the factory function that was called
+    :return: true if the call failed
+    """
+    tb = sys.exc_info()[2]
+
+    try:
+        while tb is not None:
+            if tb.tb_frame.f_code is factory.__code__:
+                # in the factory, it was called successfully
+                return False
+
+            tb = tb.tb_next
+
+        # didn't reach the factory
+        return True
+    finally:
+        del tb
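+# Hedged examples of the FLASK_APP values that the helpers above and below
+# are designed to resolve (the module and factory names are illustrative)::
+#
+#     FLASK_APP=hello                      # find_best_app picks app/application
+#     FLASK_APP=hello:app2                 # find_app_by_string, variable name
+#     FLASK_APP="hello:create_app('dev')"  # factory called via call_factory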
+
+
+def find_app_by_string(script_info, module, app_name):
+    """Checks if the given string is a variable name or a function. If it
+    is a function, it checks for specified arguments and whether it takes
+    a ``script_info`` argument and calls the function with the appropriate
+    arguments.
+    """
+    from flask import Flask
+    match = re.match(r'^ *([^ ()]+) *(?:\((.*?) *,? *\))? *$', app_name)
+
+    if not match:
+        raise NoAppException(
+            '"{name}" is not a valid variable name or function '
+            'expression.'.format(name=app_name)
+        )
+
+    name, args = match.groups()
+
+    try:
+        attr = getattr(module, name)
+    except AttributeError as e:
+        raise NoAppException(e.args[0])
+
+    if inspect.isfunction(attr):
+        if args:
+            try:
+                args = ast.literal_eval('({args},)'.format(args=args))
+            except (ValueError, SyntaxError):
+                raise NoAppException(
+                    'Could not parse the arguments in '
+                    '"{app_name}".'.format(app_name=app_name)
+                )
+        else:
+            args = ()
+
+        try:
+            app = call_factory(script_info, attr, args)
+        except TypeError as e:
+            if not _called_with_wrong_args(attr):
+                raise
+
+            raise NoAppException(
+                '{e}\nThe factory "{app_name}" in module "{module}" could '
+                'not be called with the specified arguments.'.format(
+                    e=e, app_name=app_name, module=module.__name__
+                )
+            )
+    else:
+        app = attr
+
+    if isinstance(app, Flask):
+        return app
+
+    raise NoAppException(
+        'A valid Flask application was not obtained from '
+        '"{module}:{app_name}".'.format(
+            module=module.__name__, app_name=app_name
+        )
+    )
+
+
+def prepare_import(path):
+    """Given a filename this will try to calculate the Python path, add it
+    to the search path and return the actual module name that is expected.
+    """
+    path = os.path.realpath(path)
+
+    if os.path.splitext(path)[1] == '.py':
+        path = os.path.splitext(path)[0]
+
+    if os.path.basename(path) == '__init__':
+        path = os.path.dirname(path)
+
+    module_name = []
+
+    # move up until outside package structure (no __init__.py)
+    while True:
+        path, name = os.path.split(path)
+        module_name.append(name)
+
+        if not os.path.exists(os.path.join(path, '__init__.py')):
+            break
+
+    if sys.path[0] != path:
+        sys.path.insert(0, path)
+
+    return '.'.join(module_name[::-1])
+
+
+def locate_app(script_info, module_name, app_name, raise_if_not_found=True):
+    __traceback_hide__ = True
+
+    try:
+        __import__(module_name)
+    except ImportError:
+        # Reraise the ImportError if it occurred within the imported module.
+        # Determine this by checking whether the trace has a depth > 1.
+        if sys.exc_info()[-1].tb_next:
+            raise NoAppException(
+                'While importing "{name}", an ImportError was raised:'
+                '\n\n{tb}'.format(name=module_name, tb=traceback.format_exc())
+            )
+        elif raise_if_not_found:
+            raise NoAppException(
+                'Could not import "{name}".'.format(name=module_name)
+            )
+        else:
+            return
+
+    module = sys.modules[module_name]
+
+    if app_name is None:
+        return find_best_app(script_info, module)
+    else:
+        return find_app_by_string(script_info, module, app_name)
+
+
+def get_version(ctx, param, value):
+    if not value or ctx.resilient_parsing:
+        return
+    import werkzeug
+    message = (
+        'Python %(python)s\n'
+        'Flask %(flask)s\n'
+        'Werkzeug %(werkzeug)s'
+    )
+    click.echo(message % {
+        'python': platform.python_version(),
+        'flask': __version__,
+        'werkzeug': werkzeug.__version__,
+    }, color=ctx.color)
+    ctx.exit()
+
+
+version_option = click.Option(
+    ['--version'],
+    help='Show the flask version',
+    expose_value=False,
+    callback=get_version,
+    is_flag=True,
+    is_eager=True
+)
+
+
+class DispatchingApp(object):
+    """Special application that dispatches to a Flask application which
+    is imported by name in a background thread. If an error happens
+    it is recorded and shown as part of the WSGI handling which in case
+    of the Werkzeug debugger means that it shows up in the browser.
+    """
+
+    def __init__(self, loader, use_eager_loading=False):
+        self.loader = loader
+        self._app = None
+        self._lock = Lock()
+        self._bg_loading_exc_info = None
+        if use_eager_loading:
+            self._load_unlocked()
+        else:
+            self._load_in_background()
+
+    def _load_in_background(self):
+        def _load_app():
+            __traceback_hide__ = True
+            with self._lock:
+                try:
+                    self._load_unlocked()
+                except Exception:
+                    self._bg_loading_exc_info = sys.exc_info()
+        t = Thread(target=_load_app, args=())
+        t.start()
+
+    def _flush_bg_loading_exception(self):
+        __traceback_hide__ = True
+        exc_info = self._bg_loading_exc_info
+        if exc_info is not None:
+            self._bg_loading_exc_info = None
+            reraise(*exc_info)
+
+    def _load_unlocked(self):
+        __traceback_hide__ = True
+        self._app = rv = self.loader()
+        self._bg_loading_exc_info = None
+        return rv
+
+    def __call__(self, environ, start_response):
+        __traceback_hide__ = True
+        if self._app is not None:
+            return self._app(environ, start_response)
+        self._flush_bg_loading_exception()
+        with self._lock:
+            if self._app is not None:
+                rv = self._app
+            else:
+                rv = self._load_unlocked()
+        return rv(environ, start_response)
+
+
+class ScriptInfo(object):
+    """Helper object to deal with Flask applications. This is usually not
+    necessary to interface with as it's used internally in the dispatching
+    to click. In future versions of Flask this object will most likely
+    play a bigger role. Typically it's created automatically by the
+    :class:`FlaskGroup` but you can also manually create it and pass it
+    onwards as a click object.
+    """
+
+    def __init__(self, app_import_path=None, create_app=None,
+                 set_debug_flag=True):
+        #: Optionally the import path for the Flask application.
+        self.app_import_path = app_import_path or os.environ.get('FLASK_APP')
+        #: Optionally a function that is passed the script info to create
+        #: the instance of the application.
+        self.create_app = create_app
+        #: A dictionary with arbitrary data that can be associated with
+        #: this script info.
+        self.data = {}
+        self.set_debug_flag = set_debug_flag
+        self._loaded_app = None
+
+    def load_app(self):
+        """Loads the Flask app (if not yet loaded) and returns it. Calling
+        this multiple times will just result in the already loaded app
+        being returned.
+        """
+        __traceback_hide__ = True
+
+        if self._loaded_app is not None:
+            return self._loaded_app
+
+        app = None
+
+        if self.create_app is not None:
+            app = call_factory(self, self.create_app)
+        else:
+            if self.app_import_path:
+                path, name = (re.split(r':(?![\\/])', self.app_import_path, 1)
+                              + [None])[:2]
+                import_name = prepare_import(path)
+                app = locate_app(self, import_name, name)
+            else:
+                for path in ('wsgi.py', 'app.py'):
+                    import_name = prepare_import(path)
+                    app = locate_app(self, import_name, None,
+                                     raise_if_not_found=False)
+
+                    if app:
+                        break
+
+        if not app:
+            raise NoAppException(
+                'Could not locate a Flask application. You did not provide '
+                'the "FLASK_APP" environment variable, and a "wsgi.py" or '
+                '"app.py" module was not found in the current directory.'
+            )
+
+        if self.set_debug_flag:
+            # Update the app's debug flag through the descriptor so that
+            # other values repopulate as well.
+            app.debug = get_debug_flag()
+
+        self._loaded_app = app
+        return app
+
+
+pass_script_info = click.make_pass_decorator(ScriptInfo, ensure=True)
+
+
+def with_appcontext(f):
+    """Wraps a callback so that it's guaranteed to be executed with the
+    script's application context. If callbacks are registered directly
+    to the ``app.cli`` object then they are wrapped with this function
+    by default unless it's disabled.
+    """
+    @click.pass_context
+    def decorator(__ctx, *args, **kwargs):
+        with __ctx.ensure_object(ScriptInfo).load_app().app_context():
+            return __ctx.invoke(f, *args, **kwargs)
+    return update_wrapper(decorator, f)
+
+
+class AppGroup(click.Group):
+    """This works similar to a regular click :class:`~click.Group` but it
+    changes the behavior of the :meth:`command` decorator so that it
+    automatically wraps the functions in :func:`with_appcontext`.
+
+    Not to be confused with :class:`FlaskGroup`.
+    """
+
+    def command(self, *args, **kwargs):
+        """This works exactly like the method of the same name on a regular
+        :class:`click.Group` but it wraps callbacks in :func:`with_appcontext`
+        unless it's disabled by passing ``with_appcontext=False``.
+        """
+        wrap_for_ctx = kwargs.pop('with_appcontext', True)
+
+        def decorator(f):
+            if wrap_for_ctx:
+                f = with_appcontext(f)
+            return click.Group.command(self, *args, **kwargs)(f)
+        return decorator
+
+    def group(self, *args, **kwargs):
+        """This works exactly like the method of the same name on a regular
+        :class:`click.Group` but it defaults the group class to
+        :class:`AppGroup`.
+        """
+        kwargs.setdefault('cls', AppGroup)
+        return click.Group.group(self, *args, **kwargs)
+
+
+class FlaskGroup(AppGroup):
+    """Special subclass of the :class:`AppGroup` group that supports
+    loading more commands from the configured Flask app. Normally a
+    developer does not have to interface with this class but there are
+    some very advanced use cases for which it makes sense to create an
+    instance of this.
+
+    For information on why this is useful see :ref:`custom-scripts`.
+
+    :param add_default_commands: if this is True then the default run and
+        shell commands will be added.
+    :param add_version_option: adds the ``--version`` option.
+    :param create_app: an optional callback that is passed the script info
+        and returns the loaded app.
+    :param load_dotenv: Load the nearest :file:`.env` and :file:`.flaskenv`
+        files to set environment variables. Will also change the working
+        directory to the directory containing the first file found.
+    :param set_debug_flag: Set the app's debug flag based on the active
+        environment
+
+    .. versionchanged:: 1.0
+        If installed, python-dotenv will be used to load environment
+        variables from :file:`.env` and :file:`.flaskenv` files.
+    """
+
+    def __init__(self, add_default_commands=True, create_app=None,
+                 add_version_option=True, load_dotenv=True,
+                 set_debug_flag=True, **extra):
+        params = list(extra.pop('params', None) or ())
+
+        if add_version_option:
+            params.append(version_option)
+
+        AppGroup.__init__(self, params=params, **extra)
+        self.create_app = create_app
+        self.load_dotenv = load_dotenv
+        self.set_debug_flag = set_debug_flag
+
+        if add_default_commands:
+            self.add_command(run_command)
+            self.add_command(shell_command)
+            self.add_command(routes_command)
+
+        self._loaded_plugin_commands = False
+
+    def _load_plugin_commands(self):
+        if self._loaded_plugin_commands:
+            return
+        try:
+            import pkg_resources
+        except ImportError:
+            self._loaded_plugin_commands = True
+            return
+
+        for ep in pkg_resources.iter_entry_points('flask.commands'):
+            self.add_command(ep.load(), ep.name)
+        self._loaded_plugin_commands = True
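+    # Sketch of the commands this group ultimately collects: besides the
+    # built-ins added in __init__, the application can contribute its own
+    # commands via ``app.cli`` (an AppGroup), which wraps them in
+    # with_appcontext automatically. The ``init_db`` command and
+    # ``database`` object below are illustrative application code::
+    #
+    #     @app.cli.command()
+    #     def init_db():
+    #         """Runs with an application context pushed."""
+    #         database.create_all()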
+
+    def get_command(self, ctx, name):
+        self._load_plugin_commands()
+
+        # We load built-in commands first as these should always be the
+        # same no matter what the app does. If the app does want to
+        # override this it needs to make a custom instance of this group
+        # and not attach the default commands.
+        #
+        # This also means that the script stays functional in case the
+        # application completely fails.
+        rv = AppGroup.get_command(self, ctx, name)
+        if rv is not None:
+            return rv
+
+        info = ctx.ensure_object(ScriptInfo)
+        try:
+            rv = info.load_app().cli.get_command(ctx, name)
+            if rv is not None:
+                return rv
+        except NoAppException:
+            pass
+
+    def list_commands(self, ctx):
+        self._load_plugin_commands()
+
+        # The available commands are the built-in commands plus the
+        # commands provided by the application (if it is available).
+        rv = set(click.Group.list_commands(self, ctx))
+        info = ctx.ensure_object(ScriptInfo)
+        try:
+            rv.update(info.load_app().cli.list_commands(ctx))
+        except Exception:
+            # Here we intentionally swallow all exceptions as we don't
+            # want the help page to break if the app does not exist.
+            # If someone attempts to use the command we try to create
+            # the app again and this will give us the error.
+            # However, we will not do so silently because that would
+            # confuse users.
+            traceback.print_exc()
+        return sorted(rv)
+
+    def main(self, *args, **kwargs):
+        # Set a global flag that indicates that we were invoked from the
+        # command line interface. This is detected by Flask.run to make
+        # the call into a no-op. This is necessary to avoid ugly errors
+        # when the script that is loaded here also attempts to start a
+        # server.
+        os.environ['FLASK_RUN_FROM_CLI'] = 'true'
+
+        if get_load_dotenv(self.load_dotenv):
+            load_dotenv()
+
+        obj = kwargs.get('obj')
+
+        if obj is None:
+            obj = ScriptInfo(create_app=self.create_app,
+                             set_debug_flag=self.set_debug_flag)
+
+        kwargs['obj'] = obj
+        kwargs.setdefault('auto_envvar_prefix', 'FLASK')
+        return super(FlaskGroup, self).main(*args, **kwargs)
+
+
+def _path_is_ancestor(path, other):
+    """Take ``other`` and remove the length of ``path`` from it. Then join
+    it to ``path``. If it is the original value, ``path`` is an ancestor
+    of ``other``."""
+    return os.path.join(path, other[len(path):].lstrip(os.sep)) == other
+
+
+def load_dotenv(path=None):
+    """Load "dotenv" files in order of precedence to set environment
+    variables.
+
+    If an env var is already set it is not overwritten, so earlier files
+    in the list are preferred over later files.
+
+    Changes the current working directory to the location of the first
+    file found, with the assumption that it is in the top level project
+    directory and will be where the Python path should import local
+    packages from.
+
+    This is a no-op if `python-dotenv`_ is not installed.
+
+    .. _python-dotenv: https://github.com/theskumar/python-dotenv#readme
+
+    :param path: Load the file at this location instead of searching.
+    :return: ``True`` if a file was loaded.
+
+    .. versionadded:: 1.0
+    """
+    if dotenv is None:
+        if path or os.path.exists('.env') or os.path.exists('.flaskenv'):
+            click.secho(
+                ' * Tip: There are .env files present.'
+ ' Do "pip install python-dotenv" to use them.', + fg='yellow') + return + + if path is not None: + return dotenv.load_dotenv(path) + + new_dir = None + + for name in ('.env', '.flaskenv'): + path = dotenv.find_dotenv(name, usecwd=True) + + if not path: + continue + + if new_dir is None: + new_dir = os.path.dirname(path) + + dotenv.load_dotenv(path) + + if new_dir and os.getcwd() != new_dir: + os.chdir(new_dir) + + return new_dir is not None # at least one file was located and loaded + + +def show_server_banner(env, debug, app_import_path, eager_loading): + """Show extra startup messages the first time the server is run, + ignoring the reloader. + """ + if os.environ.get('WERKZEUG_RUN_MAIN') == 'true': + return + + if app_import_path is not None: + message = ' * Serving Flask app "{0}"'.format(app_import_path) + + if not eager_loading: + message += ' (lazy loading)' + + click.echo(message) + + click.echo(' * Environment: {0}'.format(env)) + + if env == 'production': + click.secho( + ' WARNING: This is a development server. ' + 'Do not use it in a production deployment.', fg='red') + click.secho(' Use a production WSGI server instead.', dim=True) + + if debug is not None: + click.echo(' * Debug mode: {0}'.format('on' if debug else 'off')) + + +class CertParamType(click.ParamType): + """Click option type for the ``--cert`` option. Allows either an + existing file, the string ``'adhoc'``, or an import for a + :class:`~ssl.SSLContext` object. + """ + + name = 'path' + + def __init__(self): + self.path_type = click.Path( + exists=True, dir_okay=False, resolve_path=True) + + def convert(self, value, param, ctx): + try: + return self.path_type(value, param, ctx) + except click.BadParameter: + value = click.STRING(value, param, ctx).lower() + + if value == 'adhoc': + try: + import OpenSSL + except ImportError: + raise click.BadParameter( + 'Using ad-hoc certificates requires pyOpenSSL.', + ctx, param) + + return value + + obj = import_string(value, silent=True) + + if sys.version_info < (2, 7, 9): + if obj: + return obj + else: + if isinstance(obj, ssl.SSLContext): + return obj + + raise + + +def _validate_key(ctx, param, value): + """The ``--key`` option must be specified when ``--cert`` is a file. + Modifies the ``cert`` param to be a ``(cert, key)`` pair if needed. 
+ """ + cert = ctx.params.get('cert') + is_adhoc = cert == 'adhoc' + + if sys.version_info < (2, 7, 9): + is_context = cert and not isinstance(cert, (text_type, bytes)) + else: + is_context = isinstance(cert, ssl.SSLContext) + + if value is not None: + if is_adhoc: + raise click.BadParameter( + 'When "--cert" is "adhoc", "--key" is not used.', + ctx, param) + + if is_context: + raise click.BadParameter( + 'When "--cert" is an SSLContext object, "--key is not used.', + ctx, param) + + if not cert: + raise click.BadParameter( + '"--cert" must also be specified.', + ctx, param) + + ctx.params['cert'] = cert, value + + else: + if cert and not (is_adhoc or is_context): + raise click.BadParameter( + 'Required when using "--cert".', + ctx, param) + + return value + + +@click.command('run', short_help='Run a development server.') +@click.option('--host', '-h', default='127.0.0.1', + help='The interface to bind to.') +@click.option('--port', '-p', default=5000, + help='The port to bind to.') +@click.option('--cert', type=CertParamType(), + help='Specify a certificate file to use HTTPS.') +@click.option('--key', + type=click.Path(exists=True, dir_okay=False, resolve_path=True), + callback=_validate_key, expose_value=False, + help='The key file to use when specifying a certificate.') +@click.option('--reload/--no-reload', default=None, + help='Enable or disable the reloader. By default the reloader ' + 'is active if debug is enabled.') +@click.option('--debugger/--no-debugger', default=None, + help='Enable or disable the debugger. By default the debugger ' + 'is active if debug is enabled.') +@click.option('--eager-loading/--lazy-loader', default=None, + help='Enable or disable eager loading. By default eager ' + 'loading is enabled if the reloader is disabled.') +@click.option('--with-threads/--without-threads', default=True, + help='Enable or disable multithreading.') +@pass_script_info +def run_command(info, host, port, reload, debugger, eager_loading, + with_threads, cert): + """Run a local development server. + + This server is for development purposes only. It does not provide + the stability, security, or performance of production WSGI servers. + + The reloader and debugger are enabled by default if + FLASK_ENV=development or FLASK_DEBUG=1. + """ + debug = get_debug_flag() + + if reload is None: + reload = debug + + if debugger is None: + debugger = debug + + if eager_loading is None: + eager_loading = not reload + + show_server_banner(get_env(), debug, info.app_import_path, eager_loading) + app = DispatchingApp(info.load_app, use_eager_loading=eager_loading) + + from werkzeug.serving import run_simple + run_simple(host, port, app, use_reloader=reload, use_debugger=debugger, + threaded=with_threads, ssl_context=cert) + + +@click.command('shell', short_help='Run a shell in the app context.') +@with_appcontext +def shell_command(): + """Run an interactive Python shell in the context of a given + Flask application. The application will populate the default + namespace of this shell according to it's configuration. + + This is useful for executing small snippets of management code + without having to manually configure the application. + """ + import code + from flask.globals import _app_ctx_stack + app = _app_ctx_stack.top.app + banner = 'Python %s on %s\nApp: %s [%s]\nInstance: %s' % ( + sys.version, + sys.platform, + app.import_name, + app.env, + app.instance_path, + ) + ctx = {} + + # Support the regular Python interpreter startup script if someone + # is using it. 
+
+
+@click.command('routes', short_help='Show the routes for the app.')
+@click.option(
+    '--sort', '-s',
+    type=click.Choice(('endpoint', 'methods', 'rule', 'match')),
+    default='endpoint',
+    help=(
+        'Method to sort routes by. "match" is the order that Flask will match '
+        'routes when dispatching a request.'
+    )
+)
+@click.option(
+    '--all-methods',
+    is_flag=True,
+    help="Show HEAD and OPTIONS methods."
+)
+@with_appcontext
+def routes_command(sort, all_methods):
+    """Show all registered routes with endpoints and methods."""
+
+    rules = list(current_app.url_map.iter_rules())
+    if not rules:
+        click.echo('No routes were registered.')
+        return
+
+    ignored_methods = set(() if all_methods else ('HEAD', 'OPTIONS'))
+
+    if sort in ('endpoint', 'rule'):
+        rules = sorted(rules, key=attrgetter(sort))
+    elif sort == 'methods':
+        rules = sorted(rules, key=lambda rule: sorted(rule.methods))
+
+    rule_methods = [
+        ', '.join(sorted(rule.methods - ignored_methods)) for rule in rules
+    ]
+
+    headers = ('Endpoint', 'Methods', 'Rule')
+    widths = (
+        max(len(rule.endpoint) for rule in rules),
+        max(len(methods) for methods in rule_methods),
+        max(len(rule.rule) for rule in rules),
+    )
+    widths = [max(len(h), w) for h, w in zip(headers, widths)]
+    row = '{{0:<{0}}}  {{1:<{1}}}  {{2:<{2}}}'.format(*widths)
+
+    click.echo(row.format(*headers).strip())
+    click.echo(row.format(*('-' * width for width in widths)))
+
+    for rule, methods in zip(rules, rule_methods):
+        click.echo(row.format(rule.endpoint, methods, rule.rule).rstrip())
+
+
+cli = FlaskGroup(help="""\
+A general utility script for Flask applications.
+
+Provides commands from Flask, extensions, and the application. Loads the
+application defined in the FLASK_APP environment variable, or from a wsgi.py
+file. Setting the FLASK_ENV environment variable to 'development' will enable
+debug mode.
+
+\b
+  {prefix}{cmd} FLASK_APP=hello.py
+  {prefix}{cmd} FLASK_ENV=development
+  {prefix}flask run
+""".format(
+    cmd='export' if os.name == 'posix' else 'set',
+    prefix='$ ' if os.name == 'posix' else '> '
+))
+
+
+def main(as_module=False):
+    args = sys.argv[1:]
+
+    if as_module:
+        this_module = 'flask'
+
+        if sys.version_info < (2, 7):
+            this_module += '.cli'
+
+        name = 'python -m ' + this_module
+
+        # Python rewrites "python -m flask" to the path to the file in argv.
+        # Restore the original command so that the reloader works.
+        sys.argv = ['-m', this_module] + args
+    else:
+        name = None
+
+    cli.main(args=args, prog_name=name)
+
+
+if __name__ == '__main__':
+    main(as_module=True)
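
The module-level ``cli`` group above is what the installed ``flask``
executable dispatches to; the same ``FlaskGroup`` class also supports custom
management scripts, per its docstring. A minimal sketch (the ``myapp`` module
is hypothetical):

    import click
    from flask.cli import FlaskGroup

    def create_my_app(info):
        # `info` is the ScriptInfo instance; return the loaded application.
        from myapp import app
        return app

    @click.group(cls=FlaskGroup, create_app=create_my_app)
    def cli():
        """Management script for myapp."""

    if __name__ == '__main__':
        cli()
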
diff --git a/python/flask/config.py b/python/flask/config.py
new file mode 100644
index 0000000..a5475ed
--- /dev/null
+++ b/python/flask/config.py
@@ -0,0 +1,269 @@
+# -*- coding: utf-8 -*-
+"""
+    flask.config
+    ~~~~~~~~~~~~
+
+    Implements the configuration related objects.
+
+    :copyright: © 2010 by the Pallets team.
+    :license: BSD, see LICENSE for more details.
+"""
+
+import os
+import types
+import errno
+
+from werkzeug.utils import import_string
+from ._compat import string_types, iteritems
+from . import json
+
+
+class ConfigAttribute(object):
+    """Makes an attribute forward to the config"""
+
+    def __init__(self, name, get_converter=None):
+        self.__name__ = name
+        self.get_converter = get_converter
+
+    def __get__(self, obj, type=None):
+        if obj is None:
+            return self
+        rv = obj.config[self.__name__]
+        if self.get_converter is not None:
+            rv = self.get_converter(rv)
+        return rv
+
+    def __set__(self, obj, value):
+        obj.config[self.__name__] = value
+
+
+class Config(dict):
+    """Works exactly like a dict but provides ways to fill it from files
+    or special dictionaries. There are two common patterns to populate the
+    config.
+
+    Either you can fill the config from a config file::
+
+        app.config.from_pyfile('yourconfig.cfg')
+
+    Or alternatively you can define the configuration options in the
+    module that calls :meth:`from_object` or provide an import path to
+    a module that should be loaded. It is also possible to tell it to
+    use the same module and with that provide the configuration values
+    just before the call::
+
+        DEBUG = True
+        SECRET_KEY = 'development key'
+        app.config.from_object(__name__)
+
+    In both cases (loading from any Python file or loading from modules),
+    only uppercase keys are added to the config. This makes it possible to use
+    lowercase values in the config file for temporary values that are not added
+    to the config or to define the config keys in the same file that implements
+    the application.
+
+    Probably the most interesting way to load configurations is from an
+    environment variable pointing to a file::
+
+        app.config.from_envvar('YOURAPPLICATION_SETTINGS')
+
+    In this case before launching the application you have to set this
+    environment variable to the file you want to use. On Linux and OS X
+    use the export statement::
+
+        export YOURAPPLICATION_SETTINGS='/path/to/config/file'
+
+    On Windows use `set` instead.
+
+    :param root_path: the path relative to which files are read. When the
+                      config object is created by the application, this is
+                      the application's :attr:`~flask.Flask.root_path`.
+    :param defaults: an optional dictionary of default values
+    """
+
+    def __init__(self, root_path, defaults=None):
+        dict.__init__(self, defaults or {})
+        self.root_path = root_path
+
+    def from_envvar(self, variable_name, silent=False):
+        """Loads a configuration from an environment variable pointing to
+        a configuration file. This is basically just a shortcut with nicer
+        error messages for this line of code::
+
+            app.config.from_pyfile(os.environ['YOURAPPLICATION_SETTINGS'])
+
+        :param variable_name: name of the environment variable
+        :param silent: set to ``True`` if you want silent failure for missing
+                       files.
+        :return: bool. ``True`` if able to load config, ``False`` otherwise.
+        """
+        rv = os.environ.get(variable_name)
+        if not rv:
+            if silent:
+                return False
+            raise RuntimeError('The environment variable %r is not set '
+                               'and as such configuration could not be '
+                               'loaded. Set this variable and make it '
+                               'point to a configuration file' %
+                               variable_name)
+        return self.from_pyfile(rv, silent=silent)
+
+    def from_pyfile(self, filename, silent=False):
+        """Updates the values in the config from a Python file. This function
+        behaves as if the file was imported as a module with the
+        :meth:`from_object` function.
+
+        :param filename: the filename of the config. This can either be an
+                         absolute filename or a filename relative to the
+                         root path.
+        :param silent: set to ``True`` if you want silent failure for missing
+                       files.
+
+        ..
versionadded:: 0.7 + `silent` parameter. + """ + filename = os.path.join(self.root_path, filename) + d = types.ModuleType('config') + d.__file__ = filename + try: + with open(filename, mode='rb') as config_file: + exec(compile(config_file.read(), filename, 'exec'), d.__dict__) + except IOError as e: + if silent and e.errno in ( + errno.ENOENT, errno.EISDIR, errno.ENOTDIR + ): + return False + e.strerror = 'Unable to load configuration file (%s)' % e.strerror + raise + self.from_object(d) + return True + + def from_object(self, obj): + """Updates the values from the given object. An object can be of one + of the following two types: + + - a string: in this case the object with that name will be imported + - an actual object reference: that object is used directly + + Objects are usually either modules or classes. :meth:`from_object` + loads only the uppercase attributes of the module/class. A ``dict`` + object will not work with :meth:`from_object` because the keys of a + ``dict`` are not attributes of the ``dict`` class. + + Example of module-based configuration:: + + app.config.from_object('yourapplication.default_config') + from yourapplication import default_config + app.config.from_object(default_config) + + Nothing is done to the object before loading. If the object is a + class and has ``@property`` attributes, it needs to be + instantiated before being passed to this method. + + You should not use this function to load the actual configuration but + rather configuration defaults. The actual config should be loaded + with :meth:`from_pyfile` and ideally from a location not within the + package because the package might be installed system wide. + + See :ref:`config-dev-prod` for an example of class-based configuration + using :meth:`from_object`. + + :param obj: an import name or object + """ + if isinstance(obj, string_types): + obj = import_string(obj) + for key in dir(obj): + if key.isupper(): + self[key] = getattr(obj, key) + + def from_json(self, filename, silent=False): + """Updates the values in the config from a JSON file. This function + behaves as if the JSON object was a dictionary and passed to the + :meth:`from_mapping` function. + + :param filename: the filename of the JSON file. This can either be an + absolute filename or a filename relative to the + root path. + :param silent: set to ``True`` if you want silent failure for missing + files. + + .. versionadded:: 0.11 + """ + filename = os.path.join(self.root_path, filename) + + try: + with open(filename) as json_file: + obj = json.loads(json_file.read()) + except IOError as e: + if silent and e.errno in (errno.ENOENT, errno.EISDIR): + return False + e.strerror = 'Unable to load configuration file (%s)' % e.strerror + raise + return self.from_mapping(obj) + + def from_mapping(self, *mapping, **kwargs): + """Updates the config like :meth:`update` ignoring items with non-upper + keys. + + .. versionadded:: 0.11 + """ + mappings = [] + if len(mapping) == 1: + if hasattr(mapping[0], 'items'): + mappings.append(mapping[0].items()) + else: + mappings.append(mapping[0]) + elif len(mapping) > 1: + raise TypeError( + 'expected at most 1 positional argument, got %d' % len(mapping) + ) + mappings.append(kwargs.items()) + for mapping in mappings: + for (key, value) in mapping: + if key.isupper(): + self[key] = value + return True + + def get_namespace(self, namespace, lowercase=True, trim_namespace=True): + """Returns a dictionary containing a subset of configuration options + that match the specified namespace/prefix. 
Example usage::
+
+            app.config['IMAGE_STORE_TYPE'] = 'fs'
+            app.config['IMAGE_STORE_PATH'] = '/var/app/images'
+            app.config['IMAGE_STORE_BASE_URL'] = 'http://img.website.com'
+            image_store_config = app.config.get_namespace('IMAGE_STORE_')
+
+        The resulting dictionary `image_store_config` would look like::
+
+            {
+                'type': 'fs',
+                'path': '/var/app/images',
+                'base_url': 'http://img.website.com'
+            }
+
+        This is often useful when configuration options map directly to
+        keyword arguments in functions or class constructors.
+
+        :param namespace: a configuration namespace
+        :param lowercase: a flag indicating if the keys of the resulting
+                          dictionary should be lowercase
+        :param trim_namespace: a flag indicating if the keys of the resulting
+                               dictionary should not include the namespace
+
+        .. versionadded:: 0.11
+        """
+        rv = {}
+        for k, v in iteritems(self):
+            if not k.startswith(namespace):
+                continue
+            if trim_namespace:
+                key = k[len(namespace):]
+            else:
+                key = k
+            if lowercase:
+                key = key.lower()
+            rv[key] = v
+        return rv
+
+    def __repr__(self):
+        return '<%s %s>' % (self.__class__.__name__, dict.__repr__(self))
diff --git a/python/flask/ctx.py b/python/flask/ctx.py
new file mode 100644
index 0000000..ec8e787
--- /dev/null
+++ b/python/flask/ctx.py
@@ -0,0 +1,457 @@
+# -*- coding: utf-8 -*-
+"""
+    flask.ctx
+    ~~~~~~~~~
+
+    Implements the objects required to keep the context.
+
+    :copyright: © 2010 by the Pallets team.
+    :license: BSD, see LICENSE for more details.
+"""
+
+import sys
+from functools import update_wrapper
+
+from werkzeug.exceptions import HTTPException
+
+from .globals import _request_ctx_stack, _app_ctx_stack
+from .signals import appcontext_pushed, appcontext_popped
+from ._compat import BROKEN_PYPY_CTXMGR_EXIT, reraise
+
+
+# a singleton sentinel value for parameter defaults
+_sentinel = object()
+
+
+class _AppCtxGlobals(object):
+    """A plain object. Used as a namespace for storing data during an
+    application context.
+
+    Creating an app context automatically creates this object, which is
+    made available as the :data:`g` proxy.
+
+    .. describe:: 'key' in g
+
+        Check whether an attribute is present.
+
+        .. versionadded:: 0.10
+
+    .. describe:: iter(g)
+
+        Return an iterator over the attribute names.
+
+        .. versionadded:: 0.10
+    """
+
+    def get(self, name, default=None):
+        """Get an attribute by name, or a default value. Like
+        :meth:`dict.get`.
+
+        :param name: Name of attribute to get.
+        :param default: Value to return if the attribute is not present.
+
+        .. versionadded:: 0.10
+        """
+        return self.__dict__.get(name, default)
+
+    def pop(self, name, default=_sentinel):
+        """Get and remove an attribute by name. Like :meth:`dict.pop`.
+
+        :param name: Name of attribute to pop.
+        :param default: Value to return if the attribute is not present,
+            instead of raising a ``KeyError``.
+
+        .. versionadded:: 0.11
+        """
+        if default is _sentinel:
+            return self.__dict__.pop(name)
+        else:
+            return self.__dict__.pop(name, default)
+
+    def setdefault(self, name, default=None):
+        """Get the value of an attribute if it is present, otherwise
+        set and return a default value. Like :meth:`dict.setdefault`.
+
+        :param name: Name of attribute to get.
+        :param default: Value to set and return if the attribute is not
+            present.
+
+        .. versionadded:: 0.11
+        """
+        return self.__dict__.setdefault(name, default)
+
+    def __contains__(self, item):
+        return item in self.__dict__
+
+    def __iter__(self):
+        return iter(self.__dict__)
+
+    def __repr__(self):
+        top = _app_ctx_stack.top
+        if top is not None:
+            return '<flask.g of %r>' % top.app.name
+        return object.__repr__(self)
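
These dict-like helpers are what make the :data:`g` proxy usable as a
per-context scratch space. A minimal sketch of their behavior inside an
application context (the attribute names are illustrative):

    from flask import Flask, g

    app = Flask(__name__)

    with app.app_context():
        g.db_handle = 'example'                            # plain attribute
        assert g.get('db_handle') == 'example'             # dict.get semantics
        assert g.get('missing', 'fallback') == 'fallback'
        assert 'db_handle' in g
        g.pop('db_handle')                                 # gone for the rest of this context
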
+
+
+def after_this_request(f):
+    """Executes a function after this request. This is useful to modify
+    response objects. The function is passed the response object and has
+    to return the same or a new one.
+
+    Example::
+
+        @app.route('/')
+        def index():
+            @after_this_request
+            def add_header(response):
+                response.headers['X-Foo'] = 'Parachute'
+                return response
+            return 'Hello World!'
+
+    This is more useful if a function other than the view function wants to
+    modify a response. For instance think of a decorator that wants to add
+    some headers without converting the return value into a response object.
+
+    .. versionadded:: 0.9
+    """
+    _request_ctx_stack.top._after_request_functions.append(f)
+    return f
+
+
+def copy_current_request_context(f):
+    """A helper function that decorates a function to retain the current
+    request context. This is useful when working with greenlets. The moment
+    the function is decorated a copy of the request context is created and
+    then pushed when the function is called.
+
+    Example::
+
+        import gevent
+        from flask import copy_current_request_context
+
+        @app.route('/')
+        def index():
+            @copy_current_request_context
+            def do_some_work():
+                # do some work here, it can access flask.request like you
+                # would otherwise in the view function.
+                ...
+            gevent.spawn(do_some_work)
+            return 'Regular response'
+
+    .. versionadded:: 0.10
+    """
+    top = _request_ctx_stack.top
+    if top is None:
+        raise RuntimeError('This decorator can only be used at local scopes '
+            'when a request context is on the stack. For instance within '
+            'view functions.')
+    reqctx = top.copy()
+    def wrapper(*args, **kwargs):
+        with reqctx:
+            return f(*args, **kwargs)
+    return update_wrapper(wrapper, f)
+
+
+def has_request_context():
+    """If you have code that wants to test whether a request context is
+    present, this function can be used. For instance, you may want to take
+    advantage of request information if the request object is available, but
+    fail silently if it is unavailable.
+
+    ::
+
+        class User(db.Model):
+
+            def __init__(self, username, remote_addr=None):
+                self.username = username
+                if remote_addr is None and has_request_context():
+                    remote_addr = request.remote_addr
+                self.remote_addr = remote_addr
+
+    Alternatively you can also just test any of the context bound objects
+    (such as :class:`request` or :class:`g`) for truthiness::
+
+        class User(db.Model):
+
+            def __init__(self, username, remote_addr=None):
+                self.username = username
+                if remote_addr is None and request:
+                    remote_addr = request.remote_addr
+                self.remote_addr = remote_addr
+
+    .. versionadded:: 0.7
+    """
+    return _request_ctx_stack.top is not None
+
+
+def has_app_context():
+    """Works like :func:`has_request_context` but for the application
+    context. You can also just do a boolean check on the
+    :data:`current_app` object instead.
+
+    .. versionadded:: 0.9
+    """
+    return _app_ctx_stack.top is not None
+
+
+class AppContext(object):
+    """The application context binds an application object implicitly
+    to the current thread or greenlet, similar to how the
+    :class:`RequestContext` binds request information.
The application + context is also implicitly created if a request context is created + but the application is not on top of the individual application + context. + """ + + def __init__(self, app): + self.app = app + self.url_adapter = app.create_url_adapter(None) + self.g = app.app_ctx_globals_class() + + # Like request context, app contexts can be pushed multiple times + # but there a basic "refcount" is enough to track them. + self._refcnt = 0 + + def push(self): + """Binds the app context to the current context.""" + self._refcnt += 1 + if hasattr(sys, 'exc_clear'): + sys.exc_clear() + _app_ctx_stack.push(self) + appcontext_pushed.send(self.app) + + def pop(self, exc=_sentinel): + """Pops the app context.""" + try: + self._refcnt -= 1 + if self._refcnt <= 0: + if exc is _sentinel: + exc = sys.exc_info()[1] + self.app.do_teardown_appcontext(exc) + finally: + rv = _app_ctx_stack.pop() + assert rv is self, 'Popped wrong app context. (%r instead of %r)' \ + % (rv, self) + appcontext_popped.send(self.app) + + def __enter__(self): + self.push() + return self + + def __exit__(self, exc_type, exc_value, tb): + self.pop(exc_value) + + if BROKEN_PYPY_CTXMGR_EXIT and exc_type is not None: + reraise(exc_type, exc_value, tb) + + +class RequestContext(object): + """The request context contains all request relevant information. It is + created at the beginning of the request and pushed to the + `_request_ctx_stack` and removed at the end of it. It will create the + URL adapter and request object for the WSGI environment provided. + + Do not attempt to use this class directly, instead use + :meth:`~flask.Flask.test_request_context` and + :meth:`~flask.Flask.request_context` to create this object. + + When the request context is popped, it will evaluate all the + functions registered on the application for teardown execution + (:meth:`~flask.Flask.teardown_request`). + + The request context is automatically popped at the end of the request + for you. In debug mode the request context is kept around if + exceptions happen so that interactive debuggers have a chance to + introspect the data. With 0.4 this can also be forced for requests + that did not fail and outside of ``DEBUG`` mode. By setting + ``'flask._preserve_context'`` to ``True`` on the WSGI environment the + context will not pop itself at the end of the request. This is used by + the :meth:`~flask.Flask.test_client` for example to implement the + deferred cleanup functionality. + + You might find this helpful for unittests where you need the + information from the context local around for a little longer. Make + sure to properly :meth:`~werkzeug.LocalStack.pop` the stack yourself in + that situation, otherwise your unittests will leak memory. + """ + + def __init__(self, app, environ, request=None): + self.app = app + if request is None: + request = app.request_class(environ) + self.request = request + self.url_adapter = app.create_url_adapter(self.request) + self.flashes = None + self.session = None + + # Request contexts can be pushed multiple times and interleaved with + # other request contexts. Now only if the last level is popped we + # get rid of them. Additionally if an application context is missing + # one is created implicitly so for each level we add this information + self._implicit_app_ctx_stack = [] + + # indicator if the context was preserved. Next time another context + # is pushed the preserved context is popped. 
+ self.preserved = False + + # remembers the exception for pop if there is one in case the context + # preservation kicks in. + self._preserved_exc = None + + # Functions that should be executed after the request on the response + # object. These will be called before the regular "after_request" + # functions. + self._after_request_functions = [] + + self.match_request() + + def _get_g(self): + return _app_ctx_stack.top.g + def _set_g(self, value): + _app_ctx_stack.top.g = value + g = property(_get_g, _set_g) + del _get_g, _set_g + + def copy(self): + """Creates a copy of this request context with the same request object. + This can be used to move a request context to a different greenlet. + Because the actual request object is the same this cannot be used to + move a request context to a different thread unless access to the + request object is locked. + + .. versionadded:: 0.10 + """ + return self.__class__(self.app, + environ=self.request.environ, + request=self.request + ) + + def match_request(self): + """Can be overridden by a subclass to hook into the matching + of the request. + """ + try: + url_rule, self.request.view_args = \ + self.url_adapter.match(return_rule=True) + self.request.url_rule = url_rule + except HTTPException as e: + self.request.routing_exception = e + + def push(self): + """Binds the request context to the current context.""" + # If an exception occurs in debug mode or if context preservation is + # activated under exception situations exactly one context stays + # on the stack. The rationale is that you want to access that + # information under debug situations. However if someone forgets to + # pop that context again we want to make sure that on the next push + # it's invalidated, otherwise we run at risk that something leaks + # memory. This is usually only a problem in test suite since this + # functionality is not active in production environments. + top = _request_ctx_stack.top + if top is not None and top.preserved: + top.pop(top._preserved_exc) + + # Before we push the request context we have to ensure that there + # is an application context. + app_ctx = _app_ctx_stack.top + if app_ctx is None or app_ctx.app != self.app: + app_ctx = self.app.app_context() + app_ctx.push() + self._implicit_app_ctx_stack.append(app_ctx) + else: + self._implicit_app_ctx_stack.append(None) + + if hasattr(sys, 'exc_clear'): + sys.exc_clear() + + _request_ctx_stack.push(self) + + # Open the session at the moment that the request context is available. + # This allows a custom open_session method to use the request context. + # Only open a new session if this is the first time the request was + # pushed, otherwise stream_with_context loses the session. + if self.session is None: + session_interface = self.app.session_interface + self.session = session_interface.open_session( + self.app, self.request + ) + + if self.session is None: + self.session = session_interface.make_null_session(self.app) + + def pop(self, exc=_sentinel): + """Pops the request context and unbinds it by doing that. This will + also trigger the execution of functions registered by the + :meth:`~flask.Flask.teardown_request` decorator. + + .. versionchanged:: 0.9 + Added the `exc` argument. 
+ """ + app_ctx = self._implicit_app_ctx_stack.pop() + + try: + clear_request = False + if not self._implicit_app_ctx_stack: + self.preserved = False + self._preserved_exc = None + if exc is _sentinel: + exc = sys.exc_info()[1] + self.app.do_teardown_request(exc) + + # If this interpreter supports clearing the exception information + # we do that now. This will only go into effect on Python 2.x, + # on 3.x it disappears automatically at the end of the exception + # stack. + if hasattr(sys, 'exc_clear'): + sys.exc_clear() + + request_close = getattr(self.request, 'close', None) + if request_close is not None: + request_close() + clear_request = True + finally: + rv = _request_ctx_stack.pop() + + # get rid of circular dependencies at the end of the request + # so that we don't require the GC to be active. + if clear_request: + rv.request.environ['werkzeug.request'] = None + + # Get rid of the app as well if necessary. + if app_ctx is not None: + app_ctx.pop(exc) + + assert rv is self, 'Popped wrong request context. ' \ + '(%r instead of %r)' % (rv, self) + + def auto_pop(self, exc): + if self.request.environ.get('flask._preserve_context') or \ + (exc is not None and self.app.preserve_context_on_exception): + self.preserved = True + self._preserved_exc = exc + else: + self.pop(exc) + + def __enter__(self): + self.push() + return self + + def __exit__(self, exc_type, exc_value, tb): + # do not pop the request stack if we are in debug mode and an + # exception happened. This will allow the debugger to still + # access the request object in the interactive shell. Furthermore + # the context can be force kept alive for the test client. + # See flask.testing for how this works. + self.auto_pop(exc_value) + + if BROKEN_PYPY_CTXMGR_EXIT and exc_type is not None: + reraise(exc_type, exc_value, tb) + + def __repr__(self): + return '<%s \'%s\' [%s] of %s>' % ( + self.__class__.__name__, + self.request.url, + self.request.method, + self.app.name, + ) diff --git a/python/flask/debughelpers.py b/python/flask/debughelpers.py new file mode 100644 index 0000000..e9765f2 --- /dev/null +++ b/python/flask/debughelpers.py @@ -0,0 +1,168 @@ +# -*- coding: utf-8 -*- +""" + flask.debughelpers + ~~~~~~~~~~~~~~~~~~ + + Various helpers to make the development experience better. + + :copyright: © 2010 by the Pallets team. + :license: BSD, see LICENSE for more details. +""" + +import os +from warnings import warn + +from ._compat import implements_to_string, text_type +from .app import Flask +from .blueprints import Blueprint +from .globals import _request_ctx_stack + + +class UnexpectedUnicodeError(AssertionError, UnicodeError): + """Raised in places where we want some better error reporting for + unexpected unicode or binary data. + """ + + +@implements_to_string +class DebugFilesKeyError(KeyError, AssertionError): + """Raised from request.files during debugging. The idea is that it can + provide a better error message than just a generic KeyError/BadRequest. + """ + + def __init__(self, request, key): + form_matches = request.form.getlist(key) + buf = ['You tried to access the file "%s" in the request.files ' + 'dictionary but it does not exist. The mimetype for the request ' + 'is "%s" instead of "multipart/form-data" which means that no ' + 'file contents were transmitted. To fix this error you should ' + 'provide enctype="multipart/form-data" in your form.' % + (key, request.mimetype)] + if form_matches: + buf.append('\n\nThe browser instead transmitted some file names. 
' + 'This was submitted: %s' % ', '.join('"%s"' % x + for x in form_matches)) + self.msg = ''.join(buf) + + def __str__(self): + return self.msg + + +class FormDataRoutingRedirect(AssertionError): + """This exception is raised by Flask in debug mode if it detects a + redirect caused by the routing system when the request method is not + GET, HEAD or OPTIONS. Reasoning: form data will be dropped. + """ + + def __init__(self, request): + exc = request.routing_exception + buf = ['A request was sent to this URL (%s) but a redirect was ' + 'issued automatically by the routing system to "%s".' + % (request.url, exc.new_url)] + + # In case just a slash was appended we can be extra helpful + if request.base_url + '/' == exc.new_url.split('?')[0]: + buf.append(' The URL was defined with a trailing slash so ' + 'Flask will automatically redirect to the URL ' + 'with the trailing slash if it was accessed ' + 'without one.') + + buf.append(' Make sure to directly send your %s-request to this URL ' + 'since we can\'t make browsers or HTTP clients redirect ' + 'with form data reliably or without user interaction.' % + request.method) + buf.append('\n\nNote: this exception is only raised in debug mode') + AssertionError.__init__(self, ''.join(buf).encode('utf-8')) + + +def attach_enctype_error_multidict(request): + """Since Flask 0.8 we're monkeypatching the files object in case a + request is detected that does not use multipart form data but the files + object is accessed. + """ + oldcls = request.files.__class__ + class newcls(oldcls): + def __getitem__(self, key): + try: + return oldcls.__getitem__(self, key) + except KeyError: + if key not in request.form: + raise + raise DebugFilesKeyError(request, key) + newcls.__name__ = oldcls.__name__ + newcls.__module__ = oldcls.__module__ + request.files.__class__ = newcls + + +def _dump_loader_info(loader): + yield 'class: %s.%s' % (type(loader).__module__, type(loader).__name__) + for key, value in sorted(loader.__dict__.items()): + if key.startswith('_'): + continue + if isinstance(value, (tuple, list)): + if not all(isinstance(x, (str, text_type)) for x in value): + continue + yield '%s:' % key + for item in value: + yield ' - %s' % item + continue + elif not isinstance(value, (str, text_type, int, float, bool)): + continue + yield '%s: %r' % (key, value) + + +def explain_template_loading_attempts(app, template, attempts): + """This should help developers understand what failed""" + info = ['Locating template "%s":' % template] + total_found = 0 + blueprint = None + reqctx = _request_ctx_stack.top + if reqctx is not None and reqctx.request.blueprint is not None: + blueprint = reqctx.request.blueprint + + for idx, (loader, srcobj, triple) in enumerate(attempts): + if isinstance(srcobj, Flask): + src_info = 'application "%s"' % srcobj.import_name + elif isinstance(srcobj, Blueprint): + src_info = 'blueprint "%s" (%s)' % (srcobj.name, + srcobj.import_name) + else: + src_info = repr(srcobj) + + info.append('% 5d: trying loader of %s' % ( + idx + 1, src_info)) + + for line in _dump_loader_info(loader): + info.append(' %s' % line) + + if triple is None: + detail = 'no match' + else: + detail = 'found (%r)' % (triple[1] or '<string>') + total_found += 1 + info.append(' -> %s' % detail) + + seems_fishy = False + if total_found == 0: + info.append('Error: the template could not be found.') + seems_fishy = True + elif total_found > 1: + info.append('Warning: multiple loaders returned a match for the template.') + seems_fishy = True + + if blueprint is not None and 
seems_fishy: + info.append(' The template was looked up from an endpoint that ' + 'belongs to the blueprint "%s".' % blueprint) + info.append(' Maybe you did not place a template in the right folder?') + info.append(' See http://flask.pocoo.org/docs/blueprints/#templates') + + app.logger.info('\n'.join(info)) + + +def explain_ignored_app_run(): + if os.environ.get('WERKZEUG_RUN_MAIN') != 'true': + warn(Warning('Silently ignoring app.run() because the ' + 'application is run from the flask command line ' + 'executable. Consider putting app.run() behind an ' + 'if __name__ == "__main__" guard to silence this ' + 'warning.'), stacklevel=3) diff --git a/python/flask/globals.py b/python/flask/globals.py new file mode 100644 index 0000000..7d50a6f --- /dev/null +++ b/python/flask/globals.py @@ -0,0 +1,61 @@ +# -*- coding: utf-8 -*- +""" + flask.globals + ~~~~~~~~~~~~~ + + Defines all the global objects that are proxies to the current + active context. + + :copyright: © 2010 by the Pallets team. + :license: BSD, see LICENSE for more details. +""" + +from functools import partial +from werkzeug.local import LocalStack, LocalProxy + + +_request_ctx_err_msg = '''\ +Working outside of request context. + +This typically means that you attempted to use functionality that needed +an active HTTP request. Consult the documentation on testing for +information about how to avoid this problem.\ +''' +_app_ctx_err_msg = '''\ +Working outside of application context. + +This typically means that you attempted to use functionality that needed +to interface with the current application object in some way. To solve +this, set up an application context with app.app_context(). See the +documentation for more information.\ +''' + + +def _lookup_req_object(name): + top = _request_ctx_stack.top + if top is None: + raise RuntimeError(_request_ctx_err_msg) + return getattr(top, name) + + +def _lookup_app_object(name): + top = _app_ctx_stack.top + if top is None: + raise RuntimeError(_app_ctx_err_msg) + return getattr(top, name) + + +def _find_app(): + top = _app_ctx_stack.top + if top is None: + raise RuntimeError(_app_ctx_err_msg) + return top.app + + +# context locals +_request_ctx_stack = LocalStack() +_app_ctx_stack = LocalStack() +current_app = LocalProxy(_find_app) +request = LocalProxy(partial(_lookup_req_object, 'request')) +session = LocalProxy(partial(_lookup_req_object, 'session')) +g = LocalProxy(partial(_lookup_app_object, 'g')) diff --git a/python/flask/helpers.py b/python/flask/helpers.py new file mode 100644 index 0000000..158edc5 --- /dev/null +++ b/python/flask/helpers.py @@ -0,0 +1,1051 @@ +# -*- coding: utf-8 -*- +""" + flask.helpers + ~~~~~~~~~~~~~ + + Implements various helpers. + + :copyright: © 2010 by the Pallets team. + :license: BSD, see LICENSE for more details. 
+""" + +import os +import socket +import sys +import pkgutil +import posixpath +import mimetypes +from time import time +from zlib import adler32 +from threading import RLock +import unicodedata +from werkzeug.routing import BuildError +from functools import update_wrapper + +from werkzeug.urls import url_quote +from werkzeug.datastructures import Headers, Range +from werkzeug.exceptions import BadRequest, NotFound, \ + RequestedRangeNotSatisfiable + +from werkzeug.wsgi import wrap_file +from jinja2 import FileSystemLoader + +from .signals import message_flashed +from .globals import session, _request_ctx_stack, _app_ctx_stack, \ + current_app, request +from ._compat import string_types, text_type, PY2 + +# sentinel +_missing = object() + + +# what separators does this operating system provide that are not a slash? +# this is used by the send_from_directory function to ensure that nobody is +# able to access files from outside the filesystem. +_os_alt_seps = list(sep for sep in [os.path.sep, os.path.altsep] + if sep not in (None, '/')) + + +def get_env(): + """Get the environment the app is running in, indicated by the + :envvar:`FLASK_ENV` environment variable. The default is + ``'production'``. + """ + return os.environ.get('FLASK_ENV') or 'production' + + +def get_debug_flag(): + """Get whether debug mode should be enabled for the app, indicated + by the :envvar:`FLASK_DEBUG` environment variable. The default is + ``True`` if :func:`.get_env` returns ``'development'``, or ``False`` + otherwise. + """ + val = os.environ.get('FLASK_DEBUG') + + if not val: + return get_env() == 'development' + + return val.lower() not in ('0', 'false', 'no') + + +def get_load_dotenv(default=True): + """Get whether the user has disabled loading dotenv files by setting + :envvar:`FLASK_SKIP_DOTENV`. The default is ``True``, load the + files. + + :param default: What to return if the env var isn't set. + """ + val = os.environ.get('FLASK_SKIP_DOTENV') + + if not val: + return default + + return val.lower() in ('0', 'false', 'no') + + +def _endpoint_from_view_func(view_func): + """Internal helper that returns the default endpoint for a given + function. This always is the function name. + """ + assert view_func is not None, 'expected view func if endpoint ' \ + 'is not provided.' + return view_func.__name__ + + +def stream_with_context(generator_or_function): + """Request contexts disappear when the response is started on the server. + This is done for efficiency reasons and to make it less likely to encounter + memory leaks with badly written WSGI middlewares. The downside is that if + you are using streamed responses, the generator cannot access request bound + information any more. + + This function however can help you keep the context around for longer:: + + from flask import stream_with_context, request, Response + + @app.route('/stream') + def streamed_response(): + @stream_with_context + def generate(): + yield 'Hello ' + yield request.args['name'] + yield '!' + return Response(generate()) + + Alternatively it can also be used around a specific generator:: + + from flask import stream_with_context, request, Response + + @app.route('/stream') + def streamed_response(): + def generate(): + yield 'Hello ' + yield request.args['name'] + yield '!' + return Response(stream_with_context(generate())) + + .. 
versionadded:: 0.9 + """ + try: + gen = iter(generator_or_function) + except TypeError: + def decorator(*args, **kwargs): + gen = generator_or_function(*args, **kwargs) + return stream_with_context(gen) + return update_wrapper(decorator, generator_or_function) + + def generator(): + ctx = _request_ctx_stack.top + if ctx is None: + raise RuntimeError('Attempted to stream with context but ' + 'there was no context in the first place to keep around.') + with ctx: + # Dummy sentinel. Has to be inside the context block or we're + # not actually keeping the context around. + yield None + + # The try/finally is here so that if someone passes a WSGI level + # iterator in we're still running the cleanup logic. Generators + # don't need that because they are closed on their destruction + # automatically. + try: + for item in gen: + yield item + finally: + if hasattr(gen, 'close'): + gen.close() + + # The trick is to start the generator. Then the code execution runs until + # the first dummy None is yielded at which point the context was already + # pushed. This item is discarded. Then when the iteration continues the + # real generator is executed. + wrapped_g = generator() + next(wrapped_g) + return wrapped_g + + +def make_response(*args): + """Sometimes it is necessary to set additional headers in a view. Because + views do not have to return response objects but can return a value that + is converted into a response object by Flask itself, it becomes tricky to + add headers to it. This function can be called instead of using a return + and you will get a response object which you can use to attach headers. + + If view looked like this and you want to add a new header:: + + def index(): + return render_template('index.html', foo=42) + + You can now do something like this:: + + def index(): + response = make_response(render_template('index.html', foo=42)) + response.headers['X-Parachutes'] = 'parachutes are cool' + return response + + This function accepts the very same arguments you can return from a + view function. This for example creates a response with a 404 error + code:: + + response = make_response(render_template('not_found.html'), 404) + + The other use case of this function is to force the return value of a + view function into a response which is helpful with view + decorators:: + + response = make_response(view_function()) + response.headers['X-Parachutes'] = 'parachutes are cool' + + Internally this function does the following things: + + - if no arguments are passed, it creates a new response argument + - if one argument is passed, :meth:`flask.Flask.make_response` + is invoked with it. + - if more than one argument is passed, the arguments are passed + to the :meth:`flask.Flask.make_response` function as tuple. + + .. versionadded:: 0.6 + """ + if not args: + return current_app.response_class() + if len(args) == 1: + args = args[0] + return current_app.make_response(args) + + +def url_for(endpoint, **values): + """Generates a URL to the given endpoint with the method provided. + + Variable arguments that are unknown to the target endpoint are appended + to the generated URL as query arguments. If the value of a query argument + is ``None``, the whole pair is skipped. In case blueprints are active + you can shortcut references to the same blueprint by prefixing the + local endpoint with a dot (``.``). + + This will reference the index function local to the current blueprint:: + + url_for('.index') + + For more information, head over to the :ref:`Quickstart <url-building>`. 
+ + To integrate applications, :class:`Flask` has a hook to intercept URL build + errors through :attr:`Flask.url_build_error_handlers`. The `url_for` + function results in a :exc:`~werkzeug.routing.BuildError` when the current + app does not have a URL for the given endpoint and values. When it does, the + :data:`~flask.current_app` calls its :attr:`~Flask.url_build_error_handlers` if + it is not ``None``, which can return a string to use as the result of + `url_for` (instead of `url_for`'s default to raise the + :exc:`~werkzeug.routing.BuildError` exception) or re-raise the exception. + An example:: + + def external_url_handler(error, endpoint, values): + "Looks up an external URL when `url_for` cannot build a URL." + # This is an example of hooking the build_error_handler. + # Here, lookup_url is some utility function you've built + # which looks up the endpoint in some external URL registry. + url = lookup_url(endpoint, **values) + if url is None: + # External lookup did not have a URL. + # Re-raise the BuildError, in context of original traceback. + exc_type, exc_value, tb = sys.exc_info() + if exc_value is error: + raise exc_type, exc_value, tb + else: + raise error + # url_for will use this result, instead of raising BuildError. + return url + + app.url_build_error_handlers.append(external_url_handler) + + Here, `error` is the instance of :exc:`~werkzeug.routing.BuildError`, and + `endpoint` and `values` are the arguments passed into `url_for`. Note + that this is for building URLs outside the current application, and not for + handling 404 NotFound errors. + + .. versionadded:: 0.10 + The `_scheme` parameter was added. + + .. versionadded:: 0.9 + The `_anchor` and `_method` parameters were added. + + .. versionadded:: 0.9 + Calls :meth:`Flask.handle_build_error` on + :exc:`~werkzeug.routing.BuildError`. + + :param endpoint: the endpoint of the URL (name of the function) + :param values: the variable arguments of the URL rule + :param _external: if set to ``True``, an absolute URL is generated. Server + address can be changed via ``SERVER_NAME`` configuration variable which + falls back to the `Host` header, then to the IP and port of the request. + :param _scheme: a string specifying the desired URL scheme. The `_external` + parameter must be set to ``True`` or a :exc:`ValueError` is raised. The default + behavior uses the same scheme as the current request, or + ``PREFERRED_URL_SCHEME`` from the :ref:`app configuration <config>` if no + request context is available. As of Werkzeug 0.10, this also can be set + to an empty string to build protocol-relative URLs. + :param _anchor: if provided this is added as anchor to the URL. + :param _method: if provided this explicitly specifies an HTTP method. + """ + appctx = _app_ctx_stack.top + reqctx = _request_ctx_stack.top + + if appctx is None: + raise RuntimeError( + 'Attempted to generate a URL without the application context being' + ' pushed. This has to be executed when application context is' + ' available.' + ) + + # If request specific information is available we have some extra + # features that support "relative" URLs. + if reqctx is not None: + url_adapter = reqctx.url_adapter + blueprint_name = request.blueprint + + if endpoint[:1] == '.': + if blueprint_name is not None: + endpoint = blueprint_name + endpoint + else: + endpoint = endpoint[1:] + + external = values.pop('_external', False) + + # Otherwise go with the url adapter from the appctx and make + # the URLs external by default. 
+    else:
+        url_adapter = appctx.url_adapter
+
+        if url_adapter is None:
+            raise RuntimeError(
+                'Application was not able to create a URL adapter for request'
+                ' independent URL generation. You might be able to fix this by'
+                ' setting the SERVER_NAME config variable.'
+            )
+
+        external = values.pop('_external', True)
+
+    anchor = values.pop('_anchor', None)
+    method = values.pop('_method', None)
+    scheme = values.pop('_scheme', None)
+    appctx.app.inject_url_defaults(endpoint, values)
+
+    # This is not the best way to deal with this but currently the
+    # underlying Werkzeug router does not support overriding the scheme on
+    # a per build call basis.
+    old_scheme = None
+    if scheme is not None:
+        if not external:
+            raise ValueError('When specifying _scheme, _external must be True')
+        old_scheme = url_adapter.url_scheme
+        url_adapter.url_scheme = scheme
+
+    try:
+        try:
+            rv = url_adapter.build(endpoint, values, method=method,
+                                   force_external=external)
+        finally:
+            if old_scheme is not None:
+                url_adapter.url_scheme = old_scheme
+    except BuildError as error:
+        # We need to inject the values again so that the app callback can
+        # deal with that sort of stuff.
+        values['_external'] = external
+        values['_anchor'] = anchor
+        values['_method'] = method
+        values['_scheme'] = scheme
+        return appctx.app.handle_url_build_error(error, endpoint, values)
+
+    if anchor is not None:
+        rv += '#' + url_quote(anchor)
+    return rv
+
+
+def get_template_attribute(template_name, attribute):
+    """Loads a macro (or variable) that a template exports. This can be used
+    to invoke a macro from within Python code. If you for example have a
+    template named :file:`_cider.html` with the following contents:
+
+    .. sourcecode:: html+jinja
+
+       {% macro hello(name) %}Hello {{ name }}!{% endmacro %}
+
+    You can access this from Python code like this::
+
+        hello = get_template_attribute('_cider.html', 'hello')
+        return hello('World')
+
+    .. versionadded:: 0.2
+
+    :param template_name: the name of the template
+    :param attribute: the name of the variable or macro to access
+    """
+    return getattr(current_app.jinja_env.get_template(template_name).module,
+                   attribute)
+
+
+def flash(message, category='message'):
+    """Flashes a message to the next request. In order to remove the
+    flashed message from the session and to display it to the user,
+    the template has to call :func:`get_flashed_messages`.
+
+    .. versionchanged:: 0.3
+       `category` parameter added.
+
+    :param message: the message to be flashed.
+    :param category: the category for the message. The following values
+                     are recommended: ``'message'`` for any kind of message,
+                     ``'error'`` for errors, ``'info'`` for information
+                     messages and ``'warning'`` for warnings. However any
+                     kind of string can be used as category.
+    """
+    # Original implementation:
+    #
+    #     session.setdefault('_flashes', []).append((category, message))
+    #
+    # This assumed that changes made to mutable structures in the session are
+    # always in sync with the session object, which is not true for session
+    # implementations that use external storage for keeping their keys/values.
+    flashes = session.get('_flashes', [])
+    flashes.append((category, message))
+    session['_flashes'] = flashes
+    message_flashed.send(current_app._get_current_object(),
+                         message=message, category=category)
+
+
+def get_flashed_messages(with_categories=False, category_filter=[]):
+    """Pulls all flashed messages from the session and returns them.
+    Further calls in the same request to the function will return
+    the same messages.
By default just the messages are returned, + but when `with_categories` is set to ``True``, the return value will + be a list of tuples in the form ``(category, message)`` instead. + + Filter the flashed messages to one or more categories by providing those + categories in `category_filter`. This allows rendering categories in + separate html blocks. The `with_categories` and `category_filter` + arguments are distinct: + + * `with_categories` controls whether categories are returned with message + text (``True`` gives a tuple, where ``False`` gives just the message text). + * `category_filter` filters the messages down to only those matching the + provided categories. + + See :ref:`message-flashing-pattern` for examples. + + .. versionchanged:: 0.3 + `with_categories` parameter added. + + .. versionchanged:: 0.9 + `category_filter` parameter added. + + :param with_categories: set to ``True`` to also receive categories. + :param category_filter: whitelist of categories to limit return values + """ + flashes = _request_ctx_stack.top.flashes + if flashes is None: + _request_ctx_stack.top.flashes = flashes = session.pop('_flashes') \ + if '_flashes' in session else [] + if category_filter: + flashes = list(filter(lambda f: f[0] in category_filter, flashes)) + if not with_categories: + return [x[1] for x in flashes] + return flashes + + +def send_file(filename_or_fp, mimetype=None, as_attachment=False, + attachment_filename=None, add_etags=True, + cache_timeout=None, conditional=False, last_modified=None): + """Sends the contents of a file to the client. This will use the + most efficient method available and configured. By default it will + try to use the WSGI server's file_wrapper support. Alternatively + you can set the application's :attr:`~Flask.use_x_sendfile` attribute + to ``True`` to directly emit an ``X-Sendfile`` header. This however + requires support of the underlying webserver for ``X-Sendfile``. + + By default it will try to guess the mimetype for you, but you can + also explicitly provide one. For extra security you probably want + to send certain files as attachment (HTML for instance). The mimetype + guessing requires a `filename` or an `attachment_filename` to be + provided. + + ETags will also be attached automatically if a `filename` is provided. You + can turn this off by setting `add_etags=False`. + + If `conditional=True` and `filename` is provided, this method will try to + upgrade the response stream to support range requests. This will allow + the request to be answered with partial content response. + + Please never pass filenames to this function from user sources; + you should use :func:`send_from_directory` instead. + + .. versionadded:: 0.2 + + .. versionadded:: 0.5 + The `add_etags`, `cache_timeout` and `conditional` parameters were + added. The default behavior is now to attach etags. + + .. versionchanged:: 0.7 + mimetype guessing and etag support for file objects was + deprecated because it was unreliable. Pass a filename if you are + able to, otherwise attach an etag yourself. This functionality + will be removed in Flask 1.0 + + .. versionchanged:: 0.9 + cache_timeout pulls its default from application config, when None. + + .. versionchanged:: 0.12 + The filename is no longer automatically inferred from file objects. If + you want to use automatic mimetype and etag support, pass a filepath via + `filename_or_fp` or `attachment_filename`. + + .. versionchanged:: 0.12 + The `attachment_filename` is preferred over `filename` for MIME-type + detection. + + .. 
versionchanged:: 1.0 + UTF-8 filenames, as specified in `RFC 2231`_, are supported. + + .. _RFC 2231: https://tools.ietf.org/html/rfc2231#section-4 + + .. versionchanged:: 1.0.3 + Filenames are encoded with ASCII instead of Latin-1 for broader + compatibility with WSGI servers. + + :param filename_or_fp: the filename of the file to send. + This is relative to the :attr:`~Flask.root_path` + if a relative path is specified. + Alternatively a file object might be provided in + which case ``X-Sendfile`` might not work and fall + back to the traditional method. Make sure that the + file pointer is positioned at the start of data to + send before calling :func:`send_file`. + :param mimetype: the mimetype of the file if provided. If a file path is + given, auto detection happens as fallback, otherwise an + error will be raised. + :param as_attachment: set to ``True`` if you want to send this file with + a ``Content-Disposition: attachment`` header. + :param attachment_filename: the filename for the attachment if it + differs from the file's filename. + :param add_etags: set to ``False`` to disable attaching of etags. + :param conditional: set to ``True`` to enable conditional responses. + + :param cache_timeout: the timeout in seconds for the headers. When ``None`` + (default), this value is set by + :meth:`~Flask.get_send_file_max_age` of + :data:`~flask.current_app`. + :param last_modified: set the ``Last-Modified`` header to this value, + a :class:`~datetime.datetime` or timestamp. + If a file was passed, this overrides its mtime. + """ + mtime = None + fsize = None + if isinstance(filename_or_fp, string_types): + filename = filename_or_fp + if not os.path.isabs(filename): + filename = os.path.join(current_app.root_path, filename) + file = None + if attachment_filename is None: + attachment_filename = os.path.basename(filename) + else: + file = filename_or_fp + filename = None + + if mimetype is None: + if attachment_filename is not None: + mimetype = mimetypes.guess_type(attachment_filename)[0] \ + or 'application/octet-stream' + + if mimetype is None: + raise ValueError( + 'Unable to infer MIME-type because no filename is available. ' + 'Please set either `attachment_filename`, pass a filepath to ' + '`filename_or_fp` or set your own MIME-type via `mimetype`.' 
+    )
+
+    headers = Headers()
+    if as_attachment:
+        if attachment_filename is None:
+            raise TypeError('filename unavailable, required for '
+                            'sending as attachment')
+
+        if not isinstance(attachment_filename, text_type):
+            attachment_filename = attachment_filename.decode('utf-8')
+
+        try:
+            attachment_filename = attachment_filename.encode('ascii')
+        except UnicodeEncodeError:
+            filenames = {
+                'filename': unicodedata.normalize(
+                    'NFKD', attachment_filename).encode('ascii', 'ignore'),
+                'filename*': "UTF-8''%s" % url_quote(attachment_filename),
+            }
+        else:
+            filenames = {'filename': attachment_filename}
+
+        headers.add('Content-Disposition', 'attachment', **filenames)
+
+    if current_app.use_x_sendfile and filename:
+        if file is not None:
+            file.close()
+        headers['X-Sendfile'] = filename
+        fsize = os.path.getsize(filename)
+        headers['Content-Length'] = fsize
+        data = None
+    else:
+        if file is None:
+            file = open(filename, 'rb')
+            mtime = os.path.getmtime(filename)
+            fsize = os.path.getsize(filename)
+            headers['Content-Length'] = fsize
+        data = wrap_file(request.environ, file)
+
+    rv = current_app.response_class(data, mimetype=mimetype, headers=headers,
+                                    direct_passthrough=True)
+
+    if last_modified is not None:
+        rv.last_modified = last_modified
+    elif mtime is not None:
+        rv.last_modified = mtime
+
+    rv.cache_control.public = True
+    if cache_timeout is None:
+        cache_timeout = current_app.get_send_file_max_age(filename)
+    if cache_timeout is not None:
+        rv.cache_control.max_age = cache_timeout
+        rv.expires = int(time() + cache_timeout)
+
+    if add_etags and filename is not None:
+        from warnings import warn
+
+        try:
+            rv.set_etag('%s-%s-%s' % (
+                os.path.getmtime(filename),
+                os.path.getsize(filename),
+                adler32(
+                    filename.encode('utf-8') if isinstance(filename, text_type)
+                    else filename
+                ) & 0xffffffff
+            ))
+        except OSError:
+            warn('Accessing %s failed; it may not exist, so no etag is '
+                 'added to the headers.' % filename, stacklevel=2)
+
+    if conditional:
+        try:
+            rv = rv.make_conditional(request, accept_ranges=True,
+                                     complete_length=fsize)
+        except RequestedRangeNotSatisfiable:
+            if file is not None:
+                file.close()
+            raise
+        # make sure we don't send x-sendfile for servers that
+        # ignore the 304 status code for x-sendfile.
+        if rv.status_code == 304:
+            rv.headers.pop('x-sendfile', None)
+    return rv
+
+
+def safe_join(directory, *pathnames):
+    """Safely join `directory` and zero or more untrusted `pathnames`
+    components.
+
+    Example usage::
+
+        @app.route('/wiki/<path:filename>')
+        def wiki_page(filename):
+            filename = safe_join(app.config['WIKI_FOLDER'], filename)
+            with open(filename, 'rb') as fd:
+                content = fd.read()  # Read and process the file content...
+
+    :param directory: the trusted base directory.
+    :param pathnames: the untrusted pathnames relative to that directory.
+    :raises: :class:`~werkzeug.exceptions.NotFound` if one or more passed
+        paths fall outside the directory's boundaries.
+    """
+
+    parts = [directory]
+
+    for filename in pathnames:
+        if filename != '':
+            filename = posixpath.normpath(filename)
+
+        if (
+            any(sep in filename for sep in _os_alt_seps)
+            or os.path.isabs(filename)
+            or filename == '..'
+            or filename.startswith('../')
+        ):
+            raise NotFound()
+
+        parts.append(filename)
+
+    return posixpath.join(*parts)
+
+
+def send_from_directory(directory, filename, **options):
+    """Send a file from a given directory with :func:`send_file`. This
+    is a secure way to quickly expose static files from an upload folder
+    or something similar.
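+    Internally the filename is routed through :func:`safe_join`, so
+    directory traversal attempts abort with a 404 error.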
+ + Example usage:: + + @app.route('/uploads/<path:filename>') + def download_file(filename): + return send_from_directory(app.config['UPLOAD_FOLDER'], + filename, as_attachment=True) + + .. admonition:: Sending files and Performance + + It is strongly recommended to activate either ``X-Sendfile`` support in + your webserver or (if no authentication happens) to tell the webserver + to serve files for the given path on its own without calling into the + web application for improved performance. + + .. versionadded:: 0.5 + + :param directory: the directory where all the files are stored. + :param filename: the filename relative to that directory to + download. + :param options: optional keyword arguments that are directly + forwarded to :func:`send_file`. + """ + filename = safe_join(directory, filename) + if not os.path.isabs(filename): + filename = os.path.join(current_app.root_path, filename) + try: + if not os.path.isfile(filename): + raise NotFound() + except (TypeError, ValueError): + raise BadRequest() + options.setdefault('conditional', True) + return send_file(filename, **options) + + +def get_root_path(import_name): + """Returns the path to a package or cwd if that cannot be found. This + returns the path of a package or the folder that contains a module. + + Not to be confused with the package path returned by :func:`find_package`. + """ + # Module already imported and has a file attribute. Use that first. + mod = sys.modules.get(import_name) + if mod is not None and hasattr(mod, '__file__'): + return os.path.dirname(os.path.abspath(mod.__file__)) + + # Next attempt: check the loader. + loader = pkgutil.get_loader(import_name) + + # Loader does not exist or we're referring to an unloaded main module + # or a main module without path (interactive sessions), go with the + # current working directory. + if loader is None or import_name == '__main__': + return os.getcwd() + + # For .egg, zipimporter does not have get_filename until Python 2.7. + # Some other loaders might exhibit the same behavior. + if hasattr(loader, 'get_filename'): + filepath = loader.get_filename(import_name) + else: + # Fall back to imports. + __import__(import_name) + mod = sys.modules[import_name] + filepath = getattr(mod, '__file__', None) + + # If we don't have a filepath it might be because we are a + # namespace package. In this case we pick the root path from the + # first module that is contained in our package. + if filepath is None: + raise RuntimeError('No root path can be found for the provided ' + 'module "%s". This can happen because the ' + 'module came from an import hook that does ' + 'not provide file name information or because ' + 'it\'s a namespace package. In this case ' + 'the root path needs to be explicitly ' + 'provided.' % import_name) + + # filepath is import_name.py for a module, or __init__.py for a package. + return os.path.dirname(os.path.abspath(filepath)) + + +def _matching_loader_thinks_module_is_package(loader, mod_name): + """Given the loader that loaded a module and the module this function + attempts to figure out if the given module is actually a package. + """ + # If the loader can tell us if something is a package, we can + # directly ask the loader. + if hasattr(loader, 'is_package'): + return loader.is_package(mod_name) + # importlib's namespace loaders do not have this functionality but + # all the modules it loads are packages, so we can take advantage of + # this information. 
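+    # (The check goes by module and class name rather than isinstance(),
+    # since the NamespaceLoader class lives in importlib's frozen
+    # bootstrap and is not meant to be imported directly.)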
+    elif (loader.__class__.__module__ == '_frozen_importlib' and
+            loader.__class__.__name__ == 'NamespaceLoader'):
+        return True
+    # Otherwise we need to fail with an error that explains what went
+    # wrong.
+    raise AttributeError(
+        ('%s.is_package() method is missing but is required by Flask for '
+         'PEP 302 import hooks. If you do not use import hooks and '
+         'you encounter this error please file a bug against Flask.') %
+        loader.__class__.__name__)
+
+
+def find_package(import_name):
+    """Finds a package and returns the prefix (or None if the package is
+    not installed) as well as the folder that contains the package or
+    module as a tuple. The package path returned is the module that would
+    have to be added to the pythonpath in order to make it possible to
+    import the module. The prefix is the path below which a UNIX like
+    folder structure exists (lib, share etc.).
+    """
+    root_mod_name = import_name.split('.')[0]
+    loader = pkgutil.get_loader(root_mod_name)
+    if loader is None or import_name == '__main__':
+        # import name is not found, or interactive/main module
+        package_path = os.getcwd()
+    else:
+        # For .egg, zipimporter does not have get_filename until Python 2.7.
+        if hasattr(loader, 'get_filename'):
+            filename = loader.get_filename(root_mod_name)
+        elif hasattr(loader, 'archive'):
+            # zipimporter's loader.archive points to the .egg or .zip file;
+            # the archive filename is dropped by the dirname call below.
+            filename = loader.archive
+        else:
+            # At least one loader is missing both get_filename and archive:
+            # Google App Engine's HardenedModulesHook
+            #
+            # Fall back to imports.
+            __import__(import_name)
+            filename = sys.modules[import_name].__file__
+        package_path = os.path.abspath(os.path.dirname(filename))
+
+        # In case the root module is a package we need to chop off the
+        # rightmost part. This needs to go through a helper function
+        # because of Python 3.3 namespace packages.
+        if _matching_loader_thinks_module_is_package(
+                loader, root_mod_name):
+            package_path = os.path.dirname(package_path)
+
+    site_parent, site_folder = os.path.split(package_path)
+    py_prefix = os.path.abspath(sys.prefix)
+    if package_path.startswith(py_prefix):
+        return py_prefix, package_path
+    elif site_folder.lower() == 'site-packages':
+        parent, folder = os.path.split(site_parent)
+        # Windows like installations
+        if folder.lower() == 'lib':
+            base_dir = parent
+        # UNIX like installations
+        elif os.path.basename(parent).lower() == 'lib':
+            base_dir = os.path.dirname(parent)
+        else:
+            base_dir = site_parent
+        return base_dir, package_path
+    return None, package_path
+
+
+class locked_cached_property(object):
+    """A decorator that converts a function into a lazy property. The
+    function wrapped is called the first time to retrieve the result
+    and then that calculated result is used the next time you access
+    the value. Works like the one in Werkzeug but has a lock for
+    thread safety.
+    """
+
+    def __init__(self, func, name=None, doc=None):
+        self.__name__ = name or func.__name__
+        self.__module__ = func.__module__
+        self.__doc__ = doc or func.__doc__
+        self.func = func
+        self.lock = RLock()
+
+    def __get__(self, obj, type=None):
+        if obj is None:
+            return self
+        with self.lock:
+            value = obj.__dict__.get(self.__name__, _missing)
+            if value is _missing:
+                value = self.func(obj)
+                obj.__dict__[self.__name__] = value
+            return value
+
+
+class _PackageBoundObject(object):
+    #: The name of the package or module that this app belongs to. Do not
+    #: change this once it is set by the constructor.
+ import_name = None + + #: Location of the template files to be added to the template lookup. + #: ``None`` if templates should not be added. + template_folder = None + + #: Absolute path to the package on the filesystem. Used to look up + #: resources contained in the package. + root_path = None + + def __init__(self, import_name, template_folder=None, root_path=None): + self.import_name = import_name + self.template_folder = template_folder + + if root_path is None: + root_path = get_root_path(self.import_name) + + self.root_path = root_path + self._static_folder = None + self._static_url_path = None + + def _get_static_folder(self): + if self._static_folder is not None: + return os.path.join(self.root_path, self._static_folder) + + def _set_static_folder(self, value): + self._static_folder = value + + static_folder = property( + _get_static_folder, _set_static_folder, + doc='The absolute path to the configured static folder.' + ) + del _get_static_folder, _set_static_folder + + def _get_static_url_path(self): + if self._static_url_path is not None: + return self._static_url_path + + if self.static_folder is not None: + return '/' + os.path.basename(self.static_folder) + + def _set_static_url_path(self, value): + self._static_url_path = value + + static_url_path = property( + _get_static_url_path, _set_static_url_path, + doc='The URL prefix that the static route will be registered for.' + ) + del _get_static_url_path, _set_static_url_path + + @property + def has_static_folder(self): + """This is ``True`` if the package bound object's container has a + folder for static files. + + .. versionadded:: 0.5 + """ + return self.static_folder is not None + + @locked_cached_property + def jinja_loader(self): + """The Jinja loader for this package bound object. + + .. versionadded:: 0.5 + """ + if self.template_folder is not None: + return FileSystemLoader(os.path.join(self.root_path, + self.template_folder)) + + def get_send_file_max_age(self, filename): + """Provides default cache_timeout for the :func:`send_file` functions. + + By default, this function returns ``SEND_FILE_MAX_AGE_DEFAULT`` from + the configuration of :data:`~flask.current_app`. + + Static file functions such as :func:`send_from_directory` use this + function, and :func:`send_file` calls this function on + :data:`~flask.current_app` when the given cache_timeout is ``None``. If a + cache_timeout is given in :func:`send_file`, that timeout is used; + otherwise, this method is called. + + This allows subclasses to change the behavior when sending files based + on the filename. For example, to set the cache timeout for .js files + to 60 seconds:: + + class MyFlask(flask.Flask): + def get_send_file_max_age(self, name): + if name.lower().endswith('.js'): + return 60 + return flask.Flask.get_send_file_max_age(self, name) + + .. versionadded:: 0.9 + """ + return total_seconds(current_app.send_file_max_age_default) + + def send_static_file(self, filename): + """Function used internally to send static files from the static + folder to the browser. + + .. versionadded:: 0.5 + """ + if not self.has_static_folder: + raise RuntimeError('No static folder for this object') + # Ensure get_send_file_max_age is called in all cases. + # Here, we ensure get_send_file_max_age is called for Blueprints. 
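+        # (Blueprints inherit this method from _PackageBoundObject, so a
+        # Blueprint subclass can supply its own max-age logic here too.)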
+ cache_timeout = self.get_send_file_max_age(filename) + return send_from_directory(self.static_folder, filename, + cache_timeout=cache_timeout) + + def open_resource(self, resource, mode='rb'): + """Opens a resource from the application's resource folder. To see + how this works, consider the following folder structure:: + + /myapplication.py + /schema.sql + /static + /style.css + /templates + /layout.html + /index.html + + If you want to open the :file:`schema.sql` file you would do the + following:: + + with app.open_resource('schema.sql') as f: + contents = f.read() + do_something_with(contents) + + :param resource: the name of the resource. To access resources within + subfolders use forward slashes as separator. + :param mode: resource file opening mode, default is 'rb'. + """ + if mode not in ('r', 'rb'): + raise ValueError('Resources can only be opened for reading') + return open(os.path.join(self.root_path, resource), mode) + + +def total_seconds(td): + """Returns the total seconds from a timedelta object. + + :param timedelta td: the timedelta to be converted in seconds + + :returns: number of seconds + :rtype: int + """ + return td.days * 60 * 60 * 24 + td.seconds + + +def is_ip(value): + """Determine if the given string is an IP address. + + Python 2 on Windows doesn't provide ``inet_pton``, so this only + checks IPv4 addresses in that environment. + + :param value: value to check + :type value: str + + :return: True if string is an IP address + :rtype: bool + """ + if PY2 and os.name == 'nt': + try: + socket.inet_aton(value) + return True + except socket.error: + return False + + for family in (socket.AF_INET, socket.AF_INET6): + try: + socket.inet_pton(family, value) + except socket.error: + pass + else: + return True + + return False diff --git a/python/flask/json/__init__.py b/python/flask/json/__init__.py new file mode 100644 index 0000000..c24286c --- /dev/null +++ b/python/flask/json/__init__.py @@ -0,0 +1,357 @@ +# -*- coding: utf-8 -*- +""" +flask.json +~~~~~~~~~~ + +:copyright: © 2010 by the Pallets team. +:license: BSD, see LICENSE for more details. +""" +import codecs +import io +import uuid +from datetime import date, datetime +from flask.globals import current_app, request +from flask._compat import text_type, PY2 + +from werkzeug.http import http_date +from jinja2 import Markup + +# Use the same json implementation as itsdangerous on which we +# depend anyways. +from itsdangerous import json as _json + + +# Figure out if simplejson escapes slashes. This behavior was changed +# from one version to another without reason. +_slash_escape = '\\/' not in _json.dumps('/') + + +__all__ = ['dump', 'dumps', 'load', 'loads', 'htmlsafe_dump', + 'htmlsafe_dumps', 'JSONDecoder', 'JSONEncoder', + 'jsonify'] + + +def _wrap_reader_for_text(fp, encoding): + if isinstance(fp.read(0), bytes): + fp = io.TextIOWrapper(io.BufferedReader(fp), encoding) + return fp + + +def _wrap_writer_for_text(fp, encoding): + try: + fp.write('') + except TypeError: + fp = io.TextIOWrapper(fp, encoding) + return fp + + +class JSONEncoder(_json.JSONEncoder): + """The default Flask JSON encoder. This one extends the default simplejson + encoder by also supporting ``datetime`` objects, ``UUID`` as well as + ``Markup`` objects which are serialized as RFC 822 datetime strings (same + as the HTTP date format). In order to support more data types override the + :meth:`default` method. 
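+
+    A minimal sketch of such an override (the ``set`` handling is
+    illustrative, not part of this class)::
+
+        class MyJSONEncoder(JSONEncoder):
+            def default(self, o):
+                if isinstance(o, set):
+                    return list(o)
+                return JSONEncoder.default(self, o)
+
+        app.json_encoder = MyJSONEncoder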
+ """ + + def default(self, o): + """Implement this method in a subclass such that it returns a + serializable object for ``o``, or calls the base implementation (to + raise a :exc:`TypeError`). + + For example, to support arbitrary iterators, you could implement + default like this:: + + def default(self, o): + try: + iterable = iter(o) + except TypeError: + pass + else: + return list(iterable) + return JSONEncoder.default(self, o) + """ + if isinstance(o, datetime): + return http_date(o.utctimetuple()) + if isinstance(o, date): + return http_date(o.timetuple()) + if isinstance(o, uuid.UUID): + return str(o) + if hasattr(o, '__html__'): + return text_type(o.__html__()) + return _json.JSONEncoder.default(self, o) + + +class JSONDecoder(_json.JSONDecoder): + """The default JSON decoder. This one does not change the behavior from + the default simplejson decoder. Consult the :mod:`json` documentation + for more information. This decoder is not only used for the load + functions of this module but also :attr:`~flask.Request`. + """ + + +def _dump_arg_defaults(kwargs, app=None): + """Inject default arguments for dump functions.""" + if app is None: + app = current_app + + if app: + bp = app.blueprints.get(request.blueprint) if request else None + kwargs.setdefault( + 'cls', bp.json_encoder if bp and bp.json_encoder else app.json_encoder + ) + + if not app.config['JSON_AS_ASCII']: + kwargs.setdefault('ensure_ascii', False) + + kwargs.setdefault('sort_keys', app.config['JSON_SORT_KEYS']) + else: + kwargs.setdefault('sort_keys', True) + kwargs.setdefault('cls', JSONEncoder) + + +def _load_arg_defaults(kwargs, app=None): + """Inject default arguments for load functions.""" + if app is None: + app = current_app + + if app: + bp = app.blueprints.get(request.blueprint) if request else None + kwargs.setdefault( + 'cls', + bp.json_decoder if bp and bp.json_decoder + else app.json_decoder + ) + else: + kwargs.setdefault('cls', JSONDecoder) + + +def detect_encoding(data): + """Detect which UTF codec was used to encode the given bytes. + + The latest JSON standard (:rfc:`8259`) suggests that only UTF-8 is + accepted. Older documents allowed 8, 16, or 32. 16 and 32 can be big + or little endian. Some editors or libraries may prepend a BOM. + + :param data: Bytes in unknown UTF encoding. + :return: UTF encoding name + """ + head = data[:4] + + if head[:3] == codecs.BOM_UTF8: + return 'utf-8-sig' + + if b'\x00' not in head: + return 'utf-8' + + if head in (codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE): + return 'utf-32' + + if head[:2] in (codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE): + return 'utf-16' + + if len(head) == 4: + if head[:3] == b'\x00\x00\x00': + return 'utf-32-be' + + if head[::2] == b'\x00\x00': + return 'utf-16-be' + + if head[1:] == b'\x00\x00\x00': + return 'utf-32-le' + + if head[1::2] == b'\x00\x00': + return 'utf-16-le' + + if len(head) == 2: + return 'utf-16-be' if head.startswith(b'\x00') else 'utf-16-le' + + return 'utf-8' + + +def dumps(obj, app=None, **kwargs): + """Serialize ``obj`` to a JSON-formatted string. If there is an + app context pushed, use the current app's configured encoder + (:attr:`~flask.Flask.json_encoder`), or fall back to the default + :class:`JSONEncoder`. + + Takes the same arguments as the built-in :func:`json.dumps`, and + does some extra configuration based on the application. If the + simplejson package is installed, it is preferred. + + :param obj: Object to serialize to JSON. + :param app: App instance to use to configure the JSON encoder. 
Uses ``current_app`` if not given, and falls back to the default
+        encoder when not in an app context.
+    :param kwargs: Extra arguments passed to :func:`json.dumps`.
+
+    .. versionchanged:: 1.0.3
+
+        ``app`` can be passed directly, rather than requiring an app
+        context for configuration.
+    """
+    _dump_arg_defaults(kwargs, app=app)
+    encoding = kwargs.pop('encoding', None)
+    rv = _json.dumps(obj, **kwargs)
+    if encoding is not None and isinstance(rv, text_type):
+        rv = rv.encode(encoding)
+    return rv
+
+
+def dump(obj, fp, app=None, **kwargs):
+    """Like :func:`dumps` but writes into a file object."""
+    _dump_arg_defaults(kwargs, app=app)
+    encoding = kwargs.pop('encoding', None)
+    if encoding is not None:
+        fp = _wrap_writer_for_text(fp, encoding)
+    _json.dump(obj, fp, **kwargs)
+
+
+def loads(s, app=None, **kwargs):
+    """Deserialize an object from a JSON-formatted string ``s``. If
+    there is an app context pushed, use the current app's configured
+    decoder (:attr:`~flask.Flask.json_decoder`), or fall back to the
+    default :class:`JSONDecoder`.
+
+    Takes the same arguments as the built-in :func:`json.loads`, and
+    does some extra configuration based on the application. If the
+    simplejson package is installed, it is preferred.
+
+    :param s: JSON string to deserialize.
+    :param app: App instance to use to configure the JSON decoder.
+        Uses ``current_app`` if not given, and falls back to the default
+        decoder when not in an app context.
+    :param kwargs: Extra arguments passed to :func:`json.loads`.
+
+    .. versionchanged:: 1.0.3
+
+        ``app`` can be passed directly, rather than requiring an app
+        context for configuration.
+    """
+    _load_arg_defaults(kwargs, app=app)
+    if isinstance(s, bytes):
+        encoding = kwargs.pop('encoding', None)
+        if encoding is None:
+            encoding = detect_encoding(s)
+        s = s.decode(encoding)
+    return _json.loads(s, **kwargs)
+
+
+def load(fp, app=None, **kwargs):
+    """Like :func:`loads` but reads from a file object."""
+    _load_arg_defaults(kwargs, app=app)
+    if not PY2:
+        fp = _wrap_reader_for_text(fp, kwargs.pop('encoding', None) or 'utf-8')
+    return _json.load(fp, **kwargs)
+
+
+def htmlsafe_dumps(obj, **kwargs):
+    """Works exactly like :func:`dumps` but is safe for use in ``<script>``
+    tags. It accepts the same arguments and returns a JSON string. Note that
+    this is available in templates through the ``|tojson`` filter which will
+    also mark the result as safe. Due to how this function escapes certain
+    characters this is safe even if used outside of ``<script>`` tags.
+
+    The following characters are escaped in strings:
+
+    - ``<``
+    - ``>``
+    - ``&``
+    - ``'``
+
+    This makes it safe to embed such strings in any place in HTML with the
+    notable exception of double quoted attributes. In that case, single-quote
+    your attributes or HTML-escape the value in addition.
+
+    .. versionchanged:: 0.10
+       This function's return value is now always safe for HTML usage, even
+       if outside of script tags or if used in XHTML. This rule does not
+       hold true when using this function in HTML attributes that are double
+       quoted. Always single quote attributes if you use the ``|tojson``
+       filter. Alternatively use ``|tojson|forceescape``.
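+
+    A template usage sketch (``payload`` is a hypothetical view variable)::
+
+        <script type=text/javascript>
+            var data = {{ payload|tojson }};
+        </script>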
+ """ + rv = dumps(obj, **kwargs) \ + .replace(u'<', u'\\u003c') \ + .replace(u'>', u'\\u003e') \ + .replace(u'&', u'\\u0026') \ + .replace(u"'", u'\\u0027') + if not _slash_escape: + rv = rv.replace('\\/', '/') + return rv + + +def htmlsafe_dump(obj, fp, **kwargs): + """Like :func:`htmlsafe_dumps` but writes into a file object.""" + fp.write(text_type(htmlsafe_dumps(obj, **kwargs))) + + +def jsonify(*args, **kwargs): + """This function wraps :func:`dumps` to add a few enhancements that make + life easier. It turns the JSON output into a :class:`~flask.Response` + object with the :mimetype:`application/json` mimetype. For convenience, it + also converts multiple arguments into an array or multiple keyword arguments + into a dict. This means that both ``jsonify(1,2,3)`` and + ``jsonify([1,2,3])`` serialize to ``[1,2,3]``. + + For clarity, the JSON serialization behavior has the following differences + from :func:`dumps`: + + 1. Single argument: Passed straight through to :func:`dumps`. + 2. Multiple arguments: Converted to an array before being passed to + :func:`dumps`. + 3. Multiple keyword arguments: Converted to a dict before being passed to + :func:`dumps`. + 4. Both args and kwargs: Behavior undefined and will throw an exception. + + Example usage:: + + from flask import jsonify + + @app.route('/_get_current_user') + def get_current_user(): + return jsonify(username=g.user.username, + email=g.user.email, + id=g.user.id) + + This will send a JSON response like this to the browser:: + + { + "username": "admin", + "email": "admin@localhost", + "id": 42 + } + + + .. versionchanged:: 0.11 + Added support for serializing top-level arrays. This introduces a + security risk in ancient browsers. See :ref:`json-security` for details. + + This function's response will be pretty printed if the + ``JSONIFY_PRETTYPRINT_REGULAR`` config parameter is set to True or the + Flask app is running in debug mode. Compressed (not pretty) formatting + currently means no indents and no spaces after separators. + + .. versionadded:: 0.2 + """ + + indent = None + separators = (',', ':') + + if current_app.config['JSONIFY_PRETTYPRINT_REGULAR'] or current_app.debug: + indent = 2 + separators = (', ', ': ') + + if args and kwargs: + raise TypeError('jsonify() behavior undefined when passed both args and kwargs') + elif len(args) == 1: # single args are passed directly to dumps() + data = args[0] + else: + data = args or kwargs + + return current_app.response_class( + dumps(data, indent=indent, separators=separators) + '\n', + mimetype=current_app.config['JSONIFY_MIMETYPE'] + ) + + +def tojson_filter(obj, **kwargs): + return Markup(htmlsafe_dumps(obj, **kwargs)) diff --git a/python/flask/json/tag.py b/python/flask/json/tag.py new file mode 100644 index 0000000..11c966c --- /dev/null +++ b/python/flask/json/tag.py @@ -0,0 +1,300 @@ +# -*- coding: utf-8 -*- +""" +Tagged JSON +~~~~~~~~~~~ + +A compact representation for lossless serialization of non-standard JSON types. +:class:`~flask.sessions.SecureCookieSessionInterface` uses this to serialize +the session data, but it may be useful in other places. It can be extended to +support other types. + +.. autoclass:: TaggedJSONSerializer + :members: + +.. autoclass:: JSONTag + :members: + +Let's seen an example that adds support for :class:`~collections.OrderedDict`. +Dicts don't have an order in Python or JSON, so to handle this we will dump +the items as a list of ``[key, value]`` pairs. Subclass :class:`JSONTag` and +give it the new key ``' od'`` to identify the type. 
The session serializer +processes dicts first, so insert the new tag at the front of the order since +``OrderedDict`` must be processed before ``dict``. :: + + from flask.json.tag import JSONTag + + class TagOrderedDict(JSONTag): + __slots__ = ('serializer',) + key = ' od' + + def check(self, value): + return isinstance(value, OrderedDict) + + def to_json(self, value): + return [[k, self.serializer.tag(v)] for k, v in iteritems(value)] + + def to_python(self, value): + return OrderedDict(value) + + app.session_interface.serializer.register(TagOrderedDict, index=0) + +:copyright: © 2010 by the Pallets team. +:license: BSD, see LICENSE for more details. +""" + +from base64 import b64decode, b64encode +from datetime import datetime +from uuid import UUID + +from jinja2 import Markup +from werkzeug.http import http_date, parse_date + +from flask._compat import iteritems, text_type +from flask.json import dumps, loads + + +class JSONTag(object): + """Base class for defining type tags for :class:`TaggedJSONSerializer`.""" + + __slots__ = ('serializer',) + + #: The tag to mark the serialized object with. If ``None``, this tag is + #: only used as an intermediate step during tagging. + key = None + + def __init__(self, serializer): + """Create a tagger for the given serializer.""" + self.serializer = serializer + + def check(self, value): + """Check if the given value should be tagged by this tag.""" + raise NotImplementedError + + def to_json(self, value): + """Convert the Python object to an object that is a valid JSON type. + The tag will be added later.""" + raise NotImplementedError + + def to_python(self, value): + """Convert the JSON representation back to the correct type. The tag + will already be removed.""" + raise NotImplementedError + + def tag(self, value): + """Convert the value to a valid JSON type and add the tag structure + around it.""" + return {self.key: self.to_json(value)} + + +class TagDict(JSONTag): + """Tag for 1-item dicts whose only key matches a registered tag. + + Internally, the dict key is suffixed with `__`, and the suffix is removed + when deserializing. + """ + + __slots__ = () + key = ' di' + + def check(self, value): + return ( + isinstance(value, dict) + and len(value) == 1 + and next(iter(value)) in self.serializer.tags + ) + + def to_json(self, value): + key = next(iter(value)) + return {key + '__': self.serializer.tag(value[key])} + + def to_python(self, value): + key = next(iter(value)) + return {key[:-2]: value[key]} + + +class PassDict(JSONTag): + __slots__ = () + + def check(self, value): + return isinstance(value, dict) + + def to_json(self, value): + # JSON objects may only have string keys, so don't bother tagging the + # key here. 
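+        # Values may still contain types that need tagging (nested tuples,
+        # for example), so recurse through the serializer for each value.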
+ return dict((k, self.serializer.tag(v)) for k, v in iteritems(value)) + + tag = to_json + + +class TagTuple(JSONTag): + __slots__ = () + key = ' t' + + def check(self, value): + return isinstance(value, tuple) + + def to_json(self, value): + return [self.serializer.tag(item) for item in value] + + def to_python(self, value): + return tuple(value) + + +class PassList(JSONTag): + __slots__ = () + + def check(self, value): + return isinstance(value, list) + + def to_json(self, value): + return [self.serializer.tag(item) for item in value] + + tag = to_json + + +class TagBytes(JSONTag): + __slots__ = () + key = ' b' + + def check(self, value): + return isinstance(value, bytes) + + def to_json(self, value): + return b64encode(value).decode('ascii') + + def to_python(self, value): + return b64decode(value) + + +class TagMarkup(JSONTag): + """Serialize anything matching the :class:`~flask.Markup` API by + having a ``__html__`` method to the result of that method. Always + deserializes to an instance of :class:`~flask.Markup`.""" + + __slots__ = () + key = ' m' + + def check(self, value): + return callable(getattr(value, '__html__', None)) + + def to_json(self, value): + return text_type(value.__html__()) + + def to_python(self, value): + return Markup(value) + + +class TagUUID(JSONTag): + __slots__ = () + key = ' u' + + def check(self, value): + return isinstance(value, UUID) + + def to_json(self, value): + return value.hex + + def to_python(self, value): + return UUID(value) + + +class TagDateTime(JSONTag): + __slots__ = () + key = ' d' + + def check(self, value): + return isinstance(value, datetime) + + def to_json(self, value): + return http_date(value) + + def to_python(self, value): + return parse_date(value) + + +class TaggedJSONSerializer(object): + """Serializer that uses a tag system to compactly represent objects that + are not JSON types. Passed as the intermediate serializer to + :class:`itsdangerous.Serializer`. + + The following extra types are supported: + + * :class:`dict` + * :class:`tuple` + * :class:`bytes` + * :class:`~flask.Markup` + * :class:`~uuid.UUID` + * :class:`~datetime.datetime` + """ + + __slots__ = ('tags', 'order') + + #: Tag classes to bind when creating the serializer. Other tags can be + #: added later using :meth:`~register`. + default_tags = [ + TagDict, PassDict, TagTuple, PassList, TagBytes, TagMarkup, TagUUID, + TagDateTime, + ] + + def __init__(self): + self.tags = {} + self.order = [] + + for cls in self.default_tags: + self.register(cls) + + def register(self, tag_class, force=False, index=None): + """Register a new tag with this serializer. + + :param tag_class: tag class to register. Will be instantiated with this + serializer instance. + :param force: overwrite an existing tag. If false (default), a + :exc:`KeyError` is raised. + :param index: index to insert the new tag in the tag order. Useful when + the new tag is a special case of an existing tag. If ``None`` + (default), the tag is appended to the end of the order. + + :raise KeyError: if the tag key is already registered and ``force`` is + not true. 
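+
+        For example, with ``TagOrderedDict`` from the module docstring::
+
+            serializer.register(TagOrderedDict, index=0)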
+ """ + tag = tag_class(self) + key = tag.key + + if key is not None: + if not force and key in self.tags: + raise KeyError("Tag '{0}' is already registered.".format(key)) + + self.tags[key] = tag + + if index is None: + self.order.append(tag) + else: + self.order.insert(index, tag) + + def tag(self, value): + """Convert a value to a tagged representation if necessary.""" + for tag in self.order: + if tag.check(value): + return tag.tag(value) + + return value + + def untag(self, value): + """Convert a tagged representation back to the original type.""" + if len(value) != 1: + return value + + key = next(iter(value)) + + if key not in self.tags: + return value + + return self.tags[key].to_python(value[key]) + + def dumps(self, value): + """Tag the value and dump it to a compact JSON string.""" + return dumps(self.tag(value), separators=(',', ':')) + + def loads(self, value): + """Load data from a JSON string and deserialized any tagged objects.""" + return loads(value, object_hook=self.untag) diff --git a/python/flask/logging.py b/python/flask/logging.py new file mode 100644 index 0000000..389c2c2 --- /dev/null +++ b/python/flask/logging.py @@ -0,0 +1,78 @@ +# -*- coding: utf-8 -*- +""" +flask.logging +~~~~~~~~~~~~~ + +:copyright: © 2010 by the Pallets team. +:license: BSD, see LICENSE for more details. +""" + +from __future__ import absolute_import + +import logging +import sys + +from werkzeug.local import LocalProxy + +from .globals import request + + +@LocalProxy +def wsgi_errors_stream(): + """Find the most appropriate error stream for the application. If a request + is active, log to ``wsgi.errors``, otherwise use ``sys.stderr``. + + If you configure your own :class:`logging.StreamHandler`, you may want to + use this for the stream. If you are using file or dict configuration and + can't import this directly, you can refer to it as + ``ext://flask.logging.wsgi_errors_stream``. + """ + return request.environ['wsgi.errors'] if request else sys.stderr + + +def has_level_handler(logger): + """Check if there is a handler in the logging chain that will handle the + given logger's :meth:`effective level <~logging.Logger.getEffectiveLevel>`. + """ + level = logger.getEffectiveLevel() + current = logger + + while current: + if any(handler.level <= level for handler in current.handlers): + return True + + if not current.propagate: + break + + current = current.parent + + return False + + +#: Log messages to :func:`~flask.logging.wsgi_errors_stream` with the format +#: ``[%(asctime)s] %(levelname)s in %(module)s: %(message)s``. +default_handler = logging.StreamHandler(wsgi_errors_stream) +default_handler.setFormatter(logging.Formatter( + '[%(asctime)s] %(levelname)s in %(module)s: %(message)s' +)) + + +def create_logger(app): + """Get the ``'flask.app'`` logger and configure it if needed. + + When :attr:`~flask.Flask.debug` is enabled, set the logger level to + :data:`logging.DEBUG` if it is not set. + + If there is no handler for the logger's effective level, add a + :class:`~logging.StreamHandler` for + :func:`~flask.logging.wsgi_errors_stream` with a basic format. 
+ """ + logger = logging.getLogger('flask.app') + + if app.debug and logger.level == logging.NOTSET: + logger.setLevel(logging.DEBUG) + + if not has_level_handler(logger): + logger.addHandler(default_handler) + + return logger diff --git a/python/flask/sessions.py b/python/flask/sessions.py new file mode 100644 index 0000000..c8b7d4e --- /dev/null +++ b/python/flask/sessions.py @@ -0,0 +1,385 @@ +# -*- coding: utf-8 -*- +""" + flask.sessions + ~~~~~~~~~~~~~~ + + Implements cookie based sessions based on itsdangerous. + + :copyright: © 2010 by the Pallets team. + :license: BSD, see LICENSE for more details. +""" + +import hashlib +import warnings +from datetime import datetime + +from itsdangerous import BadSignature, URLSafeTimedSerializer +from werkzeug.datastructures import CallbackDict + +from flask._compat import collections_abc +from flask.helpers import is_ip, total_seconds +from flask.json.tag import TaggedJSONSerializer + + +class SessionMixin(collections_abc.MutableMapping): + """Expands a basic dictionary with session attributes.""" + + @property + def permanent(self): + """This reflects the ``'_permanent'`` key in the dict.""" + return self.get('_permanent', False) + + @permanent.setter + def permanent(self, value): + self['_permanent'] = bool(value) + + #: Some implementations can detect whether a session is newly + #: created, but that is not guaranteed. Use with caution. The mixin + # default is hard-coded ``False``. + new = False + + #: Some implementations can detect changes to the session and set + #: this when that happens. The mixin default is hard coded to + #: ``True``. + modified = True + + #: Some implementations can detect when session data is read or + #: written and set this when that happens. The mixin default is hard + #: coded to ``True``. + accessed = True + + +class SecureCookieSession(CallbackDict, SessionMixin): + """Base class for sessions based on signed cookies. + + This session backend will set the :attr:`modified` and + :attr:`accessed` attributes. It cannot reliably track whether a + session is new (vs. empty), so :attr:`new` remains hard coded to + ``False``. + """ + + #: When data is changed, this is set to ``True``. Only the session + #: dictionary itself is tracked; if the session contains mutable + #: data (for example a nested dict) then this must be set to + #: ``True`` manually when modifying that data. The session cookie + #: will only be written to the response if this is ``True``. + modified = False + + #: When data is read or written, this is set to ``True``. Used by + # :class:`.SecureCookieSessionInterface` to add a ``Vary: Cookie`` + #: header, which allows caching proxies to cache different pages for + #: different users. + accessed = False + + def __init__(self, initial=None): + def on_update(self): + self.modified = True + self.accessed = True + + super(SecureCookieSession, self).__init__(initial, on_update) + + def __getitem__(self, key): + self.accessed = True + return super(SecureCookieSession, self).__getitem__(key) + + def get(self, key, default=None): + self.accessed = True + return super(SecureCookieSession, self).get(key, default) + + def setdefault(self, key, default=None): + self.accessed = True + return super(SecureCookieSession, self).setdefault(key, default) + + +class NullSession(SecureCookieSession): + """Class used to generate nicer error messages if sessions are not + available. Will still allow read-only access to the empty session + but fail on setting. 
+ """ + + def _fail(self, *args, **kwargs): + raise RuntimeError('The session is unavailable because no secret ' + 'key was set. Set the secret_key on the ' + 'application to something unique and secret.') + __setitem__ = __delitem__ = clear = pop = popitem = \ + update = setdefault = _fail + del _fail + + +class SessionInterface(object): + """The basic interface you have to implement in order to replace the + default session interface which uses werkzeug's securecookie + implementation. The only methods you have to implement are + :meth:`open_session` and :meth:`save_session`, the others have + useful defaults which you don't need to change. + + The session object returned by the :meth:`open_session` method has to + provide a dictionary like interface plus the properties and methods + from the :class:`SessionMixin`. We recommend just subclassing a dict + and adding that mixin:: + + class Session(dict, SessionMixin): + pass + + If :meth:`open_session` returns ``None`` Flask will call into + :meth:`make_null_session` to create a session that acts as replacement + if the session support cannot work because some requirement is not + fulfilled. The default :class:`NullSession` class that is created + will complain that the secret key was not set. + + To replace the session interface on an application all you have to do + is to assign :attr:`flask.Flask.session_interface`:: + + app = Flask(__name__) + app.session_interface = MySessionInterface() + + .. versionadded:: 0.8 + """ + + #: :meth:`make_null_session` will look here for the class that should + #: be created when a null session is requested. Likewise the + #: :meth:`is_null_session` method will perform a typecheck against + #: this type. + null_session_class = NullSession + + #: A flag that indicates if the session interface is pickle based. + #: This can be used by Flask extensions to make a decision in regards + #: to how to deal with the session object. + #: + #: .. versionadded:: 0.10 + pickle_based = False + + def make_null_session(self, app): + """Creates a null session which acts as a replacement object if the + real session support could not be loaded due to a configuration + error. This mainly aids the user experience because the job of the + null session is to still support lookup without complaining but + modifications are answered with a helpful error message of what + failed. + + This creates an instance of :attr:`null_session_class` by default. + """ + return self.null_session_class() + + def is_null_session(self, obj): + """Checks if a given object is a null session. Null sessions are + not asked to be saved. + + This checks if the object is an instance of :attr:`null_session_class` + by default. + """ + return isinstance(obj, self.null_session_class) + + def get_cookie_domain(self, app): + """Returns the domain that should be set for the session cookie. + + Uses ``SESSION_COOKIE_DOMAIN`` if it is configured, otherwise + falls back to detecting the domain based on ``SERVER_NAME``. + + Once detected (or if not set at all), ``SESSION_COOKIE_DOMAIN`` is + updated to avoid re-running the logic. 
+ """ + + rv = app.config['SESSION_COOKIE_DOMAIN'] + + # set explicitly, or cached from SERVER_NAME detection + # if False, return None + if rv is not None: + return rv if rv else None + + rv = app.config['SERVER_NAME'] + + # server name not set, cache False to return none next time + if not rv: + app.config['SESSION_COOKIE_DOMAIN'] = False + return None + + # chop off the port which is usually not supported by browsers + # remove any leading '.' since we'll add that later + rv = rv.rsplit(':', 1)[0].lstrip('.') + + if '.' not in rv: + # Chrome doesn't allow names without a '.' + # this should only come up with localhost + # hack around this by not setting the name, and show a warning + warnings.warn( + '"{rv}" is not a valid cookie domain, it must contain a ".".' + ' Add an entry to your hosts file, for example' + ' "{rv}.localdomain", and use that instead.'.format(rv=rv) + ) + app.config['SESSION_COOKIE_DOMAIN'] = False + return None + + ip = is_ip(rv) + + if ip: + warnings.warn( + 'The session cookie domain is an IP address. This may not work' + ' as intended in some browsers. Add an entry to your hosts' + ' file, for example "localhost.localdomain", and use that' + ' instead.' + ) + + # if this is not an ip and app is mounted at the root, allow subdomain + # matching by adding a '.' prefix + if self.get_cookie_path(app) == '/' and not ip: + rv = '.' + rv + + app.config['SESSION_COOKIE_DOMAIN'] = rv + return rv + + def get_cookie_path(self, app): + """Returns the path for which the cookie should be valid. The + default implementation uses the value from the ``SESSION_COOKIE_PATH`` + config var if it's set, and falls back to ``APPLICATION_ROOT`` or + uses ``/`` if it's ``None``. + """ + return app.config['SESSION_COOKIE_PATH'] \ + or app.config['APPLICATION_ROOT'] + + def get_cookie_httponly(self, app): + """Returns True if the session cookie should be httponly. This + currently just returns the value of the ``SESSION_COOKIE_HTTPONLY`` + config var. + """ + return app.config['SESSION_COOKIE_HTTPONLY'] + + def get_cookie_secure(self, app): + """Returns True if the cookie should be secure. This currently + just returns the value of the ``SESSION_COOKIE_SECURE`` setting. + """ + return app.config['SESSION_COOKIE_SECURE'] + + def get_cookie_samesite(self, app): + """Return ``'Strict'`` or ``'Lax'`` if the cookie should use the + ``SameSite`` attribute. This currently just returns the value of + the :data:`SESSION_COOKIE_SAMESITE` setting. + """ + return app.config['SESSION_COOKIE_SAMESITE'] + + def get_expiration_time(self, app, session): + """A helper method that returns an expiration date for the session + or ``None`` if the session is linked to the browser session. The + default implementation returns now + the permanent session + lifetime configured on the application. + """ + if session.permanent: + return datetime.utcnow() + app.permanent_session_lifetime + + def should_set_cookie(self, app, session): + """Used by session backends to determine if a ``Set-Cookie`` header + should be set for this session cookie for this response. If the session + has been modified, the cookie is set. If the session is permanent and + the ``SESSION_REFRESH_EACH_REQUEST`` config is true, the cookie is + always set. + + This check is usually skipped if the session was deleted. + + .. 
versionadded:: 0.11 + """ + + return session.modified or ( + session.permanent and app.config['SESSION_REFRESH_EACH_REQUEST'] + ) + + def open_session(self, app, request): + """This method has to be implemented and must either return ``None`` + in case the loading failed because of a configuration error or an + instance of a session object which implements a dictionary like + interface + the methods and attributes on :class:`SessionMixin`. + """ + raise NotImplementedError() + + def save_session(self, app, session, response): + """This is called for actual sessions returned by :meth:`open_session` + at the end of the request. This is still called during a request + context so if you absolutely need access to the request you can do + that. + """ + raise NotImplementedError() + + +session_json_serializer = TaggedJSONSerializer() + + +class SecureCookieSessionInterface(SessionInterface): + """The default session interface that stores sessions in signed cookies + through the :mod:`itsdangerous` module. + """ + #: the salt that should be applied on top of the secret key for the + #: signing of cookie based sessions. + salt = 'cookie-session' + #: the hash function to use for the signature. The default is sha1 + digest_method = staticmethod(hashlib.sha1) + #: the name of the itsdangerous supported key derivation. The default + #: is hmac. + key_derivation = 'hmac' + #: A python serializer for the payload. The default is a compact + #: JSON derived serializer with support for some extra Python types + #: such as datetime objects or tuples. + serializer = session_json_serializer + session_class = SecureCookieSession + + def get_signing_serializer(self, app): + if not app.secret_key: + return None + signer_kwargs = dict( + key_derivation=self.key_derivation, + digest_method=self.digest_method + ) + return URLSafeTimedSerializer(app.secret_key, salt=self.salt, + serializer=self.serializer, + signer_kwargs=signer_kwargs) + + def open_session(self, app, request): + s = self.get_signing_serializer(app) + if s is None: + return None + val = request.cookies.get(app.session_cookie_name) + if not val: + return self.session_class() + max_age = total_seconds(app.permanent_session_lifetime) + try: + data = s.loads(val, max_age=max_age) + return self.session_class(data) + except BadSignature: + return self.session_class() + + def save_session(self, app, session, response): + domain = self.get_cookie_domain(app) + path = self.get_cookie_path(app) + + # If the session is modified to be empty, remove the cookie. + # If the session is empty, return without setting the cookie. + if not session: + if session.modified: + response.delete_cookie( + app.session_cookie_name, + domain=domain, + path=path + ) + + return + + # Add a "Vary: Cookie" header if the session was accessed at all. 
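+        # Without it, a caching proxy could serve a page rendered for one
+        # user's session to a different user.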
+ if session.accessed: + response.vary.add('Cookie') + + if not self.should_set_cookie(app, session): + return + + httponly = self.get_cookie_httponly(app) + secure = self.get_cookie_secure(app) + samesite = self.get_cookie_samesite(app) + expires = self.get_expiration_time(app, session) + val = self.get_signing_serializer(app).dumps(dict(session)) + response.set_cookie( + app.session_cookie_name, + val, + expires=expires, + httponly=httponly, + domain=domain, + path=path, + secure=secure, + samesite=samesite + ) diff --git a/python/flask/signals.py b/python/flask/signals.py new file mode 100644 index 0000000..18f2630 --- /dev/null +++ b/python/flask/signals.py @@ -0,0 +1,57 @@ +# -*- coding: utf-8 -*- +""" + flask.signals + ~~~~~~~~~~~~~ + + Implements signals based on blinker if available, otherwise + falls silently back to a noop. + + :copyright: © 2010 by the Pallets team. + :license: BSD, see LICENSE for more details. +""" + +signals_available = False +try: + from blinker import Namespace + signals_available = True +except ImportError: + class Namespace(object): + def signal(self, name, doc=None): + return _FakeSignal(name, doc) + + class _FakeSignal(object): + """If blinker is unavailable, create a fake class with the same + interface that allows sending of signals but will fail with an + error on anything else. Instead of doing anything on send, it + will just ignore the arguments and do nothing instead. + """ + + def __init__(self, name, doc=None): + self.name = name + self.__doc__ = doc + def _fail(self, *args, **kwargs): + raise RuntimeError('signalling support is unavailable ' + 'because the blinker library is ' + 'not installed.') + send = lambda *a, **kw: None + connect = disconnect = has_receivers_for = receivers_for = \ + temporarily_connected_to = connected_to = _fail + del _fail + +# The namespace for code signals. If you are not Flask code, do +# not put signals in here. Create your own namespace instead. +_signals = Namespace() + + +# Core signals. For usage examples grep the source code or consult +# the API documentation in docs/api.rst as well as docs/signals.rst +template_rendered = _signals.signal('template-rendered') +before_render_template = _signals.signal('before-render-template') +request_started = _signals.signal('request-started') +request_finished = _signals.signal('request-finished') +request_tearing_down = _signals.signal('request-tearing-down') +got_request_exception = _signals.signal('got-request-exception') +appcontext_tearing_down = _signals.signal('appcontext-tearing-down') +appcontext_pushed = _signals.signal('appcontext-pushed') +appcontext_popped = _signals.signal('appcontext-popped') +message_flashed = _signals.signal('message-flashed') diff --git a/python/flask/templating.py b/python/flask/templating.py new file mode 100644 index 0000000..0240200 --- /dev/null +++ b/python/flask/templating.py @@ -0,0 +1,150 @@ +# -*- coding: utf-8 -*- +""" + flask.templating + ~~~~~~~~~~~~~~~~ + + Implements the bridge to Jinja2. + + :copyright: © 2010 by the Pallets team. + :license: BSD, see LICENSE for more details. +""" + +from jinja2 import BaseLoader, Environment as BaseEnvironment, \ + TemplateNotFound + +from .globals import _request_ctx_stack, _app_ctx_stack +from .signals import template_rendered, before_render_template + + +def _default_template_ctx_processor(): + """Default template context processor. Injects `request`, + `session` and `g`. 
+ """ + reqctx = _request_ctx_stack.top + appctx = _app_ctx_stack.top + rv = {} + if appctx is not None: + rv['g'] = appctx.g + if reqctx is not None: + rv['request'] = reqctx.request + rv['session'] = reqctx.session + return rv + + +class Environment(BaseEnvironment): + """Works like a regular Jinja2 environment but has some additional + knowledge of how Flask's blueprint works so that it can prepend the + name of the blueprint to referenced templates if necessary. + """ + + def __init__(self, app, **options): + if 'loader' not in options: + options['loader'] = app.create_global_jinja_loader() + BaseEnvironment.__init__(self, **options) + self.app = app + + +class DispatchingJinjaLoader(BaseLoader): + """A loader that looks for templates in the application and all + the blueprint folders. + """ + + def __init__(self, app): + self.app = app + + def get_source(self, environment, template): + if self.app.config['EXPLAIN_TEMPLATE_LOADING']: + return self._get_source_explained(environment, template) + return self._get_source_fast(environment, template) + + def _get_source_explained(self, environment, template): + attempts = [] + trv = None + + for srcobj, loader in self._iter_loaders(template): + try: + rv = loader.get_source(environment, template) + if trv is None: + trv = rv + except TemplateNotFound: + rv = None + attempts.append((loader, srcobj, rv)) + + from .debughelpers import explain_template_loading_attempts + explain_template_loading_attempts(self.app, template, attempts) + + if trv is not None: + return trv + raise TemplateNotFound(template) + + def _get_source_fast(self, environment, template): + for srcobj, loader in self._iter_loaders(template): + try: + return loader.get_source(environment, template) + except TemplateNotFound: + continue + raise TemplateNotFound(template) + + def _iter_loaders(self, template): + loader = self.app.jinja_loader + if loader is not None: + yield self.app, loader + + for blueprint in self.app.iter_blueprints(): + loader = blueprint.jinja_loader + if loader is not None: + yield blueprint, loader + + def list_templates(self): + result = set() + loader = self.app.jinja_loader + if loader is not None: + result.update(loader.list_templates()) + + for blueprint in self.app.iter_blueprints(): + loader = blueprint.jinja_loader + if loader is not None: + for template in loader.list_templates(): + result.add(template) + + return list(result) + + +def _render(template, context, app): + """Renders the template and fires the signal""" + + before_render_template.send(app, template=template, context=context) + rv = template.render(context) + template_rendered.send(app, template=template, context=context) + return rv + + +def render_template(template_name_or_list, **context): + """Renders a template from the template folder with the given + context. + + :param template_name_or_list: the name of the template to be + rendered, or an iterable with template names + the first one existing will be rendered + :param context: the variables that should be available in the + context of the template. + """ + ctx = _app_ctx_stack.top + ctx.app.update_template_context(context) + return _render(ctx.app.jinja_env.get_or_select_template(template_name_or_list), + context, ctx.app) + + +def render_template_string(source, **context): + """Renders a template from the given template source string + with the given context. Template variables will be autoescaped. 
+ + :param source: the source code of the template to be + rendered + :param context: the variables that should be available in the + context of the template. + """ + ctx = _app_ctx_stack.top + ctx.app.update_template_context(context) + return _render(ctx.app.jinja_env.from_string(source), + context, ctx.app) diff --git a/python/flask/testing.py b/python/flask/testing.py new file mode 100644 index 0000000..114c5cc --- /dev/null +++ b/python/flask/testing.py @@ -0,0 +1,246 @@ +# -*- coding: utf-8 -*- +""" + flask.testing + ~~~~~~~~~~~~~ + + Implements test support helpers. This module is lazily imported + and usually not used in production environments. + + :copyright: © 2010 by the Pallets team. + :license: BSD, see LICENSE for more details. +""" + +import werkzeug +from contextlib import contextmanager + +from click.testing import CliRunner +from flask.cli import ScriptInfo +from werkzeug.test import Client, EnvironBuilder +from flask import _request_ctx_stack +from flask.json import dumps as json_dumps +from werkzeug.urls import url_parse + + +def make_test_environ_builder( + app, path='/', base_url=None, subdomain=None, url_scheme=None, + *args, **kwargs +): + """Create a :class:`~werkzeug.test.EnvironBuilder`, taking some + defaults from the application. + + :param app: The Flask application to configure the environment from. + :param path: URL path being requested. + :param base_url: Base URL where the app is being served, which + ``path`` is relative to. If not given, built from + :data:`PREFERRED_URL_SCHEME`, ``subdomain``, + :data:`SERVER_NAME`, and :data:`APPLICATION_ROOT`. + :param subdomain: Subdomain name to append to :data:`SERVER_NAME`. + :param url_scheme: Scheme to use instead of + :data:`PREFERRED_URL_SCHEME`. + :param json: If given, this is serialized as JSON and passed as + ``data``. Also defaults ``content_type`` to + ``application/json``. + :param args: other positional arguments passed to + :class:`~werkzeug.test.EnvironBuilder`. + :param kwargs: other keyword arguments passed to + :class:`~werkzeug.test.EnvironBuilder`. + """ + + assert ( + not (base_url or subdomain or url_scheme) + or (base_url is not None) != bool(subdomain or url_scheme) + ), 'Cannot pass "subdomain" or "url_scheme" with "base_url".' + + if base_url is None: + http_host = app.config.get('SERVER_NAME') or 'localhost' + app_root = app.config['APPLICATION_ROOT'] + + if subdomain: + http_host = '{0}.{1}'.format(subdomain, http_host) + + if url_scheme is None: + url_scheme = app.config['PREFERRED_URL_SCHEME'] + + url = url_parse(path) + base_url = '{scheme}://{netloc}/{path}'.format( + scheme=url.scheme or url_scheme, + netloc=url.netloc or http_host, + path=app_root.lstrip('/') + ) + path = url.path + + if url.query: + sep = b'?' if isinstance(url.query, bytes) else '?' + path += sep + url.query + + # TODO use EnvironBuilder.json_dumps once we require Werkzeug 0.15 + if 'json' in kwargs: + assert 'data' not in kwargs, "Client cannot provide both 'json' and 'data'." + kwargs['data'] = json_dumps(kwargs.pop('json'), app=app) + + if 'content_type' not in kwargs: + kwargs['content_type'] = 'application/json' + + return EnvironBuilder(path, base_url, *args, **kwargs) + + +class FlaskClient(Client): + """Works like a regular Werkzeug test client but has some knowledge about + how Flask works to defer the cleanup of the request context stack to the + end of a ``with`` body when used in a ``with`` statement. For general + information about how to use this class refer to + :class:`werkzeug.test.Client`. 
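+
+    A short usage sketch, assuming ``app`` is an existing
+    :class:`~flask.Flask` application::
+
+        client = app.test_client()
+        with client:
+            rv = client.get('/')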
+
+    .. versionchanged:: 0.12
+       ``app.test_client()`` includes a preset default environment, which
+       can be set after instantiation of the ``app.test_client()`` object
+       in ``client.environ_base``.
+
+    Basic usage is outlined in the :ref:`testing` chapter.
+    """
+
+    preserve_context = False
+
+    def __init__(self, *args, **kwargs):
+        super(FlaskClient, self).__init__(*args, **kwargs)
+        self.environ_base = {
+            "REMOTE_ADDR": "127.0.0.1",
+            "HTTP_USER_AGENT": "werkzeug/" + werkzeug.__version__
+        }
+
+    @contextmanager
+    def session_transaction(self, *args, **kwargs):
+        """When used in combination with a ``with`` statement this opens a
+        session transaction. This can be used to modify the session that
+        the test client uses. Once the ``with`` block is left the session
+        is stored back.
+
+        ::
+
+            with client.session_transaction() as session:
+                session['value'] = 42
+
+        Internally this is implemented by going through a temporary test
+        request context, and since session handling could depend on
+        request variables this function accepts the same arguments as
+        :meth:`~flask.Flask.test_request_context`, which are directly
+        passed through.
+        """
+        if self.cookie_jar is None:
+            raise RuntimeError('Session transactions only make sense '
+                               'with cookies enabled.')
+        app = self.application
+        environ_overrides = kwargs.setdefault('environ_overrides', {})
+        self.cookie_jar.inject_wsgi(environ_overrides)
+        outer_reqctx = _request_ctx_stack.top
+        with app.test_request_context(*args, **kwargs) as c:
+            session_interface = app.session_interface
+            sess = session_interface.open_session(app, c.request)
+            if sess is None:
+                raise RuntimeError('Session backend did not open a session. '
+                                   'Check the configuration.')
+
+            # Since we have to open a new request context for the session
+            # handling we want to make sure that we hide our own context
+            # from the caller. By pushing the original request context
+            # (or None) on top of this and popping it we get exactly that
+            # behavior.
It's important to not use the push and pop + # methods of the actual request context object since that would + # mean that cleanup handlers are called + _request_ctx_stack.push(outer_reqctx) + try: + yield sess + finally: + _request_ctx_stack.pop() + + resp = app.response_class() + if not session_interface.is_null_session(sess): + session_interface.save_session(app, sess, resp) + headers = resp.get_wsgi_headers(c.request.environ) + self.cookie_jar.extract_wsgi(c.request.environ, headers) + + def open(self, *args, **kwargs): + as_tuple = kwargs.pop('as_tuple', False) + buffered = kwargs.pop('buffered', False) + follow_redirects = kwargs.pop('follow_redirects', False) + + if ( + not kwargs and len(args) == 1 + and isinstance(args[0], (EnvironBuilder, dict)) + ): + environ = self.environ_base.copy() + + if isinstance(args[0], EnvironBuilder): + environ.update(args[0].get_environ()) + else: + environ.update(args[0]) + + environ['flask._preserve_context'] = self.preserve_context + else: + kwargs.setdefault('environ_overrides', {}) \ + ['flask._preserve_context'] = self.preserve_context + kwargs.setdefault('environ_base', self.environ_base) + builder = make_test_environ_builder( + self.application, *args, **kwargs + ) + + try: + environ = builder.get_environ() + finally: + builder.close() + + return Client.open( + self, environ, + as_tuple=as_tuple, + buffered=buffered, + follow_redirects=follow_redirects + ) + + def __enter__(self): + if self.preserve_context: + raise RuntimeError('Cannot nest client invocations') + self.preserve_context = True + return self + + def __exit__(self, exc_type, exc_value, tb): + self.preserve_context = False + + # on exit we want to clean up earlier. Normally the request context + # stays preserved until the next request in the same thread comes + # in. See RequestGlobals.push() for the general behavior. + top = _request_ctx_stack.top + if top is not None and top.preserved: + top.pop() + + +class FlaskCliRunner(CliRunner): + """A :class:`~click.testing.CliRunner` for testing a Flask app's + CLI commands. Typically created using + :meth:`~flask.Flask.test_cli_runner`. See :ref:`testing-cli`. + """ + def __init__(self, app, **kwargs): + self.app = app + super(FlaskCliRunner, self).__init__(**kwargs) + + def invoke(self, cli=None, args=None, **kwargs): + """Invokes a CLI command in an isolated environment. See + :meth:`CliRunner.invoke <click.testing.CliRunner.invoke>` for + full method documentation. See :ref:`testing-cli` for examples. + + If the ``obj`` argument is not given, passes an instance of + :class:`~flask.cli.ScriptInfo` that knows how to load the Flask + app being tested. + + :param cli: Command object to invoke. Default is the app's + :attr:`~flask.app.Flask.cli` group. + :param args: List of strings to invoke the command with. + + :return: a :class:`~click.testing.Result` object. + """ + if cli is None: + cli = self.app.cli + + if 'obj' not in kwargs: + kwargs['obj'] = ScriptInfo(create_app=lambda: self.app) + + return super(FlaskCliRunner, self).invoke(cli, args, **kwargs) diff --git a/python/flask/views.py b/python/flask/views.py new file mode 100644 index 0000000..1f2c997 --- /dev/null +++ b/python/flask/views.py @@ -0,0 +1,158 @@ +# -*- coding: utf-8 -*- +""" + flask.views + ~~~~~~~~~~~ + + This module provides class-based views inspired by the ones in Django. + + :copyright: © 2010 by the Pallets team. + :license: BSD, see LICENSE for more details. 
+""" + +from .globals import request +from ._compat import with_metaclass + + +http_method_funcs = frozenset(['get', 'post', 'head', 'options', + 'delete', 'put', 'trace', 'patch']) + + +class View(object): + """Alternative way to use view functions. A subclass has to implement + :meth:`dispatch_request` which is called with the view arguments from + the URL routing system. If :attr:`methods` is provided the methods + do not have to be passed to the :meth:`~flask.Flask.add_url_rule` + method explicitly:: + + class MyView(View): + methods = ['GET'] + + def dispatch_request(self, name): + return 'Hello %s!' % name + + app.add_url_rule('/hello/<name>', view_func=MyView.as_view('myview')) + + When you want to decorate a pluggable view you will have to either do that + when the view function is created (by wrapping the return value of + :meth:`as_view`) or you can use the :attr:`decorators` attribute:: + + class SecretView(View): + methods = ['GET'] + decorators = [superuser_required] + + def dispatch_request(self): + ... + + The decorators stored in the decorators list are applied one after another + when the view function is created. Note that you can *not* use the class + based decorators since those would decorate the view class and not the + generated view function! + """ + + #: A list of methods this view can handle. + methods = None + + #: Setting this disables or force-enables the automatic options handling. + provide_automatic_options = None + + #: The canonical way to decorate class-based views is to decorate the + #: return value of as_view(). However since this moves parts of the + #: logic from the class declaration to the place where it's hooked + #: into the routing system. + #: + #: You can place one or more decorators in this list and whenever the + #: view function is created the result is automatically decorated. + #: + #: .. versionadded:: 0.8 + decorators = () + + def dispatch_request(self): + """Subclasses have to override this method to implement the + actual view function code. This method is called with all + the arguments from the URL rule. + """ + raise NotImplementedError() + + @classmethod + def as_view(cls, name, *class_args, **class_kwargs): + """Converts the class into an actual view function that can be used + with the routing system. Internally this generates a function on the + fly which will instantiate the :class:`View` on each request and call + the :meth:`dispatch_request` method on it. + + The arguments passed to :meth:`as_view` are forwarded to the + constructor of the class. + """ + def view(*args, **kwargs): + self = view.view_class(*class_args, **class_kwargs) + return self.dispatch_request(*args, **kwargs) + + if cls.decorators: + view.__name__ = name + view.__module__ = cls.__module__ + for decorator in cls.decorators: + view = decorator(view) + + # We attach the view class to the view function for two reasons: + # first of all it allows us to easily figure out what class-based + # view this thing came from, secondly it's also used for instantiating + # the view class so you can actually replace it with something else + # for testing purposes and debugging. + view.view_class = cls + view.__name__ = name + view.__doc__ = cls.__doc__ + view.__module__ = cls.__module__ + view.methods = cls.methods + view.provide_automatic_options = cls.provide_automatic_options + return view + + +class MethodViewType(type): + """Metaclass for :class:`MethodView` that determines what methods the view + defines. 
+ """ + + def __init__(cls, name, bases, d): + super(MethodViewType, cls).__init__(name, bases, d) + + if 'methods' not in d: + methods = set() + + for key in http_method_funcs: + if hasattr(cls, key): + methods.add(key.upper()) + + # If we have no method at all in there we don't want to add a + # method list. This is for instance the case for the base class + # or another subclass of a base method view that does not introduce + # new methods. + if methods: + cls.methods = methods + + +class MethodView(with_metaclass(MethodViewType, View)): + """A class-based view that dispatches request methods to the corresponding + class methods. For example, if you implement a ``get`` method, it will be + used to handle ``GET`` requests. :: + + class CounterAPI(MethodView): + def get(self): + return session.get('counter', 0) + + def post(self): + session['counter'] = session.get('counter', 0) + 1 + return 'OK' + + app.add_url_rule('/counter', view_func=CounterAPI.as_view('counter')) + """ + + def dispatch_request(self, *args, **kwargs): + meth = getattr(self, request.method.lower(), None) + + # If the request method is HEAD and we don't have a handler for it + # retry with GET. + if meth is None and request.method == 'HEAD': + meth = getattr(self, 'get', None) + + assert meth is not None, 'Unimplemented method %r' % request.method + return meth(*args, **kwargs) diff --git a/python/flask/wrappers.py b/python/flask/wrappers.py new file mode 100644 index 0000000..12eff2c --- /dev/null +++ b/python/flask/wrappers.py @@ -0,0 +1,216 @@ +# -*- coding: utf-8 -*- +""" + flask.wrappers + ~~~~~~~~~~~~~~ + + Implements the WSGI wrappers (request and response). + + :copyright: © 2010 by the Pallets team. + :license: BSD, see LICENSE for more details. +""" + +from werkzeug.exceptions import BadRequest +from werkzeug.wrappers import Request as RequestBase, Response as ResponseBase + +from flask import json +from flask.globals import current_app + + +class JSONMixin(object): + """Common mixin for both request and response objects to provide JSON + parsing capabilities. + + .. versionadded:: 1.0 + """ + + _cached_json = (Ellipsis, Ellipsis) + + @property + def is_json(self): + """Check if the mimetype indicates JSON data, either + :mimetype:`application/json` or :mimetype:`application/*+json`. + + .. versionadded:: 0.11 + """ + mt = self.mimetype + return ( + mt == 'application/json' + or (mt.startswith('application/')) and mt.endswith('+json') + ) + + @property + def json(self): + """This will contain the parsed JSON data if the mimetype indicates + JSON (:mimetype:`application/json`, see :meth:`is_json`), otherwise it + will be ``None``. + """ + return self.get_json() + + def _get_data_for_json(self, cache): + return self.get_data(cache=cache) + + def get_json(self, force=False, silent=False, cache=True): + """Parse and return the data as JSON. If the mimetype does not + indicate JSON (:mimetype:`application/json`, see + :meth:`is_json`), this returns ``None`` unless ``force`` is + true. If parsing fails, :meth:`on_json_loading_failed` is called + and its return value is used as the return value. + + :param force: Ignore the mimetype and always try to parse JSON. + :param silent: Silence parsing errors and return ``None`` + instead. + :param cache: Store the parsed JSON to return for subsequent + calls. 
+ """ + if cache and self._cached_json[silent] is not Ellipsis: + return self._cached_json[silent] + + if not (force or self.is_json): + return None + + data = self._get_data_for_json(cache=cache) + + try: + rv = json.loads(data) + except ValueError as e: + if silent: + rv = None + if cache: + normal_rv, _ = self._cached_json + self._cached_json = (normal_rv, rv) + else: + rv = self.on_json_loading_failed(e) + if cache: + _, silent_rv = self._cached_json + self._cached_json = (rv, silent_rv) + else: + if cache: + self._cached_json = (rv, rv) + + return rv + + def on_json_loading_failed(self, e): + """Called if :meth:`get_json` parsing fails and isn't silenced. If + this method returns a value, it is used as the return value for + :meth:`get_json`. The default implementation raises a + :class:`BadRequest` exception. + + .. versionchanged:: 0.10 + Raise a :exc:`BadRequest` error instead of returning an error + message as JSON. If you want that behavior you can add it by + subclassing. + + .. versionadded:: 0.8 + """ + if current_app is not None and current_app.debug: + raise BadRequest('Failed to decode JSON object: {0}'.format(e)) + + raise BadRequest() + + +class Request(RequestBase, JSONMixin): + """The request object used by default in Flask. Remembers the + matched endpoint and view arguments. + + It is what ends up as :class:`~flask.request`. If you want to replace + the request object used you can subclass this and set + :attr:`~flask.Flask.request_class` to your subclass. + + The request object is a :class:`~werkzeug.wrappers.Request` subclass and + provides all of the attributes Werkzeug defines plus a few Flask + specific ones. + """ + + #: The internal URL rule that matched the request. This can be + #: useful to inspect which methods are allowed for the URL from + #: a before/after handler (``request.url_rule.methods``) etc. + #: Though if the request's method was invalid for the URL rule, + #: the valid list is available in ``routing_exception.valid_methods`` + #: instead (an attribute of the Werkzeug exception :exc:`~werkzeug.exceptions.MethodNotAllowed`) + #: because the request was never internally bound. + #: + #: .. versionadded:: 0.6 + url_rule = None + + #: A dict of view arguments that matched the request. If an exception + #: happened when matching, this will be ``None``. + view_args = None + + #: If matching the URL failed, this is the exception that will be + #: raised / was raised as part of the request handling. This is + #: usually a :exc:`~werkzeug.exceptions.NotFound` exception or + #: something similar. + routing_exception = None + + @property + def max_content_length(self): + """Read-only view of the ``MAX_CONTENT_LENGTH`` config key.""" + if current_app: + return current_app.config['MAX_CONTENT_LENGTH'] + + @property + def endpoint(self): + """The endpoint that matched the request. This in combination with + :attr:`view_args` can be used to reconstruct the same or a + modified URL. If an exception happened when matching, this will + be ``None``. + """ + if self.url_rule is not None: + return self.url_rule.endpoint + + @property + def blueprint(self): + """The name of the current blueprint""" + if self.url_rule and '.' in self.url_rule.endpoint: + return self.url_rule.endpoint.rsplit('.', 1)[0] + + def _load_form_data(self): + RequestBase._load_form_data(self) + + # In debug mode we're replacing the files multidict with an ad-hoc + # subclass that raises a different error for key errors. 
+ if ( + current_app + and current_app.debug + and self.mimetype != 'multipart/form-data' + and not self.files + ): + from .debughelpers import attach_enctype_error_multidict + attach_enctype_error_multidict(self) + + +class Response(ResponseBase, JSONMixin): + """The response object that is used by default in Flask. Works like the + response object from Werkzeug but is set to have an HTML mimetype by + default. Quite often you don't have to create this object yourself because + :meth:`~flask.Flask.make_response` will take care of that for you. + + If you want to replace the response object used you can subclass this and + set :attr:`~flask.Flask.response_class` to your subclass. + + .. versionchanged:: 1.0 + JSON support is added to the response, like the request. This is useful + when testing to get the test client response data as JSON. + + .. versionchanged:: 1.0 + + Added :attr:`max_cookie_size`. + """ + + default_mimetype = 'text/html' + + def _get_data_for_json(self, cache): + return self.get_data() + + @property + def max_cookie_size(self): + """Read-only view of the :data:`MAX_COOKIE_SIZE` config key. + + See :attr:`~werkzeug.wrappers.BaseResponse.max_cookie_size` in + Werkzeug's docs. + """ + if current_app: + return current_app.config['MAX_COOKIE_SIZE'] + + # return Werkzeug's default when not in an app context + return super(Response, self).max_cookie_size diff --git a/python/itsdangerous/__init__.py b/python/itsdangerous/__init__.py new file mode 100644 index 0000000..0fcd8c1 --- /dev/null +++ b/python/itsdangerous/__init__.py @@ -0,0 +1,22 @@ +from ._json import json +from .encoding import base64_decode +from .encoding import base64_encode +from .encoding import want_bytes +from .exc import BadData +from .exc import BadHeader +from .exc import BadPayload +from .exc import BadSignature +from .exc import BadTimeSignature +from .exc import SignatureExpired +from .jws import JSONWebSignatureSerializer +from .jws import TimedJSONWebSignatureSerializer +from .serializer import Serializer +from .signer import HMACAlgorithm +from .signer import NoneAlgorithm +from .signer import Signer +from .timed import TimedSerializer +from .timed import TimestampSigner +from .url_safe import URLSafeSerializer +from .url_safe import URLSafeTimedSerializer + +__version__ = "1.1.0" diff --git a/python/itsdangerous/_compat.py b/python/itsdangerous/_compat.py new file mode 100644 index 0000000..2291bce --- /dev/null +++ b/python/itsdangerous/_compat.py @@ -0,0 +1,46 @@ +import decimal +import hmac +import numbers +import sys + +PY2 = sys.version_info[0] == 2 + +if PY2: + from itertools import izip + + text_type = unicode # noqa: 821 +else: + izip = zip + text_type = str + +number_types = (numbers.Real, decimal.Decimal) + + +def _constant_time_compare(val1, val2): + """Return ``True`` if the two strings are equal, ``False`` + otherwise. + + The time taken is independent of the number of characters that + match. Do not use this function for anything else than comparision + with known length targets. + + This is should be implemented in C in order to get it completely + right. + + This is an alias of :func:`hmac.compare_digest` on Python>=2.7,3.3. + """ + len_eq = len(val1) == len(val2) + if len_eq: + result = 0 + left = val1 + else: + result = 1 + left = val2 + for x, y in izip(bytearray(left), bytearray(val2)): + result |= x ^ y + return result == 0 + + +# Starting with 2.7/3.3 the standard library has a c-implementation for +# constant time string compares. 
+constant_time_compare = getattr(hmac, "compare_digest", _constant_time_compare) diff --git a/python/itsdangerous/_json.py b/python/itsdangerous/_json.py new file mode 100644 index 0000000..426b36e --- /dev/null +++ b/python/itsdangerous/_json.py @@ -0,0 +1,18 @@ +try: + import simplejson as json +except ImportError: + import json + + +class _CompactJSON(object): + """Wrapper around json module that strips whitespace.""" + + @staticmethod + def loads(payload): + return json.loads(payload) + + @staticmethod + def dumps(obj, **kwargs): + kwargs.setdefault("ensure_ascii", False) + kwargs.setdefault("separators", (",", ":")) + return json.dumps(obj, **kwargs) diff --git a/python/itsdangerous/encoding.py b/python/itsdangerous/encoding.py new file mode 100644 index 0000000..1e28969 --- /dev/null +++ b/python/itsdangerous/encoding.py @@ -0,0 +1,49 @@ +import base64 +import string +import struct + +from ._compat import text_type +from .exc import BadData + + +def want_bytes(s, encoding="utf-8", errors="strict"): + if isinstance(s, text_type): + s = s.encode(encoding, errors) + return s + + +def base64_encode(string): + """Base64 encode a string of bytes or text. The resulting bytes are + safe to use in URLs. + """ + string = want_bytes(string) + return base64.urlsafe_b64encode(string).rstrip(b"=") + + +def base64_decode(string): + """Base64 decode a URL-safe string of bytes or text. The result is + bytes. + """ + string = want_bytes(string, encoding="ascii", errors="ignore") + string += b"=" * (-len(string) % 4) + + try: + return base64.urlsafe_b64decode(string) + except (TypeError, ValueError): + raise BadData("Invalid base64-encoded data") + + +# The alphabet used by base64.urlsafe_* +_base64_alphabet = (string.ascii_letters + string.digits + "-_=").encode("ascii") + +_int64_struct = struct.Struct(">Q") +_int_to_bytes = _int64_struct.pack +_bytes_to_int = _int64_struct.unpack + + +def int_to_bytes(num): + return _int_to_bytes(num).lstrip(b"\x00") + + +def bytes_to_int(bytestr): + return _bytes_to_int(bytestr.rjust(8, b"\x00"))[0] diff --git a/python/itsdangerous/exc.py b/python/itsdangerous/exc.py new file mode 100644 index 0000000..287d691 --- /dev/null +++ b/python/itsdangerous/exc.py @@ -0,0 +1,98 @@ +from ._compat import PY2 +from ._compat import text_type + + +class BadData(Exception): + """Raised if bad data of any sort was encountered. This is the base + for all exceptions that itsdangerous defines. + + .. versionadded:: 0.15 + """ + + message = None + + def __init__(self, message): + super(BadData, self).__init__(self, message) + self.message = message + + def __str__(self): + return text_type(self.message) + + if PY2: + __unicode__ = __str__ + + def __str__(self): + return self.__unicode__().encode("utf-8") + + +class BadSignature(BadData): + """Raised if a signature does not match.""" + + def __init__(self, message, payload=None): + BadData.__init__(self, message) + + #: The payload that failed the signature test. In some + #: situations you might still want to inspect this, even if + #: you know it was tampered with. + #: + #: .. versionadded:: 0.14 + self.payload = payload + + +class BadTimeSignature(BadSignature): + """Raised if a time-based signature is invalid. This is a subclass + of :class:`BadSignature`. + """ + + def __init__(self, message, payload=None, date_signed=None): + BadSignature.__init__(self, message, payload) + + #: If the signature expired this exposes the date of when the + #: signature was created. 
This can be helpful in order to + #: tell the user how long a link has been gone stale. + #: + #: .. versionadded:: 0.14 + self.date_signed = date_signed + + +class SignatureExpired(BadTimeSignature): + """Raised if a signature timestamp is older than ``max_age``. This + is a subclass of :exc:`BadTimeSignature`. + """ + + +class BadHeader(BadSignature): + """Raised if a signed header is invalid in some form. This only + happens for serializers that have a header that goes with the + signature. + + .. versionadded:: 0.24 + """ + + def __init__(self, message, payload=None, header=None, original_error=None): + BadSignature.__init__(self, message, payload) + + #: If the header is actually available but just malformed it + #: might be stored here. + self.header = header + + #: If available, the error that indicates why the payload was + #: not valid. This might be ``None``. + self.original_error = original_error + + +class BadPayload(BadData): + """Raised if a payload is invalid. This could happen if the payload + is loaded despite an invalid signature, or if there is a mismatch + between the serializer and deserializer. The original exception + that occurred during loading is stored on as :attr:`original_error`. + + .. versionadded:: 0.15 + """ + + def __init__(self, message, original_error=None): + BadData.__init__(self, message) + + #: If available, the error that indicates why the payload was + #: not valid. This might be ``None``. + self.original_error = original_error diff --git a/python/itsdangerous/jws.py b/python/itsdangerous/jws.py new file mode 100644 index 0000000..92e9ec8 --- /dev/null +++ b/python/itsdangerous/jws.py @@ -0,0 +1,218 @@ +import hashlib +import time +from datetime import datetime + +from ._compat import number_types +from ._json import _CompactJSON +from ._json import json +from .encoding import base64_decode +from .encoding import base64_encode +from .encoding import want_bytes +from .exc import BadData +from .exc import BadHeader +from .exc import BadPayload +from .exc import BadSignature +from .exc import SignatureExpired +from .serializer import Serializer +from .signer import HMACAlgorithm +from .signer import NoneAlgorithm + + +class JSONWebSignatureSerializer(Serializer): + """This serializer implements JSON Web Signature (JWS) support. Only + supports the JWS Compact Serialization. + """ + + jws_algorithms = { + "HS256": HMACAlgorithm(hashlib.sha256), + "HS384": HMACAlgorithm(hashlib.sha384), + "HS512": HMACAlgorithm(hashlib.sha512), + "none": NoneAlgorithm(), + } + + #: The default algorithm to use for signature generation + default_algorithm = "HS512" + + default_serializer = _CompactJSON + + def __init__( + self, + secret_key, + salt=None, + serializer=None, + serializer_kwargs=None, + signer=None, + signer_kwargs=None, + algorithm_name=None, + ): + Serializer.__init__( + self, + secret_key=secret_key, + salt=salt, + serializer=serializer, + serializer_kwargs=serializer_kwargs, + signer=signer, + signer_kwargs=signer_kwargs, + ) + if algorithm_name is None: + algorithm_name = self.default_algorithm + self.algorithm_name = algorithm_name + self.algorithm = self.make_algorithm(algorithm_name) + + def load_payload(self, payload, serializer=None, return_header=False): + payload = want_bytes(payload) + if b"." not in payload: + raise BadPayload('No "." 
found in value') + base64d_header, base64d_payload = payload.split(b".", 1) + try: + json_header = base64_decode(base64d_header) + except Exception as e: + raise BadHeader( + "Could not base64 decode the header because of an exception", + original_error=e, + ) + try: + json_payload = base64_decode(base64d_payload) + except Exception as e: + raise BadPayload( + "Could not base64 decode the payload because of an exception", + original_error=e, + ) + try: + header = Serializer.load_payload(self, json_header, serializer=json) + except BadData as e: + raise BadHeader( + "Could not unserialize header because it was malformed", + original_error=e, + ) + if not isinstance(header, dict): + raise BadHeader("Header payload is not a JSON object", header=header) + payload = Serializer.load_payload(self, json_payload, serializer=serializer) + if return_header: + return payload, header + return payload + + def dump_payload(self, header, obj): + base64d_header = base64_encode( + self.serializer.dumps(header, **self.serializer_kwargs) + ) + base64d_payload = base64_encode( + self.serializer.dumps(obj, **self.serializer_kwargs) + ) + return base64d_header + b"." + base64d_payload + + def make_algorithm(self, algorithm_name): + try: + return self.jws_algorithms[algorithm_name] + except KeyError: + raise NotImplementedError("Algorithm not supported") + + def make_signer(self, salt=None, algorithm=None): + if salt is None: + salt = self.salt + key_derivation = "none" if salt is None else None + if algorithm is None: + algorithm = self.algorithm + return self.signer( + self.secret_key, + salt=salt, + sep=".", + key_derivation=key_derivation, + algorithm=algorithm, + ) + + def make_header(self, header_fields): + header = header_fields.copy() if header_fields else {} + header["alg"] = self.algorithm_name + return header + + def dumps(self, obj, salt=None, header_fields=None): + """Like :meth:`.Serializer.dumps` but creates a JSON Web + Signature. It also allows for specifying additional fields to be + included in the JWS header. + """ + header = self.make_header(header_fields) + signer = self.make_signer(salt, self.algorithm) + return signer.sign(self.dump_payload(header, obj)) + + def loads(self, s, salt=None, return_header=False): + """Reverse of :meth:`dumps`. If requested via ``return_header`` + it will return a tuple of payload and header. + """ + payload, header = self.load_payload( + self.make_signer(salt, self.algorithm).unsign(want_bytes(s)), + return_header=True, + ) + if header.get("alg") != self.algorithm_name: + raise BadHeader("Algorithm mismatch", header=header, payload=payload) + if return_header: + return payload, header + return payload + + def loads_unsafe(self, s, salt=None, return_header=False): + kwargs = {"return_header": return_header} + return self._loads_unsafe_impl(s, salt, kwargs, kwargs) + + +class TimedJSONWebSignatureSerializer(JSONWebSignatureSerializer): + """Works like the regular :class:`JSONWebSignatureSerializer` but + also records the time of the signing and can be used to expire + signatures. + + JWS currently does not specify this behavior but it mentions a + possible extension like this in the spec. Expiry date is encoded + into the header similar to what's specified in `draft-ietf-oauth + -json-web-token <http://self-issued.info/docs/draft-ietf-oauth-json + -web-token.html#expDef>`_. 
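+
+    A minimal sketch (secret key and payload are illustrative)::
+
+        s = TimedJSONWebSignatureSerializer('secret-key', expires_in=60)
+        token = s.dumps({'id': 1})
+        data = s.loads(token)  # raises SignatureExpired after 60 seconds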
+ """ + + DEFAULT_EXPIRES_IN = 3600 + + def __init__(self, secret_key, expires_in=None, **kwargs): + JSONWebSignatureSerializer.__init__(self, secret_key, **kwargs) + if expires_in is None: + expires_in = self.DEFAULT_EXPIRES_IN + self.expires_in = expires_in + + def make_header(self, header_fields): + header = JSONWebSignatureSerializer.make_header(self, header_fields) + iat = self.now() + exp = iat + self.expires_in + header["iat"] = iat + header["exp"] = exp + return header + + def loads(self, s, salt=None, return_header=False): + payload, header = JSONWebSignatureSerializer.loads( + self, s, salt, return_header=True + ) + + if "exp" not in header: + raise BadSignature("Missing expiry date", payload=payload) + + int_date_error = BadHeader("Expiry date is not an IntDate", payload=payload) + try: + header["exp"] = int(header["exp"]) + except ValueError: + raise int_date_error + if header["exp"] < 0: + raise int_date_error + + if header["exp"] < self.now(): + raise SignatureExpired( + "Signature expired", + payload=payload, + date_signed=self.get_issue_date(header), + ) + + if return_header: + return payload, header + return payload + + def get_issue_date(self, header): + rv = header.get("iat") + if isinstance(rv, number_types): + return datetime.utcfromtimestamp(int(rv)) + + def now(self): + return int(time.time()) diff --git a/python/itsdangerous/serializer.py b/python/itsdangerous/serializer.py new file mode 100644 index 0000000..12c20f4 --- /dev/null +++ b/python/itsdangerous/serializer.py @@ -0,0 +1,233 @@ +import hashlib + +from ._compat import text_type +from ._json import json +from .encoding import want_bytes +from .exc import BadPayload +from .exc import BadSignature +from .signer import Signer + + +def is_text_serializer(serializer): + """Checks whether a serializer generates text or binary.""" + return isinstance(serializer.dumps({}), text_type) + + +class Serializer(object): + """This class provides a serialization interface on top of the + signer. It provides a similar API to json/pickle and other modules + but is structured differently internally. If you want to change the + underlying implementation for parsing and loading you have to + override the :meth:`load_payload` and :meth:`dump_payload` + functions. + + This implementation uses simplejson if available for dumping and + loading and will fall back to the standard library's json module if + it's not available. + + You do not need to subclass this class in order to switch out or + customize the :class:`.Signer`. You can instead pass a different + class to the constructor as well as keyword arguments as a dict that + should be forwarded. + + .. code-block:: python + + s = Serializer(signer_kwargs={'key_derivation': 'hmac'}) + + You may want to upgrade the signing parameters without invalidating + existing signatures that are in use. Fallback signatures can be + given that will be tried if unsigning with the current signer fails. + + Fallback signers can be defined by providing a list of + ``fallback_signers``. Each item can be one of the following: a + signer class (which is instantiated with ``signer_kwargs``, + ``salt``, and ``secret_key``), a tuple + ``(signer_class, signer_kwargs)``, or a dict of ``signer_kwargs``. + + For example, this is a serializer that signs using SHA-512, but will + unsign using either SHA-512 or SHA1: + + .. code-block:: python + + s = Serializer( + signer_kwargs={"digest_method": hashlib.sha512}, + fallback_signers=[{"digest_method": hashlib.sha1}] + ) + + .. 
versionchanged:: 0.14: + The ``signer`` and ``signer_kwargs`` parameters were added to + the constructor. + + .. versionchanged:: 1.1.0: + Added support for ``fallback_signers`` and configured a default + SHA-512 fallback. This fallback is for users who used the yanked + 1.0.0 release which defaulted to SHA-512. + """ + + #: If a serializer module or class is not passed to the constructor + #: this one is picked up. This currently defaults to :mod:`json`. + default_serializer = json + + #: The default :class:`Signer` class that is being used by this + #: serializer. + #: + #: .. versionadded:: 0.14 + default_signer = Signer + + #: The default fallback signers. + default_fallback_signers = [{"digest_method": hashlib.sha512}] + + def __init__( + self, + secret_key, + salt=b"itsdangerous", + serializer=None, + serializer_kwargs=None, + signer=None, + signer_kwargs=None, + fallback_signers=None, + ): + self.secret_key = want_bytes(secret_key) + self.salt = want_bytes(salt) + if serializer is None: + serializer = self.default_serializer + self.serializer = serializer + self.is_text_serializer = is_text_serializer(serializer) + if signer is None: + signer = self.default_signer + self.signer = signer + self.signer_kwargs = signer_kwargs or {} + if fallback_signers is None: + fallback_signers = list(self.default_fallback_signers or ()) + self.fallback_signers = fallback_signers + self.serializer_kwargs = serializer_kwargs or {} + + def load_payload(self, payload, serializer=None): + """Loads the encoded object. This function raises + :class:`.BadPayload` if the payload is not valid. The + ``serializer`` parameter can be used to override the serializer + stored on the class. The encoded ``payload`` should always be + bytes. + """ + if serializer is None: + serializer = self.serializer + is_text = self.is_text_serializer + else: + is_text = is_text_serializer(serializer) + try: + if is_text: + payload = payload.decode("utf-8") + return serializer.loads(payload) + except Exception as e: + raise BadPayload( + "Could not load the payload because an exception" + " occurred on unserializing the data.", + original_error=e, + ) + + def dump_payload(self, obj): + """Dumps the encoded object. The return value is always bytes. + If the internal serializer returns text, the value will be + encoded as UTF-8. + """ + return want_bytes(self.serializer.dumps(obj, **self.serializer_kwargs)) + + def make_signer(self, salt=None): + """Creates a new instance of the signer to be used. The default + implementation uses the :class:`.Signer` base class. + """ + if salt is None: + salt = self.salt + return self.signer(self.secret_key, salt=salt, **self.signer_kwargs) + + def iter_unsigners(self, salt=None): + """Iterates over all signers to be tried for unsigning. Starts + with the configured signer, then constructs each signer + specified in ``fallback_signers``. + """ + if salt is None: + salt = self.salt + yield self.make_signer(salt) + for fallback in self.fallback_signers: + if type(fallback) is dict: + kwargs = fallback + fallback = self.signer + elif type(fallback) is tuple: + fallback, kwargs = fallback + else: + kwargs = self.signer_kwargs + yield fallback(self.secret_key, salt=salt, **kwargs) + + def dumps(self, obj, salt=None): + """Returns a signed string serialized with the internal + serializer. The return value can be either a byte or unicode + string depending on the format of the internal serializer. 
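+
+        For example, with illustrative values::
+
+            s = Serializer('secret-key')
+            token = s.dumps({'id': 1})
+            assert s.loads(token) == {'id': 1}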
+ """ + payload = want_bytes(self.dump_payload(obj)) + rv = self.make_signer(salt).sign(payload) + if self.is_text_serializer: + rv = rv.decode("utf-8") + return rv + + def dump(self, obj, f, salt=None): + """Like :meth:`dumps` but dumps into a file. The file handle has + to be compatible with what the internal serializer expects. + """ + f.write(self.dumps(obj, salt)) + + def loads(self, s, salt=None): + """Reverse of :meth:`dumps`. Raises :exc:`.BadSignature` if the + signature validation fails. + """ + s = want_bytes(s) + last_exception = None + for signer in self.iter_unsigners(salt): + try: + return self.load_payload(signer.unsign(s)) + except BadSignature as err: + last_exception = err + raise last_exception + + def load(self, f, salt=None): + """Like :meth:`loads` but loads from a file.""" + return self.loads(f.read(), salt) + + def loads_unsafe(self, s, salt=None): + """Like :meth:`loads` but without verifying the signature. This + is potentially very dangerous to use depending on how your + serializer works. The return value is ``(signature_valid, + payload)`` instead of just the payload. The first item will be a + boolean that indicates if the signature is valid. This function + never fails. + + Use it for debugging only and if you know that your serializer + module is not exploitable (for example, do not use it with a + pickle serializer). + + .. versionadded:: 0.15 + """ + return self._loads_unsafe_impl(s, salt) + + def _loads_unsafe_impl(self, s, salt, load_kwargs=None, load_payload_kwargs=None): + """Low level helper function to implement :meth:`loads_unsafe` + in serializer subclasses. + """ + try: + return True, self.loads(s, salt=salt, **(load_kwargs or {})) + except BadSignature as e: + if e.payload is None: + return False, None + try: + return ( + False, + self.load_payload(e.payload, **(load_payload_kwargs or {})), + ) + except BadPayload: + return False, None + + def load_unsafe(self, f, *args, **kwargs): + """Like :meth:`loads_unsafe` but loads from a file. + + .. versionadded:: 0.15 + """ + return self.loads_unsafe(f.read(), *args, **kwargs) diff --git a/python/itsdangerous/signer.py b/python/itsdangerous/signer.py new file mode 100644 index 0000000..6bddc03 --- /dev/null +++ b/python/itsdangerous/signer.py @@ -0,0 +1,179 @@ +import hashlib +import hmac + +from ._compat import constant_time_compare +from .encoding import _base64_alphabet +from .encoding import base64_decode +from .encoding import base64_encode +from .encoding import want_bytes +from .exc import BadSignature + + +class SigningAlgorithm(object): + """Subclasses must implement :meth:`get_signature` to provide + signature generation functionality. + """ + + def get_signature(self, key, value): + """Returns the signature for the given key and value.""" + raise NotImplementedError() + + def verify_signature(self, key, value, sig): + """Verifies the given signature matches the expected + signature. + """ + return constant_time_compare(sig, self.get_signature(key, value)) + + +class NoneAlgorithm(SigningAlgorithm): + """Provides an algorithm that does not perform any signing and + returns an empty signature. + """ + + def get_signature(self, key, value): + return b"" + + +class HMACAlgorithm(SigningAlgorithm): + """Provides signature generation using HMACs.""" + + #: The digest method to use with the MAC algorithm. This defaults to + #: SHA1, but can be changed to any other function in the hashlib + #: module. 
+ default_digest_method = staticmethod(hashlib.sha1) + + def __init__(self, digest_method=None): + if digest_method is None: + digest_method = self.default_digest_method + self.digest_method = digest_method + + def get_signature(self, key, value): + mac = hmac.new(key, msg=value, digestmod=self.digest_method) + return mac.digest() + + +class Signer(object): + """This class can sign and unsign bytes, validating the signature + provided. + + Salt can be used to namespace the hash, so that a signed string is + only valid for a given namespace. Leaving this at the default value + or re-using a salt value across different parts of your application + where the same signed value in one part can mean something different + in another part is a security risk. + + See :ref:`the-salt` for an example of what the salt is doing and how + you can utilize it. + + .. versionadded:: 0.14 + ``key_derivation`` and ``digest_method`` were added as arguments + to the class constructor. + + .. versionadded:: 0.18 + ``algorithm`` was added as an argument to the class constructor. + """ + + #: The digest method to use for the signer. This defaults to + #: SHA1 but can be changed to any other function in the hashlib + #: module. + #: + #: .. versionadded:: 0.14 + default_digest_method = staticmethod(hashlib.sha1) + + #: Controls how the key is derived. The default is Django-style + #: concatenation. Possible values are ``concat``, ``django-concat`` + #: and ``hmac``. This is used for deriving a key from the secret key + #: with an added salt. + #: + #: .. versionadded:: 0.14 + default_key_derivation = "django-concat" + + def __init__( + self, + secret_key, + salt=None, + sep=".", + key_derivation=None, + digest_method=None, + algorithm=None, + ): + self.secret_key = want_bytes(secret_key) + self.sep = want_bytes(sep) + if self.sep in _base64_alphabet: + raise ValueError( + "The given separator cannot be used because it may be" + " contained in the signature itself. Alphanumeric" + " characters and `-_=` must not be used." + ) + self.salt = "itsdangerous.Signer" if salt is None else salt + if key_derivation is None: + key_derivation = self.default_key_derivation + self.key_derivation = key_derivation + if digest_method is None: + digest_method = self.default_digest_method + self.digest_method = digest_method + if algorithm is None: + algorithm = HMACAlgorithm(self.digest_method) + self.algorithm = algorithm + + def derive_key(self): + """This method is called to derive the key. The default key + derivation choices can be overridden here. Key derivation is not + intended to be used as a security method to make a complex key + out of a short password. Instead you should use large random + secret keys. 
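+
+        A small sketch of selecting a derivation mode (the secret is
+        illustrative, the keyword value is one of the modes implemented
+        below)::
+
+            signer = Signer('secret-key', key_derivation='hmac')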
+ """ + salt = want_bytes(self.salt) + if self.key_derivation == "concat": + return self.digest_method(salt + self.secret_key).digest() + elif self.key_derivation == "django-concat": + return self.digest_method(salt + b"signer" + self.secret_key).digest() + elif self.key_derivation == "hmac": + mac = hmac.new(self.secret_key, digestmod=self.digest_method) + mac.update(salt) + return mac.digest() + elif self.key_derivation == "none": + return self.secret_key + else: + raise TypeError("Unknown key derivation method") + + def get_signature(self, value): + """Returns the signature for the given value.""" + value = want_bytes(value) + key = self.derive_key() + sig = self.algorithm.get_signature(key, value) + return base64_encode(sig) + + def sign(self, value): + """Signs the given string.""" + return want_bytes(value) + want_bytes(self.sep) + self.get_signature(value) + + def verify_signature(self, value, sig): + """Verifies the signature for the given value.""" + key = self.derive_key() + try: + sig = base64_decode(sig) + except Exception: + return False + return self.algorithm.verify_signature(key, value, sig) + + def unsign(self, signed_value): + """Unsigns the given string.""" + signed_value = want_bytes(signed_value) + sep = want_bytes(self.sep) + if sep not in signed_value: + raise BadSignature("No %r found in value" % self.sep) + value, sig = signed_value.rsplit(sep, 1) + if self.verify_signature(value, sig): + return value + raise BadSignature("Signature %r does not match" % sig, payload=value) + + def validate(self, signed_value): + """Only validates the given signed value. Returns ``True`` if + the signature exists and is valid. + """ + try: + self.unsign(signed_value) + return True + except BadSignature: + return False diff --git a/python/itsdangerous/timed.py b/python/itsdangerous/timed.py new file mode 100644 index 0000000..4c117e4 --- /dev/null +++ b/python/itsdangerous/timed.py @@ -0,0 +1,147 @@ +import time +from datetime import datetime + +from ._compat import text_type +from .encoding import base64_decode +from .encoding import base64_encode +from .encoding import bytes_to_int +from .encoding import int_to_bytes +from .encoding import want_bytes +from .exc import BadSignature +from .exc import BadTimeSignature +from .exc import SignatureExpired +from .serializer import Serializer +from .signer import Signer + + +class TimestampSigner(Signer): + """Works like the regular :class:`.Signer` but also records the time + of the signing and can be used to expire signatures. The + :meth:`unsign` method can raise :exc:`.SignatureExpired` if the + unsigning failed because the signature is expired. + """ + + def get_timestamp(self): + """Returns the current timestamp. The function must return an + integer. + """ + return int(time.time()) + + def timestamp_to_datetime(self, ts): + """Used to convert the timestamp from :meth:`get_timestamp` into + a datetime object. + """ + return datetime.utcfromtimestamp(ts) + + def sign(self, value): + """Signs the given string and also attaches time information.""" + value = want_bytes(value) + timestamp = base64_encode(int_to_bytes(self.get_timestamp())) + sep = want_bytes(self.sep) + value = value + sep + timestamp + return value + sep + self.get_signature(value) + + def unsign(self, value, max_age=None, return_timestamp=False): + """Works like the regular :meth:`.Signer.unsign` but can also + validate the time. See the base docstring of the class for + the general behavior. 
If ``return_timestamp`` is ``True`` the + timestamp of the signature will be returned as a naive + :class:`datetime.datetime` object in UTC. + """ + try: + result = Signer.unsign(self, value) + sig_error = None + except BadSignature as e: + sig_error = e + result = e.payload or b"" + sep = want_bytes(self.sep) + + # If there is no timestamp in the result there is something + # seriously wrong. In case there was a signature error, we raise + # that one directly, otherwise we have a weird situation in + # which we shouldn't have come except someone uses a time-based + # serializer on non-timestamp data, so catch that. + if sep not in result: + if sig_error: + raise sig_error + raise BadTimeSignature("timestamp missing", payload=result) + + value, timestamp = result.rsplit(sep, 1) + try: + timestamp = bytes_to_int(base64_decode(timestamp)) + except Exception: + timestamp = None + + # Signature is *not* okay. Raise a proper error now that we have + # split the value and the timestamp. + if sig_error is not None: + raise BadTimeSignature( + text_type(sig_error), payload=value, date_signed=timestamp + ) + + # Signature was okay but the timestamp is actually not there or + # malformed. Should not happen, but we handle it anyway. + if timestamp is None: + raise BadTimeSignature("Malformed timestamp", payload=value) + + # Check timestamp is not older than max_age + if max_age is not None: + age = self.get_timestamp() - timestamp + if age > max_age: + raise SignatureExpired( + "Signature age %s > %s seconds" % (age, max_age), + payload=value, + date_signed=self.timestamp_to_datetime(timestamp), + ) + + if return_timestamp: + return value, self.timestamp_to_datetime(timestamp) + return value + + def validate(self, signed_value, max_age=None): + """Only validates the given signed value. Returns ``True`` if + the signature exists and is valid.""" + try: + self.unsign(signed_value, max_age=max_age) + return True + except BadSignature: + return False + + +class TimedSerializer(Serializer): + """Uses :class:`TimestampSigner` instead of the default + :class:`.Signer`. + """ + + default_signer = TimestampSigner + + def loads(self, s, max_age=None, return_timestamp=False, salt=None): + """Reverse of :meth:`dumps`, raises :exc:`.BadSignature` if the + signature validation fails. If a ``max_age`` is provided it will + ensure the signature is not older than that time in seconds. In + case the signature is outdated, :exc:`.SignatureExpired` is + raised. All arguments are forwarded to the signer's + :meth:`~TimestampSigner.unsign` method. + """ + s = want_bytes(s) + last_exception = None + for signer in self.iter_unsigners(salt): + try: + base64d, timestamp = signer.unsign(s, max_age, return_timestamp=True) + payload = self.load_payload(base64d) + if return_timestamp: + return payload, timestamp + return payload + # If we get a signature expired it means we could read the + # signature but it's invalid. In that case we do not want to + # try the next signer. 
+ except SignatureExpired: + raise + except BadSignature as err: + last_exception = err + raise last_exception + + def loads_unsafe(self, s, max_age=None, salt=None): + load_kwargs = {"max_age": max_age} + load_payload_kwargs = {} + return self._loads_unsafe_impl(s, salt, load_kwargs, load_payload_kwargs) diff --git a/python/itsdangerous/url_safe.py b/python/itsdangerous/url_safe.py new file mode 100644 index 0000000..fcaa011 --- /dev/null +++ b/python/itsdangerous/url_safe.py @@ -0,0 +1,65 @@ +import zlib + +from ._json import _CompactJSON +from .encoding import base64_decode +from .encoding import base64_encode +from .exc import BadPayload +from .serializer import Serializer +from .timed import TimedSerializer + + +class URLSafeSerializerMixin(object): + """Mixed in with a regular serializer it will attempt to zlib + compress the string to make it shorter if necessary. It will also + base64 encode the string so that it can safely be placed in a URL. + """ + + default_serializer = _CompactJSON + + def load_payload(self, payload, *args, **kwargs): + decompress = False + if payload.startswith(b"."): + payload = payload[1:] + decompress = True + try: + json = base64_decode(payload) + except Exception as e: + raise BadPayload( + "Could not base64 decode the payload because of an exception", + original_error=e, + ) + if decompress: + try: + json = zlib.decompress(json) + except Exception as e: + raise BadPayload( + "Could not zlib decompress the payload before decoding the payload", + original_error=e, + ) + return super(URLSafeSerializerMixin, self).load_payload(json, *args, **kwargs) + + def dump_payload(self, obj): + json = super(URLSafeSerializerMixin, self).dump_payload(obj) + is_compressed = False + compressed = zlib.compress(json) + if len(compressed) < (len(json) - 1): + json = compressed + is_compressed = True + base64d = base64_encode(json) + if is_compressed: + base64d = b"." + base64d + return base64d + + +class URLSafeSerializer(URLSafeSerializerMixin, Serializer): + """Works like :class:`.Serializer` but dumps and loads into a URL + safe string consisting of the upper and lowercase character of the + alphabet as well as ``'_'``, ``'-'`` and ``'.'``. + """ + + +class URLSafeTimedSerializer(URLSafeSerializerMixin, TimedSerializer): + """Works like :class:`.TimedSerializer` but dumps and loads into a + URL safe string consisting of the upper and lowercase character of + the alphabet as well as ``'_'``, ``'-'`` and ``'.'``. + """ diff --git a/python/jinja2/__init__.py b/python/jinja2/__init__.py new file mode 100644 index 0000000..15e13b6 --- /dev/null +++ b/python/jinja2/__init__.py @@ -0,0 +1,83 @@ +# -*- coding: utf-8 -*- +""" + jinja2 + ~~~~~~ + + Jinja2 is a template engine written in pure Python. It provides a + Django inspired non-XML syntax but supports inline expressions and + an optional sandboxed environment. + + Nutshell + -------- + + Here a small example of a Jinja2 template:: + + {% extends 'base.html' %} + {% block title %}Memberlist{% endblock %} + {% block content %} + <ul> + {% for user in users %} + <li><a href="{{ user.url }}">{{ user.username }}</a></li> + {% endfor %} + </ul> + {% endblock %} + + + :copyright: (c) 2017 by the Jinja Team. + :license: BSD, see LICENSE for more details. 
+""" +__docformat__ = 'restructuredtext en' +__version__ = '2.10.1' + +# high level interface +from jinja2.environment import Environment, Template + +# loaders +from jinja2.loaders import BaseLoader, FileSystemLoader, PackageLoader, \ + DictLoader, FunctionLoader, PrefixLoader, ChoiceLoader, \ + ModuleLoader + +# bytecode caches +from jinja2.bccache import BytecodeCache, FileSystemBytecodeCache, \ + MemcachedBytecodeCache + +# undefined types +from jinja2.runtime import Undefined, DebugUndefined, StrictUndefined, \ + make_logging_undefined + +# exceptions +from jinja2.exceptions import TemplateError, UndefinedError, \ + TemplateNotFound, TemplatesNotFound, TemplateSyntaxError, \ + TemplateAssertionError, TemplateRuntimeError + +# decorators and public utilities +from jinja2.filters import environmentfilter, contextfilter, \ + evalcontextfilter +from jinja2.utils import Markup, escape, clear_caches, \ + environmentfunction, evalcontextfunction, contextfunction, \ + is_undefined, select_autoescape + +__all__ = [ + 'Environment', 'Template', 'BaseLoader', 'FileSystemLoader', + 'PackageLoader', 'DictLoader', 'FunctionLoader', 'PrefixLoader', + 'ChoiceLoader', 'BytecodeCache', 'FileSystemBytecodeCache', + 'MemcachedBytecodeCache', 'Undefined', 'DebugUndefined', + 'StrictUndefined', 'TemplateError', 'UndefinedError', 'TemplateNotFound', + 'TemplatesNotFound', 'TemplateSyntaxError', 'TemplateAssertionError', + 'TemplateRuntimeError', + 'ModuleLoader', 'environmentfilter', 'contextfilter', 'Markup', 'escape', + 'environmentfunction', 'contextfunction', 'clear_caches', 'is_undefined', + 'evalcontextfilter', 'evalcontextfunction', 'make_logging_undefined', + 'select_autoescape', +] + + +def _patch_async(): + from jinja2.utils import have_async_gen + if have_async_gen: + from jinja2.asyncsupport import patch_all + patch_all() + + +_patch_async() +del _patch_async diff --git a/python/jinja2/_compat.py b/python/jinja2/_compat.py new file mode 100644 index 0000000..61d8530 --- /dev/null +++ b/python/jinja2/_compat.py @@ -0,0 +1,99 @@ +# -*- coding: utf-8 -*- +""" + jinja2._compat + ~~~~~~~~~~~~~~ + + Some py2/py3 compatibility support based on a stripped down + version of six so we don't have to depend on a specific version + of it. + + :copyright: Copyright 2013 by the Jinja team, see AUTHORS. + :license: BSD, see LICENSE for details. 
+""" +import sys + +PY2 = sys.version_info[0] == 2 +PYPY = hasattr(sys, 'pypy_translation_info') +_identity = lambda x: x + + +if not PY2: + unichr = chr + range_type = range + text_type = str + string_types = (str,) + integer_types = (int,) + + iterkeys = lambda d: iter(d.keys()) + itervalues = lambda d: iter(d.values()) + iteritems = lambda d: iter(d.items()) + + import pickle + from io import BytesIO, StringIO + NativeStringIO = StringIO + + def reraise(tp, value, tb=None): + if value.__traceback__ is not tb: + raise value.with_traceback(tb) + raise value + + ifilter = filter + imap = map + izip = zip + intern = sys.intern + + implements_iterator = _identity + implements_to_string = _identity + encode_filename = _identity + +else: + unichr = unichr + text_type = unicode + range_type = xrange + string_types = (str, unicode) + integer_types = (int, long) + + iterkeys = lambda d: d.iterkeys() + itervalues = lambda d: d.itervalues() + iteritems = lambda d: d.iteritems() + + import cPickle as pickle + from cStringIO import StringIO as BytesIO, StringIO + NativeStringIO = BytesIO + + exec('def reraise(tp, value, tb=None):\n raise tp, value, tb') + + from itertools import imap, izip, ifilter + intern = intern + + def implements_iterator(cls): + cls.next = cls.__next__ + del cls.__next__ + return cls + + def implements_to_string(cls): + cls.__unicode__ = cls.__str__ + cls.__str__ = lambda x: x.__unicode__().encode('utf-8') + return cls + + def encode_filename(filename): + if isinstance(filename, unicode): + return filename.encode('utf-8') + return filename + + +def with_metaclass(meta, *bases): + """Create a base class with a metaclass.""" + # This requires a bit of explanation: the basic idea is to make a + # dummy metaclass for one level of class instantiation that replaces + # itself with the actual metaclass. 
+ class metaclass(type): + def __new__(cls, name, this_bases, d): + return meta(name, bases, d) + return type.__new__(metaclass, 'temporary_class', (), {}) + + +try: + from urllib.parse import quote_from_bytes as url_quote +except ImportError: + from urllib import quote as url_quote diff --git a/python/jinja2/_identifier.py b/python/jinja2/_identifier.py new file mode 100644 index 0000000..2eac35d --- /dev/null +++ b/python/jinja2/_identifier.py @@ -0,0 +1,2 @@ +# generated by scripts/generate_identifier_pattern.py +pattern = '·̀-ͯ·҃-֑҇-ׇֽֿׁׂׅׄؐ-ًؚ-ٰٟۖ-ۜ۟-۪ۤۧۨ-ܑۭܰ-݊ަ-ް߫-߳ࠖ-࠙ࠛ-ࠣࠥ-ࠧࠩ-࡙࠭-࡛ࣔ-ࣣ࣡-ःऺ-़ा-ॏ॑-ॗॢॣঁ-ঃ়া-ৄেৈো-্ৗৢৣਁ-ਃ਼ਾ-ੂੇੈੋ-੍ੑੰੱੵઁ-ઃ઼ા-ૅે-ૉો-્ૢૣଁ-ଃ଼ା-ୄେୈୋ-୍ୖୗୢୣஂா-ூெ-ைொ-்ௗఀ-ఃా-ౄె-ైొ-్ౕౖౢౣಁ-ಃ಼ಾ-ೄೆ-ೈೊ-್ೕೖೢೣഁ-ഃാ-ൄെ-ൈൊ-്ൗൢൣංඃ්ා-ුූෘ-ෟෲෳัิ-ฺ็-๎ັິ-ູົຼ່-ໍ༹༘༙༵༷༾༿ཱ-྄྆྇ྍ-ྗྙ-ྼ࿆ါ-ှၖ-ၙၞ-ၠၢ-ၤၧ-ၭၱ-ၴႂ-ႍႏႚ-ႝ፝-፟ᜒ-᜔ᜲ-᜴ᝒᝓᝲᝳ឴-៓៝᠋-᠍ᢅᢆᢩᤠ-ᤫᤰ-᤻ᨗ-ᨛᩕ-ᩞ᩠-᩿᩼᪰-᪽ᬀ-ᬄ᬴-᭄᭫-᭳ᮀ-ᮂᮡ-ᮭ᯦-᯳ᰤ-᰷᳐-᳔᳒-᳨᳭ᳲ-᳴᳸᳹᷀-᷵᷻-᷿‿⁀⁔⃐-⃥⃜⃡-⃰℘℮⳯-⵿⳱ⷠ-〪ⷿ-゙゚〯꙯ꙴ-꙽ꚞꚟ꛰꛱ꠂ꠆ꠋꠣ-ꠧꢀꢁꢴ-ꣅ꣠-꣱ꤦ-꤭ꥇ-꥓ꦀ-ꦃ꦳-꧀ꧥꨩ-ꨶꩃꩌꩍꩻ-ꩽꪰꪲ-ꪴꪷꪸꪾ꪿꫁ꫫ-ꫯꫵ꫶ꯣ-ꯪ꯬꯭ﬞ︀-️︠-︯︳︴﹍-﹏_𐇽𐋠𐍶-𐍺𐨁-𐨃𐨅𐨆𐨌-𐨏𐨸-𐨿𐨺𐫦𐫥𑀀-𑀂𑀸-𑁆𑁿-𑂂𑂰-𑂺𑄀-𑄂𑄧-𑅳𑄴𑆀-𑆂𑆳-𑇊𑇀-𑇌𑈬-𑈷𑈾𑋟-𑋪𑌀-𑌃𑌼𑌾-𑍄𑍇𑍈𑍋-𑍍𑍗𑍢𑍣𑍦-𑍬𑍰-𑍴𑐵-𑑆𑒰-𑓃𑖯-𑖵𑖸-𑗀𑗜𑗝𑘰-𑙀𑚫-𑚷𑜝-𑜫𑰯-𑰶𑰸-𑰿𑲒-𑲧𑲩-𑲶𖫰-𖫴𖬰-𖬶𖽑-𖽾𖾏-𖾒𛲝𛲞𝅥-𝅩𝅭-𝅲𝅻-𝆂𝆅-𝆋𝆪-𝆭𝉂-𝉄𝨀-𝨶𝨻-𝩬𝩵𝪄𝪛-𝪟𝪡-𝪯𞀀-𞀆𞀈-𞀘𞀛-𞀡𞀣𞀤𞀦-𞣐𞀪-𞣖𞥄-𞥊󠄀-󠇯' diff --git a/python/jinja2/asyncfilters.py b/python/jinja2/asyncfilters.py new file mode 100644 index 0000000..5c1f46d --- /dev/null +++ b/python/jinja2/asyncfilters.py @@ -0,0 +1,146 @@ +from functools import wraps + +from jinja2.asyncsupport import auto_aiter +from jinja2 import filters + + +async def auto_to_seq(value): + seq = [] + if hasattr(value, '__aiter__'): + async for item in value: + seq.append(item) + else: + for item in value: + seq.append(item) + return seq + + +async def async_select_or_reject(args, kwargs, modfunc, lookup_attr): + seq, func = filters.prepare_select_or_reject( + args, kwargs, modfunc, lookup_attr) + if seq: + async for item in auto_aiter(seq): + if func(item): + yield item + + +def dualfilter(normal_filter, async_filter): + wrap_evalctx = False + if getattr(normal_filter, 'environmentfilter', False): + is_async = lambda args: args[0].is_async + wrap_evalctx = False + else: + if not getattr(normal_filter, 'evalcontextfilter', False) and \ + not getattr(normal_filter, 'contextfilter', False): + wrap_evalctx = True + is_async = lambda args: args[0].environment.is_async + + @wraps(normal_filter) + def wrapper(*args, **kwargs): + b = is_async(args) + if wrap_evalctx: + args = args[1:] + if b: + return async_filter(*args, **kwargs) + return normal_filter(*args, **kwargs) + + if wrap_evalctx: + wrapper.evalcontextfilter = True + + wrapper.asyncfiltervariant = True + + return wrapper + + +def asyncfiltervariant(original): + def decorator(f): + return dualfilter(original, f) + return decorator + + +@asyncfiltervariant(filters.do_first) +async def do_first(environment, seq): + try: + return await auto_aiter(seq).__anext__() + except StopAsyncIteration: + return environment.undefined('No first item, sequence was empty.') + + +@asyncfiltervariant(filters.do_groupby) +async def do_groupby(environment, value, attribute): + expr = filters.make_attrgetter(environment, attribute) + return [filters._GroupTuple(key, await auto_to_seq(values)) + for key, values in filters.groupby(sorted( + await auto_to_seq(value), key=expr), expr)] + + +@asyncfiltervariant(filters.do_join) +async def do_join(eval_ctx, value, d=u'', attribute=None): + return filters.do_join(eval_ctx, await auto_to_seq(value), d, attribute) + + +@asyncfiltervariant(filters.do_list) +async def do_list(value): + return await auto_to_seq(value) + + 
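+# Illustrative sketch (not part of the original module): the wrappers built
+# by dualfilter above dispatch on the environment's is_async flag, so the
+# same template works in both modes, e.g.::
+#
+#     env = Environment(enable_async=True)
+#     tmpl = env.from_string('{{ items|list|length }}')
+#     rendered = await tmpl.render_async(items=some_async_gen())
+#
+# where some_async_gen is a placeholder for any async generator.
+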
+@asyncfiltervariant(filters.do_reject) +async def do_reject(*args, **kwargs): + return async_select_or_reject(args, kwargs, lambda x: not x, False) + + +@asyncfiltervariant(filters.do_rejectattr) +async def do_rejectattr(*args, **kwargs): + return async_select_or_reject(args, kwargs, lambda x: not x, True) + + +@asyncfiltervariant(filters.do_select) +async def do_select(*args, **kwargs): + return async_select_or_reject(args, kwargs, lambda x: x, False) + + +@asyncfiltervariant(filters.do_selectattr) +async def do_selectattr(*args, **kwargs): + return async_select_or_reject(args, kwargs, lambda x: x, True) + + +@asyncfiltervariant(filters.do_map) +async def do_map(*args, **kwargs): + seq, func = filters.prepare_map(args, kwargs) + if seq: + async for item in auto_aiter(seq): + yield func(item) + + +@asyncfiltervariant(filters.do_sum) +async def do_sum(environment, iterable, attribute=None, start=0): + rv = start + if attribute is not None: + func = filters.make_attrgetter(environment, attribute) + else: + func = lambda x: x + async for item in auto_aiter(iterable): + rv += func(item) + return rv + + +@asyncfiltervariant(filters.do_slice) +async def do_slice(value, slices, fill_with=None): + return filters.do_slice(await auto_to_seq(value), slices, fill_with) + + +ASYNC_FILTERS = { + 'first': do_first, + 'groupby': do_groupby, + 'join': do_join, + 'list': do_list, + # we intentionally do not support do_last because that would be + # ridiculous + 'reject': do_reject, + 'rejectattr': do_rejectattr, + 'map': do_map, + 'select': do_select, + 'selectattr': do_selectattr, + 'sum': do_sum, + 'slice': do_slice, +} diff --git a/python/jinja2/asyncsupport.py b/python/jinja2/asyncsupport.py new file mode 100644 index 0000000..b1e7b5c --- /dev/null +++ b/python/jinja2/asyncsupport.py @@ -0,0 +1,256 @@ +# -*- coding: utf-8 -*- +""" + jinja2.asyncsupport + ~~~~~~~~~~~~~~~~~~~ + + Has all the code for async support which is implemented as a patch + for supported Python versions. + + :copyright: (c) 2017 by the Jinja Team. + :license: BSD, see LICENSE for more details. 
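+
+    For illustration: these patches only take effect for environments
+    created with ``enable_async=True``; otherwise every wrapper below
+    falls through to the original synchronous implementation.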
+""" +import sys +import asyncio +import inspect +from functools import update_wrapper + +from jinja2.utils import concat, internalcode, Markup +from jinja2.environment import TemplateModule +from jinja2.runtime import LoopContextBase, _last_iteration + + +async def concat_async(async_gen): + rv = [] + async def collect(): + async for event in async_gen: + rv.append(event) + await collect() + return concat(rv) + + +async def generate_async(self, *args, **kwargs): + vars = dict(*args, **kwargs) + try: + async for event in self.root_render_func(self.new_context(vars)): + yield event + except Exception: + exc_info = sys.exc_info() + else: + return + yield self.environment.handle_exception(exc_info, True) + + +def wrap_generate_func(original_generate): + def _convert_generator(self, loop, args, kwargs): + async_gen = self.generate_async(*args, **kwargs) + try: + while 1: + yield loop.run_until_complete(async_gen.__anext__()) + except StopAsyncIteration: + pass + def generate(self, *args, **kwargs): + if not self.environment.is_async: + return original_generate(self, *args, **kwargs) + return _convert_generator(self, asyncio.get_event_loop(), args, kwargs) + return update_wrapper(generate, original_generate) + + +async def render_async(self, *args, **kwargs): + if not self.environment.is_async: + raise RuntimeError('The environment was not created with async mode ' + 'enabled.') + + vars = dict(*args, **kwargs) + ctx = self.new_context(vars) + + try: + return await concat_async(self.root_render_func(ctx)) + except Exception: + exc_info = sys.exc_info() + return self.environment.handle_exception(exc_info, True) + + +def wrap_render_func(original_render): + def render(self, *args, **kwargs): + if not self.environment.is_async: + return original_render(self, *args, **kwargs) + loop = asyncio.get_event_loop() + return loop.run_until_complete(self.render_async(*args, **kwargs)) + return update_wrapper(render, original_render) + + +def wrap_block_reference_call(original_call): + @internalcode + async def async_call(self): + rv = await concat_async(self._stack[self._depth](self._context)) + if self._context.eval_ctx.autoescape: + rv = Markup(rv) + return rv + + @internalcode + def __call__(self): + if not self._context.environment.is_async: + return original_call(self) + return async_call(self) + + return update_wrapper(__call__, original_call) + + +def wrap_macro_invoke(original_invoke): + @internalcode + async def async_invoke(self, arguments, autoescape): + rv = await self._func(*arguments) + if autoescape: + rv = Markup(rv) + return rv + + @internalcode + def _invoke(self, arguments, autoescape): + if not self._environment.is_async: + return original_invoke(self, arguments, autoescape) + return async_invoke(self, arguments, autoescape) + return update_wrapper(_invoke, original_invoke) + + +@internalcode +async def get_default_module_async(self): + if self._module is not None: + return self._module + self._module = rv = await self.make_module_async() + return rv + + +def wrap_default_module(original_default_module): + @internalcode + def _get_default_module(self): + if self.environment.is_async: + raise RuntimeError('Template module attribute is unavailable ' + 'in async mode') + return original_default_module(self) + return _get_default_module + + +async def make_module_async(self, vars=None, shared=False, locals=None): + context = self.new_context(vars, shared, locals) + body_stream = [] + async for item in self.root_render_func(context): + body_stream.append(item) + return TemplateModule(self, 
context, body_stream) + + +def patch_template(): + from jinja2 import Template + Template.generate = wrap_generate_func(Template.generate) + Template.generate_async = update_wrapper( + generate_async, Template.generate_async) + Template.render_async = update_wrapper( + render_async, Template.render_async) + Template.render = wrap_render_func(Template.render) + Template._get_default_module = wrap_default_module( + Template._get_default_module) + Template._get_default_module_async = get_default_module_async + Template.make_module_async = update_wrapper( + make_module_async, Template.make_module_async) + + +def patch_runtime(): + from jinja2.runtime import BlockReference, Macro + BlockReference.__call__ = wrap_block_reference_call( + BlockReference.__call__) + Macro._invoke = wrap_macro_invoke(Macro._invoke) + + +def patch_filters(): + from jinja2.filters import FILTERS + from jinja2.asyncfilters import ASYNC_FILTERS + FILTERS.update(ASYNC_FILTERS) + + +def patch_all(): + patch_template() + patch_runtime() + patch_filters() + + +async def auto_await(value): + if inspect.isawaitable(value): + return await value + return value + + +async def auto_aiter(iterable): + if hasattr(iterable, '__aiter__'): + async for item in iterable: + yield item + return + for item in iterable: + yield item + + +class AsyncLoopContext(LoopContextBase): + + def __init__(self, async_iterator, undefined, after, length, recurse=None, + depth0=0): + LoopContextBase.__init__(self, undefined, recurse, depth0) + self._async_iterator = async_iterator + self._after = after + self._length = length + + @property + def length(self): + if self._length is None: + raise TypeError('Loop length for some iterators cannot be ' + 'lazily calculated in async mode') + return self._length + + def __aiter__(self): + return AsyncLoopContextIterator(self) + + +class AsyncLoopContextIterator(object): + __slots__ = ('context',) + + def __init__(self, context): + self.context = context + + def __aiter__(self): + return self + + async def __anext__(self): + ctx = self.context + ctx.index0 += 1 + if ctx._after is _last_iteration: + raise StopAsyncIteration() + ctx._before = ctx._current + ctx._current = ctx._after + try: + ctx._after = await ctx._async_iterator.__anext__() + except StopAsyncIteration: + ctx._after = _last_iteration + return ctx._current, ctx + + +async def make_async_loop_context(iterable, undefined, recurse=None, depth0=0): + # Length is more complicated and less efficient in async mode. The + # reason for this is that we cannot know if length will be used + # upfront but because length is a property we cannot lazily execute it + # later. This means that we need to buffer it up and measure :( + # + # We however only do this for actual iterators, not for async + # iterators as blocking here does not seem like the best idea in the + # world. 
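+    #
+    # Illustration (assumed inputs): a list gives len() directly; a plain
+    # sync generator is buffered via tuple(iterable) so len() works; an
+    # object with __aiter__ keeps length as None, and touching loop.length
+    # later raises TypeError (see AsyncLoopContext.length above).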
+    try:
+        length = len(iterable)
+    except (TypeError, AttributeError):
+        if not hasattr(iterable, '__aiter__'):
+            iterable = tuple(iterable)
+            length = len(iterable)
+        else:
+            length = None
+    async_iterator = auto_aiter(iterable)
+    try:
+        after = await async_iterator.__anext__()
+    except StopAsyncIteration:
+        after = _last_iteration
+    return AsyncLoopContext(async_iterator, undefined, after, length, recurse,
+                            depth0)
diff --git a/python/jinja2/bccache.py b/python/jinja2/bccache.py
new file mode 100644
index 0000000..080e527
--- /dev/null
+++ b/python/jinja2/bccache.py
@@ -0,0 +1,362 @@
+# -*- coding: utf-8 -*-
+"""
+    jinja2.bccache
+    ~~~~~~~~~~~~~~
+
+    This module implements the bytecode cache system Jinja is optionally
+    using. This is useful if you have very complex template situations and
+    the compilation of all those templates slows down your application too
+    much.
+
+    Situations where this is useful are often forking web applications that
+    are initialized on the first request.
+
+    :copyright: (c) 2017 by the Jinja Team.
+    :license: BSD.
+"""
+from os import path, listdir
+import os
+import sys
+import stat
+import errno
+import marshal
+import tempfile
+import fnmatch
+from hashlib import sha1
+from jinja2.utils import open_if_exists
+from jinja2._compat import BytesIO, pickle, PY2, text_type
+
+
+# marshal works better on 3.x, one hack less required
+if not PY2:
+    marshal_dump = marshal.dump
+    marshal_load = marshal.load
+else:
+
+    def marshal_dump(code, f):
+        if isinstance(f, file):
+            marshal.dump(code, f)
+        else:
+            f.write(marshal.dumps(code))
+
+    def marshal_load(f):
+        if isinstance(f, file):
+            return marshal.load(f)
+        return marshal.loads(f.read())
+
+
+bc_version = 3
+
+# magic version used to only change with new jinja versions. With 2.6
+# we change this to also take Python version changes into account. The
+# reason for this is that Python tends to segfault if fed earlier bytecode
+# versions because someone thought it would be a good idea to reuse opcodes
+# or make Python incompatible with earlier versions.
+bc_magic = 'j2'.encode('ascii') + \
+    pickle.dumps(bc_version, 2) + \
+    pickle.dumps((sys.version_info[0] << 24) | sys.version_info[1])
+
+
+class Bucket(object):
+    """Buckets are used to store the bytecode for one template. It's created
+    and initialized by the bytecode cache and passed to the loading functions.
+
+    The buckets get an internal checksum from the cache assigned and use this
+    to automatically reject outdated cache material. Individual bytecode
+    cache subclasses don't have to care about cache invalidation.
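+
+    A rough sketch of the round trip (``f`` is assumed to be a binary mode
+    file object)::
+
+        bucket.write_bytecode(f)   # magic + checksum + marshalled code
+        bucket.load_bytecode(f)    # verifies both, resets on any mismatch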
+ """ + + def __init__(self, environment, key, checksum): + self.environment = environment + self.key = key + self.checksum = checksum + self.reset() + + def reset(self): + """Resets the bucket (unloads the bytecode).""" + self.code = None + + def load_bytecode(self, f): + """Loads bytecode from a file or file like object.""" + # make sure the magic header is correct + magic = f.read(len(bc_magic)) + if magic != bc_magic: + self.reset() + return + # the source code of the file changed, we need to reload + checksum = pickle.load(f) + if self.checksum != checksum: + self.reset() + return + # if marshal_load fails then we need to reload + try: + self.code = marshal_load(f) + except (EOFError, ValueError, TypeError): + self.reset() + return + + def write_bytecode(self, f): + """Dump the bytecode into the file or file like object passed.""" + if self.code is None: + raise TypeError('can\'t write empty bucket') + f.write(bc_magic) + pickle.dump(self.checksum, f, 2) + marshal_dump(self.code, f) + + def bytecode_from_string(self, string): + """Load bytecode from a string.""" + self.load_bytecode(BytesIO(string)) + + def bytecode_to_string(self): + """Return the bytecode as string.""" + out = BytesIO() + self.write_bytecode(out) + return out.getvalue() + + +class BytecodeCache(object): + """To implement your own bytecode cache you have to subclass this class + and override :meth:`load_bytecode` and :meth:`dump_bytecode`. Both of + these methods are passed a :class:`~jinja2.bccache.Bucket`. + + A very basic bytecode cache that saves the bytecode on the file system:: + + from os import path + + class MyCache(BytecodeCache): + + def __init__(self, directory): + self.directory = directory + + def load_bytecode(self, bucket): + filename = path.join(self.directory, bucket.key) + if path.exists(filename): + with open(filename, 'rb') as f: + bucket.load_bytecode(f) + + def dump_bytecode(self, bucket): + filename = path.join(self.directory, bucket.key) + with open(filename, 'wb') as f: + bucket.write_bytecode(f) + + A more advanced version of a filesystem based bytecode cache is part of + Jinja2. + """ + + def load_bytecode(self, bucket): + """Subclasses have to override this method to load bytecode into a + bucket. If they are not able to find code in the cache for the + bucket, it must not do anything. + """ + raise NotImplementedError() + + def dump_bytecode(self, bucket): + """Subclasses have to override this method to write the bytecode + from a bucket back to the cache. If it unable to do so it must not + fail silently but raise an exception. + """ + raise NotImplementedError() + + def clear(self): + """Clears the cache. This method is not used by Jinja2 but should be + implemented to allow applications to clear the bytecode cache used + by a particular environment. + """ + + def get_cache_key(self, name, filename=None): + """Returns the unique hash key for this template name.""" + hash = sha1(name.encode('utf-8')) + if filename is not None: + filename = '|' + filename + if isinstance(filename, text_type): + filename = filename.encode('utf-8') + hash.update(filename) + return hash.hexdigest() + + def get_source_checksum(self, source): + """Returns a checksum for the source.""" + return sha1(source.encode('utf-8')).hexdigest() + + def get_bucket(self, environment, name, filename, source): + """Return a cache bucket for the given template. All arguments are + mandatory but filename may be `None`. 
+ """ + key = self.get_cache_key(name, filename) + checksum = self.get_source_checksum(source) + bucket = Bucket(environment, key, checksum) + self.load_bytecode(bucket) + return bucket + + def set_bucket(self, bucket): + """Put the bucket into the cache.""" + self.dump_bytecode(bucket) + + +class FileSystemBytecodeCache(BytecodeCache): + """A bytecode cache that stores bytecode on the filesystem. It accepts + two arguments: The directory where the cache items are stored and a + pattern string that is used to build the filename. + + If no directory is specified a default cache directory is selected. On + Windows the user's temp directory is used, on UNIX systems a directory + is created for the user in the system temp directory. + + The pattern can be used to have multiple separate caches operate on the + same directory. The default pattern is ``'__jinja2_%s.cache'``. ``%s`` + is replaced with the cache key. + + >>> bcc = FileSystemBytecodeCache('/tmp/jinja_cache', '%s.cache') + + This bytecode cache supports clearing of the cache using the clear method. + """ + + def __init__(self, directory=None, pattern='__jinja2_%s.cache'): + if directory is None: + directory = self._get_default_cache_dir() + self.directory = directory + self.pattern = pattern + + def _get_default_cache_dir(self): + def _unsafe_dir(): + raise RuntimeError('Cannot determine safe temp directory. You ' + 'need to explicitly provide one.') + + tmpdir = tempfile.gettempdir() + + # On windows the temporary directory is used specific unless + # explicitly forced otherwise. We can just use that. + if os.name == 'nt': + return tmpdir + if not hasattr(os, 'getuid'): + _unsafe_dir() + + dirname = '_jinja2-cache-%d' % os.getuid() + actual_dir = os.path.join(tmpdir, dirname) + + try: + os.mkdir(actual_dir, stat.S_IRWXU) + except OSError as e: + if e.errno != errno.EEXIST: + raise + try: + os.chmod(actual_dir, stat.S_IRWXU) + actual_dir_stat = os.lstat(actual_dir) + if actual_dir_stat.st_uid != os.getuid() \ + or not stat.S_ISDIR(actual_dir_stat.st_mode) \ + or stat.S_IMODE(actual_dir_stat.st_mode) != stat.S_IRWXU: + _unsafe_dir() + except OSError as e: + if e.errno != errno.EEXIST: + raise + + actual_dir_stat = os.lstat(actual_dir) + if actual_dir_stat.st_uid != os.getuid() \ + or not stat.S_ISDIR(actual_dir_stat.st_mode) \ + or stat.S_IMODE(actual_dir_stat.st_mode) != stat.S_IRWXU: + _unsafe_dir() + + return actual_dir + + def _get_cache_filename(self, bucket): + return path.join(self.directory, self.pattern % bucket.key) + + def load_bytecode(self, bucket): + f = open_if_exists(self._get_cache_filename(bucket), 'rb') + if f is not None: + try: + bucket.load_bytecode(f) + finally: + f.close() + + def dump_bytecode(self, bucket): + f = open(self._get_cache_filename(bucket), 'wb') + try: + bucket.write_bytecode(f) + finally: + f.close() + + def clear(self): + # imported lazily here because google app-engine doesn't support + # write access on the file system and the function does not exist + # normally. + from os import remove + files = fnmatch.filter(listdir(self.directory), self.pattern % '*') + for filename in files: + try: + remove(path.join(self.directory, filename)) + except OSError: + pass + + +class MemcachedBytecodeCache(BytecodeCache): + """This class implements a bytecode cache that uses a memcache cache for + storing the information. It does not enforce a specific memcache library + (tummy's memcache or cmemcache) but will accept any class that provides + the minimal interface required. 
+ + Libraries compatible with this class: + + - `werkzeug <http://werkzeug.pocoo.org/>`_.contrib.cache + - `python-memcached <https://www.tummy.com/Community/software/python-memcached/>`_ + - `cmemcache <http://gijsbert.org/cmemcache/>`_ + + (Unfortunately the django cache interface is not compatible because it + does not support storing binary data, only unicode. You can however pass + the underlying cache client to the bytecode cache which is available + as `django.core.cache.cache._client`.) + + The minimal interface for the client passed to the constructor is this: + + .. class:: MinimalClientInterface + + .. method:: set(key, value[, timeout]) + + Stores the bytecode in the cache. `value` is a string and + `timeout` the timeout of the key. If timeout is not provided + a default timeout or no timeout should be assumed, if it's + provided it's an integer with the number of seconds the cache + item should exist. + + .. method:: get(key) + + Returns the value for the cache key. If the item does not + exist in the cache the return value must be `None`. + + The other arguments to the constructor are the prefix for all keys that + is added before the actual cache key and the timeout for the bytecode in + the cache system. We recommend a high (or no) timeout. + + This bytecode cache does not support clearing of used items in the cache. + The clear method is a no-operation function. + + .. versionadded:: 2.7 + Added support for ignoring memcache errors through the + `ignore_memcache_errors` parameter. + """ + + def __init__(self, client, prefix='jinja2/bytecode/', timeout=None, + ignore_memcache_errors=True): + self.client = client + self.prefix = prefix + self.timeout = timeout + self.ignore_memcache_errors = ignore_memcache_errors + + def load_bytecode(self, bucket): + try: + code = self.client.get(self.prefix + bucket.key) + except Exception: + if not self.ignore_memcache_errors: + raise + code = None + if code is not None: + bucket.bytecode_from_string(code) + + def dump_bytecode(self, bucket): + args = (self.prefix + bucket.key, bucket.bytecode_to_string()) + if self.timeout is not None: + args += (self.timeout,) + try: + self.client.set(*args) + except Exception: + if not self.ignore_memcache_errors: + raise diff --git a/python/jinja2/compiler.py b/python/jinja2/compiler.py new file mode 100644 index 0000000..d534a82 --- /dev/null +++ b/python/jinja2/compiler.py @@ -0,0 +1,1721 @@ +# -*- coding: utf-8 -*- +""" + jinja2.compiler + ~~~~~~~~~~~~~~~ + + Compiles nodes into python code. + + :copyright: (c) 2017 by the Jinja Team. + :license: BSD, see LICENSE for more details. +""" +from itertools import chain +from copy import deepcopy +from keyword import iskeyword as is_python_keyword +from functools import update_wrapper +from jinja2 import nodes +from jinja2.nodes import EvalContext +from jinja2.visitor import NodeVisitor +from jinja2.optimizer import Optimizer +from jinja2.exceptions import TemplateAssertionError +from jinja2.utils import Markup, concat, escape +from jinja2._compat import range_type, text_type, string_types, \ + iteritems, NativeStringIO, imap, izip +from jinja2.idtracking import Symbols, VAR_LOAD_PARAMETER, \ + VAR_LOAD_RESOLVE, VAR_LOAD_ALIAS, VAR_LOAD_UNDEFINED + + +operators = { + 'eq': '==', + 'ne': '!=', + 'gt': '>', + 'gteq': '>=', + 'lt': '<', + 'lteq': '<=', + 'in': 'in', + 'notin': 'not in' +} + +# what method to iterate over items do we want to use for dict iteration +# in generated code? 
on 2.x let's go with iteritems, on 3.x with items +if hasattr(dict, 'iteritems'): + dict_item_iter = 'iteritems' +else: + dict_item_iter = 'items' + +code_features = ['division'] + +# does this python version support generator stops? (PEP 0479) +try: + exec('from __future__ import generator_stop') + code_features.append('generator_stop') +except SyntaxError: + pass + +# does this python version support yield from? +try: + exec('def f(): yield from x()') +except SyntaxError: + supports_yield_from = False +else: + supports_yield_from = True + + +def optimizeconst(f): + def new_func(self, node, frame, **kwargs): + # Only optimize if the frame is not volatile + if self.optimized and not frame.eval_ctx.volatile: + new_node = self.optimizer.visit(node, frame.eval_ctx) + if new_node != node: + return self.visit(new_node, frame) + return f(self, node, frame, **kwargs) + return update_wrapper(new_func, f) + + +def generate(node, environment, name, filename, stream=None, + defer_init=False, optimized=True): + """Generate the python source for a node tree.""" + if not isinstance(node, nodes.Template): + raise TypeError('Can\'t compile non template nodes') + generator = environment.code_generator_class(environment, name, filename, + stream, defer_init, + optimized) + generator.visit(node) + if stream is None: + return generator.stream.getvalue() + + +def has_safe_repr(value): + """Does the node have a safe representation?""" + if value is None or value is NotImplemented or value is Ellipsis: + return True + if type(value) in (bool, int, float, complex, range_type, Markup) + string_types: + return True + if type(value) in (tuple, list, set, frozenset): + for item in value: + if not has_safe_repr(item): + return False + return True + elif type(value) is dict: + for key, value in iteritems(value): + if not has_safe_repr(key): + return False + if not has_safe_repr(value): + return False + return True + return False + + +def find_undeclared(nodes, names): + """Check if the names passed are accessed undeclared. The return value + is a set of all the undeclared names from the sequence of names found. + """ + visitor = UndeclaredNameVisitor(names) + try: + for node in nodes: + visitor.visit(node) + except VisitorExit: + pass + return visitor.undeclared + + +class MacroRef(object): + + def __init__(self, node): + self.node = node + self.accesses_caller = False + self.accesses_kwargs = False + self.accesses_varargs = False + + +class Frame(object): + """Holds compile time information for us.""" + + def __init__(self, eval_ctx, parent=None, level=None): + self.eval_ctx = eval_ctx + self.symbols = Symbols(parent and parent.symbols or None, + level=level) + + # a toplevel frame is the root + soft frames such as if conditions. + self.toplevel = False + + # the root frame is basically just the outermost frame, so no if + # conditions. This information is used to optimize inheritance + # situations. + self.rootlevel = False + + # in some dynamic inheritance situations the compiler needs to add + # write tests around output statements. + self.require_output_check = parent and parent.require_output_check + + # inside some tags we are using a buffer rather than yield statements. + # this for example affects {% filter %} or {% macro %}. If a frame + # is buffered this variable points to the name of the list used as + # buffer. + self.buffer = None + + # the name of the block we're in, otherwise None. 
+        self.block = parent and parent.block or None
+
+        # the parent of this frame
+        self.parent = parent
+
+        if parent is not None:
+            self.buffer = parent.buffer
+
+    def copy(self):
+        """Create a copy of the current one."""
+        rv = object.__new__(self.__class__)
+        rv.__dict__.update(self.__dict__)
+        rv.symbols = self.symbols.copy()
+        return rv
+
+    def inner(self, isolated=False):
+        """Return an inner frame."""
+        if isolated:
+            return Frame(self.eval_ctx, level=self.symbols.level + 1)
+        return Frame(self.eval_ctx, self)
+
+    def soft(self):
+        """Return a soft frame. A soft frame may not be modified as a
+        standalone thing as it shares the resources with the frame it
+        was created from, but it's not a rootlevel frame any longer.
+
+        This is only used to implement if-statements.
+        """
+        rv = self.copy()
+        rv.rootlevel = False
+        return rv
+
+    __copy__ = copy
+
+
+class VisitorExit(RuntimeError):
+    """Exception used by the `UndeclaredNameVisitor` to signal a stop."""
+
+
+class DependencyFinderVisitor(NodeVisitor):
+    """A visitor that collects filter and test calls."""
+
+    def __init__(self):
+        self.filters = set()
+        self.tests = set()
+
+    def visit_Filter(self, node):
+        self.generic_visit(node)
+        self.filters.add(node.name)
+
+    def visit_Test(self, node):
+        self.generic_visit(node)
+        self.tests.add(node.name)
+
+    def visit_Block(self, node):
+        """Stop visiting at blocks."""
+
+
+class UndeclaredNameVisitor(NodeVisitor):
+    """A visitor that checks if a name is accessed without being
+    declared. This is different from the frame visitor as it will
+    not stop at closure frames.
+    """
+
+    def __init__(self, names):
+        self.names = set(names)
+        self.undeclared = set()
+
+    def visit_Name(self, node):
+        if node.ctx == 'load' and node.name in self.names:
+            self.undeclared.add(node.name)
+            if self.undeclared == self.names:
+                raise VisitorExit()
+        else:
+            self.names.discard(node.name)
+
+    def visit_Block(self, node):
+        """Stop visiting at blocks."""
+
+
+class CompilerExit(Exception):
+    """Raised if the compiler encountered a situation where it just
+    doesn't make sense to further process the code. Any block that
+    raises such an exception is not further processed.
+    """
+
+
+class CodeGenerator(NodeVisitor):
+
+    def __init__(self, environment, name, filename, stream=None,
+                 defer_init=False, optimized=True):
+        if stream is None:
+            stream = NativeStringIO()
+        self.environment = environment
+        self.name = name
+        self.filename = filename
+        self.stream = stream
+        self.created_block_context = False
+        self.defer_init = defer_init
+        self.optimized = optimized
+        if optimized:
+            self.optimizer = Optimizer(environment)
+
+        # aliases for imports
+        self.import_aliases = {}
+
+        # a registry for all blocks. Because blocks are moved out
+        # into the global python scope they are registered here
+        self.blocks = {}
+
+        # the number of extends statements so far
+        self.extends_so_far = 0
+
+        # some templates have a rootlevel extends. In this case we
+        # can safely assume that we're a child template and do some
+        # more optimizations.
+        self.has_known_extends = False
+
+        # the current line number
+        self.code_lineno = 1
+
+        # registry of all filters and tests (global, not block local)
+        self.tests = {}
+        self.filters = {}
+
+        # the debug information
+        self.debug_info = []
+        self._write_debug_info = None
+
+        # the number of new lines before the next write()
+        self._new_lines = 0
+
+        # the line number of the last written statement
+        self._last_line = 0
+
+        # true if nothing was written so far.
+ self._first_write = True + + # used by the `temporary_identifier` method to get new + # unique, temporary identifier + self._last_identifier = 0 + + # the current indentation + self._indentation = 0 + + # Tracks toplevel assignments + self._assign_stack = [] + + # Tracks parameter definition blocks + self._param_def_block = [] + + # Tracks the current context. + self._context_reference_stack = ['context'] + + # -- Various compilation helpers + + def fail(self, msg, lineno): + """Fail with a :exc:`TemplateAssertionError`.""" + raise TemplateAssertionError(msg, lineno, self.name, self.filename) + + def temporary_identifier(self): + """Get a new unique identifier.""" + self._last_identifier += 1 + return 't_%d' % self._last_identifier + + def buffer(self, frame): + """Enable buffering for the frame from that point onwards.""" + frame.buffer = self.temporary_identifier() + self.writeline('%s = []' % frame.buffer) + + def return_buffer_contents(self, frame, force_unescaped=False): + """Return the buffer contents of the frame.""" + if not force_unescaped: + if frame.eval_ctx.volatile: + self.writeline('if context.eval_ctx.autoescape:') + self.indent() + self.writeline('return Markup(concat(%s))' % frame.buffer) + self.outdent() + self.writeline('else:') + self.indent() + self.writeline('return concat(%s)' % frame.buffer) + self.outdent() + return + elif frame.eval_ctx.autoescape: + self.writeline('return Markup(concat(%s))' % frame.buffer) + return + self.writeline('return concat(%s)' % frame.buffer) + + def indent(self): + """Indent by one.""" + self._indentation += 1 + + def outdent(self, step=1): + """Outdent by step.""" + self._indentation -= step + + def start_write(self, frame, node=None): + """Yield or write into the frame buffer.""" + if frame.buffer is None: + self.writeline('yield ', node) + else: + self.writeline('%s.append(' % frame.buffer, node) + + def end_write(self, frame): + """End the writing process started by `start_write`.""" + if frame.buffer is not None: + self.write(')') + + def simple_write(self, s, frame, node=None): + """Simple shortcut for start_write + write + end_write.""" + self.start_write(frame, node) + self.write(s) + self.end_write(frame) + + def blockvisit(self, nodes, frame): + """Visit a list of nodes as block in a frame. If the current frame + is no buffer a dummy ``if 0: yield None`` is written automatically. + """ + try: + self.writeline('pass') + for node in nodes: + self.visit(node, frame) + except CompilerExit: + pass + + def write(self, x): + """Write a string into the output stream.""" + if self._new_lines: + if not self._first_write: + self.stream.write('\n' * self._new_lines) + self.code_lineno += self._new_lines + if self._write_debug_info is not None: + self.debug_info.append((self._write_debug_info, + self.code_lineno)) + self._write_debug_info = None + self._first_write = False + self.stream.write(' ' * self._indentation) + self._new_lines = 0 + self.stream.write(x) + + def writeline(self, x, node=None, extra=0): + """Combination of newline and write.""" + self.newline(node, extra) + self.write(x) + + def newline(self, node=None, extra=0): + """Add one or more newlines before the next write.""" + self._new_lines = max(self._new_lines, 1 + extra) + if node is not None and node.lineno != self._last_line: + self._write_debug_info = node.lineno + self._last_line = node.lineno + + def signature(self, node, frame, extra_kwargs=None): + """Writes a function call to the stream for the current node. + A leading comma is added automatically. 
The extra keyword
+        arguments may not include python keywords, otherwise a syntax
+        error could occur. The extra keyword arguments should be given
+        as a python dict.
+        """
+        # if any of the given keyword arguments is a python keyword
+        # we have to make sure that no invalid call is created.
+        kwarg_workaround = False
+        for kwarg in chain((x.key for x in node.kwargs), extra_kwargs or ()):
+            if is_python_keyword(kwarg):
+                kwarg_workaround = True
+                break
+
+        for arg in node.args:
+            self.write(', ')
+            self.visit(arg, frame)
+
+        if not kwarg_workaround:
+            for kwarg in node.kwargs:
+                self.write(', ')
+                self.visit(kwarg, frame)
+            if extra_kwargs is not None:
+                for key, value in iteritems(extra_kwargs):
+                    self.write(', %s=%s' % (key, value))
+        if node.dyn_args:
+            self.write(', *')
+            self.visit(node.dyn_args, frame)
+
+        if kwarg_workaround:
+            if node.dyn_kwargs is not None:
+                self.write(', **dict({')
+            else:
+                self.write(', **{')
+            for kwarg in node.kwargs:
+                self.write('%r: ' % kwarg.key)
+                self.visit(kwarg.value, frame)
+                self.write(', ')
+            if extra_kwargs is not None:
+                for key, value in iteritems(extra_kwargs):
+                    self.write('%r: %s, ' % (key, value))
+            if node.dyn_kwargs is not None:
+                self.write('}, **')
+                self.visit(node.dyn_kwargs, frame)
+                self.write(')')
+            else:
+                self.write('}')
+
+        elif node.dyn_kwargs is not None:
+            self.write(', **')
+            self.visit(node.dyn_kwargs, frame)
+
+    def pull_dependencies(self, nodes):
+        """Pull all the dependencies."""
+        visitor = DependencyFinderVisitor()
+        for node in nodes:
+            visitor.visit(node)
+        for dependency in 'filters', 'tests':
+            mapping = getattr(self, dependency)
+            for name in getattr(visitor, dependency):
+                if name not in mapping:
+                    mapping[name] = self.temporary_identifier()
+                self.writeline('%s = environment.%s[%r]' %
+                               (mapping[name], dependency, name))
+
+    def enter_frame(self, frame):
+        undefs = []
+        for target, (action, param) in iteritems(frame.symbols.loads):
+            if action == VAR_LOAD_PARAMETER:
+                pass
+            elif action == VAR_LOAD_RESOLVE:
+                self.writeline('%s = %s(%r)' %
+                               (target, self.get_resolve_func(), param))
+            elif action == VAR_LOAD_ALIAS:
+                self.writeline('%s = %s' % (target, param))
+            elif action == VAR_LOAD_UNDEFINED:
+                undefs.append(target)
+            else:
+                raise NotImplementedError('unknown load instruction')
+        if undefs:
+            self.writeline('%s = missing' % ' = '.join(undefs))
+
+    def leave_frame(self, frame, with_python_scope=False):
+        if not with_python_scope:
+            undefs = []
+            for target, _ in iteritems(frame.symbols.loads):
+                undefs.append(target)
+            if undefs:
+                self.writeline('%s = missing' % ' = '.join(undefs))
+
+    def func(self, name):
+        if self.environment.is_async:
+            return 'async def %s' % name
+        return 'def %s' % name
+
+    def macro_body(self, node, frame):
+        """Dump the function def of a macro or call block."""
+        frame = frame.inner()
+        frame.symbols.analyze_node(node)
+        macro_ref = MacroRef(node)
+
+        explicit_caller = None
+        skip_special_params = set()
+        args = []
+        for idx, arg in enumerate(node.args):
+            if arg.name == 'caller':
+                explicit_caller = idx
+            if arg.name in ('kwargs', 'varargs'):
+                skip_special_params.add(arg.name)
+            args.append(frame.symbols.ref(arg.name))
+
+        undeclared = find_undeclared(node.body, ('caller', 'kwargs', 'varargs'))
+
+        if 'caller' in undeclared:
+            # In older Jinja2 versions there was a bug that allowed caller
+            # to retain the special behavior even if it was mentioned in
+            # the argument list. However thankfully this was only really
+            # working if it was the last argument. 
So we are explicitly + # checking this now and error out if it is anywhere else in + # the argument list. + if explicit_caller is not None: + try: + node.defaults[explicit_caller - len(node.args)] + except IndexError: + self.fail('When defining macros or call blocks the ' + 'special "caller" argument must be omitted ' + 'or be given a default.', node.lineno) + else: + args.append(frame.symbols.declare_parameter('caller')) + macro_ref.accesses_caller = True + if 'kwargs' in undeclared and not 'kwargs' in skip_special_params: + args.append(frame.symbols.declare_parameter('kwargs')) + macro_ref.accesses_kwargs = True + if 'varargs' in undeclared and not 'varargs' in skip_special_params: + args.append(frame.symbols.declare_parameter('varargs')) + macro_ref.accesses_varargs = True + + # macros are delayed, they never require output checks + frame.require_output_check = False + frame.symbols.analyze_node(node) + self.writeline('%s(%s):' % (self.func('macro'), ', '.join(args)), node) + self.indent() + + self.buffer(frame) + self.enter_frame(frame) + + self.push_parameter_definitions(frame) + for idx, arg in enumerate(node.args): + ref = frame.symbols.ref(arg.name) + self.writeline('if %s is missing:' % ref) + self.indent() + try: + default = node.defaults[idx - len(node.args)] + except IndexError: + self.writeline('%s = undefined(%r, name=%r)' % ( + ref, + 'parameter %r was not provided' % arg.name, + arg.name)) + else: + self.writeline('%s = ' % ref) + self.visit(default, frame) + self.mark_parameter_stored(ref) + self.outdent() + self.pop_parameter_definitions() + + self.blockvisit(node.body, frame) + self.return_buffer_contents(frame, force_unescaped=True) + self.leave_frame(frame, with_python_scope=True) + self.outdent() + + return frame, macro_ref + + def macro_def(self, macro_ref, frame): + """Dump the macro definition for the def created by macro_body.""" + arg_tuple = ', '.join(repr(x.name) for x in macro_ref.node.args) + name = getattr(macro_ref.node, 'name', None) + if len(macro_ref.node.args) == 1: + arg_tuple += ',' + self.write('Macro(environment, macro, %r, (%s), %r, %r, %r, ' + 'context.eval_ctx.autoescape)' % + (name, arg_tuple, macro_ref.accesses_kwargs, + macro_ref.accesses_varargs, macro_ref.accesses_caller)) + + def position(self, node): + """Return a human readable position for the node.""" + rv = 'line %d' % node.lineno + if self.name is not None: + rv += ' in ' + repr(self.name) + return rv + + def dump_local_context(self, frame): + return '{%s}' % ', '.join( + '%r: %s' % (name, target) for name, target + in iteritems(frame.symbols.dump_stores())) + + def write_commons(self): + """Writes a common preamble that is used by root and block functions. + Primarily this sets up common local helpers and enforces a generator + through a dead branch. + """ + self.writeline('resolve = context.resolve_or_missing') + self.writeline('undefined = environment.undefined') + self.writeline('if 0: yield None') + + def push_parameter_definitions(self, frame): + """Pushes all parameter targets from the given frame into a local + stack that permits tracking of yet to be assigned parameters. In + particular this enables the optimization from `visit_Name` to skip + undefined expressions for parameters in macros as macros can reference + otherwise unbound parameters. 
+ """ + self._param_def_block.append(frame.symbols.dump_param_targets()) + + def pop_parameter_definitions(self): + """Pops the current parameter definitions set.""" + self._param_def_block.pop() + + def mark_parameter_stored(self, target): + """Marks a parameter in the current parameter definitions as stored. + This will skip the enforced undefined checks. + """ + if self._param_def_block: + self._param_def_block[-1].discard(target) + + def push_context_reference(self, target): + self._context_reference_stack.append(target) + + def pop_context_reference(self): + self._context_reference_stack.pop() + + def get_context_ref(self): + return self._context_reference_stack[-1] + + def get_resolve_func(self): + target = self._context_reference_stack[-1] + if target == 'context': + return 'resolve' + return '%s.resolve' % target + + def derive_context(self, frame): + return '%s.derived(%s)' % ( + self.get_context_ref(), + self.dump_local_context(frame), + ) + + def parameter_is_undeclared(self, target): + """Checks if a given target is an undeclared parameter.""" + if not self._param_def_block: + return False + return target in self._param_def_block[-1] + + def push_assign_tracking(self): + """Pushes a new layer for assignment tracking.""" + self._assign_stack.append(set()) + + def pop_assign_tracking(self, frame): + """Pops the topmost level for assignment tracking and updates the + context variables if necessary. + """ + vars = self._assign_stack.pop() + if not frame.toplevel or not vars: + return + public_names = [x for x in vars if x[:1] != '_'] + if len(vars) == 1: + name = next(iter(vars)) + ref = frame.symbols.ref(name) + self.writeline('context.vars[%r] = %s' % (name, ref)) + else: + self.writeline('context.vars.update({') + for idx, name in enumerate(vars): + if idx: + self.write(', ') + ref = frame.symbols.ref(name) + self.write('%r: %s' % (name, ref)) + self.write('})') + if public_names: + if len(public_names) == 1: + self.writeline('context.exported_vars.add(%r)' % + public_names[0]) + else: + self.writeline('context.exported_vars.update((%s))' % + ', '.join(imap(repr, public_names))) + + # -- Statement Visitors + + def visit_Template(self, node, frame=None): + assert frame is None, 'no root frame allowed' + eval_ctx = EvalContext(self.environment, self.name) + + from jinja2.runtime import __all__ as exported + self.writeline('from __future__ import %s' % ', '.join(code_features)) + self.writeline('from jinja2.runtime import ' + ', '.join(exported)) + + if self.environment.is_async: + self.writeline('from jinja2.asyncsupport import auto_await, ' + 'auto_aiter, make_async_loop_context') + + # if we want a deferred initialization we cannot move the + # environment into a local name + envenv = not self.defer_init and ', environment=environment' or '' + + # do we have an extends tag at all? If not, we can save some + # overhead by just not processing any inheritance code. + have_extends = node.find(nodes.Extends) is not None + + # find all blocks + for block in node.find_all(nodes.Block): + if block.name in self.blocks: + self.fail('block %r defined twice' % block.name, block.lineno) + self.blocks[block.name] = block + + # find all imports and import them + for import_ in node.find_all(nodes.ImportedName): + if import_.importname not in self.import_aliases: + imp = import_.importname + self.import_aliases[imp] = alias = self.temporary_identifier() + if '.' 
in imp: + module, obj = imp.rsplit('.', 1) + self.writeline('from %s import %s as %s' % + (module, obj, alias)) + else: + self.writeline('import %s as %s' % (imp, alias)) + + # add the load name + self.writeline('name = %r' % self.name) + + # generate the root render function. + self.writeline('%s(context, missing=missing%s):' % + (self.func('root'), envenv), extra=1) + self.indent() + self.write_commons() + + # process the root + frame = Frame(eval_ctx) + if 'self' in find_undeclared(node.body, ('self',)): + ref = frame.symbols.declare_parameter('self') + self.writeline('%s = TemplateReference(context)' % ref) + frame.symbols.analyze_node(node) + frame.toplevel = frame.rootlevel = True + frame.require_output_check = have_extends and not self.has_known_extends + if have_extends: + self.writeline('parent_template = None') + self.enter_frame(frame) + self.pull_dependencies(node.body) + self.blockvisit(node.body, frame) + self.leave_frame(frame, with_python_scope=True) + self.outdent() + + # make sure that the parent root is called. + if have_extends: + if not self.has_known_extends: + self.indent() + self.writeline('if parent_template is not None:') + self.indent() + if supports_yield_from and not self.environment.is_async: + self.writeline('yield from parent_template.' + 'root_render_func(context)') + else: + self.writeline('%sfor event in parent_template.' + 'root_render_func(context):' % + (self.environment.is_async and 'async ' or '')) + self.indent() + self.writeline('yield event') + self.outdent() + self.outdent(1 + (not self.has_known_extends)) + + # at this point we now have the blocks collected and can visit them too. + for name, block in iteritems(self.blocks): + self.writeline('%s(context, missing=missing%s):' % + (self.func('block_' + name), envenv), + block, 1) + self.indent() + self.write_commons() + # It's important that we do not make this frame a child of the + # toplevel template. This would cause a variety of + # interesting issues with identifier tracking. 
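+            # (Illustrative: each block is compiled into its own top-level
+            # 'block_<name>' function, so symbols tracked in the template
+            # frame would not actually exist in the block's Python scope.)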
+            block_frame = Frame(eval_ctx)
+            undeclared = find_undeclared(block.body, ('self', 'super'))
+            if 'self' in undeclared:
+                ref = block_frame.symbols.declare_parameter('self')
+                self.writeline('%s = TemplateReference(context)' % ref)
+            if 'super' in undeclared:
+                ref = block_frame.symbols.declare_parameter('super')
+                self.writeline('%s = context.super(%r, '
+                               'block_%s)' % (ref, name, name))
+            block_frame.symbols.analyze_node(block)
+            block_frame.block = name
+            self.enter_frame(block_frame)
+            self.pull_dependencies(block.body)
+            self.blockvisit(block.body, block_frame)
+            self.leave_frame(block_frame, with_python_scope=True)
+            self.outdent()
+
+        self.writeline('blocks = {%s}' % ', '.join('%r: block_%s' % (x, x)
+                                                   for x in self.blocks),
+                       extra=1)
+
+        # add a function that returns the debug info
+        self.writeline('debug_info = %r' % '&'.join('%s=%s' % x for x
+                                                    in self.debug_info))
+
+    def visit_Block(self, node, frame):
+        """Call a block and register it for the template."""
+        level = 0
+        if frame.toplevel:
+            # if we know that we are a child template, there is no need to
+            # check if we are one
+            if self.has_known_extends:
+                return
+            if self.extends_so_far > 0:
+                self.writeline('if parent_template is None:')
+                self.indent()
+                level += 1
+
+        if node.scoped:
+            context = self.derive_context(frame)
+        else:
+            context = self.get_context_ref()
+
+        if supports_yield_from and not self.environment.is_async and \
+           frame.buffer is None:
+            self.writeline('yield from context.blocks[%r][0](%s)' % (
+                node.name, context), node)
+        else:
+            loop = self.environment.is_async and 'async for' or 'for'
+            self.writeline('%s event in context.blocks[%r][0](%s):' % (
+                loop, node.name, context), node)
+            self.indent()
+            self.simple_write('event', frame)
+            self.outdent()
+
+        self.outdent(level)
+
+    def visit_Extends(self, node, frame):
+        """Calls the extender."""
+        if not frame.toplevel:
+            self.fail('cannot use extend from a non top-level scope',
+                      node.lineno)
+
+        # if the number of extends statements in general is zero so
+        # far, we don't have to add a check if something extended
+        # the template before this one.
+        if self.extends_so_far > 0:
+
+            # if we have a known extends we just add a template runtime
+            # error into the generated code. We could catch that at compile
+            # time too, but I prefer not to confuse users by throwing the
+            # same error at different times just "because we can".
+            if not self.has_known_extends:
+                self.writeline('if parent_template is not None:')
+                self.indent()
+            self.writeline('raise TemplateRuntimeError(%r)' %
+                           'extended multiple times')
+
+            # if we have a known extends already we don't need that code here
+            # as we know that the template execution will end here.
+            if self.has_known_extends:
+                raise CompilerExit()
+            else:
+                self.outdent()
+
+        self.writeline('parent_template = environment.get_template(', node)
+        self.visit(node.template, frame)
+        self.write(', %r)' % self.name)
+        self.writeline('for name, parent_block in parent_template.'
+                       'blocks.%s():' % dict_item_iter)
+        self.indent()
+        self.writeline('context.blocks.setdefault(name, []).'
+ 'append(parent_block)') + self.outdent() + + # if this extends statement was in the root level we can take + # advantage of that information and simplify the generated code + # in the top level from this point onwards + if frame.rootlevel: + self.has_known_extends = True + + # and now we have one more + self.extends_so_far += 1 + + def visit_Include(self, node, frame): + """Handles includes.""" + if node.ignore_missing: + self.writeline('try:') + self.indent() + + func_name = 'get_or_select_template' + if isinstance(node.template, nodes.Const): + if isinstance(node.template.value, string_types): + func_name = 'get_template' + elif isinstance(node.template.value, (tuple, list)): + func_name = 'select_template' + elif isinstance(node.template, (nodes.Tuple, nodes.List)): + func_name = 'select_template' + + self.writeline('template = environment.%s(' % func_name, node) + self.visit(node.template, frame) + self.write(', %r)' % self.name) + if node.ignore_missing: + self.outdent() + self.writeline('except TemplateNotFound:') + self.indent() + self.writeline('pass') + self.outdent() + self.writeline('else:') + self.indent() + + skip_event_yield = False + if node.with_context: + loop = self.environment.is_async and 'async for' or 'for' + self.writeline('%s event in template.root_render_func(' + 'template.new_context(context.get_all(), True, ' + '%s)):' % (loop, self.dump_local_context(frame))) + elif self.environment.is_async: + self.writeline('for event in (await ' + 'template._get_default_module_async())' + '._body_stream:') + else: + if supports_yield_from: + self.writeline('yield from template._get_default_module()' + '._body_stream') + skip_event_yield = True + else: + self.writeline('for event in template._get_default_module()' + '._body_stream:') + + if not skip_event_yield: + self.indent() + self.simple_write('event', frame) + self.outdent() + + if node.ignore_missing: + self.outdent() + + def visit_Import(self, node, frame): + """Visit regular imports.""" + self.writeline('%s = ' % frame.symbols.ref(node.target), node) + if frame.toplevel: + self.write('context.vars[%r] = ' % node.target) + if self.environment.is_async: + self.write('await ') + self.write('environment.get_template(') + self.visit(node.template, frame) + self.write(', %r).' % self.name) + if node.with_context: + self.write('make_module%s(context.get_all(), True, %s)' + % (self.environment.is_async and '_async' or '', + self.dump_local_context(frame))) + elif self.environment.is_async: + self.write('_get_default_module_async()') + else: + self.write('_get_default_module()') + if frame.toplevel and not node.target.startswith('_'): + self.writeline('context.exported_vars.discard(%r)' % node.target) + + def visit_FromImport(self, node, frame): + """Visit named imports.""" + self.newline(node) + self.write('included_template = %senvironment.get_template(' + % (self.environment.is_async and 'await ' or '')) + self.visit(node.template, frame) + self.write(', %r).' 
% self.name)
+        if node.with_context:
+            self.write('make_module%s(context.get_all(), True, %s)'
+                       % (self.environment.is_async and '_async' or '',
+                          self.dump_local_context(frame)))
+        elif self.environment.is_async:
+            self.write('_get_default_module_async()')
+        else:
+            self.write('_get_default_module()')
+
+        var_names = []
+        discarded_names = []
+        for name in node.names:
+            if isinstance(name, tuple):
+                name, alias = name
+            else:
+                alias = name
+            self.writeline('%s = getattr(included_template, '
+                           '%r, missing)' % (frame.symbols.ref(alias), name))
+            self.writeline('if %s is missing:' % frame.symbols.ref(alias))
+            self.indent()
+            self.writeline('%s = undefined(%r %% '
+                           'included_template.__name__, '
+                           'name=%r)' %
+                           (frame.symbols.ref(alias),
+                            'the template %%r (imported on %s) does '
+                            'not export the requested name %s' % (
+                                self.position(node),
+                                repr(name)
+                            ), name))
+            self.outdent()
+            if frame.toplevel:
+                var_names.append(alias)
+                if not alias.startswith('_'):
+                    discarded_names.append(alias)
+
+        if var_names:
+            if len(var_names) == 1:
+                name = var_names[0]
+                self.writeline('context.vars[%r] = %s' %
+                               (name, frame.symbols.ref(name)))
+            else:
+                self.writeline('context.vars.update({%s})' % ', '.join(
+                    '%r: %s' % (name, frame.symbols.ref(name)) for name in var_names
+                ))
+        if discarded_names:
+            if len(discarded_names) == 1:
+                self.writeline('context.exported_vars.discard(%r)' %
+                               discarded_names[0])
+            else:
+                self.writeline('context.exported_vars.difference_'
+                               'update((%s))' % ', '.join(imap(repr, discarded_names)))
+
+    def visit_For(self, node, frame):
+        loop_frame = frame.inner()
+        test_frame = frame.inner()
+        else_frame = frame.inner()
+
+        # try to figure out if we have an extended loop. An extended loop
+        # is necessary if the loop is in recursive mode or if the special
+        # loop variable is accessed in the body.
+        extended_loop = node.recursive or 'loop' in \
+            find_undeclared(node.iter_child_nodes(
+                only=('body',)), ('loop',))
+
+        loop_ref = None
+        if extended_loop:
+            loop_ref = loop_frame.symbols.declare_parameter('loop')
+
+        loop_frame.symbols.analyze_node(node, for_branch='body')
+        if node.else_:
+            else_frame.symbols.analyze_node(node, for_branch='else')
+
+        if node.test:
+            loop_filter_func = self.temporary_identifier()
+            test_frame.symbols.analyze_node(node, for_branch='test')
+            self.writeline('%s(fiter):' % self.func(loop_filter_func), node.test)
+            self.indent()
+            self.enter_frame(test_frame)
+            self.writeline(self.environment.is_async and 'async for ' or 'for ')
+            self.visit(node.target, loop_frame)
+            self.write(' in ')
+            self.write(self.environment.is_async and 'auto_aiter(fiter)' or 'fiter')
+            self.write(':')
+            self.indent()
+            self.writeline('if ', node.test)
+            self.visit(node.test, test_frame)
+            self.write(':')
+            self.indent()
+            self.writeline('yield ')
+            self.visit(node.target, loop_frame)
+            self.outdent(3)
+            self.leave_frame(test_frame, with_python_scope=True)
+
+        # if we don't have a recursive loop we have to find the shadowed
+        # variables at that point. Because loops can be nested but the loop
+        # variable is a special one we have to enforce aliasing for it.
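+        #
+        # Illustrative template for the recursive branch handled below::
+        #
+        #     {% for item in sitemap recursive %}
+        #         {{ item.title }}{{ loop(item.children) }}
+        #     {% endfor %}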
+ if node.recursive: + self.writeline('%s(reciter, loop_render_func, depth=0):' % + self.func('loop'), node) + self.indent() + self.buffer(loop_frame) + + # Use the same buffer for the else frame + else_frame.buffer = loop_frame.buffer + + # make sure the loop variable is a special one and raise a template + # assertion error if a loop tries to write to loop + if extended_loop: + self.writeline('%s = missing' % loop_ref) + + for name in node.find_all(nodes.Name): + if name.ctx == 'store' and name.name == 'loop': + self.fail('Can\'t assign to special loop variable ' + 'in for-loop target', name.lineno) + + if node.else_: + iteration_indicator = self.temporary_identifier() + self.writeline('%s = 1' % iteration_indicator) + + self.writeline(self.environment.is_async and 'async for ' or 'for ', node) + self.visit(node.target, loop_frame) + if extended_loop: + if self.environment.is_async: + self.write(', %s in await make_async_loop_context(' % loop_ref) + else: + self.write(', %s in LoopContext(' % loop_ref) + else: + self.write(' in ') + + if node.test: + self.write('%s(' % loop_filter_func) + if node.recursive: + self.write('reciter') + else: + if self.environment.is_async and not extended_loop: + self.write('auto_aiter(') + self.visit(node.iter, frame) + if self.environment.is_async and not extended_loop: + self.write(')') + if node.test: + self.write(')') + + if node.recursive: + self.write(', undefined, loop_render_func, depth):') + else: + self.write(extended_loop and ', undefined):' or ':') + + self.indent() + self.enter_frame(loop_frame) + + self.blockvisit(node.body, loop_frame) + if node.else_: + self.writeline('%s = 0' % iteration_indicator) + self.outdent() + self.leave_frame(loop_frame, with_python_scope=node.recursive + and not node.else_) + + if node.else_: + self.writeline('if %s:' % iteration_indicator) + self.indent() + self.enter_frame(else_frame) + self.blockvisit(node.else_, else_frame) + self.leave_frame(else_frame) + self.outdent() + + # if the node was recursive we have to return the buffer contents + # and start the iteration code + if node.recursive: + self.return_buffer_contents(loop_frame) + self.outdent() + self.start_write(frame, node) + if self.environment.is_async: + self.write('await ') + self.write('loop(') + if self.environment.is_async: + self.write('auto_aiter(') + self.visit(node.iter, frame) + if self.environment.is_async: + self.write(')') + self.write(', loop)') + self.end_write(frame) + + def visit_If(self, node, frame): + if_frame = frame.soft() + self.writeline('if ', node) + self.visit(node.test, if_frame) + self.write(':') + self.indent() + self.blockvisit(node.body, if_frame) + self.outdent() + for elif_ in node.elif_: + self.writeline('elif ', elif_) + self.visit(elif_.test, if_frame) + self.write(':') + self.indent() + self.blockvisit(elif_.body, if_frame) + self.outdent() + if node.else_: + self.writeline('else:') + self.indent() + self.blockvisit(node.else_, if_frame) + self.outdent() + + def visit_Macro(self, node, frame): + macro_frame, macro_ref = self.macro_body(node, frame) + self.newline() + if frame.toplevel: + if not node.name.startswith('_'): + self.write('context.exported_vars.add(%r)' % node.name) + ref = frame.symbols.ref(node.name) + self.writeline('context.vars[%r] = ' % node.name) + self.write('%s = ' % frame.symbols.ref(node.name)) + self.macro_def(macro_ref, macro_frame) + + def visit_CallBlock(self, node, frame): + call_frame, macro_ref = self.macro_body(node, frame) + self.writeline('caller = ') + self.macro_def(macro_ref, 
 call_frame)
+        self.start_write(frame, node)
+        self.visit_Call(node.call, frame, forward_caller=True)
+        self.end_write(frame)
+
+    def visit_FilterBlock(self, node, frame):
+        filter_frame = frame.inner()
+        filter_frame.symbols.analyze_node(node)
+        self.enter_frame(filter_frame)
+        self.buffer(filter_frame)
+        self.blockvisit(node.body, filter_frame)
+        self.start_write(frame, node)
+        self.visit_Filter(node.filter, filter_frame)
+        self.end_write(frame)
+        self.leave_frame(filter_frame)
+
+    def visit_With(self, node, frame):
+        with_frame = frame.inner()
+        with_frame.symbols.analyze_node(node)
+        self.enter_frame(with_frame)
+        for idx, (target, expr) in enumerate(izip(node.targets, node.values)):
+            self.newline()
+            self.visit(target, with_frame)
+            self.write(' = ')
+            self.visit(expr, frame)
+        self.blockvisit(node.body, with_frame)
+        self.leave_frame(with_frame)
+
+    def visit_ExprStmt(self, node, frame):
+        self.newline(node)
+        self.visit(node.node, frame)
+
+    def visit_Output(self, node, frame):
+        # if we have a known extends statement, we don't output anything
+        # if we are in a require_output_check section
+        if self.has_known_extends and frame.require_output_check:
+            return
+
+        allow_constant_finalize = True
+        if self.environment.finalize:
+            func = self.environment.finalize
+            if getattr(func, 'contextfunction', False) or \
+               getattr(func, 'evalcontextfunction', False):
+                allow_constant_finalize = False
+            elif getattr(func, 'environmentfunction', False):
+                finalize = lambda x: text_type(
+                    self.environment.finalize(self.environment, x))
+            else:
+                finalize = lambda x: text_type(self.environment.finalize(x))
+        else:
+            finalize = text_type
+
+        # if we are inside a frame that requires output checking, we do so
+        outdent_later = False
+        if frame.require_output_check:
+            self.writeline('if parent_template is None:')
+            self.indent()
+            outdent_later = True
+
+        # try to evaluate as many chunks as possible into a static
+        # string at compile time.
+        body = []
+        for child in node.nodes:
+            try:
+                if not allow_constant_finalize:
+                    raise nodes.Impossible()
+                const = child.as_const(frame.eval_ctx)
+            except nodes.Impossible:
+                body.append(child)
+                continue
+            # the frame can't be volatile here, because otherwise the
+            # as_const() function would raise an Impossible exception
+            # at that point.
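# --- editor's aside (not part of the committed diff): a minimal sketch of
# the compile-time constant folding below.  `none` is a template constant,
# so a plain (non-context) finalize is already applied while compiling.
from jinja2 import Environment

env = Environment(finalize=lambda x: '' if x is None else x)
assert env.from_string('{{ none }}!').render() == '!'
# --- end aside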
+ try: + if frame.eval_ctx.autoescape: + if hasattr(const, '__html__'): + const = const.__html__() + else: + const = escape(const) + const = finalize(const) + except Exception: + # if something goes wrong here we evaluate the node + # at runtime for easier debugging + body.append(child) + continue + if body and isinstance(body[-1], list): + body[-1].append(const) + else: + body.append([const]) + + # if we have less than 3 nodes or a buffer we yield or extend/append + if len(body) < 3 or frame.buffer is not None: + if frame.buffer is not None: + # for one item we append, for more we extend + if len(body) == 1: + self.writeline('%s.append(' % frame.buffer) + else: + self.writeline('%s.extend((' % frame.buffer) + self.indent() + for item in body: + if isinstance(item, list): + val = repr(concat(item)) + if frame.buffer is None: + self.writeline('yield ' + val) + else: + self.writeline(val + ',') + else: + if frame.buffer is None: + self.writeline('yield ', item) + else: + self.newline(item) + close = 1 + if frame.eval_ctx.volatile: + self.write('(escape if context.eval_ctx.autoescape' + ' else to_string)(') + elif frame.eval_ctx.autoescape: + self.write('escape(') + else: + self.write('to_string(') + if self.environment.finalize is not None: + self.write('environment.finalize(') + if getattr(self.environment.finalize, + "contextfunction", False): + self.write('context, ') + close += 1 + self.visit(item, frame) + self.write(')' * close) + if frame.buffer is not None: + self.write(',') + if frame.buffer is not None: + # close the open parentheses + self.outdent() + self.writeline(len(body) == 1 and ')' or '))') + + # otherwise we create a format string as this is faster in that case + else: + format = [] + arguments = [] + for item in body: + if isinstance(item, list): + format.append(concat(item).replace('%', '%%')) + else: + format.append('%s') + arguments.append(item) + self.writeline('yield ') + self.write(repr(concat(format)) + ' % (') + self.indent() + for argument in arguments: + self.newline(argument) + close = 0 + if frame.eval_ctx.volatile: + self.write('(escape if context.eval_ctx.autoescape else' + ' to_string)(') + close += 1 + elif frame.eval_ctx.autoescape: + self.write('escape(') + close += 1 + if self.environment.finalize is not None: + self.write('environment.finalize(') + if getattr(self.environment.finalize, + 'contextfunction', False): + self.write('context, ') + elif getattr(self.environment.finalize, + 'evalcontextfunction', False): + self.write('context.eval_ctx, ') + elif getattr(self.environment.finalize, + 'environmentfunction', False): + self.write('environment, ') + close += 1 + self.visit(argument, frame) + self.write(')' * close + ', ') + self.outdent() + self.writeline(')') + + if outdent_later: + self.outdent() + + def visit_Assign(self, node, frame): + self.push_assign_tracking() + self.newline(node) + self.visit(node.target, frame) + self.write(' = ') + self.visit(node.node, frame) + self.pop_assign_tracking(frame) + + def visit_AssignBlock(self, node, frame): + self.push_assign_tracking() + block_frame = frame.inner() + # This is a special case. Since a set block always captures we + # will disable output checks. This way one can use set blocks + # toplevel even in extended templates. 
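# --- editor's aside (not part of the committed diff): plain-Jinja sketches
# of the assignment constructs handled here.  A block `set` captures its
# body; `namespace` attributes (the NSRef nodes below) survive scopes.
from jinja2 import Environment

env = Environment()

t1 = env.from_string(
    '{% set greeting %}Hello {{ name }}{% endset %}{{ greeting }}!')
assert t1.render(name='World') == 'Hello World!'

t2 = env.from_string(
    '{% set ns = namespace(found=false) %}'
    '{% for x in [1, 2, 3] %}'
    '{% if x == 2 %}{% set ns.found = true %}{% endif %}'
    '{% endfor %}'
    '{{ ns.found }}')
assert t2.render() == 'True'
# --- end aside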
+ block_frame.require_output_check = False + block_frame.symbols.analyze_node(node) + self.enter_frame(block_frame) + self.buffer(block_frame) + self.blockvisit(node.body, block_frame) + self.newline(node) + self.visit(node.target, frame) + self.write(' = (Markup if context.eval_ctx.autoescape ' + 'else identity)(') + if node.filter is not None: + self.visit_Filter(node.filter, block_frame) + else: + self.write('concat(%s)' % block_frame.buffer) + self.write(')') + self.pop_assign_tracking(frame) + self.leave_frame(block_frame) + + # -- Expression Visitors + + def visit_Name(self, node, frame): + if node.ctx == 'store' and frame.toplevel: + if self._assign_stack: + self._assign_stack[-1].add(node.name) + ref = frame.symbols.ref(node.name) + + # If we are looking up a variable we might have to deal with the + # case where it's undefined. We can skip that case if the load + # instruction indicates a parameter which are always defined. + if node.ctx == 'load': + load = frame.symbols.find_load(ref) + if not (load is not None and load[0] == VAR_LOAD_PARAMETER and \ + not self.parameter_is_undeclared(ref)): + self.write('(undefined(name=%r) if %s is missing else %s)' % + (node.name, ref, ref)) + return + + self.write(ref) + + def visit_NSRef(self, node, frame): + # NSRefs can only be used to store values; since they use the normal + # `foo.bar` notation they will be parsed as a normal attribute access + # when used anywhere but in a `set` context + ref = frame.symbols.ref(node.name) + self.writeline('if not isinstance(%s, Namespace):' % ref) + self.indent() + self.writeline('raise TemplateRuntimeError(%r)' % + 'cannot assign attribute on non-namespace object') + self.outdent() + self.writeline('%s[%r]' % (ref, node.attr)) + + def visit_Const(self, node, frame): + val = node.as_const(frame.eval_ctx) + if isinstance(val, float): + self.write(str(val)) + else: + self.write(repr(val)) + + def visit_TemplateData(self, node, frame): + try: + self.write(repr(node.as_const(frame.eval_ctx))) + except nodes.Impossible: + self.write('(Markup if context.eval_ctx.autoescape else identity)(%r)' + % node.data) + + def visit_Tuple(self, node, frame): + self.write('(') + idx = -1 + for idx, item in enumerate(node.items): + if idx: + self.write(', ') + self.visit(item, frame) + self.write(idx == 0 and ',)' or ')') + + def visit_List(self, node, frame): + self.write('[') + for idx, item in enumerate(node.items): + if idx: + self.write(', ') + self.visit(item, frame) + self.write(']') + + def visit_Dict(self, node, frame): + self.write('{') + for idx, item in enumerate(node.items): + if idx: + self.write(', ') + self.visit(item.key, frame) + self.write(': ') + self.visit(item.value, frame) + self.write('}') + + def binop(operator, interceptable=True): + @optimizeconst + def visitor(self, node, frame): + if self.environment.sandboxed and \ + operator in self.environment.intercepted_binops: + self.write('environment.call_binop(context, %r, ' % operator) + self.visit(node.left, frame) + self.write(', ') + self.visit(node.right, frame) + else: + self.write('(') + self.visit(node.left, frame) + self.write(' %s ' % operator) + self.visit(node.right, frame) + self.write(')') + return visitor + + def uaop(operator, interceptable=True): + @optimizeconst + def visitor(self, node, frame): + if self.environment.sandboxed and \ + operator in self.environment.intercepted_unops: + self.write('environment.call_unop(context, %r, ' % operator) + self.visit(node.node, frame) + else: + self.write('(' + operator) + 
self.visit(node.node, frame) + self.write(')') + return visitor + + visit_Add = binop('+') + visit_Sub = binop('-') + visit_Mul = binop('*') + visit_Div = binop('/') + visit_FloorDiv = binop('//') + visit_Pow = binop('**') + visit_Mod = binop('%') + visit_And = binop('and', interceptable=False) + visit_Or = binop('or', interceptable=False) + visit_Pos = uaop('+') + visit_Neg = uaop('-') + visit_Not = uaop('not ', interceptable=False) + del binop, uaop + + @optimizeconst + def visit_Concat(self, node, frame): + if frame.eval_ctx.volatile: + func_name = '(context.eval_ctx.volatile and' \ + ' markup_join or unicode_join)' + elif frame.eval_ctx.autoescape: + func_name = 'markup_join' + else: + func_name = 'unicode_join' + self.write('%s((' % func_name) + for arg in node.nodes: + self.visit(arg, frame) + self.write(', ') + self.write('))') + + @optimizeconst + def visit_Compare(self, node, frame): + self.visit(node.expr, frame) + for op in node.ops: + self.visit(op, frame) + + def visit_Operand(self, node, frame): + self.write(' %s ' % operators[node.op]) + self.visit(node.expr, frame) + + @optimizeconst + def visit_Getattr(self, node, frame): + self.write('environment.getattr(') + self.visit(node.node, frame) + self.write(', %r)' % node.attr) + + @optimizeconst + def visit_Getitem(self, node, frame): + # slices bypass the environment getitem method. + if isinstance(node.arg, nodes.Slice): + self.visit(node.node, frame) + self.write('[') + self.visit(node.arg, frame) + self.write(']') + else: + self.write('environment.getitem(') + self.visit(node.node, frame) + self.write(', ') + self.visit(node.arg, frame) + self.write(')') + + def visit_Slice(self, node, frame): + if node.start is not None: + self.visit(node.start, frame) + self.write(':') + if node.stop is not None: + self.visit(node.stop, frame) + if node.step is not None: + self.write(':') + self.visit(node.step, frame) + + @optimizeconst + def visit_Filter(self, node, frame): + if self.environment.is_async: + self.write('await auto_await(') + self.write(self.filters[node.name] + '(') + func = self.environment.filters.get(node.name) + if func is None: + self.fail('no filter named %r' % node.name, node.lineno) + if getattr(func, 'contextfilter', False): + self.write('context, ') + elif getattr(func, 'evalcontextfilter', False): + self.write('context.eval_ctx, ') + elif getattr(func, 'environmentfilter', False): + self.write('environment, ') + + # if the filter node is None we are inside a filter block + # and want to write to the current buffer + if node.node is not None: + self.visit(node.node, frame) + elif frame.eval_ctx.volatile: + self.write('(context.eval_ctx.autoescape and' + ' Markup(concat(%s)) or concat(%s))' % + (frame.buffer, frame.buffer)) + elif frame.eval_ctx.autoescape: + self.write('Markup(concat(%s))' % frame.buffer) + else: + self.write('concat(%s)' % frame.buffer) + self.signature(node, frame) + self.write(')') + if self.environment.is_async: + self.write(')') + + @optimizeconst + def visit_Test(self, node, frame): + self.write(self.tests[node.name] + '(') + if node.name not in self.environment.tests: + self.fail('no test named %r' % node.name, node.lineno) + self.visit(node.node, frame) + self.signature(node, frame) + self.write(')') + + @optimizeconst + def visit_CondExpr(self, node, frame): + def write_expr2(): + if node.expr2 is not None: + return self.visit(node.expr2, frame) + self.write('undefined(%r)' % ('the inline if-' + 'expression on %s evaluated to false and ' + 'no else section was defined.' 
% self.position(node))) + + self.write('(') + self.visit(node.expr1, frame) + self.write(' if ') + self.visit(node.test, frame) + self.write(' else ') + write_expr2() + self.write(')') + + @optimizeconst + def visit_Call(self, node, frame, forward_caller=False): + if self.environment.is_async: + self.write('await auto_await(') + if self.environment.sandboxed: + self.write('environment.call(context, ') + else: + self.write('context.call(') + self.visit(node.node, frame) + extra_kwargs = forward_caller and {'caller': 'caller'} or None + self.signature(node, frame, extra_kwargs) + self.write(')') + if self.environment.is_async: + self.write(')') + + def visit_Keyword(self, node, frame): + self.write(node.key + '=') + self.visit(node.value, frame) + + # -- Unused nodes for extensions + + def visit_MarkSafe(self, node, frame): + self.write('Markup(') + self.visit(node.expr, frame) + self.write(')') + + def visit_MarkSafeIfAutoescape(self, node, frame): + self.write('(context.eval_ctx.autoescape and Markup or identity)(') + self.visit(node.expr, frame) + self.write(')') + + def visit_EnvironmentAttribute(self, node, frame): + self.write('environment.' + node.name) + + def visit_ExtensionAttribute(self, node, frame): + self.write('environment.extensions[%r].%s' % (node.identifier, node.name)) + + def visit_ImportedName(self, node, frame): + self.write(self.import_aliases[node.importname]) + + def visit_InternalName(self, node, frame): + self.write(node.name) + + def visit_ContextReference(self, node, frame): + self.write('context') + + def visit_Continue(self, node, frame): + self.writeline('continue', node) + + def visit_Break(self, node, frame): + self.writeline('break', node) + + def visit_Scope(self, node, frame): + scope_frame = frame.inner() + scope_frame.symbols.analyze_node(node) + self.enter_frame(scope_frame) + self.blockvisit(node.body, scope_frame) + self.leave_frame(scope_frame) + + def visit_OverlayScope(self, node, frame): + ctx = self.temporary_identifier() + self.writeline('%s = %s' % (ctx, self.derive_context(frame))) + self.writeline('%s.vars = ' % ctx) + self.visit(node.context, frame) + self.push_context_reference(ctx) + + scope_frame = frame.inner(isolated=True) + scope_frame.symbols.analyze_node(node) + self.enter_frame(scope_frame) + self.blockvisit(node.body, scope_frame) + self.leave_frame(scope_frame) + self.pop_context_reference() + + def visit_EvalContextModifier(self, node, frame): + for keyword in node.options: + self.writeline('context.eval_ctx.%s = ' % keyword.key) + self.visit(keyword.value, frame) + try: + val = keyword.value.as_const(frame.eval_ctx) + except nodes.Impossible: + frame.eval_ctx.volatile = True + else: + setattr(frame.eval_ctx, keyword.key, val) + + def visit_ScopedEvalContextModifier(self, node, frame): + old_ctx_name = self.temporary_identifier() + saved_ctx = frame.eval_ctx.save() + self.writeline('%s = context.eval_ctx.save()' % old_ctx_name) + self.visit_EvalContextModifier(node, frame) + for child in node.body: + self.visit(child, frame) + frame.eval_ctx.revert(saved_ctx) + self.writeline('context.eval_ctx.revert(%s)' % old_ctx_name) diff --git a/python/jinja2/constants.py b/python/jinja2/constants.py new file mode 100644 index 0000000..11efd1e --- /dev/null +++ b/python/jinja2/constants.py @@ -0,0 +1,32 @@ +# -*- coding: utf-8 -*- +""" + jinja.constants + ~~~~~~~~~~~~~~~ + + Various constants. + + :copyright: (c) 2017 by the Jinja Team. + :license: BSD, see LICENSE for more details. 
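# --- editor's aside (not part of the committed diff): the scoped
# eval-context modifier above is what the built-in `autoescape` tag
# compiles to; a quick sketch of its effect.
from jinja2 import Environment

env = Environment(autoescape=False)
tmpl = env.from_string(
    '{% autoescape true %}{{ s }}{% endautoescape %}|{{ s }}')
assert tmpl.render(s='<b>') == '&lt;b&gt;|<b>'
# --- end aside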
+""" + + +#: list of lorem ipsum words used by the lipsum() helper function +LOREM_IPSUM_WORDS = u'''\ +a ac accumsan ad adipiscing aenean aliquam aliquet amet ante aptent arcu at +auctor augue bibendum blandit class commodo condimentum congue consectetuer +consequat conubia convallis cras cubilia cum curabitur curae cursus dapibus +diam dictum dictumst dignissim dis dolor donec dui duis egestas eget eleifend +elementum elit enim erat eros est et etiam eu euismod facilisi facilisis fames +faucibus felis fermentum feugiat fringilla fusce gravida habitant habitasse hac +hendrerit hymenaeos iaculis id imperdiet in inceptos integer interdum ipsum +justo lacinia lacus laoreet lectus leo libero ligula litora lobortis lorem +luctus maecenas magna magnis malesuada massa mattis mauris metus mi molestie +mollis montes morbi mus nam nascetur natoque nec neque netus nibh nisi nisl non +nonummy nostra nulla nullam nunc odio orci ornare parturient pede pellentesque +penatibus per pharetra phasellus placerat platea porta porttitor posuere +potenti praesent pretium primis proin pulvinar purus quam quis quisque rhoncus +ridiculus risus rutrum sagittis sapien scelerisque sed sem semper senectus sit +sociis sociosqu sodales sollicitudin suscipit suspendisse taciti tellus tempor +tempus tincidunt torquent tortor tristique turpis ullamcorper ultrices +ultricies urna ut varius vehicula vel velit venenatis vestibulum vitae vivamus +viverra volutpat vulputate''' diff --git a/python/jinja2/debug.py b/python/jinja2/debug.py new file mode 100644 index 0000000..b61139f --- /dev/null +++ b/python/jinja2/debug.py @@ -0,0 +1,372 @@ +# -*- coding: utf-8 -*- +""" + jinja2.debug + ~~~~~~~~~~~~ + + Implements the debug interface for Jinja. This module does some pretty + ugly stuff with the Python traceback system in order to achieve tracebacks + with correct line numbers, locals and contents. + + :copyright: (c) 2017 by the Jinja Team. + :license: BSD, see LICENSE for more details. +""" +import sys +import traceback +from types import TracebackType, CodeType +from jinja2.utils import missing, internal_code +from jinja2.exceptions import TemplateSyntaxError +from jinja2._compat import iteritems, reraise, PY2 + +# on pypy we can take advantage of transparent proxies +try: + from __pypy__ import tproxy +except ImportError: + tproxy = None + + +# how does the raise helper look like? +try: + exec("raise TypeError, 'foo'") +except SyntaxError: + raise_helper = 'raise __jinja_exception__[1]' +except TypeError: + raise_helper = 'raise __jinja_exception__[0], __jinja_exception__[1]' + + +class TracebackFrameProxy(object): + """Proxies a traceback frame.""" + + def __init__(self, tb): + self.tb = tb + self._tb_next = None + + @property + def tb_next(self): + return self._tb_next + + def set_next(self, next): + if tb_set_next is not None: + try: + tb_set_next(self.tb, next and next.tb or None) + except Exception: + # this function can fail due to all the hackery it does + # on various python implementations. We just catch errors + # down and ignore them if necessary. 
+ pass + self._tb_next = next + + @property + def is_jinja_frame(self): + return '__jinja_template__' in self.tb.tb_frame.f_globals + + def __getattr__(self, name): + return getattr(self.tb, name) + + +def make_frame_proxy(frame): + proxy = TracebackFrameProxy(frame) + if tproxy is None: + return proxy + def operation_handler(operation, *args, **kwargs): + if operation in ('__getattribute__', '__getattr__'): + return getattr(proxy, args[0]) + elif operation == '__setattr__': + proxy.__setattr__(*args, **kwargs) + else: + return getattr(proxy, operation)(*args, **kwargs) + return tproxy(TracebackType, operation_handler) + + +class ProcessedTraceback(object): + """Holds a Jinja preprocessed traceback for printing or reraising.""" + + def __init__(self, exc_type, exc_value, frames): + assert frames, 'no frames for this traceback?' + self.exc_type = exc_type + self.exc_value = exc_value + self.frames = frames + + # newly concatenate the frames (which are proxies) + prev_tb = None + for tb in self.frames: + if prev_tb is not None: + prev_tb.set_next(tb) + prev_tb = tb + prev_tb.set_next(None) + + def render_as_text(self, limit=None): + """Return a string with the traceback.""" + lines = traceback.format_exception(self.exc_type, self.exc_value, + self.frames[0], limit=limit) + return ''.join(lines).rstrip() + + def render_as_html(self, full=False): + """Return a unicode string with the traceback as rendered HTML.""" + from jinja2.debugrenderer import render_traceback + return u'%s\n\n<!--\n%s\n-->' % ( + render_traceback(self, full=full), + self.render_as_text().decode('utf-8', 'replace') + ) + + @property + def is_template_syntax_error(self): + """`True` if this is a template syntax error.""" + return isinstance(self.exc_value, TemplateSyntaxError) + + @property + def exc_info(self): + """Exception info tuple with a proxy around the frame objects.""" + return self.exc_type, self.exc_value, self.frames[0] + + @property + def standard_exc_info(self): + """Standard python exc_info for re-raising""" + tb = self.frames[0] + # the frame will be an actual traceback (or transparent proxy) if + # we are on pypy or a python implementation with support for tproxy + if type(tb) is not TracebackType: + tb = tb.tb + return self.exc_type, self.exc_value, tb + + +def make_traceback(exc_info, source_hint=None): + """Creates a processed traceback object from the exc_info.""" + exc_type, exc_value, tb = exc_info + if isinstance(exc_value, TemplateSyntaxError): + exc_info = translate_syntax_error(exc_value, source_hint) + initial_skip = 0 + else: + initial_skip = 1 + return translate_exception(exc_info, initial_skip) + + +def translate_syntax_error(error, source=None): + """Rewrites a syntax error to please traceback systems.""" + error.source = source + error.translated = True + exc_info = (error.__class__, error, None) + filename = error.filename + if filename is None: + filename = '<unknown>' + return fake_exc_info(exc_info, filename, error.lineno) + + +def translate_exception(exc_info, initial_skip=0): + """If passed an exc_info it will automatically rewrite the exceptions + all the way down to the correct line numbers and frames. + """ + tb = exc_info[2] + frames = [] + + # skip some internal frames if wanted + for x in range(initial_skip): + if tb is not None: + tb = tb.tb_next + initial_tb = tb + + while tb is not None: + # skip frames decorated with @internalcode. These are internal + # calls we can't avoid and that are useless in template debugging + # output. 
+        if tb.tb_frame.f_code in internal_code:
+            tb = tb.tb_next
+            continue
+
+        # save a reference to the next frame if we override the current
+        # one with a faked one.
+        next = tb.tb_next
+
+        # fake template exceptions
+        template = tb.tb_frame.f_globals.get('__jinja_template__')
+        if template is not None:
+            lineno = template.get_corresponding_lineno(tb.tb_lineno)
+            tb = fake_exc_info(exc_info[:2] + (tb,), template.filename,
+                               lineno)[2]
+
+        frames.append(make_frame_proxy(tb))
+        tb = next
+
+    # if we don't have any exceptions in the frames left, we have to
+    # reraise it unchanged.
+    # XXX: can we backup here?  when could this happen?
+    if not frames:
+        reraise(exc_info[0], exc_info[1], exc_info[2])
+
+    return ProcessedTraceback(exc_info[0], exc_info[1], frames)
+
+
+def get_jinja_locals(real_locals):
+    ctx = real_locals.get('context')
+    if ctx:
+        locals = ctx.get_all().copy()
+    else:
+        locals = {}
+
+    local_overrides = {}
+
+    for name, value in iteritems(real_locals):
+        if not name.startswith('l_') or value is missing:
+            continue
+        try:
+            _, depth, name = name.split('_', 2)
+            depth = int(depth)
+        except ValueError:
+            continue
+        cur_depth = local_overrides.get(name, (-1,))[0]
+        if cur_depth < depth:
+            local_overrides[name] = (depth, value)
+
+    for name, (_, value) in iteritems(local_overrides):
+        if value is missing:
+            locals.pop(name, None)
+        else:
+            locals[name] = value
+
+    return locals
+
+
+def fake_exc_info(exc_info, filename, lineno):
+    """Helper for `translate_exception`."""
+    exc_type, exc_value, tb = exc_info
+
+    # figure the real context out
+    if tb is not None:
+        locals = get_jinja_locals(tb.tb_frame.f_locals)
+
+        # if there is a local called __jinja_exception__, we get
+        # rid of it to not break the debug functionality.
+        locals.pop('__jinja_exception__', None)
+    else:
+        locals = {}
+
+    # assemble the fake globals we need
+    globals = {
+        '__name__': filename,
+        '__file__': filename,
+        '__jinja_exception__': exc_info[:2],
+
+        # we don't want to keep the reference to the template around
+        # to not cause circular dependencies, but we mark it as Jinja
+        # frame for the ProcessedTraceback
+        '__jinja_template__': None
+    }
+
+    # and fake the exception
+    code = compile('\n' * (lineno - 1) + raise_helper, filename, 'exec')
+
+    # if it's possible, change the name of the code.  This won't work
+    # on some python environments such as google appengine
+    try:
+        if tb is None:
+            location = 'template'
+        else:
+            function = tb.tb_frame.f_code.co_name
+            if function == 'root':
+                location = 'top-level template code'
+            elif function.startswith('block_'):
+                location = 'block "%s"' % function[6:]
+            else:
+                location = 'template'
+
+        if PY2:
+            code = CodeType(0, code.co_nlocals, code.co_stacksize,
+                            code.co_flags, code.co_code, code.co_consts,
+                            code.co_names, code.co_varnames, filename,
+                            location, code.co_firstlineno,
+                            code.co_lnotab, (), ())
+        else:
+            code = CodeType(0, code.co_kwonlyargcount,
+                            code.co_nlocals, code.co_stacksize,
+                            code.co_flags, code.co_code, code.co_consts,
+                            code.co_names, code.co_varnames, filename,
+                            location, code.co_firstlineno,
+                            code.co_lnotab, (), ())
+    except Exception:
+        pass
+
+    # execute the code and catch the new traceback
+    try:
+        exec(code, globals, locals)
+    except:
+        exc_info = sys.exc_info()
+        new_tb = exc_info[2].tb_next
+
+    # return without this frame
+    return exc_info[:2] + (new_tb,)
+
+
+def _init_ugly_crap():
+    """This function implements a few ugly things so that we can patch the
+    traceback objects.
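# --- editor's aside (not part of the committed diff): a sketch of what
# this module achieves end to end, plus the '\n' * (lineno - 1) padding
# trick used by fake_exc_info above.
import traceback

from jinja2 import Environment

# 1) rewritten tracebacks point at template lines, not generated code
try:
    Environment().from_string('line one\n{{ 1 // 0 }}').render()
except ZeroDivisionError:
    traceback.print_exc()   # last frame: File "<template>", line 2

# 2) newline padding makes a compiled raise statement report an
#    arbitrary line number under an arbitrary filename
code = compile('\n' * 40 + 'raise __jinja_exception__[1]',
               'example.html', 'exec')
try:
    exec(code, {'__jinja_exception__': (ValueError, ValueError('boom'))})
except ValueError:
    traceback.print_exc()   # shows: File "example.html", line 41
# --- end aside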
The function returned allows resetting `tb_next` on + any python traceback object. Do not attempt to use this on non cpython + interpreters + """ + import ctypes + from types import TracebackType + + if PY2: + # figure out size of _Py_ssize_t for Python 2: + if hasattr(ctypes.pythonapi, 'Py_InitModule4_64'): + _Py_ssize_t = ctypes.c_int64 + else: + _Py_ssize_t = ctypes.c_int + else: + # platform ssize_t on Python 3 + _Py_ssize_t = ctypes.c_ssize_t + + # regular python + class _PyObject(ctypes.Structure): + pass + _PyObject._fields_ = [ + ('ob_refcnt', _Py_ssize_t), + ('ob_type', ctypes.POINTER(_PyObject)) + ] + + # python with trace + if hasattr(sys, 'getobjects'): + class _PyObject(ctypes.Structure): + pass + _PyObject._fields_ = [ + ('_ob_next', ctypes.POINTER(_PyObject)), + ('_ob_prev', ctypes.POINTER(_PyObject)), + ('ob_refcnt', _Py_ssize_t), + ('ob_type', ctypes.POINTER(_PyObject)) + ] + + class _Traceback(_PyObject): + pass + _Traceback._fields_ = [ + ('tb_next', ctypes.POINTER(_Traceback)), + ('tb_frame', ctypes.POINTER(_PyObject)), + ('tb_lasti', ctypes.c_int), + ('tb_lineno', ctypes.c_int) + ] + + def tb_set_next(tb, next): + """Set the tb_next attribute of a traceback object.""" + if not (isinstance(tb, TracebackType) and + (next is None or isinstance(next, TracebackType))): + raise TypeError('tb_set_next arguments must be traceback objects') + obj = _Traceback.from_address(id(tb)) + if tb.tb_next is not None: + old = _Traceback.from_address(id(tb.tb_next)) + old.ob_refcnt -= 1 + if next is None: + obj.tb_next = ctypes.POINTER(_Traceback)() + else: + next = _Traceback.from_address(id(next)) + next.ob_refcnt += 1 + obj.tb_next = ctypes.pointer(next) + + return tb_set_next + + +# try to get a tb_set_next implementation if we don't have transparent +# proxies. +tb_set_next = None +if tproxy is None: + try: + tb_set_next = _init_ugly_crap() + except: + pass + del _init_ugly_crap diff --git a/python/jinja2/defaults.py b/python/jinja2/defaults.py new file mode 100644 index 0000000..7c93dec --- /dev/null +++ b/python/jinja2/defaults.py @@ -0,0 +1,56 @@ +# -*- coding: utf-8 -*- +""" + jinja2.defaults + ~~~~~~~~~~~~~~~ + + Jinja default filters and tags. + + :copyright: (c) 2017 by the Jinja Team. + :license: BSD, see LICENSE for more details. 
+""" +from jinja2._compat import range_type +from jinja2.utils import generate_lorem_ipsum, Cycler, Joiner, Namespace + + +# defaults for the parser / lexer +BLOCK_START_STRING = '{%' +BLOCK_END_STRING = '%}' +VARIABLE_START_STRING = '{{' +VARIABLE_END_STRING = '}}' +COMMENT_START_STRING = '{#' +COMMENT_END_STRING = '#}' +LINE_STATEMENT_PREFIX = None +LINE_COMMENT_PREFIX = None +TRIM_BLOCKS = False +LSTRIP_BLOCKS = False +NEWLINE_SEQUENCE = '\n' +KEEP_TRAILING_NEWLINE = False + + +# default filters, tests and namespace +from jinja2.filters import FILTERS as DEFAULT_FILTERS +from jinja2.tests import TESTS as DEFAULT_TESTS +DEFAULT_NAMESPACE = { + 'range': range_type, + 'dict': dict, + 'lipsum': generate_lorem_ipsum, + 'cycler': Cycler, + 'joiner': Joiner, + 'namespace': Namespace +} + + +# default policies +DEFAULT_POLICIES = { + 'compiler.ascii_str': True, + 'urlize.rel': 'noopener', + 'urlize.target': None, + 'truncate.leeway': 5, + 'json.dumps_function': None, + 'json.dumps_kwargs': {'sort_keys': True}, + 'ext.i18n.trimmed': False, +} + + +# export all constants +__all__ = tuple(x for x in locals().keys() if x.isupper()) diff --git a/python/jinja2/environment.py b/python/jinja2/environment.py new file mode 100644 index 0000000..549d9af --- /dev/null +++ b/python/jinja2/environment.py @@ -0,0 +1,1276 @@ +# -*- coding: utf-8 -*- +""" + jinja2.environment + ~~~~~~~~~~~~~~~~~~ + + Provides a class that holds runtime and parsing time options. + + :copyright: (c) 2017 by the Jinja Team. + :license: BSD, see LICENSE for more details. +""" +import os +import sys +import weakref +from functools import reduce, partial +from jinja2 import nodes +from jinja2.defaults import BLOCK_START_STRING, \ + BLOCK_END_STRING, VARIABLE_START_STRING, VARIABLE_END_STRING, \ + COMMENT_START_STRING, COMMENT_END_STRING, LINE_STATEMENT_PREFIX, \ + LINE_COMMENT_PREFIX, TRIM_BLOCKS, NEWLINE_SEQUENCE, \ + DEFAULT_FILTERS, DEFAULT_TESTS, DEFAULT_NAMESPACE, \ + DEFAULT_POLICIES, KEEP_TRAILING_NEWLINE, LSTRIP_BLOCKS +from jinja2.lexer import get_lexer, TokenStream +from jinja2.parser import Parser +from jinja2.nodes import EvalContext +from jinja2.compiler import generate, CodeGenerator +from jinja2.runtime import Undefined, new_context, Context +from jinja2.exceptions import TemplateSyntaxError, TemplateNotFound, \ + TemplatesNotFound, TemplateRuntimeError +from jinja2.utils import import_string, LRUCache, Markup, missing, \ + concat, consume, internalcode, have_async_gen +from jinja2._compat import imap, ifilter, string_types, iteritems, \ + text_type, reraise, implements_iterator, implements_to_string, \ + encode_filename, PY2, PYPY + + +# for direct template usage we have up to ten living environments +_spontaneous_environments = LRUCache(10) + +# the function to create jinja traceback objects. This is dynamically +# imported on the first exception in the exception handler. +_make_traceback = None + + +def get_spontaneous_environment(*args): + """Return a new spontaneous environment. A spontaneous environment is an + unnamed and unaccessible (in theory) environment that is used for + templates generated from a string and not from the file system. 
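# --- editor's aside (not part of the committed diff): a sketch of the two
# things defined around here.  Template() without an environment uses a
# shared "spontaneous" one, and DEFAULT_POLICIES entries are plain dict
# keys on the environment.
from jinja2 import Environment, Template

t = Template('Hello {{ name }}!')
assert t.render(name='World') == 'Hello World!'
assert t.environment.shared is True   # spontaneous environments are shared

env = Environment()
env.policies['urlize.target'] = '_blank'
env.policies['truncate.leeway'] = 0
# --- end aside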
+    """
+    try:
+        env = _spontaneous_environments.get(args)
+    except TypeError:
+        return Environment(*args)
+    if env is not None:
+        return env
+    _spontaneous_environments[args] = env = Environment(*args)
+    env.shared = True
+    return env
+
+
+def create_cache(size):
+    """Return the cache class for the given size."""
+    if size == 0:
+        return None
+    if size < 0:
+        return {}
+    return LRUCache(size)
+
+
+def copy_cache(cache):
+    """Create an empty copy of the given cache."""
+    if cache is None:
+        return None
+    elif type(cache) is dict:
+        return {}
+    return LRUCache(cache.capacity)
+
+
+def load_extensions(environment, extensions):
+    """Load the extensions from the list and bind them to the environment.
+    Returns a dict of instantiated extensions.
+    """
+    result = {}
+    for extension in extensions:
+        if isinstance(extension, string_types):
+            extension = import_string(extension)
+        result[extension.identifier] = extension(environment)
+    return result
+
+
+def fail_for_missing_callable(string, name):
+    msg = string % name
+    if isinstance(name, Undefined):
+        try:
+            name._fail_with_undefined_error()
+        except Exception as e:
+            msg = '%s (%s; did you forget to quote the callable name?)' % (msg, e)
+    raise TemplateRuntimeError(msg)
+
+
+def _environment_sanity_check(environment):
+    """Perform a sanity check on the environment."""
+    assert issubclass(environment.undefined, Undefined), 'undefined must ' \
+        'be a subclass of Undefined because filters depend on it.'
+    assert environment.block_start_string != \
+        environment.variable_start_string != \
+        environment.comment_start_string, 'block, variable and comment ' \
+        'start strings must be different'
+    assert environment.newline_sequence in ('\r', '\r\n', '\n'), \
+        'newline_sequence set to unknown line ending string.'
+    return environment
+
+
+class Environment(object):
+    r"""The core component of Jinja is the `Environment`.  It contains
+    important shared variables like configuration, filters, tests,
+    globals and others.  Instances of this class may be modified if
+    they are not shared and if no template was loaded so far.
+    Modifications on environments after the first template was loaded
+    will lead to surprising effects and undefined behavior.
+
+    Here are the possible initialization parameters:
+
+        `block_start_string`
+            The string marking the beginning of a block.  Defaults to ``'{%'``.
+
+        `block_end_string`
+            The string marking the end of a block.  Defaults to ``'%}'``.
+
+        `variable_start_string`
+            The string marking the beginning of a print statement.
+            Defaults to ``'{{'``.
+
+        `variable_end_string`
+            The string marking the end of a print statement.  Defaults to
+            ``'}}'``.
+
+        `comment_start_string`
+            The string marking the beginning of a comment.  Defaults to ``'{#'``.
+
+        `comment_end_string`
+            The string marking the end of a comment.  Defaults to ``'#}'``.
+
+        `line_statement_prefix`
+            If given and a string, this will be used as prefix for line based
+            statements.  See also :ref:`line-statements`.
+
+        `line_comment_prefix`
+            If given and a string, this will be used as prefix for line based
+            comments.  See also :ref:`line-statements`.
+
+            .. versionadded:: 2.2
+
+        `trim_blocks`
+            If this is set to ``True`` the first newline after a block is
+            removed (block, not variable tag!).  Defaults to `False`.
+
+        `lstrip_blocks`
+            If this is set to ``True`` leading spaces and tabs are stripped
+            from the start of a line to a block.  Defaults to `False`.
+
+        `newline_sequence`
+            The sequence that starts a newline.
Must be one of ``'\r'``, + ``'\n'`` or ``'\r\n'``. The default is ``'\n'`` which is a + useful default for Linux and OS X systems as well as web + applications. + + `keep_trailing_newline` + Preserve the trailing newline when rendering templates. + The default is ``False``, which causes a single newline, + if present, to be stripped from the end of the template. + + .. versionadded:: 2.7 + + `extensions` + List of Jinja extensions to use. This can either be import paths + as strings or extension classes. For more information have a + look at :ref:`the extensions documentation <jinja-extensions>`. + + `optimized` + should the optimizer be enabled? Default is ``True``. + + `undefined` + :class:`Undefined` or a subclass of it that is used to represent + undefined values in the template. + + `finalize` + A callable that can be used to process the result of a variable + expression before it is output. For example one can convert + ``None`` implicitly into an empty string here. + + `autoescape` + If set to ``True`` the XML/HTML autoescaping feature is enabled by + default. For more details about autoescaping see + :class:`~jinja2.utils.Markup`. As of Jinja 2.4 this can also + be a callable that is passed the template name and has to + return ``True`` or ``False`` depending on autoescape should be + enabled by default. + + .. versionchanged:: 2.4 + `autoescape` can now be a function + + `loader` + The template loader for this environment. + + `cache_size` + The size of the cache. Per default this is ``400`` which means + that if more than 400 templates are loaded the loader will clean + out the least recently used template. If the cache size is set to + ``0`` templates are recompiled all the time, if the cache size is + ``-1`` the cache will not be cleaned. + + .. versionchanged:: 2.8 + The cache size was increased to 400 from a low 50. + + `auto_reload` + Some loaders load templates from locations where the template + sources may change (ie: file system or database). If + ``auto_reload`` is set to ``True`` (default) every time a template is + requested the loader checks if the source changed and if yes, it + will reload the template. For higher performance it's possible to + disable that. + + `bytecode_cache` + If set to a bytecode cache object, this object will provide a + cache for the internal Jinja bytecode so that templates don't + have to be parsed if they were not changed. + + See :ref:`bytecode-cache` for more information. + + `enable_async` + If set to true this enables async template execution which allows + you to take advantage of newer Python features. This requires + Python 3.6 or later. + """ + + #: if this environment is sandboxed. Modifying this variable won't make + #: the environment sandboxed though. For a real sandboxed environment + #: have a look at jinja2.sandbox. This flag alone controls the code + #: generation by the compiler. + sandboxed = False + + #: True if the environment is just an overlay + overlayed = False + + #: the environment this environment is linked to if it is an overlay + linked_to = None + + #: shared environments have this set to `True`. A shared environment + #: must not be modified + shared = False + + #: these are currently EXPERIMENTAL undocumented features. + exception_handler = None + exception_formatter = None + + #: the class that is used for code generation. See + #: :class:`~jinja2.compiler.CodeGenerator` for more information. + code_generator_class = CodeGenerator + + #: the context class thatis used for templates. 
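# --- editor's aside (not part of the committed diff): a minimal sketch of
# typical construction with the options documented above; FileSystemLoader
# and select_autoescape ship with the same package.
from jinja2 import Environment, FileSystemLoader, select_autoescape

env = Environment(
    loader=FileSystemLoader('templates'),
    autoescape=select_autoescape(['html', 'xml']),
    trim_blocks=True,
    cache_size=400,     # LRU-evict compiled templates beyond 400
    auto_reload=True,   # re-check template sources on every lookup
)
# --- end aside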
See + #: :class:`~jinja2.runtime.Context` for more information. + context_class = Context + + def __init__(self, + block_start_string=BLOCK_START_STRING, + block_end_string=BLOCK_END_STRING, + variable_start_string=VARIABLE_START_STRING, + variable_end_string=VARIABLE_END_STRING, + comment_start_string=COMMENT_START_STRING, + comment_end_string=COMMENT_END_STRING, + line_statement_prefix=LINE_STATEMENT_PREFIX, + line_comment_prefix=LINE_COMMENT_PREFIX, + trim_blocks=TRIM_BLOCKS, + lstrip_blocks=LSTRIP_BLOCKS, + newline_sequence=NEWLINE_SEQUENCE, + keep_trailing_newline=KEEP_TRAILING_NEWLINE, + extensions=(), + optimized=True, + undefined=Undefined, + finalize=None, + autoescape=False, + loader=None, + cache_size=400, + auto_reload=True, + bytecode_cache=None, + enable_async=False): + # !!Important notice!! + # The constructor accepts quite a few arguments that should be + # passed by keyword rather than position. However it's important to + # not change the order of arguments because it's used at least + # internally in those cases: + # - spontaneous environments (i18n extension and Template) + # - unittests + # If parameter changes are required only add parameters at the end + # and don't change the arguments (or the defaults!) of the arguments + # existing already. + + # lexer / parser information + self.block_start_string = block_start_string + self.block_end_string = block_end_string + self.variable_start_string = variable_start_string + self.variable_end_string = variable_end_string + self.comment_start_string = comment_start_string + self.comment_end_string = comment_end_string + self.line_statement_prefix = line_statement_prefix + self.line_comment_prefix = line_comment_prefix + self.trim_blocks = trim_blocks + self.lstrip_blocks = lstrip_blocks + self.newline_sequence = newline_sequence + self.keep_trailing_newline = keep_trailing_newline + + # runtime information + self.undefined = undefined + self.optimized = optimized + self.finalize = finalize + self.autoescape = autoescape + + # defaults + self.filters = DEFAULT_FILTERS.copy() + self.tests = DEFAULT_TESTS.copy() + self.globals = DEFAULT_NAMESPACE.copy() + + # set the loader provided + self.loader = loader + self.cache = create_cache(cache_size) + self.bytecode_cache = bytecode_cache + self.auto_reload = auto_reload + + # configurable policies + self.policies = DEFAULT_POLICIES.copy() + + # load extensions + self.extensions = load_extensions(self, extensions) + + self.enable_async = enable_async + self.is_async = self.enable_async and have_async_gen + + _environment_sanity_check(self) + + def add_extension(self, extension): + """Adds an extension after the environment was created. + + .. versionadded:: 2.5 + """ + self.extensions.update(load_extensions(self, [extension])) + + def extend(self, **attributes): + """Add the items to the instance of the environment if they do not exist + yet. This is used by :ref:`extensions <writing-extensions>` to register + callbacks and configuration values without breaking inheritance. 
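# --- editor's aside (not part of the committed diff): a sketch of both
# hooks; jinja2.ext.do is one of the extensions bundled with the package.
from jinja2 import Environment

env = Environment()
env.add_extension('jinja2.ext.do')
tmpl = env.from_string('{% set xs = [] %}{% do xs.append(42) %}{{ xs }}')
assert tmpl.render() == '[42]'

env.extend(my_flag=True)
env.extend(my_flag=False)   # ignored: extend() never overwrites
assert env.my_flag is True
# --- end aside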
+ """ + for key, value in iteritems(attributes): + if not hasattr(self, key): + setattr(self, key, value) + + def overlay(self, block_start_string=missing, block_end_string=missing, + variable_start_string=missing, variable_end_string=missing, + comment_start_string=missing, comment_end_string=missing, + line_statement_prefix=missing, line_comment_prefix=missing, + trim_blocks=missing, lstrip_blocks=missing, + extensions=missing, optimized=missing, + undefined=missing, finalize=missing, autoescape=missing, + loader=missing, cache_size=missing, auto_reload=missing, + bytecode_cache=missing): + """Create a new overlay environment that shares all the data with the + current environment except for cache and the overridden attributes. + Extensions cannot be removed for an overlayed environment. An overlayed + environment automatically gets all the extensions of the environment it + is linked to plus optional extra extensions. + + Creating overlays should happen after the initial environment was set + up completely. Not all attributes are truly linked, some are just + copied over so modifications on the original environment may not shine + through. + """ + args = dict(locals()) + del args['self'], args['cache_size'], args['extensions'] + + rv = object.__new__(self.__class__) + rv.__dict__.update(self.__dict__) + rv.overlayed = True + rv.linked_to = self + + for key, value in iteritems(args): + if value is not missing: + setattr(rv, key, value) + + if cache_size is not missing: + rv.cache = create_cache(cache_size) + else: + rv.cache = copy_cache(self.cache) + + rv.extensions = {} + for key, value in iteritems(self.extensions): + rv.extensions[key] = value.bind(rv) + if extensions is not missing: + rv.extensions.update(load_extensions(rv, extensions)) + + return _environment_sanity_check(rv) + + lexer = property(get_lexer, doc="The lexer for this environment.") + + def iter_extensions(self): + """Iterates over the extensions by priority.""" + return iter(sorted(self.extensions.values(), + key=lambda x: x.priority)) + + def getitem(self, obj, argument): + """Get an item or attribute of an object but prefer the item.""" + try: + return obj[argument] + except (AttributeError, TypeError, LookupError): + if isinstance(argument, string_types): + try: + attr = str(argument) + except Exception: + pass + else: + try: + return getattr(obj, attr) + except AttributeError: + pass + return self.undefined(obj=obj, name=argument) + + def getattr(self, obj, attribute): + """Get an item or attribute of an object but prefer the attribute. + Unlike :meth:`getitem` the attribute *must* be a bytestring. + """ + try: + return getattr(obj, attribute) + except AttributeError: + pass + try: + return obj[attribute] + except (TypeError, LookupError, AttributeError): + return self.undefined(obj=obj, name=attribute) + + def call_filter(self, name, value, args=None, kwargs=None, + context=None, eval_ctx=None): + """Invokes a filter on a value the same way the compiler does it. + + Note that on Python 3 this might return a coroutine in case the + filter is running from an environment in async mode and the filter + supports async execution. It's your responsibility to await this + if needed. + + .. 
versionadded:: 2.7 + """ + func = self.filters.get(name) + if func is None: + fail_for_missing_callable('no filter named %r', name) + args = [value] + list(args or ()) + if getattr(func, 'contextfilter', False): + if context is None: + raise TemplateRuntimeError('Attempted to invoke context ' + 'filter without context') + args.insert(0, context) + elif getattr(func, 'evalcontextfilter', False): + if eval_ctx is None: + if context is not None: + eval_ctx = context.eval_ctx + else: + eval_ctx = EvalContext(self) + args.insert(0, eval_ctx) + elif getattr(func, 'environmentfilter', False): + args.insert(0, self) + return func(*args, **(kwargs or {})) + + def call_test(self, name, value, args=None, kwargs=None): + """Invokes a test on a value the same way the compiler does it. + + .. versionadded:: 2.7 + """ + func = self.tests.get(name) + if func is None: + fail_for_missing_callable('no test named %r', name) + return func(value, *(args or ()), **(kwargs or {})) + + @internalcode + def parse(self, source, name=None, filename=None): + """Parse the sourcecode and return the abstract syntax tree. This + tree of nodes is used by the compiler to convert the template into + executable source- or bytecode. This is useful for debugging or to + extract information from templates. + + If you are :ref:`developing Jinja2 extensions <writing-extensions>` + this gives you a good overview of the node tree generated. + """ + try: + return self._parse(source, name, filename) + except TemplateSyntaxError: + exc_info = sys.exc_info() + self.handle_exception(exc_info, source_hint=source) + + def _parse(self, source, name, filename): + """Internal parsing function used by `parse` and `compile`.""" + return Parser(self, source, name, encode_filename(filename)).parse() + + def lex(self, source, name=None, filename=None): + """Lex the given sourcecode and return a generator that yields + tokens as tuples in the form ``(lineno, token_type, value)``. + This can be useful for :ref:`extension development <writing-extensions>` + and debugging templates. + + This does not perform preprocessing. If you want the preprocessing + of the extensions to be applied you have to filter source through + the :meth:`preprocess` method. + """ + source = text_type(source) + try: + return self.lexer.tokeniter(source, name, filename) + except TemplateSyntaxError: + exc_info = sys.exc_info() + self.handle_exception(exc_info, source_hint=source) + + def preprocess(self, source, name=None, filename=None): + """Preprocesses the source with all extensions. This is automatically + called for all parsing and compiling methods but *not* for :meth:`lex` + because there you usually only want the actual source tokenized. + """ + return reduce(lambda s, e: e.preprocess(s, name, filename), + self.iter_extensions(), text_type(source)) + + def _tokenize(self, source, name, filename=None, state=None): + """Called by the parser to do the preprocessing and filtering + for all the extensions. Returns a :class:`~jinja2.lexer.TokenStream`. + """ + source = self.preprocess(source, name, filename) + stream = self.lexer.tokenize(source, name, filename, state) + for ext in self.iter_extensions(): + stream = ext.filter_stream(stream) + if not isinstance(stream, TokenStream): + stream = TokenStream(stream, name, filename) + return stream + + def _generate(self, source, name, filename, defer_init=False): + """Internal hook that can be overridden to hook a different generate + method in. + + .. 
versionadded:: 2.5 + """ + return generate(source, self, name, filename, defer_init=defer_init, + optimized=self.optimized) + + def _compile(self, source, filename): + """Internal hook that can be overridden to hook a different compile + method in. + + .. versionadded:: 2.5 + """ + return compile(source, filename, 'exec') + + @internalcode + def compile(self, source, name=None, filename=None, raw=False, + defer_init=False): + """Compile a node or template source code. The `name` parameter is + the load name of the template after it was joined using + :meth:`join_path` if necessary, not the filename on the file system. + the `filename` parameter is the estimated filename of the template on + the file system. If the template came from a database or memory this + can be omitted. + + The return value of this method is a python code object. If the `raw` + parameter is `True` the return value will be a string with python + code equivalent to the bytecode returned otherwise. This method is + mainly used internally. + + `defer_init` is use internally to aid the module code generator. This + causes the generated code to be able to import without the global + environment variable to be set. + + .. versionadded:: 2.4 + `defer_init` parameter added. + """ + source_hint = None + try: + if isinstance(source, string_types): + source_hint = source + source = self._parse(source, name, filename) + source = self._generate(source, name, filename, + defer_init=defer_init) + if raw: + return source + if filename is None: + filename = '<template>' + else: + filename = encode_filename(filename) + return self._compile(source, filename) + except TemplateSyntaxError: + exc_info = sys.exc_info() + self.handle_exception(exc_info, source_hint=source_hint) + + def compile_expression(self, source, undefined_to_none=True): + """A handy helper method that returns a callable that accepts keyword + arguments that appear as variables in the expression. If called it + returns the result of the expression. + + This is useful if applications want to use the same rules as Jinja + in template "configuration files" or similar situations. + + Example usage: + + >>> env = Environment() + >>> expr = env.compile_expression('foo == 42') + >>> expr(foo=23) + False + >>> expr(foo=42) + True + + Per default the return value is converted to `None` if the + expression returns an undefined value. This can be changed + by setting `undefined_to_none` to `False`. + + >>> env.compile_expression('var')() is None + True + >>> env.compile_expression('var', undefined_to_none=False)() + Undefined + + .. versionadded:: 2.1 + """ + parser = Parser(self, source, state='variable') + exc_info = None + try: + expr = parser.parse_expression() + if not parser.stream.eos: + raise TemplateSyntaxError('chunk after expression', + parser.stream.current.lineno, + None, None) + expr.set_environment(self) + except TemplateSyntaxError: + exc_info = sys.exc_info() + if exc_info is not None: + self.handle_exception(exc_info, source_hint=source) + body = [nodes.Assign(nodes.Name('result', 'store'), expr, lineno=1)] + template = self.from_string(nodes.Template(body, lineno=1)) + return TemplateExpression(template, undefined_to_none) + + def compile_templates(self, target, extensions=None, filter_func=None, + zip='deflated', log_function=None, + ignore_errors=True, py_compile=False): + """Finds all the templates the loader can find, compiles them + and stores them in `target`. If `zip` is `None`, instead of in a + zipfile, the templates will be stored in a directory. 
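# --- editor's aside (not part of the committed diff): a short sketch of
# the introspection entry points described above.
from jinja2 import Environment

env = Environment()

# call_filter / call_test invoke filters and tests the compiled way
assert env.call_filter('upper', 'hello') == 'HELLO'
assert env.call_test('odd', 3) is True

# compile(..., raw=True) returns the generated Python source
print(env.compile('Hello {{ name }}!', raw=True))

# compile_expression() turns a bare expression into a callable
is_adult = env.compile_expression('age >= 18')
assert is_adult(age=21) is True
# --- end aside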
+ By default a deflate zip algorithm is used. To switch to + the stored algorithm, `zip` can be set to ``'stored'``. + + `extensions` and `filter_func` are passed to :meth:`list_templates`. + Each template returned will be compiled to the target folder or + zipfile. + + By default template compilation errors are ignored. In case a + log function is provided, errors are logged. If you want template + syntax errors to abort the compilation you can set `ignore_errors` + to `False` and you will get an exception on syntax errors. + + If `py_compile` is set to `True` .pyc files will be written to the + target instead of standard .py files. This flag does not do anything + on pypy and Python 3 where pyc files are not picked up by itself and + don't give much benefit. + + .. versionadded:: 2.4 + """ + from jinja2.loaders import ModuleLoader + + if log_function is None: + log_function = lambda x: None + + if py_compile: + if not PY2 or PYPY: + from warnings import warn + warn(Warning('py_compile has no effect on pypy or Python 3')) + py_compile = False + else: + import imp + import marshal + py_header = imp.get_magic() + \ + u'\xff\xff\xff\xff'.encode('iso-8859-15') + + # Python 3.3 added a source filesize to the header + if sys.version_info >= (3, 3): + py_header += u'\x00\x00\x00\x00'.encode('iso-8859-15') + + def write_file(filename, data, mode): + if zip: + info = ZipInfo(filename) + info.external_attr = 0o755 << 16 + zip_file.writestr(info, data) + else: + f = open(os.path.join(target, filename), mode) + try: + f.write(data) + finally: + f.close() + + if zip is not None: + from zipfile import ZipFile, ZipInfo, ZIP_DEFLATED, ZIP_STORED + zip_file = ZipFile(target, 'w', dict(deflated=ZIP_DEFLATED, + stored=ZIP_STORED)[zip]) + log_function('Compiling into Zip archive "%s"' % target) + else: + if not os.path.isdir(target): + os.makedirs(target) + log_function('Compiling into folder "%s"' % target) + + try: + for name in self.list_templates(extensions, filter_func): + source, filename, _ = self.loader.get_source(self, name) + try: + code = self.compile(source, name, filename, True, True) + except TemplateSyntaxError as e: + if not ignore_errors: + raise + log_function('Could not compile "%s": %s' % (name, e)) + continue + + filename = ModuleLoader.get_module_filename(name) + + if py_compile: + c = self._compile(code, encode_filename(filename)) + write_file(filename + 'c', py_header + + marshal.dumps(c), 'wb') + log_function('Byte-compiled "%s" as %s' % + (name, filename + 'c')) + else: + write_file(filename, code, 'w') + log_function('Compiled "%s" as %s' % (name, filename)) + finally: + if zip: + zip_file.close() + + log_function('Finished compiling templates') + + def list_templates(self, extensions=None, filter_func=None): + """Returns a list of templates for this environment. This requires + that the loader supports the loader's + :meth:`~BaseLoader.list_templates` method. + + If there are other files in the template folder besides the + actual templates, the returned list can be filtered. There are two + ways: either `extensions` is set to a list of file extensions for + templates, or a `filter_func` can be provided which is a callable that + is passed a template name and should return `True` if it should end up + in the result list. + + If the loader does not support that, a :exc:`TypeError` is raised. + + .. 
versionadded:: 2.4 + """ + x = self.loader.list_templates() + if extensions is not None: + if filter_func is not None: + raise TypeError('either extensions or filter_func ' + 'can be passed, but not both') + filter_func = lambda x: '.' in x and \ + x.rsplit('.', 1)[1] in extensions + if filter_func is not None: + x = list(ifilter(filter_func, x)) + return x + + def handle_exception(self, exc_info=None, rendered=False, source_hint=None): + """Exception handling helper. This is used internally to either raise + rewritten exceptions or return a rendered traceback for the template. + """ + global _make_traceback + if exc_info is None: + exc_info = sys.exc_info() + + # the debugging module is imported when it's used for the first time. + # we're doing a lot of stuff there and for applications that do not + # get any exceptions in template rendering there is no need to load + # all of that. + if _make_traceback is None: + from jinja2.debug import make_traceback as _make_traceback + traceback = _make_traceback(exc_info, source_hint) + if rendered and self.exception_formatter is not None: + return self.exception_formatter(traceback) + if self.exception_handler is not None: + self.exception_handler(traceback) + exc_type, exc_value, tb = traceback.standard_exc_info + reraise(exc_type, exc_value, tb) + + def join_path(self, template, parent): + """Join a template with the parent. By default all the lookups are + relative to the loader root so this method returns the `template` + parameter unchanged, but if the paths should be relative to the + parent template, this function can be used to calculate the real + template name. + + Subclasses may override this method and implement template path + joining here. + """ + return template + + @internalcode + def _load_template(self, name, globals): + if self.loader is None: + raise TypeError('no loader for this environment specified') + cache_key = (weakref.ref(self.loader), name) + if self.cache is not None: + template = self.cache.get(cache_key) + if template is not None and (not self.auto_reload or + template.is_up_to_date): + return template + template = self.loader.load(self, name, globals) + if self.cache is not None: + self.cache[cache_key] = template + return template + + @internalcode + def get_template(self, name, parent=None, globals=None): + """Load a template from the loader. If a loader is configured this + method asks the loader for the template and returns a :class:`Template`. + If the `parent` parameter is not `None`, :meth:`join_path` is called + to get the real template name before loading. + + The `globals` parameter can be used to provide template wide globals. + These variables are available in the context at render time. + + If the template does not exist a :exc:`TemplateNotFound` exception is + raised. + + .. versionchanged:: 2.4 + If `name` is a :class:`Template` object it is returned from the + function unchanged. + """ + if isinstance(name, Template): + return name + if parent is not None: + name = self.join_path(name, parent) + return self._load_template(name, self.make_globals(globals)) + + @internalcode + def select_template(self, names, parent=None, globals=None): + """Works like :meth:`get_template` but tries a number of templates + before it fails. If it cannot find any of the templates, it will + raise a :exc:`TemplatesNotFound` exception. + + .. versionadded:: 2.3 + + .. versionchanged:: 2.4 + If `names` contains a :class:`Template` object it is returned + from the function unchanged. 
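+
+ A minimal sketch (the template names here are hypothetical)::
+
+ page = env.select_template(['pages/about.html',
+ 'pages/default.html'])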
+ """ + if not names: + raise TemplatesNotFound(message=u'Tried to select from an empty list ' + u'of templates.') + globals = self.make_globals(globals) + for name in names: + if isinstance(name, Template): + return name + if parent is not None: + name = self.join_path(name, parent) + try: + return self._load_template(name, globals) + except TemplateNotFound: + pass + raise TemplatesNotFound(names) + + @internalcode + def get_or_select_template(self, template_name_or_list, + parent=None, globals=None): + """Does a typecheck and dispatches to :meth:`select_template` + if an iterable of template names is given, otherwise to + :meth:`get_template`. + + .. versionadded:: 2.3 + """ + if isinstance(template_name_or_list, string_types): + return self.get_template(template_name_or_list, parent, globals) + elif isinstance(template_name_or_list, Template): + return template_name_or_list + return self.select_template(template_name_or_list, parent, globals) + + def from_string(self, source, globals=None, template_class=None): + """Load a template from a string. This parses the source given and + returns a :class:`Template` object. + """ + globals = self.make_globals(globals) + cls = template_class or self.template_class + return cls.from_code(self, self.compile(source), globals, None) + + def make_globals(self, d): + """Return a dict for the globals.""" + if not d: + return self.globals + return dict(self.globals, **d) + + +class Template(object): + """The central template object. This class represents a compiled template + and is used to evaluate it. + + Normally the template object is generated from an :class:`Environment` but + it also has a constructor that makes it possible to create a template + instance directly using the constructor. It takes the same arguments as + the environment constructor but it's not possible to specify a loader. + + Every template object has a few methods and members that are guaranteed + to exist. However it's important that a template object should be + considered immutable. Modifications on the object are not supported. + + Template objects created from the constructor rather than an environment + do have an `environment` attribute that points to a temporary environment + that is probably shared with other templates created with the constructor + and compatible settings. + + >>> template = Template('Hello {{ name }}!') + >>> template.render(name='John Doe') == u'Hello John Doe!' + True + >>> stream = template.stream(name='John Doe') + >>> next(stream) == u'Hello John Doe!' + True + >>> next(stream) + Traceback (most recent call last): + ... 
+ StopIteration + """ + + def __new__(cls, source, + block_start_string=BLOCK_START_STRING, + block_end_string=BLOCK_END_STRING, + variable_start_string=VARIABLE_START_STRING, + variable_end_string=VARIABLE_END_STRING, + comment_start_string=COMMENT_START_STRING, + comment_end_string=COMMENT_END_STRING, + line_statement_prefix=LINE_STATEMENT_PREFIX, + line_comment_prefix=LINE_COMMENT_PREFIX, + trim_blocks=TRIM_BLOCKS, + lstrip_blocks=LSTRIP_BLOCKS, + newline_sequence=NEWLINE_SEQUENCE, + keep_trailing_newline=KEEP_TRAILING_NEWLINE, + extensions=(), + optimized=True, + undefined=Undefined, + finalize=None, + autoescape=False, + enable_async=False): + env = get_spontaneous_environment( + block_start_string, block_end_string, variable_start_string, + variable_end_string, comment_start_string, comment_end_string, + line_statement_prefix, line_comment_prefix, trim_blocks, + lstrip_blocks, newline_sequence, keep_trailing_newline, + frozenset(extensions), optimized, undefined, finalize, autoescape, + None, 0, False, None, enable_async) + return env.from_string(source, template_class=cls) + + @classmethod + def from_code(cls, environment, code, globals, uptodate=None): + """Creates a template object from compiled code and the globals. This + is used by the loaders and environment to create a template object. + """ + namespace = { + 'environment': environment, + '__file__': code.co_filename + } + exec(code, namespace) + rv = cls._from_namespace(environment, namespace, globals) + rv._uptodate = uptodate + return rv + + @classmethod + def from_module_dict(cls, environment, module_dict, globals): + """Creates a template object from a module. This is used by the + module loader to create a template object. + + .. versionadded:: 2.4 + """ + return cls._from_namespace(environment, module_dict, globals) + + @classmethod + def _from_namespace(cls, environment, namespace, globals): + t = object.__new__(cls) + t.environment = environment + t.globals = globals + t.name = namespace['name'] + t.filename = namespace['__file__'] + t.blocks = namespace['blocks'] + + # render function and module + t.root_render_func = namespace['root'] + t._module = None + + # debug and loader helpers + t._debug_info = namespace['debug_info'] + t._uptodate = None + + # store the reference + namespace['environment'] = environment + namespace['__jinja_template__'] = t + + return t + + def render(self, *args, **kwargs): + """This method accepts the same arguments as the `dict` constructor: + A dict, a dict subclass or some keyword arguments. If no arguments + are given the context will be empty. These two calls do the same:: + + template.render(knights='that say nih') + template.render({'knights': 'that say nih'}) + + This will return the rendered template as unicode string. + """ + vars = dict(*args, **kwargs) + try: + return concat(self.root_render_func(self.new_context(vars))) + except Exception: + exc_info = sys.exc_info() + return self.environment.handle_exception(exc_info, True) + + def render_async(self, *args, **kwargs): + """This works similar to :meth:`render` but returns a coroutine + that when awaited returns the entire rendered template string. This + requires the async feature to be enabled. 
+
+ Example usage::
+
+ await template.render_async(knights='that say nih; asynchronously')
+ """
+ # see asyncsupport for the actual implementation
+ raise NotImplementedError('This feature is not available for this '
+ 'version of Python')
+
+ def stream(self, *args, **kwargs):
+ """Works exactly like :meth:`generate` but returns a
+ :class:`TemplateStream`.
+ """
+ return TemplateStream(self.generate(*args, **kwargs))
+
+ def generate(self, *args, **kwargs):
+ """For very large templates it can be useful not to render the whole
+ template at once but to evaluate each statement one after another and
+ yield the output piece by piece. This method does exactly that and
+ returns a generator that yields one item after another as unicode
+ strings.
+
+ It accepts the same arguments as :meth:`render`.
+ """
+ vars = dict(*args, **kwargs)
+ try:
+ for event in self.root_render_func(self.new_context(vars)):
+ yield event
+ except Exception:
+ exc_info = sys.exc_info()
+ else:
+ return
+ yield self.environment.handle_exception(exc_info, True)
+
+ def generate_async(self, *args, **kwargs):
+ """An async version of :meth:`generate`. Works very similarly but
+ returns an async iterator instead.
+ """
+ # see asyncsupport for the actual implementation
+ raise NotImplementedError('This feature is not available for this '
+ 'version of Python')
+
+ def new_context(self, vars=None, shared=False, locals=None):
+ """Create a new :class:`Context` for this template. The vars
+ provided will be passed to the template. Per default the globals
+ are added to the context. If shared is set to `True` the data
+ is passed as is to the context without adding the globals.
+
+ `locals` can be a dict of local variables for internal usage.
+ """
+ return new_context(self.environment, self.name, self.blocks,
+ vars, shared, self.globals, locals)
+
+ def make_module(self, vars=None, shared=False, locals=None):
+ """This method works like the :attr:`module` attribute when called
+ without arguments but it will evaluate the template on every call
+ rather than caching it. It's also possible to provide
+ a dict which is then used as context. The arguments are the same
+ as for the :meth:`new_context` method.
+ """
+ return TemplateModule(self, self.new_context(vars, shared, locals))
+
+ def make_module_async(self, vars=None, shared=False, locals=None):
+ """As template module creation can invoke template code for
+ asynchronous executions this method must be used instead of the
+ normal :meth:`make_module` one. Likewise the module attribute
+ becomes unavailable in async mode.
+ """
+ # see asyncsupport for the actual implementation
+ raise NotImplementedError('This feature is not available for this '
+ 'version of Python')
+
+ @internalcode
+ def _get_default_module(self):
+ if self._module is not None:
+ return self._module
+ self._module = rv = self.make_module()
+ return rv
+
+ @property
+ def module(self):
+ """The template as module. This is used for imports in the
+ template runtime but is also useful if one wants to access
+ exported template variables from the Python layer:
+
+ >>> t = Template('{% macro foo() %}42{% endmacro %}23')
+ >>> str(t.module)
+ '23'
+ >>> t.module.foo() == u'42'
+ True
+
+ This attribute is not available if async mode is enabled.
+ """
+ return self._get_default_module()
+
+ def get_corresponding_lineno(self, lineno):
+ """Return the source line number of a line number in the
+ generated bytecode as they are not in sync.
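+
+ As a constructed illustration: with a debug info mapping of
+ ``[(1, 6), (2, 9)]``, generated line 10 maps back to template
+ line 2, while generated line 8 maps back to template line 1.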
+ """ + for template_line, code_line in reversed(self.debug_info): + if code_line <= lineno: + return template_line + return 1 + + @property + def is_up_to_date(self): + """If this variable is `False` there is a newer version available.""" + if self._uptodate is None: + return True + return self._uptodate() + + @property + def debug_info(self): + """The debug info mapping.""" + return [tuple(imap(int, x.split('='))) for x in + self._debug_info.split('&')] + + def __repr__(self): + if self.name is None: + name = 'memory:%x' % id(self) + else: + name = repr(self.name) + return '<%s %s>' % (self.__class__.__name__, name) + + +@implements_to_string +class TemplateModule(object): + """Represents an imported template. All the exported names of the + template are available as attributes on this object. Additionally + converting it into an unicode- or bytestrings renders the contents. + """ + + def __init__(self, template, context, body_stream=None): + if body_stream is None: + if context.environment.is_async: + raise RuntimeError('Async mode requires a body stream ' + 'to be passed to a template module. Use ' + 'the async methods of the API you are ' + 'using.') + body_stream = list(template.root_render_func(context)) + self._body_stream = body_stream + self.__dict__.update(context.get_exported()) + self.__name__ = template.name + + def __html__(self): + return Markup(concat(self._body_stream)) + + def __str__(self): + return concat(self._body_stream) + + def __repr__(self): + if self.__name__ is None: + name = 'memory:%x' % id(self) + else: + name = repr(self.__name__) + return '<%s %s>' % (self.__class__.__name__, name) + + +class TemplateExpression(object): + """The :meth:`jinja2.Environment.compile_expression` method returns an + instance of this object. It encapsulates the expression-like access + to the template with an expression it wraps. + """ + + def __init__(self, template, undefined_to_none): + self._template = template + self._undefined_to_none = undefined_to_none + + def __call__(self, *args, **kwargs): + context = self._template.new_context(dict(*args, **kwargs)) + consume(self._template.root_render_func(context)) + rv = context.vars['result'] + if self._undefined_to_none and isinstance(rv, Undefined): + rv = None + return rv + + +@implements_iterator +class TemplateStream(object): + """A template stream works pretty much like an ordinary python generator + but it can buffer multiple items to reduce the number of total iterations. + Per default the output is unbuffered which means that for every unbuffered + instruction in the template one unicode string is yielded. + + If buffering is enabled with a buffer size of 5, five items are combined + into a new unicode string. This is mainly useful if you are streaming + big templates to a client via WSGI which flushes after each iteration. + """ + + def __init__(self, gen): + self._gen = gen + self.disable_buffering() + + def dump(self, fp, encoding=None, errors='strict'): + """Dump the complete stream into a file or file-like object. + Per default unicode strings are written, if you want to encode + before writing specify an `encoding`. 
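+
+ A sketch with an explicit encoding and an already-open binary
+ file (the file name is hypothetical)::
+
+ with open('hello.html', 'wb') as f:
+ Template('Hello {{ name }}!').stream(name='foo').dump(f, 'utf-8')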
+ + Example usage:: + + Template('Hello {{ name }}!').stream(name='foo').dump('hello.html') + """ + close = False + if isinstance(fp, string_types): + if encoding is None: + encoding = 'utf-8' + fp = open(fp, 'wb') + close = True + try: + if encoding is not None: + iterable = (x.encode(encoding, errors) for x in self) + else: + iterable = self + if hasattr(fp, 'writelines'): + fp.writelines(iterable) + else: + for item in iterable: + fp.write(item) + finally: + if close: + fp.close() + + def disable_buffering(self): + """Disable the output buffering.""" + self._next = partial(next, self._gen) + self.buffered = False + + def _buffered_generator(self, size): + buf = [] + c_size = 0 + push = buf.append + + while 1: + try: + while c_size < size: + c = next(self._gen) + push(c) + if c: + c_size += 1 + except StopIteration: + if not c_size: + return + yield concat(buf) + del buf[:] + c_size = 0 + + def enable_buffering(self, size=5): + """Enable buffering. Buffer `size` items before yielding them.""" + if size <= 1: + raise ValueError('buffer size too small') + + self.buffered = True + self._next = partial(next, self._buffered_generator(size)) + + def __iter__(self): + return self + + def __next__(self): + return self._next() + + +# hook in default template class. if anyone reads this comment: ignore that +# it's possible to use custom templates ;-) +Environment.template_class = Template diff --git a/python/jinja2/exceptions.py b/python/jinja2/exceptions.py new file mode 100644 index 0000000..c018a33 --- /dev/null +++ b/python/jinja2/exceptions.py @@ -0,0 +1,146 @@ +# -*- coding: utf-8 -*- +""" + jinja2.exceptions + ~~~~~~~~~~~~~~~~~ + + Jinja exceptions. + + :copyright: (c) 2017 by the Jinja Team. + :license: BSD, see LICENSE for more details. +""" +from jinja2._compat import imap, text_type, PY2, implements_to_string + + +class TemplateError(Exception): + """Baseclass for all template errors.""" + + if PY2: + def __init__(self, message=None): + if message is not None: + message = text_type(message).encode('utf-8') + Exception.__init__(self, message) + + @property + def message(self): + if self.args: + message = self.args[0] + if message is not None: + return message.decode('utf-8', 'replace') + + def __unicode__(self): + return self.message or u'' + else: + def __init__(self, message=None): + Exception.__init__(self, message) + + @property + def message(self): + if self.args: + message = self.args[0] + if message is not None: + return message + + +@implements_to_string +class TemplateNotFound(IOError, LookupError, TemplateError): + """Raised if a template does not exist.""" + + # looks weird, but removes the warning descriptor that just + # bogusly warns us about message being deprecated + message = None + + def __init__(self, name, message=None): + IOError.__init__(self) + if message is None: + message = name + self.message = message + self.name = name + self.templates = [name] + + def __str__(self): + return self.message + + +class TemplatesNotFound(TemplateNotFound): + """Like :class:`TemplateNotFound` but raised if multiple templates + are selected. This is a subclass of :class:`TemplateNotFound` + exception, so just catching the base exception will catch both. + + .. 
versionadded:: 2.2 + """ + + def __init__(self, names=(), message=None): + if message is None: + message = u'none of the templates given were found: ' + \ + u', '.join(imap(text_type, names)) + TemplateNotFound.__init__(self, names and names[-1] or None, message) + self.templates = list(names) + + +@implements_to_string +class TemplateSyntaxError(TemplateError): + """Raised to tell the user that there is a problem with the template.""" + + def __init__(self, message, lineno, name=None, filename=None): + TemplateError.__init__(self, message) + self.lineno = lineno + self.name = name + self.filename = filename + self.source = None + + # this is set to True if the debug.translate_syntax_error + # function translated the syntax error into a new traceback + self.translated = False + + def __str__(self): + # for translated errors we only return the message + if self.translated: + return self.message + + # otherwise attach some stuff + location = 'line %d' % self.lineno + name = self.filename or self.name + if name: + location = 'File "%s", %s' % (name, location) + lines = [self.message, ' ' + location] + + # if the source is set, add the line to the output + if self.source is not None: + try: + line = self.source.splitlines()[self.lineno - 1] + except IndexError: + line = None + if line: + lines.append(' ' + line.strip()) + + return u'\n'.join(lines) + + +class TemplateAssertionError(TemplateSyntaxError): + """Like a template syntax error, but covers cases where something in the + template caused an error at compile time that wasn't necessarily caused + by a syntax error. However it's a direct subclass of + :exc:`TemplateSyntaxError` and has the same attributes. + """ + + +class TemplateRuntimeError(TemplateError): + """A generic runtime error in the template engine. Under some situations + Jinja may raise this exception. + """ + + +class UndefinedError(TemplateRuntimeError): + """Raised if a template tries to operate on :class:`Undefined`.""" + + +class SecurityError(TemplateRuntimeError): + """Raised if a template tries to do something insecure if the + sandbox is enabled. + """ + + +class FilterArgumentError(TemplateRuntimeError): + """This error is raised if a filter was called with inappropriate + arguments + """ diff --git a/python/jinja2/ext.py b/python/jinja2/ext.py new file mode 100644 index 0000000..0734a84 --- /dev/null +++ b/python/jinja2/ext.py @@ -0,0 +1,627 @@ +# -*- coding: utf-8 -*- +""" + jinja2.ext + ~~~~~~~~~~ + + Jinja extensions allow to add custom tags similar to the way django custom + tags work. By default two example extensions exist: an i18n and a cache + extension. + + :copyright: (c) 2017 by the Jinja Team. + :license: BSD. +""" +import re + +from jinja2 import nodes +from jinja2.defaults import BLOCK_START_STRING, \ + BLOCK_END_STRING, VARIABLE_START_STRING, VARIABLE_END_STRING, \ + COMMENT_START_STRING, COMMENT_END_STRING, LINE_STATEMENT_PREFIX, \ + LINE_COMMENT_PREFIX, TRIM_BLOCKS, NEWLINE_SEQUENCE, \ + KEEP_TRAILING_NEWLINE, LSTRIP_BLOCKS +from jinja2.environment import Environment +from jinja2.runtime import concat +from jinja2.exceptions import TemplateAssertionError, TemplateSyntaxError +from jinja2.utils import contextfunction, import_string, Markup +from jinja2._compat import with_metaclass, string_types, iteritems + + +# the only real useful gettext functions for a Jinja template. Note +# that ugettext must be assigned to gettext as Jinja doesn't support +# non unicode strings. 
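+#
+# An illustrative setup sketch (`translations` stands in for a
+# hypothetical gettext.GNUTranslations instance):
+#
+#   env = Environment(extensions=['jinja2.ext.i18n'])
+#   env.install_gettext_translations(translations)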
+GETTEXT_FUNCTIONS = ('_', 'gettext', 'ngettext')
+
+
+class ExtensionRegistry(type):
+ """Gives the extension a unique identifier."""
+
+ def __new__(cls, name, bases, d):
+ rv = type.__new__(cls, name, bases, d)
+ rv.identifier = rv.__module__ + '.' + rv.__name__
+ return rv
+
+
+class Extension(with_metaclass(ExtensionRegistry, object)):
+ """Extensions can be used to add extra functionality to the Jinja template
+ system at the parser level. Custom extensions are bound to an environment
+ but may not store environment specific data on `self`. The reason for
+ this is that an extension can be bound to another environment (for
+ overlays) by creating a copy and reassigning the `environment` attribute.
+
+ As extensions are created by the environment they cannot accept any
+ arguments for configuration. One may want to work around that by using
+ a factory function, but that is not possible as extensions are identified
+ by their import name. The correct way to configure the extension is
+ storing the configuration values on the environment. Because this way the
+ environment ends up acting as central configuration storage, the
+ attributes may clash, which is why extensions have to ensure that the
+ names they choose for configuration are not too generic. ``prefix`` for
+ example is a terrible name, ``fragment_cache_prefix`` on the other hand
+ is a good name as it includes the name of the extension (fragment cache).
+ """
+
+ #: if this extension parses, this is the list of tags it's listening to.
+ tags = set()
+
+ #: the priority of that extension. This is especially useful for
+ #: extensions that preprocess values. A lower value means higher
+ #: priority.
+ #:
+ #: .. versionadded:: 2.4
+ priority = 100
+
+ def __init__(self, environment):
+ self.environment = environment
+
+ def bind(self, environment):
+ """Create a copy of this extension bound to another environment."""
+ rv = object.__new__(self.__class__)
+ rv.__dict__.update(self.__dict__)
+ rv.environment = environment
+ return rv
+
+ def preprocess(self, source, name, filename=None):
+ """This method is called before the actual lexing and can be used to
+ preprocess the source. The `filename` is optional. The return value
+ must be the preprocessed source.
+ """
+ return source
+
+ def filter_stream(self, stream):
+ """It's passed a :class:`~jinja2.lexer.TokenStream` that can be used
+ to filter tokens returned. This method has to return an iterable of
+ :class:`~jinja2.lexer.Token`\\s, but it doesn't have to return a
+ :class:`~jinja2.lexer.TokenStream`.
+
+ In the `ext` folder of the Jinja2 source distribution there is a file
+ called `inlinegettext.py` which implements a filter that utilizes this
+ method.
+ """
+ return stream
+
+ def parse(self, parser):
+ """If any of the :attr:`tags` matched this method is called with the
+ parser as first argument. The token the parser stream is pointing at
+ is the name token that matched. This method has to return one or a
+ list of multiple nodes.
+ """
+ raise NotImplementedError()
+
+ def attr(self, name, lineno=None):
+ """Return an attribute node for the current extension. This is useful
+ to pass constants on extensions to generated template code.
+
+ ::
+
+ self.attr('_my_attribute', lineno=lineno)
+ """
+ return nodes.ExtensionAttribute(self.identifier, name, lineno=lineno)
+
+ def call_method(self, name, args=None, kwargs=None, dyn_args=None,
+ dyn_kwargs=None, lineno=None):
+ """Call a method of the extension.
This is a shortcut for + :meth:`attr` + :class:`jinja2.nodes.Call`. + """ + if args is None: + args = [] + if kwargs is None: + kwargs = [] + return nodes.Call(self.attr(name, lineno=lineno), args, kwargs, + dyn_args, dyn_kwargs, lineno=lineno) + + +@contextfunction +def _gettext_alias(__context, *args, **kwargs): + return __context.call(__context.resolve('gettext'), *args, **kwargs) + + +def _make_new_gettext(func): + @contextfunction + def gettext(__context, __string, **variables): + rv = __context.call(func, __string) + if __context.eval_ctx.autoescape: + rv = Markup(rv) + return rv % variables + return gettext + + +def _make_new_ngettext(func): + @contextfunction + def ngettext(__context, __singular, __plural, __num, **variables): + variables.setdefault('num', __num) + rv = __context.call(func, __singular, __plural, __num) + if __context.eval_ctx.autoescape: + rv = Markup(rv) + return rv % variables + return ngettext + + +class InternationalizationExtension(Extension): + """This extension adds gettext support to Jinja2.""" + tags = set(['trans']) + + # TODO: the i18n extension is currently reevaluating values in a few + # situations. Take this example: + # {% trans count=something() %}{{ count }} foo{% pluralize + # %}{{ count }} fooss{% endtrans %} + # something is called twice here. One time for the gettext value and + # the other time for the n-parameter of the ngettext function. + + def __init__(self, environment): + Extension.__init__(self, environment) + environment.globals['_'] = _gettext_alias + environment.extend( + install_gettext_translations=self._install, + install_null_translations=self._install_null, + install_gettext_callables=self._install_callables, + uninstall_gettext_translations=self._uninstall, + extract_translations=self._extract, + newstyle_gettext=False + ) + + def _install(self, translations, newstyle=None): + gettext = getattr(translations, 'ugettext', None) + if gettext is None: + gettext = translations.gettext + ngettext = getattr(translations, 'ungettext', None) + if ngettext is None: + ngettext = translations.ngettext + self._install_callables(gettext, ngettext, newstyle) + + def _install_null(self, newstyle=None): + self._install_callables( + lambda x: x, + lambda s, p, n: (n != 1 and (p,) or (s,))[0], + newstyle + ) + + def _install_callables(self, gettext, ngettext, newstyle=None): + if newstyle is not None: + self.environment.newstyle_gettext = newstyle + if self.environment.newstyle_gettext: + gettext = _make_new_gettext(gettext) + ngettext = _make_new_ngettext(ngettext) + self.environment.globals.update( + gettext=gettext, + ngettext=ngettext + ) + + def _uninstall(self, translations): + for key in 'gettext', 'ngettext': + self.environment.globals.pop(key, None) + + def _extract(self, source, gettext_functions=GETTEXT_FUNCTIONS): + if isinstance(source, string_types): + source = self.environment.parse(source) + return extract_from_ast(source, gettext_functions) + + def parse(self, parser): + """Parse a translatable tag.""" + lineno = next(parser.stream).lineno + num_called_num = False + + # find all the variables referenced. Additionally a variable can be + # defined in the body of the trans block too, but this is checked at + # a later state. 
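+ # An illustrative block being parsed here (`users` is a
+ # hypothetical template variable):
+ # {% trans count=users|length %}{{ count }} user
+ # {% pluralize %}{{ count }} users{% endtrans %}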
+ plural_expr = None + plural_expr_assignment = None + variables = {} + trimmed = None + while parser.stream.current.type != 'block_end': + if variables: + parser.stream.expect('comma') + + # skip colon for python compatibility + if parser.stream.skip_if('colon'): + break + + name = parser.stream.expect('name') + if name.value in variables: + parser.fail('translatable variable %r defined twice.' % + name.value, name.lineno, + exc=TemplateAssertionError) + + # expressions + if parser.stream.current.type == 'assign': + next(parser.stream) + variables[name.value] = var = parser.parse_expression() + elif trimmed is None and name.value in ('trimmed', 'notrimmed'): + trimmed = name.value == 'trimmed' + continue + else: + variables[name.value] = var = nodes.Name(name.value, 'load') + + if plural_expr is None: + if isinstance(var, nodes.Call): + plural_expr = nodes.Name('_trans', 'load') + variables[name.value] = plural_expr + plural_expr_assignment = nodes.Assign( + nodes.Name('_trans', 'store'), var) + else: + plural_expr = var + num_called_num = name.value == 'num' + + parser.stream.expect('block_end') + + plural = None + have_plural = False + referenced = set() + + # now parse until endtrans or pluralize + singular_names, singular = self._parse_block(parser, True) + if singular_names: + referenced.update(singular_names) + if plural_expr is None: + plural_expr = nodes.Name(singular_names[0], 'load') + num_called_num = singular_names[0] == 'num' + + # if we have a pluralize block, we parse that too + if parser.stream.current.test('name:pluralize'): + have_plural = True + next(parser.stream) + if parser.stream.current.type != 'block_end': + name = parser.stream.expect('name') + if name.value not in variables: + parser.fail('unknown variable %r for pluralization' % + name.value, name.lineno, + exc=TemplateAssertionError) + plural_expr = variables[name.value] + num_called_num = name.value == 'num' + parser.stream.expect('block_end') + plural_names, plural = self._parse_block(parser, False) + next(parser.stream) + referenced.update(plural_names) + else: + next(parser.stream) + + # register free names as simple name expressions + for var in referenced: + if var not in variables: + variables[var] = nodes.Name(var, 'load') + + if not have_plural: + plural_expr = None + elif plural_expr is None: + parser.fail('pluralize without variables', lineno) + + if trimmed is None: + trimmed = self.environment.policies['ext.i18n.trimmed'] + if trimmed: + singular = self._trim_whitespace(singular) + if plural: + plural = self._trim_whitespace(plural) + + node = self._make_node(singular, plural, variables, plural_expr, + bool(referenced), + num_called_num and have_plural) + node.set_lineno(lineno) + if plural_expr_assignment is not None: + return [plural_expr_assignment, node] + else: + return node + + def _trim_whitespace(self, string, _ws_re=re.compile(r'\s*\n\s*')): + return _ws_re.sub(' ', string.strip()) + + def _parse_block(self, parser, allow_pluralize): + """Parse until the next block tag with a given name.""" + referenced = [] + buf = [] + while 1: + if parser.stream.current.type == 'data': + buf.append(parser.stream.current.value.replace('%', '%%')) + next(parser.stream) + elif parser.stream.current.type == 'variable_begin': + next(parser.stream) + name = parser.stream.expect('name').value + referenced.append(name) + buf.append('%%(%s)s' % name) + parser.stream.expect('variable_end') + elif parser.stream.current.type == 'block_begin': + next(parser.stream) + if parser.stream.current.test('name:endtrans'): 
+ break + elif parser.stream.current.test('name:pluralize'): + if allow_pluralize: + break + parser.fail('a translatable section can have only one ' + 'pluralize section') + parser.fail('control structures in translatable sections are ' + 'not allowed') + elif parser.stream.eos: + parser.fail('unclosed translation block') + else: + assert False, 'internal parser error' + + return referenced, concat(buf) + + def _make_node(self, singular, plural, variables, plural_expr, + vars_referenced, num_called_num): + """Generates a useful node from the data provided.""" + # no variables referenced? no need to escape for old style + # gettext invocations only if there are vars. + if not vars_referenced and not self.environment.newstyle_gettext: + singular = singular.replace('%%', '%') + if plural: + plural = plural.replace('%%', '%') + + # singular only: + if plural_expr is None: + gettext = nodes.Name('gettext', 'load') + node = nodes.Call(gettext, [nodes.Const(singular)], + [], None, None) + + # singular and plural + else: + ngettext = nodes.Name('ngettext', 'load') + node = nodes.Call(ngettext, [ + nodes.Const(singular), + nodes.Const(plural), + plural_expr + ], [], None, None) + + # in case newstyle gettext is used, the method is powerful + # enough to handle the variable expansion and autoescape + # handling itself + if self.environment.newstyle_gettext: + for key, value in iteritems(variables): + # the function adds that later anyways in case num was + # called num, so just skip it. + if num_called_num and key == 'num': + continue + node.kwargs.append(nodes.Keyword(key, value)) + + # otherwise do that here + else: + # mark the return value as safe if we are in an + # environment with autoescaping turned on + node = nodes.MarkSafeIfAutoescape(node) + if variables: + node = nodes.Mod(node, nodes.Dict([ + nodes.Pair(nodes.Const(key), value) + for key, value in variables.items() + ])) + return nodes.Output([node]) + + +class ExprStmtExtension(Extension): + """Adds a `do` tag to Jinja2 that works like the print statement just + that it doesn't print the return value. + """ + tags = set(['do']) + + def parse(self, parser): + node = nodes.ExprStmt(lineno=next(parser.stream).lineno) + node.node = parser.parse_tuple() + return node + + +class LoopControlExtension(Extension): + """Adds break and continue to the template engine.""" + tags = set(['break', 'continue']) + + def parse(self, parser): + token = next(parser.stream) + if token.value == 'break': + return nodes.Break(lineno=token.lineno) + return nodes.Continue(lineno=token.lineno) + + +class WithExtension(Extension): + pass + + +class AutoEscapeExtension(Extension): + pass + + +def extract_from_ast(node, gettext_functions=GETTEXT_FUNCTIONS, + babel_style=True): + """Extract localizable strings from the given template node. Per + default this function returns matches in babel style that means non string + parameters as well as keyword arguments are returned as `None`. This + allows Babel to figure out what you really meant if you are using + gettext functions that allow keyword arguments for placeholder expansion. + If you don't want that behavior set the `babel_style` parameter to `False` + which causes only strings to be returned and parameters are always stored + in tuples. As a consequence invalid gettext calls (calls without a single + string parameter or string parameters after non-string parameters) are + skipped. 
+ + This example explains the behavior: + + >>> from jinja2 import Environment + >>> env = Environment() + >>> node = env.parse('{{ (_("foo"), _(), ngettext("foo", "bar", 42)) }}') + >>> list(extract_from_ast(node)) + [(1, '_', 'foo'), (1, '_', ()), (1, 'ngettext', ('foo', 'bar', None))] + >>> list(extract_from_ast(node, babel_style=False)) + [(1, '_', ('foo',)), (1, 'ngettext', ('foo', 'bar'))] + + For every string found this function yields a ``(lineno, function, + message)`` tuple, where: + + * ``lineno`` is the number of the line on which the string was found, + * ``function`` is the name of the ``gettext`` function used (if the + string was extracted from embedded Python code), and + * ``message`` is the string itself (a ``unicode`` object, or a tuple + of ``unicode`` objects for functions with multiple string arguments). + + This extraction function operates on the AST and is because of that unable + to extract any comments. For comment support you have to use the babel + extraction interface or extract comments yourself. + """ + for node in node.find_all(nodes.Call): + if not isinstance(node.node, nodes.Name) or \ + node.node.name not in gettext_functions: + continue + + strings = [] + for arg in node.args: + if isinstance(arg, nodes.Const) and \ + isinstance(arg.value, string_types): + strings.append(arg.value) + else: + strings.append(None) + + for arg in node.kwargs: + strings.append(None) + if node.dyn_args is not None: + strings.append(None) + if node.dyn_kwargs is not None: + strings.append(None) + + if not babel_style: + strings = tuple(x for x in strings if x is not None) + if not strings: + continue + else: + if len(strings) == 1: + strings = strings[0] + else: + strings = tuple(strings) + yield node.lineno, node.node.name, strings + + +class _CommentFinder(object): + """Helper class to find comments in a token stream. Can only + find comments for gettext calls forwards. Once the comment + from line 4 is found, a comment for line 1 will not return a + usable value. + """ + + def __init__(self, tokens, comment_tags): + self.tokens = tokens + self.comment_tags = comment_tags + self.offset = 0 + self.last_lineno = 0 + + def find_backwards(self, offset): + try: + for _, token_type, token_value in \ + reversed(self.tokens[self.offset:offset]): + if token_type in ('comment', 'linecomment'): + try: + prefix, comment = token_value.split(None, 1) + except ValueError: + continue + if prefix in self.comment_tags: + return [comment.rstrip()] + return [] + finally: + self.offset = offset + + def find_comments(self, lineno): + if not self.comment_tags or self.last_lineno > lineno: + return [] + for idx, (token_lineno, _, _) in enumerate(self.tokens[self.offset:]): + if token_lineno > lineno: + return self.find_backwards(self.offset + idx) + return self.find_backwards(len(self.tokens)) + + +def babel_extract(fileobj, keywords, comment_tags, options): + """Babel extraction method for Jinja templates. + + .. versionchanged:: 2.3 + Basic support for translation comments was added. If `comment_tags` + is now set to a list of keywords for extraction, the extractor will + try to find the best preceeding comment that begins with one of the + keywords. For best results, make sure to not have more than one + gettext call in one line of code and the matching comment in the + same line or the line before. + + .. versionchanged:: 2.5.1 + The `newstyle_gettext` flag can be set to `True` to enable newstyle + gettext calls. + + .. versionchanged:: 2.7 + A `silent` option can now be provided. 
If set to `False` template + syntax errors are propagated instead of being ignored. + + :param fileobj: the file-like object the messages should be extracted from + :param keywords: a list of keywords (i.e. function names) that should be + recognized as translation functions + :param comment_tags: a list of translator tags to search for and include + in the results. + :param options: a dictionary of additional options (optional) + :return: an iterator over ``(lineno, funcname, message, comments)`` tuples. + (comments will be empty currently) + """ + extensions = set() + for extension in options.get('extensions', '').split(','): + extension = extension.strip() + if not extension: + continue + extensions.add(import_string(extension)) + if InternationalizationExtension not in extensions: + extensions.add(InternationalizationExtension) + + def getbool(options, key, default=False): + return options.get(key, str(default)).lower() in \ + ('1', 'on', 'yes', 'true') + + silent = getbool(options, 'silent', True) + environment = Environment( + options.get('block_start_string', BLOCK_START_STRING), + options.get('block_end_string', BLOCK_END_STRING), + options.get('variable_start_string', VARIABLE_START_STRING), + options.get('variable_end_string', VARIABLE_END_STRING), + options.get('comment_start_string', COMMENT_START_STRING), + options.get('comment_end_string', COMMENT_END_STRING), + options.get('line_statement_prefix') or LINE_STATEMENT_PREFIX, + options.get('line_comment_prefix') or LINE_COMMENT_PREFIX, + getbool(options, 'trim_blocks', TRIM_BLOCKS), + getbool(options, 'lstrip_blocks', LSTRIP_BLOCKS), + NEWLINE_SEQUENCE, + getbool(options, 'keep_trailing_newline', KEEP_TRAILING_NEWLINE), + frozenset(extensions), + cache_size=0, + auto_reload=False + ) + + if getbool(options, 'trimmed'): + environment.policies['ext.i18n.trimmed'] = True + if getbool(options, 'newstyle_gettext'): + environment.newstyle_gettext = True + + source = fileobj.read().decode(options.get('encoding', 'utf-8')) + try: + node = environment.parse(source) + tokens = list(environment.lex(environment.preprocess(source))) + except TemplateSyntaxError as e: + if not silent: + raise + # skip templates with syntax errors + return + + finder = _CommentFinder(tokens, comment_tags) + for lineno, func, message in extract_from_ast(node, keywords): + yield lineno, func, message, finder.find_comments(lineno) + + +#: nicer import names +i18n = InternationalizationExtension +do = ExprStmtExtension +loopcontrols = LoopControlExtension +with_ = WithExtension +autoescape = AutoEscapeExtension diff --git a/python/jinja2/filters.py b/python/jinja2/filters.py new file mode 100644 index 0000000..267dddd --- /dev/null +++ b/python/jinja2/filters.py @@ -0,0 +1,1190 @@ +# -*- coding: utf-8 -*- +""" + jinja2.filters + ~~~~~~~~~~~~~~ + + Bundled jinja filters. + + :copyright: (c) 2017 by the Jinja Team. + :license: BSD, see LICENSE for more details. +""" +import re +import math +import random +import warnings + +from itertools import groupby, chain +from collections import namedtuple +from jinja2.utils import Markup, escape, pformat, urlize, soft_unicode, \ + unicode_urlencode, htmlsafe_json_dumps +from jinja2.runtime import Undefined +from jinja2.exceptions import FilterArgumentError +from jinja2._compat import imap, string_types, text_type, iteritems, PY2 + + +_word_re = re.compile(r'\w+', re.UNICODE) +_word_beginning_split_re = re.compile(r'([-\s\(\{\[\<]+)', re.UNICODE) + + +def contextfilter(f): + """Decorator for marking context dependent filters. 
The current + :class:`Context` will be passed as first argument. + """ + f.contextfilter = True + return f + + +def evalcontextfilter(f): + """Decorator for marking eval-context dependent filters. An eval + context object is passed as first argument. For more information + about the eval context, see :ref:`eval-context`. + + .. versionadded:: 2.4 + """ + f.evalcontextfilter = True + return f + + +def environmentfilter(f): + """Decorator for marking environment dependent filters. The current + :class:`Environment` is passed to the filter as first argument. + """ + f.environmentfilter = True + return f + + +def ignore_case(value): + """For use as a postprocessor for :func:`make_attrgetter`. Converts strings + to lowercase and returns other types as-is.""" + return value.lower() if isinstance(value, string_types) else value + + +def make_attrgetter(environment, attribute, postprocess=None): + """Returns a callable that looks up the given attribute from a + passed object with the rules of the environment. Dots are allowed + to access attributes of attributes. Integer parts in paths are + looked up as integers. + """ + if attribute is None: + attribute = [] + elif isinstance(attribute, string_types): + attribute = [int(x) if x.isdigit() else x for x in attribute.split('.')] + else: + attribute = [attribute] + + def attrgetter(item): + for part in attribute: + item = environment.getitem(item, part) + + if postprocess is not None: + item = postprocess(item) + + return item + + return attrgetter + + +def do_forceescape(value): + """Enforce HTML escaping. This will probably double escape variables.""" + if hasattr(value, '__html__'): + value = value.__html__() + return escape(text_type(value)) + + +def do_urlencode(value): + """Escape strings for use in URLs (uses UTF-8 encoding). It accepts both + dictionaries and regular strings as well as pairwise iterables. + + .. versionadded:: 2.7 + """ + itemiter = None + if isinstance(value, dict): + itemiter = iteritems(value) + elif not isinstance(value, string_types): + try: + itemiter = iter(value) + except TypeError: + pass + if itemiter is None: + return unicode_urlencode(value) + return u'&'.join(unicode_urlencode(k) + '=' + + unicode_urlencode(v, for_qs=True) + for k, v in itemiter) + + +@evalcontextfilter +def do_replace(eval_ctx, s, old, new, count=None): + """Return a copy of the value with all occurrences of a substring + replaced with a new one. The first argument is the substring + that should be replaced, the second is the replacement string. + If the optional third argument ``count`` is given, only the first + ``count`` occurrences are replaced: + + .. sourcecode:: jinja + + {{ "Hello World"|replace("Hello", "Goodbye") }} + -> Goodbye World + + {{ "aaaaargh"|replace("a", "d'oh, ", 2) }} + -> d'oh, d'oh, aaargh + """ + if count is None: + count = -1 + if not eval_ctx.autoescape: + return text_type(s).replace(text_type(old), text_type(new), count) + if hasattr(old, '__html__') or hasattr(new, '__html__') and \ + not hasattr(s, '__html__'): + s = escape(s) + else: + s = soft_unicode(s) + return s.replace(soft_unicode(old), soft_unicode(new), count) + + +def do_upper(s): + """Convert a value to uppercase.""" + return soft_unicode(s).upper() + + +def do_lower(s): + """Convert a value to lowercase.""" + return soft_unicode(s).lower() + + +@evalcontextfilter +def do_xmlattr(_eval_ctx, d, autospace=True): + """Create an SGML/XML attribute string based on the items in a dict. 
+
+ All values that are neither `none` nor `undefined` are automatically
+ escaped:
+
+ .. sourcecode:: html+jinja
+
+ <ul{{ {'class': 'my_list', 'missing': none,
+ 'id': 'list-%d'|format(variable)}|xmlattr }}>
+ ...
+ </ul>
+
+ Results in something like this:
+
+ .. sourcecode:: html
+
+ <ul class="my_list" id="list-42">
+ ...
+ </ul>
+
+ As you can see, it automatically prepends a space in front of the item
+ if the filter returned something, unless the second parameter is false.
+ """
+ rv = u' '.join(
+ u'%s="%s"' % (escape(key), escape(value))
+ for key, value in iteritems(d)
+ if value is not None and not isinstance(value, Undefined)
+ )
+ if autospace and rv:
+ rv = u' ' + rv
+ if _eval_ctx.autoescape:
+ rv = Markup(rv)
+ return rv
+
+
+def do_capitalize(s):
+ """Capitalize a value. The first character will be uppercase, all others
+ lowercase.
+ """
+ return soft_unicode(s).capitalize()
+
+
+def do_title(s):
+ """Return a titlecased version of the value. I.e. words will start with
+ uppercase letters, all remaining characters are lowercase.
+ """
+ return ''.join(
+ [item[0].upper() + item[1:].lower()
+ for item in _word_beginning_split_re.split(soft_unicode(s))
+ if item])
+
+
+def do_dictsort(value, case_sensitive=False, by='key', reverse=False):
+ """Sort a dict and yield (key, value) pairs. Because Python dicts are
+ unsorted you may want to use this function to order them by either
+ key or value:
+
+ .. sourcecode:: jinja
+
+ {% for item in mydict|dictsort %}
+ sort the dict by key, case insensitive
+
+ {% for item in mydict|dictsort(reverse=true) %}
+ sort the dict by key, case insensitive, reverse order
+
+ {% for item in mydict|dictsort(true) %}
+ sort the dict by key, case sensitive
+
+ {% for item in mydict|dictsort(false, 'value') %}
+ sort the dict by value, case insensitive
+ """
+ if by == 'key':
+ pos = 0
+ elif by == 'value':
+ pos = 1
+ else:
+ raise FilterArgumentError(
+ 'You can only sort by either "key" or "value"'
+ )
+
+ def sort_func(item):
+ value = item[pos]
+
+ if not case_sensitive:
+ value = ignore_case(value)
+
+ return value
+
+ return sorted(value.items(), key=sort_func, reverse=reverse)
+
+
+@environmentfilter
+def do_sort(
+ environment, value, reverse=False, case_sensitive=False, attribute=None
+):
+ """Sort an iterable. Per default it sorts ascending; if you pass it
+ true as the first argument it will reverse the sorting.
+
+ If the iterable is made of strings the third parameter can be used to
+ control the case sensitivity of the comparison, which is disabled by
+ default.
+
+ .. sourcecode:: jinja
+
+ {% for item in iterable|sort %}
+ ...
+ {% endfor %}
+
+ It is also possible to sort by an attribute (for example to sort
+ by the date of an object) by specifying the `attribute` parameter:
+
+ .. sourcecode:: jinja
+
+ {% for item in iterable|sort(attribute='date') %}
+ ...
+ {% endfor %}
+
+ .. versionchanged:: 2.6
+ The `attribute` parameter was added.
+ """
+ key_func = make_attrgetter(
+ environment, attribute,
+ postprocess=ignore_case if not case_sensitive else None
+ )
+ return sorted(value, key=key_func, reverse=reverse)
+
+
+@environmentfilter
+def do_unique(environment, value, case_sensitive=False, attribute=None):
+ """Returns a list of unique items from the given iterable.
+
+ .. sourcecode:: jinja
+
+ {{ ['foo', 'bar', 'foobar', 'FooBar']|unique }}
+ -> ['foo', 'bar', 'foobar']
+
+ The unique items are yielded in the same order as their first occurrence in
+ the iterable passed to the filter.
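+
+ Filtering on an attribute is a sketch along these lines (``users``
+ is a hypothetical variable):
+
+ .. sourcecode:: jinja
+
+ {{ users|unique(attribute='name') }}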
+ + :param case_sensitive: Treat upper and lower case strings as distinct. + :param attribute: Filter objects with unique values for this attribute. + """ + getter = make_attrgetter( + environment, attribute, + postprocess=ignore_case if not case_sensitive else None + ) + seen = set() + + for item in value: + key = getter(item) + + if key not in seen: + seen.add(key) + yield item + + +def _min_or_max(environment, value, func, case_sensitive, attribute): + it = iter(value) + + try: + first = next(it) + except StopIteration: + return environment.undefined('No aggregated item, sequence was empty.') + + key_func = make_attrgetter( + environment, attribute, + ignore_case if not case_sensitive else None + ) + return func(chain([first], it), key=key_func) + + +@environmentfilter +def do_min(environment, value, case_sensitive=False, attribute=None): + """Return the smallest item from the sequence. + + .. sourcecode:: jinja + + {{ [1, 2, 3]|min }} + -> 1 + + :param case_sensitive: Treat upper and lower case strings as distinct. + :param attribute: Get the object with the max value of this attribute. + """ + return _min_or_max(environment, value, min, case_sensitive, attribute) + + +@environmentfilter +def do_max(environment, value, case_sensitive=False, attribute=None): + """Return the largest item from the sequence. + + .. sourcecode:: jinja + + {{ [1, 2, 3]|max }} + -> 3 + + :param case_sensitive: Treat upper and lower case strings as distinct. + :param attribute: Get the object with the max value of this attribute. + """ + return _min_or_max(environment, value, max, case_sensitive, attribute) + + +def do_default(value, default_value=u'', boolean=False): + """If the value is undefined it will return the passed default value, + otherwise the value of the variable: + + .. sourcecode:: jinja + + {{ my_variable|default('my_variable is not defined') }} + + This will output the value of ``my_variable`` if the variable was + defined, otherwise ``'my_variable is not defined'``. If you want + to use default with variables that evaluate to false you have to + set the second parameter to `true`: + + .. sourcecode:: jinja + + {{ ''|default('the string was empty', true) }} + """ + if isinstance(value, Undefined) or (boolean and not value): + return default_value + return value + + +@evalcontextfilter +def do_join(eval_ctx, value, d=u'', attribute=None): + """Return a string which is the concatenation of the strings in the + sequence. The separator between elements is an empty string per + default, you can define it with the optional parameter: + + .. sourcecode:: jinja + + {{ [1, 2, 3]|join('|') }} + -> 1|2|3 + + {{ [1, 2, 3]|join }} + -> 123 + + It is also possible to join certain attributes of an object: + + .. sourcecode:: jinja + + {{ users|join(', ', attribute='username') }} + + .. versionadded:: 2.6 + The `attribute` parameter was added. + """ + if attribute is not None: + value = imap(make_attrgetter(eval_ctx.environment, attribute), value) + + # no automatic escaping? joining is a lot eaiser then + if not eval_ctx.autoescape: + return text_type(d).join(imap(text_type, value)) + + # if the delimiter doesn't have an html representation we check + # if any of the items has. 
If yes we do a coercion to Markup + if not hasattr(d, '__html__'): + value = list(value) + do_escape = False + for idx, item in enumerate(value): + if hasattr(item, '__html__'): + do_escape = True + else: + value[idx] = text_type(item) + if do_escape: + d = escape(d) + else: + d = text_type(d) + return d.join(value) + + # no html involved, to normal joining + return soft_unicode(d).join(imap(soft_unicode, value)) + + +def do_center(value, width=80): + """Centers the value in a field of a given width.""" + return text_type(value).center(width) + + +@environmentfilter +def do_first(environment, seq): + """Return the first item of a sequence.""" + try: + return next(iter(seq)) + except StopIteration: + return environment.undefined('No first item, sequence was empty.') + + +@environmentfilter +def do_last(environment, seq): + """Return the last item of a sequence.""" + try: + return next(iter(reversed(seq))) + except StopIteration: + return environment.undefined('No last item, sequence was empty.') + + +@contextfilter +def do_random(context, seq): + """Return a random item from the sequence.""" + try: + return random.choice(seq) + except IndexError: + return context.environment.undefined('No random item, sequence was empty.') + + +def do_filesizeformat(value, binary=False): + """Format the value like a 'human-readable' file size (i.e. 13 kB, + 4.1 MB, 102 Bytes, etc). Per default decimal prefixes are used (Mega, + Giga, etc.), if the second parameter is set to `True` the binary + prefixes are used (Mebi, Gibi). + """ + bytes = float(value) + base = binary and 1024 or 1000 + prefixes = [ + (binary and 'KiB' or 'kB'), + (binary and 'MiB' or 'MB'), + (binary and 'GiB' or 'GB'), + (binary and 'TiB' or 'TB'), + (binary and 'PiB' or 'PB'), + (binary and 'EiB' or 'EB'), + (binary and 'ZiB' or 'ZB'), + (binary and 'YiB' or 'YB') + ] + if bytes == 1: + return '1 Byte' + elif bytes < base: + return '%d Bytes' % bytes + else: + for i, prefix in enumerate(prefixes): + unit = base ** (i + 2) + if bytes < unit: + return '%.1f %s' % ((base * bytes / unit), prefix) + return '%.1f %s' % ((base * bytes / unit), prefix) + + +def do_pprint(value, verbose=False): + """Pretty print a variable. Useful for debugging. + + With Jinja 1.2 onwards you can pass it a parameter. If this parameter + is truthy the output will be more verbose (this requires `pretty`) + """ + return pformat(value, verbose=verbose) + + +@evalcontextfilter +def do_urlize(eval_ctx, value, trim_url_limit=None, nofollow=False, + target=None, rel=None): + """Converts URLs in plain text into clickable links. + + If you pass the filter an additional integer it will shorten the urls + to that number. Also a third argument exists that makes the urls + "nofollow": + + .. sourcecode:: jinja + + {{ mytext|urlize(40, true) }} + links are shortened to 40 chars and defined with rel="nofollow" + + If *target* is specified, the ``target`` attribute will be added to the + ``<a>`` tag: + + .. sourcecode:: jinja + + {{ mytext|urlize(40, target='_blank') }} + + .. versionchanged:: 2.8+ + The *target* parameter was added. 
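+
+ Combining the options is a sketch along these lines (``mytext``
+ is a hypothetical variable):
+
+ .. sourcecode:: jinja
+
+ {{ mytext|urlize(40, true, target='_blank') }}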
+ """ + policies = eval_ctx.environment.policies + rel = set((rel or '').split() or []) + if nofollow: + rel.add('nofollow') + rel.update((policies['urlize.rel'] or '').split()) + if target is None: + target = policies['urlize.target'] + rel = ' '.join(sorted(rel)) or None + rv = urlize(value, trim_url_limit, rel=rel, target=target) + if eval_ctx.autoescape: + rv = Markup(rv) + return rv + + +def do_indent( + s, width=4, first=False, blank=False, indentfirst=None +): + """Return a copy of the string with each line indented by 4 spaces. The + first line and blank lines are not indented by default. + + :param width: Number of spaces to indent by. + :param first: Don't skip indenting the first line. + :param blank: Don't skip indenting empty lines. + + .. versionchanged:: 2.10 + Blank lines are not indented by default. + + Rename the ``indentfirst`` argument to ``first``. + """ + if indentfirst is not None: + warnings.warn(DeprecationWarning( + 'The "indentfirst" argument is renamed to "first".' + ), stacklevel=2) + first = indentfirst + + s += u'\n' # this quirk is necessary for splitlines method + indention = u' ' * width + + if blank: + rv = (u'\n' + indention).join(s.splitlines()) + else: + lines = s.splitlines() + rv = lines.pop(0) + + if lines: + rv += u'\n' + u'\n'.join( + indention + line if line else line for line in lines + ) + + if first: + rv = indention + rv + + return rv + + +@environmentfilter +def do_truncate(env, s, length=255, killwords=False, end='...', leeway=None): + """Return a truncated copy of the string. The length is specified + with the first parameter which defaults to ``255``. If the second + parameter is ``true`` the filter will cut the text at length. Otherwise + it will discard the last word. If the text was in fact + truncated it will append an ellipsis sign (``"..."``). If you want a + different ellipsis sign than ``"..."`` you can specify it using the + third parameter. Strings that only exceed the length by the tolerance + margin given in the fourth parameter will not be truncated. + + .. sourcecode:: jinja + + {{ "foo bar baz qux"|truncate(9) }} + -> "foo..." + {{ "foo bar baz qux"|truncate(9, True) }} + -> "foo ba..." + {{ "foo bar baz qux"|truncate(11) }} + -> "foo bar baz qux" + {{ "foo bar baz qux"|truncate(11, False, '...', 0) }} + -> "foo bar..." + + The default leeway on newer Jinja2 versions is 5 and was 0 before but + can be reconfigured globally. + """ + if leeway is None: + leeway = env.policies['truncate.leeway'] + assert length >= len(end), 'expected length >= %s, got %s' % (len(end), length) + assert leeway >= 0, 'expected leeway >= 0, got %s' % leeway + if len(s) <= length + leeway: + return s + if killwords: + return s[:length - len(end)] + end + result = s[:length - len(end)].rsplit(' ', 1)[0] + return result + end + + +@environmentfilter +def do_wordwrap(environment, s, width=79, break_long_words=True, + wrapstring=None): + """ + Return a copy of the string passed to the filter wrapped after + ``79`` characters. You can override this default using the first + parameter. If you set the second parameter to `false` Jinja will not + split words apart if they are longer than `width`. By default, the newlines + will be the default newlines for the environment, but this can be changed + using the wrapstring keyword argument. + + .. versionadded:: 2.7 + Added support for the `wrapstring` parameter. 
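+
+ A short sketch (``text`` is a hypothetical variable):
+
+ .. sourcecode:: jinja
+
+ {{ text|wordwrap(40, wrapstring='<br>') }}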
+ """ + if not wrapstring: + wrapstring = environment.newline_sequence + import textwrap + return wrapstring.join(textwrap.wrap(s, width=width, expand_tabs=False, + replace_whitespace=False, + break_long_words=break_long_words)) + + +def do_wordcount(s): + """Count the words in that string.""" + return len(_word_re.findall(s)) + + +def do_int(value, default=0, base=10): + """Convert the value into an integer. If the + conversion doesn't work it will return ``0``. You can + override this default using the first parameter. You + can also override the default base (10) in the second + parameter, which handles input with prefixes such as + 0b, 0o and 0x for bases 2, 8 and 16 respectively. + The base is ignored for decimal numbers and non-string values. + """ + try: + if isinstance(value, string_types): + return int(value, base) + return int(value) + except (TypeError, ValueError): + # this quirk is necessary so that "42.23"|int gives 42. + try: + return int(float(value)) + except (TypeError, ValueError): + return default + + +def do_float(value, default=0.0): + """Convert the value into a floating point number. If the + conversion doesn't work it will return ``0.0``. You can + override this default using the first parameter. + """ + try: + return float(value) + except (TypeError, ValueError): + return default + + +def do_format(value, *args, **kwargs): + """ + Apply python string formatting on an object: + + .. sourcecode:: jinja + + {{ "%s - %s"|format("Hello?", "Foo!") }} + -> Hello? - Foo! + """ + if args and kwargs: + raise FilterArgumentError('can\'t handle positional and keyword ' + 'arguments at the same time') + return soft_unicode(value) % (kwargs or args) + + +def do_trim(value): + """Strip leading and trailing whitespace.""" + return soft_unicode(value).strip() + + +def do_striptags(value): + """Strip SGML/XML tags and replace adjacent whitespace by one space. + """ + if hasattr(value, '__html__'): + value = value.__html__() + return Markup(text_type(value)).striptags() + + +def do_slice(value, slices, fill_with=None): + """Slice an iterator and return a list of lists containing + those items. Useful if you want to create a div containing + three ul tags that represent columns: + + .. sourcecode:: html+jinja + + <div class="columwrapper"> + {%- for column in items|slice(3) %} + <ul class="column-{{ loop.index }}"> + {%- for item in column %} + <li>{{ item }}</li> + {%- endfor %} + </ul> + {%- endfor %} + </div> + + If you pass it a second argument it's used to fill missing + values on the last iteration. + """ + seq = list(value) + length = len(seq) + items_per_slice = length // slices + slices_with_extra = length % slices + offset = 0 + for slice_number in range(slices): + start = offset + slice_number * items_per_slice + if slice_number < slices_with_extra: + offset += 1 + end = offset + (slice_number + 1) * items_per_slice + tmp = seq[start:end] + if fill_with is not None and slice_number >= slices_with_extra: + tmp.append(fill_with) + yield tmp + + +def do_batch(value, linecount, fill_with=None): + """ + A filter that batches items. It works pretty much like `slice` + just the other way round. It returns a list of lists with the + given number of items. If you provide a second parameter this + is used to fill up missing items. See this example: + + .. 
sourcecode:: html+jinja + + <table> + {%- for row in items|batch(3, ' ') %} + <tr> + {%- for column in row %} + <td>{{ column }}</td> + {%- endfor %} + </tr> + {%- endfor %} + </table> + """ + tmp = [] + for item in value: + if len(tmp) == linecount: + yield tmp + tmp = [] + tmp.append(item) + if tmp: + if fill_with is not None and len(tmp) < linecount: + tmp += [fill_with] * (linecount - len(tmp)) + yield tmp + + +def do_round(value, precision=0, method='common'): + """Round the number to a given precision. The first + parameter specifies the precision (default is ``0``), the + second the rounding method: + + - ``'common'`` rounds either up or down + - ``'ceil'`` always rounds up + - ``'floor'`` always rounds down + + If you don't specify a method ``'common'`` is used. + + .. sourcecode:: jinja + + {{ 42.55|round }} + -> 43.0 + {{ 42.55|round(1, 'floor') }} + -> 42.5 + + Note that even if rounded to 0 precision, a float is returned. If + you need a real integer, pipe it through `int`: + + .. sourcecode:: jinja + + {{ 42.55|round|int }} + -> 43 + """ + if not method in ('common', 'ceil', 'floor'): + raise FilterArgumentError('method must be common, ceil or floor') + if method == 'common': + return round(value, precision) + func = getattr(math, method) + return func(value * (10 ** precision)) / (10 ** precision) + + +# Use a regular tuple repr here. This is what we did in the past and we +# really want to hide this custom type as much as possible. In particular +# we do not want to accidentally expose an auto generated repr in case +# people start to print this out in comments or something similar for +# debugging. +_GroupTuple = namedtuple('_GroupTuple', ['grouper', 'list']) +_GroupTuple.__repr__ = tuple.__repr__ +_GroupTuple.__str__ = tuple.__str__ + +@environmentfilter +def do_groupby(environment, value, attribute): + """Group a sequence of objects by a common attribute. + + If you for example have a list of dicts or objects that represent persons + with `gender`, `first_name` and `last_name` attributes and you want to + group all users by genders you can do something like the following + snippet: + + .. sourcecode:: html+jinja + + <ul> + {% for group in persons|groupby('gender') %} + <li>{{ group.grouper }}<ul> + {% for person in group.list %} + <li>{{ person.first_name }} {{ person.last_name }}</li> + {% endfor %}</ul></li> + {% endfor %} + </ul> + + Additionally it's possible to use tuple unpacking for the grouper and + list: + + .. sourcecode:: html+jinja + + <ul> + {% for grouper, list in persons|groupby('gender') %} + ... + {% endfor %} + </ul> + + As you can see the item we're grouping by is stored in the `grouper` + attribute and the `list` contains all the objects that have this grouper + in common. + + .. versionchanged:: 2.6 + It's now possible to use dotted notation to group by the child + attribute of another attribute. + """ + expr = make_attrgetter(environment, attribute) + return [_GroupTuple(key, list(values)) for key, values + in groupby(sorted(value, key=expr), expr)] + + +@environmentfilter +def do_sum(environment, iterable, attribute=None, start=0): + """Returns the sum of a sequence of numbers plus the value of parameter + 'start' (which defaults to 0). When the sequence is empty it returns + start. + + It is also possible to sum up only certain attributes: + + .. sourcecode:: jinja + + Total: {{ items|sum(attribute='price') }} + + .. versionchanged:: 2.6 + The `attribute` parameter was added to allow suming up over + attributes. 
Also the `start` parameter was moved on to the right. + """ + if attribute is not None: + iterable = imap(make_attrgetter(environment, attribute), iterable) + return sum(iterable, start) + + +def do_list(value): + """Convert the value into a list. If it was a string the returned list + will be a list of characters. + """ + return list(value) + + +def do_mark_safe(value): + """Mark the value as safe which means that in an environment with automatic + escaping enabled this variable will not be escaped. + """ + return Markup(value) + + +def do_mark_unsafe(value): + """Mark a value as unsafe. This is the reverse operation for :func:`safe`.""" + return text_type(value) + + +def do_reverse(value): + """Reverse the object or return an iterator that iterates over it the other + way round. + """ + if isinstance(value, string_types): + return value[::-1] + try: + return reversed(value) + except TypeError: + try: + rv = list(value) + rv.reverse() + return rv + except TypeError: + raise FilterArgumentError('argument must be iterable') + + +@environmentfilter +def do_attr(environment, obj, name): + """Get an attribute of an object. ``foo|attr("bar")`` works like + ``foo.bar`` just that always an attribute is returned and items are not + looked up. + + See :ref:`Notes on subscriptions <notes-on-subscriptions>` for more details. + """ + try: + name = str(name) + except UnicodeError: + pass + else: + try: + value = getattr(obj, name) + except AttributeError: + pass + else: + if environment.sandboxed and not \ + environment.is_safe_attribute(obj, name, value): + return environment.unsafe_undefined(obj, name) + return value + return environment.undefined(obj=obj, name=name) + + +@contextfilter +def do_map(*args, **kwargs): + """Applies a filter on a sequence of objects or looks up an attribute. + This is useful when dealing with lists of objects but you are really + only interested in a certain value of it. + + The basic usage is mapping on an attribute. Imagine you have a list + of users but you are only interested in a list of usernames: + + .. sourcecode:: jinja + + Users on this page: {{ users|map(attribute='username')|join(', ') }} + + Alternatively you can let it invoke a filter by passing the name of the + filter and the arguments afterwards. A good example would be applying a + text conversion filter on a sequence: + + .. sourcecode:: jinja + + Users on this page: {{ titles|map('lower')|join(', ') }} + + .. versionadded:: 2.7 + """ + seq, func = prepare_map(args, kwargs) + if seq: + for item in seq: + yield func(item) + + +@contextfilter +def do_select(*args, **kwargs): + """Filters a sequence of objects by applying a test to each object, + and only selecting the objects with the test succeeding. + + If no test is specified, each object will be evaluated as a boolean. + + Example usage: + + .. sourcecode:: jinja + + {{ numbers|select("odd") }} + {{ numbers|select("odd") }} + {{ numbers|select("divisibleby", 3) }} + {{ numbers|select("lessthan", 42) }} + {{ strings|select("equalto", "mystring") }} + + .. versionadded:: 2.7 + """ + return select_or_reject(args, kwargs, lambda x: x, False) + + +@contextfilter +def do_reject(*args, **kwargs): + """Filters a sequence of objects by applying a test to each object, + and rejecting the objects with the test succeeding. + + If no test is specified, each object will be evaluated as a boolean. + + Example usage: + + .. sourcecode:: jinja + + {{ numbers|reject("odd") }} + + .. 
versionadded:: 2.7 + """ + return select_or_reject(args, kwargs, lambda x: not x, False) + + +@contextfilter +def do_selectattr(*args, **kwargs): + """Filters a sequence of objects by applying a test to the specified + attribute of each object, and only selecting the objects with the + test succeeding. + + If no test is specified, the attribute's value will be evaluated as + a boolean. + + Example usage: + + .. sourcecode:: jinja + + {{ users|selectattr("is_active") }} + {{ users|selectattr("email", "none") }} + + .. versionadded:: 2.7 + """ + return select_or_reject(args, kwargs, lambda x: x, True) + + +@contextfilter +def do_rejectattr(*args, **kwargs): + """Filters a sequence of objects by applying a test to the specified + attribute of each object, and rejecting the objects with the test + succeeding. + + If no test is specified, the attribute's value will be evaluated as + a boolean. + + .. sourcecode:: jinja + + {{ users|rejectattr("is_active") }} + {{ users|rejectattr("email", "none") }} + + .. versionadded:: 2.7 + """ + return select_or_reject(args, kwargs, lambda x: not x, True) + + +@evalcontextfilter +def do_tojson(eval_ctx, value, indent=None): + """Dumps a structure to JSON so that it's safe to use in ``<script>`` + tags. It accepts the same arguments and returns a JSON string. Note that + this is available in templates through the ``|tojson`` filter which will + also mark the result as safe. Due to how this function escapes certain + characters this is safe even if used outside of ``<script>`` tags. + + The following characters are escaped in strings: + + - ``<`` + - ``>`` + - ``&`` + - ``'`` + + This makes it safe to embed such strings in any place in HTML with the + notable exception of double quoted attributes. In that case single + quote your attributes or HTML escape it in addition. + + The indent parameter can be used to enable pretty printing. Set it to + the number of spaces that the structures should be indented with. + + Note that this filter is for use in HTML contexts only. + + .. 
versionadded:: 2.9 + """ + policies = eval_ctx.environment.policies + dumper = policies['json.dumps_function'] + options = policies['json.dumps_kwargs'] + if indent is not None: + options = dict(options) + options['indent'] = indent + return htmlsafe_json_dumps(value, dumper=dumper, **options) + + +def prepare_map(args, kwargs): + context = args[0] + seq = args[1] + + if len(args) == 2 and 'attribute' in kwargs: + attribute = kwargs.pop('attribute') + if kwargs: + raise FilterArgumentError('Unexpected keyword argument %r' % + next(iter(kwargs))) + func = make_attrgetter(context.environment, attribute) + else: + try: + name = args[2] + args = args[3:] + except LookupError: + raise FilterArgumentError('map requires a filter argument') + func = lambda item: context.environment.call_filter( + name, item, args, kwargs, context=context) + + return seq, func + + +def prepare_select_or_reject(args, kwargs, modfunc, lookup_attr): + context = args[0] + seq = args[1] + if lookup_attr: + try: + attr = args[2] + except LookupError: + raise FilterArgumentError('Missing parameter for attribute name') + transfunc = make_attrgetter(context.environment, attr) + off = 1 + else: + off = 0 + transfunc = lambda x: x + + try: + name = args[2 + off] + args = args[3 + off:] + func = lambda item: context.environment.call_test( + name, item, args, kwargs) + except LookupError: + func = bool + + return seq, lambda item: modfunc(func(transfunc(item))) + + +def select_or_reject(args, kwargs, modfunc, lookup_attr): + seq, func = prepare_select_or_reject(args, kwargs, modfunc, lookup_attr) + if seq: + for item in seq: + if func(item): + yield item + + +FILTERS = { + 'abs': abs, + 'attr': do_attr, + 'batch': do_batch, + 'capitalize': do_capitalize, + 'center': do_center, + 'count': len, + 'd': do_default, + 'default': do_default, + 'dictsort': do_dictsort, + 'e': escape, + 'escape': escape, + 'filesizeformat': do_filesizeformat, + 'first': do_first, + 'float': do_float, + 'forceescape': do_forceescape, + 'format': do_format, + 'groupby': do_groupby, + 'indent': do_indent, + 'int': do_int, + 'join': do_join, + 'last': do_last, + 'length': len, + 'list': do_list, + 'lower': do_lower, + 'map': do_map, + 'min': do_min, + 'max': do_max, + 'pprint': do_pprint, + 'random': do_random, + 'reject': do_reject, + 'rejectattr': do_rejectattr, + 'replace': do_replace, + 'reverse': do_reverse, + 'round': do_round, + 'safe': do_mark_safe, + 'select': do_select, + 'selectattr': do_selectattr, + 'slice': do_slice, + 'sort': do_sort, + 'string': soft_unicode, + 'striptags': do_striptags, + 'sum': do_sum, + 'title': do_title, + 'trim': do_trim, + 'truncate': do_truncate, + 'unique': do_unique, + 'upper': do_upper, + 'urlencode': do_urlencode, + 'urlize': do_urlize, + 'wordcount': do_wordcount, + 'wordwrap': do_wordwrap, + 'xmlattr': do_xmlattr, + 'tojson': do_tojson, +} diff --git a/python/jinja2/idtracking.py b/python/jinja2/idtracking.py new file mode 100644 index 0000000..491bfe0 --- /dev/null +++ b/python/jinja2/idtracking.py @@ -0,0 +1,286 @@ +from jinja2.visitor import NodeVisitor +from jinja2._compat import iteritems + + +VAR_LOAD_PARAMETER = 'param' +VAR_LOAD_RESOLVE = 'resolve' +VAR_LOAD_ALIAS = 'alias' +VAR_LOAD_UNDEFINED = 'undefined' + + +def find_symbols(nodes, parent_symbols=None): + sym = Symbols(parent=parent_symbols) + visitor = FrameSymbolVisitor(sym) + for node in nodes: + visitor.visit(node) + return sym + + +def symbols_for_node(node, parent_symbols=None): + sym = Symbols(parent=parent_symbols) + sym.analyze_node(node) + 
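+    # analyze_node() drives a RootVisitor over the node so that every name
+    # stored, loaded or declared as a parameter in this scope is recorded.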
return sym
+
+
+class Symbols(object):
+
+    def __init__(self, parent=None, level=None):
+        if level is None:
+            if parent is None:
+                level = 0
+            else:
+                level = parent.level + 1
+        self.level = level
+        self.parent = parent
+        self.refs = {}
+        self.loads = {}
+        self.stores = set()
+
+    def analyze_node(self, node, **kwargs):
+        visitor = RootVisitor(self)
+        visitor.visit(node, **kwargs)
+
+    def _define_ref(self, name, load=None):
+        ident = 'l_%d_%s' % (self.level, name)
+        self.refs[name] = ident
+        if load is not None:
+            self.loads[ident] = load
+        return ident
+
+    def find_load(self, target):
+        if target in self.loads:
+            return self.loads[target]
+        if self.parent is not None:
+            return self.parent.find_load(target)
+
+    def find_ref(self, name):
+        if name in self.refs:
+            return self.refs[name]
+        if self.parent is not None:
+            return self.parent.find_ref(name)
+
+    def ref(self, name):
+        rv = self.find_ref(name)
+        if rv is None:
+            raise AssertionError('Tried to resolve a name to a reference that '
+                                 'was unknown to the frame (%r)' % name)
+        return rv
+
+    def copy(self):
+        rv = object.__new__(self.__class__)
+        rv.__dict__.update(self.__dict__)
+        rv.refs = self.refs.copy()
+        rv.loads = self.loads.copy()
+        rv.stores = self.stores.copy()
+        return rv
+
+    def store(self, name):
+        self.stores.add(name)
+
+        # If we have not seen the name referenced yet, we need to figure
+        # out what to set it to.
+        if name not in self.refs:
+            # If there is a parent scope we check if the name has a
+            # reference there.  If it does it means we might have to alias
+            # to a variable there.
+            if self.parent is not None:
+                outer_ref = self.parent.find_ref(name)
+                if outer_ref is not None:
+                    self._define_ref(name, load=(VAR_LOAD_ALIAS, outer_ref))
+                    return
+
+            # Otherwise we can just set it to undefined.
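+            # (A load of VAR_LOAD_UNDEFINED tells the code generator that
+            # the name has no value in this scope until the store happens.)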
+ self._define_ref(name, load=(VAR_LOAD_UNDEFINED, None)) + + def declare_parameter(self, name): + self.stores.add(name) + return self._define_ref(name, load=(VAR_LOAD_PARAMETER, None)) + + def load(self, name): + target = self.find_ref(name) + if target is None: + self._define_ref(name, load=(VAR_LOAD_RESOLVE, name)) + + def branch_update(self, branch_symbols): + stores = {} + for branch in branch_symbols: + for target in branch.stores: + if target in self.stores: + continue + stores[target] = stores.get(target, 0) + 1 + + for sym in branch_symbols: + self.refs.update(sym.refs) + self.loads.update(sym.loads) + self.stores.update(sym.stores) + + for name, branch_count in iteritems(stores): + if branch_count == len(branch_symbols): + continue + target = self.find_ref(name) + assert target is not None, 'should not happen' + + if self.parent is not None: + outer_target = self.parent.find_ref(name) + if outer_target is not None: + self.loads[target] = (VAR_LOAD_ALIAS, outer_target) + continue + self.loads[target] = (VAR_LOAD_RESOLVE, name) + + def dump_stores(self): + rv = {} + node = self + while node is not None: + for name in node.stores: + if name not in rv: + rv[name] = self.find_ref(name) + node = node.parent + return rv + + def dump_param_targets(self): + rv = set() + node = self + while node is not None: + for target, (instr, _) in iteritems(self.loads): + if instr == VAR_LOAD_PARAMETER: + rv.add(target) + node = node.parent + return rv + + +class RootVisitor(NodeVisitor): + + def __init__(self, symbols): + self.sym_visitor = FrameSymbolVisitor(symbols) + + def _simple_visit(self, node, **kwargs): + for child in node.iter_child_nodes(): + self.sym_visitor.visit(child) + + visit_Template = visit_Block = visit_Macro = visit_FilterBlock = \ + visit_Scope = visit_If = visit_ScopedEvalContextModifier = \ + _simple_visit + + def visit_AssignBlock(self, node, **kwargs): + for child in node.body: + self.sym_visitor.visit(child) + + def visit_CallBlock(self, node, **kwargs): + for child in node.iter_child_nodes(exclude=('call',)): + self.sym_visitor.visit(child) + + def visit_OverlayScope(self, node, **kwargs): + for child in node.body: + self.sym_visitor.visit(child) + + def visit_For(self, node, for_branch='body', **kwargs): + if for_branch == 'body': + self.sym_visitor.visit(node.target, store_as_param=True) + branch = node.body + elif for_branch == 'else': + branch = node.else_ + elif for_branch == 'test': + self.sym_visitor.visit(node.target, store_as_param=True) + if node.test is not None: + self.sym_visitor.visit(node.test) + return + else: + raise RuntimeError('Unknown for branch') + for item in branch or (): + self.sym_visitor.visit(item) + + def visit_With(self, node, **kwargs): + for target in node.targets: + self.sym_visitor.visit(target) + for child in node.body: + self.sym_visitor.visit(child) + + def generic_visit(self, node, *args, **kwargs): + raise NotImplementedError('Cannot find symbols for %r' % + node.__class__.__name__) + + +class FrameSymbolVisitor(NodeVisitor): + """A visitor for `Frame.inspect`.""" + + def __init__(self, symbols): + self.symbols = symbols + + def visit_Name(self, node, store_as_param=False, **kwargs): + """All assignments to names go through this function.""" + if store_as_param or node.ctx == 'param': + self.symbols.declare_parameter(node.name) + elif node.ctx == 'store': + self.symbols.store(node.name) + elif node.ctx == 'load': + self.symbols.load(node.name) + + def visit_NSRef(self, node, **kwargs): + self.symbols.load(node.name) + + def 
visit_If(self, node, **kwargs): + self.visit(node.test, **kwargs) + + original_symbols = self.symbols + + def inner_visit(nodes): + self.symbols = rv = original_symbols.copy() + for subnode in nodes: + self.visit(subnode, **kwargs) + self.symbols = original_symbols + return rv + + body_symbols = inner_visit(node.body) + elif_symbols = inner_visit(node.elif_) + else_symbols = inner_visit(node.else_ or ()) + + self.symbols.branch_update([body_symbols, elif_symbols, else_symbols]) + + def visit_Macro(self, node, **kwargs): + self.symbols.store(node.name) + + def visit_Import(self, node, **kwargs): + self.generic_visit(node, **kwargs) + self.symbols.store(node.target) + + def visit_FromImport(self, node, **kwargs): + self.generic_visit(node, **kwargs) + for name in node.names: + if isinstance(name, tuple): + self.symbols.store(name[1]) + else: + self.symbols.store(name) + + def visit_Assign(self, node, **kwargs): + """Visit assignments in the correct order.""" + self.visit(node.node, **kwargs) + self.visit(node.target, **kwargs) + + def visit_For(self, node, **kwargs): + """Visiting stops at for blocks. However the block sequence + is visited as part of the outer scope. + """ + self.visit(node.iter, **kwargs) + + def visit_CallBlock(self, node, **kwargs): + self.visit(node.call, **kwargs) + + def visit_FilterBlock(self, node, **kwargs): + self.visit(node.filter, **kwargs) + + def visit_With(self, node, **kwargs): + for target in node.values: + self.visit(target) + + def visit_AssignBlock(self, node, **kwargs): + """Stop visiting at block assigns.""" + self.visit(node.target, **kwargs) + + def visit_Scope(self, node, **kwargs): + """Stop visiting at scopes.""" + + def visit_Block(self, node, **kwargs): + """Stop visiting at blocks.""" + + def visit_OverlayScope(self, node, **kwargs): + """Do not visit into overlay scopes.""" diff --git a/python/jinja2/lexer.py b/python/jinja2/lexer.py new file mode 100644 index 0000000..6fd135d --- /dev/null +++ b/python/jinja2/lexer.py @@ -0,0 +1,739 @@ +# -*- coding: utf-8 -*- +""" + jinja2.lexer + ~~~~~~~~~~~~ + + This module implements a Jinja / Python combination lexer. The + `Lexer` class provided by this module is used to do some preprocessing + for Jinja. + + On the one hand it filters out invalid operators like the bitshift + operators we don't allow in templates. On the other hand it separates + template code and python code in expressions. + + :copyright: (c) 2017 by the Jinja Team. + :license: BSD, see LICENSE for more details. +""" +import re +from collections import deque +from operator import itemgetter + +from jinja2._compat import implements_iterator, intern, iteritems, text_type +from jinja2.exceptions import TemplateSyntaxError +from jinja2.utils import LRUCache + +# cache for the lexers. 
Exists in order to be able to have multiple +# environments with the same lexer +_lexer_cache = LRUCache(50) + +# static regular expressions +whitespace_re = re.compile(r'\s+', re.U) +string_re = re.compile(r"('([^'\\]*(?:\\.[^'\\]*)*)'" + r'|"([^"\\]*(?:\\.[^"\\]*)*)")', re.S) +integer_re = re.compile(r'\d+') + +try: + # check if this Python supports Unicode identifiers + compile('föö', '<unknown>', 'eval') +except SyntaxError: + # no Unicode support, use ASCII identifiers + name_re = re.compile(r'[a-zA-Z_][a-zA-Z0-9_]*') + check_ident = False +else: + # Unicode support, build a pattern to match valid characters, and set flag + # to use str.isidentifier to validate during lexing + from jinja2 import _identifier + name_re = re.compile(r'[\w{0}]+'.format(_identifier.pattern)) + check_ident = True + # remove the pattern from memory after building the regex + import sys + del sys.modules['jinja2._identifier'] + import jinja2 + del jinja2._identifier + del _identifier + +float_re = re.compile(r'(?<!\.)\d+\.\d+') +newline_re = re.compile(r'(\r\n|\r|\n)') + +# internal the tokens and keep references to them +TOKEN_ADD = intern('add') +TOKEN_ASSIGN = intern('assign') +TOKEN_COLON = intern('colon') +TOKEN_COMMA = intern('comma') +TOKEN_DIV = intern('div') +TOKEN_DOT = intern('dot') +TOKEN_EQ = intern('eq') +TOKEN_FLOORDIV = intern('floordiv') +TOKEN_GT = intern('gt') +TOKEN_GTEQ = intern('gteq') +TOKEN_LBRACE = intern('lbrace') +TOKEN_LBRACKET = intern('lbracket') +TOKEN_LPAREN = intern('lparen') +TOKEN_LT = intern('lt') +TOKEN_LTEQ = intern('lteq') +TOKEN_MOD = intern('mod') +TOKEN_MUL = intern('mul') +TOKEN_NE = intern('ne') +TOKEN_PIPE = intern('pipe') +TOKEN_POW = intern('pow') +TOKEN_RBRACE = intern('rbrace') +TOKEN_RBRACKET = intern('rbracket') +TOKEN_RPAREN = intern('rparen') +TOKEN_SEMICOLON = intern('semicolon') +TOKEN_SUB = intern('sub') +TOKEN_TILDE = intern('tilde') +TOKEN_WHITESPACE = intern('whitespace') +TOKEN_FLOAT = intern('float') +TOKEN_INTEGER = intern('integer') +TOKEN_NAME = intern('name') +TOKEN_STRING = intern('string') +TOKEN_OPERATOR = intern('operator') +TOKEN_BLOCK_BEGIN = intern('block_begin') +TOKEN_BLOCK_END = intern('block_end') +TOKEN_VARIABLE_BEGIN = intern('variable_begin') +TOKEN_VARIABLE_END = intern('variable_end') +TOKEN_RAW_BEGIN = intern('raw_begin') +TOKEN_RAW_END = intern('raw_end') +TOKEN_COMMENT_BEGIN = intern('comment_begin') +TOKEN_COMMENT_END = intern('comment_end') +TOKEN_COMMENT = intern('comment') +TOKEN_LINESTATEMENT_BEGIN = intern('linestatement_begin') +TOKEN_LINESTATEMENT_END = intern('linestatement_end') +TOKEN_LINECOMMENT_BEGIN = intern('linecomment_begin') +TOKEN_LINECOMMENT_END = intern('linecomment_end') +TOKEN_LINECOMMENT = intern('linecomment') +TOKEN_DATA = intern('data') +TOKEN_INITIAL = intern('initial') +TOKEN_EOF = intern('eof') + +# bind operators to token types +operators = { + '+': TOKEN_ADD, + '-': TOKEN_SUB, + '/': TOKEN_DIV, + '//': TOKEN_FLOORDIV, + '*': TOKEN_MUL, + '%': TOKEN_MOD, + '**': TOKEN_POW, + '~': TOKEN_TILDE, + '[': TOKEN_LBRACKET, + ']': TOKEN_RBRACKET, + '(': TOKEN_LPAREN, + ')': TOKEN_RPAREN, + '{': TOKEN_LBRACE, + '}': TOKEN_RBRACE, + '==': TOKEN_EQ, + '!=': TOKEN_NE, + '>': TOKEN_GT, + '>=': TOKEN_GTEQ, + '<': TOKEN_LT, + '<=': TOKEN_LTEQ, + '=': TOKEN_ASSIGN, + '.': TOKEN_DOT, + ':': TOKEN_COLON, + '|': TOKEN_PIPE, + ',': TOKEN_COMMA, + ';': TOKEN_SEMICOLON +} + +reverse_operators = dict([(v, k) for k, v in iteritems(operators)]) +assert len(operators) == len(reverse_operators), 'operators dropped' 
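+# The operators are sorted by decreasing length before being joined into
+# the alternation below, so the regex tries '**' before '*' and '//'
+# before '/' and multi-character operators are never split in two.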
+operator_re = re.compile('(%s)' % '|'.join(re.escape(x) for x in + sorted(operators, key=lambda x: -len(x)))) + +ignored_tokens = frozenset([TOKEN_COMMENT_BEGIN, TOKEN_COMMENT, + TOKEN_COMMENT_END, TOKEN_WHITESPACE, + TOKEN_LINECOMMENT_BEGIN, TOKEN_LINECOMMENT_END, + TOKEN_LINECOMMENT]) +ignore_if_empty = frozenset([TOKEN_WHITESPACE, TOKEN_DATA, + TOKEN_COMMENT, TOKEN_LINECOMMENT]) + + +def _describe_token_type(token_type): + if token_type in reverse_operators: + return reverse_operators[token_type] + return { + TOKEN_COMMENT_BEGIN: 'begin of comment', + TOKEN_COMMENT_END: 'end of comment', + TOKEN_COMMENT: 'comment', + TOKEN_LINECOMMENT: 'comment', + TOKEN_BLOCK_BEGIN: 'begin of statement block', + TOKEN_BLOCK_END: 'end of statement block', + TOKEN_VARIABLE_BEGIN: 'begin of print statement', + TOKEN_VARIABLE_END: 'end of print statement', + TOKEN_LINESTATEMENT_BEGIN: 'begin of line statement', + TOKEN_LINESTATEMENT_END: 'end of line statement', + TOKEN_DATA: 'template data / text', + TOKEN_EOF: 'end of template' + }.get(token_type, token_type) + + +def describe_token(token): + """Returns a description of the token.""" + if token.type == 'name': + return token.value + return _describe_token_type(token.type) + + +def describe_token_expr(expr): + """Like `describe_token` but for token expressions.""" + if ':' in expr: + type, value = expr.split(':', 1) + if type == 'name': + return value + else: + type = expr + return _describe_token_type(type) + + +def count_newlines(value): + """Count the number of newline characters in the string. This is + useful for extensions that filter a stream. + """ + return len(newline_re.findall(value)) + + +def compile_rules(environment): + """Compiles all the rules from the environment into a list of rules.""" + e = re.escape + rules = [ + (len(environment.comment_start_string), 'comment', + e(environment.comment_start_string)), + (len(environment.block_start_string), 'block', + e(environment.block_start_string)), + (len(environment.variable_start_string), 'variable', + e(environment.variable_start_string)) + ] + + if environment.line_statement_prefix is not None: + rules.append((len(environment.line_statement_prefix), 'linestatement', + r'^[ \t\v]*' + e(environment.line_statement_prefix))) + if environment.line_comment_prefix is not None: + rules.append((len(environment.line_comment_prefix), 'linecomment', + r'(?:^|(?<=\S))[^\S\r\n]*' + + e(environment.line_comment_prefix))) + + return [x[1:] for x in sorted(rules, reverse=True)] + + +class Failure(object): + """Class that raises a `TemplateSyntaxError` if called. + Used by the `Lexer` to specify known errors. + """ + + def __init__(self, message, cls=TemplateSyntaxError): + self.message = message + self.error_class = cls + + def __call__(self, lineno, filename): + raise self.error_class(self.message, lineno, filename) + + +class Token(tuple): + """Token class.""" + __slots__ = () + lineno, type, value = (property(itemgetter(x)) for x in range(3)) + + def __new__(cls, lineno, type, value): + return tuple.__new__(cls, (lineno, intern(str(type)), value)) + + def __str__(self): + if self.type in reverse_operators: + return reverse_operators[self.type] + elif self.type == 'name': + return self.value + return self.type + + def test(self, expr): + """Test a token against a token expression. This can either be a + token type or ``'token_type:token_value'``. This can only test + against string values and types. 
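+
+        For example (illustrative)::
+
+            Token(1, 'name', 'foo').test('name')       # True
+            Token(1, 'name', 'foo').test('name:foo')   # True
+            Token(1, 'name', 'foo').test('name:bar')   # False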
+ """ + # here we do a regular string equality check as test_any is usually + # passed an iterable of not interned strings. + if self.type == expr: + return True + elif ':' in expr: + return expr.split(':', 1) == [self.type, self.value] + return False + + def test_any(self, *iterable): + """Test against multiple token expressions.""" + for expr in iterable: + if self.test(expr): + return True + return False + + def __repr__(self): + return 'Token(%r, %r, %r)' % ( + self.lineno, + self.type, + self.value + ) + + +@implements_iterator +class TokenStreamIterator(object): + """The iterator for tokenstreams. Iterate over the stream + until the eof token is reached. + """ + + def __init__(self, stream): + self.stream = stream + + def __iter__(self): + return self + + def __next__(self): + token = self.stream.current + if token.type is TOKEN_EOF: + self.stream.close() + raise StopIteration() + next(self.stream) + return token + + +@implements_iterator +class TokenStream(object): + """A token stream is an iterable that yields :class:`Token`\\s. The + parser however does not iterate over it but calls :meth:`next` to go + one token ahead. The current active token is stored as :attr:`current`. + """ + + def __init__(self, generator, name, filename): + self._iter = iter(generator) + self._pushed = deque() + self.name = name + self.filename = filename + self.closed = False + self.current = Token(1, TOKEN_INITIAL, '') + next(self) + + def __iter__(self): + return TokenStreamIterator(self) + + def __bool__(self): + return bool(self._pushed) or self.current.type is not TOKEN_EOF + __nonzero__ = __bool__ # py2 + + eos = property(lambda x: not x, doc="Are we at the end of the stream?") + + def push(self, token): + """Push a token back to the stream.""" + self._pushed.append(token) + + def look(self): + """Look at the next token.""" + old_token = next(self) + result = self.current + self.push(result) + self.current = old_token + return result + + def skip(self, n=1): + """Got n tokens ahead.""" + for x in range(n): + next(self) + + def next_if(self, expr): + """Perform the token test and return the token if it matched. + Otherwise the return value is `None`. + """ + if self.current.test(expr): + return next(self) + + def skip_if(self, expr): + """Like :meth:`next_if` but only returns `True` or `False`.""" + return self.next_if(expr) is not None + + def __next__(self): + """Go one token ahead and return the old one. + + Use the built-in :func:`next` instead of calling this directly. + """ + rv = self.current + if self._pushed: + self.current = self._pushed.popleft() + elif self.current.type is not TOKEN_EOF: + try: + self.current = next(self._iter) + except StopIteration: + self.close() + return rv + + def close(self): + """Close the stream.""" + self.current = Token(self.current.lineno, TOKEN_EOF, '') + self._iter = None + self.closed = True + + def expect(self, expr): + """Expect a given token type and return it. This accepts the same + argument as :meth:`jinja2.lexer.Token.test`. + """ + if not self.current.test(expr): + expr = describe_token_expr(expr) + if self.current.type is TOKEN_EOF: + raise TemplateSyntaxError('unexpected end of template, ' + 'expected %r.' 
% expr, + self.current.lineno, + self.name, self.filename) + raise TemplateSyntaxError("expected token %r, got %r" % + (expr, describe_token(self.current)), + self.current.lineno, + self.name, self.filename) + try: + return self.current + finally: + next(self) + + +def get_lexer(environment): + """Return a lexer which is probably cached.""" + key = (environment.block_start_string, + environment.block_end_string, + environment.variable_start_string, + environment.variable_end_string, + environment.comment_start_string, + environment.comment_end_string, + environment.line_statement_prefix, + environment.line_comment_prefix, + environment.trim_blocks, + environment.lstrip_blocks, + environment.newline_sequence, + environment.keep_trailing_newline) + lexer = _lexer_cache.get(key) + if lexer is None: + lexer = Lexer(environment) + _lexer_cache[key] = lexer + return lexer + + +class Lexer(object): + """Class that implements a lexer for a given environment. Automatically + created by the environment class, usually you don't have to do that. + + Note that the lexer is not automatically bound to an environment. + Multiple environments can share the same lexer. + """ + + def __init__(self, environment): + # shortcuts + c = lambda x: re.compile(x, re.M | re.S) + e = re.escape + + # lexing rules for tags + tag_rules = [ + (whitespace_re, TOKEN_WHITESPACE, None), + (float_re, TOKEN_FLOAT, None), + (integer_re, TOKEN_INTEGER, None), + (name_re, TOKEN_NAME, None), + (string_re, TOKEN_STRING, None), + (operator_re, TOKEN_OPERATOR, None) + ] + + # assemble the root lexing rule. because "|" is ungreedy + # we have to sort by length so that the lexer continues working + # as expected when we have parsing rules like <% for block and + # <%= for variables. (if someone wants asp like syntax) + # variables are just part of the rules if variable processing + # is required. + root_tag_rules = compile_rules(environment) + + # block suffix if trimming is enabled + block_suffix_re = environment.trim_blocks and '\\n?' or '' + + # strip leading spaces if lstrip_blocks is enabled + prefix_re = {} + if environment.lstrip_blocks: + # use '{%+' to manually disable lstrip_blocks behavior + no_lstrip_re = e('+') + # detect overlap between block and variable or comment strings + block_diff = c(r'^%s(.*)' % e(environment.block_start_string)) + # make sure we don't mistake a block for a variable or a comment + m = block_diff.match(environment.comment_start_string) + no_lstrip_re += m and r'|%s' % e(m.group(1)) or '' + m = block_diff.match(environment.variable_start_string) + no_lstrip_re += m and r'|%s' % e(m.group(1)) or '' + + # detect overlap between comment and variable strings + comment_diff = c(r'^%s(.*)' % e(environment.comment_start_string)) + m = comment_diff.match(environment.variable_start_string) + no_variable_re = m and r'(?!%s)' % e(m.group(1)) or '' + + lstrip_re = r'^[ \t]*' + block_prefix_re = r'%s%s(?!%s)|%s\+?' % ( + lstrip_re, + e(environment.block_start_string), + no_lstrip_re, + e(environment.block_start_string), + ) + comment_prefix_re = r'%s%s%s|%s\+?' 
% ( + lstrip_re, + e(environment.comment_start_string), + no_variable_re, + e(environment.comment_start_string), + ) + prefix_re['block'] = block_prefix_re + prefix_re['comment'] = comment_prefix_re + else: + block_prefix_re = '%s' % e(environment.block_start_string) + + self.newline_sequence = environment.newline_sequence + self.keep_trailing_newline = environment.keep_trailing_newline + + # global lexing rules + self.rules = { + 'root': [ + # directives + (c('(.*?)(?:%s)' % '|'.join( + [r'(?P<raw_begin>(?:\s*%s\-|%s)\s*raw\s*(?:\-%s\s*|%s))' % ( + e(environment.block_start_string), + block_prefix_re, + e(environment.block_end_string), + e(environment.block_end_string) + )] + [ + r'(?P<%s_begin>\s*%s\-|%s)' % (n, r, prefix_re.get(n,r)) + for n, r in root_tag_rules + ])), (TOKEN_DATA, '#bygroup'), '#bygroup'), + # data + (c('.+'), TOKEN_DATA, None) + ], + # comments + TOKEN_COMMENT_BEGIN: [ + (c(r'(.*?)((?:\-%s\s*|%s)%s)' % ( + e(environment.comment_end_string), + e(environment.comment_end_string), + block_suffix_re + )), (TOKEN_COMMENT, TOKEN_COMMENT_END), '#pop'), + (c('(.)'), (Failure('Missing end of comment tag'),), None) + ], + # blocks + TOKEN_BLOCK_BEGIN: [ + (c(r'(?:\-%s\s*|%s)%s' % ( + e(environment.block_end_string), + e(environment.block_end_string), + block_suffix_re + )), TOKEN_BLOCK_END, '#pop'), + ] + tag_rules, + # variables + TOKEN_VARIABLE_BEGIN: [ + (c(r'\-%s\s*|%s' % ( + e(environment.variable_end_string), + e(environment.variable_end_string) + )), TOKEN_VARIABLE_END, '#pop') + ] + tag_rules, + # raw block + TOKEN_RAW_BEGIN: [ + (c(r'(.*?)((?:\s*%s\-|%s)\s*endraw\s*(?:\-%s\s*|%s%s))' % ( + e(environment.block_start_string), + block_prefix_re, + e(environment.block_end_string), + e(environment.block_end_string), + block_suffix_re + )), (TOKEN_DATA, TOKEN_RAW_END), '#pop'), + (c('(.)'), (Failure('Missing end of raw directive'),), None) + ], + # line statements + TOKEN_LINESTATEMENT_BEGIN: [ + (c(r'\s*(\n|$)'), TOKEN_LINESTATEMENT_END, '#pop') + ] + tag_rules, + # line comments + TOKEN_LINECOMMENT_BEGIN: [ + (c(r'(.*?)()(?=\n|$)'), (TOKEN_LINECOMMENT, + TOKEN_LINECOMMENT_END), '#pop') + ] + } + + def _normalize_newlines(self, value): + """Called for strings and template data to normalize it to unicode.""" + return newline_re.sub(self.newline_sequence, value) + + def tokenize(self, source, name=None, filename=None, state=None): + """Calls tokeniter + tokenize and wraps it in a token stream. + """ + stream = self.tokeniter(source, name, filename, state) + return TokenStream(self.wrap(stream, name, filename), name, filename) + + def wrap(self, stream, name=None, filename=None): + """This is called with the stream as returned by `tokenize` and wraps + every token in a :class:`Token` and converts the value. 
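+
+        Along the way literal values are converted: integers and floats
+        become Python numbers, string tokens are unescaped, and operator
+        values are mapped to their interned token types.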
+ """ + for lineno, token, value in stream: + if token in ignored_tokens: + continue + elif token == 'linestatement_begin': + token = 'block_begin' + elif token == 'linestatement_end': + token = 'block_end' + # we are not interested in those tokens in the parser + elif token in ('raw_begin', 'raw_end'): + continue + elif token == 'data': + value = self._normalize_newlines(value) + elif token == 'keyword': + token = value + elif token == 'name': + value = str(value) + if check_ident and not value.isidentifier(): + raise TemplateSyntaxError( + 'Invalid character in identifier', + lineno, name, filename) + elif token == 'string': + # try to unescape string + try: + value = self._normalize_newlines(value[1:-1]) \ + .encode('ascii', 'backslashreplace') \ + .decode('unicode-escape') + except Exception as e: + msg = str(e).split(':')[-1].strip() + raise TemplateSyntaxError(msg, lineno, name, filename) + elif token == 'integer': + value = int(value) + elif token == 'float': + value = float(value) + elif token == 'operator': + token = operators[value] + yield Token(lineno, token, value) + + def tokeniter(self, source, name, filename=None, state=None): + """This method tokenizes the text and returns the tokens in a + generator. Use this method if you just want to tokenize a template. + """ + source = text_type(source) + lines = source.splitlines() + if self.keep_trailing_newline and source: + for newline in ('\r\n', '\r', '\n'): + if source.endswith(newline): + lines.append('') + break + source = '\n'.join(lines) + pos = 0 + lineno = 1 + stack = ['root'] + if state is not None and state != 'root': + assert state in ('variable', 'block'), 'invalid state' + stack.append(state + '_begin') + else: + state = 'root' + statetokens = self.rules[stack[-1]] + source_length = len(source) + + balancing_stack = [] + + while 1: + # tokenizer loop + for regex, tokens, new_state in statetokens: + m = regex.match(source, pos) + # if no match we try again with the next rule + if m is None: + continue + + # we only match blocks and variables if braces / parentheses + # are balanced. continue parsing with the lower rule which + # is the operator rule. do this only if the end tags look + # like operators + if balancing_stack and \ + tokens in ('variable_end', 'block_end', + 'linestatement_end'): + continue + + # tuples support more options + if isinstance(tokens, tuple): + for idx, token in enumerate(tokens): + # failure group + if token.__class__ is Failure: + raise token(lineno, filename) + # bygroup is a bit more complex, in that case we + # yield for the current token the first named + # group that matched + elif token == '#bygroup': + for key, value in iteritems(m.groupdict()): + if value is not None: + yield lineno, key, value + lineno += value.count('\n') + break + else: + raise RuntimeError('%r wanted to resolve ' + 'the token dynamically' + ' but no group matched' + % regex) + # normal group + else: + data = m.group(idx + 1) + if data or token not in ignore_if_empty: + yield lineno, token, data + lineno += data.count('\n') + + # strings as token just are yielded as it. 
+ else: + data = m.group() + # update brace/parentheses balance + if tokens == 'operator': + if data == '{': + balancing_stack.append('}') + elif data == '(': + balancing_stack.append(')') + elif data == '[': + balancing_stack.append(']') + elif data in ('}', ')', ']'): + if not balancing_stack: + raise TemplateSyntaxError('unexpected \'%s\'' % + data, lineno, name, + filename) + expected_op = balancing_stack.pop() + if expected_op != data: + raise TemplateSyntaxError('unexpected \'%s\', ' + 'expected \'%s\'' % + (data, expected_op), + lineno, name, + filename) + # yield items + if data or tokens not in ignore_if_empty: + yield lineno, tokens, data + lineno += data.count('\n') + + # fetch new position into new variable so that we can check + # if there is a internal parsing error which would result + # in an infinite loop + pos2 = m.end() + + # handle state changes + if new_state is not None: + # remove the uppermost state + if new_state == '#pop': + stack.pop() + # resolve the new state by group checking + elif new_state == '#bygroup': + for key, value in iteritems(m.groupdict()): + if value is not None: + stack.append(key) + break + else: + raise RuntimeError('%r wanted to resolve the ' + 'new state dynamically but' + ' no group matched' % + regex) + # direct state name given + else: + stack.append(new_state) + statetokens = self.rules[stack[-1]] + # we are still at the same position and no stack change. + # this means a loop without break condition, avoid that and + # raise error + elif pos2 == pos: + raise RuntimeError('%r yielded empty string without ' + 'stack change' % regex) + # publish new function and start again + pos = pos2 + break + # if loop terminated without break we haven't found a single match + # either we are at the end of the file or we have a problem + else: + # end of text + if pos >= source_length: + return + # something went wrong + raise TemplateSyntaxError('unexpected char %r at %d' % + (source[pos], pos), lineno, + name, filename) diff --git a/python/jinja2/loaders.py b/python/jinja2/loaders.py new file mode 100644 index 0000000..4c79793 --- /dev/null +++ b/python/jinja2/loaders.py @@ -0,0 +1,481 @@ +# -*- coding: utf-8 -*- +""" + jinja2.loaders + ~~~~~~~~~~~~~~ + + Jinja loader classes. + + :copyright: (c) 2017 by the Jinja Team. + :license: BSD, see LICENSE for more details. +""" +import os +import sys +import weakref +from types import ModuleType +from os import path +from hashlib import sha1 +from jinja2.exceptions import TemplateNotFound +from jinja2.utils import open_if_exists, internalcode +from jinja2._compat import string_types, iteritems + + +def split_template_path(template): + """Split a path into segments and perform a sanity check. If it detects + '..' in the path it will raise a `TemplateNotFound` error. + """ + pieces = [] + for piece in template.split('/'): + if path.sep in piece \ + or (path.altsep and path.altsep in piece) or \ + piece == path.pardir: + raise TemplateNotFound(template) + elif piece and piece != '.': + pieces.append(piece) + return pieces + + +class BaseLoader(object): + """Baseclass for all loaders. Subclass this and override `get_source` to + implement a custom loading mechanism. The environment provides a + `get_template` method that calls the loader's `load` method to get the + :class:`Template` object. 
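+
+    Concrete loaders normally only need to implement :meth:`get_source`
+    (and, where possible, :meth:`list_templates`); template compilation
+    and bytecode caching are handled by the inherited :meth:`load`.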
+ + A very basic example for a loader that looks up templates on the file + system could look like this:: + + from jinja2 import BaseLoader, TemplateNotFound + from os.path import join, exists, getmtime + + class MyLoader(BaseLoader): + + def __init__(self, path): + self.path = path + + def get_source(self, environment, template): + path = join(self.path, template) + if not exists(path): + raise TemplateNotFound(template) + mtime = getmtime(path) + with file(path) as f: + source = f.read().decode('utf-8') + return source, path, lambda: mtime == getmtime(path) + """ + + #: if set to `False` it indicates that the loader cannot provide access + #: to the source of templates. + #: + #: .. versionadded:: 2.4 + has_source_access = True + + def get_source(self, environment, template): + """Get the template source, filename and reload helper for a template. + It's passed the environment and template name and has to return a + tuple in the form ``(source, filename, uptodate)`` or raise a + `TemplateNotFound` error if it can't locate the template. + + The source part of the returned tuple must be the source of the + template as unicode string or a ASCII bytestring. The filename should + be the name of the file on the filesystem if it was loaded from there, + otherwise `None`. The filename is used by python for the tracebacks + if no loader extension is used. + + The last item in the tuple is the `uptodate` function. If auto + reloading is enabled it's always called to check if the template + changed. No arguments are passed so the function must store the + old state somewhere (for example in a closure). If it returns `False` + the template will be reloaded. + """ + if not self.has_source_access: + raise RuntimeError('%s cannot provide access to the source' % + self.__class__.__name__) + raise TemplateNotFound(template) + + def list_templates(self): + """Iterates over all templates. If the loader does not support that + it should raise a :exc:`TypeError` which is the default behavior. + """ + raise TypeError('this loader cannot iterate over all templates') + + @internalcode + def load(self, environment, name, globals=None): + """Loads a template. This method looks up the template in the cache + or loads one by calling :meth:`get_source`. Subclasses should not + override this method as loaders working on collections of other + loaders (such as :class:`PrefixLoader` or :class:`ChoiceLoader`) + will not call this method but `get_source` directly. + """ + code = None + if globals is None: + globals = {} + + # first we try to get the source for this template together + # with the filename and the uptodate function. + source, filename, uptodate = self.get_source(environment, name) + + # try to load the code from the bytecode cache if there is a + # bytecode cache configured. + bcc = environment.bytecode_cache + if bcc is not None: + bucket = bcc.get_bucket(environment, name, filename, source) + code = bucket.code + + # if we don't have code so far (not cached, no longer up to + # date) etc. we compile the template + if code is None: + code = environment.compile(source, name, filename) + + # if the bytecode cache is available and the bucket doesn't + # have a code so far, we give the bucket the new code and put + # it back to the bytecode cache. + if bcc is not None and bucket.code is None: + bucket.code = code + bcc.set_bucket(bucket) + + return environment.template_class.from_code(environment, code, + globals, uptodate) + + +class FileSystemLoader(BaseLoader): + """Loads templates from the file system. 
This loader can find templates + in folders on the file system and is the preferred way to load them. + + The loader takes the path to the templates as string, or if multiple + locations are wanted a list of them which is then looked up in the + given order:: + + >>> loader = FileSystemLoader('/path/to/templates') + >>> loader = FileSystemLoader(['/path/to/templates', '/other/path']) + + Per default the template encoding is ``'utf-8'`` which can be changed + by setting the `encoding` parameter to something else. + + To follow symbolic links, set the *followlinks* parameter to ``True``:: + + >>> loader = FileSystemLoader('/path/to/templates', followlinks=True) + + .. versionchanged:: 2.8+ + The *followlinks* parameter was added. + """ + + def __init__(self, searchpath, encoding='utf-8', followlinks=False): + if isinstance(searchpath, string_types): + searchpath = [searchpath] + self.searchpath = list(searchpath) + self.encoding = encoding + self.followlinks = followlinks + + def get_source(self, environment, template): + pieces = split_template_path(template) + for searchpath in self.searchpath: + filename = path.join(searchpath, *pieces) + f = open_if_exists(filename) + if f is None: + continue + try: + contents = f.read().decode(self.encoding) + finally: + f.close() + + mtime = path.getmtime(filename) + + def uptodate(): + try: + return path.getmtime(filename) == mtime + except OSError: + return False + return contents, filename, uptodate + raise TemplateNotFound(template) + + def list_templates(self): + found = set() + for searchpath in self.searchpath: + walk_dir = os.walk(searchpath, followlinks=self.followlinks) + for dirpath, dirnames, filenames in walk_dir: + for filename in filenames: + template = os.path.join(dirpath, filename) \ + [len(searchpath):].strip(os.path.sep) \ + .replace(os.path.sep, '/') + if template[:2] == './': + template = template[2:] + if template not in found: + found.add(template) + return sorted(found) + + +class PackageLoader(BaseLoader): + """Load templates from python eggs or packages. It is constructed with + the name of the python package and the path to the templates in that + package:: + + loader = PackageLoader('mypackage', 'views') + + If the package path is not given, ``'templates'`` is assumed. + + Per default the template encoding is ``'utf-8'`` which can be changed + by setting the `encoding` parameter to something else. Due to the nature + of eggs it's only possible to reload templates if the package was loaded + from the file system and not a zip file. 
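+
+    When the package is zip-imported, ``filesystem_bound`` is false and
+    :meth:`get_source` returns no ``uptodate`` callable, so such templates
+    are never reloaded.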
+ """ + + def __init__(self, package_name, package_path='templates', + encoding='utf-8'): + from pkg_resources import DefaultProvider, ResourceManager, \ + get_provider + provider = get_provider(package_name) + self.encoding = encoding + self.manager = ResourceManager() + self.filesystem_bound = isinstance(provider, DefaultProvider) + self.provider = provider + self.package_path = package_path + + def get_source(self, environment, template): + pieces = split_template_path(template) + p = '/'.join((self.package_path,) + tuple(pieces)) + if not self.provider.has_resource(p): + raise TemplateNotFound(template) + + filename = uptodate = None + if self.filesystem_bound: + filename = self.provider.get_resource_filename(self.manager, p) + mtime = path.getmtime(filename) + def uptodate(): + try: + return path.getmtime(filename) == mtime + except OSError: + return False + + source = self.provider.get_resource_string(self.manager, p) + return source.decode(self.encoding), filename, uptodate + + def list_templates(self): + path = self.package_path + if path[:2] == './': + path = path[2:] + elif path == '.': + path = '' + offset = len(path) + results = [] + def _walk(path): + for filename in self.provider.resource_listdir(path): + fullname = path + '/' + filename + if self.provider.resource_isdir(fullname): + _walk(fullname) + else: + results.append(fullname[offset:].lstrip('/')) + _walk(path) + results.sort() + return results + + +class DictLoader(BaseLoader): + """Loads a template from a python dict. It's passed a dict of unicode + strings bound to template names. This loader is useful for unittesting: + + >>> loader = DictLoader({'index.html': 'source here'}) + + Because auto reloading is rarely useful this is disabled per default. + """ + + def __init__(self, mapping): + self.mapping = mapping + + def get_source(self, environment, template): + if template in self.mapping: + source = self.mapping[template] + return source, None, lambda: source == self.mapping.get(template) + raise TemplateNotFound(template) + + def list_templates(self): + return sorted(self.mapping) + + +class FunctionLoader(BaseLoader): + """A loader that is passed a function which does the loading. The + function receives the name of the template and has to return either + an unicode string with the template source, a tuple in the form ``(source, + filename, uptodatefunc)`` or `None` if the template does not exist. + + >>> def load_template(name): + ... if name == 'index.html': + ... return '...' + ... + >>> loader = FunctionLoader(load_template) + + The `uptodatefunc` is a function that is called if autoreload is enabled + and has to return `True` if the template is still up to date. For more + details have a look at :meth:`BaseLoader.get_source` which has the same + return value. + """ + + def __init__(self, load_func): + self.load_func = load_func + + def get_source(self, environment, template): + rv = self.load_func(template) + if rv is None: + raise TemplateNotFound(template) + elif isinstance(rv, string_types): + return rv, None, None + return rv + + +class PrefixLoader(BaseLoader): + """A loader that is passed a dict of loaders where each loader is bound + to a prefix. 
The prefix is delimited from the template by a slash per + default, which can be changed by setting the `delimiter` argument to + something else:: + + loader = PrefixLoader({ + 'app1': PackageLoader('mypackage.app1'), + 'app2': PackageLoader('mypackage.app2') + }) + + By loading ``'app1/index.html'`` the file from the app1 package is loaded, + by loading ``'app2/index.html'`` the file from the second. + """ + + def __init__(self, mapping, delimiter='/'): + self.mapping = mapping + self.delimiter = delimiter + + def get_loader(self, template): + try: + prefix, name = template.split(self.delimiter, 1) + loader = self.mapping[prefix] + except (ValueError, KeyError): + raise TemplateNotFound(template) + return loader, name + + def get_source(self, environment, template): + loader, name = self.get_loader(template) + try: + return loader.get_source(environment, name) + except TemplateNotFound: + # re-raise the exception with the correct filename here. + # (the one that includes the prefix) + raise TemplateNotFound(template) + + @internalcode + def load(self, environment, name, globals=None): + loader, local_name = self.get_loader(name) + try: + return loader.load(environment, local_name, globals) + except TemplateNotFound: + # re-raise the exception with the correct filename here. + # (the one that includes the prefix) + raise TemplateNotFound(name) + + def list_templates(self): + result = [] + for prefix, loader in iteritems(self.mapping): + for template in loader.list_templates(): + result.append(prefix + self.delimiter + template) + return result + + +class ChoiceLoader(BaseLoader): + """This loader works like the `PrefixLoader` just that no prefix is + specified. If a template could not be found by one loader the next one + is tried. + + >>> loader = ChoiceLoader([ + ... FileSystemLoader('/path/to/user/templates'), + ... FileSystemLoader('/path/to/system/templates') + ... ]) + + This is useful if you want to allow users to override builtin templates + from a different location. + """ + + def __init__(self, loaders): + self.loaders = loaders + + def get_source(self, environment, template): + for loader in self.loaders: + try: + return loader.get_source(environment, template) + except TemplateNotFound: + pass + raise TemplateNotFound(template) + + @internalcode + def load(self, environment, name, globals=None): + for loader in self.loaders: + try: + return loader.load(environment, name, globals) + except TemplateNotFound: + pass + raise TemplateNotFound(name) + + def list_templates(self): + found = set() + for loader in self.loaders: + found.update(loader.list_templates()) + return sorted(found) + + +class _TemplateModule(ModuleType): + """Like a normal module but with support for weak references""" + + +class ModuleLoader(BaseLoader): + """This loader loads templates from precompiled templates. + + Example usage: + + >>> loader = ChoiceLoader([ + ... ModuleLoader('/path/to/compiled/templates'), + ... FileSystemLoader('/path/to/templates') + ... ]) + + Templates can be precompiled with :meth:`Environment.compile_templates`. + """ + + has_source_access = False + + def __init__(self, path): + package_name = '_jinja2_module_templates_%x' % id(self) + + # create a fake module that looks for the templates in the + # path given. 
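+        # (Setting __path__ below makes the fake package importable, so
+        # load() can later __import__ the compiled template modules from it.)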
+ mod = _TemplateModule(package_name) + if isinstance(path, string_types): + path = [path] + else: + path = list(path) + mod.__path__ = path + + sys.modules[package_name] = weakref.proxy(mod, + lambda x: sys.modules.pop(package_name, None)) + + # the only strong reference, the sys.modules entry is weak + # so that the garbage collector can remove it once the + # loader that created it goes out of business. + self.module = mod + self.package_name = package_name + + @staticmethod + def get_template_key(name): + return 'tmpl_' + sha1(name.encode('utf-8')).hexdigest() + + @staticmethod + def get_module_filename(name): + return ModuleLoader.get_template_key(name) + '.py' + + @internalcode + def load(self, environment, name, globals=None): + key = self.get_template_key(name) + module = '%s.%s' % (self.package_name, key) + mod = getattr(self.module, module, None) + if mod is None: + try: + mod = __import__(module, None, None, ['root']) + except ImportError: + raise TemplateNotFound(name) + + # remove the entry from sys.modules, we only want the attribute + # on the module object we have stored on the loader. + sys.modules.pop(module, None) + + return environment.template_class.from_module_dict( + environment, mod.__dict__, globals) diff --git a/python/jinja2/meta.py b/python/jinja2/meta.py new file mode 100644 index 0000000..7421914 --- /dev/null +++ b/python/jinja2/meta.py @@ -0,0 +1,106 @@ +# -*- coding: utf-8 -*- +""" + jinja2.meta + ~~~~~~~~~~~ + + This module implements various functions that exposes information about + templates that might be interesting for various kinds of applications. + + :copyright: (c) 2017 by the Jinja Team, see AUTHORS for more details. + :license: BSD, see LICENSE for more details. +""" +from jinja2 import nodes +from jinja2.compiler import CodeGenerator +from jinja2._compat import string_types, iteritems + + +class TrackingCodeGenerator(CodeGenerator): + """We abuse the code generator for introspection.""" + + def __init__(self, environment): + CodeGenerator.__init__(self, environment, '<introspection>', + '<introspection>') + self.undeclared_identifiers = set() + + def write(self, x): + """Don't write.""" + + def enter_frame(self, frame): + """Remember all undeclared identifiers.""" + CodeGenerator.enter_frame(self, frame) + for _, (action, param) in iteritems(frame.symbols.loads): + if action == 'resolve': + self.undeclared_identifiers.add(param) + + +def find_undeclared_variables(ast): + """Returns a set of all variables in the AST that will be looked up from + the context at runtime. Because at compile time it's not known which + variables will be used depending on the path the execution takes at + runtime, all variables are returned. + + >>> from jinja2 import Environment, meta + >>> env = Environment() + >>> ast = env.parse('{% set foo = 42 %}{{ bar + foo }}') + >>> meta.find_undeclared_variables(ast) == set(['bar']) + True + + .. admonition:: Implementation + + Internally the code generator is used for finding undeclared variables. + This is good to know because the code generator might raise a + :exc:`TemplateAssertionError` during compilation and as a matter of + fact this function can currently raise that exception as well. + """ + codegen = TrackingCodeGenerator(ast.environment) + codegen.visit(ast) + return codegen.undeclared_identifiers + + +def find_referenced_templates(ast): + """Finds all the referenced templates from the AST. This will return an + iterator over all the hardcoded template extensions, inclusions and + imports. 
If dynamic inheritance or inclusion is used, `None` will be + yielded. + + >>> from jinja2 import Environment, meta + >>> env = Environment() + >>> ast = env.parse('{% extends "layout.html" %}{% include helper %}') + >>> list(meta.find_referenced_templates(ast)) + ['layout.html', None] + + This function is useful for dependency tracking. For example if you want + to rebuild parts of the website after a layout template has changed. + """ + for node in ast.find_all((nodes.Extends, nodes.FromImport, nodes.Import, + nodes.Include)): + if not isinstance(node.template, nodes.Const): + # a tuple with some non consts in there + if isinstance(node.template, (nodes.Tuple, nodes.List)): + for template_name in node.template.items: + # something const, only yield the strings and ignore + # non-string consts that really just make no sense + if isinstance(template_name, nodes.Const): + if isinstance(template_name.value, string_types): + yield template_name.value + # something dynamic in there + else: + yield None + # something dynamic we don't know about here + else: + yield None + continue + # constant is a basestring, direct template name + if isinstance(node.template.value, string_types): + yield node.template.value + # a tuple or list (latter *should* not happen) made of consts, + # yield the consts that are strings. We could warn here for + # non string values + elif isinstance(node, nodes.Include) and \ + isinstance(node.template.value, (tuple, list)): + for template_name in node.template.value: + if isinstance(template_name, string_types): + yield template_name + # something else we don't care about, we could warn here + else: + yield None diff --git a/python/jinja2/nativetypes.py b/python/jinja2/nativetypes.py new file mode 100644 index 0000000..fe17e41 --- /dev/null +++ b/python/jinja2/nativetypes.py @@ -0,0 +1,220 @@ +import sys +from ast import literal_eval +from itertools import islice, chain +from jinja2 import nodes +from jinja2._compat import text_type +from jinja2.compiler import CodeGenerator, has_safe_repr +from jinja2.environment import Environment, Template +from jinja2.utils import concat, escape + + +def native_concat(nodes): + """Return a native Python type from the list of compiled nodes. If the + result is a single node, its value is returned. Otherwise, the nodes are + concatenated as strings. If the result can be parsed with + :func:`ast.literal_eval`, the parsed value is returned. Otherwise, the + string is returned. + """ + head = list(islice(nodes, 2)) + + if not head: + return None + + if len(head) == 1: + out = head[0] + else: + out = u''.join([text_type(v) for v in chain(head, nodes)]) + + try: + return literal_eval(out) + except (ValueError, SyntaxError, MemoryError): + return out + + +class NativeCodeGenerator(CodeGenerator): + """A code generator which avoids injecting ``to_string()`` calls around the + internal code Jinja uses to render templates. + """ + + def visit_Output(self, node, frame): + """Same as :meth:`CodeGenerator.visit_Output`, but do not call + ``to_string`` on output nodes in generated code. 
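+
+        Illustration of the effect (example added here; not part of the
+        original docstring)::
+
+            >>> from jinja2.nativetypes import NativeEnvironment
+            >>> NativeEnvironment().from_string('{{ [1, x] }}').render(x=2)
+            [1, 2]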
+ """ + if self.has_known_extends and frame.require_output_check: + return + + finalize = self.environment.finalize + finalize_context = getattr(finalize, 'contextfunction', False) + finalize_eval = getattr(finalize, 'evalcontextfunction', False) + finalize_env = getattr(finalize, 'environmentfunction', False) + + if finalize is not None: + if finalize_context or finalize_eval: + const_finalize = None + elif finalize_env: + def const_finalize(x): + return finalize(self.environment, x) + else: + const_finalize = finalize + else: + def const_finalize(x): + return x + + # If we are inside a frame that requires output checking, we do so. + outdent_later = False + + if frame.require_output_check: + self.writeline('if parent_template is None:') + self.indent() + outdent_later = True + + # Try to evaluate as many chunks as possible into a static string at + # compile time. + body = [] + + for child in node.nodes: + try: + if const_finalize is None: + raise nodes.Impossible() + + const = child.as_const(frame.eval_ctx) + if not has_safe_repr(const): + raise nodes.Impossible() + except nodes.Impossible: + body.append(child) + continue + + # the frame can't be volatile here, because otherwise the as_const + # function would raise an Impossible exception at that point + try: + if frame.eval_ctx.autoescape: + if hasattr(const, '__html__'): + const = const.__html__() + else: + const = escape(const) + + const = const_finalize(const) + except Exception: + # if something goes wrong here we evaluate the node at runtime + # for easier debugging + body.append(child) + continue + + if body and isinstance(body[-1], list): + body[-1].append(const) + else: + body.append([const]) + + # if we have less than 3 nodes or a buffer we yield or extend/append + if len(body) < 3 or frame.buffer is not None: + if frame.buffer is not None: + # for one item we append, for more we extend + if len(body) == 1: + self.writeline('%s.append(' % frame.buffer) + else: + self.writeline('%s.extend((' % frame.buffer) + + self.indent() + + for item in body: + if isinstance(item, list): + val = repr(native_concat(item)) + + if frame.buffer is None: + self.writeline('yield ' + val) + else: + self.writeline(val + ',') + else: + if frame.buffer is None: + self.writeline('yield ', item) + else: + self.newline(item) + + close = 0 + + if finalize is not None: + self.write('environment.finalize(') + + if finalize_context: + self.write('context, ') + + close += 1 + + self.visit(item, frame) + + if close > 0: + self.write(')' * close) + + if frame.buffer is not None: + self.write(',') + + if frame.buffer is not None: + # close the open parentheses + self.outdent() + self.writeline(len(body) == 1 and ')' or '))') + + # otherwise we create a format string as this is faster in that case + else: + format = [] + arguments = [] + + for item in body: + if isinstance(item, list): + format.append(native_concat(item).replace('%', '%%')) + else: + format.append('%s') + arguments.append(item) + + self.writeline('yield ') + self.write(repr(concat(format)) + ' % (') + self.indent() + + for argument in arguments: + self.newline(argument) + close = 0 + + if finalize is not None: + self.write('environment.finalize(') + + if finalize_context: + self.write('context, ') + elif finalize_eval: + self.write('context.eval_ctx, ') + elif finalize_env: + self.write('environment, ') + + close += 1 + + self.visit(argument, frame) + self.write(')' * close + ', ') + + self.outdent() + self.writeline(')') + + if outdent_later: + self.outdent() + + +class 
NativeTemplate(Template): + def render(self, *args, **kwargs): + """Render the template to produce a native Python type. If the result + is a single node, its value is returned. Otherwise, the nodes are + concatenated as strings. If the result can be parsed with + :func:`ast.literal_eval`, the parsed value is returned. Otherwise, the + string is returned. + """ + vars = dict(*args, **kwargs) + + try: + return native_concat(self.root_render_func(self.new_context(vars))) + except Exception: + exc_info = sys.exc_info() + + return self.environment.handle_exception(exc_info, True) + + +class NativeEnvironment(Environment): + """An environment that renders templates to native Python types.""" + + code_generator_class = NativeCodeGenerator + template_class = NativeTemplate diff --git a/python/jinja2/nodes.py b/python/jinja2/nodes.py new file mode 100644 index 0000000..4d9a01a --- /dev/null +++ b/python/jinja2/nodes.py @@ -0,0 +1,999 @@ +# -*- coding: utf-8 -*- +""" + jinja2.nodes + ~~~~~~~~~~~~ + + This module implements additional nodes derived from the ast base node. + + It also provides some node tree helper functions like `in_lineno` and + `get_nodes` used by the parser and translator in order to normalize + python and jinja nodes. + + :copyright: (c) 2017 by the Jinja Team. + :license: BSD, see LICENSE for more details. +""" +import types +import operator + +from collections import deque +from jinja2.utils import Markup +from jinja2._compat import izip, with_metaclass, text_type, PY2 + + +#: the types we support for context functions +_context_function_types = (types.FunctionType, types.MethodType) + + +_binop_to_func = { + '*': operator.mul, + '/': operator.truediv, + '//': operator.floordiv, + '**': operator.pow, + '%': operator.mod, + '+': operator.add, + '-': operator.sub +} + +_uaop_to_func = { + 'not': operator.not_, + '+': operator.pos, + '-': operator.neg +} + +_cmpop_to_func = { + 'eq': operator.eq, + 'ne': operator.ne, + 'gt': operator.gt, + 'gteq': operator.ge, + 'lt': operator.lt, + 'lteq': operator.le, + 'in': lambda a, b: a in b, + 'notin': lambda a, b: a not in b +} + + +class Impossible(Exception): + """Raised if the node could not perform a requested action.""" + + +class NodeType(type): + """A metaclass for nodes that handles the field and attribute + inheritance. fields and attributes from the parent class are + automatically forwarded to the child.""" + + def __new__(cls, name, bases, d): + for attr in 'fields', 'attributes': + storage = [] + storage.extend(getattr(bases[0], attr, ())) + storage.extend(d.get(attr, ())) + assert len(bases) == 1, 'multiple inheritance not allowed' + assert len(storage) == len(set(storage)), 'layout conflict' + d[attr] = tuple(storage) + d.setdefault('abstract', False) + return type.__new__(cls, name, bases, d) + + +class EvalContext(object): + """Holds evaluation time information. Custom attributes can be attached + to it in extensions. 
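+
+    For example (added illustration, not in the original docstring)::
+
+        >>> from jinja2 import Environment
+        >>> EvalContext(Environment(autoescape=True)).autoescape
+        True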
+    """
+
+    def __init__(self, environment, template_name=None):
+        self.environment = environment
+        if callable(environment.autoescape):
+            self.autoescape = environment.autoescape(template_name)
+        else:
+            self.autoescape = environment.autoescape
+        self.volatile = False
+
+    def save(self):
+        return self.__dict__.copy()
+
+    def revert(self, old):
+        self.__dict__.clear()
+        self.__dict__.update(old)
+
+
+def get_eval_context(node, ctx):
+    if ctx is None:
+        if node.environment is None:
+            raise RuntimeError('if no eval context is passed, the '
+                               'node must have an attached '
+                               'environment.')
+        return EvalContext(node.environment)
+    return ctx
+
+
+class Node(with_metaclass(NodeType, object)):
+    """Baseclass for all Jinja2 nodes. There are a number of nodes available
+    of different types. There are four major types:
+
+    - :class:`Stmt`: statements
+    - :class:`Expr`: expressions
+    - :class:`Helper`: helper nodes
+    - :class:`Template`: the outermost wrapper node
+
+    All nodes have fields and attributes. Fields may be other nodes, lists,
+    or arbitrary values. Fields are passed to the constructor as regular
+    positional arguments, attributes as keyword arguments. Each node has
+    two attributes: `lineno` (the line number of the node) and `environment`.
+    The `environment` attribute is set at the end of the parsing process for
+    all nodes automatically.
+    """
+    fields = ()
+    attributes = ('lineno', 'environment')
+    abstract = True
+
+    def __init__(self, *fields, **attributes):
+        if self.abstract:
+            raise TypeError('abstract nodes are not instantiable')
+        if fields:
+            if len(fields) != len(self.fields):
+                if not self.fields:
+                    raise TypeError('%r takes 0 arguments' %
+                                    self.__class__.__name__)
+                raise TypeError('%r takes 0 or %d argument%s' % (
+                    self.__class__.__name__,
+                    len(self.fields),
+                    len(self.fields) != 1 and 's' or ''
+                ))
+            for name, arg in izip(self.fields, fields):
+                setattr(self, name, arg)
+        for attr in self.attributes:
+            setattr(self, attr, attributes.pop(attr, None))
+        if attributes:
+            raise TypeError('unknown attribute %r' %
+                            next(iter(attributes)))
+
+    def iter_fields(self, exclude=None, only=None):
+        """This method iterates over all fields that are defined and yields
+        ``(key, value)`` tuples. Per default all fields are returned, but
+        it's possible to limit that to some fields by providing the `only`
+        parameter or to exclude some using the `exclude` parameter. Both
+        should be sets or tuples of field names.
+        """
+        for name in self.fields:
+            if (exclude is only is None) or \
+               (exclude is not None and name not in exclude) or \
+               (only is not None and name in only):
+                try:
+                    yield name, getattr(self, name)
+                except AttributeError:
+                    pass
+
+    def iter_child_nodes(self, exclude=None, only=None):
+        """Iterates over all direct child nodes of the node. This iterates
+        over all fields and yields the values if they are nodes. If the value
+        of a field is a list all the nodes in that list are returned.
+        """
+        for field, item in self.iter_fields(exclude, only):
+            if isinstance(item, list):
+                for n in item:
+                    if isinstance(n, Node):
+                        yield n
+            elif isinstance(item, Node):
+                yield item
+
+    def find(self, node_type):
+        """Find the first node of a given type. If no such node exists the
+        return value is `None`.
+        """
+        for result in self.find_all(node_type):
+            return result
+
+    def find_all(self, node_type):
+        """Find all the nodes of a given type. If the type is a tuple,
+        the check is performed for any of the tuple items.
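+
+        For instance (example added here, not from the original docs)::
+
+            >>> from jinja2 import Environment, nodes
+            >>> ast = Environment().parse('{{ a }}{{ b }}')
+            >>> [n.name for n in ast.find_all(nodes.Name)]
+            ['a', 'b']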
+ """ + for child in self.iter_child_nodes(): + if isinstance(child, node_type): + yield child + for result in child.find_all(node_type): + yield result + + def set_ctx(self, ctx): + """Reset the context of a node and all child nodes. Per default the + parser will all generate nodes that have a 'load' context as it's the + most common one. This method is used in the parser to set assignment + targets and other nodes to a store context. + """ + todo = deque([self]) + while todo: + node = todo.popleft() + if 'ctx' in node.fields: + node.ctx = ctx + todo.extend(node.iter_child_nodes()) + return self + + def set_lineno(self, lineno, override=False): + """Set the line numbers of the node and children.""" + todo = deque([self]) + while todo: + node = todo.popleft() + if 'lineno' in node.attributes: + if node.lineno is None or override: + node.lineno = lineno + todo.extend(node.iter_child_nodes()) + return self + + def set_environment(self, environment): + """Set the environment for all nodes.""" + todo = deque([self]) + while todo: + node = todo.popleft() + node.environment = environment + todo.extend(node.iter_child_nodes()) + return self + + def __eq__(self, other): + return type(self) is type(other) and \ + tuple(self.iter_fields()) == tuple(other.iter_fields()) + + def __ne__(self, other): + return not self.__eq__(other) + + # Restore Python 2 hashing behavior on Python 3 + __hash__ = object.__hash__ + + def __repr__(self): + return '%s(%s)' % ( + self.__class__.__name__, + ', '.join('%s=%r' % (arg, getattr(self, arg, None)) for + arg in self.fields) + ) + + def dump(self): + def _dump(node): + if not isinstance(node, Node): + buf.append(repr(node)) + return + + buf.append('nodes.%s(' % node.__class__.__name__) + if not node.fields: + buf.append(')') + return + for idx, field in enumerate(node.fields): + if idx: + buf.append(', ') + value = getattr(node, field) + if isinstance(value, list): + buf.append('[') + for idx, item in enumerate(value): + if idx: + buf.append(', ') + _dump(item) + buf.append(']') + else: + _dump(value) + buf.append(')') + buf = [] + _dump(self) + return ''.join(buf) + + + +class Stmt(Node): + """Base node for all statements.""" + abstract = True + + +class Helper(Node): + """Nodes that exist in a specific context only.""" + abstract = True + + +class Template(Node): + """Node that represents a template. This must be the outermost node that + is passed to the compiler. + """ + fields = ('body',) + + +class Output(Stmt): + """A node that holds multiple expressions which are then printed out. + This is used both for the `print` statement and the regular template data. + """ + fields = ('nodes',) + + +class Extends(Stmt): + """Represents an extends statement.""" + fields = ('template',) + + +class For(Stmt): + """The for loop. `target` is the target for the iteration (usually a + :class:`Name` or :class:`Tuple`), `iter` the iterable. `body` is a list + of nodes that are used as loop-body, and `else_` a list of nodes for the + `else` block. If no else node exists it has to be an empty list. + + For filtered nodes an expression can be stored as `test`, otherwise `None`. + """ + fields = ('target', 'iter', 'body', 'else_', 'test', 'recursive') + + +class If(Stmt): + """If `test` is true, `body` is rendered, else `else_`.""" + fields = ('test', 'body', 'elif_', 'else_') + + +class Macro(Stmt): + """A macro definition. `name` is the name of the macro, `args` a list of + arguments and `defaults` a list of defaults if there are any. 
`body` is + a list of nodes for the macro body. + """ + fields = ('name', 'args', 'defaults', 'body') + + +class CallBlock(Stmt): + """Like a macro without a name but a call instead. `call` is called with + the unnamed macro as `caller` argument this node holds. + """ + fields = ('call', 'args', 'defaults', 'body') + + +class FilterBlock(Stmt): + """Node for filter sections.""" + fields = ('body', 'filter') + + +class With(Stmt): + """Specific node for with statements. In older versions of Jinja the + with statement was implemented on the base of the `Scope` node instead. + + .. versionadded:: 2.9.3 + """ + fields = ('targets', 'values', 'body') + + +class Block(Stmt): + """A node that represents a block.""" + fields = ('name', 'body', 'scoped') + + +class Include(Stmt): + """A node that represents the include tag.""" + fields = ('template', 'with_context', 'ignore_missing') + + +class Import(Stmt): + """A node that represents the import tag.""" + fields = ('template', 'target', 'with_context') + + +class FromImport(Stmt): + """A node that represents the from import tag. It's important to not + pass unsafe names to the name attribute. The compiler translates the + attribute lookups directly into getattr calls and does *not* use the + subscript callback of the interface. As exported variables may not + start with double underscores (which the parser asserts) this is not a + problem for regular Jinja code, but if this node is used in an extension + extra care must be taken. + + The list of names may contain tuples if aliases are wanted. + """ + fields = ('template', 'names', 'with_context') + + +class ExprStmt(Stmt): + """A statement that evaluates an expression and discards the result.""" + fields = ('node',) + + +class Assign(Stmt): + """Assigns an expression to a target.""" + fields = ('target', 'node') + + +class AssignBlock(Stmt): + """Assigns a block to a target.""" + fields = ('target', 'filter', 'body') + + +class Expr(Node): + """Baseclass for all expressions.""" + abstract = True + + def as_const(self, eval_ctx=None): + """Return the value of the expression as constant or raise + :exc:`Impossible` if this was not possible. + + An :class:`EvalContext` can be provided, if none is given + a default context is created which requires the nodes to have + an attached environment. + + .. versionchanged:: 2.4 + the `eval_ctx` parameter was added. 
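+
+        For example (added illustration; assumes nodes produced by
+        :meth:`Environment.parse`, which attaches the environment)::
+
+            >>> from jinja2 import Environment
+            >>> Environment().parse('{{ 1 + 2 }}').body[0].nodes[0].as_const()
+            3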
+ """ + raise Impossible() + + def can_assign(self): + """Check if it's possible to assign something to this node.""" + return False + + +class BinExpr(Expr): + """Baseclass for all binary expressions.""" + fields = ('left', 'right') + operator = None + abstract = True + + def as_const(self, eval_ctx=None): + eval_ctx = get_eval_context(self, eval_ctx) + # intercepted operators cannot be folded at compile time + if self.environment.sandboxed and \ + self.operator in self.environment.intercepted_binops: + raise Impossible() + f = _binop_to_func[self.operator] + try: + return f(self.left.as_const(eval_ctx), self.right.as_const(eval_ctx)) + except Exception: + raise Impossible() + + +class UnaryExpr(Expr): + """Baseclass for all unary expressions.""" + fields = ('node',) + operator = None + abstract = True + + def as_const(self, eval_ctx=None): + eval_ctx = get_eval_context(self, eval_ctx) + # intercepted operators cannot be folded at compile time + if self.environment.sandboxed and \ + self.operator in self.environment.intercepted_unops: + raise Impossible() + f = _uaop_to_func[self.operator] + try: + return f(self.node.as_const(eval_ctx)) + except Exception: + raise Impossible() + + +class Name(Expr): + """Looks up a name or stores a value in a name. + The `ctx` of the node can be one of the following values: + + - `store`: store a value in the name + - `load`: load that name + - `param`: like `store` but if the name was defined as function parameter. + """ + fields = ('name', 'ctx') + + def can_assign(self): + return self.name not in ('true', 'false', 'none', + 'True', 'False', 'None') + + +class NSRef(Expr): + """Reference to a namespace value assignment""" + fields = ('name', 'attr') + + def can_assign(self): + # We don't need any special checks here; NSRef assignments have a + # runtime check to ensure the target is a namespace object which will + # have been checked already as it is created using a normal assignment + # which goes through a `Name` node. + return True + + +class Literal(Expr): + """Baseclass for literals.""" + abstract = True + + +class Const(Literal): + """All constant values. The parser will return this node for simple + constants such as ``42`` or ``"foo"`` but it can be used to store more + complex values such as lists too. Only constants with a safe + representation (objects where ``eval(repr(x)) == x`` is true). + """ + fields = ('value',) + + def as_const(self, eval_ctx=None): + rv = self.value + if PY2 and type(rv) is text_type and \ + self.environment.policies['compiler.ascii_str']: + try: + rv = rv.encode('ascii') + except UnicodeError: + pass + return rv + + @classmethod + def from_untrusted(cls, value, lineno=None, environment=None): + """Return a const object if the value is representable as + constant value in the generated code, otherwise it will raise + an `Impossible` exception. + """ + from .compiler import has_safe_repr + if not has_safe_repr(value): + raise Impossible() + return cls(value, lineno=lineno, environment=environment) + + +class TemplateData(Literal): + """A constant template string.""" + fields = ('data',) + + def as_const(self, eval_ctx=None): + eval_ctx = get_eval_context(self, eval_ctx) + if eval_ctx.volatile: + raise Impossible() + if eval_ctx.autoescape: + return Markup(self.data) + return self.data + + +class Tuple(Literal): + """For loop unpacking and some other things like multiple arguments + for subscripts. Like for :class:`Name` `ctx` specifies if the tuple + is used for loading the names or storing. 
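+
+    A load-context example (added, not from the original docs)::
+
+        >>> from jinja2 import Environment
+        >>> Environment().parse('{{ (1, 2) }}').body[0].nodes[0].as_const()
+        (1, 2)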
+ """ + fields = ('items', 'ctx') + + def as_const(self, eval_ctx=None): + eval_ctx = get_eval_context(self, eval_ctx) + return tuple(x.as_const(eval_ctx) for x in self.items) + + def can_assign(self): + for item in self.items: + if not item.can_assign(): + return False + return True + + +class List(Literal): + """Any list literal such as ``[1, 2, 3]``""" + fields = ('items',) + + def as_const(self, eval_ctx=None): + eval_ctx = get_eval_context(self, eval_ctx) + return [x.as_const(eval_ctx) for x in self.items] + + +class Dict(Literal): + """Any dict literal such as ``{1: 2, 3: 4}``. The items must be a list of + :class:`Pair` nodes. + """ + fields = ('items',) + + def as_const(self, eval_ctx=None): + eval_ctx = get_eval_context(self, eval_ctx) + return dict(x.as_const(eval_ctx) for x in self.items) + + +class Pair(Helper): + """A key, value pair for dicts.""" + fields = ('key', 'value') + + def as_const(self, eval_ctx=None): + eval_ctx = get_eval_context(self, eval_ctx) + return self.key.as_const(eval_ctx), self.value.as_const(eval_ctx) + + +class Keyword(Helper): + """A key, value pair for keyword arguments where key is a string.""" + fields = ('key', 'value') + + def as_const(self, eval_ctx=None): + eval_ctx = get_eval_context(self, eval_ctx) + return self.key, self.value.as_const(eval_ctx) + + +class CondExpr(Expr): + """A conditional expression (inline if expression). (``{{ + foo if bar else baz }}``) + """ + fields = ('test', 'expr1', 'expr2') + + def as_const(self, eval_ctx=None): + eval_ctx = get_eval_context(self, eval_ctx) + if self.test.as_const(eval_ctx): + return self.expr1.as_const(eval_ctx) + + # if we evaluate to an undefined object, we better do that at runtime + if self.expr2 is None: + raise Impossible() + + return self.expr2.as_const(eval_ctx) + + +def args_as_const(node, eval_ctx): + args = [x.as_const(eval_ctx) for x in node.args] + kwargs = dict(x.as_const(eval_ctx) for x in node.kwargs) + + if node.dyn_args is not None: + try: + args.extend(node.dyn_args.as_const(eval_ctx)) + except Exception: + raise Impossible() + + if node.dyn_kwargs is not None: + try: + kwargs.update(node.dyn_kwargs.as_const(eval_ctx)) + except Exception: + raise Impossible() + + return args, kwargs + + +class Filter(Expr): + """This node applies a filter on an expression. `name` is the name of + the filter, the rest of the fields are the same as for :class:`Call`. + + If the `node` of a filter is `None` the contents of the last buffer are + filtered. Buffers are created by macros and filter blocks. + """ + + fields = ('node', 'name', 'args', 'kwargs', 'dyn_args', 'dyn_kwargs') + + def as_const(self, eval_ctx=None): + eval_ctx = get_eval_context(self, eval_ctx) + + if eval_ctx.volatile or self.node is None: + raise Impossible() + + # we have to be careful here because we call filter_ below. + # if this variable would be called filter, 2to3 would wrap the + # call in a list beause it is assuming we are talking about the + # builtin filter function here which no longer returns a list in + # python 3. because of that, do not rename filter_ to filter! + filter_ = self.environment.filters.get(self.name) + + if filter_ is None or getattr(filter_, 'contextfilter', False): + raise Impossible() + + # We cannot constant handle async filters, so we need to make sure + # to not go down this path. 
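+        # (Clarifying note added here:) this method is what lets the
+        # optimizer fold e.g. {{ "abc"|upper }} into "ABC" at compile time,
+        # while {{ x|upper }} stays a runtime call because Name nodes
+        # cannot be evaluated as constants.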
+ if ( + eval_ctx.environment.is_async + and getattr(filter_, 'asyncfiltervariant', False) + ): + raise Impossible() + + args, kwargs = args_as_const(self, eval_ctx) + args.insert(0, self.node.as_const(eval_ctx)) + + if getattr(filter_, 'evalcontextfilter', False): + args.insert(0, eval_ctx) + elif getattr(filter_, 'environmentfilter', False): + args.insert(0, self.environment) + + try: + return filter_(*args, **kwargs) + except Exception: + raise Impossible() + + +class Test(Expr): + """Applies a test on an expression. `name` is the name of the test, the + rest of the fields are the same as for :class:`Call`. + """ + + fields = ('node', 'name', 'args', 'kwargs', 'dyn_args', 'dyn_kwargs') + + def as_const(self, eval_ctx=None): + test = self.environment.tests.get(self.name) + + if test is None: + raise Impossible() + + eval_ctx = get_eval_context(self, eval_ctx) + args, kwargs = args_as_const(self, eval_ctx) + args.insert(0, self.node.as_const(eval_ctx)) + + try: + return test(*args, **kwargs) + except Exception: + raise Impossible() + + +class Call(Expr): + """Calls an expression. `args` is a list of arguments, `kwargs` a list + of keyword arguments (list of :class:`Keyword` nodes), and `dyn_args` + and `dyn_kwargs` has to be either `None` or a node that is used as + node for dynamic positional (``*args``) or keyword (``**kwargs``) + arguments. + """ + fields = ('node', 'args', 'kwargs', 'dyn_args', 'dyn_kwargs') + + +class Getitem(Expr): + """Get an attribute or item from an expression and prefer the item.""" + fields = ('node', 'arg', 'ctx') + + def as_const(self, eval_ctx=None): + eval_ctx = get_eval_context(self, eval_ctx) + if self.ctx != 'load': + raise Impossible() + try: + return self.environment.getitem(self.node.as_const(eval_ctx), + self.arg.as_const(eval_ctx)) + except Exception: + raise Impossible() + + def can_assign(self): + return False + + +class Getattr(Expr): + """Get an attribute or item from an expression that is a ascii-only + bytestring and prefer the attribute. + """ + fields = ('node', 'attr', 'ctx') + + def as_const(self, eval_ctx=None): + if self.ctx != 'load': + raise Impossible() + try: + eval_ctx = get_eval_context(self, eval_ctx) + return self.environment.getattr(self.node.as_const(eval_ctx), + self.attr) + except Exception: + raise Impossible() + + def can_assign(self): + return False + + +class Slice(Expr): + """Represents a slice object. This must only be used as argument for + :class:`Subscript`. + """ + fields = ('start', 'stop', 'step') + + def as_const(self, eval_ctx=None): + eval_ctx = get_eval_context(self, eval_ctx) + def const(obj): + if obj is None: + return None + return obj.as_const(eval_ctx) + return slice(const(self.start), const(self.stop), const(self.step)) + + +class Concat(Expr): + """Concatenates the list of expressions provided after converting them to + unicode. + """ + fields = ('nodes',) + + def as_const(self, eval_ctx=None): + eval_ctx = get_eval_context(self, eval_ctx) + return ''.join(text_type(x.as_const(eval_ctx)) for x in self.nodes) + + +class Compare(Expr): + """Compares an expression with some other expressions. `ops` must be a + list of :class:`Operand`\\s. 
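+
+    Constant-folding example (added, not from the original docs)::
+
+        >>> from jinja2 import Environment
+        >>> Environment().parse('{{ 1 < 2 < 3 }}').body[0].nodes[0].as_const()
+        True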
+    """
+    fields = ('expr', 'ops')
+
+    def as_const(self, eval_ctx=None):
+        eval_ctx = get_eval_context(self, eval_ctx)
+        result = value = self.expr.as_const(eval_ctx)
+        try:
+            for op in self.ops:
+                new_value = op.expr.as_const(eval_ctx)
+                result = _cmpop_to_func[op.op](value, new_value)
+                value = new_value
+        except Exception:
+            raise Impossible()
+        return result
+
+
+class Operand(Helper):
+    """Holds an operator and an expression."""
+    fields = ('op', 'expr')
+
+if __debug__:
+    Operand.__doc__ += '\nThe following operators are available: ' + \
+        ', '.join(sorted('``%s``' % x for x in set(_binop_to_func) |
+                  set(_uaop_to_func) | set(_cmpop_to_func)))
+
+
+class Mul(BinExpr):
+    """Multiplies the left with the right node."""
+    operator = '*'
+
+
+class Div(BinExpr):
+    """Divides the left by the right node."""
+    operator = '/'
+
+
+class FloorDiv(BinExpr):
+    """Divides the left by the right node and truncates the result
+    to an integer (floor division).
+    """
+    operator = '//'
+
+
+class Add(BinExpr):
+    """Add the left to the right node."""
+    operator = '+'
+
+
+class Sub(BinExpr):
+    """Subtract the right from the left node."""
+    operator = '-'
+
+
+class Mod(BinExpr):
+    """Left modulo right."""
+    operator = '%'
+
+
+class Pow(BinExpr):
+    """Left to the power of right."""
+    operator = '**'
+
+
+class And(BinExpr):
+    """Short circuited AND."""
+    operator = 'and'
+
+    def as_const(self, eval_ctx=None):
+        eval_ctx = get_eval_context(self, eval_ctx)
+        return self.left.as_const(eval_ctx) and self.right.as_const(eval_ctx)
+
+
+class Or(BinExpr):
+    """Short circuited OR."""
+    operator = 'or'
+
+    def as_const(self, eval_ctx=None):
+        eval_ctx = get_eval_context(self, eval_ctx)
+        return self.left.as_const(eval_ctx) or self.right.as_const(eval_ctx)
+
+
+class Not(UnaryExpr):
+    """Negate the expression."""
+    operator = 'not'
+
+
+class Neg(UnaryExpr):
+    """Make the expression negative."""
+    operator = '-'
+
+
+class Pos(UnaryExpr):
+    """Make the expression positive (noop for most expressions)"""
+    operator = '+'
+
+
+# Helpers for extensions
+
+
+class EnvironmentAttribute(Expr):
+    """Loads an attribute from the environment object. This is useful for
+    extensions that want to call a callback stored on the environment.
+    """
+    fields = ('name',)
+
+
+class ExtensionAttribute(Expr):
+    """Returns the attribute of an extension bound to the environment.
+    The identifier is the identifier of the :class:`Extension`.
+
+    This node is usually constructed by calling the
+    :meth:`~jinja2.ext.Extension.attr` method on an extension.
+    """
+    fields = ('identifier', 'name')
+
+
+class ImportedName(Expr):
+    """If created with an import name the import name is returned on node
+    access. For example ``ImportedName('cgi.escape')`` returns the `escape`
+    function from the cgi module on evaluation. Imports are optimized by the
+    compiler so there is no need to assign them to local variables.
+    """
+    fields = ('importname',)
+
+
+class InternalName(Expr):
+    """An internal name in the compiler. You cannot create these nodes
+    yourself but the parser provides a
+    :meth:`~jinja2.parser.Parser.free_identifier` method that creates
+    a new identifier for you. This identifier is not available from the
+    template and is not treated specially by the compiler.
+    """
+    fields = ('name',)
+
+    def __init__(self):
+        raise TypeError('Can\'t create internal names. 
Use the ' + '`free_identifier` method on a parser.') + + +class MarkSafe(Expr): + """Mark the wrapped expression as safe (wrap it as `Markup`).""" + fields = ('expr',) + + def as_const(self, eval_ctx=None): + eval_ctx = get_eval_context(self, eval_ctx) + return Markup(self.expr.as_const(eval_ctx)) + + +class MarkSafeIfAutoescape(Expr): + """Mark the wrapped expression as safe (wrap it as `Markup`) but + only if autoescaping is active. + + .. versionadded:: 2.5 + """ + fields = ('expr',) + + def as_const(self, eval_ctx=None): + eval_ctx = get_eval_context(self, eval_ctx) + if eval_ctx.volatile: + raise Impossible() + expr = self.expr.as_const(eval_ctx) + if eval_ctx.autoescape: + return Markup(expr) + return expr + + +class ContextReference(Expr): + """Returns the current template context. It can be used like a + :class:`Name` node, with a ``'load'`` ctx and will return the + current :class:`~jinja2.runtime.Context` object. + + Here an example that assigns the current template name to a + variable named `foo`:: + + Assign(Name('foo', ctx='store'), + Getattr(ContextReference(), 'name')) + """ + + +class Continue(Stmt): + """Continue a loop.""" + + +class Break(Stmt): + """Break a loop.""" + + +class Scope(Stmt): + """An artificial scope.""" + fields = ('body',) + + +class OverlayScope(Stmt): + """An overlay scope for extensions. This is a largely unoptimized scope + that however can be used to introduce completely arbitrary variables into + a sub scope from a dictionary or dictionary like object. The `context` + field has to evaluate to a dictionary object. + + Example usage:: + + OverlayScope(context=self.call_method('get_context'), + body=[...]) + + .. versionadded:: 2.10 + """ + fields = ('context', 'body') + + +class EvalContextModifier(Stmt): + """Modifies the eval context. For each option that should be modified, + a :class:`Keyword` has to be added to the :attr:`options` list. + + Example to change the `autoescape` setting:: + + EvalContextModifier(options=[Keyword('autoescape', Const(True))]) + """ + fields = ('options',) + + +class ScopedEvalContextModifier(EvalContextModifier): + """Modifies the eval context and reverts it later. Works exactly like + :class:`EvalContextModifier` but will only modify the + :class:`~jinja2.nodes.EvalContext` for nodes in the :attr:`body`. + """ + fields = ('body',) + + +# make sure nobody creates custom nodes +def _failing_new(*args, **kwargs): + raise TypeError('can\'t create custom node types') +NodeType.__new__ = staticmethod(_failing_new); del _failing_new diff --git a/python/jinja2/optimizer.py b/python/jinja2/optimizer.py new file mode 100644 index 0000000..65ab3ce --- /dev/null +++ b/python/jinja2/optimizer.py @@ -0,0 +1,49 @@ +# -*- coding: utf-8 -*- +""" + jinja2.optimizer + ~~~~~~~~~~~~~~~~ + + The jinja optimizer is currently trying to constant fold a few expressions + and modify the AST in place so that it should be easier to evaluate it. + + Because the AST does not contain all the scoping information and the + compiler has to find that out, we cannot do all the optimizations we + want. For example loop unrolling doesn't work because unrolled loops would + have a different scoping. + + The solution would be a second syntax tree that has the scoping rules stored. + + :copyright: (c) 2017 by the Jinja Team. + :license: BSD. 
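+
+    A quick example of the effect (added; folding also happens in the
+    compiler, so this only illustrates the observable result)::
+
+        >>> from jinja2 import Environment
+        >>> Environment().from_string('{{ 1 + 2 * 3 }}').render()
+        '7'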
+""" +from jinja2 import nodes +from jinja2.visitor import NodeTransformer + + +def optimize(node, environment): + """The context hint can be used to perform an static optimization + based on the context given.""" + optimizer = Optimizer(environment) + return optimizer.visit(node) + + +class Optimizer(NodeTransformer): + + def __init__(self, environment): + self.environment = environment + + def fold(self, node, eval_ctx=None): + """Do constant folding.""" + node = self.generic_visit(node) + try: + return nodes.Const.from_untrusted(node.as_const(eval_ctx), + lineno=node.lineno, + environment=self.environment) + except nodes.Impossible: + return node + + visit_Add = visit_Sub = visit_Mul = visit_Div = visit_FloorDiv = \ + visit_Pow = visit_Mod = visit_And = visit_Or = visit_Pos = visit_Neg = \ + visit_Not = visit_Compare = visit_Getitem = visit_Getattr = visit_Call = \ + visit_Filter = visit_Test = visit_CondExpr = fold + del fold diff --git a/python/jinja2/parser.py b/python/jinja2/parser.py new file mode 100644 index 0000000..ed00d97 --- /dev/null +++ b/python/jinja2/parser.py @@ -0,0 +1,903 @@ +# -*- coding: utf-8 -*- +""" + jinja2.parser + ~~~~~~~~~~~~~ + + Implements the template parser. + + :copyright: (c) 2017 by the Jinja Team. + :license: BSD, see LICENSE for more details. +""" +from jinja2 import nodes +from jinja2.exceptions import TemplateSyntaxError, TemplateAssertionError +from jinja2.lexer import describe_token, describe_token_expr +from jinja2._compat import imap + + +_statement_keywords = frozenset(['for', 'if', 'block', 'extends', 'print', + 'macro', 'include', 'from', 'import', + 'set', 'with', 'autoescape']) +_compare_operators = frozenset(['eq', 'ne', 'lt', 'lteq', 'gt', 'gteq']) + +_math_nodes = { + 'add': nodes.Add, + 'sub': nodes.Sub, + 'mul': nodes.Mul, + 'div': nodes.Div, + 'floordiv': nodes.FloorDiv, + 'mod': nodes.Mod, +} + + +class Parser(object): + """This is the central parsing class Jinja2 uses. It's passed to + extensions and can be used to parse expressions or statements. + """ + + def __init__(self, environment, source, name=None, filename=None, + state=None): + self.environment = environment + self.stream = environment._tokenize(source, name, filename, state) + self.name = name + self.filename = filename + self.closed = False + self.extensions = {} + for extension in environment.iter_extensions(): + for tag in extension.tags: + self.extensions[tag] = extension.parse + self._last_identifier = 0 + self._tag_stack = [] + self._end_token_stack = [] + + def fail(self, msg, lineno=None, exc=TemplateSyntaxError): + """Convenience method that raises `exc` with the message, passed + line number or last line number as well as the current name and + filename. + """ + if lineno is None: + lineno = self.stream.current.lineno + raise exc(msg, lineno, self.name, self.filename) + + def _fail_ut_eof(self, name, end_token_stack, lineno): + expected = [] + for exprs in end_token_stack: + expected.extend(imap(describe_token_expr, exprs)) + if end_token_stack: + currently_looking = ' or '.join( + "'%s'" % describe_token_expr(expr) + for expr in end_token_stack[-1]) + else: + currently_looking = None + + if name is None: + message = ['Unexpected end of template.'] + else: + message = ['Encountered unknown tag \'%s\'.' % name] + + if currently_looking: + if name is not None and name in expected: + message.append('You probably made a nesting mistake. Jinja ' + 'is expecting this tag, but currently looking ' + 'for %s.' 
% currently_looking) + else: + message.append('Jinja was looking for the following tags: ' + '%s.' % currently_looking) + + if self._tag_stack: + message.append('The innermost block that needs to be ' + 'closed is \'%s\'.' % self._tag_stack[-1]) + + self.fail(' '.join(message), lineno) + + def fail_unknown_tag(self, name, lineno=None): + """Called if the parser encounters an unknown tag. Tries to fail + with a human readable error message that could help to identify + the problem. + """ + return self._fail_ut_eof(name, self._end_token_stack, lineno) + + def fail_eof(self, end_tokens=None, lineno=None): + """Like fail_unknown_tag but for end of template situations.""" + stack = list(self._end_token_stack) + if end_tokens is not None: + stack.append(end_tokens) + return self._fail_ut_eof(None, stack, lineno) + + def is_tuple_end(self, extra_end_rules=None): + """Are we at the end of a tuple?""" + if self.stream.current.type in ('variable_end', 'block_end', 'rparen'): + return True + elif extra_end_rules is not None: + return self.stream.current.test_any(extra_end_rules) + return False + + def free_identifier(self, lineno=None): + """Return a new free identifier as :class:`~jinja2.nodes.InternalName`.""" + self._last_identifier += 1 + rv = object.__new__(nodes.InternalName) + nodes.Node.__init__(rv, 'fi%d' % self._last_identifier, lineno=lineno) + return rv + + def parse_statement(self): + """Parse a single statement.""" + token = self.stream.current + if token.type != 'name': + self.fail('tag name expected', token.lineno) + self._tag_stack.append(token.value) + pop_tag = True + try: + if token.value in _statement_keywords: + return getattr(self, 'parse_' + self.stream.current.value)() + if token.value == 'call': + return self.parse_call_block() + if token.value == 'filter': + return self.parse_filter_block() + ext = self.extensions.get(token.value) + if ext is not None: + return ext(self) + + # did not work out, remove the token we pushed by accident + # from the stack so that the unknown tag fail function can + # produce a proper error message. + self._tag_stack.pop() + pop_tag = False + self.fail_unknown_tag(token.value, token.lineno) + finally: + if pop_tag: + self._tag_stack.pop() + + def parse_statements(self, end_tokens, drop_needle=False): + """Parse multiple statements into a list until one of the end tokens + is reached. This is used to parse the body of statements as it also + parses template data if appropriate. The parser checks first if the + current token is a colon and skips it if there is one. Then it checks + for the block end and parses until if one of the `end_tokens` is + reached. Per default the active token in the stream at the end of + the call is the matched end token. If this is not wanted `drop_needle` + can be set to `True` and the end token is removed. + """ + # the first token may be a colon for python compatibility + self.stream.skip_if('colon') + + # in the future it would be possible to add whole code sections + # by adding some sort of end of statement token and parsing those here. 
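+        # (Walkthrough added for clarity:) for "{% if x %}...{% endif %}"
+        # the stream sits on the block_end of the opening tag at this point;
+        # subparse() then collects template data and nested tags until it
+        # reaches one of `end_tokens`, e.g. 'name:endif'.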
+ self.stream.expect('block_end') + result = self.subparse(end_tokens) + + # we reached the end of the template too early, the subparser + # does not check for this, so we do that now + if self.stream.current.type == 'eof': + self.fail_eof(end_tokens) + + if drop_needle: + next(self.stream) + return result + + def parse_set(self): + """Parse an assign statement.""" + lineno = next(self.stream).lineno + target = self.parse_assign_target(with_namespace=True) + if self.stream.skip_if('assign'): + expr = self.parse_tuple() + return nodes.Assign(target, expr, lineno=lineno) + filter_node = self.parse_filter(None) + body = self.parse_statements(('name:endset',), + drop_needle=True) + return nodes.AssignBlock(target, filter_node, body, lineno=lineno) + + def parse_for(self): + """Parse a for loop.""" + lineno = self.stream.expect('name:for').lineno + target = self.parse_assign_target(extra_end_rules=('name:in',)) + self.stream.expect('name:in') + iter = self.parse_tuple(with_condexpr=False, + extra_end_rules=('name:recursive',)) + test = None + if self.stream.skip_if('name:if'): + test = self.parse_expression() + recursive = self.stream.skip_if('name:recursive') + body = self.parse_statements(('name:endfor', 'name:else')) + if next(self.stream).value == 'endfor': + else_ = [] + else: + else_ = self.parse_statements(('name:endfor',), drop_needle=True) + return nodes.For(target, iter, body, else_, test, + recursive, lineno=lineno) + + def parse_if(self): + """Parse an if construct.""" + node = result = nodes.If(lineno=self.stream.expect('name:if').lineno) + while 1: + node.test = self.parse_tuple(with_condexpr=False) + node.body = self.parse_statements(('name:elif', 'name:else', + 'name:endif')) + node.elif_ = [] + node.else_ = [] + token = next(self.stream) + if token.test('name:elif'): + node = nodes.If(lineno=self.stream.current.lineno) + result.elif_.append(node) + continue + elif token.test('name:else'): + result.else_ = self.parse_statements(('name:endif',), + drop_needle=True) + break + return result + + def parse_with(self): + node = nodes.With(lineno=next(self.stream).lineno) + targets = [] + values = [] + while self.stream.current.type != 'block_end': + lineno = self.stream.current.lineno + if targets: + self.stream.expect('comma') + target = self.parse_assign_target() + target.set_ctx('param') + targets.append(target) + self.stream.expect('assign') + values.append(self.parse_expression()) + node.targets = targets + node.values = values + node.body = self.parse_statements(('name:endwith',), + drop_needle=True) + return node + + def parse_autoescape(self): + node = nodes.ScopedEvalContextModifier(lineno=next(self.stream).lineno) + node.options = [ + nodes.Keyword('autoescape', self.parse_expression()) + ] + node.body = self.parse_statements(('name:endautoescape',), + drop_needle=True) + return nodes.Scope([node]) + + def parse_block(self): + node = nodes.Block(lineno=next(self.stream).lineno) + node.name = self.stream.expect('name').value + node.scoped = self.stream.skip_if('name:scoped') + + # common problem people encounter when switching from django + # to jinja. we do not support hyphens in block names, so let's + # raise a nicer error message in that case. 
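+        # (Example added for clarity:) "{% block foo-bar %}" lexes as
+        # name 'foo', sub '-', name 'bar', so a 'sub' token right after the
+        # block name reveals the attempted hyphen.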
+ if self.stream.current.type == 'sub': + self.fail('Block names in Jinja have to be valid Python ' + 'identifiers and may not contain hyphens, use an ' + 'underscore instead.') + + node.body = self.parse_statements(('name:endblock',), drop_needle=True) + self.stream.skip_if('name:' + node.name) + return node + + def parse_extends(self): + node = nodes.Extends(lineno=next(self.stream).lineno) + node.template = self.parse_expression() + return node + + def parse_import_context(self, node, default): + if self.stream.current.test_any('name:with', 'name:without') and \ + self.stream.look().test('name:context'): + node.with_context = next(self.stream).value == 'with' + self.stream.skip() + else: + node.with_context = default + return node + + def parse_include(self): + node = nodes.Include(lineno=next(self.stream).lineno) + node.template = self.parse_expression() + if self.stream.current.test('name:ignore') and \ + self.stream.look().test('name:missing'): + node.ignore_missing = True + self.stream.skip(2) + else: + node.ignore_missing = False + return self.parse_import_context(node, True) + + def parse_import(self): + node = nodes.Import(lineno=next(self.stream).lineno) + node.template = self.parse_expression() + self.stream.expect('name:as') + node.target = self.parse_assign_target(name_only=True).name + return self.parse_import_context(node, False) + + def parse_from(self): + node = nodes.FromImport(lineno=next(self.stream).lineno) + node.template = self.parse_expression() + self.stream.expect('name:import') + node.names = [] + + def parse_context(): + if self.stream.current.value in ('with', 'without') and \ + self.stream.look().test('name:context'): + node.with_context = next(self.stream).value == 'with' + self.stream.skip() + return True + return False + + while 1: + if node.names: + self.stream.expect('comma') + if self.stream.current.type == 'name': + if parse_context(): + break + target = self.parse_assign_target(name_only=True) + if target.name.startswith('_'): + self.fail('names starting with an underline can not ' + 'be imported', target.lineno, + exc=TemplateAssertionError) + if self.stream.skip_if('name:as'): + alias = self.parse_assign_target(name_only=True) + node.names.append((target.name, alias.name)) + else: + node.names.append(target.name) + if parse_context() or self.stream.current.type != 'comma': + break + else: + self.stream.expect('name') + if not hasattr(node, 'with_context'): + node.with_context = False + return node + + def parse_signature(self, node): + node.args = args = [] + node.defaults = defaults = [] + self.stream.expect('lparen') + while self.stream.current.type != 'rparen': + if args: + self.stream.expect('comma') + arg = self.parse_assign_target(name_only=True) + arg.set_ctx('param') + if self.stream.skip_if('assign'): + defaults.append(self.parse_expression()) + elif defaults: + self.fail('non-default argument follows default argument') + args.append(arg) + self.stream.expect('rparen') + + def parse_call_block(self): + node = nodes.CallBlock(lineno=next(self.stream).lineno) + if self.stream.current.type == 'lparen': + self.parse_signature(node) + else: + node.args = [] + node.defaults = [] + + node.call = self.parse_expression() + if not isinstance(node.call, nodes.Call): + self.fail('expected call', node.lineno) + node.body = self.parse_statements(('name:endcall',), drop_needle=True) + return node + + def parse_filter_block(self): + node = nodes.FilterBlock(lineno=next(self.stream).lineno) + node.filter = self.parse_filter(None, start_inline=True) + 
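+        # (Note added for clarity:) start_inline=True above because in
+        # "{% filter upper %}...{% endfilter %}" the first filter name
+        # follows the tag directly, without a leading '|'.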
node.body = self.parse_statements(('name:endfilter',), + drop_needle=True) + return node + + def parse_macro(self): + node = nodes.Macro(lineno=next(self.stream).lineno) + node.name = self.parse_assign_target(name_only=True).name + self.parse_signature(node) + node.body = self.parse_statements(('name:endmacro',), + drop_needle=True) + return node + + def parse_print(self): + node = nodes.Output(lineno=next(self.stream).lineno) + node.nodes = [] + while self.stream.current.type != 'block_end': + if node.nodes: + self.stream.expect('comma') + node.nodes.append(self.parse_expression()) + return node + + def parse_assign_target(self, with_tuple=True, name_only=False, + extra_end_rules=None, with_namespace=False): + """Parse an assignment target. As Jinja2 allows assignments to + tuples, this function can parse all allowed assignment targets. Per + default assignments to tuples are parsed, that can be disable however + by setting `with_tuple` to `False`. If only assignments to names are + wanted `name_only` can be set to `True`. The `extra_end_rules` + parameter is forwarded to the tuple parsing function. If + `with_namespace` is enabled, a namespace assignment may be parsed. + """ + if with_namespace and self.stream.look().type == 'dot': + token = self.stream.expect('name') + next(self.stream) # dot + attr = self.stream.expect('name') + target = nodes.NSRef(token.value, attr.value, lineno=token.lineno) + elif name_only: + token = self.stream.expect('name') + target = nodes.Name(token.value, 'store', lineno=token.lineno) + else: + if with_tuple: + target = self.parse_tuple(simplified=True, + extra_end_rules=extra_end_rules) + else: + target = self.parse_primary() + target.set_ctx('store') + if not target.can_assign(): + self.fail('can\'t assign to %r' % target.__class__. + __name__.lower(), target.lineno) + return target + + def parse_expression(self, with_condexpr=True): + """Parse an expression. Per default all expressions are parsed, if + the optional `with_condexpr` parameter is set to `False` conditional + expressions are not parsed. 
+ """ + if with_condexpr: + return self.parse_condexpr() + return self.parse_or() + + def parse_condexpr(self): + lineno = self.stream.current.lineno + expr1 = self.parse_or() + while self.stream.skip_if('name:if'): + expr2 = self.parse_or() + if self.stream.skip_if('name:else'): + expr3 = self.parse_condexpr() + else: + expr3 = None + expr1 = nodes.CondExpr(expr2, expr1, expr3, lineno=lineno) + lineno = self.stream.current.lineno + return expr1 + + def parse_or(self): + lineno = self.stream.current.lineno + left = self.parse_and() + while self.stream.skip_if('name:or'): + right = self.parse_and() + left = nodes.Or(left, right, lineno=lineno) + lineno = self.stream.current.lineno + return left + + def parse_and(self): + lineno = self.stream.current.lineno + left = self.parse_not() + while self.stream.skip_if('name:and'): + right = self.parse_not() + left = nodes.And(left, right, lineno=lineno) + lineno = self.stream.current.lineno + return left + + def parse_not(self): + if self.stream.current.test('name:not'): + lineno = next(self.stream).lineno + return nodes.Not(self.parse_not(), lineno=lineno) + return self.parse_compare() + + def parse_compare(self): + lineno = self.stream.current.lineno + expr = self.parse_math1() + ops = [] + while 1: + token_type = self.stream.current.type + if token_type in _compare_operators: + next(self.stream) + ops.append(nodes.Operand(token_type, self.parse_math1())) + elif self.stream.skip_if('name:in'): + ops.append(nodes.Operand('in', self.parse_math1())) + elif (self.stream.current.test('name:not') and + self.stream.look().test('name:in')): + self.stream.skip(2) + ops.append(nodes.Operand('notin', self.parse_math1())) + else: + break + lineno = self.stream.current.lineno + if not ops: + return expr + return nodes.Compare(expr, ops, lineno=lineno) + + def parse_math1(self): + lineno = self.stream.current.lineno + left = self.parse_concat() + while self.stream.current.type in ('add', 'sub'): + cls = _math_nodes[self.stream.current.type] + next(self.stream) + right = self.parse_concat() + left = cls(left, right, lineno=lineno) + lineno = self.stream.current.lineno + return left + + def parse_concat(self): + lineno = self.stream.current.lineno + args = [self.parse_math2()] + while self.stream.current.type == 'tilde': + next(self.stream) + args.append(self.parse_math2()) + if len(args) == 1: + return args[0] + return nodes.Concat(args, lineno=lineno) + + def parse_math2(self): + lineno = self.stream.current.lineno + left = self.parse_pow() + while self.stream.current.type in ('mul', 'div', 'floordiv', 'mod'): + cls = _math_nodes[self.stream.current.type] + next(self.stream) + right = self.parse_pow() + left = cls(left, right, lineno=lineno) + lineno = self.stream.current.lineno + return left + + def parse_pow(self): + lineno = self.stream.current.lineno + left = self.parse_unary() + while self.stream.current.type == 'pow': + next(self.stream) + right = self.parse_unary() + left = nodes.Pow(left, right, lineno=lineno) + lineno = self.stream.current.lineno + return left + + def parse_unary(self, with_filter=True): + token_type = self.stream.current.type + lineno = self.stream.current.lineno + if token_type == 'sub': + next(self.stream) + node = nodes.Neg(self.parse_unary(False), lineno=lineno) + elif token_type == 'add': + next(self.stream) + node = nodes.Pos(self.parse_unary(False), lineno=lineno) + else: + node = self.parse_primary() + node = self.parse_postfix(node) + if with_filter: + node = self.parse_filter_expr(node) + return node + + def 
parse_primary(self): + token = self.stream.current + if token.type == 'name': + if token.value in ('true', 'false', 'True', 'False'): + node = nodes.Const(token.value in ('true', 'True'), + lineno=token.lineno) + elif token.value in ('none', 'None'): + node = nodes.Const(None, lineno=token.lineno) + else: + node = nodes.Name(token.value, 'load', lineno=token.lineno) + next(self.stream) + elif token.type == 'string': + next(self.stream) + buf = [token.value] + lineno = token.lineno + while self.stream.current.type == 'string': + buf.append(self.stream.current.value) + next(self.stream) + node = nodes.Const(''.join(buf), lineno=lineno) + elif token.type in ('integer', 'float'): + next(self.stream) + node = nodes.Const(token.value, lineno=token.lineno) + elif token.type == 'lparen': + next(self.stream) + node = self.parse_tuple(explicit_parentheses=True) + self.stream.expect('rparen') + elif token.type == 'lbracket': + node = self.parse_list() + elif token.type == 'lbrace': + node = self.parse_dict() + else: + self.fail("unexpected '%s'" % describe_token(token), token.lineno) + return node + + def parse_tuple(self, simplified=False, with_condexpr=True, + extra_end_rules=None, explicit_parentheses=False): + """Works like `parse_expression` but if multiple expressions are + delimited by a comma a :class:`~jinja2.nodes.Tuple` node is created. + This method could also return a regular expression instead of a tuple + if no commas where found. + + The default parsing mode is a full tuple. If `simplified` is `True` + only names and literals are parsed. The `no_condexpr` parameter is + forwarded to :meth:`parse_expression`. + + Because tuples do not require delimiters and may end in a bogus comma + an extra hint is needed that marks the end of a tuple. For example + for loops support tuples between `for` and `in`. In that case the + `extra_end_rules` is set to ``['name:in']``. + + `explicit_parentheses` is true if the parsing was triggered by an + expression in parentheses. This is used to figure out if an empty + tuple is a valid expression or not. + """ + lineno = self.stream.current.lineno + if simplified: + parse = self.parse_primary + elif with_condexpr: + parse = self.parse_expression + else: + parse = lambda: self.parse_expression(with_condexpr=False) + args = [] + is_tuple = False + while 1: + if args: + self.stream.expect('comma') + if self.is_tuple_end(extra_end_rules): + break + args.append(parse()) + if self.stream.current.type == 'comma': + is_tuple = True + else: + break + lineno = self.stream.current.lineno + + if not is_tuple: + if args: + return args[0] + + # if we don't have explicit parentheses, an empty tuple is + # not a valid expression. This would mean nothing (literally + # nothing) in the spot of an expression would be an empty + # tuple. 
+ if not explicit_parentheses: + self.fail('Expected an expression, got \'%s\'' % + describe_token(self.stream.current)) + + return nodes.Tuple(args, 'load', lineno=lineno) + + def parse_list(self): + token = self.stream.expect('lbracket') + items = [] + while self.stream.current.type != 'rbracket': + if items: + self.stream.expect('comma') + if self.stream.current.type == 'rbracket': + break + items.append(self.parse_expression()) + self.stream.expect('rbracket') + return nodes.List(items, lineno=token.lineno) + + def parse_dict(self): + token = self.stream.expect('lbrace') + items = [] + while self.stream.current.type != 'rbrace': + if items: + self.stream.expect('comma') + if self.stream.current.type == 'rbrace': + break + key = self.parse_expression() + self.stream.expect('colon') + value = self.parse_expression() + items.append(nodes.Pair(key, value, lineno=key.lineno)) + self.stream.expect('rbrace') + return nodes.Dict(items, lineno=token.lineno) + + def parse_postfix(self, node): + while 1: + token_type = self.stream.current.type + if token_type == 'dot' or token_type == 'lbracket': + node = self.parse_subscript(node) + # calls are valid both after postfix expressions (getattr + # and getitem) as well as filters and tests + elif token_type == 'lparen': + node = self.parse_call(node) + else: + break + return node + + def parse_filter_expr(self, node): + while 1: + token_type = self.stream.current.type + if token_type == 'pipe': + node = self.parse_filter(node) + elif token_type == 'name' and self.stream.current.value == 'is': + node = self.parse_test(node) + # calls are valid both after postfix expressions (getattr + # and getitem) as well as filters and tests + elif token_type == 'lparen': + node = self.parse_call(node) + else: + break + return node + + def parse_subscript(self, node): + token = next(self.stream) + if token.type == 'dot': + attr_token = self.stream.current + next(self.stream) + if attr_token.type == 'name': + return nodes.Getattr(node, attr_token.value, 'load', + lineno=token.lineno) + elif attr_token.type != 'integer': + self.fail('expected name or number', attr_token.lineno) + arg = nodes.Const(attr_token.value, lineno=attr_token.lineno) + return nodes.Getitem(node, arg, 'load', lineno=token.lineno) + if token.type == 'lbracket': + args = [] + while self.stream.current.type != 'rbracket': + if args: + self.stream.expect('comma') + args.append(self.parse_subscribed()) + self.stream.expect('rbracket') + if len(args) == 1: + arg = args[0] + else: + arg = nodes.Tuple(args, 'load', lineno=token.lineno) + return nodes.Getitem(node, arg, 'load', lineno=token.lineno) + self.fail('expected subscript expression', self.lineno) + + def parse_subscribed(self): + lineno = self.stream.current.lineno + + if self.stream.current.type == 'colon': + next(self.stream) + args = [None] + else: + node = self.parse_expression() + if self.stream.current.type != 'colon': + return node + next(self.stream) + args = [node] + + if self.stream.current.type == 'colon': + args.append(None) + elif self.stream.current.type not in ('rbracket', 'comma'): + args.append(self.parse_expression()) + else: + args.append(None) + + if self.stream.current.type == 'colon': + next(self.stream) + if self.stream.current.type not in ('rbracket', 'comma'): + args.append(self.parse_expression()) + else: + args.append(None) + else: + args.append(None) + + return nodes.Slice(lineno=lineno, *args) + + def parse_call(self, node): + token = self.stream.expect('lparen') + args = [] + kwargs = [] + dyn_args = dyn_kwargs = 
None + require_comma = False + + def ensure(expr): + if not expr: + self.fail('invalid syntax for function call expression', + token.lineno) + + while self.stream.current.type != 'rparen': + if require_comma: + self.stream.expect('comma') + # support for trailing comma + if self.stream.current.type == 'rparen': + break + if self.stream.current.type == 'mul': + ensure(dyn_args is None and dyn_kwargs is None) + next(self.stream) + dyn_args = self.parse_expression() + elif self.stream.current.type == 'pow': + ensure(dyn_kwargs is None) + next(self.stream) + dyn_kwargs = self.parse_expression() + else: + ensure(dyn_args is None and dyn_kwargs is None) + if self.stream.current.type == 'name' and \ + self.stream.look().type == 'assign': + key = self.stream.current.value + self.stream.skip(2) + value = self.parse_expression() + kwargs.append(nodes.Keyword(key, value, + lineno=value.lineno)) + else: + ensure(not kwargs) + args.append(self.parse_expression()) + + require_comma = True + self.stream.expect('rparen') + + if node is None: + return args, kwargs, dyn_args, dyn_kwargs + return nodes.Call(node, args, kwargs, dyn_args, dyn_kwargs, + lineno=token.lineno) + + def parse_filter(self, node, start_inline=False): + while self.stream.current.type == 'pipe' or start_inline: + if not start_inline: + next(self.stream) + token = self.stream.expect('name') + name = token.value + while self.stream.current.type == 'dot': + next(self.stream) + name += '.' + self.stream.expect('name').value + if self.stream.current.type == 'lparen': + args, kwargs, dyn_args, dyn_kwargs = self.parse_call(None) + else: + args = [] + kwargs = [] + dyn_args = dyn_kwargs = None + node = nodes.Filter(node, name, args, kwargs, dyn_args, + dyn_kwargs, lineno=token.lineno) + start_inline = False + return node + + def parse_test(self, node): + token = next(self.stream) + if self.stream.current.test('name:not'): + next(self.stream) + negated = True + else: + negated = False + name = self.stream.expect('name').value + while self.stream.current.type == 'dot': + next(self.stream) + name += '.' 
+ self.stream.expect('name').value + dyn_args = dyn_kwargs = None + kwargs = [] + if self.stream.current.type == 'lparen': + args, kwargs, dyn_args, dyn_kwargs = self.parse_call(None) + elif (self.stream.current.type in ('name', 'string', 'integer', + 'float', 'lparen', 'lbracket', + 'lbrace') and not + self.stream.current.test_any('name:else', 'name:or', + 'name:and')): + if self.stream.current.test('name:is'): + self.fail('You cannot chain multiple tests with is') + args = [self.parse_primary()] + else: + args = [] + node = nodes.Test(node, name, args, kwargs, dyn_args, + dyn_kwargs, lineno=token.lineno) + if negated: + node = nodes.Not(node, lineno=token.lineno) + return node + + def subparse(self, end_tokens=None): + body = [] + data_buffer = [] + add_data = data_buffer.append + + if end_tokens is not None: + self._end_token_stack.append(end_tokens) + + def flush_data(): + if data_buffer: + lineno = data_buffer[0].lineno + body.append(nodes.Output(data_buffer[:], lineno=lineno)) + del data_buffer[:] + + try: + while self.stream: + token = self.stream.current + if token.type == 'data': + if token.value: + add_data(nodes.TemplateData(token.value, + lineno=token.lineno)) + next(self.stream) + elif token.type == 'variable_begin': + next(self.stream) + add_data(self.parse_tuple(with_condexpr=True)) + self.stream.expect('variable_end') + elif token.type == 'block_begin': + flush_data() + next(self.stream) + if end_tokens is not None and \ + self.stream.current.test_any(*end_tokens): + return body + rv = self.parse_statement() + if isinstance(rv, list): + body.extend(rv) + else: + body.append(rv) + self.stream.expect('block_end') + else: + raise AssertionError('internal parsing error') + + flush_data() + finally: + if end_tokens is not None: + self._end_token_stack.pop() + + return body + + def parse(self): + """Parse the whole template into a `Template` node.""" + result = nodes.Template(self.subparse(), lineno=1) + result.set_environment(self.environment) + return result diff --git a/python/jinja2/runtime.py b/python/jinja2/runtime.py new file mode 100644 index 0000000..f9d7a68 --- /dev/null +++ b/python/jinja2/runtime.py @@ -0,0 +1,813 @@ +# -*- coding: utf-8 -*- +""" + jinja2.runtime + ~~~~~~~~~~~~~~ + + Runtime helpers. + + :copyright: (c) 2017 by the Jinja Team. + :license: BSD. +""" +import sys + +from itertools import chain +from types import MethodType + +from jinja2.nodes import EvalContext, _context_function_types +from jinja2.utils import Markup, soft_unicode, escape, missing, concat, \ + internalcode, object_type_repr, evalcontextfunction, Namespace +from jinja2.exceptions import UndefinedError, TemplateRuntimeError, \ + TemplateNotFound +from jinja2._compat import imap, text_type, iteritems, \ + implements_iterator, implements_to_string, string_types, PY2, \ + with_metaclass + + +# these variables are exported to the template runtime +__all__ = ['LoopContext', 'TemplateReference', 'Macro', 'Markup', + 'TemplateRuntimeError', 'missing', 'concat', 'escape', + 'markup_join', 'unicode_join', 'to_string', 'identity', + 'TemplateNotFound', 'Namespace'] + +#: the name of the function that is used to convert something into +#: a string. We can just use the text type here. +to_string = text_type + +#: the identity function. 
Useful for certain things in the environment +identity = lambda x: x + +_first_iteration = object() +_last_iteration = object() + + +def markup_join(seq): + """Concatenation that escapes if necessary and converts to unicode.""" + buf = [] + iterator = imap(soft_unicode, seq) + for arg in iterator: + buf.append(arg) + if hasattr(arg, '__html__'): + return Markup(u'').join(chain(buf, iterator)) + return concat(buf) + + +def unicode_join(seq): + """Simple args to unicode conversion and concatenation.""" + return concat(imap(text_type, seq)) + + +def new_context(environment, template_name, blocks, vars=None, + shared=None, globals=None, locals=None): + """Internal helper to for context creation.""" + if vars is None: + vars = {} + if shared: + parent = vars + else: + parent = dict(globals or (), **vars) + if locals: + # if the parent is shared a copy should be created because + # we don't want to modify the dict passed + if shared: + parent = dict(parent) + for key, value in iteritems(locals): + if value is not missing: + parent[key] = value + return environment.context_class(environment, parent, template_name, + blocks) + + +class TemplateReference(object): + """The `self` in templates.""" + + def __init__(self, context): + self.__context = context + + def __getitem__(self, name): + blocks = self.__context.blocks[name] + return BlockReference(name, self.__context, blocks, 0) + + def __repr__(self): + return '<%s %r>' % ( + self.__class__.__name__, + self.__context.name + ) + + +def _get_func(x): + return getattr(x, '__func__', x) + + +class ContextMeta(type): + + def __new__(cls, name, bases, d): + rv = type.__new__(cls, name, bases, d) + if bases == (): + return rv + + resolve = _get_func(rv.resolve) + default_resolve = _get_func(Context.resolve) + resolve_or_missing = _get_func(rv.resolve_or_missing) + default_resolve_or_missing = _get_func(Context.resolve_or_missing) + + # If we have a changed resolve but no changed default or missing + # resolve we invert the call logic. + if resolve is not default_resolve and \ + resolve_or_missing is default_resolve_or_missing: + rv._legacy_resolve_mode = True + elif resolve is default_resolve and \ + resolve_or_missing is default_resolve_or_missing: + rv._fast_resolve_mode = True + + return rv + + +def resolve_or_missing(context, key, missing=missing): + if key in context.vars: + return context.vars[key] + if key in context.parent: + return context.parent[key] + return missing + + +class Context(with_metaclass(ContextMeta)): + """The template context holds the variables of a template. It stores the + values passed to the template and also the names the template exports. + Creating instances is neither supported nor useful as it's created + automatically at various stages of the template evaluation and should not + be created by hand. + + The context is immutable. Modifications on :attr:`parent` **must not** + happen and modifications on :attr:`vars` are allowed from generated + template code only. Template filters and global functions marked as + :func:`contextfunction`\\s get the active context passed as first argument + and are allowed to access the context read-only. + + The template context supports read only dict operations (`get`, + `keys`, `values`, `items`, `iterkeys`, `itervalues`, `iteritems`, + `__getitem__`, `__contains__`). Additionally there is a :meth:`resolve` + method that doesn't fail with a `KeyError` but returns an + :class:`Undefined` object for missing variables. 
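(Annotation, not part of the patch: a minimal sketch of the resolution
order implemented by `resolve_or_missing` above --

    from jinja2 import Environment
    from jinja2.runtime import new_context, missing

    env = Environment()
    ctx = new_context(env, 'demo', {}, vars={'a': 1},
                      globals={'a': 0, 'b': 2})
    assert ctx.resolve('a') == 1    # template vars shadow globals
    assert ctx.resolve('b') == 2    # falls back to the parent mapping
    assert ctx.resolve_or_missing('c') is missing

-- template-local `vars` win over the `parent` mapping built from
`globals`.)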
+ """ + # XXX: we want to eventually make this be a deprecation warning and + # remove it. + _legacy_resolve_mode = False + _fast_resolve_mode = False + + def __init__(self, environment, parent, name, blocks): + self.parent = parent + self.vars = {} + self.environment = environment + self.eval_ctx = EvalContext(self.environment, name) + self.exported_vars = set() + self.name = name + + # create the initial mapping of blocks. Whenever template inheritance + # takes place the runtime will update this mapping with the new blocks + # from the template. + self.blocks = dict((k, [v]) for k, v in iteritems(blocks)) + + # In case we detect the fast resolve mode we can set up an alias + # here that bypasses the legacy code logic. + if self._fast_resolve_mode: + self.resolve_or_missing = MethodType(resolve_or_missing, self) + + def super(self, name, current): + """Render a parent block.""" + try: + blocks = self.blocks[name] + index = blocks.index(current) + 1 + blocks[index] + except LookupError: + return self.environment.undefined('there is no parent block ' + 'called %r.' % name, + name='super') + return BlockReference(name, self, blocks, index) + + def get(self, key, default=None): + """Returns an item from the template context, if it doesn't exist + `default` is returned. + """ + try: + return self[key] + except KeyError: + return default + + def resolve(self, key): + """Looks up a variable like `__getitem__` or `get` but returns an + :class:`Undefined` object with the name of the name looked up. + """ + if self._legacy_resolve_mode: + rv = resolve_or_missing(self, key) + else: + rv = self.resolve_or_missing(key) + if rv is missing: + return self.environment.undefined(name=key) + return rv + + def resolve_or_missing(self, key): + """Resolves a variable like :meth:`resolve` but returns the + special `missing` value if it cannot be found. + """ + if self._legacy_resolve_mode: + rv = self.resolve(key) + if isinstance(rv, Undefined): + rv = missing + return rv + return resolve_or_missing(self, key) + + def get_exported(self): + """Get a new dict with the exported variables.""" + return dict((k, self.vars[k]) for k in self.exported_vars) + + def get_all(self): + """Return the complete context as dict including the exported + variables. For optimizations reasons this might not return an + actual copy so be careful with using it. + """ + if not self.vars: + return self.parent + if not self.parent: + return self.vars + return dict(self.parent, **self.vars) + + @internalcode + def call(__self, __obj, *args, **kwargs): + """Call the callable with the arguments and keyword arguments + provided but inject the active context or environment as first + argument if the callable is a :func:`contextfunction` or + :func:`environmentfunction`. 
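(Annotation, not part of the patch: illustrative use of the injection
performed by `call` --

    from jinja2 import Environment, contextfunction

    @contextfunction
    def exported(context):
        return sorted(context.exported_vars)

    env = Environment()
    env.globals['exported'] = exported
    env.from_string('{{ exported() }}').render()   # context passed implicitly

-- the decorator only sets a marker attribute; the injection happens here.)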
+ """ + if __debug__: + __traceback_hide__ = True # noqa + + # Allow callable classes to take a context + if hasattr(__obj, '__call__'): + fn = __obj.__call__ + for fn_type in ('contextfunction', + 'evalcontextfunction', + 'environmentfunction'): + if hasattr(fn, fn_type): + __obj = fn + break + + if isinstance(__obj, _context_function_types): + if getattr(__obj, 'contextfunction', 0): + args = (__self,) + args + elif getattr(__obj, 'evalcontextfunction', 0): + args = (__self.eval_ctx,) + args + elif getattr(__obj, 'environmentfunction', 0): + args = (__self.environment,) + args + try: + return __obj(*args, **kwargs) + except StopIteration: + return __self.environment.undefined('value was undefined because ' + 'a callable raised a ' + 'StopIteration exception') + + def derived(self, locals=None): + """Internal helper function to create a derived context. This is + used in situations where the system needs a new context in the same + template that is independent. + """ + context = new_context(self.environment, self.name, {}, + self.get_all(), True, None, locals) + context.eval_ctx = self.eval_ctx + context.blocks.update((k, list(v)) for k, v in iteritems(self.blocks)) + return context + + def _all(meth): + proxy = lambda self: getattr(self.get_all(), meth)() + proxy.__doc__ = getattr(dict, meth).__doc__ + proxy.__name__ = meth + return proxy + + keys = _all('keys') + values = _all('values') + items = _all('items') + + # not available on python 3 + if PY2: + iterkeys = _all('iterkeys') + itervalues = _all('itervalues') + iteritems = _all('iteritems') + del _all + + def __contains__(self, name): + return name in self.vars or name in self.parent + + def __getitem__(self, key): + """Lookup a variable or raise `KeyError` if the variable is + undefined. + """ + item = self.resolve_or_missing(key) + if item is missing: + raise KeyError(key) + return item + + def __repr__(self): + return '<%s %s of %r>' % ( + self.__class__.__name__, + repr(self.get_all()), + self.name + ) + + +# register the context as mapping if possible +try: + from collections import Mapping + Mapping.register(Context) +except ImportError: + pass + + +class BlockReference(object): + """One block on a template reference.""" + + def __init__(self, name, context, stack, depth): + self.name = name + self._context = context + self._stack = stack + self._depth = depth + + @property + def super(self): + """Super the block.""" + if self._depth + 1 >= len(self._stack): + return self._context.environment. \ + undefined('there is no parent block called %r.' 
%
+                          self.name, name='super')
+        return BlockReference(self.name, self._context, self._stack,
+                              self._depth + 1)
+
+    @internalcode
+    def __call__(self):
+        rv = concat(self._stack[self._depth](self._context))
+        if self._context.eval_ctx.autoescape:
+            rv = Markup(rv)
+        return rv
+
+
+class LoopContextBase(object):
+    """A loop context for dynamic iteration."""
+
+    _before = _first_iteration
+    _current = _first_iteration
+    _after = _last_iteration
+    _length = None
+
+    def __init__(self, undefined, recurse=None, depth0=0):
+        self._undefined = undefined
+        self._recurse = recurse
+        self.index0 = -1
+        self.depth0 = depth0
+        self._last_checked_value = missing
+
+    def cycle(self, *args):
+        """Cycles among the arguments with the current loop index."""
+        if not args:
+            raise TypeError('no items for cycling given')
+        return args[self.index0 % len(args)]
+
+    def changed(self, *value):
+        """Checks whether the value has changed since the last call."""
+        if self._last_checked_value != value:
+            self._last_checked_value = value
+            return True
+        return False
+
+    first = property(lambda x: x.index0 == 0)
+    last = property(lambda x: x._after is _last_iteration)
+    index = property(lambda x: x.index0 + 1)
+    revindex = property(lambda x: x.length - x.index0)
+    revindex0 = property(lambda x: x.length - x.index)
+    depth = property(lambda x: x.depth0 + 1)
+
+    @property
+    def previtem(self):
+        if self._before is _first_iteration:
+            return self._undefined('there is no previous item')
+        return self._before
+
+    @property
+    def nextitem(self):
+        if self._after is _last_iteration:
+            return self._undefined('there is no next item')
+        return self._after
+
+    def __len__(self):
+        return self.length
+
+    @internalcode
+    def loop(self, iterable):
+        if self._recurse is None:
+            raise TypeError('Tried to call non recursive loop. Maybe you '
+                            "forgot the 'recursive' modifier.")
+        return self._recurse(iterable, self._recurse, self.depth0 + 1)
+
+    # a nifty trick to enhance the error message if someone tried to call
+    # the loop without or with too many arguments.
+    __call__ = loop
+    del loop
+
+    def __repr__(self):
+        return '<%s %r/%r>' % (
+            self.__class__.__name__,
+            self.index,
+            self.length
+        )
+
+
+class LoopContext(LoopContextBase):
+
+    def __init__(self, iterable, undefined, recurse=None, depth0=0):
+        LoopContextBase.__init__(self, undefined, recurse, depth0)
+        self._iterator = iter(iterable)
+
+        # try to get the length of the iterable early. This must be done
+        # here because there are some broken iterators around whose
+        # __len__ is the number of iterations left (I'm looking at you,
+        # listreverseiterator!).
+        try:
+            self._length = len(iterable)
+        except (TypeError, AttributeError):
+            self._length = None
+        self._after = self._safe_next()
+
+    @property
+    def length(self):
+        if self._length is None:
+            # if it was not possible to get the length of the iterator when
+            # the loop context was created (i.e. iterating over a generator)
+            # we have to convert the iterable into a sequence and use the
+            # length of that + the number of iterations so far.
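# (Annotation, not part of the patch: the attributes above are what
# templates see as `loop`, e.g.
#
#     {% for x in seq %}{{ loop.index }}:{{ x }}{% if not loop.last %}, {% endif %}{% endfor %}
#
# renders "1:a, 2:b, 3:c" for seq='abc'; `loop.last` is known one step
# early thanks to the `_after` lookahead kept by LoopContextIterator.)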
+            iterable = tuple(self._iterator)
+            self._iterator = iter(iterable)
+            iterations_done = self.index0 + 2
+            self._length = len(iterable) + iterations_done
+        return self._length
+
+    def __iter__(self):
+        return LoopContextIterator(self)
+
+    def _safe_next(self):
+        try:
+            return next(self._iterator)
+        except StopIteration:
+            return _last_iteration
+
+
+@implements_iterator
+class LoopContextIterator(object):
+    """The iterator for a loop context."""
+    __slots__ = ('context',)
+
+    def __init__(self, context):
+        self.context = context
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        ctx = self.context
+        ctx.index0 += 1
+        if ctx._after is _last_iteration:
+            raise StopIteration()
+        ctx._before = ctx._current
+        ctx._current = ctx._after
+        ctx._after = ctx._safe_next()
+        return ctx._current, ctx
+
+
+class Macro(object):
+    """Wraps a macro function."""
+
+    def __init__(self, environment, func, name, arguments,
+                 catch_kwargs, catch_varargs, caller,
+                 default_autoescape=None):
+        self._environment = environment
+        self._func = func
+        self._argument_count = len(arguments)
+        self.name = name
+        self.arguments = arguments
+        self.catch_kwargs = catch_kwargs
+        self.catch_varargs = catch_varargs
+        self.caller = caller
+        self.explicit_caller = 'caller' in arguments
+        if default_autoescape is None:
+            default_autoescape = environment.autoescape
+        self._default_autoescape = default_autoescape
+
+    @internalcode
+    @evalcontextfunction
+    def __call__(self, *args, **kwargs):
+        # This requires a bit of explanation. In the past we used to
+        # decide largely based on compile-time information whether a
+        # macro is safe or unsafe. While there was a volatile mode, it
+        # was largely unused for deciding on escaping. This turns out to
+        # be problematic for macros because whether a macro is safe
+        # depends not so much on the escape mode when it was defined as
+        # on the mode when it is used.
+        #
+        # However, because we export macros from the module system and
+        # there are historic callers that do not pass an eval context (and
+        # will continue not to pass one), we need to perform an instance
+        # check here.
+        #
+        # This is considered safe because an eval context is not a valid
+        # argument to callables otherwise anyway. Worst case here is
+        # that if no eval context is passed we fall back to the compile
+        # time autoescape flag.
+        if args and isinstance(args[0], EvalContext):
+            autoescape = args[0].autoescape
+            args = args[1:]
+        else:
+            autoescape = self._default_autoescape
+
+        # try to consume the positional arguments
+        arguments = list(args[:self._argument_count])
+        off = len(arguments)
+
+        # For information on why this is necessary refer to the handling
+        # of caller in the `macro_body` handler in the compiler.
+        found_caller = False
+
+        # if the number of arguments consumed is not the number of
+        # arguments expected we start filling in keyword arguments
+        # and defaults.
+        if off != self._argument_count:
+            for idx, name in enumerate(self.arguments[len(arguments):]):
+                try:
+                    value = kwargs.pop(name)
+                except KeyError:
+                    value = missing
+                if name == 'caller':
+                    found_caller = True
+                arguments.append(value)
+        else:
+            found_caller = self.explicit_caller
+
+        # it's important that the order of these arguments does not change
+        # if not also changed in the compiler's `function_scoping` method.
+        # the order is caller, keyword arguments, positional arguments!
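# (Annotation, not part of the patch: for a template such as
#
#     {% macro render(item) %}{{ caller(item) }}{% endmacro %}
#     {% call(x) render(42) %}got {{ x }}{% endcall %}
#
# the {% call %} block reaches this method as the keyword argument
# `caller`, which is popped from `kwargs` below and appended last.)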
+ if self.caller and not found_caller: + caller = kwargs.pop('caller', None) + if caller is None: + caller = self._environment.undefined('No caller defined', + name='caller') + arguments.append(caller) + + if self.catch_kwargs: + arguments.append(kwargs) + elif kwargs: + if 'caller' in kwargs: + raise TypeError('macro %r was invoked with two values for ' + 'the special caller argument. This is ' + 'most likely a bug.' % self.name) + raise TypeError('macro %r takes no keyword argument %r' % + (self.name, next(iter(kwargs)))) + if self.catch_varargs: + arguments.append(args[self._argument_count:]) + elif len(args) > self._argument_count: + raise TypeError('macro %r takes not more than %d argument(s)' % + (self.name, len(self.arguments))) + + return self._invoke(arguments, autoescape) + + def _invoke(self, arguments, autoescape): + """This method is being swapped out by the async implementation.""" + rv = self._func(*arguments) + if autoescape: + rv = Markup(rv) + return rv + + def __repr__(self): + return '<%s %s>' % ( + self.__class__.__name__, + self.name is None and 'anonymous' or repr(self.name) + ) + + +@implements_to_string +class Undefined(object): + """The default undefined type. This undefined type can be printed and + iterated over, but every other access will raise an :exc:`jinja2.exceptions.UndefinedError`: + + >>> foo = Undefined(name='foo') + >>> str(foo) + '' + >>> not foo + True + >>> foo + 42 + Traceback (most recent call last): + ... + jinja2.exceptions.UndefinedError: 'foo' is undefined + """ + __slots__ = ('_undefined_hint', '_undefined_obj', '_undefined_name', + '_undefined_exception') + + def __init__(self, hint=None, obj=missing, name=None, exc=UndefinedError): + self._undefined_hint = hint + self._undefined_obj = obj + self._undefined_name = name + self._undefined_exception = exc + + @internalcode + def _fail_with_undefined_error(self, *args, **kwargs): + """Regular callback function for undefined objects that raises an + `jinja2.exceptions.UndefinedError` on call. 
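(Annotation, not part of the patch: with the default `Undefined`,
printing is allowed but most other operations trip this callback --

    from jinja2 import Environment

    env = Environment()
    env.from_string('{{ nothing }}').render()       # '' -- printable
    env.from_string('{{ nothing + 1 }}').render()   # raises UndefinedError
)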
+ """ + if self._undefined_hint is None: + if self._undefined_obj is missing: + hint = '%r is undefined' % self._undefined_name + elif not isinstance(self._undefined_name, string_types): + hint = '%s has no element %r' % ( + object_type_repr(self._undefined_obj), + self._undefined_name + ) + else: + hint = '%r has no attribute %r' % ( + object_type_repr(self._undefined_obj), + self._undefined_name + ) + else: + hint = self._undefined_hint + raise self._undefined_exception(hint) + + @internalcode + def __getattr__(self, name): + if name[:2] == '__': + raise AttributeError(name) + return self._fail_with_undefined_error() + + __add__ = __radd__ = __mul__ = __rmul__ = __div__ = __rdiv__ = \ + __truediv__ = __rtruediv__ = __floordiv__ = __rfloordiv__ = \ + __mod__ = __rmod__ = __pos__ = __neg__ = __call__ = \ + __getitem__ = __lt__ = __le__ = __gt__ = __ge__ = __int__ = \ + __float__ = __complex__ = __pow__ = __rpow__ = __sub__ = \ + __rsub__ = _fail_with_undefined_error + + def __eq__(self, other): + return type(self) is type(other) + + def __ne__(self, other): + return not self.__eq__(other) + + def __hash__(self): + return id(type(self)) + + def __str__(self): + return u'' + + def __len__(self): + return 0 + + def __iter__(self): + if 0: + yield None + + def __nonzero__(self): + return False + __bool__ = __nonzero__ + + def __repr__(self): + return 'Undefined' + + +def make_logging_undefined(logger=None, base=None): + """Given a logger object this returns a new undefined class that will + log certain failures. It will log iterations and printing. If no + logger is given a default logger is created. + + Example:: + + logger = logging.getLogger(__name__) + LoggingUndefined = make_logging_undefined( + logger=logger, + base=Undefined + ) + + .. versionadded:: 2.8 + + :param logger: the logger to use. If not provided, a default logger + is created. + :param base: the base class to add logging functionality to. This + defaults to :class:`Undefined`. + """ + if logger is None: + import logging + logger = logging.getLogger(__name__) + logger.addHandler(logging.StreamHandler(sys.stderr)) + if base is None: + base = Undefined + + def _log_message(undef): + if undef._undefined_hint is None: + if undef._undefined_obj is missing: + hint = '%s is undefined' % undef._undefined_name + elif not isinstance(undef._undefined_name, string_types): + hint = '%s has no element %s' % ( + object_type_repr(undef._undefined_obj), + undef._undefined_name) + else: + hint = '%s has no attribute %s' % ( + object_type_repr(undef._undefined_obj), + undef._undefined_name) + else: + hint = undef._undefined_hint + logger.warning('Template variable warning: %s', hint) + + class LoggingUndefined(base): + + def _fail_with_undefined_error(self, *args, **kwargs): + try: + return base._fail_with_undefined_error(self, *args, **kwargs) + except self._undefined_exception as e: + logger.error('Template variable error: %s', str(e)) + raise e + + def __str__(self): + rv = base.__str__(self) + _log_message(self) + return rv + + def __iter__(self): + rv = base.__iter__(self) + _log_message(self) + return rv + + if PY2: + def __nonzero__(self): + rv = base.__nonzero__(self) + _log_message(self) + return rv + + def __unicode__(self): + rv = base.__unicode__(self) + _log_message(self) + return rv + else: + def __bool__(self): + rv = base.__bool__(self) + _log_message(self) + return rv + + return LoggingUndefined + + +@implements_to_string +class DebugUndefined(Undefined): + """An undefined that returns the debug info when printed. 
+
+    >>> foo = DebugUndefined(name='foo')
+    >>> str(foo)
+    '{{ foo }}'
+    >>> not foo
+    True
+    >>> foo + 42
+    Traceback (most recent call last):
+      ...
+    jinja2.exceptions.UndefinedError: 'foo' is undefined
+    """
+    __slots__ = ()
+
+    def __str__(self):
+        if self._undefined_hint is None:
+            if self._undefined_obj is missing:
+                return u'{{ %s }}' % self._undefined_name
+            return '{{ no such element: %s[%r] }}' % (
+                object_type_repr(self._undefined_obj),
+                self._undefined_name
+            )
+        return u'{{ undefined value printed: %s }}' % self._undefined_hint
+
+
+@implements_to_string
+class StrictUndefined(Undefined):
+    """An undefined that barks on print and iteration as well as boolean
+    tests and all kinds of comparisons. In other words: you can do nothing
+    with it except checking if it's defined using the `defined` test.
+
+    >>> foo = StrictUndefined(name='foo')
+    >>> str(foo)
+    Traceback (most recent call last):
+      ...
+    jinja2.exceptions.UndefinedError: 'foo' is undefined
+    >>> not foo
+    Traceback (most recent call last):
+      ...
+    jinja2.exceptions.UndefinedError: 'foo' is undefined
+    >>> foo + 42
+    Traceback (most recent call last):
+      ...
+    jinja2.exceptions.UndefinedError: 'foo' is undefined
+    """
+    __slots__ = ()
+    __iter__ = __str__ = __len__ = __nonzero__ = __eq__ = \
+        __ne__ = __bool__ = __hash__ = \
+        Undefined._fail_with_undefined_error
+
+
+# remove remaining slots attributes, after the metaclass did the magic they
+# are unneeded and irritating as they contain wrong data for the subclasses.
+del Undefined.__slots__, DebugUndefined.__slots__, StrictUndefined.__slots__
diff --git a/python/jinja2/sandbox.py b/python/jinja2/sandbox.py
new file mode 100644
index 0000000..752e812
--- /dev/null
+++ b/python/jinja2/sandbox.py
@@ -0,0 +1,486 @@
+# -*- coding: utf-8 -*-
+"""
+    jinja2.sandbox
+    ~~~~~~~~~~~~~~
+
+    Adds a sandbox layer to Jinja as it was the default behavior in the old
+    Jinja 1 releases. This sandbox is slightly different from Jinja 1 as the
+    default behavior is easier to use.
+
+    The behavior can be changed by subclassing the environment.
+
+    :copyright: (c) 2017 by the Jinja Team.
+    :license: BSD.
+"""
+import types
+import operator
+from collections import Mapping
+from jinja2.environment import Environment
+from jinja2.exceptions import SecurityError
+from jinja2._compat import string_types, PY2
+from jinja2.utils import Markup
+
+from markupsafe import EscapeFormatter
+from string import Formatter
+
+
+#: maximum number of items a range may produce
+MAX_RANGE = 100000
+
+#: attributes of function objects that are considered unsafe.
+if PY2:
+    UNSAFE_FUNCTION_ATTRIBUTES = set(['func_closure', 'func_code', 'func_dict',
+                                      'func_defaults', 'func_globals'])
+else:
+    # On Python 3 the special attributes on functions are gone,
+    # but they remain on methods and generators for whatever reason.
+    UNSAFE_FUNCTION_ATTRIBUTES = set()
+
+
+#: unsafe method attributes. function attributes are unsafe for methods too
+UNSAFE_METHOD_ATTRIBUTES = set(['im_class', 'im_func', 'im_self'])
+
+#: unsafe generator attributes.
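# (Annotation, not part of the patch: e.g. for a generator g,
# is_internal_attribute(g, 'gi_frame') below returns True, so sandboxed
# templates cannot reach frame or code objects through generators.)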
+UNSAFE_GENERATOR_ATTRIBUTES = set(['gi_frame', 'gi_code']) + +#: unsafe attributes on coroutines +UNSAFE_COROUTINE_ATTRIBUTES = set(['cr_frame', 'cr_code']) + +#: unsafe attributes on async generators +UNSAFE_ASYNC_GENERATOR_ATTRIBUTES = set(['ag_code', 'ag_frame']) + +import warnings + +# make sure we don't warn in python 2.6 about stuff we don't care about +warnings.filterwarnings('ignore', 'the sets module', DeprecationWarning, + module='jinja2.sandbox') + +from collections import deque + +_mutable_set_types = (set,) +_mutable_mapping_types = (dict,) +_mutable_sequence_types = (list,) + + +# on python 2.x we can register the user collection types +try: + from UserDict import UserDict, DictMixin + from UserList import UserList + _mutable_mapping_types += (UserDict, DictMixin) + _mutable_set_types += (UserList,) +except ImportError: + pass + +# if sets is still available, register the mutable set from there as well +try: + from sets import Set + _mutable_set_types += (Set,) +except ImportError: + pass + +#: register Python 2.6 abstract base classes +from collections import MutableSet, MutableMapping, MutableSequence +_mutable_set_types += (MutableSet,) +_mutable_mapping_types += (MutableMapping,) +_mutable_sequence_types += (MutableSequence,) + + +_mutable_spec = ( + (_mutable_set_types, frozenset([ + 'add', 'clear', 'difference_update', 'discard', 'pop', 'remove', + 'symmetric_difference_update', 'update' + ])), + (_mutable_mapping_types, frozenset([ + 'clear', 'pop', 'popitem', 'setdefault', 'update' + ])), + (_mutable_sequence_types, frozenset([ + 'append', 'reverse', 'insert', 'sort', 'extend', 'remove' + ])), + (deque, frozenset([ + 'append', 'appendleft', 'clear', 'extend', 'extendleft', 'pop', + 'popleft', 'remove', 'rotate' + ])) +) + + +class _MagicFormatMapping(Mapping): + """This class implements a dummy wrapper to fix a bug in the Python + standard library for string formatting. + + See https://bugs.python.org/issue13598 for information about why + this is necessary. + """ + + def __init__(self, args, kwargs): + self._args = args + self._kwargs = kwargs + self._last_index = 0 + + def __getitem__(self, key): + if key == '': + idx = self._last_index + self._last_index += 1 + try: + return self._args[idx] + except LookupError: + pass + key = str(idx) + return self._kwargs[key] + + def __iter__(self): + return iter(self._kwargs) + + def __len__(self): + return len(self._kwargs) + + +def inspect_format_method(callable): + if not isinstance(callable, (types.MethodType, + types.BuiltinMethodType)) or \ + callable.__name__ not in ('format', 'format_map'): + return None + obj = callable.__self__ + if isinstance(obj, string_types): + return obj + + +def safe_range(*args): + """A range that can't generate ranges with a length of more than + MAX_RANGE items. + """ + rng = range(*args) + if len(rng) > MAX_RANGE: + raise OverflowError('range too big, maximum size for range is %d' % + MAX_RANGE) + return rng + + +def unsafe(f): + """Marks a function or method as unsafe. + + :: + + @unsafe + def delete(self): + pass + """ + f.unsafe_callable = True + return f + + +def is_internal_attribute(obj, attr): + """Test if the attribute given is an internal python attribute. For + example this function returns `True` for the `func_code` attribute of + python objects. This is useful if the environment method + :meth:`~SandboxedEnvironment.is_safe_attribute` is overridden. 
+ + >>> from jinja2.sandbox import is_internal_attribute + >>> is_internal_attribute(str, "mro") + True + >>> is_internal_attribute(str, "upper") + False + """ + if isinstance(obj, types.FunctionType): + if attr in UNSAFE_FUNCTION_ATTRIBUTES: + return True + elif isinstance(obj, types.MethodType): + if attr in UNSAFE_FUNCTION_ATTRIBUTES or \ + attr in UNSAFE_METHOD_ATTRIBUTES: + return True + elif isinstance(obj, type): + if attr == 'mro': + return True + elif isinstance(obj, (types.CodeType, types.TracebackType, types.FrameType)): + return True + elif isinstance(obj, types.GeneratorType): + if attr in UNSAFE_GENERATOR_ATTRIBUTES: + return True + elif hasattr(types, 'CoroutineType') and isinstance(obj, types.CoroutineType): + if attr in UNSAFE_COROUTINE_ATTRIBUTES: + return True + elif hasattr(types, 'AsyncGeneratorType') and isinstance(obj, types.AsyncGeneratorType): + if attr in UNSAFE_ASYNC_GENERATOR_ATTRIBUTES: + return True + return attr.startswith('__') + + +def modifies_known_mutable(obj, attr): + """This function checks if an attribute on a builtin mutable object + (list, dict, set or deque) would modify it if called. It also supports + the "user"-versions of the objects (`sets.Set`, `UserDict.*` etc.) and + with Python 2.6 onwards the abstract base classes `MutableSet`, + `MutableMapping`, and `MutableSequence`. + + >>> modifies_known_mutable({}, "clear") + True + >>> modifies_known_mutable({}, "keys") + False + >>> modifies_known_mutable([], "append") + True + >>> modifies_known_mutable([], "index") + False + + If called with an unsupported object (such as unicode) `False` is + returned. + + >>> modifies_known_mutable("foo", "upper") + False + """ + for typespec, unsafe in _mutable_spec: + if isinstance(obj, typespec): + return attr in unsafe + return False + + +class SandboxedEnvironment(Environment): + """The sandboxed environment. It works like the regular environment but + tells the compiler to generate sandboxed code. Additionally subclasses of + this environment may override the methods that tell the runtime what + attributes or functions are safe to access. + + If the template tries to access insecure code a :exc:`SecurityError` is + raised. However also other exceptions may occur during the rendering so + the caller has to ensure that all exceptions are caught. + """ + sandboxed = True + + #: default callback table for the binary operators. A copy of this is + #: available on each instance of a sandboxed environment as + #: :attr:`binop_table` + default_binop_table = { + '+': operator.add, + '-': operator.sub, + '*': operator.mul, + '/': operator.truediv, + '//': operator.floordiv, + '**': operator.pow, + '%': operator.mod + } + + #: default callback table for the unary operators. A copy of this is + #: available on each instance of a sandboxed environment as + #: :attr:`unop_table` + default_unop_table = { + '+': operator.pos, + '-': operator.neg + } + + #: a set of binary operators that should be intercepted. Each operator + #: that is added to this set (empty by default) is delegated to the + #: :meth:`call_binop` method that will perform the operator. The default + #: operator callback is specified by :attr:`binop_table`. + #: + #: The following binary operators are interceptable: + #: ``//``, ``%``, ``+``, ``*``, ``-``, ``/``, and ``**`` + #: + #: The default operation form the operator table corresponds to the + #: builtin function. 
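# (Annotation, not part of the patch: a subclass that wants to intercept
# exponentiation could look like --
#
#     class MaxPowEnvironment(SandboxedEnvironment):
#         intercepted_binops = frozenset(['**'])
#
#         def call_binop(self, context, operator, left, right):
#             if operator == '**' and right > 100:
#                 raise OverflowError('exponent out of range')
#             return SandboxedEnvironment.call_binop(
#                 self, context, operator, left, right)
#
# -- only operators listed in `intercepted_binops` are routed through
# `call_binop`; everything else compiles to the plain Python operator.)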
Intercepted calls are always slower than the native + #: operator call, so make sure only to intercept the ones you are + #: interested in. + #: + #: .. versionadded:: 2.6 + intercepted_binops = frozenset() + + #: a set of unary operators that should be intercepted. Each operator + #: that is added to this set (empty by default) is delegated to the + #: :meth:`call_unop` method that will perform the operator. The default + #: operator callback is specified by :attr:`unop_table`. + #: + #: The following unary operators are interceptable: ``+``, ``-`` + #: + #: The default operation form the operator table corresponds to the + #: builtin function. Intercepted calls are always slower than the native + #: operator call, so make sure only to intercept the ones you are + #: interested in. + #: + #: .. versionadded:: 2.6 + intercepted_unops = frozenset() + + def intercept_unop(self, operator): + """Called during template compilation with the name of a unary + operator to check if it should be intercepted at runtime. If this + method returns `True`, :meth:`call_unop` is excuted for this unary + operator. The default implementation of :meth:`call_unop` will use + the :attr:`unop_table` dictionary to perform the operator with the + same logic as the builtin one. + + The following unary operators are interceptable: ``+`` and ``-`` + + Intercepted calls are always slower than the native operator call, + so make sure only to intercept the ones you are interested in. + + .. versionadded:: 2.6 + """ + return False + + + def __init__(self, *args, **kwargs): + Environment.__init__(self, *args, **kwargs) + self.globals['range'] = safe_range + self.binop_table = self.default_binop_table.copy() + self.unop_table = self.default_unop_table.copy() + + def is_safe_attribute(self, obj, attr, value): + """The sandboxed environment will call this method to check if the + attribute of an object is safe to access. Per default all attributes + starting with an underscore are considered private as well as the + special attributes of internal python objects as returned by the + :func:`is_internal_attribute` function. + """ + return not (attr.startswith('_') or is_internal_attribute(obj, attr)) + + def is_safe_callable(self, obj): + """Check if an object is safely callable. Per default a function is + considered safe unless the `unsafe_callable` attribute exists and is + True. Override this method to alter the behavior, but this won't + affect the `unsafe` decorator from this module. + """ + return not (getattr(obj, 'unsafe_callable', False) or + getattr(obj, 'alters_data', False)) + + def call_binop(self, context, operator, left, right): + """For intercepted binary operator calls (:meth:`intercepted_binops`) + this function is executed instead of the builtin operator. This can + be used to fine tune the behavior of certain operators. + + .. versionadded:: 2.6 + """ + return self.binop_table[operator](left, right) + + def call_unop(self, context, operator, arg): + """For intercepted unary operator calls (:meth:`intercepted_unops`) + this function is executed instead of the builtin operator. This can + be used to fine tune the behavior of certain operators. + + .. 
versionadded:: 2.6 + """ + return self.unop_table[operator](arg) + + def getitem(self, obj, argument): + """Subscribe an object from sandboxed code.""" + try: + return obj[argument] + except (TypeError, LookupError): + if isinstance(argument, string_types): + try: + attr = str(argument) + except Exception: + pass + else: + try: + value = getattr(obj, attr) + except AttributeError: + pass + else: + if self.is_safe_attribute(obj, argument, value): + return value + return self.unsafe_undefined(obj, argument) + return self.undefined(obj=obj, name=argument) + + def getattr(self, obj, attribute): + """Subscribe an object from sandboxed code and prefer the + attribute. The attribute passed *must* be a bytestring. + """ + try: + value = getattr(obj, attribute) + except AttributeError: + try: + return obj[attribute] + except (TypeError, LookupError): + pass + else: + if self.is_safe_attribute(obj, attribute, value): + return value + return self.unsafe_undefined(obj, attribute) + return self.undefined(obj=obj, name=attribute) + + def unsafe_undefined(self, obj, attribute): + """Return an undefined object for unsafe attributes.""" + return self.undefined('access to attribute %r of %r ' + 'object is unsafe.' % ( + attribute, + obj.__class__.__name__ + ), name=attribute, obj=obj, exc=SecurityError) + + def format_string(self, s, args, kwargs, format_func=None): + """If a format call is detected, then this is routed through this + method so that our safety sandbox can be used for it. + """ + if isinstance(s, Markup): + formatter = SandboxedEscapeFormatter(self, s.escape) + else: + formatter = SandboxedFormatter(self) + + if format_func is not None and format_func.__name__ == 'format_map': + if len(args) != 1 or kwargs: + raise TypeError( + 'format_map() takes exactly one argument %d given' + % (len(args) + (kwargs is not None)) + ) + + kwargs = args[0] + args = None + + kwargs = _MagicFormatMapping(args, kwargs) + rv = formatter.vformat(s, args, kwargs) + return type(s)(rv) + + def call(__self, __context, __obj, *args, **kwargs): + """Call an object from sandboxed code.""" + fmt = inspect_format_method(__obj) + if fmt is not None: + return __self.format_string(fmt, args, kwargs, __obj) + + # the double prefixes are to avoid double keyword argument + # errors when proxying the call. + if not __self.is_safe_callable(__obj): + raise SecurityError('%r is not safely callable' % (__obj,)) + return __context.call(__obj, *args, **kwargs) + + +class ImmutableSandboxedEnvironment(SandboxedEnvironment): + """Works exactly like the regular `SandboxedEnvironment` but does not + permit modifications on the builtin mutable objects `list`, `set`, and + `dict` by using the :func:`modifies_known_mutable` function. + """ + + def is_safe_attribute(self, obj, attr, value): + if not SandboxedEnvironment.is_safe_attribute(self, obj, attr, value): + return False + return not modifies_known_mutable(obj, attr) + + +# This really is not a public API apparenlty. 
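# (Annotation, not part of the patch: the @unsafe marker defined above
# combines with is_safe_callable()/call(), e.g. --
#
#     from jinja2.sandbox import SandboxedEnvironment, unsafe
#
#     class Payload(object):
#         @unsafe
#         def delete(self):
#             pass
#
#     env = SandboxedEnvironment()
#     env.from_string('{{ obj.delete() }}').render(obj=Payload())
#
# raises SecurityError because the bound method is not safely callable.)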
+try: + from _string import formatter_field_name_split +except ImportError: + def formatter_field_name_split(field_name): + return field_name._formatter_field_name_split() + + +class SandboxedFormatterMixin(object): + + def __init__(self, env): + self._env = env + + def get_field(self, field_name, args, kwargs): + first, rest = formatter_field_name_split(field_name) + obj = self.get_value(first, args, kwargs) + for is_attr, i in rest: + if is_attr: + obj = self._env.getattr(obj, i) + else: + obj = self._env.getitem(obj, i) + return obj, first + +class SandboxedFormatter(SandboxedFormatterMixin, Formatter): + + def __init__(self, env): + SandboxedFormatterMixin.__init__(self, env) + Formatter.__init__(self) + +class SandboxedEscapeFormatter(SandboxedFormatterMixin, EscapeFormatter): + + def __init__(self, env, escape): + SandboxedFormatterMixin.__init__(self, env) + EscapeFormatter.__init__(self, escape) diff --git a/python/jinja2/tests.py b/python/jinja2/tests.py new file mode 100644 index 0000000..0adc3d4 --- /dev/null +++ b/python/jinja2/tests.py @@ -0,0 +1,175 @@ +# -*- coding: utf-8 -*- +""" + jinja2.tests + ~~~~~~~~~~~~ + + Jinja test functions. Used with the "is" operator. + + :copyright: (c) 2017 by the Jinja Team. + :license: BSD, see LICENSE for more details. +""" +import operator +import re +from collections import Mapping +from jinja2.runtime import Undefined +from jinja2._compat import text_type, string_types, integer_types +import decimal + +number_re = re.compile(r'^-?\d+(\.\d+)?$') +regex_type = type(number_re) + + +test_callable = callable + + +def test_odd(value): + """Return true if the variable is odd.""" + return value % 2 == 1 + + +def test_even(value): + """Return true if the variable is even.""" + return value % 2 == 0 + + +def test_divisibleby(value, num): + """Check if a variable is divisible by a number.""" + return value % num == 0 + + +def test_defined(value): + """Return true if the variable is defined: + + .. sourcecode:: jinja + + {% if variable is defined %} + value of variable: {{ variable }} + {% else %} + variable is not defined + {% endif %} + + See the :func:`default` filter for a simple way to set undefined + variables. + """ + return not isinstance(value, Undefined) + + +def test_undefined(value): + """Like :func:`defined` but the other way round.""" + return isinstance(value, Undefined) + + +def test_none(value): + """Return true if the variable is none.""" + return value is None + + +def test_lower(value): + """Return true if the variable is lowercased.""" + return text_type(value).islower() + + +def test_upper(value): + """Return true if the variable is uppercased.""" + return text_type(value).isupper() + + +def test_string(value): + """Return true if the object is a string.""" + return isinstance(value, string_types) + + +def test_mapping(value): + """Return true if the object is a mapping (dict etc.). + + .. versionadded:: 2.6 + """ + return isinstance(value, Mapping) + + +def test_number(value): + """Return true if the variable is a number.""" + return isinstance(value, integer_types + (float, complex, decimal.Decimal)) + + +def test_sequence(value): + """Return true if the variable is a sequence. Sequences are variables + that are iterable. + """ + try: + len(value) + value.__getitem__ + except: + return False + return True + + +def test_sameas(value, other): + """Check if an object points to the same memory address than another + object: + + .. 
sourcecode:: jinja + + {% if foo.attribute is sameas false %} + the foo attribute really is the `False` singleton + {% endif %} + """ + return value is other + + +def test_iterable(value): + """Check if it's possible to iterate over an object.""" + try: + iter(value) + except TypeError: + return False + return True + + +def test_escaped(value): + """Check if the value is escaped.""" + return hasattr(value, '__html__') + + +def test_in(value, seq): + """Check if value is in seq. + + .. versionadded:: 2.10 + """ + return value in seq + + +TESTS = { + 'odd': test_odd, + 'even': test_even, + 'divisibleby': test_divisibleby, + 'defined': test_defined, + 'undefined': test_undefined, + 'none': test_none, + 'lower': test_lower, + 'upper': test_upper, + 'string': test_string, + 'mapping': test_mapping, + 'number': test_number, + 'sequence': test_sequence, + 'iterable': test_iterable, + 'callable': test_callable, + 'sameas': test_sameas, + 'escaped': test_escaped, + 'in': test_in, + '==': operator.eq, + 'eq': operator.eq, + 'equalto': operator.eq, + '!=': operator.ne, + 'ne': operator.ne, + '>': operator.gt, + 'gt': operator.gt, + 'greaterthan': operator.gt, + 'ge': operator.ge, + '>=': operator.ge, + '<': operator.lt, + 'lt': operator.lt, + 'lessthan': operator.lt, + '<=': operator.le, + 'le': operator.le, +} diff --git a/python/jinja2/utils.py b/python/jinja2/utils.py new file mode 100644 index 0000000..502a311 --- /dev/null +++ b/python/jinja2/utils.py @@ -0,0 +1,647 @@ +# -*- coding: utf-8 -*- +""" + jinja2.utils + ~~~~~~~~~~~~ + + Utility functions. + + :copyright: (c) 2017 by the Jinja Team. + :license: BSD, see LICENSE for more details. +""" +import re +import json +import errno +from collections import deque +from threading import Lock +from jinja2._compat import text_type, string_types, implements_iterator, \ + url_quote + + +_word_split_re = re.compile(r'(\s+)') +_punctuation_re = re.compile( + '^(?P<lead>(?:%s)*)(?P<middle>.*?)(?P<trail>(?:%s)*)$' % ( + '|'.join(map(re.escape, ('(', '<', '<'))), + '|'.join(map(re.escape, ('.', ',', ')', '>', '\n', '>'))) + ) +) +_simple_email_re = re.compile(r'^\S+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+$') +_striptags_re = re.compile(r'(<!--.*?-->|<[^>]*>)') +_entity_re = re.compile(r'&([^;]+);') +_letters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' +_digits = '0123456789' + +# special singleton representing missing values for the runtime +missing = type('MissingType', (), {'__repr__': lambda x: 'missing'})() + +# internal code +internal_code = set() + +concat = u''.join + +_slash_escape = '\\/' not in json.dumps('/') + + +def contextfunction(f): + """This decorator can be used to mark a function or method context callable. + A context callable is passed the active :class:`Context` as first argument when + called from the template. This is useful if a function wants to get access + to the context or functions provided on the context object. For example + a function that returns a sorted list of template variables the current + template exports could look like this:: + + @contextfunction + def get_exported_names(context): + return sorted(context.exported_vars) + """ + f.contextfunction = True + return f + + +def evalcontextfunction(f): + """This decorator can be used to mark a function or method as an eval + context callable. This is similar to the :func:`contextfunction` + but instead of passing the context, an evaluation context object is + passed. For more information about the eval context, see + :ref:`eval-context`. + + .. 
versionadded:: 2.4 + """ + f.evalcontextfunction = True + return f + + +def environmentfunction(f): + """This decorator can be used to mark a function or method as environment + callable. This decorator works exactly like the :func:`contextfunction` + decorator just that the first argument is the active :class:`Environment` + and not context. + """ + f.environmentfunction = True + return f + + +def internalcode(f): + """Marks the function as internally used""" + internal_code.add(f.__code__) + return f + + +def is_undefined(obj): + """Check if the object passed is undefined. This does nothing more than + performing an instance check against :class:`Undefined` but looks nicer. + This can be used for custom filters or tests that want to react to + undefined variables. For example a custom default filter can look like + this:: + + def default(var, default=''): + if is_undefined(var): + return default + return var + """ + from jinja2.runtime import Undefined + return isinstance(obj, Undefined) + + +def consume(iterable): + """Consumes an iterable without doing anything with it.""" + for event in iterable: + pass + + +def clear_caches(): + """Jinja2 keeps internal caches for environments and lexers. These are + used so that Jinja2 doesn't have to recreate environments and lexers all + the time. Normally you don't have to care about that but if you are + measuring memory consumption you may want to clean the caches. + """ + from jinja2.environment import _spontaneous_environments + from jinja2.lexer import _lexer_cache + _spontaneous_environments.clear() + _lexer_cache.clear() + + +def import_string(import_name, silent=False): + """Imports an object based on a string. This is useful if you want to + use import paths as endpoints or something similar. An import path can + be specified either in dotted notation (``xml.sax.saxutils.escape``) + or with a colon as object delimiter (``xml.sax.saxutils:escape``). + + If the `silent` is True the return value will be `None` if the import + fails. + + :return: imported object + """ + try: + if ':' in import_name: + module, obj = import_name.split(':', 1) + elif '.' in import_name: + items = import_name.split('.') + module = '.'.join(items[:-1]) + obj = items[-1] + else: + return __import__(import_name) + return getattr(__import__(module, None, None, [obj]), obj) + except (ImportError, AttributeError): + if not silent: + raise + + +def open_if_exists(filename, mode='rb'): + """Returns a file descriptor for the filename if that file exists, + otherwise `None`. + """ + try: + return open(filename, mode) + except IOError as e: + if e.errno not in (errno.ENOENT, errno.EISDIR, errno.EINVAL): + raise + + +def object_type_repr(obj): + """Returns the name of the object's type. For some recognized + singletons the name of the object is returned instead. (For + example for `None` and `Ellipsis`). + """ + if obj is None: + return 'None' + elif obj is Ellipsis: + return 'Ellipsis' + # __builtin__ in 2.x, builtins in 3.x + if obj.__class__.__module__ in ('__builtin__', 'builtins'): + name = obj.__class__.__name__ + else: + name = obj.__class__.__module__ + '.' + obj.__class__.__name__ + return '%s object' % name + + +def pformat(obj, verbose=False): + """Prettyprint an object. Either use the `pretty` library or the + builtin `pprint`. 
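(Annotation, not part of the patch: illustrative --

    from jinja2.utils import pformat
    pformat({'b': 2, 'a': 1})   # without `pretty` installed -> "{'a': 1, 'b': 2}"

the `pretty` library is optional; `pprint.pformat` is the fallback.)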
+ """ + try: + from pretty import pretty + return pretty(obj, verbose=verbose) + except ImportError: + from pprint import pformat + return pformat(obj) + + +def urlize(text, trim_url_limit=None, rel=None, target=None): + """Converts any URLs in text into clickable links. Works on http://, + https:// and www. links. Links can have trailing punctuation (periods, + commas, close-parens) and leading punctuation (opening parens) and + it'll still do the right thing. + + If trim_url_limit is not None, the URLs in link text will be limited + to trim_url_limit characters. + + If nofollow is True, the URLs in link text will get a rel="nofollow" + attribute. + + If target is not None, a target attribute will be added to the link. + """ + trim_url = lambda x, limit=trim_url_limit: limit is not None \ + and (x[:limit] + (len(x) >=limit and '...' + or '')) or x + words = _word_split_re.split(text_type(escape(text))) + rel_attr = rel and ' rel="%s"' % text_type(escape(rel)) or '' + target_attr = target and ' target="%s"' % escape(target) or '' + + for i, word in enumerate(words): + match = _punctuation_re.match(word) + if match: + lead, middle, trail = match.groups() + if middle.startswith('www.') or ( + '@' not in middle and + not middle.startswith('http://') and + not middle.startswith('https://') and + len(middle) > 0 and + middle[0] in _letters + _digits and ( + middle.endswith('.org') or + middle.endswith('.net') or + middle.endswith('.com') + )): + middle = '<a href="http://%s"%s%s>%s</a>' % (middle, + rel_attr, target_attr, trim_url(middle)) + if middle.startswith('http://') or \ + middle.startswith('https://'): + middle = '<a href="%s"%s%s>%s</a>' % (middle, + rel_attr, target_attr, trim_url(middle)) + if '@' in middle and not middle.startswith('www.') and \ + not ':' in middle and _simple_email_re.match(middle): + middle = '<a href="mailto:%s">%s</a>' % (middle, middle) + if lead + middle + trail != word: + words[i] = lead + middle + trail + return u''.join(words) + + +def generate_lorem_ipsum(n=5, html=True, min=20, max=100): + """Generate some lorem ipsum for the template.""" + from jinja2.constants import LOREM_IPSUM_WORDS + from random import choice, randrange + words = LOREM_IPSUM_WORDS.split() + result = [] + + for _ in range(n): + next_capitalized = True + last_comma = last_fullstop = 0 + word = None + last = None + p = [] + + # each paragraph contains out of 20 to 100 words. + for idx, _ in enumerate(range(randrange(min, max))): + while True: + word = choice(words) + if word != last: + last = word + break + if next_capitalized: + word = word.capitalize() + next_capitalized = False + # add commas + if idx - randrange(3, 8) > last_comma: + last_comma = idx + last_fullstop += 2 + word += ',' + # add end of sentences + if idx - randrange(10, 20) > last_fullstop: + last_comma = last_fullstop = idx + word += '.' + next_capitalized = True + p.append(word) + + # ensure that the paragraph ends with a dot. + p = u' '.join(p) + if p.endswith(','): + p = p[:-1] + '.' + elif not p.endswith('.'): + p += '.' + result.append(p) + + if not html: + return u'\n\n'.join(result) + return Markup(u'\n'.join(u'<p>%s</p>' % escape(x) for x in result)) + + +def unicode_urlencode(obj, charset='utf-8', for_qs=False): + """URL escapes a single bytestring or unicode string with the + given charset if applicable to URL safe quoting under all rules + that need to be considered under all supported Python versions. + + If non strings are provided they are converted to their unicode + representation first. 
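+
+    As a rough sketch of the two modes (exact percent-encodings may vary
+    slightly across Python versions)::
+
+        unicode_urlencode(u'a b/c')               # 'a%20b/c' (slash is safe)
+        unicode_urlencode(u'a b/c', for_qs=True)  # 'a+b%2Fc' (query string rules)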
+ """ + if not isinstance(obj, string_types): + obj = text_type(obj) + if isinstance(obj, text_type): + obj = obj.encode(charset) + safe = not for_qs and b'/' or b'' + rv = text_type(url_quote(obj, safe)) + if for_qs: + rv = rv.replace('%20', '+') + return rv + + +class LRUCache(object): + """A simple LRU Cache implementation.""" + + # this is fast for small capacities (something below 1000) but doesn't + # scale. But as long as it's only used as storage for templates this + # won't do any harm. + + def __init__(self, capacity): + self.capacity = capacity + self._mapping = {} + self._queue = deque() + self._postinit() + + def _postinit(self): + # alias all queue methods for faster lookup + self._popleft = self._queue.popleft + self._pop = self._queue.pop + self._remove = self._queue.remove + self._wlock = Lock() + self._append = self._queue.append + + def __getstate__(self): + return { + 'capacity': self.capacity, + '_mapping': self._mapping, + '_queue': self._queue + } + + def __setstate__(self, d): + self.__dict__.update(d) + self._postinit() + + def __getnewargs__(self): + return (self.capacity,) + + def copy(self): + """Return a shallow copy of the instance.""" + rv = self.__class__(self.capacity) + rv._mapping.update(self._mapping) + rv._queue = deque(self._queue) + return rv + + def get(self, key, default=None): + """Return an item from the cache dict or `default`""" + try: + return self[key] + except KeyError: + return default + + def setdefault(self, key, default=None): + """Set `default` if the key is not in the cache otherwise + leave unchanged. Return the value of this key. + """ + self._wlock.acquire() + try: + try: + return self[key] + except KeyError: + self[key] = default + return default + finally: + self._wlock.release() + + def clear(self): + """Clear the cache.""" + self._wlock.acquire() + try: + self._mapping.clear() + self._queue.clear() + finally: + self._wlock.release() + + def __contains__(self, key): + """Check if a key exists in this cache.""" + return key in self._mapping + + def __len__(self): + """Return the current size of the cache.""" + return len(self._mapping) + + def __repr__(self): + return '<%s %r>' % ( + self.__class__.__name__, + self._mapping + ) + + def __getitem__(self, key): + """Get an item from the cache. Moves the item up so that it has the + highest priority then. + + Raise a `KeyError` if it does not exist. + """ + self._wlock.acquire() + try: + rv = self._mapping[key] + if self._queue[-1] != key: + try: + self._remove(key) + except ValueError: + # if something removed the key from the container + # when we read, ignore the ValueError that we would + # get otherwise. + pass + self._append(key) + return rv + finally: + self._wlock.release() + + def __setitem__(self, key, value): + """Sets the value for an item. Moves the item up so that it + has the highest priority then. + """ + self._wlock.acquire() + try: + if key in self._mapping: + self._remove(key) + elif len(self._mapping) == self.capacity: + del self._mapping[self._popleft()] + self._append(key) + self._mapping[key] = value + finally: + self._wlock.release() + + def __delitem__(self, key): + """Remove an item from the cache dict. + Raise a `KeyError` if it does not exist. 
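+
+        A minimal usage sketch of the cache as a whole (keys and capacity
+        are made up)::
+
+            cache = LRUCache(2)
+            cache['a'] = 1
+            cache['b'] = 2
+            cache['c'] = 3     # capacity reached: evicts 'a', the oldest key
+            del cache['b']
+            'a' in cache       # False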
+ """ + self._wlock.acquire() + try: + del self._mapping[key] + try: + self._remove(key) + except ValueError: + # __getitem__ is not locked, it might happen + pass + finally: + self._wlock.release() + + def items(self): + """Return a list of items.""" + result = [(key, self._mapping[key]) for key in list(self._queue)] + result.reverse() + return result + + def iteritems(self): + """Iterate over all items.""" + return iter(self.items()) + + def values(self): + """Return a list of all values.""" + return [x[1] for x in self.items()] + + def itervalue(self): + """Iterate over all values.""" + return iter(self.values()) + + def keys(self): + """Return a list of all keys ordered by most recent usage.""" + return list(self) + + def iterkeys(self): + """Iterate over all keys in the cache dict, ordered by + the most recent usage. + """ + return reversed(tuple(self._queue)) + + __iter__ = iterkeys + + def __reversed__(self): + """Iterate over the values in the cache dict, oldest items + coming first. + """ + return iter(tuple(self._queue)) + + __copy__ = copy + + +# register the LRU cache as mutable mapping if possible +try: + from collections import MutableMapping + MutableMapping.register(LRUCache) +except ImportError: + pass + + +def select_autoescape(enabled_extensions=('html', 'htm', 'xml'), + disabled_extensions=(), + default_for_string=True, + default=False): + """Intelligently sets the initial value of autoescaping based on the + filename of the template. This is the recommended way to configure + autoescaping if you do not want to write a custom function yourself. + + If you want to enable it for all templates created from strings or + for all templates with `.html` and `.xml` extensions:: + + from jinja2 import Environment, select_autoescape + env = Environment(autoescape=select_autoescape( + enabled_extensions=('html', 'xml'), + default_for_string=True, + )) + + Example configuration to turn it on at all times except if the template + ends with `.txt`:: + + from jinja2 import Environment, select_autoescape + env = Environment(autoescape=select_autoescape( + disabled_extensions=('txt',), + default_for_string=True, + default=True, + )) + + The `enabled_extensions` is an iterable of all the extensions that + autoescaping should be enabled for. Likewise `disabled_extensions` is + a list of all templates it should be disabled for. If a template is + loaded from a string then the default from `default_for_string` is used. + If nothing matches then the initial value of autoescaping is set to the + value of `default`. + + For security reasons this function operates case insensitive. + + .. versionadded:: 2.9 + """ + enabled_patterns = tuple('.' + x.lstrip('.').lower() + for x in enabled_extensions) + disabled_patterns = tuple('.' + x.lstrip('.').lower() + for x in disabled_extensions) + def autoescape(template_name): + if template_name is None: + return default_for_string + template_name = template_name.lower() + if template_name.endswith(enabled_patterns): + return True + if template_name.endswith(disabled_patterns): + return False + return default + return autoescape + + +def htmlsafe_json_dumps(obj, dumper=None, **kwargs): + """Works exactly like :func:`dumps` but is safe for use in ``<script>`` + tags. It accepts the same arguments and returns a JSON string. Note that + this is available in templates through the ``|tojson`` filter which will + also mark the result as safe. Due to how this function escapes certain + characters this is safe even if used outside of ``<script>`` tags. 
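+
+    As a rough sketch of the effect (reprs shown as on Python 2)::
+
+        htmlsafe_json_dumps({'x': '<script>'})
+        # Markup(u'{"x": "\\u003cscript\\u003e"}')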
+ + The following characters are escaped in strings: + + - ``<`` + - ``>`` + - ``&`` + - ``'`` + + This makes it safe to embed such strings in any place in HTML with the + notable exception of double quoted attributes. In that case single + quote your attributes or HTML escape it in addition. + """ + if dumper is None: + dumper = json.dumps + rv = dumper(obj, **kwargs) \ + .replace(u'<', u'\\u003c') \ + .replace(u'>', u'\\u003e') \ + .replace(u'&', u'\\u0026') \ + .replace(u"'", u'\\u0027') + return Markup(rv) + + +@implements_iterator +class Cycler(object): + """A cycle helper for templates.""" + + def __init__(self, *items): + if not items: + raise RuntimeError('at least one item has to be provided') + self.items = items + self.reset() + + def reset(self): + """Resets the cycle.""" + self.pos = 0 + + @property + def current(self): + """Returns the current item.""" + return self.items[self.pos] + + def next(self): + """Goes one item ahead and returns it.""" + rv = self.current + self.pos = (self.pos + 1) % len(self.items) + return rv + + __next__ = next + + +class Joiner(object): + """A joining helper for templates.""" + + def __init__(self, sep=u', '): + self.sep = sep + self.used = False + + def __call__(self): + if not self.used: + self.used = True + return u'' + return self.sep + + +class Namespace(object): + """A namespace object that can hold arbitrary attributes. It may be + initialized from a dictionary or with keyword argments.""" + + def __init__(*args, **kwargs): + self, args = args[0], args[1:] + self.__attrs = dict(*args, **kwargs) + + def __getattribute__(self, name): + if name == '_Namespace__attrs': + return object.__getattribute__(self, name) + try: + return self.__attrs[name] + except KeyError: + raise AttributeError(name) + + def __setitem__(self, name, value): + self.__attrs[name] = value + + def __repr__(self): + return '<Namespace %r>' % self.__attrs + + +# does this python version support async for in and async generators? +try: + exec('async def _():\n async for _ in ():\n yield _') + have_async_gen = True +except SyntaxError: + have_async_gen = False + + +# Imported here because that's where it was in the past +from markupsafe import Markup, escape, soft_unicode diff --git a/python/jinja2/visitor.py b/python/jinja2/visitor.py new file mode 100644 index 0000000..ba526df --- /dev/null +++ b/python/jinja2/visitor.py @@ -0,0 +1,87 @@ +# -*- coding: utf-8 -*- +""" + jinja2.visitor + ~~~~~~~~~~~~~~ + + This module implements a visitor for the nodes. + + :copyright: (c) 2017 by the Jinja Team. + :license: BSD. +""" +from jinja2.nodes import Node + + +class NodeVisitor(object): + """Walks the abstract syntax tree and call visitor functions for every + node found. The visitor functions may return values which will be + forwarded by the `visit` method. + + Per default the visitor functions for the nodes are ``'visit_'`` + + class name of the node. So a `TryFinally` node visit function would + be `visit_TryFinally`. This behavior can be changed by overriding + the `get_visitor` function. If no visitor function exists for a node + (return value `None`) the `generic_visit` visitor is used instead. + """ + + def get_visitor(self, node): + """Return the visitor function for this node or `None` if no visitor + exists for this node. In that case the generic visit function is + used instead. 
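+
+        As a small sketch, a visitor that collects variable names (using
+        the ``Name`` node from :mod:`jinja2.nodes`) hooks in like this::
+
+            class NameCollector(NodeVisitor):
+                def __init__(self):
+                    self.names = []
+
+                def visit_Name(self, node):
+                    # resolved by get_visitor as 'visit_' + 'Name'
+                    self.names.append(node.name)
+                    self.generic_visit(node)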
+ """ + method = 'visit_' + node.__class__.__name__ + return getattr(self, method, None) + + def visit(self, node, *args, **kwargs): + """Visit a node.""" + f = self.get_visitor(node) + if f is not None: + return f(node, *args, **kwargs) + return self.generic_visit(node, *args, **kwargs) + + def generic_visit(self, node, *args, **kwargs): + """Called if no explicit visitor function exists for a node.""" + for node in node.iter_child_nodes(): + self.visit(node, *args, **kwargs) + + +class NodeTransformer(NodeVisitor): + """Walks the abstract syntax tree and allows modifications of nodes. + + The `NodeTransformer` will walk the AST and use the return value of the + visitor functions to replace or remove the old node. If the return + value of the visitor function is `None` the node will be removed + from the previous location otherwise it's replaced with the return + value. The return value may be the original node in which case no + replacement takes place. + """ + + def generic_visit(self, node, *args, **kwargs): + for field, old_value in node.iter_fields(): + if isinstance(old_value, list): + new_values = [] + for value in old_value: + if isinstance(value, Node): + value = self.visit(value, *args, **kwargs) + if value is None: + continue + elif not isinstance(value, Node): + new_values.extend(value) + continue + new_values.append(value) + old_value[:] = new_values + elif isinstance(old_value, Node): + new_node = self.visit(old_value, *args, **kwargs) + if new_node is None: + delattr(node, field) + else: + setattr(node, field, new_node) + return node + + def visit_list(self, node, *args, **kwargs): + """As transformers may return lists in some places this method + can be used to enforce a list as return value. + """ + rv = self.visit(node, *args, **kwargs) + if not isinstance(rv, list): + rv = [rv] + return rv diff --git a/python/markupsafe/__init__.py b/python/markupsafe/__init__.py new file mode 100644 index 0000000..68dc85f --- /dev/null +++ b/python/markupsafe/__init__.py @@ -0,0 +1,305 @@ +# -*- coding: utf-8 -*- +""" + markupsafe + ~~~~~~~~~~ + + Implements a Markup string. + + :copyright: (c) 2010 by Armin Ronacher. + :license: BSD, see LICENSE for more details. +""" +import re +import string +from collections import Mapping +from markupsafe._compat import text_type, string_types, int_types, \ + unichr, iteritems, PY2 + +__version__ = "1.0" + +__all__ = ['Markup', 'soft_unicode', 'escape', 'escape_silent'] + + +_striptags_re = re.compile(r'(<!--.*?-->|<[^>]*>)') +_entity_re = re.compile(r'&([^& ;]+);') + + +class Markup(text_type): + r"""Marks a string as being safe for inclusion in HTML/XML output without + needing to be escaped. This implements the `__html__` interface a couple + of frameworks and web applications use. :class:`Markup` is a direct + subclass of `unicode` and provides all the methods of `unicode` just that + it escapes arguments passed and always returns `Markup`. + + The `escape` function returns markup objects so that double escaping can't + happen. + + The constructor of the :class:`Markup` class can be used for three + different things: When passed an unicode object it's assumed to be safe, + when passed an object with an HTML representation (has an `__html__` + method) that representation is used, otherwise the object passed is + converted into a unicode string and then assumed to be safe: + + >>> Markup("Hello <em>World</em>!") + Markup(u'Hello <em>World</em>!') + >>> class Foo(object): + ... def __html__(self): + ... return '<a href="#">foo</a>' + ... 
+    >>> Markup(Foo())
+    Markup(u'<a href="#">foo</a>')
+
+    If you want object passed being always treated as unsafe you can use the
+    :meth:`escape` classmethod to create a :class:`Markup` object:
+
+    >>> Markup.escape("Hello <em>World</em>!")
+    Markup(u'Hello &lt;em&gt;World&lt;/em&gt;!')
+
+    Operations on a markup string are markup aware which means that all
+    arguments are passed through the :func:`escape` function:
+
+    >>> em = Markup("<em>%s</em>")
+    >>> em % "foo & bar"
+    Markup(u'<em>foo &amp; bar</em>')
+    >>> strong = Markup("<strong>%(text)s</strong>")
+    >>> strong % {'text': '<blink>hacker here</blink>'}
+    Markup(u'<strong>&lt;blink&gt;hacker here&lt;/blink&gt;</strong>')
+    >>> Markup("<em>Hello</em> ") + "<foo>"
+    Markup(u'<em>Hello</em> &lt;foo&gt;')
+    """
+    __slots__ = ()
+
+    def __new__(cls, base=u'', encoding=None, errors='strict'):
+        if hasattr(base, '__html__'):
+            base = base.__html__()
+        if encoding is None:
+            return text_type.__new__(cls, base)
+        return text_type.__new__(cls, base, encoding, errors)
+
+    def __html__(self):
+        return self
+
+    def __add__(self, other):
+        if isinstance(other, string_types) or hasattr(other, '__html__'):
+            return self.__class__(super(Markup, self).__add__(self.escape(other)))
+        return NotImplemented
+
+    def __radd__(self, other):
+        if hasattr(other, '__html__') or isinstance(other, string_types):
+            return self.escape(other).__add__(self)
+        return NotImplemented
+
+    def __mul__(self, num):
+        if isinstance(num, int_types):
+            return self.__class__(text_type.__mul__(self, num))
+        return NotImplemented
+    __rmul__ = __mul__
+
+    def __mod__(self, arg):
+        if isinstance(arg, tuple):
+            arg = tuple(_MarkupEscapeHelper(x, self.escape) for x in arg)
+        else:
+            arg = _MarkupEscapeHelper(arg, self.escape)
+        return self.__class__(text_type.__mod__(self, arg))
+
+    def __repr__(self):
+        return '%s(%s)' % (
+            self.__class__.__name__,
+            text_type.__repr__(self)
+        )
+
+    def join(self, seq):
+        return self.__class__(text_type.join(self, map(self.escape, seq)))
+    join.__doc__ = text_type.join.__doc__
+
+    def split(self, *args, **kwargs):
+        return list(map(self.__class__, text_type.split(self, *args, **kwargs)))
+    split.__doc__ = text_type.split.__doc__
+
+    def rsplit(self, *args, **kwargs):
+        return list(map(self.__class__, text_type.rsplit(self, *args, **kwargs)))
+    rsplit.__doc__ = text_type.rsplit.__doc__
+
+    def splitlines(self, *args, **kwargs):
+        return list(map(self.__class__, text_type.splitlines(
+            self, *args, **kwargs)))
+    splitlines.__doc__ = text_type.splitlines.__doc__
+
+    def unescape(self):
+        r"""Unescape markup again into an text_type string.  This also resolves
+        known HTML4 and XHTML entities:
+
+        >>> Markup("Main &raquo; <em>About</em>").unescape()
+        u'Main \xbb <em>About</em>'
+        """
+        from markupsafe._constants import HTML_ENTITIES
+        def handle_match(m):
+            name = m.group(1)
+            if name in HTML_ENTITIES:
+                return unichr(HTML_ENTITIES[name])
+            try:
+                if name[:2] in ('#x', '#X'):
+                    return unichr(int(name[2:], 16))
+                elif name.startswith('#'):
+                    return unichr(int(name[1:]))
+            except ValueError:
+                pass
+            # Don't modify unexpected input.
+            return m.group()
+        return _entity_re.sub(handle_match, text_type(self))
+
+    def striptags(self):
+        r"""Unescape markup into an text_type string and strip all tags.  This
+        also resolves known HTML4 and XHTML entities.  Whitespace is
+        normalized to one:
+
+        >>> Markup("Main &raquo; <em>About</em>").striptags()
+        u'Main \xbb About'
+        """
+        stripped = u' '.join(_striptags_re.sub('', self).split())
+        return Markup(stripped).unescape()
+
+    @classmethod
+    def escape(cls, s):
+        """Escape the string.  Works like :func:`escape` with the difference
+        that for subclasses of :class:`Markup` this function would return the
+        correct subclass.
+        """
+        rv = escape(s)
+        if rv.__class__ is not cls:
+            return cls(rv)
+        return rv
+
+    def make_simple_escaping_wrapper(name):
+        orig = getattr(text_type, name)
+        def func(self, *args, **kwargs):
+            args = _escape_argspec(list(args), enumerate(args), self.escape)
+            _escape_argspec(kwargs, iteritems(kwargs), self.escape)
+            return self.__class__(orig(self, *args, **kwargs))
+        func.__name__ = orig.__name__
+        func.__doc__ = orig.__doc__
+        return func
+
+    for method in '__getitem__', 'capitalize', \
+                  'title', 'lower', 'upper', 'replace', 'ljust', \
+                  'rjust', 'lstrip', 'rstrip', 'center', 'strip', \
+                  'translate', 'expandtabs', 'swapcase', 'zfill':
+        locals()[method] = make_simple_escaping_wrapper(method)
+
+    # new in python 2.5
+    if hasattr(text_type, 'partition'):
+        def partition(self, sep):
+            return tuple(map(self.__class__,
+                             text_type.partition(self, self.escape(sep))))
+        def rpartition(self, sep):
+            return tuple(map(self.__class__,
+                             text_type.rpartition(self, self.escape(sep))))
+
+    # new in python 2.6
+    if hasattr(text_type, 'format'):
+        def format(*args, **kwargs):
+            self, args = args[0], args[1:]
+            formatter = EscapeFormatter(self.escape)
+            kwargs = _MagicFormatMapping(args, kwargs)
+            return self.__class__(formatter.vformat(self, args, kwargs))
+
+        def __html_format__(self, format_spec):
+            if format_spec:
+                raise ValueError('Unsupported format specification '
+                                 'for Markup.')
+            return self
+
+    # not in python 3
+    if hasattr(text_type, '__getslice__'):
+        __getslice__ = make_simple_escaping_wrapper('__getslice__')
+
+    del method, make_simple_escaping_wrapper
+
+
+class _MagicFormatMapping(Mapping):
+    """This class implements a dummy wrapper to fix a bug in the Python
+    standard library for string formatting.
+
+    See http://bugs.python.org/issue13598 for information about why
+    this is necessary.
+    """
+
+    def __init__(self, args, kwargs):
+        self._args = args
+        self._kwargs = kwargs
+        self._last_index = 0
+
+    def __getitem__(self, key):
+        if key == '':
+            idx = self._last_index
+            self._last_index += 1
+            try:
+                return self._args[idx]
+            except LookupError:
+                pass
+            key = str(idx)
+        return self._kwargs[key]
+
+    def __iter__(self):
+        return iter(self._kwargs)
+
+    def __len__(self):
+        return len(self._kwargs)
+
+
+if hasattr(text_type, 'format'):
+    class EscapeFormatter(string.Formatter):
+
+        def __init__(self, escape):
+            self.escape = escape
+
+        def format_field(self, value, format_spec):
+            if hasattr(value, '__html_format__'):
+                rv = value.__html_format__(format_spec)
+            elif hasattr(value, '__html__'):
+                if format_spec:
+                    raise ValueError('No format specification allowed '
+                                     'when formatting an object with '
+                                     'its __html__ method.')
+                rv = value.__html__()
+            else:
+                # We need to make sure the format spec is unicode here as
+                # otherwise the wrong callback methods are invoked.  For
+                # instance a byte string there would invoke __str__ and
+                # not __unicode__.
+ rv = string.Formatter.format_field( + self, value, text_type(format_spec)) + return text_type(self.escape(rv)) + + +def _escape_argspec(obj, iterable, escape): + """Helper for various string-wrapped functions.""" + for key, value in iterable: + if hasattr(value, '__html__') or isinstance(value, string_types): + obj[key] = escape(value) + return obj + + +class _MarkupEscapeHelper(object): + """Helper for Markup.__mod__""" + + def __init__(self, obj, escape): + self.obj = obj + self.escape = escape + + __getitem__ = lambda s, x: _MarkupEscapeHelper(s.obj[x], s.escape) + __unicode__ = __str__ = lambda s: text_type(s.escape(s.obj)) + __repr__ = lambda s: str(s.escape(repr(s.obj))) + __int__ = lambda s: int(s.obj) + __float__ = lambda s: float(s.obj) + + +# we have to import it down here as the speedups and native +# modules imports the markup type which is define above. +try: + from markupsafe._speedups import escape, escape_silent, soft_unicode +except ImportError: + from markupsafe._native import escape, escape_silent, soft_unicode + +if not PY2: + soft_str = soft_unicode + __all__.append('soft_str') diff --git a/python/markupsafe/_compat.py b/python/markupsafe/_compat.py new file mode 100644 index 0000000..62e5632 --- /dev/null +++ b/python/markupsafe/_compat.py @@ -0,0 +1,26 @@ +# -*- coding: utf-8 -*- +""" + markupsafe._compat + ~~~~~~~~~~~~~~~~~~ + + Compatibility module for different Python versions. + + :copyright: (c) 2013 by Armin Ronacher. + :license: BSD, see LICENSE for more details. +""" +import sys + +PY2 = sys.version_info[0] == 2 + +if not PY2: + text_type = str + string_types = (str,) + unichr = chr + int_types = (int,) + iteritems = lambda x: iter(x.items()) +else: + text_type = unicode + string_types = (str, unicode) + unichr = unichr + int_types = (int, long) + iteritems = lambda x: x.iteritems() diff --git a/python/markupsafe/_constants.py b/python/markupsafe/_constants.py new file mode 100644 index 0000000..919bf03 --- /dev/null +++ b/python/markupsafe/_constants.py @@ -0,0 +1,267 @@ +# -*- coding: utf-8 -*- +""" + markupsafe._constants + ~~~~~~~~~~~~~~~~~~~~~ + + Highlevel implementation of the Markup string. + + :copyright: (c) 2010 by Armin Ronacher. + :license: BSD, see LICENSE for more details. 
+""" + + +HTML_ENTITIES = { + 'AElig': 198, + 'Aacute': 193, + 'Acirc': 194, + 'Agrave': 192, + 'Alpha': 913, + 'Aring': 197, + 'Atilde': 195, + 'Auml': 196, + 'Beta': 914, + 'Ccedil': 199, + 'Chi': 935, + 'Dagger': 8225, + 'Delta': 916, + 'ETH': 208, + 'Eacute': 201, + 'Ecirc': 202, + 'Egrave': 200, + 'Epsilon': 917, + 'Eta': 919, + 'Euml': 203, + 'Gamma': 915, + 'Iacute': 205, + 'Icirc': 206, + 'Igrave': 204, + 'Iota': 921, + 'Iuml': 207, + 'Kappa': 922, + 'Lambda': 923, + 'Mu': 924, + 'Ntilde': 209, + 'Nu': 925, + 'OElig': 338, + 'Oacute': 211, + 'Ocirc': 212, + 'Ograve': 210, + 'Omega': 937, + 'Omicron': 927, + 'Oslash': 216, + 'Otilde': 213, + 'Ouml': 214, + 'Phi': 934, + 'Pi': 928, + 'Prime': 8243, + 'Psi': 936, + 'Rho': 929, + 'Scaron': 352, + 'Sigma': 931, + 'THORN': 222, + 'Tau': 932, + 'Theta': 920, + 'Uacute': 218, + 'Ucirc': 219, + 'Ugrave': 217, + 'Upsilon': 933, + 'Uuml': 220, + 'Xi': 926, + 'Yacute': 221, + 'Yuml': 376, + 'Zeta': 918, + 'aacute': 225, + 'acirc': 226, + 'acute': 180, + 'aelig': 230, + 'agrave': 224, + 'alefsym': 8501, + 'alpha': 945, + 'amp': 38, + 'and': 8743, + 'ang': 8736, + 'apos': 39, + 'aring': 229, + 'asymp': 8776, + 'atilde': 227, + 'auml': 228, + 'bdquo': 8222, + 'beta': 946, + 'brvbar': 166, + 'bull': 8226, + 'cap': 8745, + 'ccedil': 231, + 'cedil': 184, + 'cent': 162, + 'chi': 967, + 'circ': 710, + 'clubs': 9827, + 'cong': 8773, + 'copy': 169, + 'crarr': 8629, + 'cup': 8746, + 'curren': 164, + 'dArr': 8659, + 'dagger': 8224, + 'darr': 8595, + 'deg': 176, + 'delta': 948, + 'diams': 9830, + 'divide': 247, + 'eacute': 233, + 'ecirc': 234, + 'egrave': 232, + 'empty': 8709, + 'emsp': 8195, + 'ensp': 8194, + 'epsilon': 949, + 'equiv': 8801, + 'eta': 951, + 'eth': 240, + 'euml': 235, + 'euro': 8364, + 'exist': 8707, + 'fnof': 402, + 'forall': 8704, + 'frac12': 189, + 'frac14': 188, + 'frac34': 190, + 'frasl': 8260, + 'gamma': 947, + 'ge': 8805, + 'gt': 62, + 'hArr': 8660, + 'harr': 8596, + 'hearts': 9829, + 'hellip': 8230, + 'iacute': 237, + 'icirc': 238, + 'iexcl': 161, + 'igrave': 236, + 'image': 8465, + 'infin': 8734, + 'int': 8747, + 'iota': 953, + 'iquest': 191, + 'isin': 8712, + 'iuml': 239, + 'kappa': 954, + 'lArr': 8656, + 'lambda': 955, + 'lang': 9001, + 'laquo': 171, + 'larr': 8592, + 'lceil': 8968, + 'ldquo': 8220, + 'le': 8804, + 'lfloor': 8970, + 'lowast': 8727, + 'loz': 9674, + 'lrm': 8206, + 'lsaquo': 8249, + 'lsquo': 8216, + 'lt': 60, + 'macr': 175, + 'mdash': 8212, + 'micro': 181, + 'middot': 183, + 'minus': 8722, + 'mu': 956, + 'nabla': 8711, + 'nbsp': 160, + 'ndash': 8211, + 'ne': 8800, + 'ni': 8715, + 'not': 172, + 'notin': 8713, + 'nsub': 8836, + 'ntilde': 241, + 'nu': 957, + 'oacute': 243, + 'ocirc': 244, + 'oelig': 339, + 'ograve': 242, + 'oline': 8254, + 'omega': 969, + 'omicron': 959, + 'oplus': 8853, + 'or': 8744, + 'ordf': 170, + 'ordm': 186, + 'oslash': 248, + 'otilde': 245, + 'otimes': 8855, + 'ouml': 246, + 'para': 182, + 'part': 8706, + 'permil': 8240, + 'perp': 8869, + 'phi': 966, + 'pi': 960, + 'piv': 982, + 'plusmn': 177, + 'pound': 163, + 'prime': 8242, + 'prod': 8719, + 'prop': 8733, + 'psi': 968, + 'quot': 34, + 'rArr': 8658, + 'radic': 8730, + 'rang': 9002, + 'raquo': 187, + 'rarr': 8594, + 'rceil': 8969, + 'rdquo': 8221, + 'real': 8476, + 'reg': 174, + 'rfloor': 8971, + 'rho': 961, + 'rlm': 8207, + 'rsaquo': 8250, + 'rsquo': 8217, + 'sbquo': 8218, + 'scaron': 353, + 'sdot': 8901, + 'sect': 167, + 'shy': 173, + 'sigma': 963, + 'sigmaf': 962, + 'sim': 8764, + 'spades': 9824, + 'sub': 8834, + 'sube': 8838, + 'sum': 
8721,
+    'sup': 8835,
+    'sup1': 185,
+    'sup2': 178,
+    'sup3': 179,
+    'supe': 8839,
+    'szlig': 223,
+    'tau': 964,
+    'there4': 8756,
+    'theta': 952,
+    'thetasym': 977,
+    'thinsp': 8201,
+    'thorn': 254,
+    'tilde': 732,
+    'times': 215,
+    'trade': 8482,
+    'uArr': 8657,
+    'uacute': 250,
+    'uarr': 8593,
+    'ucirc': 251,
+    'ugrave': 249,
+    'uml': 168,
+    'upsih': 978,
+    'upsilon': 965,
+    'uuml': 252,
+    'weierp': 8472,
+    'xi': 958,
+    'yacute': 253,
+    'yen': 165,
+    'yuml': 255,
+    'zeta': 950,
+    'zwj': 8205,
+    'zwnj': 8204
+}
diff --git a/python/markupsafe/_native.py b/python/markupsafe/_native.py
new file mode 100644
index 0000000..5e83f10
--- /dev/null
+++ b/python/markupsafe/_native.py
@@ -0,0 +1,46 @@
+# -*- coding: utf-8 -*-
+"""
+    markupsafe._native
+    ~~~~~~~~~~~~~~~~~~
+
+    Native Python implementation used when the C module is not compiled.
+
+    :copyright: (c) 2010 by Armin Ronacher.
+    :license: BSD, see LICENSE for more details.
+"""
+from markupsafe import Markup
+from markupsafe._compat import text_type
+
+
+def escape(s):
+    """Convert the characters &, <, >, ' and " in string s to HTML-safe
+    sequences.  Use this if you need to display text that might contain
+    such characters in HTML.  Marks return value as markup string.
+    """
+    if hasattr(s, '__html__'):
+        return s.__html__()
+    return Markup(text_type(s)
+        .replace('&', '&amp;')
+        .replace('>', '&gt;')
+        .replace('<', '&lt;')
+        .replace("'", '&#39;')
+        .replace('"', '&#34;')
+    )
+
+
+def escape_silent(s):
+    """Like :func:`escape` but converts `None` into an empty
+    markup string.
+    """
+    if s is None:
+        return Markup()
+    return escape(s)
+
+
+def soft_unicode(s):
+    """Make a string unicode if it isn't already.  That way a markup
+    string is not converted back to unicode.
+    """
+    if not isinstance(s, text_type):
+        s = text_type(s)
+    return s
diff --git a/python/markupsafe/_speedups.c b/python/markupsafe/_speedups.c
new file mode 100644
index 0000000..d779a68
--- /dev/null
+++ b/python/markupsafe/_speedups.c
@@ -0,0 +1,239 @@
+/**
+ * markupsafe._speedups
+ * ~~~~~~~~~~~~~~~~~~~~
+ *
+ * This module implements functions for automatic escaping in C for better
+ * performance.
+ *
+ * :copyright: (c) 2010 by Armin Ronacher.
+ * :license: BSD.
+ */
+
+#include <Python.h>
+
+#define ESCAPED_CHARS_TABLE_SIZE 63
+#define UNICHR(x) (PyUnicode_AS_UNICODE((PyUnicodeObject*)PyUnicode_DecodeASCII(x, strlen(x), NULL)));
+
+#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
+typedef int Py_ssize_t;
+#define PY_SSIZE_T_MAX INT_MAX
+#define PY_SSIZE_T_MIN INT_MIN
+#endif
+
+
+static PyObject* markup;
+static Py_ssize_t escaped_chars_delta_len[ESCAPED_CHARS_TABLE_SIZE];
+static Py_UNICODE *escaped_chars_repl[ESCAPED_CHARS_TABLE_SIZE];
+
+static int
+init_constants(void)
+{
+    PyObject *module;
+    /* mapping of characters to replace */
+    escaped_chars_repl['"'] = UNICHR("&#34;");
+    escaped_chars_repl['\''] = UNICHR("&#39;");
+    escaped_chars_repl['&'] = UNICHR("&amp;");
+    escaped_chars_repl['<'] = UNICHR("&lt;");
+    escaped_chars_repl['>'] = UNICHR("&gt;");
+
+    /* lengths of those characters when replaced - 1 */
+    memset(escaped_chars_delta_len, 0, sizeof (escaped_chars_delta_len));
+    escaped_chars_delta_len['"'] = escaped_chars_delta_len['\''] = \
+        escaped_chars_delta_len['&'] = 4;
+    escaped_chars_delta_len['<'] = escaped_chars_delta_len['>'] = 3;
+
+    /* import markup type so that we can mark the return value */
+    module = PyImport_ImportModule("markupsafe");
+    if (!module)
+        return 0;
+    markup = PyObject_GetAttrString(module, "Markup");
+    Py_DECREF(module);
+
+    return 1;
+}
+
+static PyObject*
+escape_unicode(PyUnicodeObject *in)
+{
+    PyUnicodeObject *out;
+    Py_UNICODE *inp = PyUnicode_AS_UNICODE(in);
+    const Py_UNICODE *inp_end = PyUnicode_AS_UNICODE(in) + PyUnicode_GET_SIZE(in);
+    Py_UNICODE *next_escp;
+    Py_UNICODE *outp;
+    Py_ssize_t delta=0, erepl=0, delta_len=0;
+
+    /* First we need to figure out how long the escaped string will be */
+    while (*(inp) || inp < inp_end) {
+        if (*inp < ESCAPED_CHARS_TABLE_SIZE) {
+            delta += escaped_chars_delta_len[*inp];
+            erepl += !!escaped_chars_delta_len[*inp];
+        }
+        ++inp;
+    }
+
+    /* Do we need to escape anything at all?
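+       If not (erepl == 0), the first pass above found no escapable
+       characters, so the original string is returned as-is with its
+       refcount bumped and no new object is allocated.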
*/ + if (!erepl) { + Py_INCREF(in); + return (PyObject*)in; + } + + out = (PyUnicodeObject*)PyUnicode_FromUnicode(NULL, PyUnicode_GET_SIZE(in) + delta); + if (!out) + return NULL; + + outp = PyUnicode_AS_UNICODE(out); + inp = PyUnicode_AS_UNICODE(in); + while (erepl-- > 0) { + /* look for the next substitution */ + next_escp = inp; + while (next_escp < inp_end) { + if (*next_escp < ESCAPED_CHARS_TABLE_SIZE && + (delta_len = escaped_chars_delta_len[*next_escp])) { + ++delta_len; + break; + } + ++next_escp; + } + + if (next_escp > inp) { + /* copy unescaped chars between inp and next_escp */ + Py_UNICODE_COPY(outp, inp, next_escp-inp); + outp += next_escp - inp; + } + + /* escape 'next_escp' */ + Py_UNICODE_COPY(outp, escaped_chars_repl[*next_escp], delta_len); + outp += delta_len; + + inp = next_escp + 1; + } + if (inp < inp_end) + Py_UNICODE_COPY(outp, inp, PyUnicode_GET_SIZE(in) - (inp - PyUnicode_AS_UNICODE(in))); + + return (PyObject*)out; +} + + +static PyObject* +escape(PyObject *self, PyObject *text) +{ + PyObject *s = NULL, *rv = NULL, *html; + + /* we don't have to escape integers, bools or floats */ + if (PyLong_CheckExact(text) || +#if PY_MAJOR_VERSION < 3 + PyInt_CheckExact(text) || +#endif + PyFloat_CheckExact(text) || PyBool_Check(text) || + text == Py_None) + return PyObject_CallFunctionObjArgs(markup, text, NULL); + + /* if the object has an __html__ method that performs the escaping */ + html = PyObject_GetAttrString(text, "__html__"); + if (html) { + rv = PyObject_CallObject(html, NULL); + Py_DECREF(html); + return rv; + } + + /* otherwise make the object unicode if it isn't, then escape */ + PyErr_Clear(); + if (!PyUnicode_Check(text)) { +#if PY_MAJOR_VERSION < 3 + PyObject *unicode = PyObject_Unicode(text); +#else + PyObject *unicode = PyObject_Str(text); +#endif + if (!unicode) + return NULL; + s = escape_unicode((PyUnicodeObject*)unicode); + Py_DECREF(unicode); + } + else + s = escape_unicode((PyUnicodeObject*)text); + + /* convert the unicode string into a markup object. */ + rv = PyObject_CallFunctionObjArgs(markup, (PyObject*)s, NULL); + Py_DECREF(s); + return rv; +} + + +static PyObject* +escape_silent(PyObject *self, PyObject *text) +{ + if (text != Py_None) + return escape(self, text); + return PyObject_CallFunctionObjArgs(markup, NULL); +} + + +static PyObject* +soft_unicode(PyObject *self, PyObject *s) +{ + if (!PyUnicode_Check(s)) +#if PY_MAJOR_VERSION < 3 + return PyObject_Unicode(s); +#else + return PyObject_Str(s); +#endif + Py_INCREF(s); + return s; +} + + +static PyMethodDef module_methods[] = { + {"escape", (PyCFunction)escape, METH_O, + "escape(s) -> markup\n\n" + "Convert the characters &, <, >, ', and \" in string s to HTML-safe\n" + "sequences. Use this if you need to display text that might contain\n" + "such characters in HTML. Marks return value as markup string."}, + {"escape_silent", (PyCFunction)escape_silent, METH_O, + "escape_silent(s) -> markup\n\n" + "Like escape but converts None to an empty string."}, + {"soft_unicode", (PyCFunction)soft_unicode, METH_O, + "soft_unicode(object) -> string\n\n" + "Make a string unicode if it isn't already. 
That way a markup\n" + "string is not converted back to unicode."}, + {NULL, NULL, 0, NULL} /* Sentinel */ +}; + + +#if PY_MAJOR_VERSION < 3 + +#ifndef PyMODINIT_FUNC /* declarations for DLL import/export */ +#define PyMODINIT_FUNC void +#endif +PyMODINIT_FUNC +init_speedups(void) +{ + if (!init_constants()) + return; + + Py_InitModule3("markupsafe._speedups", module_methods, ""); +} + +#else /* Python 3.x module initialization */ + +static struct PyModuleDef module_definition = { + PyModuleDef_HEAD_INIT, + "markupsafe._speedups", + NULL, + -1, + module_methods, + NULL, + NULL, + NULL, + NULL +}; + +PyMODINIT_FUNC +PyInit__speedups(void) +{ + if (!init_constants()) + return NULL; + + return PyModule_Create(&module_definition); +} + +#endif diff --git a/python/werkzeug/__init__.py b/python/werkzeug/__init__.py new file mode 100644 index 0000000..e460e75 --- /dev/null +++ b/python/werkzeug/__init__.py @@ -0,0 +1,233 @@ +# -*- coding: utf-8 -*- +""" + werkzeug + ~~~~~~~~ + + Werkzeug is the Swiss Army knife of Python web development. + + It provides useful classes and functions for any WSGI application to make + the life of a python web developer much easier. All of the provided + classes are independent from each other so you can mix it with any other + library. + + + :copyright: 2007 Pallets + :license: BSD-3-Clause +""" +import sys +from types import ModuleType + +__version__ = "0.15.4" + +# This import magic raises concerns quite often which is why the implementation +# and motivation is explained here in detail now. +# +# The majority of the functions and classes provided by Werkzeug work on the +# HTTP and WSGI layer. There is no useful grouping for those which is why +# they are all importable from "werkzeug" instead of the modules where they are +# implemented. The downside of that is, that now everything would be loaded at +# once, even if unused. +# +# The implementation of a lazy-loading module in this file replaces the +# werkzeug package when imported from within. Attribute access to the werkzeug +# module will then lazily import from the modules that implement the objects. 
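+#
+# As a rough illustration (using the mapping defined just below): the first
+# access such as
+#
+#     import werkzeug
+#     werkzeug.redirect('/login')
+#
+# imports werkzeug.utils, copies its exported names onto this module object,
+# and returns the `redirect` callable; later accesses hit the cached
+# attribute directly.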
+ +# import mapping to objects in other modules +all_by_module = { + "werkzeug.debug": ["DebuggedApplication"], + "werkzeug.local": [ + "Local", + "LocalManager", + "LocalProxy", + "LocalStack", + "release_local", + ], + "werkzeug.serving": ["run_simple"], + "werkzeug.test": ["Client", "EnvironBuilder", "create_environ", "run_wsgi_app"], + "werkzeug.testapp": ["test_app"], + "werkzeug.exceptions": ["abort", "Aborter"], + "werkzeug.urls": [ + "url_decode", + "url_encode", + "url_quote", + "url_quote_plus", + "url_unquote", + "url_unquote_plus", + "url_fix", + "Href", + "iri_to_uri", + "uri_to_iri", + ], + "werkzeug.formparser": ["parse_form_data"], + "werkzeug.utils": [ + "escape", + "environ_property", + "append_slash_redirect", + "redirect", + "cached_property", + "import_string", + "dump_cookie", + "parse_cookie", + "unescape", + "format_string", + "find_modules", + "header_property", + "html", + "xhtml", + "HTMLBuilder", + "validate_arguments", + "ArgumentValidationError", + "bind_arguments", + "secure_filename", + ], + "werkzeug.wsgi": [ + "get_current_url", + "get_host", + "pop_path_info", + "peek_path_info", + "ClosingIterator", + "FileWrapper", + "make_line_iter", + "LimitedStream", + "responder", + "wrap_file", + "extract_path_info", + ], + "werkzeug.datastructures": [ + "MultiDict", + "CombinedMultiDict", + "Headers", + "EnvironHeaders", + "ImmutableList", + "ImmutableDict", + "ImmutableMultiDict", + "TypeConversionDict", + "ImmutableTypeConversionDict", + "Accept", + "MIMEAccept", + "CharsetAccept", + "LanguageAccept", + "RequestCacheControl", + "ResponseCacheControl", + "ETags", + "HeaderSet", + "WWWAuthenticate", + "Authorization", + "FileMultiDict", + "CallbackDict", + "FileStorage", + "OrderedMultiDict", + "ImmutableOrderedMultiDict", + ], + "werkzeug.useragents": ["UserAgent"], + "werkzeug.http": [ + "parse_etags", + "parse_date", + "http_date", + "cookie_date", + "parse_cache_control_header", + "is_resource_modified", + "parse_accept_header", + "parse_set_header", + "quote_etag", + "unquote_etag", + "generate_etag", + "dump_header", + "parse_list_header", + "parse_dict_header", + "parse_authorization_header", + "parse_www_authenticate_header", + "remove_entity_headers", + "is_entity_header", + "remove_hop_by_hop_headers", + "parse_options_header", + "dump_options_header", + "is_hop_by_hop_header", + "unquote_header_value", + "quote_header_value", + "HTTP_STATUS_CODES", + ], + "werkzeug.wrappers": [ + "BaseResponse", + "BaseRequest", + "Request", + "Response", + "AcceptMixin", + "ETagRequestMixin", + "ETagResponseMixin", + "ResponseStreamMixin", + "CommonResponseDescriptorsMixin", + "UserAgentMixin", + "AuthorizationMixin", + "WWWAuthenticateMixin", + "CommonRequestDescriptorsMixin", + ], + "werkzeug.middleware.dispatcher": ["DispatcherMiddleware"], + "werkzeug.middleware.shared_data": ["SharedDataMiddleware"], + "werkzeug.security": ["generate_password_hash", "check_password_hash"], + # the undocumented easteregg ;-) + "werkzeug._internal": ["_easteregg"], +} + +# modules that should be imported when accessed as attributes of werkzeug +attribute_modules = frozenset(["exceptions", "routing"]) + +object_origins = {} +for module, items in all_by_module.items(): + for item in items: + object_origins[item] = module + + +class module(ModuleType): + """Automatically import objects from the modules.""" + + def __getattr__(self, name): + if name in object_origins: + module = __import__(object_origins[name], None, None, [name]) + for extra_name in all_by_module[module.__name__]: + 
setattr(self, extra_name, getattr(module, extra_name)) + return getattr(module, name) + elif name in attribute_modules: + __import__("werkzeug." + name) + return ModuleType.__getattribute__(self, name) + + def __dir__(self): + """Just show what we want to show.""" + result = list(new_module.__all__) + result.extend( + ( + "__file__", + "__doc__", + "__all__", + "__docformat__", + "__name__", + "__path__", + "__package__", + "__version__", + ) + ) + return result + + +# keep a reference to this module so that it's not garbage collected +old_module = sys.modules["werkzeug"] + + +# setup the new module and patch it into the dict of loaded modules +new_module = sys.modules["werkzeug"] = module("werkzeug") +new_module.__dict__.update( + { + "__file__": __file__, + "__package__": "werkzeug", + "__path__": __path__, + "__doc__": __doc__, + "__version__": __version__, + "__all__": tuple(object_origins) + tuple(attribute_modules), + "__docformat__": "restructuredtext en", + } +) + + +# Due to bootstrapping issues we need to import exceptions here. +# Don't ask :-( +__import__("werkzeug.exceptions") diff --git a/python/werkzeug/_compat.py b/python/werkzeug/_compat.py new file mode 100644 index 0000000..1097983 --- /dev/null +++ b/python/werkzeug/_compat.py @@ -0,0 +1,219 @@ +# flake8: noqa +# This whole file is full of lint errors +import functools +import operator +import sys + +try: + import builtins +except ImportError: + import __builtin__ as builtins + + +PY2 = sys.version_info[0] == 2 +WIN = sys.platform.startswith("win") + +_identity = lambda x: x + +if PY2: + unichr = unichr + text_type = unicode + string_types = (str, unicode) + integer_types = (int, long) + + iterkeys = lambda d, *args, **kwargs: d.iterkeys(*args, **kwargs) + itervalues = lambda d, *args, **kwargs: d.itervalues(*args, **kwargs) + iteritems = lambda d, *args, **kwargs: d.iteritems(*args, **kwargs) + + iterlists = lambda d, *args, **kwargs: d.iterlists(*args, **kwargs) + iterlistvalues = lambda d, *args, **kwargs: d.iterlistvalues(*args, **kwargs) + + int_to_byte = chr + iter_bytes = iter + + import collections as collections_abc + + exec("def reraise(tp, value, tb=None):\n raise tp, value, tb") + + def fix_tuple_repr(obj): + def __repr__(self): + cls = self.__class__ + return "%s(%s)" % ( + cls.__name__, + ", ".join( + "%s=%r" % (field, self[index]) + for index, field in enumerate(cls._fields) + ), + ) + + obj.__repr__ = __repr__ + return obj + + def implements_iterator(cls): + cls.next = cls.__next__ + del cls.__next__ + return cls + + def implements_to_string(cls): + cls.__unicode__ = cls.__str__ + cls.__str__ = lambda x: x.__unicode__().encode("utf-8") + return cls + + def native_string_result(func): + def wrapper(*args, **kwargs): + return func(*args, **kwargs).encode("utf-8") + + return functools.update_wrapper(wrapper, func) + + def implements_bool(cls): + cls.__nonzero__ = cls.__bool__ + del cls.__bool__ + return cls + + from itertools import imap, izip, ifilter + + range_type = xrange + + from StringIO import StringIO + from cStringIO import StringIO as BytesIO + + NativeStringIO = BytesIO + + def make_literal_wrapper(reference): + return _identity + + def normalize_string_tuple(tup): + """Normalizes a string tuple to a common type. Following Python 2 + rules, upgrades to unicode are implicit. + """ + if any(isinstance(x, text_type) for x in tup): + return tuple(to_unicode(x) for x in tup) + return tup + + def try_coerce_native(s): + """Try to coerce a unicode string to native if possible. 
Otherwise, + leave it as unicode. + """ + try: + return to_native(s) + except UnicodeError: + return s + + wsgi_get_bytes = _identity + + def wsgi_decoding_dance(s, charset="utf-8", errors="replace"): + return s.decode(charset, errors) + + def wsgi_encoding_dance(s, charset="utf-8", errors="replace"): + if isinstance(s, bytes): + return s + return s.encode(charset, errors) + + def to_bytes(x, charset=sys.getdefaultencoding(), errors="strict"): + if x is None: + return None + if isinstance(x, (bytes, bytearray, buffer)): + return bytes(x) + if isinstance(x, unicode): + return x.encode(charset, errors) + raise TypeError("Expected bytes") + + def to_native(x, charset=sys.getdefaultencoding(), errors="strict"): + if x is None or isinstance(x, str): + return x + return x.encode(charset, errors) + + +else: + unichr = chr + text_type = str + string_types = (str,) + integer_types = (int,) + + iterkeys = lambda d, *args, **kwargs: iter(d.keys(*args, **kwargs)) + itervalues = lambda d, *args, **kwargs: iter(d.values(*args, **kwargs)) + iteritems = lambda d, *args, **kwargs: iter(d.items(*args, **kwargs)) + + iterlists = lambda d, *args, **kwargs: iter(d.lists(*args, **kwargs)) + iterlistvalues = lambda d, *args, **kwargs: iter(d.listvalues(*args, **kwargs)) + + int_to_byte = operator.methodcaller("to_bytes", 1, "big") + iter_bytes = functools.partial(map, int_to_byte) + + import collections.abc as collections_abc + + def reraise(tp, value, tb=None): + if value.__traceback__ is not tb: + raise value.with_traceback(tb) + raise value + + fix_tuple_repr = _identity + implements_iterator = _identity + implements_to_string = _identity + implements_bool = _identity + native_string_result = _identity + imap = map + izip = zip + ifilter = filter + range_type = range + + from io import StringIO, BytesIO + + NativeStringIO = StringIO + + _latin1_encode = operator.methodcaller("encode", "latin1") + + def make_literal_wrapper(reference): + if isinstance(reference, text_type): + return _identity + return _latin1_encode + + def normalize_string_tuple(tup): + """Ensures that all types in the tuple are either strings + or bytes. 
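+
+        A short sketch of the contract::
+
+            normalize_string_tuple(('a', 'b'))    # returned unchanged
+            normalize_string_tuple((b'a', b'b'))  # returned unchanged
+            normalize_string_tuple(('a', b'b'))   # raises TypeError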
+ """ + tupiter = iter(tup) + is_text = isinstance(next(tupiter, None), text_type) + for arg in tupiter: + if isinstance(arg, text_type) != is_text: + raise TypeError( + "Cannot mix str and bytes arguments (got %s)" % repr(tup) + ) + return tup + + try_coerce_native = _identity + wsgi_get_bytes = _latin1_encode + + def wsgi_decoding_dance(s, charset="utf-8", errors="replace"): + return s.encode("latin1").decode(charset, errors) + + def wsgi_encoding_dance(s, charset="utf-8", errors="replace"): + if isinstance(s, text_type): + s = s.encode(charset) + return s.decode("latin1", errors) + + def to_bytes(x, charset=sys.getdefaultencoding(), errors="strict"): + if x is None: + return None + if isinstance(x, (bytes, bytearray, memoryview)): # noqa + return bytes(x) + if isinstance(x, str): + return x.encode(charset, errors) + raise TypeError("Expected bytes") + + def to_native(x, charset=sys.getdefaultencoding(), errors="strict"): + if x is None or isinstance(x, str): + return x + return x.decode(charset, errors) + + +def to_unicode( + x, charset=sys.getdefaultencoding(), errors="strict", allow_none_charset=False +): + if x is None: + return None + if not isinstance(x, bytes): + return text_type(x) + if charset is None and allow_none_charset: + return x + return x.decode(charset, errors) diff --git a/python/werkzeug/_internal.py b/python/werkzeug/_internal.py new file mode 100644 index 0000000..90e3dd9 --- /dev/null +++ b/python/werkzeug/_internal.py @@ -0,0 +1,484 @@ +# -*- coding: utf-8 -*- +""" + werkzeug._internal + ~~~~~~~~~~~~~~~~~~ + + This module provides internally used helpers and constants. + + :copyright: 2007 Pallets + :license: BSD-3-Clause +""" +import inspect +import logging +import re +import string +from datetime import date +from datetime import datetime +from itertools import chain +from weakref import WeakKeyDictionary + +from ._compat import int_to_byte +from ._compat import integer_types +from ._compat import iter_bytes +from ._compat import range_type +from ._compat import text_type + + +_logger = None +_signature_cache = WeakKeyDictionary() +_epoch_ord = date(1970, 1, 1).toordinal() +_cookie_params = { + b"expires", + b"path", + b"comment", + b"max-age", + b"secure", + b"httponly", + b"version", +} +_legal_cookie_chars = ( + string.ascii_letters + string.digits + u"/=!#$%&'*+-.^_`|~:" +).encode("ascii") + +_cookie_quoting_map = {b",": b"\\054", b";": b"\\073", b'"': b'\\"', b"\\": b"\\\\"} +for _i in chain(range_type(32), range_type(127, 256)): + _cookie_quoting_map[int_to_byte(_i)] = ("\\%03o" % _i).encode("latin1") + +_octal_re = re.compile(br"\\[0-3][0-7][0-7]") +_quote_re = re.compile(br"[\\].") +_legal_cookie_chars_re = br"[\w\d!#%&\'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\=]" +_cookie_re = re.compile( + br""" + (?P<key>[^=;]+) + (?:\s*=\s* + (?P<val> + "(?:[^\\"]|\\.)*" | + (?:.*?) + ) + )? + \s*; +""", + flags=re.VERBOSE, +) + + +class _Missing(object): + def __repr__(self): + return "no value" + + def __reduce__(self): + return "_missing" + + +_missing = _Missing() + + +def _get_environ(obj): + env = getattr(obj, "environ", obj) + assert isinstance(env, dict), ( + "%r is not a WSGI environment (has to be a dict)" % type(obj).__name__ + ) + return env + + +def _has_level_handler(logger): + """Check if there is a handler in the logging chain that will handle + the given logger's effective level. 
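+
+    An illustrative setup (handler levels default to NOTSET, i.e. 0, so
+    they pass the `handler.level <= level` test)::
+
+        import logging
+        logging.getLogger().addHandler(logging.StreamHandler())
+        child = logging.getLogger('werkzeug')
+        child.setLevel(logging.ERROR)
+        _has_level_handler(child)   # True: found on the root via .parent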
+ """ + level = logger.getEffectiveLevel() + current = logger + + while current: + if any(handler.level <= level for handler in current.handlers): + return True + + if not current.propagate: + break + + current = current.parent + + return False + + +def _log(type, message, *args, **kwargs): + """Log a message to the 'werkzeug' logger. + + The logger is created the first time it is needed. If there is no + level set, it is set to :data:`logging.INFO`. If there is no handler + for the logger's effective level, a :class:`logging.StreamHandler` + is added. + """ + global _logger + + if _logger is None: + _logger = logging.getLogger("werkzeug") + + if _logger.level == logging.NOTSET: + _logger.setLevel(logging.INFO) + + if not _has_level_handler(_logger): + _logger.addHandler(logging.StreamHandler()) + + getattr(_logger, type)(message.rstrip(), *args, **kwargs) + + +def _parse_signature(func): + """Return a signature object for the function.""" + if hasattr(func, "im_func"): + func = func.im_func + + # if we have a cached validator for this function, return it + parse = _signature_cache.get(func) + if parse is not None: + return parse + + # inspect the function signature and collect all the information + if hasattr(inspect, "getfullargspec"): + tup = inspect.getfullargspec(func) + else: + tup = inspect.getargspec(func) + positional, vararg_var, kwarg_var, defaults = tup[:4] + defaults = defaults or () + arg_count = len(positional) + arguments = [] + for idx, name in enumerate(positional): + if isinstance(name, list): + raise TypeError( + "cannot parse functions that unpack tuples in the function signature" + ) + try: + default = defaults[idx - arg_count] + except IndexError: + param = (name, False, None) + else: + param = (name, True, default) + arguments.append(param) + arguments = tuple(arguments) + + def parse(args, kwargs): + new_args = [] + missing = [] + extra = {} + + # consume as many arguments as positional as possible + for idx, (name, has_default, default) in enumerate(arguments): + try: + new_args.append(args[idx]) + except IndexError: + try: + new_args.append(kwargs.pop(name)) + except KeyError: + if has_default: + new_args.append(default) + else: + missing.append(name) + else: + if name in kwargs: + extra[name] = kwargs.pop(name) + + # handle extra arguments + extra_positional = args[arg_count:] + if vararg_var is not None: + new_args.extend(extra_positional) + extra_positional = () + if kwargs and kwarg_var is None: + extra.update(kwargs) + kwargs = {} + + return ( + new_args, + kwargs, + missing, + extra, + extra_positional, + arguments, + vararg_var, + kwarg_var, + ) + + _signature_cache[func] = parse + return parse + + +def _date_to_unix(arg): + """Converts a timetuple, integer or datetime object into the seconds from + epoch in utc. 
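+
+    A quick check of the arithmetic::
+
+        from datetime import datetime
+        _date_to_unix(datetime(1970, 1, 2))   # 86400 (one day after the epoch)
+        _date_to_unix(1234567890.5)           # 1234567890 (numbers pass through)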
+ """ + if isinstance(arg, datetime): + arg = arg.utctimetuple() + elif isinstance(arg, integer_types + (float,)): + return int(arg) + year, month, day, hour, minute, second = arg[:6] + days = date(year, month, 1).toordinal() - _epoch_ord + day - 1 + hours = days * 24 + hour + minutes = hours * 60 + minute + seconds = minutes * 60 + second + return seconds + + +class _DictAccessorProperty(object): + """Baseclass for `environ_property` and `header_property`.""" + + read_only = False + + def __init__( + self, + name, + default=None, + load_func=None, + dump_func=None, + read_only=None, + doc=None, + ): + self.name = name + self.default = default + self.load_func = load_func + self.dump_func = dump_func + if read_only is not None: + self.read_only = read_only + self.__doc__ = doc + + def __get__(self, obj, type=None): + if obj is None: + return self + storage = self.lookup(obj) + if self.name not in storage: + return self.default + rv = storage[self.name] + if self.load_func is not None: + try: + rv = self.load_func(rv) + except (ValueError, TypeError): + rv = self.default + return rv + + def __set__(self, obj, value): + if self.read_only: + raise AttributeError("read only property") + if self.dump_func is not None: + value = self.dump_func(value) + self.lookup(obj)[self.name] = value + + def __delete__(self, obj): + if self.read_only: + raise AttributeError("read only property") + self.lookup(obj).pop(self.name, None) + + def __repr__(self): + return "<%s %s>" % (self.__class__.__name__, self.name) + + +def _cookie_quote(b): + buf = bytearray() + all_legal = True + _lookup = _cookie_quoting_map.get + _push = buf.extend + + for char in iter_bytes(b): + if char not in _legal_cookie_chars: + all_legal = False + char = _lookup(char, char) + _push(char) + + if all_legal: + return bytes(buf) + return bytes(b'"' + buf + b'"') + + +def _cookie_unquote(b): + if len(b) < 2: + return b + if b[:1] != b'"' or b[-1:] != b'"': + return b + + b = b[1:-1] + + i = 0 + n = len(b) + rv = bytearray() + _push = rv.extend + + while 0 <= i < n: + o_match = _octal_re.search(b, i) + q_match = _quote_re.search(b, i) + if not o_match and not q_match: + rv.extend(b[i:]) + break + j = k = -1 + if o_match: + j = o_match.start(0) + if q_match: + k = q_match.start(0) + if q_match and (not o_match or k < j): + _push(b[i:k]) + _push(b[k + 1 : k + 2]) + i = k + 2 + else: + _push(b[i:j]) + rv.append(int(b[j + 1 : j + 4], 8)) + i = j + 4 + + return bytes(rv) + + +def _cookie_parse_impl(b): + """Lowlevel cookie parsing facility that operates on bytes.""" + i = 0 + n = len(b) + + while i < n: + match = _cookie_re.search(b + b";", i) + if not match: + break + + key = match.group("key").strip() + value = match.group("val") or b"" + i = match.end(0) + + # Ignore parameters. We have no interest in them. + if key.lower() not in _cookie_params: + yield _cookie_unquote(key), _cookie_unquote(value) + + +def _encode_idna(domain): + # If we're given bytes, make sure they fit into ASCII + if not isinstance(domain, text_type): + domain.decode("ascii") + return domain + + # Otherwise check if it's already ascii, then return + try: + return domain.encode("ascii") + except UnicodeError: + pass + + # Otherwise encode each part separately + parts = domain.split(".") + for idx, part in enumerate(parts): + parts[idx] = part.encode("idna") + return b".".join(parts) + + +def _decode_idna(domain): + # If the input is a string try to encode it to ascii to + # do the idna decoding. 
if that fails because of an + # unicode error, then we already have a decoded idna domain + if isinstance(domain, text_type): + try: + domain = domain.encode("ascii") + except UnicodeError: + return domain + + # Decode each part separately. If a part fails, try to + # decode it with ascii and silently ignore errors. This makes + # most sense because the idna codec does not have error handling + parts = domain.split(b".") + for idx, part in enumerate(parts): + try: + parts[idx] = part.decode("idna") + except UnicodeError: + parts[idx] = part.decode("ascii", "ignore") + + return ".".join(parts) + + +def _make_cookie_domain(domain): + if domain is None: + return None + domain = _encode_idna(domain) + if b":" in domain: + domain = domain.split(b":", 1)[0] + if b"." in domain: + return domain + raise ValueError( + "Setting 'domain' for a cookie on a server running locally (ex: " + "localhost) is not supported by complying browsers. You should " + "have something like: '127.0.0.1 localhost dev.localhost' on " + "your hosts file and then point your server to run on " + "'dev.localhost' and also set 'domain' for 'dev.localhost'" + ) + + +def _easteregg(app=None): + """Like the name says. But who knows how it works?""" + + def bzzzzzzz(gyver): + import base64 + import zlib + + return zlib.decompress(base64.b64decode(gyver)).decode("ascii") + + gyver = u"\n".join( + [ + x + (77 - len(x)) * u" " + for x in bzzzzzzz( + b""" +eJyFlzuOJDkMRP06xRjymKgDJCDQStBYT8BCgK4gTwfQ2fcFs2a2FzvZk+hvlcRvRJD148efHt9m +9Xz94dRY5hGt1nrYcXx7us9qlcP9HHNh28rz8dZj+q4rynVFFPdlY4zH873NKCexrDM6zxxRymzz +4QIxzK4bth1PV7+uHn6WXZ5C4ka/+prFzx3zWLMHAVZb8RRUxtFXI5DTQ2n3Hi2sNI+HK43AOWSY +jmEzE4naFp58PdzhPMdslLVWHTGUVpSxImw+pS/D+JhzLfdS1j7PzUMxij+mc2U0I9zcbZ/HcZxc +q1QjvvcThMYFnp93agEx392ZdLJWXbi/Ca4Oivl4h/Y1ErEqP+lrg7Xa4qnUKu5UE9UUA4xeqLJ5 +jWlPKJvR2yhRI7xFPdzPuc6adXu6ovwXwRPXXnZHxlPtkSkqWHilsOrGrvcVWXgGP3daXomCj317 +8P2UOw/NnA0OOikZyFf3zZ76eN9QXNwYdD8f8/LdBRFg0BO3bB+Pe/+G8er8tDJv83XTkj7WeMBJ +v/rnAfdO51d6sFglfi8U7zbnr0u9tyJHhFZNXYfH8Iafv2Oa+DT6l8u9UYlajV/hcEgk1x8E8L/r +XJXl2SK+GJCxtnyhVKv6GFCEB1OO3f9YWAIEbwcRWv/6RPpsEzOkXURMN37J0PoCSYeBnJQd9Giu +LxYQJNlYPSo/iTQwgaihbART7Fcyem2tTSCcwNCs85MOOpJtXhXDe0E7zgZJkcxWTar/zEjdIVCk +iXy87FW6j5aGZhttDBoAZ3vnmlkx4q4mMmCdLtnHkBXFMCReqthSGkQ+MDXLLCpXwBs0t+sIhsDI +tjBB8MwqYQpLygZ56rRHHpw+OAVyGgaGRHWy2QfXez+ZQQTTBkmRXdV/A9LwH6XGZpEAZU8rs4pE +1R4FQ3Uwt8RKEtRc0/CrANUoes3EzM6WYcFyskGZ6UTHJWenBDS7h163Eo2bpzqxNE9aVgEM2CqI +GAJe9Yra4P5qKmta27VjzYdR04Vc7KHeY4vs61C0nbywFmcSXYjzBHdiEjraS7PGG2jHHTpJUMxN +Jlxr3pUuFvlBWLJGE3GcA1/1xxLcHmlO+LAXbhrXah1tD6Ze+uqFGdZa5FM+3eHcKNaEarutAQ0A +QMAZHV+ve6LxAwWnXbbSXEG2DmCX5ijeLCKj5lhVFBrMm+ryOttCAeFpUdZyQLAQkA06RLs56rzG +8MID55vqr/g64Qr/wqwlE0TVxgoiZhHrbY2h1iuuyUVg1nlkpDrQ7Vm1xIkI5XRKLedN9EjzVchu +jQhXcVkjVdgP2O99QShpdvXWoSwkp5uMwyjt3jiWCqWGSiaaPAzohjPanXVLbM3x0dNskJsaCEyz +DTKIs+7WKJD4ZcJGfMhLFBf6hlbnNkLEePF8Cx2o2kwmYF4+MzAxa6i+6xIQkswOqGO+3x9NaZX8 +MrZRaFZpLeVTYI9F/djY6DDVVs340nZGmwrDqTCiiqD5luj3OzwpmQCiQhdRYowUYEA3i1WWGwL4 +GCtSoO4XbIPFeKGU13XPkDf5IdimLpAvi2kVDVQbzOOa4KAXMFlpi/hV8F6IDe0Y2reg3PuNKT3i +RYhZqtkQZqSB2Qm0SGtjAw7RDwaM1roESC8HWiPxkoOy0lLTRFG39kvbLZbU9gFKFRvixDZBJmpi +Xyq3RE5lW00EJjaqwp/v3EByMSpVZYsEIJ4APaHmVtpGSieV5CALOtNUAzTBiw81GLgC0quyzf6c +NlWknzJeCsJ5fup2R4d8CYGN77mu5vnO1UqbfElZ9E6cR6zbHjgsr9ly18fXjZoPeDjPuzlWbFwS +pdvPkhntFvkc13qb9094LL5NrA3NIq3r9eNnop9DizWOqCEbyRBFJTHn6Tt3CG1o8a4HevYh0XiJ +sR0AVVHuGuMOIfbuQ/OKBkGRC6NJ4u7sbPX8bG/n5sNIOQ6/Y/BX3IwRlTSabtZpYLB85lYtkkgm +p1qXK3Du2mnr5INXmT/78KI12n11EFBkJHHp0wJyLe9MvPNUGYsf+170maayRoy2lURGHAIapSpQ 
+krEDuNoJCHNlZYhKpvw4mspVWxqo415n8cD62N9+EfHrAvqQnINStetek7RY2Urv8nxsnGaZfRr/ +nhXbJ6m/yl1LzYqscDZA9QHLNbdaSTTr+kFg3bC0iYbX/eQy0Bv3h4B50/SGYzKAXkCeOLI3bcAt +mj2Z/FM1vQWgDynsRwNvrWnJHlespkrp8+vO1jNaibm+PhqXPPv30YwDZ6jApe3wUjFQobghvW9p +7f2zLkGNv8b191cD/3vs9Q833z8t""" + ).splitlines() + ] + ) + + def easteregged(environ, start_response): + def injecting_start_response(status, headers, exc_info=None): + headers.append(("X-Powered-By", "Werkzeug")) + return start_response(status, headers, exc_info) + + if app is not None and environ.get("QUERY_STRING") != "macgybarchakku": + return app(environ, injecting_start_response) + injecting_start_response("200 OK", [("Content-Type", "text/html")]) + return [ + ( + u""" +<!DOCTYPE html> +<html> +<head> +<title>About Werkzeug</title> +<style type="text/css"> + body { font: 15px Georgia, serif; text-align: center; } + a { color: #333; text-decoration: none; } + h1 { font-size: 30px; margin: 20px 0 10px 0; } + p { margin: 0 0 30px 0; } + pre { font: 11px 'Consolas', 'Monaco', monospace; line-height: 0.95; } +</style> +</head> +<body> +<h1><a href="http://werkzeug.pocoo.org/">Werkzeug</a></h1> +<p>the Swiss Army knife of Python web development.</p> +<pre>%s\n\n\n</pre> +</body> +</html>""" + % gyver + ).encode("latin1") + ] + + return easteregged diff --git a/python/werkzeug/_reloader.py b/python/werkzeug/_reloader.py new file mode 100644 index 0000000..f06a63d --- /dev/null +++ b/python/werkzeug/_reloader.py @@ -0,0 +1,334 @@ +import os +import subprocess +import sys +import threading +import time +from itertools import chain + +from ._compat import iteritems +from ._compat import PY2 +from ._compat import text_type +from ._internal import _log + + +def _iter_module_files(): + """This iterates over all relevant Python files. It goes through all + loaded files from modules, all files in folders of already loaded modules + as well as all files reachable through a package. + """ + # The list call is necessary on Python 3 in case the module + # dictionary modifies during iteration. + for module in list(sys.modules.values()): + if module is None: + continue + filename = getattr(module, "__file__", None) + if filename: + if os.path.isdir(filename) and os.path.exists( + os.path.join(filename, "__init__.py") + ): + filename = os.path.join(filename, "__init__.py") + + old = None + while not os.path.isfile(filename): + old = filename + filename = os.path.dirname(filename) + if filename == old: + break + else: + if filename[-4:] in (".pyc", ".pyo"): + filename = filename[:-1] + yield filename + + +def _find_observable_paths(extra_files=None): + """Finds all paths that should be observed.""" + rv = set( + os.path.dirname(os.path.abspath(x)) if os.path.isfile(x) else os.path.abspath(x) + for x in sys.path + ) + + for filename in extra_files or (): + rv.add(os.path.dirname(os.path.abspath(filename))) + + for module in list(sys.modules.values()): + fn = getattr(module, "__file__", None) + if fn is None: + continue + fn = os.path.abspath(fn) + rv.add(os.path.dirname(fn)) + + return _find_common_roots(rv) + + +def _get_args_for_reloading(): + """Returns the executable. This contains a workaround for windows + if the executable is incorrectly reported to not have the .exe + extension which can cause bugs on reloading. 
This also contains + a workaround for linux where the file is executable (possibly with + a program other than python) + """ + rv = [sys.executable] + py_script = os.path.abspath(sys.argv[0]) + args = sys.argv[1:] + # Need to look at main module to determine how it was executed. + __main__ = sys.modules["__main__"] + + if __main__.__package__ is None: + # Executed a file, like "python app.py". + if os.name == "nt": + # Windows entry points have ".exe" extension and should be + # called directly. + if not os.path.exists(py_script) and os.path.exists(py_script + ".exe"): + py_script += ".exe" + + if ( + os.path.splitext(rv[0])[1] == ".exe" + and os.path.splitext(py_script)[1] == ".exe" + ): + rv.pop(0) + + elif os.path.isfile(py_script) and os.access(py_script, os.X_OK): + # The file is marked as executable. Nix adds a wrapper that + # shouldn't be called with the Python executable. + rv.pop(0) + + rv.append(py_script) + else: + # Executed a module, like "python -m werkzeug.serving". + if sys.argv[0] == "-m": + # Flask works around previous behavior by putting + # "-m flask" in sys.argv. + # TODO remove this once Flask no longer misbehaves + args = sys.argv + else: + py_module = __main__.__package__ + name = os.path.splitext(os.path.basename(py_script))[0] + + if name != "__main__": + py_module += "." + name + + rv.extend(("-m", py_module.lstrip("."))) + + rv.extend(args) + return rv + + +def _find_common_roots(paths): + """Out of some paths it finds the common roots that need monitoring.""" + paths = [x.split(os.path.sep) for x in paths] + root = {} + for chunks in sorted(paths, key=len, reverse=True): + node = root + for chunk in chunks: + node = node.setdefault(chunk, {}) + node.clear() + + rv = set() + + def _walk(node, path): + for prefix, child in iteritems(node): + _walk(child, path + (prefix,)) + if not node: + rv.add("/".join(path)) + + _walk(root, ()) + return rv + + +class ReloaderLoop(object): + name = None + + # monkeypatched by testsuite. wrapping with `staticmethod` is required in + # case time.sleep has been replaced by a non-c function (e.g. by + # `eventlet.monkey_patch`) before we get here + _sleep = staticmethod(time.sleep) + + def __init__(self, extra_files=None, interval=1): + self.extra_files = set(os.path.abspath(x) for x in extra_files or ()) + self.interval = interval + + def run(self): + pass + + def restart_with_reloader(self): + """Spawn a new Python interpreter with the same arguments as this one, + but running the reloader thread. + """ + while 1: + _log("info", " * Restarting with %s" % self.name) + args = _get_args_for_reloading() + + # a weird bug on windows. sometimes unicode strings end up in the + # environment and subprocess.call does not like this, encode them + # to latin1 and continue. 
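+ # (The re-encoding below only applies to Python 2 on Windows; on
+ # Python 3 the environment is copied unchanged in the else branch.)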
+ if os.name == "nt" and PY2: + new_environ = {} + for key, value in iteritems(os.environ): + if isinstance(key, text_type): + key = key.encode("iso-8859-1") + if isinstance(value, text_type): + value = value.encode("iso-8859-1") + new_environ[key] = value + else: + new_environ = os.environ.copy() + + new_environ["WERKZEUG_RUN_MAIN"] = "true" + exit_code = subprocess.call(args, env=new_environ, close_fds=False) + if exit_code != 3: + return exit_code + + def trigger_reload(self, filename): + self.log_reload(filename) + sys.exit(3) + + def log_reload(self, filename): + filename = os.path.abspath(filename) + _log("info", " * Detected change in %r, reloading" % filename) + + +class StatReloaderLoop(ReloaderLoop): + name = "stat" + + def run(self): + mtimes = {} + while 1: + for filename in chain(_iter_module_files(), self.extra_files): + try: + mtime = os.stat(filename).st_mtime + except OSError: + continue + + old_time = mtimes.get(filename) + if old_time is None: + mtimes[filename] = mtime + continue + elif mtime > old_time: + self.trigger_reload(filename) + self._sleep(self.interval) + + +class WatchdogReloaderLoop(ReloaderLoop): + def __init__(self, *args, **kwargs): + ReloaderLoop.__init__(self, *args, **kwargs) + from watchdog.observers import Observer + from watchdog.events import FileSystemEventHandler + + self.observable_paths = set() + + def _check_modification(filename): + if filename in self.extra_files: + self.trigger_reload(filename) + dirname = os.path.dirname(filename) + if dirname.startswith(tuple(self.observable_paths)): + if filename.endswith((".pyc", ".pyo", ".py")): + self.trigger_reload(filename) + + class _CustomHandler(FileSystemEventHandler): + def on_created(self, event): + _check_modification(event.src_path) + + def on_modified(self, event): + _check_modification(event.src_path) + + def on_moved(self, event): + _check_modification(event.src_path) + _check_modification(event.dest_path) + + def on_deleted(self, event): + _check_modification(event.src_path) + + reloader_name = Observer.__name__.lower() + if reloader_name.endswith("observer"): + reloader_name = reloader_name[:-8] + reloader_name += " reloader" + + self.name = reloader_name + + self.observer_class = Observer + self.event_handler = _CustomHandler() + self.should_reload = False + + def trigger_reload(self, filename): + # This is called inside an event handler, which means throwing + # SystemExit has no effect. + # https://github.com/gorakhargosh/watchdog/issues/294 + self.should_reload = True + self.log_reload(filename) + + def run(self): + watches = {} + observer = self.observer_class() + observer.start() + + try: + while not self.should_reload: + to_delete = set(watches) + paths = _find_observable_paths(self.extra_files) + for path in paths: + if path not in watches: + try: + watches[path] = observer.schedule( + self.event_handler, path, recursive=True + ) + except OSError: + # Clear this path from list of watches We don't want + # the same error message showing again in the next + # iteration. 
+ watches[path] = None + to_delete.discard(path) + for path in to_delete: + watch = watches.pop(path, None) + if watch is not None: + observer.unschedule(watch) + self.observable_paths = paths + self._sleep(self.interval) + finally: + observer.stop() + observer.join() + + sys.exit(3) + + +reloader_loops = {"stat": StatReloaderLoop, "watchdog": WatchdogReloaderLoop} + +try: + __import__("watchdog.observers") +except ImportError: + reloader_loops["auto"] = reloader_loops["stat"] +else: + reloader_loops["auto"] = reloader_loops["watchdog"] + + +def ensure_echo_on(): + """Ensure that echo mode is enabled. Some tools such as PDB disable + it which causes usability issues after reload.""" + # tcgetattr will fail if stdin isn't a tty + if not sys.stdin.isatty(): + return + try: + import termios + except ImportError: + return + attributes = termios.tcgetattr(sys.stdin) + if not attributes[3] & termios.ECHO: + attributes[3] |= termios.ECHO + termios.tcsetattr(sys.stdin, termios.TCSANOW, attributes) + + +def run_with_reloader(main_func, extra_files=None, interval=1, reloader_type="auto"): + """Run the given function in an independent python interpreter.""" + import signal + + reloader = reloader_loops[reloader_type](extra_files, interval) + signal.signal(signal.SIGTERM, lambda *args: sys.exit(0)) + try: + if os.environ.get("WERKZEUG_RUN_MAIN") == "true": + ensure_echo_on() + t = threading.Thread(target=main_func, args=()) + t.setDaemon(True) + t.start() + reloader.run() + else: + sys.exit(reloader.restart_with_reloader()) + except KeyboardInterrupt: + pass diff --git a/python/werkzeug/contrib/__init__.py b/python/werkzeug/contrib/__init__.py new file mode 100644 index 0000000..0e741f0 --- /dev/null +++ b/python/werkzeug/contrib/__init__.py @@ -0,0 +1,16 @@ +# -*- coding: utf-8 -*- +""" + werkzeug.contrib + ~~~~~~~~~~~~~~~~ + + Contains user-submitted code that other users may find useful, but which + is not part of the Werkzeug core. Anyone can write code for inclusion in + the `contrib` package. All modules in this package are distributed as an + add-on library and thus are not part of Werkzeug itself. + + This file itself is mostly for informational purposes and to tell the + Python interpreter that `contrib` is a package. + + :copyright: 2007 Pallets + :license: BSD-3-Clause +""" diff --git a/python/werkzeug/contrib/atom.py b/python/werkzeug/contrib/atom.py new file mode 100644 index 0000000..d079d2b --- /dev/null +++ b/python/werkzeug/contrib/atom.py @@ -0,0 +1,362 @@ +# -*- coding: utf-8 -*- +""" + werkzeug.contrib.atom + ~~~~~~~~~~~~~~~~~~~~~ + + This module provides a class called :class:`AtomFeed` which can be + used to generate feeds in the Atom syndication format (see :rfc:`4287`). 
+
+ Example::
+
+ def atom_feed(request):
+ feed = AtomFeed("My Blog", feed_url=request.url,
+ url=request.host_url,
+ subtitle="My example blog for a feed test.")
+ for post in Post.query.limit(10).all():
+ feed.add(post.title, post.body, content_type='html',
+ author=post.author, url=post.url, id=post.uid,
+ updated=post.last_update, published=post.pub_date)
+ return feed.get_response()
+
+ :copyright: 2007 Pallets
+ :license: BSD-3-Clause
+"""
+import warnings
+from datetime import datetime
+
+from .._compat import implements_to_string
+from .._compat import string_types
+from ..utils import escape
+from ..wrappers import BaseResponse
+
+warnings.warn(
+ "'werkzeug.contrib.atom' is deprecated as of version 0.15 and will"
+ " be removed in version 1.0.",
+ DeprecationWarning,
+ stacklevel=2,
+)
+
+XHTML_NAMESPACE = "http://www.w3.org/1999/xhtml"
+
+
+def _make_text_block(name, content, content_type=None):
+ """Helper function for the builder that creates an XML text block."""
+ if content_type == "xhtml":
+ return u'<%s type="xhtml"><div xmlns="%s">%s</div></%s>\n' % (
+ name,
+ XHTML_NAMESPACE,
+ content,
+ name,
+ )
+ if not content_type:
+ return u"<%s>%s</%s>\n" % (name, escape(content), name)
+ return u'<%s type="%s">%s</%s>\n' % (name, content_type, escape(content), name)
+
+
+def format_iso8601(obj):
+ """Format a datetime object as an ISO 8601 string."""
+ iso8601 = obj.isoformat()
+ if obj.tzinfo:
+ return iso8601
+ return iso8601 + "Z"
+
+
+@implements_to_string
+class AtomFeed(object):
+
+ """A helper class that creates Atom feeds.
+
+ :param title: the title of the feed. Required.
+ :param title_type: the type attribute for the title element. One of
+ ``'html'``, ``'text'`` or ``'xhtml'``.
+ :param url: the url for the feed (not the url *of* the feed)
+ :param id: a globally unique id for the feed. Must be a URI. If
+ not present the `feed_url` is used, but one of the two is
+ required.
+ :param updated: the time the feed was last modified. Must
+ be a :class:`datetime.datetime` object. If not
+ present the latest entry's `updated` is used.
+ Treated as UTC if naive datetime.
+ :param feed_url: the URL to the feed. Should be the URL that was
+ requested.
+ :param author: the author of the feed. Must be either a string (the
+ name) or a dict with name (required) and uri or
+ email (both optional). Can also be a list of such
+ strings and dicts (possibly mixed) if there are
+ multiple authors. Required if not every entry has an
+ author element.
+ :param icon: an icon for the feed.
+ :param logo: a logo for the feed.
+ :param rights: copyright information for the feed.
+ :param rights_type: the type attribute for the rights element. One of
+ ``'html'``, ``'text'`` or ``'xhtml'``. Default is
+ ``'text'``.
+ :param subtitle: a short description of the feed.
+ :param subtitle_type: the type attribute for the subtitle element.
+ One of ``'text'``, ``'html'``
+ or ``'xhtml'``. Default is ``'text'``.
+ :param links: additional links. Must be a list of dictionaries with
+ href (required) and rel, type, hreflang, title, length
+ (all optional)
+ :param generator: the software that generated this feed. This must be
+ a tuple in the form ``(name, url, version)``. If
+ you don't want to specify one of them, set the item
+ to `None`.
+ :param entries: a list with the entries for the feed. Entries can also
+ be added later with :meth:`add`.
+ + For more information on the elements see + http://www.atomenabled.org/developers/syndication/ + + Everywhere where a list is demanded, any iterable can be used. + """ + + default_generator = ("Werkzeug", None, None) + + def __init__(self, title=None, entries=None, **kwargs): + self.title = title + self.title_type = kwargs.get("title_type", "text") + self.url = kwargs.get("url") + self.feed_url = kwargs.get("feed_url", self.url) + self.id = kwargs.get("id", self.feed_url) + self.updated = kwargs.get("updated") + self.author = kwargs.get("author", ()) + self.icon = kwargs.get("icon") + self.logo = kwargs.get("logo") + self.rights = kwargs.get("rights") + self.rights_type = kwargs.get("rights_type") + self.subtitle = kwargs.get("subtitle") + self.subtitle_type = kwargs.get("subtitle_type", "text") + self.generator = kwargs.get("generator") + if self.generator is None: + self.generator = self.default_generator + self.links = kwargs.get("links", []) + self.entries = list(entries) if entries else [] + + if not hasattr(self.author, "__iter__") or isinstance( + self.author, string_types + (dict,) + ): + self.author = [self.author] + for i, author in enumerate(self.author): + if not isinstance(author, dict): + self.author[i] = {"name": author} + + if not self.title: + raise ValueError("title is required") + if not self.id: + raise ValueError("id is required") + for author in self.author: + if "name" not in author: + raise TypeError("author must contain at least a name") + + def add(self, *args, **kwargs): + """Add a new entry to the feed. This function can either be called + with a :class:`FeedEntry` or some keyword and positional arguments + that are forwarded to the :class:`FeedEntry` constructor. + """ + if len(args) == 1 and not kwargs and isinstance(args[0], FeedEntry): + self.entries.append(args[0]) + else: + kwargs["feed_url"] = self.feed_url + self.entries.append(FeedEntry(*args, **kwargs)) + + def __repr__(self): + return "<%s %r (%d entries)>" % ( + self.__class__.__name__, + self.title, + len(self.entries), + ) + + def generate(self): + """Return a generator that yields pieces of XML.""" + # atom demands either an author element in every entry or a global one + if not self.author: + if any(not e.author for e in self.entries): + self.author = ({"name": "Unknown author"},) + + if not self.updated: + dates = sorted([entry.updated for entry in self.entries]) + self.updated = dates[-1] if dates else datetime.utcnow() + + yield u'<?xml version="1.0" encoding="utf-8"?>\n' + yield u'<feed xmlns="http://www.w3.org/2005/Atom">\n' + yield " " + _make_text_block("title", self.title, self.title_type) + yield u" <id>%s</id>\n" % escape(self.id) + yield u" <updated>%s</updated>\n" % format_iso8601(self.updated) + if self.url: + yield u' <link href="%s" />\n' % escape(self.url) + if self.feed_url: + yield u' <link href="%s" rel="self" />\n' % escape(self.feed_url) + for link in self.links: + yield u" <link %s/>\n" % "".join( + '%s="%s" ' % (k, escape(link[k])) for k in link + ) + for author in self.author: + yield u" <author>\n" + yield u" <name>%s</name>\n" % escape(author["name"]) + if "uri" in author: + yield u" <uri>%s</uri>\n" % escape(author["uri"]) + if "email" in author: + yield " <email>%s</email>\n" % escape(author["email"]) + yield " </author>\n" + if self.subtitle: + yield " " + _make_text_block("subtitle", self.subtitle, self.subtitle_type) + if self.icon: + yield u" <icon>%s</icon>\n" % escape(self.icon) + if self.logo: + yield u" <logo>%s</logo>\n" % escape(self.logo) + if 
self.rights:
+ yield " " + _make_text_block("rights", self.rights, self.rights_type)
+ generator_name, generator_url, generator_version = self.generator
+ if generator_name or generator_url or generator_version:
+ tmp = [u" <generator"]
+ if generator_url:
+ tmp.append(u' uri="%s"' % escape(generator_url))
+ if generator_version:
+ tmp.append(u' version="%s"' % escape(generator_version))
+ tmp.append(u">%s</generator>\n" % escape(generator_name))
+ yield u"".join(tmp)
+ for entry in self.entries:
+ for line in entry.generate():
+ yield u" " + line
+ yield u"</feed>\n"
+
+ def to_string(self):
+ """Convert the feed into a string."""
+ return u"".join(self.generate())
+
+ def get_response(self):
+ """Return a response object for the feed."""
+ return BaseResponse(self.to_string(), mimetype="application/atom+xml")
+
+ def __call__(self, environ, start_response):
+ """Use the class as WSGI response object."""
+ return self.get_response()(environ, start_response)
+
+ def __str__(self):
+ return self.to_string()
+
+
+@implements_to_string
+class FeedEntry(object):
+
+ """Represents a single entry in a feed.
+
+ :param title: the title of the entry. Required.
+ :param title_type: the type attribute for the title element. One of
+ ``'html'``, ``'text'`` or ``'xhtml'``.
+ :param content: the content of the entry.
+ :param content_type: the type attribute for the content element. One
+ of ``'html'``, ``'text'`` or ``'xhtml'``.
+ :param summary: a summary of the entry's content.
+ :param summary_type: the type attribute for the summary element. One
+ of ``'html'``, ``'text'`` or ``'xhtml'``.
+ :param url: the url for the entry.
+ :param id: a globally unique id for the entry. Must be a URI. If
+ not present the URL is used, but one of the two is required.
+ :param updated: the time the entry was last modified. Must
+ be a :class:`datetime.datetime` object. Treated as
+ UTC if naive datetime. Required.
+ :param author: the author of the entry. Must be either a string (the
+ name) or a dict with name (required) and uri or
+ email (both optional). Can also be a list of such
+ strings and dicts (possibly mixed) if there are
+ multiple authors. Required if the feed does not have an
+ author element.
+ :param published: the time the entry was initially published. Must
+ be a :class:`datetime.datetime` object. Treated as
+ UTC if naive datetime.
+ :param rights: copyright information for the entry.
+ :param rights_type: the type attribute for the rights element. One of
+ ``'html'``, ``'text'`` or ``'xhtml'``. Default is
+ ``'text'``.
+ :param links: additional links. Must be a list of dictionaries with
+ href (required) and rel, type, hreflang, title, length
+ (all optional)
+ :param categories: categories for the entry. Must be a list of dictionaries
+ with term (required), scheme and label (all optional)
+ :param xml_base: The xml base (url) for this feed item. If not provided
+ it will default to the item url.
+
+ For more information on the elements see
+ http://www.atomenabled.org/developers/syndication/
+
+ Everywhere where a list is demanded, any iterable can be used.
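+
+ A minimal usage sketch (all field values below are made up)::
+
+ entry = FeedEntry("First post", "<p>Hello</p>",
+ content_type="html",
+ url="http://example.com/posts/1",
+ updated=datetime.utcnow())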
+ """ + + def __init__(self, title=None, content=None, feed_url=None, **kwargs): + self.title = title + self.title_type = kwargs.get("title_type", "text") + self.content = content + self.content_type = kwargs.get("content_type", "html") + self.url = kwargs.get("url") + self.id = kwargs.get("id", self.url) + self.updated = kwargs.get("updated") + self.summary = kwargs.get("summary") + self.summary_type = kwargs.get("summary_type", "html") + self.author = kwargs.get("author", ()) + self.published = kwargs.get("published") + self.rights = kwargs.get("rights") + self.links = kwargs.get("links", []) + self.categories = kwargs.get("categories", []) + self.xml_base = kwargs.get("xml_base", feed_url) + + if not hasattr(self.author, "__iter__") or isinstance( + self.author, string_types + (dict,) + ): + self.author = [self.author] + for i, author in enumerate(self.author): + if not isinstance(author, dict): + self.author[i] = {"name": author} + + if not self.title: + raise ValueError("title is required") + if not self.id: + raise ValueError("id is required") + if not self.updated: + raise ValueError("updated is required") + + def __repr__(self): + return "<%s %r>" % (self.__class__.__name__, self.title) + + def generate(self): + """Yields pieces of ATOM XML.""" + base = "" + if self.xml_base: + base = ' xml:base="%s"' % escape(self.xml_base) + yield u"<entry%s>\n" % base + yield u" " + _make_text_block("title", self.title, self.title_type) + yield u" <id>%s</id>\n" % escape(self.id) + yield u" <updated>%s</updated>\n" % format_iso8601(self.updated) + if self.published: + yield u" <published>%s</published>\n" % format_iso8601(self.published) + if self.url: + yield u' <link href="%s" />\n' % escape(self.url) + for author in self.author: + yield u" <author>\n" + yield u" <name>%s</name>\n" % escape(author["name"]) + if "uri" in author: + yield u" <uri>%s</uri>\n" % escape(author["uri"]) + if "email" in author: + yield u" <email>%s</email>\n" % escape(author["email"]) + yield u" </author>\n" + for link in self.links: + yield u" <link %s/>\n" % "".join( + '%s="%s" ' % (k, escape(link[k])) for k in link + ) + for category in self.categories: + yield u" <category %s/>\n" % "".join( + '%s="%s" ' % (k, escape(category[k])) for k in category + ) + if self.summary: + yield u" " + _make_text_block("summary", self.summary, self.summary_type) + if self.content: + yield u" " + _make_text_block("content", self.content, self.content_type) + yield u"</entry>\n" + + def to_string(self): + """Convert the feed item into a unicode object.""" + return u"".join(self.generate()) + + def __str__(self): + return self.to_string() diff --git a/python/werkzeug/contrib/cache.py b/python/werkzeug/contrib/cache.py new file mode 100644 index 0000000..79c749b --- /dev/null +++ b/python/werkzeug/contrib/cache.py @@ -0,0 +1,933 @@ +# -*- coding: utf-8 -*- +""" + werkzeug.contrib.cache + ~~~~~~~~~~~~~~~~~~~~~~ + + The main problem with dynamic Web sites is, well, they're dynamic. Each + time a user requests a page, the webserver executes a lot of code, queries + the database, renders templates until the visitor gets the page he sees. + + This is a lot more expensive than just loading a file from the file system + and sending it to the visitor. + + For most Web applications, this overhead isn't a big deal but once it + becomes, you will be glad to have a cache system in place. + + How Caching Works + ================= + + Caching is pretty simple. 
Basically you have a cache object lurking around
+ somewhere that is connected to a remote cache or the file system or
+ something else. When a request comes in, you check if the current page
+ is already in the cache and, if so, return it from the cache.
+ Otherwise you generate the page and put it into the cache. (Or a fragment
+ of the page; you don't have to cache the whole thing.)
+
+ Here is a simple example of how to cache a sidebar for 5 minutes::
+
+ def get_sidebar(user):
+ identifier = 'sidebar_for/user%d' % user.id
+ value = cache.get(identifier)
+ if value is not None:
+ return value
+ value = generate_sidebar_for(user=user)
+ cache.set(identifier, value, timeout=60 * 5)
+ return value
+
+ Creating a Cache Object
+ =======================
+
+ To create a cache object you just import the cache system of your choice
+ from the cache module and instantiate it. Then you can start working
+ with that object:
+
+ >>> from werkzeug.contrib.cache import SimpleCache
+ >>> c = SimpleCache()
+ >>> c.set("foo", "value")
+ >>> c.get("foo")
+ 'value'
+ >>> c.get("missing") is None
+ True
+
+ Please keep in mind that you have to create the cache and put it somewhere
+ you have access to it (either as a module global you can import or
+ inside your WSGI application).
+
+ :copyright: 2007 Pallets
+ :license: BSD-3-Clause
+"""
+import errno
+import os
+import platform
+import re
+import tempfile
+import warnings
+from hashlib import md5
+from time import time
+
+from .._compat import integer_types
+from .._compat import iteritems
+from .._compat import string_types
+from .._compat import text_type
+from .._compat import to_native
+from ..posixemulation import rename
+
+try:
+ import cPickle as pickle
+except ImportError: # pragma: no cover
+ import pickle
+
+warnings.warn(
+ "'werkzeug.contrib.cache' is deprecated as of version 0.15 and will"
+ " be removed in version 1.0. It has moved to https://github.com"
+ "/pallets/cachelib.",
+ DeprecationWarning,
+ stacklevel=2,
+)
+
+
+def _items(mappingorseq):
+ """Wrapper for efficient iteration over mappings represented by dicts
+ or sequences::
+
+ >>> for k, v in _items((i, i*i) for i in xrange(5)):
+ ... assert k*k == v
+
+ >>> for k, v in _items(dict((i, i*i) for i in xrange(5))):
+ ... assert k*k == v
+
+ """
+ if hasattr(mappingorseq, "items"):
+ return iteritems(mappingorseq)
+ return mappingorseq
+
+
+class BaseCache(object):
+ """Baseclass for the cache systems. All the cache systems implement this
+ API or a superset of it.
+
+ :param default_timeout: the default timeout (in seconds) that is used if
+ no timeout is specified on :meth:`set`. A timeout
+ of 0 indicates that the cache never expires.
+ """
+
+ def __init__(self, default_timeout=300):
+ self.default_timeout = default_timeout
+
+ def _normalize_timeout(self, timeout):
+ if timeout is None:
+ timeout = self.default_timeout
+ return timeout
+
+ def get(self, key):
+ """Look up key in the cache and return the value for it.
+
+ :param key: the key to be looked up.
+ :returns: The value if it exists and is readable, else ``None``.
+ """
+ return None
+
+ def delete(self, key):
+ """Delete `key` from the cache.
+
+ :param key: the key to delete.
+ :returns: Whether the key existed and has been deleted.
+ :rtype: boolean
+ """
+ return True
+
+ def get_many(self, *keys):
+ """Returns a list of values for the given keys.
+ For each key an item in the list is created::
+
+ foo, bar = cache.get_many("foo", "bar")
+
+ Has the same error handling as :meth:`get`.
+
+ :param keys: The function accepts multiple keys as positional
+ arguments.
+ """
+ return [self.get(k) for k in keys]
+
+ def get_dict(self, *keys):
+ """Like :meth:`get_many` but return a dict::
+
+ d = cache.get_dict("foo", "bar")
+ foo = d["foo"]
+ bar = d["bar"]
+
+ :param keys: The function accepts multiple keys as positional
+ arguments.
+ """
+ return dict(zip(keys, self.get_many(*keys)))
+
+ def set(self, key, value, timeout=None):
+ """Add a new key/value to the cache (overwrites the value if the key
+ already exists in the cache).
+
+ :param key: the key to set
+ :param value: the value for the key
+ :param timeout: the cache timeout for the key in seconds (if not
+ specified, it uses the default timeout). A timeout of
+ 0 indicates that the cache never expires.
+ :returns: ``True`` if key has been updated, ``False`` for backend
+ errors. Pickling errors, however, will raise a subclass of
+ ``pickle.PickleError``.
+ :rtype: boolean
+ """
+ return True
+
+ def add(self, key, value, timeout=None):
+ """Works like :meth:`set` but does not overwrite the values of already
+ existing keys.
+
+ :param key: the key to set
+ :param value: the value for the key
+ :param timeout: the cache timeout for the key in seconds (if not
+ specified, it uses the default timeout). A timeout of
+ 0 indicates that the cache never expires.
+ :returns: Same as :meth:`set`, but also ``False`` for already
+ existing keys.
+ :rtype: boolean
+ """
+ return True
+
+ def set_many(self, mapping, timeout=None):
+ """Sets multiple keys and values from a mapping.
+
+ :param mapping: a mapping with the keys/values to set.
+ :param timeout: the cache timeout for the key in seconds (if not
+ specified, it uses the default timeout). A timeout of
+ 0 indicates that the cache never expires.
+ :returns: Whether all given keys have been set.
+ :rtype: boolean
+ """
+ rv = True
+ for key, value in _items(mapping):
+ if not self.set(key, value, timeout):
+ rv = False
+ return rv
+
+ def delete_many(self, *keys):
+ """Deletes multiple keys at once.
+
+ :param keys: The function accepts multiple keys as positional
+ arguments.
+ :returns: Whether all given keys have been deleted.
+ :rtype: boolean
+ """
+ return all(self.delete(key) for key in keys)
+
+ def has(self, key):
+ """Checks if a key exists in the cache without returning it. This is a
+ cheap operation that bypasses loading the actual data on the backend.
+
+ This method is optional and may not be implemented on all caches.
+
+ :param key: the key to check
+ """
+ raise NotImplementedError(
+ "%s doesn't have an efficient implementation of `has`. That "
+ "means it is impossible to check whether a key exists without "
+ "fully loading the key's data. Consider using `self.get` "
+ "explicitly if you don't care about performance."
+ )
+
+ def clear(self):
+ """Clears the cache. Keep in mind that not all caches support
+ completely clearing the cache.
+
+ :returns: Whether the cache has been cleared.
+ :rtype: boolean
+ """
+ return True
+
+ def inc(self, key, delta=1):
+ """Increments the value of a key by `delta`. If the key does
+ not yet exist it is initialized with `delta`.
+
+ For caches that support it, this is an atomic operation.
+
+ :param key: the key to increment.
+ :param delta: the delta to add.
+ :returns: The new value or ``None`` for backend errors.
+ """
+ value = (self.get(key) or 0) + delta
+ return value if self.set(key, value) else None
+
+ def dec(self, key, delta=1):
+ """Decrements the value of a key by `delta`.
If the key does + not yet exist it is initialized with `-delta`. + + For supporting caches this is an atomic operation. + + :param key: the key to increment. + :param delta: the delta to subtract. + :returns: The new value or `None` for backend errors. + """ + value = (self.get(key) or 0) - delta + return value if self.set(key, value) else None + + +class NullCache(BaseCache): + """A cache that doesn't cache. This can be useful for unit testing. + + :param default_timeout: a dummy parameter that is ignored but exists + for API compatibility with other caches. + """ + + def has(self, key): + return False + + +class SimpleCache(BaseCache): + """Simple memory cache for single process environments. This class exists + mainly for the development server and is not 100% thread safe. It tries + to use as many atomic operations as possible and no locks for simplicity + but it could happen under heavy load that keys are added multiple times. + + :param threshold: the maximum number of items the cache stores before + it starts deleting some. + :param default_timeout: the default timeout that is used if no timeout is + specified on :meth:`~BaseCache.set`. A timeout of + 0 indicates that the cache never expires. + """ + + def __init__(self, threshold=500, default_timeout=300): + BaseCache.__init__(self, default_timeout) + self._cache = {} + self.clear = self._cache.clear + self._threshold = threshold + + def _prune(self): + if len(self._cache) > self._threshold: + now = time() + toremove = [] + for idx, (key, (expires, _)) in enumerate(self._cache.items()): + if (expires != 0 and expires <= now) or idx % 3 == 0: + toremove.append(key) + for key in toremove: + self._cache.pop(key, None) + + def _normalize_timeout(self, timeout): + timeout = BaseCache._normalize_timeout(self, timeout) + if timeout > 0: + timeout = time() + timeout + return timeout + + def get(self, key): + try: + expires, value = self._cache[key] + if expires == 0 or expires > time(): + return pickle.loads(value) + except (KeyError, pickle.PickleError): + return None + + def set(self, key, value, timeout=None): + expires = self._normalize_timeout(timeout) + self._prune() + self._cache[key] = (expires, pickle.dumps(value, pickle.HIGHEST_PROTOCOL)) + return True + + def add(self, key, value, timeout=None): + expires = self._normalize_timeout(timeout) + self._prune() + item = (expires, pickle.dumps(value, pickle.HIGHEST_PROTOCOL)) + if key in self._cache: + return False + self._cache.setdefault(key, item) + return True + + def delete(self, key): + return self._cache.pop(key, None) is not None + + def has(self, key): + try: + expires, value = self._cache[key] + return expires == 0 or expires > time() + except KeyError: + return False + + +_test_memcached_key = re.compile(r"[^\x00-\x21\xff]{1,250}$").match + + +class MemcachedCache(BaseCache): + """A cache that uses memcached as backend. + + The first argument can either be an object that resembles the API of a + :class:`memcache.Client` or a tuple/list of server addresses. In the + event that a tuple/list is passed, Werkzeug tries to import the best + available memcache library. + + This cache looks into the following packages/modules to find bindings for + memcached: + + - ``pylibmc`` + - ``google.appengine.api.memcached`` + - ``memcached`` + - ``libmc`` + + Implementation notes: This cache backend works around some limitations in + memcached to simplify the interface. For example unicode keys are encoded + to utf-8 on the fly. 
Methods such as :meth:`~BaseCache.get_dict` return
+ the keys in the same format as passed. Furthermore, all get methods
+ silently ignore key errors, so that untrusted user data passed to them
+ (a common case in web applications) does not cause problems.
+
+ :param servers: a list or tuple of server addresses or alternatively
+ a :class:`memcache.Client` or a compatible client.
+ :param default_timeout: the default timeout that is used if no timeout is
+ specified on :meth:`~BaseCache.set`. A timeout of
+ 0 indicates that the cache never expires.
+ :param key_prefix: a prefix that is added before all keys. This makes it
+ possible to use the same memcached server for different
+ applications. Keep in mind that
+ :meth:`~BaseCache.clear` will also clear keys with a
+ different prefix.
+ """
+
+ def __init__(self, servers=None, default_timeout=300, key_prefix=None):
+ BaseCache.__init__(self, default_timeout)
+ if servers is None or isinstance(servers, (list, tuple)):
+ if servers is None:
+ servers = ["127.0.0.1:11211"]
+ self._client = self.import_preferred_memcache_lib(servers)
+ if self._client is None:
+ raise RuntimeError("no memcache module found")
+ else:
+ # NOTE: servers is actually an already initialized memcache
+ # client.
+ self._client = servers
+
+ self.key_prefix = to_native(key_prefix)
+
+ def _normalize_key(self, key):
+ key = to_native(key, "utf-8")
+ if self.key_prefix:
+ key = self.key_prefix + key
+ return key
+
+ def _normalize_timeout(self, timeout):
+ timeout = BaseCache._normalize_timeout(self, timeout)
+ if timeout > 0:
+ timeout = int(time()) + timeout
+ return timeout
+
+ def get(self, key):
+ key = self._normalize_key(key)
+ # memcached doesn't support keys longer than 250 characters (see
+ # _test_memcached_key above). Over-long keys often come from
+ # untrusted user-submitted data, so we fail silently when getting.
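+ # (An over-long or otherwise invalid key thus falls through and is
+ # reported as a miss by returning None.)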
+ if _test_memcached_key(key): + return self._client.get(key) + + def get_dict(self, *keys): + key_mapping = {} + have_encoded_keys = False + for key in keys: + encoded_key = self._normalize_key(key) + if not isinstance(key, str): + have_encoded_keys = True + if _test_memcached_key(key): + key_mapping[encoded_key] = key + _keys = list(key_mapping) + d = rv = self._client.get_multi(_keys) + if have_encoded_keys or self.key_prefix: + rv = {} + for key, value in iteritems(d): + rv[key_mapping[key]] = value + if len(rv) < len(keys): + for key in keys: + if key not in rv: + rv[key] = None + return rv + + def add(self, key, value, timeout=None): + key = self._normalize_key(key) + timeout = self._normalize_timeout(timeout) + return self._client.add(key, value, timeout) + + def set(self, key, value, timeout=None): + key = self._normalize_key(key) + timeout = self._normalize_timeout(timeout) + return self._client.set(key, value, timeout) + + def get_many(self, *keys): + d = self.get_dict(*keys) + return [d[key] for key in keys] + + def set_many(self, mapping, timeout=None): + new_mapping = {} + for key, value in _items(mapping): + key = self._normalize_key(key) + new_mapping[key] = value + + timeout = self._normalize_timeout(timeout) + failed_keys = self._client.set_multi(new_mapping, timeout) + return not failed_keys + + def delete(self, key): + key = self._normalize_key(key) + if _test_memcached_key(key): + return self._client.delete(key) + + def delete_many(self, *keys): + new_keys = [] + for key in keys: + key = self._normalize_key(key) + if _test_memcached_key(key): + new_keys.append(key) + return self._client.delete_multi(new_keys) + + def has(self, key): + key = self._normalize_key(key) + if _test_memcached_key(key): + return self._client.append(key, "") + return False + + def clear(self): + return self._client.flush_all() + + def inc(self, key, delta=1): + key = self._normalize_key(key) + return self._client.incr(key, delta) + + def dec(self, key, delta=1): + key = self._normalize_key(key) + return self._client.decr(key, delta) + + def import_preferred_memcache_lib(self, servers): + """Returns an initialized memcache client. Used by the constructor.""" + try: + import pylibmc + except ImportError: + pass + else: + return pylibmc.Client(servers) + + try: + from google.appengine.api import memcache + except ImportError: + pass + else: + return memcache.Client() + + try: + import memcache + except ImportError: + pass + else: + return memcache.Client(servers) + + try: + import libmc + except ImportError: + pass + else: + return libmc.Client(servers) + + +# backwards compatibility +GAEMemcachedCache = MemcachedCache + + +class RedisCache(BaseCache): + """Uses the Redis key-value store as a cache backend. + + The first argument can be either a string denoting address of the Redis + server or an object resembling an instance of a redis.Redis class. + + Note: Python Redis API already takes care of encoding unicode strings on + the fly. + + .. versionadded:: 0.7 + + .. versionadded:: 0.8 + `key_prefix` was added. + + .. versionchanged:: 0.8 + This cache backend now properly serializes objects. + + .. versionchanged:: 0.8.3 + This cache backend now supports password authentication. + + .. versionchanged:: 0.10 + ``**kwargs`` is now passed to the redis object. + + :param host: address of the Redis server or an object which API is + compatible with the official Python Redis client (redis-py). + :param port: port number on which Redis server listens for connections. 
+ :param password: password authentication for the Redis server. + :param db: db (zero-based numeric index) on Redis Server to connect. + :param default_timeout: the default timeout that is used if no timeout is + specified on :meth:`~BaseCache.set`. A timeout of + 0 indicates that the cache never expires. + :param key_prefix: A prefix that should be added to all keys. + + Any additional keyword arguments will be passed to ``redis.Redis``. + """ + + def __init__( + self, + host="localhost", + port=6379, + password=None, + db=0, + default_timeout=300, + key_prefix=None, + **kwargs + ): + BaseCache.__init__(self, default_timeout) + if host is None: + raise ValueError("RedisCache host parameter may not be None") + if isinstance(host, string_types): + try: + import redis + except ImportError: + raise RuntimeError("no redis module found") + if kwargs.get("decode_responses", None): + raise ValueError("decode_responses is not supported by RedisCache.") + self._client = redis.Redis( + host=host, port=port, password=password, db=db, **kwargs + ) + else: + self._client = host + self.key_prefix = key_prefix or "" + + def _normalize_timeout(self, timeout): + timeout = BaseCache._normalize_timeout(self, timeout) + if timeout == 0: + timeout = -1 + return timeout + + def dump_object(self, value): + """Dumps an object into a string for redis. By default it serializes + integers as regular string and pickle dumps everything else. + """ + t = type(value) + if t in integer_types: + return str(value).encode("ascii") + return b"!" + pickle.dumps(value) + + def load_object(self, value): + """The reversal of :meth:`dump_object`. This might be called with + None. + """ + if value is None: + return None + if value.startswith(b"!"): + try: + return pickle.loads(value[1:]) + except pickle.PickleError: + return None + try: + return int(value) + except ValueError: + # before 0.8 we did not have serialization. Still support that. 
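+ # (e.g. a raw b"spam" written before 0.8 comes back unchanged
+ # as bytes)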
+ return value + + def get(self, key): + return self.load_object(self._client.get(self.key_prefix + key)) + + def get_many(self, *keys): + if self.key_prefix: + keys = [self.key_prefix + key for key in keys] + return [self.load_object(x) for x in self._client.mget(keys)] + + def set(self, key, value, timeout=None): + timeout = self._normalize_timeout(timeout) + dump = self.dump_object(value) + if timeout == -1: + result = self._client.set(name=self.key_prefix + key, value=dump) + else: + result = self._client.setex( + name=self.key_prefix + key, value=dump, time=timeout + ) + return result + + def add(self, key, value, timeout=None): + timeout = self._normalize_timeout(timeout) + dump = self.dump_object(value) + return self._client.setnx( + name=self.key_prefix + key, value=dump + ) and self._client.expire(name=self.key_prefix + key, time=timeout) + + def set_many(self, mapping, timeout=None): + timeout = self._normalize_timeout(timeout) + # Use transaction=False to batch without calling redis MULTI + # which is not supported by twemproxy + pipe = self._client.pipeline(transaction=False) + + for key, value in _items(mapping): + dump = self.dump_object(value) + if timeout == -1: + pipe.set(name=self.key_prefix + key, value=dump) + else: + pipe.setex(name=self.key_prefix + key, value=dump, time=timeout) + return pipe.execute() + + def delete(self, key): + return self._client.delete(self.key_prefix + key) + + def delete_many(self, *keys): + if not keys: + return + if self.key_prefix: + keys = [self.key_prefix + key for key in keys] + return self._client.delete(*keys) + + def has(self, key): + return self._client.exists(self.key_prefix + key) + + def clear(self): + status = False + if self.key_prefix: + keys = self._client.keys(self.key_prefix + "*") + if keys: + status = self._client.delete(*keys) + else: + status = self._client.flushdb() + return status + + def inc(self, key, delta=1): + return self._client.incr(name=self.key_prefix + key, amount=delta) + + def dec(self, key, delta=1): + return self._client.decr(name=self.key_prefix + key, amount=delta) + + +class FileSystemCache(BaseCache): + """A cache that stores the items on the file system. This cache depends + on being the only user of the `cache_dir`. Make absolutely sure that + nobody but this cache stores files there or otherwise the cache will + randomly delete files therein. + + :param cache_dir: the directory where cache files are stored. + :param threshold: the maximum number of items the cache stores before + it starts deleting some. A threshold value of 0 + indicates no threshold. + :param default_timeout: the default timeout that is used if no timeout is + specified on :meth:`~BaseCache.set`. A timeout of + 0 indicates that the cache never expires. 
+ :param mode: the file mode wanted for the cache files, default 0600 + """ + + #: used for temporary files by the FileSystemCache + _fs_transaction_suffix = ".__wz_cache" + #: keep amount of files in a cache element + _fs_count_file = "__wz_cache_count" + + def __init__(self, cache_dir, threshold=500, default_timeout=300, mode=0o600): + BaseCache.__init__(self, default_timeout) + self._path = cache_dir + self._threshold = threshold + self._mode = mode + + try: + os.makedirs(self._path) + except OSError as ex: + if ex.errno != errno.EEXIST: + raise + + self._update_count(value=len(self._list_dir())) + + @property + def _file_count(self): + return self.get(self._fs_count_file) or 0 + + def _update_count(self, delta=None, value=None): + # If we have no threshold, don't count files + if self._threshold == 0: + return + + if delta: + new_count = self._file_count + delta + else: + new_count = value or 0 + self.set(self._fs_count_file, new_count, mgmt_element=True) + + def _normalize_timeout(self, timeout): + timeout = BaseCache._normalize_timeout(self, timeout) + if timeout != 0: + timeout = time() + timeout + return int(timeout) + + def _list_dir(self): + """return a list of (fully qualified) cache filenames + """ + mgmt_files = [ + self._get_filename(name).split("/")[-1] for name in (self._fs_count_file,) + ] + return [ + os.path.join(self._path, fn) + for fn in os.listdir(self._path) + if not fn.endswith(self._fs_transaction_suffix) and fn not in mgmt_files + ] + + def _prune(self): + if self._threshold == 0 or not self._file_count > self._threshold: + return + + entries = self._list_dir() + now = time() + for idx, fname in enumerate(entries): + try: + remove = False + with open(fname, "rb") as f: + expires = pickle.load(f) + remove = (expires != 0 and expires <= now) or idx % 3 == 0 + + if remove: + os.remove(fname) + except (IOError, OSError): + pass + self._update_count(value=len(self._list_dir())) + + def clear(self): + for fname in self._list_dir(): + try: + os.remove(fname) + except (IOError, OSError): + self._update_count(value=len(self._list_dir())) + return False + self._update_count(value=0) + return True + + def _get_filename(self, key): + if isinstance(key, text_type): + key = key.encode("utf-8") # XXX unicode review + hash = md5(key).hexdigest() + return os.path.join(self._path, hash) + + def get(self, key): + filename = self._get_filename(key) + try: + with open(filename, "rb") as f: + pickle_time = pickle.load(f) + if pickle_time == 0 or pickle_time >= time(): + return pickle.load(f) + else: + os.remove(filename) + return None + except (IOError, OSError, pickle.PickleError): + return None + + def add(self, key, value, timeout=None): + filename = self._get_filename(key) + if not os.path.exists(filename): + return self.set(key, value, timeout) + return False + + def set(self, key, value, timeout=None, mgmt_element=False): + # Management elements have no timeout + if mgmt_element: + timeout = 0 + + # Don't prune on management element update, to avoid loop + else: + self._prune() + + timeout = self._normalize_timeout(timeout) + filename = self._get_filename(key) + try: + fd, tmp = tempfile.mkstemp( + suffix=self._fs_transaction_suffix, dir=self._path + ) + with os.fdopen(fd, "wb") as f: + pickle.dump(timeout, f, 1) + pickle.dump(value, f, pickle.HIGHEST_PROTOCOL) + rename(tmp, filename) + os.chmod(filename, self._mode) + except (IOError, OSError): + return False + else: + # Management elements should not count towards threshold + if not mgmt_element: + self._update_count(delta=1) 
+ return True + + def delete(self, key, mgmt_element=False): + try: + os.remove(self._get_filename(key)) + except (IOError, OSError): + return False + else: + # Management elements should not count towards threshold + if not mgmt_element: + self._update_count(delta=-1) + return True + + def has(self, key): + filename = self._get_filename(key) + try: + with open(filename, "rb") as f: + pickle_time = pickle.load(f) + if pickle_time == 0 or pickle_time >= time(): + return True + else: + os.remove(filename) + return False + except (IOError, OSError, pickle.PickleError): + return False + + +class UWSGICache(BaseCache): + """Implements the cache using uWSGI's caching framework. + + .. note:: + This class cannot be used when running under PyPy, because the uWSGI + API implementation for PyPy is lacking the needed functionality. + + :param default_timeout: The default timeout in seconds. + :param cache: The name of the caching instance to connect to, for + example: mycache@localhost:3031, defaults to an empty string, which + means uWSGI will cache in the local instance. If the cache is in the + same instance as the werkzeug app, you only have to provide the name of + the cache. + """ + + def __init__(self, default_timeout=300, cache=""): + BaseCache.__init__(self, default_timeout) + + if platform.python_implementation() == "PyPy": + raise RuntimeError( + "uWSGI caching does not work under PyPy, see " + "the docs for more details." + ) + + try: + import uwsgi + + self._uwsgi = uwsgi + except ImportError: + raise RuntimeError( + "uWSGI could not be imported, are you running under uWSGI?" + ) + + self.cache = cache + + def get(self, key): + rv = self._uwsgi.cache_get(key, self.cache) + if rv is None: + return + return pickle.loads(rv) + + def delete(self, key): + return self._uwsgi.cache_del(key, self.cache) + + def set(self, key, value, timeout=None): + return self._uwsgi.cache_update( + key, pickle.dumps(value), self._normalize_timeout(timeout), self.cache + ) + + def add(self, key, value, timeout=None): + return self._uwsgi.cache_set( + key, pickle.dumps(value), self._normalize_timeout(timeout), self.cache + ) + + def clear(self): + return self._uwsgi.cache_clear(self.cache) + + def has(self, key): + return self._uwsgi.cache_exists(key, self.cache) is not None diff --git a/python/werkzeug/contrib/fixers.py b/python/werkzeug/contrib/fixers.py new file mode 100644 index 0000000..8df0afd --- /dev/null +++ b/python/werkzeug/contrib/fixers.py @@ -0,0 +1,262 @@ +""" +Fixers +====== + +.. warning:: + .. deprecated:: 0.15 + ``ProxyFix`` has moved to :mod:`werkzeug.middleware.proxy_fix`. + All other code in this module is deprecated and will be removed + in version 1.0. + +.. versionadded:: 0.5 + +This module includes various helpers that fix web server behavior. + +.. autoclass:: ProxyFix + :members: + +.. autoclass:: CGIRootFix + +.. autoclass:: PathInfoFromRequestUriFix + +.. autoclass:: HeaderRewriterFix + +.. 
autoclass:: InternetExplorerFix + +:copyright: 2007 Pallets +:license: BSD-3-Clause +""" +import warnings + +from ..datastructures import Headers +from ..datastructures import ResponseCacheControl +from ..http import parse_cache_control_header +from ..http import parse_options_header +from ..http import parse_set_header +from ..middleware.proxy_fix import ProxyFix as _ProxyFix +from ..useragents import UserAgent + +try: + from urllib.parse import unquote +except ImportError: + from urllib import unquote + + +class CGIRootFix(object): + """Wrap the application in this middleware if you are using FastCGI + or CGI and you have problems with your app root being set to the CGI + script's path instead of the path users are going to visit. + + :param app: the WSGI application + :param app_root: Defaulting to ``'/'``, you can set this to + something else if your app is mounted somewhere else. + + .. deprecated:: 0.15 + This middleware will be removed in version 1.0. + + .. versionchanged:: 0.9 + Added `app_root` parameter and renamed from + ``LighttpdCGIRootFix``. + """ + + def __init__(self, app, app_root="/"): + warnings.warn( + "'CGIRootFix' is deprecated as of version 0.15 and will be" + " removed in version 1.0.", + DeprecationWarning, + stacklevel=2, + ) + self.app = app + self.app_root = app_root.strip("/") + + def __call__(self, environ, start_response): + environ["SCRIPT_NAME"] = self.app_root + return self.app(environ, start_response) + + +class LighttpdCGIRootFix(CGIRootFix): + def __init__(self, *args, **kwargs): + warnings.warn( + "'LighttpdCGIRootFix' is renamed 'CGIRootFix'. Both will be" + " removed in version 1.0.", + DeprecationWarning, + stacklevel=2, + ) + super(LighttpdCGIRootFix, self).__init__(*args, **kwargs) + + +class PathInfoFromRequestUriFix(object): + """On windows environment variables are limited to the system charset + which makes it impossible to store the `PATH_INFO` variable in the + environment without loss of information on some systems. + + This is for example a problem for CGI scripts on a Windows Apache. + + This fixer works by recreating the `PATH_INFO` from `REQUEST_URI`, + `REQUEST_URL`, or `UNENCODED_URL` (whatever is available). Thus the + fix can only be applied if the webserver supports either of these + variables. + + :param app: the WSGI application + + .. deprecated:: 0.15 + This middleware will be removed in version 1.0. + """ + + def __init__(self, app): + warnings.warn( + "'PathInfoFromRequestUriFix' is deprecated as of version" + " 0.15 and will be removed in version 1.0.", + DeprecationWarning, + stacklevel=2, + ) + self.app = app + + def __call__(self, environ, start_response): + for key in "REQUEST_URL", "REQUEST_URI", "UNENCODED_URL": + if key not in environ: + continue + request_uri = unquote(environ[key]) + script_name = unquote(environ.get("SCRIPT_NAME", "")) + if request_uri.startswith(script_name): + environ["PATH_INFO"] = request_uri[len(script_name) :].split("?", 1)[0] + break + return self.app(environ, start_response) + + +class ProxyFix(_ProxyFix): + """ + .. deprecated:: 0.15 + ``werkzeug.contrib.fixers.ProxyFix`` has moved to + :mod:`werkzeug.middleware.proxy_fix`. This import will be + removed in 1.0. + """ + + def __init__(self, *args, **kwargs): + warnings.warn( + "'werkzeug.contrib.fixers.ProxyFix' has moved to 'werkzeug" + ".middleware.proxy_fix.ProxyFix'. 
This import is deprecated" + " as of version 0.15 and will be removed in 1.0.", + DeprecationWarning, + stacklevel=2, + ) + super(ProxyFix, self).__init__(*args, **kwargs) + + +class HeaderRewriterFix(object): + """This middleware can remove response headers and add others. This + is for example useful to remove the `Date` header from responses if you + are using a server that adds that header, no matter if it's present or + not or to add `X-Powered-By` headers:: + + app = HeaderRewriterFix(app, remove_headers=['Date'], + add_headers=[('X-Powered-By', 'WSGI')]) + + :param app: the WSGI application + :param remove_headers: a sequence of header keys that should be + removed. + :param add_headers: a sequence of ``(key, value)`` tuples that should + be added. + + .. deprecated:: 0.15 + This middleware will be removed in 1.0. + """ + + def __init__(self, app, remove_headers=None, add_headers=None): + warnings.warn( + "'HeaderRewriterFix' is deprecated as of version 0.15 and" + " will be removed in version 1.0.", + DeprecationWarning, + stacklevel=2, + ) + self.app = app + self.remove_headers = set(x.lower() for x in (remove_headers or ())) + self.add_headers = list(add_headers or ()) + + def __call__(self, environ, start_response): + def rewriting_start_response(status, headers, exc_info=None): + new_headers = [] + for key, value in headers: + if key.lower() not in self.remove_headers: + new_headers.append((key, value)) + new_headers += self.add_headers + return start_response(status, new_headers, exc_info) + + return self.app(environ, rewriting_start_response) + + +class InternetExplorerFix(object): + """This middleware fixes a couple of bugs with Microsoft Internet + Explorer. Currently the following fixes are applied: + + - removing of `Vary` headers for unsupported mimetypes which + causes troubles with caching. Can be disabled by passing + ``fix_vary=False`` to the constructor. + see: https://support.microsoft.com/en-us/help/824847 + + - removes offending headers to work around caching bugs in + Internet Explorer if `Content-Disposition` is set. Can be + disabled by passing ``fix_attach=False`` to the constructor. + + If it does not detect affected Internet Explorer versions it won't touch + the request / response. + + .. deprecated:: 0.15 + This middleware will be removed in 1.0. + """ + + # This code was inspired by Django fixers for the same bugs. 
The + # fix_vary and fix_attach fixers were originally implemented in Django + # by Michael Axiak and is available as part of the Django project: + # https://code.djangoproject.com/ticket/4148 + + def __init__(self, app, fix_vary=True, fix_attach=True): + warnings.warn( + "'InternetExplorerFix' is deprecated as of version 0.15 and" + " will be removed in version 1.0.", + DeprecationWarning, + stacklevel=2, + ) + self.app = app + self.fix_vary = fix_vary + self.fix_attach = fix_attach + + def fix_headers(self, environ, headers, status=None): + if self.fix_vary: + header = headers.get("content-type", "") + mimetype, options = parse_options_header(header) + if mimetype not in ("text/html", "text/plain", "text/sgml"): + headers.pop("vary", None) + + if self.fix_attach and "content-disposition" in headers: + pragma = parse_set_header(headers.get("pragma", "")) + pragma.discard("no-cache") + header = pragma.to_header() + if not header: + headers.pop("pragma", "") + else: + headers["Pragma"] = header + header = headers.get("cache-control", "") + if header: + cc = parse_cache_control_header(header, cls=ResponseCacheControl) + cc.no_cache = None + cc.no_store = False + header = cc.to_header() + if not header: + headers.pop("cache-control", "") + else: + headers["Cache-Control"] = header + + def run_fixed(self, environ, start_response): + def fixing_start_response(status, headers, exc_info=None): + headers = Headers(headers) + self.fix_headers(environ, headers, status) + return start_response(status, headers.to_wsgi_list(), exc_info) + + return self.app(environ, fixing_start_response) + + def __call__(self, environ, start_response): + ua = UserAgent(environ) + if ua.browser != "msie": + return self.app(environ, start_response) + return self.run_fixed(environ, start_response) diff --git a/python/werkzeug/contrib/iterio.py b/python/werkzeug/contrib/iterio.py new file mode 100644 index 0000000..b672454 --- /dev/null +++ b/python/werkzeug/contrib/iterio.py @@ -0,0 +1,358 @@ +# -*- coding: utf-8 -*- +r""" + werkzeug.contrib.iterio + ~~~~~~~~~~~~~~~~~~~~~~~ + + This module implements a :class:`IterIO` that converts an iterator into + a stream object and the other way round. Converting streams into + iterators requires the `greenlet`_ module. + + To convert an iterator into a stream all you have to do is to pass it + directly to the :class:`IterIO` constructor. In this example we pass it + a newly created generator:: + + def foo(): + yield "something\n" + yield "otherthings" + stream = IterIO(foo()) + print stream.read() # read the whole iterator + + The other way round works a bit different because we have to ensure that + the code execution doesn't take place yet. An :class:`IterIO` call with a + callable as first argument does two things. The function itself is passed + an :class:`IterIO` stream it can feed. The object returned by the + :class:`IterIO` constructor on the other hand is not an stream object but + an iterator:: + + def foo(stream): + stream.write("some") + stream.write("thing") + stream.flush() + stream.write("otherthing") + iterator = IterIO(foo) + print iterator.next() # prints something + print iterator.next() # prints otherthing + iterator.next() # raises StopIteration + + .. 
_greenlet: https://github.com/python-greenlet/greenlet + + :copyright: 2007 Pallets + :license: BSD-3-Clause +""" +import warnings + +from .._compat import implements_iterator + +try: + import greenlet +except ImportError: + greenlet = None + +warnings.warn( + "'werkzeug.contrib.iterio' is deprecated as of version 0.15 and" + " will be removed in version 1.0.", + DeprecationWarning, + stacklevel=2, +) + + +def _mixed_join(iterable, sentinel): + """concatenate any string type in an intelligent way.""" + iterator = iter(iterable) + first_item = next(iterator, sentinel) + if isinstance(first_item, bytes): + return first_item + b"".join(iterator) + return first_item + u"".join(iterator) + + +def _newline(reference_string): + if isinstance(reference_string, bytes): + return b"\n" + return u"\n" + + +@implements_iterator +class IterIO(object): + """Instances of this object implement an interface compatible with the + standard Python :class:`file` object. Streams are either read-only or + write-only depending on how the object is created. + + If the first argument is an iterable a file like object is returned that + returns the contents of the iterable. In case the iterable is empty + read operations will return the sentinel value. + + If the first argument is a callable then the stream object will be + created and passed to that function. The caller itself however will + not receive a stream but an iterable. The function will be executed + step by step as something iterates over the returned iterable. Each + call to :meth:`flush` will create an item for the iterable. If + :meth:`flush` is called without any writes in-between the sentinel + value will be yielded. + + Note for Python 3: due to the incompatible interface of bytes and + streams you should set the sentinel value explicitly to an empty + bytestring (``b''``) if you are expecting to deal with bytes as + otherwise the end of the stream is marked with the wrong sentinel + value. + + .. versionadded:: 0.9 + `sentinel` parameter was added. 
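A minimal sketch of the byte-sentinel note above, assuming the vendored module is importable as ``werkzeug.contrib.iterio``::

    from werkzeug.contrib.iterio import IterIO

    stream = IterIO(iter([b"hello ", b"world"]), sentinel=b"")
    assert stream.read(6) == b"hello "
    assert stream.read() == b"world"
    assert stream.read() == b""  # exhausted; the bytes sentinel marks the end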
+ """ + + def __new__(cls, obj, sentinel=""): + try: + iterator = iter(obj) + except TypeError: + return IterI(obj, sentinel) + return IterO(iterator, sentinel) + + def __iter__(self): + return self + + def tell(self): + if self.closed: + raise ValueError("I/O operation on closed file") + return self.pos + + def isatty(self): + if self.closed: + raise ValueError("I/O operation on closed file") + return False + + def seek(self, pos, mode=0): + if self.closed: + raise ValueError("I/O operation on closed file") + raise IOError(9, "Bad file descriptor") + + def truncate(self, size=None): + if self.closed: + raise ValueError("I/O operation on closed file") + raise IOError(9, "Bad file descriptor") + + def write(self, s): + if self.closed: + raise ValueError("I/O operation on closed file") + raise IOError(9, "Bad file descriptor") + + def writelines(self, list): + if self.closed: + raise ValueError("I/O operation on closed file") + raise IOError(9, "Bad file descriptor") + + def read(self, n=-1): + if self.closed: + raise ValueError("I/O operation on closed file") + raise IOError(9, "Bad file descriptor") + + def readlines(self, sizehint=0): + if self.closed: + raise ValueError("I/O operation on closed file") + raise IOError(9, "Bad file descriptor") + + def readline(self, length=None): + if self.closed: + raise ValueError("I/O operation on closed file") + raise IOError(9, "Bad file descriptor") + + def flush(self): + if self.closed: + raise ValueError("I/O operation on closed file") + raise IOError(9, "Bad file descriptor") + + def __next__(self): + if self.closed: + raise StopIteration() + line = self.readline() + if not line: + raise StopIteration() + return line + + +class IterI(IterIO): + """Convert an stream into an iterator.""" + + def __new__(cls, func, sentinel=""): + if greenlet is None: + raise RuntimeError("IterI requires greenlet support") + stream = object.__new__(cls) + stream._parent = greenlet.getcurrent() + stream._buffer = [] + stream.closed = False + stream.sentinel = sentinel + stream.pos = 0 + + def run(): + func(stream) + stream.close() + + g = greenlet.greenlet(run, stream._parent) + while 1: + rv = g.switch() + if not rv: + return + yield rv[0] + + def close(self): + if not self.closed: + self.closed = True + self._flush_impl() + + def write(self, s): + if self.closed: + raise ValueError("I/O operation on closed file") + if s: + self.pos += len(s) + self._buffer.append(s) + + def writelines(self, list): + for item in list: + self.write(item) + + def flush(self): + if self.closed: + raise ValueError("I/O operation on closed file") + self._flush_impl() + + def _flush_impl(self): + data = _mixed_join(self._buffer, self.sentinel) + self._buffer = [] + if not data and self.closed: + self._parent.switch() + else: + self._parent.switch((data,)) + + +class IterO(IterIO): + """Iter output. 
Wrap an iterator and give it a stream like interface.""" + + def __new__(cls, gen, sentinel=""): + self = object.__new__(cls) + self._gen = gen + self._buf = None + self.sentinel = sentinel + self.closed = False + self.pos = 0 + return self + + def __iter__(self): + return self + + def _buf_append(self, string): + """Replace string directly without appending to an empty string, + avoiding type issues.""" + if not self._buf: + self._buf = string + else: + self._buf += string + + def close(self): + if not self.closed: + self.closed = True + if hasattr(self._gen, "close"): + self._gen.close() + + def seek(self, pos, mode=0): + if self.closed: + raise ValueError("I/O operation on closed file") + if mode == 1: + pos += self.pos + elif mode == 2: + self.read() + self.pos = min(self.pos, self.pos + pos) + return + elif mode != 0: + raise IOError("Invalid argument") + buf = [] + try: + tmp_end_pos = len(self._buf or "") + while pos > tmp_end_pos: + item = next(self._gen) + tmp_end_pos += len(item) + buf.append(item) + except StopIteration: + pass + if buf: + self._buf_append(_mixed_join(buf, self.sentinel)) + self.pos = max(0, pos) + + def read(self, n=-1): + if self.closed: + raise ValueError("I/O operation on closed file") + if n < 0: + self._buf_append(_mixed_join(self._gen, self.sentinel)) + result = self._buf[self.pos :] + self.pos += len(result) + return result + new_pos = self.pos + n + buf = [] + try: + tmp_end_pos = 0 if self._buf is None else len(self._buf) + while new_pos > tmp_end_pos or (self._buf is None and not buf): + item = next(self._gen) + tmp_end_pos += len(item) + buf.append(item) + except StopIteration: + pass + if buf: + self._buf_append(_mixed_join(buf, self.sentinel)) + + if self._buf is None: + return self.sentinel + + new_pos = max(0, new_pos) + try: + return self._buf[self.pos : new_pos] + finally: + self.pos = min(new_pos, len(self._buf)) + + def readline(self, length=None): + if self.closed: + raise ValueError("I/O operation on closed file") + + nl_pos = -1 + if self._buf: + nl_pos = self._buf.find(_newline(self._buf), self.pos) + buf = [] + try: + if self._buf is None: + pos = self.pos + else: + pos = len(self._buf) + while nl_pos < 0: + item = next(self._gen) + local_pos = item.find(_newline(item)) + buf.append(item) + if local_pos >= 0: + nl_pos = pos + local_pos + break + pos += len(item) + except StopIteration: + pass + if buf: + self._buf_append(_mixed_join(buf, self.sentinel)) + + if self._buf is None: + return self.sentinel + + if nl_pos < 0: + new_pos = len(self._buf) + else: + new_pos = nl_pos + 1 + if length is not None and self.pos + length < new_pos: + new_pos = self.pos + length + try: + return self._buf[self.pos : new_pos] + finally: + self.pos = min(new_pos, len(self._buf)) + + def readlines(self, sizehint=0): + total = 0 + lines = [] + line = self.readline() + while line: + lines.append(line) + total += len(line) + if 0 < sizehint <= total: + break + line = self.readline() + return lines diff --git a/python/werkzeug/contrib/lint.py b/python/werkzeug/contrib/lint.py new file mode 100644 index 0000000..8bd8b8a --- /dev/null +++ b/python/werkzeug/contrib/lint.py @@ -0,0 +1,11 @@ +import warnings + +from ..middleware.lint import * # noqa: F401, F403 + +warnings.warn( + "'werkzeug.contrib.lint' has moved to 'werkzeug.middleware.lint'." 
+ " This import is deprecated as of version 0.15 and will be removed" + " in version 1.0.", + DeprecationWarning, + stacklevel=2, +) diff --git a/python/werkzeug/contrib/profiler.py b/python/werkzeug/contrib/profiler.py new file mode 100644 index 0000000..b79fe56 --- /dev/null +++ b/python/werkzeug/contrib/profiler.py @@ -0,0 +1,42 @@ +import warnings + +from ..middleware.profiler import * # noqa: F401, F403 + +warnings.warn( + "'werkzeug.contrib.profiler' has moved to" + "'werkzeug.middleware.profiler'. This import is deprecated as of" + "version 0.15 and will be removed in version 1.0.", + DeprecationWarning, + stacklevel=2, +) + + +class MergeStream(object): + """An object that redirects ``write`` calls to multiple streams. + Use this to log to both ``sys.stdout`` and a file:: + + f = open('profiler.log', 'w') + stream = MergeStream(sys.stdout, f) + profiler = ProfilerMiddleware(app, stream) + + .. deprecated:: 0.15 + Use the ``tee`` command in your terminal instead. This class + will be removed in 1.0. + """ + + def __init__(self, *streams): + warnings.warn( + "'MergeStream' is deprecated as of version 0.15 and will be removed in" + " version 1.0. Use your terminal's 'tee' command instead.", + DeprecationWarning, + stacklevel=2, + ) + + if not streams: + raise TypeError("At least one stream must be given.") + + self.streams = streams + + def write(self, data): + for stream in self.streams: + stream.write(data) diff --git a/python/werkzeug/contrib/securecookie.py b/python/werkzeug/contrib/securecookie.py new file mode 100644 index 0000000..c4c9eee --- /dev/null +++ b/python/werkzeug/contrib/securecookie.py @@ -0,0 +1,362 @@ +# -*- coding: utf-8 -*- +r""" + werkzeug.contrib.securecookie + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + This module implements a cookie that is not alterable from the client + because it adds a checksum the server checks for. You can use it as + session replacement if all you have is a user id or something to mark + a logged in user. + + Keep in mind that the data is still readable from the client as a + normal cookie is. However you don't have to store and flush the + sessions you have at the server. + + Example usage: + + >>> from werkzeug.contrib.securecookie import SecureCookie + >>> x = SecureCookie({"foo": 42, "baz": (1, 2, 3)}, "deadbeef") + + Dumping into a string so that one can store it in a cookie: + + >>> value = x.serialize() + + Loading from that string again: + + >>> x = SecureCookie.unserialize(value, "deadbeef") + >>> x["baz"] + (1, 2, 3) + + If someone modifies the cookie and the checksum is wrong the unserialize + method will fail silently and return a new empty `SecureCookie` object. + + Keep in mind that the values will be visible in the cookie so do not + store data in a cookie you don't want the user to see. 
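To make the warning above concrete: the serialized value is signed but not encrypted, so a client can always decode the payload. A minimal sketch ("deadbeef" is only a placeholder key)::

    import base64
    from werkzeug.contrib.securecookie import SecureCookie

    value = SecureCookie({"user_id": 42}, "deadbeef").serialize()
    payload = value.split(b"?", 1)[1]        # everything after the MAC
    raw = base64.b64decode(payload.split(b"=", 1)[1])
    print(raw)                               # the pickled value, readable by anyone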
+
+    Application Integration
+    =======================
+
+    If you are using the werkzeug request objects you could integrate the
+    secure cookie into your application like this::
+
+        from werkzeug.utils import cached_property
+        from werkzeug.wrappers import BaseRequest
+        from werkzeug.contrib.securecookie import SecureCookie
+
+        # don't use this key but a different one; you could just use
+        # os.urandom(20) to get something random
+        SECRET_KEY = '\xfa\xdd\xb8z\xae\xe0}4\x8b\xea'
+
+        class Request(BaseRequest):
+
+            @cached_property
+            def client_session(self):
+                data = self.cookies.get('session_data')
+                if not data:
+                    return SecureCookie(secret_key=SECRET_KEY)
+                return SecureCookie.unserialize(data, SECRET_KEY)
+
+        def application(environ, start_response):
+            request = Request(environ)
+
+            # get a response object here
+            response = ...
+
+            if request.client_session.should_save:
+                session_data = request.client_session.serialize()
+                response.set_cookie('session_data', session_data,
+                                    httponly=True)
+            return response(environ, start_response)
+
+    A less verbose integration can be achieved by using shorthand methods::
+
+        class Request(BaseRequest):
+
+            @cached_property
+            def client_session(self):
+                return SecureCookie.load_cookie(self, secret_key=COOKIE_SECRET)
+
+        def application(environ, start_response):
+            request = Request(environ)
+
+            # get a response object here
+            response = ...
+
+            request.client_session.save_cookie(response)
+            return response(environ, start_response)
+
+    :copyright: 2007 Pallets
+    :license: BSD-3-Clause
+"""
+import base64
+import pickle
+import warnings
+from hashlib import sha1 as _default_hash
+from hmac import new as hmac
+from time import time
+
+from .._compat import iteritems
+from .._compat import text_type
+from .._compat import to_bytes
+from .._compat import to_native
+from .._internal import _date_to_unix
+from ..contrib.sessions import ModificationTrackingDict
+from ..security import safe_str_cmp
+from ..urls import url_quote_plus
+from ..urls import url_unquote_plus
+
+warnings.warn(
+    "'werkzeug.contrib.securecookie' is deprecated as of version 0.15"
+    " and will be removed in version 1.0. It has moved to"
+    " https://github.com/pallets/secure-cookie.",
+    DeprecationWarning,
+    stacklevel=2,
+)
+
+
+class UnquoteError(Exception):
+    """Internal exception used to signal failures on quoting."""
+
+
+class SecureCookie(ModificationTrackingDict):
+    """Represents a secure cookie. You can subclass this class and provide
+    an alternative mac method. The important thing is that the mac method
+    is a function with an interface similar to the hashlib hashes. Required
+    methods are update() and digest().
+
+    Example usage:
+
+    >>> x = SecureCookie({"foo": 42, "baz": (1, 2, 3)}, "deadbeef")
+    >>> x["foo"]
+    42
+    >>> x["baz"]
+    (1, 2, 3)
+    >>> x["blafasel"] = 23
+    >>> x.should_save
+    True
+
+    :param data: the initial data. Either a dict, list of tuples or `None`.
+    :param secret_key: the secret key. If not set (or set to `None`), it
+                       has to be set before :meth:`serialize` is called.
+    :param new: The initial value of the `new` flag.
+    """
+
+    #: The hash method to use. This has to be a module with a new function
+    #: or a function that creates a hashlib object, such as `hashlib.md5`.
+    #: Subclasses can override this attribute. The default hash is sha1.
+    #: Make sure to wrap this in staticmethod() if you store an arbitrary
+    #: function there such as hashlib.sha1 which might be implemented
+    #: as a function.
+ hash_method = staticmethod(_default_hash) + + #: The module used for serialization. Should have a ``dumps`` and a + #: ``loads`` method that takes bytes. The default is :mod:`pickle`. + #: + #: .. versionchanged:: 0.15 + #: The default of ``pickle`` will change to :mod:`json` in 1.0. + serialization_method = pickle + + #: if the contents should be base64 quoted. This can be disabled if the + #: serialization process returns cookie safe strings only. + quote_base64 = True + + def __init__(self, data=None, secret_key=None, new=True): + ModificationTrackingDict.__init__(self, data or ()) + # explicitly convert it into a bytestring because python 2.6 + # no longer performs an implicit string conversion on hmac + if secret_key is not None: + secret_key = to_bytes(secret_key, "utf-8") + self.secret_key = secret_key + self.new = new + + if self.serialization_method is pickle: + warnings.warn( + "The default 'SecureCookie.serialization_method' will" + " change from pickle to json in version 1.0. To upgrade" + " existing tokens, override 'unquote' to try pickle if" + " json fails.", + stacklevel=2, + ) + + def __repr__(self): + return "<%s %s%s>" % ( + self.__class__.__name__, + dict.__repr__(self), + "*" if self.should_save else "", + ) + + @property + def should_save(self): + """True if the session should be saved. By default this is only true + for :attr:`modified` cookies, not :attr:`new`. + """ + return self.modified + + @classmethod + def quote(cls, value): + """Quote the value for the cookie. This can be any object supported + by :attr:`serialization_method`. + + :param value: the value to quote. + """ + if cls.serialization_method is not None: + value = cls.serialization_method.dumps(value) + if cls.quote_base64: + value = b"".join( + base64.b64encode(to_bytes(value, "utf8")).splitlines() + ).strip() + return value + + @classmethod + def unquote(cls, value): + """Unquote the value for the cookie. If unquoting does not work a + :exc:`UnquoteError` is raised. + + :param value: the value to unquote. + """ + try: + if cls.quote_base64: + value = base64.b64decode(value) + if cls.serialization_method is not None: + value = cls.serialization_method.loads(value) + return value + except Exception: + # unfortunately pickle and other serialization modules can + # cause pretty every error here. if we get one we catch it + # and convert it into an UnquoteError + raise UnquoteError() + + def serialize(self, expires=None): + """Serialize the secure cookie into a string. + + If expires is provided, the session will be automatically invalidated + after expiration when you unseralize it. This provides better + protection against session cookie theft. + + :param expires: an optional expiration date for the cookie (a + :class:`datetime.datetime` object) + """ + if self.secret_key is None: + raise RuntimeError("no secret key defined") + if expires: + self["_expires"] = _date_to_unix(expires) + result = [] + mac = hmac(self.secret_key, None, self.hash_method) + for key, value in sorted(self.items()): + result.append( + ( + "%s=%s" % (url_quote_plus(key), self.quote(value).decode("ascii")) + ).encode("ascii") + ) + mac.update(b"|" + result[-1]) + return b"?".join([base64.b64encode(mac.digest()).strip(), b"&".join(result)]) + + @classmethod + def unserialize(cls, string, secret_key): + """Load the secure cookie from a serialized string. + + :param string: the cookie value to unserialize. + :param secret_key: the secret key used to serialize the cookie. + :return: a new :class:`SecureCookie`. 
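A quick round trip, again with a placeholder key; note that a wrong key (or a tampered payload) does not raise, it silently yields an empty cookie::

    x = SecureCookie({"foo": 42}, "deadbeef")
    blob = x.serialize()
    assert SecureCookie.unserialize(blob, "deadbeef")["foo"] == 42
    assert not SecureCookie.unserialize(blob, "wrong-key")  # empty cookie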
+        """
+        if isinstance(string, text_type):
+            string = string.encode("utf-8", "replace")
+        if isinstance(secret_key, text_type):
+            secret_key = secret_key.encode("utf-8", "replace")
+        try:
+            base64_hash, data = string.split(b"?", 1)
+        except (ValueError, IndexError):
+            items = ()
+        else:
+            items = {}
+            mac = hmac(secret_key, None, cls.hash_method)
+            for item in data.split(b"&"):
+                mac.update(b"|" + item)
+                if b"=" not in item:
+                    items = None
+                    break
+                key, value = item.split(b"=", 1)
+                # try to make the key a string
+                key = url_unquote_plus(key.decode("ascii"))
+                try:
+                    key = to_native(key)
+                except UnicodeError:
+                    pass
+                items[key] = value
+
+            # no parsing error and the mac looks okay, we can now
+            # securely unpickle our cookie.
+            try:
+                client_hash = base64.b64decode(base64_hash)
+            except TypeError:
+                items = client_hash = None
+            if items is not None and safe_str_cmp(client_hash, mac.digest()):
+                try:
+                    for key, value in iteritems(items):
+                        items[key] = cls.unquote(value)
+                except UnquoteError:
+                    items = ()
+                else:
+                    if "_expires" in items:
+                        if time() > items["_expires"]:
+                            items = ()
+                        else:
+                            del items["_expires"]
+            else:
+                items = ()
+        return cls(items, secret_key, False)
+
+    @classmethod
+    def load_cookie(cls, request, key="session", secret_key=None):
+        """Loads a :class:`SecureCookie` from a cookie in the request. If
+        the cookie is not set, a new :class:`SecureCookie` instance is
+        returned.
+
+        :param request: a request object that has a `cookies` attribute
+                        which is a dict of all cookie values.
+        :param key: the name of the cookie.
+        :param secret_key: the secret key used to unquote the cookie.
+                           Always provide the value even though it has
+                           no default!
+        """
+        data = request.cookies.get(key)
+        if not data:
+            return cls(secret_key=secret_key)
+        return cls.unserialize(data, secret_key)
+
+    def save_cookie(
+        self,
+        response,
+        key="session",
+        expires=None,
+        session_expires=None,
+        max_age=None,
+        path="/",
+        domain=None,
+        secure=None,
+        httponly=False,
+        force=False,
+    ):
+        """Saves the SecureCookie in a cookie on the response object. All
+        parameters that are not described here are forwarded directly
+        to :meth:`~BaseResponse.set_cookie`.
+
+        :param response: a response object that has a
+                         :meth:`~BaseResponse.set_cookie` method.
+        :param key: the name of the cookie.
+        :param session_expires: the expiration date of the secure cookie
+                                stored information. If this is not provided
+                                the cookie `expires` date is used instead.
+        """
+        if force or self.should_save:
+            data = self.serialize(session_expires or expires)
+            response.set_cookie(
+                key,
+                data,
+                expires=expires,
+                max_age=max_age,
+                path=path,
+                domain=domain,
+                secure=secure,
+                httponly=httponly,
+            )
diff --git a/python/werkzeug/contrib/sessions.py b/python/werkzeug/contrib/sessions.py
new file mode 100644
index 0000000..866e827
--- /dev/null
+++ b/python/werkzeug/contrib/sessions.py
@@ -0,0 +1,389 @@
+# -*- coding: utf-8 -*-
+r"""
+    werkzeug.contrib.sessions
+    ~~~~~~~~~~~~~~~~~~~~~~~~~
+
+    This module contains some helper classes that help one to add session
+    support to a python WSGI application. For full client-side session
+    storage see :mod:`~werkzeug.contrib.securecookie` which implements a
+    secure, client-side session storage.
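Before the integration examples below, the store API itself round-trips like this (a sketch; sessions land in the default temporary directory)::

    from werkzeug.contrib.sessions import FilesystemSessionStore

    store = FilesystemSessionStore()
    session = store.new()
    session["user"] = "alice"
    store.save(session)
    assert store.get(session.sid)["user"] == "alice"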
+ + + Application Integration + ======================= + + :: + + from werkzeug.contrib.sessions import SessionMiddleware, \ + FilesystemSessionStore + + app = SessionMiddleware(app, FilesystemSessionStore()) + + The current session will then appear in the WSGI environment as + `werkzeug.session`. However it's recommended to not use the middleware + but the stores directly in the application. However for very simple + scripts a middleware for sessions could be sufficient. + + This module does not implement methods or ways to check if a session is + expired. That should be done by a cronjob and storage specific. For + example to prune unused filesystem sessions one could check the modified + time of the files. If sessions are stored in the database the new() + method should add an expiration timestamp for the session. + + For better flexibility it's recommended to not use the middleware but the + store and session object directly in the application dispatching:: + + session_store = FilesystemSessionStore() + + def application(environ, start_response): + request = Request(environ) + sid = request.cookies.get('cookie_name') + if sid is None: + request.session = session_store.new() + else: + request.session = session_store.get(sid) + response = get_the_response_object(request) + if request.session.should_save: + session_store.save(request.session) + response.set_cookie('cookie_name', request.session.sid) + return response(environ, start_response) + + :copyright: 2007 Pallets + :license: BSD-3-Clause +""" +import os +import re +import tempfile +import warnings +from hashlib import sha1 +from os import path +from pickle import dump +from pickle import HIGHEST_PROTOCOL +from pickle import load +from random import random +from time import time + +from .._compat import PY2 +from .._compat import text_type +from ..datastructures import CallbackDict +from ..filesystem import get_filesystem_encoding +from ..posixemulation import rename +from ..utils import dump_cookie +from ..utils import parse_cookie +from ..wsgi import ClosingIterator + +warnings.warn( + "'werkzeug.contrib.sessions' is deprecated as of version 0.15 and" + " will be removed in version 1.0. It has moved to" + " https://github.com/pallets/secure-cookie.", + DeprecationWarning, + stacklevel=2, +) + +_sha1_re = re.compile(r"^[a-f0-9]{40}$") + + +def _urandom(): + if hasattr(os, "urandom"): + return os.urandom(30) + return text_type(random()).encode("ascii") + + +def generate_key(salt=None): + if salt is None: + salt = repr(salt).encode("ascii") + return sha1(b"".join([salt, str(time()).encode("ascii"), _urandom()])).hexdigest() + + +class ModificationTrackingDict(CallbackDict): + __slots__ = ("modified",) + + def __init__(self, *args, **kwargs): + def on_update(self): + self.modified = True + + self.modified = False + CallbackDict.__init__(self, on_update=on_update) + dict.update(self, *args, **kwargs) + + def copy(self): + """Create a flat copy of the dict.""" + missing = object() + result = object.__new__(self.__class__) + for name in self.__slots__: + val = getattr(self, name, missing) + if val is not missing: + setattr(result, name, val) + return result + + def __copy__(self): + return self.copy() + + +class Session(ModificationTrackingDict): + """Subclass of a dict that keeps track of direct object changes. Changes + in mutable structures are not tracked, for those you have to set + `modified` to `True` by hand. 
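A sketch of that caveat (the sid here is invented)::

    session = Session({"todo": []}, "some-sid")
    session["todo"].append("buy milk")  # in-place mutation is not tracked
    assert not session.should_save
    session.modified = True             # flag the change by hand
    assert session.should_save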
+ """ + + __slots__ = ModificationTrackingDict.__slots__ + ("sid", "new") + + def __init__(self, data, sid, new=False): + ModificationTrackingDict.__init__(self, data) + self.sid = sid + self.new = new + + def __repr__(self): + return "<%s %s%s>" % ( + self.__class__.__name__, + dict.__repr__(self), + "*" if self.should_save else "", + ) + + @property + def should_save(self): + """True if the session should be saved. + + .. versionchanged:: 0.6 + By default the session is now only saved if the session is + modified, not if it is new like it was before. + """ + return self.modified + + +class SessionStore(object): + """Baseclass for all session stores. The Werkzeug contrib module does not + implement any useful stores besides the filesystem store, application + developers are encouraged to create their own stores. + + :param session_class: The session class to use. Defaults to + :class:`Session`. + """ + + def __init__(self, session_class=None): + if session_class is None: + session_class = Session + self.session_class = session_class + + def is_valid_key(self, key): + """Check if a key has the correct format.""" + return _sha1_re.match(key) is not None + + def generate_key(self, salt=None): + """Simple function that generates a new session key.""" + return generate_key(salt) + + def new(self): + """Generate a new session.""" + return self.session_class({}, self.generate_key(), True) + + def save(self, session): + """Save a session.""" + + def save_if_modified(self, session): + """Save if a session class wants an update.""" + if session.should_save: + self.save(session) + + def delete(self, session): + """Delete a session.""" + + def get(self, sid): + """Get a session for this sid or a new session object. This method + has to check if the session key is valid and create a new session if + that wasn't the case. + """ + return self.session_class({}, sid, True) + + +#: used for temporary files by the filesystem session store +_fs_transaction_suffix = ".__wz_sess" + + +class FilesystemSessionStore(SessionStore): + """Simple example session store that saves sessions on the filesystem. + This store works best on POSIX systems and Windows Vista / Windows + Server 2008 and newer. + + .. versionchanged:: 0.6 + `renew_missing` was added. Previously this was considered `True`, + now the default changed to `False` and it can be explicitly + deactivated. + + :param path: the path to the folder used for storing the sessions. + If not provided the default temporary directory is used. + :param filename_template: a string template used to give the session + a filename. ``%s`` is replaced with the + session id. + :param session_class: The session class to use. Defaults to + :class:`Session`. + :param renew_missing: set to `True` if you want the store to + give the user a new sid if the session was + not yet saved. 
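As the module docstring suggests, expiring filesystem sessions is left to an external job. A minimal pruning sketch using the store's ``list()`` helper (defined further below) and file modification times::

    import os
    import time

    from werkzeug.contrib.sessions import FilesystemSessionStore

    store = FilesystemSessionStore()
    cutoff = time.time() - 3600                # drop sessions idle for an hour
    for sid in store.list():
        filename = store.get_session_filename(sid)
        try:
            if os.path.getmtime(filename) < cutoff:
                os.unlink(filename)
        except OSError:
            pass                               # already removed; ignore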
+ """ + + def __init__( + self, + path=None, + filename_template="werkzeug_%s.sess", + session_class=None, + renew_missing=False, + mode=0o644, + ): + SessionStore.__init__(self, session_class) + if path is None: + path = tempfile.gettempdir() + self.path = path + if isinstance(filename_template, text_type) and PY2: + filename_template = filename_template.encode(get_filesystem_encoding()) + assert not filename_template.endswith(_fs_transaction_suffix), ( + "filename templates may not end with %s" % _fs_transaction_suffix + ) + self.filename_template = filename_template + self.renew_missing = renew_missing + self.mode = mode + + def get_session_filename(self, sid): + # out of the box, this should be a strict ASCII subset but + # you might reconfigure the session object to have a more + # arbitrary string. + if isinstance(sid, text_type) and PY2: + sid = sid.encode(get_filesystem_encoding()) + return path.join(self.path, self.filename_template % sid) + + def save(self, session): + fn = self.get_session_filename(session.sid) + fd, tmp = tempfile.mkstemp(suffix=_fs_transaction_suffix, dir=self.path) + f = os.fdopen(fd, "wb") + try: + dump(dict(session), f, HIGHEST_PROTOCOL) + finally: + f.close() + try: + rename(tmp, fn) + os.chmod(fn, self.mode) + except (IOError, OSError): + pass + + def delete(self, session): + fn = self.get_session_filename(session.sid) + try: + os.unlink(fn) + except OSError: + pass + + def get(self, sid): + if not self.is_valid_key(sid): + return self.new() + try: + f = open(self.get_session_filename(sid), "rb") + except IOError: + if self.renew_missing: + return self.new() + data = {} + else: + try: + try: + data = load(f) + except Exception: + data = {} + finally: + f.close() + return self.session_class(data, sid, False) + + def list(self): + """Lists all sessions in the store. + + .. versionadded:: 0.6 + """ + before, after = self.filename_template.split("%s", 1) + filename_re = re.compile( + r"%s(.{5,})%s$" % (re.escape(before), re.escape(after)) + ) + result = [] + for filename in os.listdir(self.path): + #: this is a session that is still being saved. + if filename.endswith(_fs_transaction_suffix): + continue + match = filename_re.match(filename) + if match is not None: + result.append(match.group(1)) + return result + + +class SessionMiddleware(object): + """A simple middleware that puts the session object of a store provided + into the WSGI environ. It automatically sets cookies and restores + sessions. + + However a middleware is not the preferred solution because it won't be as + fast as sessions managed by the application itself and will put a key into + the WSGI environment only relevant for the application which is against + the concept of WSGI. + + The cookie parameters are the same as for the :func:`~dump_cookie` + function just prefixed with ``cookie_``. Additionally `max_age` is + called `cookie_age` and not `cookie_max_age` because of backwards + compatibility. 
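A minimal wiring sketch (the counter app is invented for illustration); the middleware exposes the session under ``werkzeug.session`` and sets the cookie on the way out::

    from werkzeug.contrib.sessions import FilesystemSessionStore, SessionMiddleware

    def app(environ, start_response):
        session = environ["werkzeug.session"]
        session["views"] = session.get("views", 0) + 1
        start_response("200 OK", [("Content-Type", "text/plain")])
        return [("views: %d" % session["views"]).encode()]

    app = SessionMiddleware(app, FilesystemSessionStore())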
+ """ + + def __init__( + self, + app, + store, + cookie_name="session_id", + cookie_age=None, + cookie_expires=None, + cookie_path="/", + cookie_domain=None, + cookie_secure=None, + cookie_httponly=False, + cookie_samesite="Lax", + environ_key="werkzeug.session", + ): + self.app = app + self.store = store + self.cookie_name = cookie_name + self.cookie_age = cookie_age + self.cookie_expires = cookie_expires + self.cookie_path = cookie_path + self.cookie_domain = cookie_domain + self.cookie_secure = cookie_secure + self.cookie_httponly = cookie_httponly + self.cookie_samesite = cookie_samesite + self.environ_key = environ_key + + def __call__(self, environ, start_response): + cookie = parse_cookie(environ.get("HTTP_COOKIE", "")) + sid = cookie.get(self.cookie_name, None) + if sid is None: + session = self.store.new() + else: + session = self.store.get(sid) + environ[self.environ_key] = session + + def injecting_start_response(status, headers, exc_info=None): + if session.should_save: + self.store.save(session) + headers.append( + ( + "Set-Cookie", + dump_cookie( + self.cookie_name, + session.sid, + self.cookie_age, + self.cookie_expires, + self.cookie_path, + self.cookie_domain, + self.cookie_secure, + self.cookie_httponly, + samesite=self.cookie_samesite, + ), + ) + ) + return start_response(status, headers, exc_info) + + return ClosingIterator( + self.app(environ, injecting_start_response), + lambda: self.store.save_if_modified(session), + ) diff --git a/python/werkzeug/contrib/wrappers.py b/python/werkzeug/contrib/wrappers.py new file mode 100644 index 0000000..49b82a7 --- /dev/null +++ b/python/werkzeug/contrib/wrappers.py @@ -0,0 +1,385 @@ +# -*- coding: utf-8 -*- +""" + werkzeug.contrib.wrappers + ~~~~~~~~~~~~~~~~~~~~~~~~~ + + Extra wrappers or mixins contributed by the community. These wrappers can + be mixed in into request objects to add extra functionality. + + Example:: + + from werkzeug.wrappers import Request as RequestBase + from werkzeug.contrib.wrappers import JSONRequestMixin + + class Request(RequestBase, JSONRequestMixin): + pass + + Afterwards this request object provides the extra functionality of the + :class:`JSONRequestMixin`. + + :copyright: 2007 Pallets + :license: BSD-3-Clause +""" +import codecs +import warnings + +from .._compat import wsgi_decoding_dance +from ..exceptions import BadRequest +from ..http import dump_options_header +from ..http import parse_options_header +from ..utils import cached_property +from ..wrappers.json import JSONMixin as _JSONMixin + + +def is_known_charset(charset): + """Checks if the given charset is known to Python.""" + try: + codecs.lookup(charset) + except LookupError: + return False + return True + + +class JSONRequestMixin(_JSONMixin): + """ + .. deprecated:: 0.15 + Moved to :class:`werkzeug.wrappers.json.JSONMixin`. This old + import will be removed in version 1.0. + """ + + @property + def json(self): + warnings.warn( + "'werkzeug.contrib.wrappers.JSONRequestMixin' has moved to" + " 'werkzeug.wrappers.json.JSONMixin'. This old import will" + " be removed in version 1.0.", + DeprecationWarning, + stacklevel=2, + ) + return super(JSONRequestMixin, self).json + + +class ProtobufRequestMixin(object): + + """Add protobuf parsing method to a request object. This will parse the + input data through `protobuf`_ if possible. + + :exc:`~werkzeug.exceptions.BadRequest` will be raised if the content-type + is not protobuf or if the data itself cannot be parsed property. + + .. 
_protobuf: https://github.com/protocolbuffers/protobuf + + .. deprecated:: 0.15 + This mixin will be removed in version 1.0. + """ + + #: by default the :class:`ProtobufRequestMixin` will raise a + #: :exc:`~werkzeug.exceptions.BadRequest` if the object is not + #: initialized. You can bypass that check by setting this + #: attribute to `False`. + protobuf_check_initialization = True + + def parse_protobuf(self, proto_type): + """Parse the data into an instance of proto_type.""" + warnings.warn( + "'werkzeug.contrib.wrappers.ProtobufRequestMixin' is" + " deprecated as of version 0.15 and will be removed in" + " version 1.0.", + DeprecationWarning, + stacklevel=2, + ) + if "protobuf" not in self.environ.get("CONTENT_TYPE", ""): + raise BadRequest("Not a Protobuf request") + + obj = proto_type() + try: + obj.ParseFromString(self.data) + except Exception: + raise BadRequest("Unable to parse Protobuf request") + + # Fail if not all required fields are set + if self.protobuf_check_initialization and not obj.IsInitialized(): + raise BadRequest("Partial Protobuf request") + + return obj + + +class RoutingArgsRequestMixin(object): + + """This request mixin adds support for the wsgiorg routing args + `specification`_. + + .. _specification: https://wsgi.readthedocs.io/en/latest/ + specifications/routing_args.html + + .. deprecated:: 0.15 + This mixin will be removed in version 1.0. + """ + + def _get_routing_args(self): + warnings.warn( + "'werkzeug.contrib.wrappers.RoutingArgsRequestMixin' is" + " deprecated as of version 0.15 and will be removed in" + " version 1.0.", + DeprecationWarning, + stacklevel=2, + ) + return self.environ.get("wsgiorg.routing_args", (()))[0] + + def _set_routing_args(self, value): + warnings.warn( + "'werkzeug.contrib.wrappers.RoutingArgsRequestMixin' is" + " deprecated as of version 0.15 and will be removed in" + " version 1.0.", + DeprecationWarning, + stacklevel=2, + ) + if self.shallow: + raise RuntimeError( + "A shallow request tried to modify the WSGI " + "environment. If you really want to do that, " + "set `shallow` to False." + ) + self.environ["wsgiorg.routing_args"] = (value, self.routing_vars) + + routing_args = property( + _get_routing_args, + _set_routing_args, + doc=""" + The positional URL arguments as `tuple`.""", + ) + del _get_routing_args, _set_routing_args + + def _get_routing_vars(self): + warnings.warn( + "'werkzeug.contrib.wrappers.RoutingArgsRequestMixin' is" + " deprecated as of version 0.15 and will be removed in" + " version 1.0.", + DeprecationWarning, + stacklevel=2, + ) + rv = self.environ.get("wsgiorg.routing_args") + if rv is not None: + return rv[1] + rv = {} + if not self.shallow: + self.routing_vars = rv + return rv + + def _set_routing_vars(self, value): + warnings.warn( + "'werkzeug.contrib.wrappers.RoutingArgsRequestMixin' is" + " deprecated as of version 0.15 and will be removed in" + " version 1.0.", + DeprecationWarning, + stacklevel=2, + ) + if self.shallow: + raise RuntimeError( + "A shallow request tried to modify the WSGI " + "environment. If you really want to do that, " + "set `shallow` to False." + ) + self.environ["wsgiorg.routing_args"] = (self.routing_args, value) + + routing_vars = property( + _get_routing_vars, + _set_routing_vars, + doc=""" + The keyword URL arguments as `dict`.""", + ) + del _get_routing_vars, _set_routing_vars + + +class ReverseSlashBehaviorRequestMixin(object): + + """This mixin reverses the trailing slash behavior of :attr:`script_root` + and :attr:`path`. 
This makes it possible to use :func:`~urlparse.urljoin`
+    directly on the paths.
+
+    Because it changes the behavior of :class:`Request` this class has to be
+    mixed in *before* the actual request class::
+
+        class MyRequest(ReverseSlashBehaviorRequestMixin, Request):
+            pass
+
+    This example shows the differences (for an application mounted on
+    `/application` and the request going to `/application/foo/bar`):
+
+        +---------------+-------------------+---------------------+
+        |               | normal behavior   | reverse behavior    |
+        +===============+===================+=====================+
+        | `script_root` | ``/application``  | ``/application/``   |
+        +---------------+-------------------+---------------------+
+        | `path`        | ``/foo/bar``      | ``foo/bar``         |
+        +---------------+-------------------+---------------------+
+
+    .. deprecated:: 0.15
+        This mixin will be removed in version 1.0.
+    """
+
+    @cached_property
+    def path(self):
+        """Requested path as unicode. This works a bit like the regular path
+        info in the WSGI environment but will not include a leading slash.
+        """
+        warnings.warn(
+            "'werkzeug.contrib.wrappers.ReverseSlashBehaviorRequestMixin'"
+            " is deprecated as of version 0.15 and will be removed in"
+            " version 1.0.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        path = wsgi_decoding_dance(
+            self.environ.get("PATH_INFO") or "", self.charset, self.encoding_errors
+        )
+        return path.lstrip("/")
+
+    @cached_property
+    def script_root(self):
+        """The root path of the script including a trailing slash."""
+        warnings.warn(
+            "'werkzeug.contrib.wrappers.ReverseSlashBehaviorRequestMixin'"
+            " is deprecated as of version 0.15 and will be removed in"
+            " version 1.0.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        path = wsgi_decoding_dance(
+            self.environ.get("SCRIPT_NAME") or "", self.charset, self.encoding_errors
+        )
+        return path.rstrip("/") + "/"
+
+
+class DynamicCharsetRequestMixin(object):
+
+    """If this mixin is mixed into a request class it will provide
+    a dynamic `charset` attribute. This means that if the charset is
+    transmitted in the content type headers it's used from there.
+
+    Because it changes the behavior of :class:`Request` this class has
+    to be mixed in *before* the actual request class::
+
+        class MyRequest(DynamicCharsetRequestMixin, Request):
+            pass
+
+    By default the request object assumes that the URL charset is the
+    same as the data charset. If the charset varies on each request
+    based on the transmitted data it's not a good idea to let the URLs
+    change based on that. Most browsers assume either utf-8 or latin1
+    for the URLs if they have trouble figuring it out. It's strongly
+    recommended to set the URL charset to utf-8::
+
+        class MyRequest(DynamicCharsetRequestMixin, Request):
+            url_charset = 'utf-8'
+
+    .. deprecated:: 0.15
+        This mixin will be removed in version 1.0.
+
+    .. versionadded:: 0.6
+    """
+
+    #: the default charset that is assumed if the content type header
+    #: is missing or does not contain a charset parameter. The default
+    #: is latin1 which is what HTTP specifies as the default charset.
+    #: You may however want to set this to utf-8 to better support
+    #: browsers that do not transmit a charset for incoming data.
+    default_charset = "latin1"
+
+    def unknown_charset(self, charset):
+        """Called if a charset was provided but is not supported by
+        the Python codecs module. By default latin1 is assumed so as
+        not to lose any information; you may override this method to
+        change the behavior.
+
+        :param charset: the charset that was not found.
+        :return: the replacement charset.
+        """
+        return "latin1"
+
+    @cached_property
+    def charset(self):
+        """The charset from the content type."""
+        warnings.warn(
+            "'werkzeug.contrib.wrappers.DynamicCharsetRequestMixin'"
+            " is deprecated as of version 0.15 and will be removed in"
+            " version 1.0.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        header = self.environ.get("CONTENT_TYPE")
+        if header:
+            ct, options = parse_options_header(header)
+            charset = options.get("charset")
+            if charset:
+                if is_known_charset(charset):
+                    return charset
+                return self.unknown_charset(charset)
+        return self.default_charset
+
+
+class DynamicCharsetResponseMixin(object):
+
+    """If this mixin is mixed into a response class it will provide
+    a dynamic `charset` attribute. This means that the charset is
+    looked up in, and stored in, the `Content-Type` header and updates
+    itself automatically. This also means a small performance hit but
+    it can be useful if you're working with different charsets on
+    responses.
+
+    Because the charset attribute is not a property at class-level, the
+    default value is stored in `default_charset`.
+
+    Because it changes the behavior of :class:`Response` this class has
+    to be mixed in *before* the actual response class::
+
+        class MyResponse(DynamicCharsetResponseMixin, Response):
+            pass
+
+    .. deprecated:: 0.15
+        This mixin will be removed in version 1.0.
+
+    .. versionadded:: 0.6
+    """
+
+    #: the default charset.
+    default_charset = "utf-8"
+
+    def _get_charset(self):
+        warnings.warn(
+            "'werkzeug.contrib.wrappers.DynamicCharsetResponseMixin'"
+            " is deprecated as of version 0.15 and will be removed in"
+            " version 1.0.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        header = self.headers.get("content-type")
+        if header:
+            charset = parse_options_header(header)[1].get("charset")
+            if charset:
+                return charset
+        return self.default_charset
+
+    def _set_charset(self, charset):
+        warnings.warn(
+            "'werkzeug.contrib.wrappers.DynamicCharsetResponseMixin'"
+            " is deprecated as of version 0.15 and will be removed in"
+            " version 1.0.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        header = self.headers.get("content-type")
+        ct, options = parse_options_header(header)
+        if not ct:
+            raise TypeError("Cannot set charset if Content-Type header is missing.")
+        options["charset"] = charset
+        self.headers["Content-Type"] = dump_options_header(ct, options)
+
+    charset = property(
+        _get_charset,
+        _set_charset,
+        doc="""
+        The charset for the response. It's stored inside the
+        Content-Type header as a parameter.""",
+    )
+    del _get_charset, _set_charset
diff --git a/python/werkzeug/datastructures.py b/python/werkzeug/datastructures.py
new file mode 100644
index 0000000..9643db9
--- /dev/null
+++ b/python/werkzeug/datastructures.py
@@ -0,0 +1,2852 @@
+# -*- coding: utf-8 -*-
+"""
+    werkzeug.datastructures
+    ~~~~~~~~~~~~~~~~~~~~~~~
+
+    This module provides mixins and classes with an immutable interface.
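As a taste of that immutable interface (the classes appear just below), a sketch::

    from werkzeug.datastructures import ImmutableList

    frozen = ImmutableList([1, 2, 3])
    try:
        frozen.append(4)
    except TypeError as exc:
        print(exc)  # 'ImmutableList' objects are immutable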
+ + :copyright: 2007 Pallets + :license: BSD-3-Clause +""" +import codecs +import mimetypes +import re +from copy import deepcopy +from itertools import repeat + +from ._compat import BytesIO +from ._compat import collections_abc +from ._compat import integer_types +from ._compat import iteritems +from ._compat import iterkeys +from ._compat import iterlists +from ._compat import itervalues +from ._compat import make_literal_wrapper +from ._compat import PY2 +from ._compat import string_types +from ._compat import text_type +from ._compat import to_native +from ._internal import _missing +from .filesystem import get_filesystem_encoding + +_locale_delim_re = re.compile(r"[_-]") + + +def is_immutable(self): + raise TypeError("%r objects are immutable" % self.__class__.__name__) + + +def iter_multi_items(mapping): + """Iterates over the items of a mapping yielding keys and values + without dropping any from more complex structures. + """ + if isinstance(mapping, MultiDict): + for item in iteritems(mapping, multi=True): + yield item + elif isinstance(mapping, dict): + for key, value in iteritems(mapping): + if isinstance(value, (tuple, list)): + for value in value: + yield key, value + else: + yield key, value + else: + for item in mapping: + yield item + + +def native_itermethods(names): + if not PY2: + return lambda x: x + + def setviewmethod(cls, name): + viewmethod_name = "view%s" % name + repr_name = "view_%s" % name + + def viewmethod(self, *a, **kw): + return ViewItems(self, name, repr_name, *a, **kw) + + viewmethod.__name__ = viewmethod_name + viewmethod.__doc__ = "`%s()` object providing a view on %s" % ( + viewmethod_name, + name, + ) + setattr(cls, viewmethod_name, viewmethod) + + def setitermethod(cls, name): + itermethod = getattr(cls, name) + setattr(cls, "iter%s" % name, itermethod) + + def listmethod(self, *a, **kw): + return list(itermethod(self, *a, **kw)) + + listmethod.__name__ = name + listmethod.__doc__ = "Like :py:meth:`iter%s`, but returns a list." % name + setattr(cls, name, listmethod) + + def wrap(cls): + for name in names: + setitermethod(cls, name) + setviewmethod(cls, name) + return cls + + return wrap + + +class ImmutableListMixin(object): + """Makes a :class:`list` immutable. + + .. versionadded:: 0.5 + + :private: + """ + + _hash_cache = None + + def __hash__(self): + if self._hash_cache is not None: + return self._hash_cache + rv = self._hash_cache = hash(tuple(self)) + return rv + + def __reduce_ex__(self, protocol): + return type(self), (list(self),) + + def __delitem__(self, key): + is_immutable(self) + + def __iadd__(self, other): + is_immutable(self) + + __imul__ = __iadd__ + + def __setitem__(self, key, value): + is_immutable(self) + + def append(self, item): + is_immutable(self) + + remove = append + + def extend(self, iterable): + is_immutable(self) + + def insert(self, pos, value): + is_immutable(self) + + def pop(self, index=-1): + is_immutable(self) + + def reverse(self): + is_immutable(self) + + def sort(self, cmp=None, key=None, reverse=None): + is_immutable(self) + + +class ImmutableList(ImmutableListMixin, list): + """An immutable :class:`list`. + + .. versionadded:: 0.5 + + :private: + """ + + def __repr__(self): + return "%s(%s)" % (self.__class__.__name__, list.__repr__(self)) + + +class ImmutableDictMixin(object): + """Makes a :class:`dict` immutable. + + .. 
versionadded:: 0.5 + + :private: + """ + + _hash_cache = None + + @classmethod + def fromkeys(cls, keys, value=None): + instance = super(cls, cls).__new__(cls) + instance.__init__(zip(keys, repeat(value))) + return instance + + def __reduce_ex__(self, protocol): + return type(self), (dict(self),) + + def _iter_hashitems(self): + return iteritems(self) + + def __hash__(self): + if self._hash_cache is not None: + return self._hash_cache + rv = self._hash_cache = hash(frozenset(self._iter_hashitems())) + return rv + + def setdefault(self, key, default=None): + is_immutable(self) + + def update(self, *args, **kwargs): + is_immutable(self) + + def pop(self, key, default=None): + is_immutable(self) + + def popitem(self): + is_immutable(self) + + def __setitem__(self, key, value): + is_immutable(self) + + def __delitem__(self, key): + is_immutable(self) + + def clear(self): + is_immutable(self) + + +class ImmutableMultiDictMixin(ImmutableDictMixin): + """Makes a :class:`MultiDict` immutable. + + .. versionadded:: 0.5 + + :private: + """ + + def __reduce_ex__(self, protocol): + return type(self), (list(iteritems(self, multi=True)),) + + def _iter_hashitems(self): + return iteritems(self, multi=True) + + def add(self, key, value): + is_immutable(self) + + def popitemlist(self): + is_immutable(self) + + def poplist(self, key): + is_immutable(self) + + def setlist(self, key, new_list): + is_immutable(self) + + def setlistdefault(self, key, default_list=None): + is_immutable(self) + + +class UpdateDictMixin(object): + """Makes dicts call `self.on_update` on modifications. + + .. versionadded:: 0.5 + + :private: + """ + + on_update = None + + def calls_update(name): # noqa: B902 + def oncall(self, *args, **kw): + rv = getattr(super(UpdateDictMixin, self), name)(*args, **kw) + if self.on_update is not None: + self.on_update(self) + return rv + + oncall.__name__ = name + return oncall + + def setdefault(self, key, default=None): + modified = key not in self + rv = super(UpdateDictMixin, self).setdefault(key, default) + if modified and self.on_update is not None: + self.on_update(self) + return rv + + def pop(self, key, default=_missing): + modified = key in self + if default is _missing: + rv = super(UpdateDictMixin, self).pop(key) + else: + rv = super(UpdateDictMixin, self).pop(key, default) + if modified and self.on_update is not None: + self.on_update(self) + return rv + + __setitem__ = calls_update("__setitem__") + __delitem__ = calls_update("__delitem__") + clear = calls_update("clear") + popitem = calls_update("popitem") + update = calls_update("update") + del calls_update + + +class TypeConversionDict(dict): + """Works like a regular dict but the :meth:`get` method can perform + type conversions. :class:`MultiDict` and :class:`CombinedMultiDict` + are subclasses of this class and provide the same feature. + + .. versionadded:: 0.5 + """ + + def get(self, key, default=None, type=None): + """Return the default value if the requested data doesn't exist. + If `type` is provided and is a callable it should convert the value, + return it or raise a :exc:`ValueError` if that is not possible. In + this case the function will return the default as if the value was not + found: + + >>> d = TypeConversionDict(foo='42', bar='blub') + >>> d.get('foo', type=int) + 42 + >>> d.get('bar', -1, type=int) + -1 + + :param key: The key to be looked up. + :param default: The default value to be returned if the key can't + be looked up. If not further specified `None` is + returned. 
+ :param type: A callable that is used to cast the value in the + :class:`MultiDict`. If a :exc:`ValueError` is raised + by this callable the default value is returned. + """ + try: + rv = self[key] + except KeyError: + return default + if type is not None: + try: + rv = type(rv) + except ValueError: + rv = default + return rv + + +class ImmutableTypeConversionDict(ImmutableDictMixin, TypeConversionDict): + """Works like a :class:`TypeConversionDict` but does not support + modifications. + + .. versionadded:: 0.5 + """ + + def copy(self): + """Return a shallow mutable copy of this object. Keep in mind that + the standard library's :func:`copy` function is a no-op for this class + like for any other python immutable type (eg: :class:`tuple`). + """ + return TypeConversionDict(self) + + def __copy__(self): + return self + + +class ViewItems(object): + def __init__(self, multi_dict, method, repr_name, *a, **kw): + self.__multi_dict = multi_dict + self.__method = method + self.__repr_name = repr_name + self.__a = a + self.__kw = kw + + def __get_items(self): + return getattr(self.__multi_dict, self.__method)(*self.__a, **self.__kw) + + def __repr__(self): + return "%s(%r)" % (self.__repr_name, list(self.__get_items())) + + def __iter__(self): + return iter(self.__get_items()) + + +@native_itermethods(["keys", "values", "items", "lists", "listvalues"]) +class MultiDict(TypeConversionDict): + """A :class:`MultiDict` is a dictionary subclass customized to deal with + multiple values for the same key which is for example used by the parsing + functions in the wrappers. This is necessary because some HTML form + elements pass multiple values for the same key. + + :class:`MultiDict` implements all standard dictionary methods. + Internally, it saves all values for a key as a list, but the standard dict + access methods will only return the first value for a key. If you want to + gain access to the other values, too, you have to use the `list` methods as + explained below. + + Basic Usage: + + >>> d = MultiDict([('a', 'b'), ('a', 'c')]) + >>> d + MultiDict([('a', 'b'), ('a', 'c')]) + >>> d['a'] + 'b' + >>> d.getlist('a') + ['b', 'c'] + >>> 'a' in d + True + + It behaves like a normal dict thus all dict functions will only return the + first value when multiple values for one key are found. + + From Werkzeug 0.3 onwards, the `KeyError` raised by this class is also a + subclass of the :exc:`~exceptions.BadRequest` HTTP exception and will + render a page for a ``400 BAD REQUEST`` if caught in a catch-all for HTTP + exceptions. + + A :class:`MultiDict` can be constructed from an iterable of + ``(key, value)`` tuples, a dict, a :class:`MultiDict` or from Werkzeug 0.2 + onwards some keyword parameters. + + :param mapping: the initial value for the :class:`MultiDict`. Either a + regular dict, an iterable of ``(key, value)`` tuples + or `None`. 
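Putting those pieces together, a short sketch of multi-value access plus the type-converting ``get`` inherited from :class:`TypeConversionDict`::

    d = MultiDict([("tag", "python"), ("tag", "web"), ("page", "2")])
    assert d["tag"] == "python"                   # dict access: first value only
    assert d.getlist("tag") == ["python", "web"]
    assert d.get("page", type=int) == 2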
+ """ + + def __init__(self, mapping=None): + if isinstance(mapping, MultiDict): + dict.__init__(self, ((k, l[:]) for k, l in iterlists(mapping))) + elif isinstance(mapping, dict): + tmp = {} + for key, value in iteritems(mapping): + if isinstance(value, (tuple, list)): + if len(value) == 0: + continue + value = list(value) + else: + value = [value] + tmp[key] = value + dict.__init__(self, tmp) + else: + tmp = {} + for key, value in mapping or (): + tmp.setdefault(key, []).append(value) + dict.__init__(self, tmp) + + def __getstate__(self): + return dict(self.lists()) + + def __setstate__(self, value): + dict.clear(self) + dict.update(self, value) + + def __getitem__(self, key): + """Return the first data value for this key; + raises KeyError if not found. + + :param key: The key to be looked up. + :raise KeyError: if the key does not exist. + """ + + if key in self: + lst = dict.__getitem__(self, key) + if len(lst) > 0: + return lst[0] + raise exceptions.BadRequestKeyError(key) + + def __setitem__(self, key, value): + """Like :meth:`add` but removes an existing key first. + + :param key: the key for the value. + :param value: the value to set. + """ + dict.__setitem__(self, key, [value]) + + def add(self, key, value): + """Adds a new value for the key. + + .. versionadded:: 0.6 + + :param key: the key for the value. + :param value: the value to add. + """ + dict.setdefault(self, key, []).append(value) + + def getlist(self, key, type=None): + """Return the list of items for a given key. If that key is not in the + `MultiDict`, the return value will be an empty list. Just as `get` + `getlist` accepts a `type` parameter. All items will be converted + with the callable defined there. + + :param key: The key to be looked up. + :param type: A callable that is used to cast the value in the + :class:`MultiDict`. If a :exc:`ValueError` is raised + by this callable the value will be removed from the list. + :return: a :class:`list` of all the values for the key. + """ + try: + rv = dict.__getitem__(self, key) + except KeyError: + return [] + if type is None: + return list(rv) + result = [] + for item in rv: + try: + result.append(type(item)) + except ValueError: + pass + return result + + def setlist(self, key, new_list): + """Remove the old values for a key and add new ones. Note that the list + you pass the values in will be shallow-copied before it is inserted in + the dictionary. + + >>> d = MultiDict() + >>> d.setlist('foo', ['1', '2']) + >>> d['foo'] + '1' + >>> d.getlist('foo') + ['1', '2'] + + :param key: The key for which the values are set. + :param new_list: An iterable with the new values for the key. Old values + are removed first. + """ + dict.__setitem__(self, key, list(new_list)) + + def setdefault(self, key, default=None): + """Returns the value for the key if it is in the dict, otherwise it + returns `default` and sets that value for `key`. + + :param key: The key to be looked up. + :param default: The default value to be returned if the key is not + in the dict. If not further specified it's `None`. + """ + if key not in self: + self[key] = default + else: + default = self[key] + return default + + def setlistdefault(self, key, default_list=None): + """Like `setdefault` but sets multiple values. The list returned + is not a copy, but the list that is actually used internally. 
This
+        means that you can put new values into the dict by appending items
+        to the list:
+
+        >>> d = MultiDict({"foo": 1})
+        >>> d.setlistdefault("foo").extend([2, 3])
+        >>> d.getlist("foo")
+        [1, 2, 3]
+
+        :param key: The key to be looked up.
+        :param default_list: An iterable of default values.  It is either copied
+                             (in case it was a list) or converted into a list
+                             before returned.
+        :return: a :class:`list`
+        """
+        if key not in self:
+            default_list = list(default_list or ())
+            dict.__setitem__(self, key, default_list)
+        else:
+            default_list = dict.__getitem__(self, key)
+        return default_list
+
+    def items(self, multi=False):
+        """Return an iterator of ``(key, value)`` pairs.
+
+        :param multi: If set to `True` the iterator returned will have a pair
+                      for each value of each key.  Otherwise it will only
+                      contain pairs for the first value of each key.
+        """
+
+        for key, values in iteritems(dict, self):
+            if multi:
+                for value in values:
+                    yield key, value
+            else:
+                yield key, values[0]
+
+    def lists(self):
+        """Return an iterator of ``(key, values)`` pairs, where values is the
+        list of all values associated with the key."""
+
+        for key, values in iteritems(dict, self):
+            yield key, list(values)
+
+    def keys(self):
+        return iterkeys(dict, self)
+
+    __iter__ = keys
+
+    def values(self):
+        """Returns an iterator of the first value on every key's value list."""
+        for values in itervalues(dict, self):
+            yield values[0]
+
+    def listvalues(self):
+        """Return an iterator of all values associated with a key.  Zipping
+        :meth:`keys` and this is the same as calling :meth:`lists`:
+
+        >>> d = MultiDict({"foo": [1, 2, 3]})
+        >>> zip(d.keys(), d.listvalues()) == d.lists()
+        True
+        """
+
+        return itervalues(dict, self)
+
+    def copy(self):
+        """Return a shallow copy of this object."""
+        return self.__class__(self)
+
+    def deepcopy(self, memo=None):
+        """Return a deep copy of this object."""
+        return self.__class__(deepcopy(self.to_dict(flat=False), memo))
+
+    def to_dict(self, flat=True):
+        """Return the contents as regular dict.  If `flat` is `True` the
+        returned dict will only have the first item present, if `flat` is
+        `False` all values will be returned as lists.
+
+        :param flat: If set to `False` the dict returned will have lists
+                     with all the values in it.  Otherwise it will only
+                     contain the first value for each key.
+        :return: a :class:`dict`
+        """
+        if flat:
+            return dict(iteritems(self))
+        return dict(self.lists())
+
+    def update(self, other_dict):
+        """update() extends rather than replaces existing key lists:
+
+        >>> a = MultiDict({'x': 1})
+        >>> b = MultiDict({'x': 2, 'y': 3})
+        >>> a.update(b)
+        >>> a
+        MultiDict([('y', 3), ('x', 1), ('x', 2)])
+
+        If the value list for a key in ``other_dict`` is empty, no new values
+        will be added to the dict and the key will not be created:
+
+        >>> x = {'empty_list': []}
+        >>> y = MultiDict()
+        >>> y.update(x)
+        >>> y
+        MultiDict([])
+        """
+        for key, value in iter_multi_items(other_dict):
+            MultiDict.add(self, key, value)
+
+    def pop(self, key, default=_missing):
+        """Pop the first item for a list on the dict.  Afterwards the
+        key is removed from the dict, so additional values are discarded:
+
+        >>> d = MultiDict({"foo": [1, 2, 3]})
+        >>> d.pop("foo")
+        1
+        >>> "foo" in d
+        False
+
+        :param key: the key to pop.
+        :param default: if provided the value to return if the key was
+                        not in the dictionary.
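+
+        A sketch of the ``default`` behaviour (illustrative addition):
+
+        >>> d = MultiDict()
+        >>> d.pop("missing", "fallback")
+        'fallback'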
+ """ + try: + lst = dict.pop(self, key) + + if len(lst) == 0: + raise exceptions.BadRequestKeyError(key) + + return lst[0] + except KeyError: + if default is not _missing: + return default + raise exceptions.BadRequestKeyError(key) + + def popitem(self): + """Pop an item from the dict.""" + try: + item = dict.popitem(self) + + if len(item[1]) == 0: + raise exceptions.BadRequestKeyError(item) + + return (item[0], item[1][0]) + except KeyError as e: + raise exceptions.BadRequestKeyError(e.args[0]) + + def poplist(self, key): + """Pop the list for a key from the dict. If the key is not in the dict + an empty list is returned. + + .. versionchanged:: 0.5 + If the key does no longer exist a list is returned instead of + raising an error. + """ + return dict.pop(self, key, []) + + def popitemlist(self): + """Pop a ``(key, list)`` tuple from the dict.""" + try: + return dict.popitem(self) + except KeyError as e: + raise exceptions.BadRequestKeyError(e.args[0]) + + def __copy__(self): + return self.copy() + + def __deepcopy__(self, memo): + return self.deepcopy(memo=memo) + + def __repr__(self): + return "%s(%r)" % (self.__class__.__name__, list(iteritems(self, multi=True))) + + +class _omd_bucket(object): + """Wraps values in the :class:`OrderedMultiDict`. This makes it + possible to keep an order over multiple different keys. It requires + a lot of extra memory and slows down access a lot, but makes it + possible to access elements in O(1) and iterate in O(n). + """ + + __slots__ = ("prev", "key", "value", "next") + + def __init__(self, omd, key, value): + self.prev = omd._last_bucket + self.key = key + self.value = value + self.next = None + + if omd._first_bucket is None: + omd._first_bucket = self + if omd._last_bucket is not None: + omd._last_bucket.next = self + omd._last_bucket = self + + def unlink(self, omd): + if self.prev: + self.prev.next = self.next + if self.next: + self.next.prev = self.prev + if omd._first_bucket is self: + omd._first_bucket = self.next + if omd._last_bucket is self: + omd._last_bucket = self.prev + + +@native_itermethods(["keys", "values", "items", "lists", "listvalues"]) +class OrderedMultiDict(MultiDict): + """Works like a regular :class:`MultiDict` but preserves the + order of the fields. To convert the ordered multi dict into a + list you can use the :meth:`items` method and pass it ``multi=True``. + + In general an :class:`OrderedMultiDict` is an order of magnitude + slower than a :class:`MultiDict`. + + .. admonition:: note + + Due to a limitation in Python you cannot convert an ordered + multi dict into a regular dict by using ``dict(multidict)``. + Instead you have to use the :meth:`to_dict` method, otherwise + the internal bucket objects are exposed. 
+ """ + + def __init__(self, mapping=None): + dict.__init__(self) + self._first_bucket = self._last_bucket = None + if mapping is not None: + OrderedMultiDict.update(self, mapping) + + def __eq__(self, other): + if not isinstance(other, MultiDict): + return NotImplemented + if isinstance(other, OrderedMultiDict): + iter1 = iteritems(self, multi=True) + iter2 = iteritems(other, multi=True) + try: + for k1, v1 in iter1: + k2, v2 = next(iter2) + if k1 != k2 or v1 != v2: + return False + except StopIteration: + return False + try: + next(iter2) + except StopIteration: + return True + return False + if len(self) != len(other): + return False + for key, values in iterlists(self): + if other.getlist(key) != values: + return False + return True + + __hash__ = None + + def __ne__(self, other): + return not self.__eq__(other) + + def __reduce_ex__(self, protocol): + return type(self), (list(iteritems(self, multi=True)),) + + def __getstate__(self): + return list(iteritems(self, multi=True)) + + def __setstate__(self, values): + dict.clear(self) + for key, value in values: + self.add(key, value) + + def __getitem__(self, key): + if key in self: + return dict.__getitem__(self, key)[0].value + raise exceptions.BadRequestKeyError(key) + + def __setitem__(self, key, value): + self.poplist(key) + self.add(key, value) + + def __delitem__(self, key): + self.pop(key) + + def keys(self): + return (key for key, value in iteritems(self)) + + __iter__ = keys + + def values(self): + return (value for key, value in iteritems(self)) + + def items(self, multi=False): + ptr = self._first_bucket + if multi: + while ptr is not None: + yield ptr.key, ptr.value + ptr = ptr.next + else: + returned_keys = set() + while ptr is not None: + if ptr.key not in returned_keys: + returned_keys.add(ptr.key) + yield ptr.key, ptr.value + ptr = ptr.next + + def lists(self): + returned_keys = set() + ptr = self._first_bucket + while ptr is not None: + if ptr.key not in returned_keys: + yield ptr.key, self.getlist(ptr.key) + returned_keys.add(ptr.key) + ptr = ptr.next + + def listvalues(self): + for _key, values in iterlists(self): + yield values + + def add(self, key, value): + dict.setdefault(self, key, []).append(_omd_bucket(self, key, value)) + + def getlist(self, key, type=None): + try: + rv = dict.__getitem__(self, key) + except KeyError: + return [] + if type is None: + return [x.value for x in rv] + result = [] + for item in rv: + try: + result.append(type(item.value)) + except ValueError: + pass + return result + + def setlist(self, key, new_list): + self.poplist(key) + for value in new_list: + self.add(key, value) + + def setlistdefault(self, key, default_list=None): + raise TypeError("setlistdefault is unsupported for ordered multi dicts") + + def update(self, mapping): + for key, value in iter_multi_items(mapping): + OrderedMultiDict.add(self, key, value) + + def poplist(self, key): + buckets = dict.pop(self, key, ()) + for bucket in buckets: + bucket.unlink(self) + return [x.value for x in buckets] + + def pop(self, key, default=_missing): + try: + buckets = dict.pop(self, key) + except KeyError: + if default is not _missing: + return default + raise exceptions.BadRequestKeyError(key) + for bucket in buckets: + bucket.unlink(self) + return buckets[0].value + + def popitem(self): + try: + key, buckets = dict.popitem(self) + except KeyError as e: + raise exceptions.BadRequestKeyError(e.args[0]) + for bucket in buckets: + bucket.unlink(self) + return key, buckets[0].value + + def popitemlist(self): + try: + key, buckets = 
dict.popitem(self) + except KeyError as e: + raise exceptions.BadRequestKeyError(e.args[0]) + for bucket in buckets: + bucket.unlink(self) + return key, [x.value for x in buckets] + + +def _options_header_vkw(value, kw): + return dump_options_header( + value, dict((k.replace("_", "-"), v) for k, v in kw.items()) + ) + + +def _unicodify_header_value(value): + if isinstance(value, bytes): + value = value.decode("latin-1") + if not isinstance(value, text_type): + value = text_type(value) + return value + + +@native_itermethods(["keys", "values", "items"]) +class Headers(object): + """An object that stores some headers. It has a dict-like interface + but is ordered and can store the same keys multiple times. + + This data structure is useful if you want a nicer way to handle WSGI + headers which are stored as tuples in a list. + + From Werkzeug 0.3 onwards, the :exc:`KeyError` raised by this class is + also a subclass of the :class:`~exceptions.BadRequest` HTTP exception + and will render a page for a ``400 BAD REQUEST`` if caught in a + catch-all for HTTP exceptions. + + Headers is mostly compatible with the Python :class:`wsgiref.headers.Headers` + class, with the exception of `__getitem__`. :mod:`wsgiref` will return + `None` for ``headers['missing']``, whereas :class:`Headers` will raise + a :class:`KeyError`. + + To create a new :class:`Headers` object pass it a list or dict of headers + which are used as default values. This does not reuse the list passed + to the constructor for internal usage. + + :param defaults: The list of default values for the :class:`Headers`. + + .. versionchanged:: 0.9 + This data structure now stores unicode values similar to how the + multi dicts do it. The main difference is that bytes can be set as + well which will automatically be latin1 decoded. + + .. versionchanged:: 0.9 + The :meth:`linked` function was removed without replacement as it + was an API that does not support the changes to the encoding model. + """ + + def __init__(self, defaults=None): + self._list = [] + if defaults is not None: + if isinstance(defaults, (list, Headers)): + self._list.extend(defaults) + else: + self.extend(defaults) + + def __getitem__(self, key, _get_mode=False): + if not _get_mode: + if isinstance(key, integer_types): + return self._list[key] + elif isinstance(key, slice): + return self.__class__(self._list[key]) + if not isinstance(key, string_types): + raise exceptions.BadRequestKeyError(key) + ikey = key.lower() + for k, v in self._list: + if k.lower() == ikey: + return v + # micro optimization: if we are in get mode we will catch that + # exception one stack level down so we can raise a standard + # key error instead of our special one. + if _get_mode: + raise KeyError() + raise exceptions.BadRequestKeyError(key) + + def __eq__(self, other): + return other.__class__ is self.__class__ and set(other._list) == set(self._list) + + __hash__ = None + + def __ne__(self, other): + return not self.__eq__(other) + + def get(self, key, default=None, type=None, as_bytes=False): + """Return the default value if the requested data doesn't exist. + If `type` is provided and is a callable it should convert the value, + return it or raise a :exc:`ValueError` if that is not possible. In + this case the function will return the default as if the value was not + found: + + >>> d = Headers([('Content-Length', '42')]) + >>> d.get('Content-Length', type=int) + 42 + + If a headers object is bound you must not add unicode strings + because no encoding takes place. + + .. 
versionadded:: 0.9 + Added support for `as_bytes`. + + :param key: The key to be looked up. + :param default: The default value to be returned if the key can't + be looked up. If not further specified `None` is + returned. + :param type: A callable that is used to cast the value in the + :class:`Headers`. If a :exc:`ValueError` is raised + by this callable the default value is returned. + :param as_bytes: return bytes instead of unicode strings. + """ + try: + rv = self.__getitem__(key, _get_mode=True) + except KeyError: + return default + if as_bytes: + rv = rv.encode("latin1") + if type is None: + return rv + try: + return type(rv) + except ValueError: + return default + + def getlist(self, key, type=None, as_bytes=False): + """Return the list of items for a given key. If that key is not in the + :class:`Headers`, the return value will be an empty list. Just as + :meth:`get` :meth:`getlist` accepts a `type` parameter. All items will + be converted with the callable defined there. + + .. versionadded:: 0.9 + Added support for `as_bytes`. + + :param key: The key to be looked up. + :param type: A callable that is used to cast the value in the + :class:`Headers`. If a :exc:`ValueError` is raised + by this callable the value will be removed from the list. + :return: a :class:`list` of all the values for the key. + :param as_bytes: return bytes instead of unicode strings. + """ + ikey = key.lower() + result = [] + for k, v in self: + if k.lower() == ikey: + if as_bytes: + v = v.encode("latin1") + if type is not None: + try: + v = type(v) + except ValueError: + continue + result.append(v) + return result + + def get_all(self, name): + """Return a list of all the values for the named field. + + This method is compatible with the :mod:`wsgiref` + :meth:`~wsgiref.headers.Headers.get_all` method. + """ + return self.getlist(name) + + def items(self, lower=False): + for key, value in self: + if lower: + key = key.lower() + yield key, value + + def keys(self, lower=False): + for key, _ in iteritems(self, lower): + yield key + + def values(self): + for _, value in iteritems(self): + yield value + + def extend(self, iterable): + """Extend the headers with a dict or an iterable yielding keys and + values. + """ + if isinstance(iterable, dict): + for key, value in iteritems(iterable): + if isinstance(value, (tuple, list)): + for v in value: + self.add(key, v) + else: + self.add(key, value) + else: + for key, value in iterable: + self.add(key, value) + + def __delitem__(self, key, _index_operation=True): + if _index_operation and isinstance(key, (integer_types, slice)): + del self._list[key] + return + key = key.lower() + new = [] + for k, v in self._list: + if k.lower() != key: + new.append((k, v)) + self._list[:] = new + + def remove(self, key): + """Remove a key. + + :param key: The key to be removed. + """ + return self.__delitem__(key, _index_operation=False) + + def pop(self, key=None, default=_missing): + """Removes and returns a key or index. + + :param key: The key to be popped. If this is an integer the item at + that position is removed, if it's a string the value for + that key is. If the key is omitted or `None` the last + item is removed. + :return: an item. 
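+
+        A short usage sketch (illustrative; not part of the upstream
+        docstring):
+
+        >>> h = Headers([('X-First', '1'), ('X-Second', '2')])
+        >>> h.pop('X-First')
+        '1'
+        >>> h.pop()
+        ('X-Second', '2')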
+ """ + if key is None: + return self._list.pop() + if isinstance(key, integer_types): + return self._list.pop(key) + try: + rv = self[key] + self.remove(key) + except KeyError: + if default is not _missing: + return default + raise + return rv + + def popitem(self): + """Removes a key or index and returns a (key, value) item.""" + return self.pop() + + def __contains__(self, key): + """Check if a key is present.""" + try: + self.__getitem__(key, _get_mode=True) + except KeyError: + return False + return True + + has_key = __contains__ + + def __iter__(self): + """Yield ``(key, value)`` tuples.""" + return iter(self._list) + + def __len__(self): + return len(self._list) + + def add(self, _key, _value, **kw): + """Add a new header tuple to the list. + + Keyword arguments can specify additional parameters for the header + value, with underscores converted to dashes:: + + >>> d = Headers() + >>> d.add('Content-Type', 'text/plain') + >>> d.add('Content-Disposition', 'attachment', filename='foo.png') + + The keyword argument dumping uses :func:`dump_options_header` + behind the scenes. + + .. versionadded:: 0.4.1 + keyword arguments were added for :mod:`wsgiref` compatibility. + """ + if kw: + _value = _options_header_vkw(_value, kw) + _key = _unicodify_header_value(_key) + _value = _unicodify_header_value(_value) + self._validate_value(_value) + self._list.append((_key, _value)) + + def _validate_value(self, value): + if not isinstance(value, text_type): + raise TypeError("Value should be unicode.") + if u"\n" in value or u"\r" in value: + raise ValueError( + "Detected newline in header value. This is " + "a potential security problem" + ) + + def add_header(self, _key, _value, **_kw): + """Add a new header tuple to the list. + + An alias for :meth:`add` for compatibility with the :mod:`wsgiref` + :meth:`~wsgiref.headers.Headers.add_header` method. + """ + self.add(_key, _value, **_kw) + + def clear(self): + """Clears all headers.""" + del self._list[:] + + def set(self, _key, _value, **kw): + """Remove all header tuples for `key` and add a new one. The newly + added key either appears at the end of the list if there was no + entry or replaces the first one. + + Keyword arguments can specify additional parameters for the header + value, with underscores converted to dashes. See :meth:`add` for + more information. + + .. versionchanged:: 0.6.1 + :meth:`set` now accepts the same arguments as :meth:`add`. + + :param key: The key to be inserted. + :param value: The value to be inserted. + """ + if kw: + _value = _options_header_vkw(_value, kw) + _key = _unicodify_header_value(_key) + _value = _unicodify_header_value(_value) + self._validate_value(_value) + if not self._list: + self._list.append((_key, _value)) + return + listiter = iter(self._list) + ikey = _key.lower() + for idx, (old_key, _old_value) in enumerate(listiter): + if old_key.lower() == ikey: + # replace first ocurrence + self._list[idx] = (_key, _value) + break + else: + self._list.append((_key, _value)) + return + self._list[idx + 1 :] = [t for t in listiter if t[0].lower() != ikey] + + def setdefault(self, key, default): + """Returns the value for the key if it is in the dict, otherwise it + returns `default` and sets that value for `key`. + + :param key: The key to be looked up. + :param default: The default value to be returned if the key is not + in the dict. If not further specified it's `None`. 
+ """ + if key in self: + return self[key] + self.set(key, default) + return default + + def __setitem__(self, key, value): + """Like :meth:`set` but also supports index/slice based setting.""" + if isinstance(key, (slice, integer_types)): + if isinstance(key, integer_types): + value = [value] + value = [ + (_unicodify_header_value(k), _unicodify_header_value(v)) + for (k, v) in value + ] + [self._validate_value(v) for (k, v) in value] + if isinstance(key, integer_types): + self._list[key] = value[0] + else: + self._list[key] = value + else: + self.set(key, value) + + def to_list(self, charset="iso-8859-1"): + """Convert the headers into a list suitable for WSGI. + + .. deprecated:: 0.9 + """ + from warnings import warn + + warn( + "'to_list' deprecated as of version 0.9 and will be removed" + " in version 1.0. Use 'to_wsgi_list' instead.", + DeprecationWarning, + stacklevel=2, + ) + return self.to_wsgi_list() + + def to_wsgi_list(self): + """Convert the headers into a list suitable for WSGI. + + The values are byte strings in Python 2 converted to latin1 and unicode + strings in Python 3 for the WSGI server to encode. + + :return: list + """ + if PY2: + return [(to_native(k), v.encode("latin1")) for k, v in self] + return list(self) + + def copy(self): + return self.__class__(self._list) + + def __copy__(self): + return self.copy() + + def __str__(self): + """Returns formatted headers suitable for HTTP transmission.""" + strs = [] + for key, value in self.to_wsgi_list(): + strs.append("%s: %s" % (key, value)) + strs.append("\r\n") + return "\r\n".join(strs) + + def __repr__(self): + return "%s(%r)" % (self.__class__.__name__, list(self)) + + +class ImmutableHeadersMixin(object): + """Makes a :class:`Headers` immutable. We do not mark them as + hashable though since the only usecase for this datastructure + in Werkzeug is a view on a mutable structure. + + .. versionadded:: 0.5 + + :private: + """ + + def __delitem__(self, key, **kwargs): + is_immutable(self) + + def __setitem__(self, key, value): + is_immutable(self) + + set = __setitem__ + + def add(self, item): + is_immutable(self) + + remove = add_header = add + + def extend(self, iterable): + is_immutable(self) + + def insert(self, pos, value): + is_immutable(self) + + def pop(self, index=-1): + is_immutable(self) + + def popitem(self): + is_immutable(self) + + def setdefault(self, key, default): + is_immutable(self) + + +class EnvironHeaders(ImmutableHeadersMixin, Headers): + """Read only version of the headers from a WSGI environment. This + provides the same interface as `Headers` and is constructed from + a WSGI environment. + + From Werkzeug 0.3 onwards, the `KeyError` raised by this class is also a + subclass of the :exc:`~exceptions.BadRequest` HTTP exception and will + render a page for a ``400 BAD REQUEST`` if caught in a catch-all for + HTTP exceptions. + """ + + def __init__(self, environ): + self.environ = environ + + def __eq__(self, other): + return self.environ is other.environ + + __hash__ = None + + def __getitem__(self, key, _get_mode=False): + # _get_mode is a no-op for this class as there is no index but + # used because get() calls it. 
+ if not isinstance(key, string_types): + raise KeyError(key) + key = key.upper().replace("-", "_") + if key in ("CONTENT_TYPE", "CONTENT_LENGTH"): + return _unicodify_header_value(self.environ[key]) + return _unicodify_header_value(self.environ["HTTP_" + key]) + + def __len__(self): + # the iter is necessary because otherwise list calls our + # len which would call list again and so forth. + return len(list(iter(self))) + + def __iter__(self): + for key, value in iteritems(self.environ): + if key.startswith("HTTP_") and key not in ( + "HTTP_CONTENT_TYPE", + "HTTP_CONTENT_LENGTH", + ): + yield ( + key[5:].replace("_", "-").title(), + _unicodify_header_value(value), + ) + elif key in ("CONTENT_TYPE", "CONTENT_LENGTH") and value: + yield (key.replace("_", "-").title(), _unicodify_header_value(value)) + + def copy(self): + raise TypeError("cannot create %r copies" % self.__class__.__name__) + + +@native_itermethods(["keys", "values", "items", "lists", "listvalues"]) +class CombinedMultiDict(ImmutableMultiDictMixin, MultiDict): + """A read only :class:`MultiDict` that you can pass multiple :class:`MultiDict` + instances as sequence and it will combine the return values of all wrapped + dicts: + + >>> from werkzeug.datastructures import CombinedMultiDict, MultiDict + >>> post = MultiDict([('foo', 'bar')]) + >>> get = MultiDict([('blub', 'blah')]) + >>> combined = CombinedMultiDict([get, post]) + >>> combined['foo'] + 'bar' + >>> combined['blub'] + 'blah' + + This works for all read operations and will raise a `TypeError` for + methods that usually change data which isn't possible. + + From Werkzeug 0.3 onwards, the `KeyError` raised by this class is also a + subclass of the :exc:`~exceptions.BadRequest` HTTP exception and will + render a page for a ``400 BAD REQUEST`` if caught in a catch-all for HTTP + exceptions. + """ + + def __reduce_ex__(self, protocol): + return type(self), (self.dicts,) + + def __init__(self, dicts=None): + self.dicts = dicts or [] + + @classmethod + def fromkeys(cls): + raise TypeError("cannot create %r instances by fromkeys" % cls.__name__) + + def __getitem__(self, key): + for d in self.dicts: + if key in d: + return d[key] + raise exceptions.BadRequestKeyError(key) + + def get(self, key, default=None, type=None): + for d in self.dicts: + if key in d: + if type is not None: + try: + return type(d[key]) + except ValueError: + continue + return d[key] + return default + + def getlist(self, key, type=None): + rv = [] + for d in self.dicts: + rv.extend(d.getlist(key, type)) + return rv + + def _keys_impl(self): + """This function exists so __len__ can be implemented more efficiently, + saving one list creation from an iterator. + + Using this for Python 2's ``dict.keys`` behavior would be useless since + `dict.keys` in Python 2 returns a list, while we have a set here. 
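+
+        For example (illustrative):
+
+        >>> cd = CombinedMultiDict([MultiDict([('a', 1)]), MultiDict([('b', 2)])])
+        >>> cd._keys_impl() == set(['a', 'b'])
+        True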
+ """ + rv = set() + for d in self.dicts: + rv.update(iterkeys(d)) + return rv + + def keys(self): + return iter(self._keys_impl()) + + __iter__ = keys + + def items(self, multi=False): + found = set() + for d in self.dicts: + for key, value in iteritems(d, multi): + if multi: + yield key, value + elif key not in found: + found.add(key) + yield key, value + + def values(self): + for _key, value in iteritems(self): + yield value + + def lists(self): + rv = {} + for d in self.dicts: + for key, values in iterlists(d): + rv.setdefault(key, []).extend(values) + return iteritems(rv) + + def listvalues(self): + return (x[1] for x in self.lists()) + + def copy(self): + """Return a shallow mutable copy of this object. + + This returns a :class:`MultiDict` representing the data at the + time of copying. The copy will no longer reflect changes to the + wrapped dicts. + + .. versionchanged:: 0.15 + Return a mutable :class:`MultiDict`. + """ + return MultiDict(self) + + def to_dict(self, flat=True): + """Return the contents as regular dict. If `flat` is `True` the + returned dict will only have the first item present, if `flat` is + `False` all values will be returned as lists. + + :param flat: If set to `False` the dict returned will have lists + with all the values in it. Otherwise it will only + contain the first item for each key. + :return: a :class:`dict` + """ + rv = {} + for d in reversed(self.dicts): + rv.update(d.to_dict(flat)) + return rv + + def __len__(self): + return len(self._keys_impl()) + + def __contains__(self, key): + for d in self.dicts: + if key in d: + return True + return False + + has_key = __contains__ + + def __repr__(self): + return "%s(%r)" % (self.__class__.__name__, self.dicts) + + +class FileMultiDict(MultiDict): + """A special :class:`MultiDict` that has convenience methods to add + files to it. This is used for :class:`EnvironBuilder` and generally + useful for unittesting. + + .. versionadded:: 0.5 + """ + + def add_file(self, name, file, filename=None, content_type=None): + """Adds a new file to the dict. `file` can be a file name or + a :class:`file`-like or a :class:`FileStorage` object. + + :param name: the name of the field. + :param file: a filename or :class:`file`-like object + :param filename: an optional filename + :param content_type: an optional content type + """ + if isinstance(file, FileStorage): + value = file + else: + if isinstance(file, string_types): + if filename is None: + filename = file + file = open(file, "rb") + if filename and content_type is None: + content_type = ( + mimetypes.guess_type(filename)[0] or "application/octet-stream" + ) + value = FileStorage(file, filename, name, content_type) + + self.add(name, value) + + +class ImmutableDict(ImmutableDictMixin, dict): + """An immutable :class:`dict`. + + .. versionadded:: 0.5 + """ + + def __repr__(self): + return "%s(%s)" % (self.__class__.__name__, dict.__repr__(self)) + + def copy(self): + """Return a shallow mutable copy of this object. Keep in mind that + the standard library's :func:`copy` function is a no-op for this class + like for any other python immutable type (eg: :class:`tuple`). + """ + return dict(self) + + def __copy__(self): + return self + + +class ImmutableMultiDict(ImmutableMultiDictMixin, MultiDict): + """An immutable :class:`MultiDict`. + + .. versionadded:: 0.5 + """ + + def copy(self): + """Return a shallow mutable copy of this object. 
Keep in mind that + the standard library's :func:`copy` function is a no-op for this class + like for any other python immutable type (eg: :class:`tuple`). + """ + return MultiDict(self) + + def __copy__(self): + return self + + +class ImmutableOrderedMultiDict(ImmutableMultiDictMixin, OrderedMultiDict): + """An immutable :class:`OrderedMultiDict`. + + .. versionadded:: 0.6 + """ + + def _iter_hashitems(self): + return enumerate(iteritems(self, multi=True)) + + def copy(self): + """Return a shallow mutable copy of this object. Keep in mind that + the standard library's :func:`copy` function is a no-op for this class + like for any other python immutable type (eg: :class:`tuple`). + """ + return OrderedMultiDict(self) + + def __copy__(self): + return self + + +@native_itermethods(["values"]) +class Accept(ImmutableList): + """An :class:`Accept` object is just a list subclass for lists of + ``(value, quality)`` tuples. It is automatically sorted by specificity + and quality. + + All :class:`Accept` objects work similar to a list but provide extra + functionality for working with the data. Containment checks are + normalized to the rules of that header: + + >>> a = CharsetAccept([('ISO-8859-1', 1), ('utf-8', 0.7)]) + >>> a.best + 'ISO-8859-1' + >>> 'iso-8859-1' in a + True + >>> 'UTF8' in a + True + >>> 'utf7' in a + False + + To get the quality for an item you can use normal item lookup: + + >>> print a['utf-8'] + 0.7 + >>> a['utf7'] + 0 + + .. versionchanged:: 0.5 + :class:`Accept` objects are forced immutable now. + """ + + def __init__(self, values=()): + if values is None: + list.__init__(self) + self.provided = False + elif isinstance(values, Accept): + self.provided = values.provided + list.__init__(self, values) + else: + self.provided = True + values = sorted( + values, + key=lambda x: (self._specificity(x[0]), x[1], x[0]), + reverse=True, + ) + list.__init__(self, values) + + def _specificity(self, value): + """Returns a tuple describing the value's specificity.""" + return (value != "*",) + + def _value_matches(self, value, item): + """Check if a value matches a given accept item.""" + return item == "*" or item.lower() == value.lower() + + def __getitem__(self, key): + """Besides index lookup (getting item n) you can also pass it a string + to get the quality for the item. If the item is not in the list, the + returned quality is ``0``. + """ + if isinstance(key, string_types): + return self.quality(key) + return list.__getitem__(self, key) + + def quality(self, key): + """Returns the quality of the key. + + .. versionadded:: 0.6 + In previous versions you had to use the item-lookup syntax + (eg: ``obj[key]`` instead of ``obj.quality(key)``) + """ + for item, quality in self: + if self._value_matches(key, item): + return quality + return 0 + + def __contains__(self, value): + for item, _quality in self: + if self._value_matches(value, item): + return True + return False + + def __repr__(self): + return "%s([%s])" % ( + self.__class__.__name__, + ", ".join("(%r, %s)" % (x, y) for x, y in self), + ) + + def index(self, key): + """Get the position of an entry or raise :exc:`ValueError`. + + :param key: The key to be looked up. + + .. versionchanged:: 0.5 + This used to raise :exc:`IndexError`, which was inconsistent + with the list API. 
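+
+        An illustrative lookup (added example):
+
+        >>> a = CharsetAccept([('utf-8', 1), ('iso-8859-1', 0.5)])
+        >>> a.index('iso-8859-1')
+        1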
+ """ + if isinstance(key, string_types): + for idx, (item, _quality) in enumerate(self): + if self._value_matches(key, item): + return idx + raise ValueError(key) + return list.index(self, key) + + def find(self, key): + """Get the position of an entry or return -1. + + :param key: The key to be looked up. + """ + try: + return self.index(key) + except ValueError: + return -1 + + def values(self): + """Iterate over all values.""" + for item in self: + yield item[0] + + def to_header(self): + """Convert the header set into an HTTP header string.""" + result = [] + for value, quality in self: + if quality != 1: + value = "%s;q=%s" % (value, quality) + result.append(value) + return ",".join(result) + + def __str__(self): + return self.to_header() + + def _best_single_match(self, match): + for client_item, quality in self: + if self._value_matches(match, client_item): + # self is sorted by specificity descending, we can exit + return client_item, quality + + def best_match(self, matches, default=None): + """Returns the best match from a list of possible matches based + on the specificity and quality of the client. If two items have the + same quality and specificity, the one is returned that comes first. + + :param matches: a list of matches to check for + :param default: the value that is returned if none match + """ + result = default + best_quality = -1 + best_specificity = (-1,) + for server_item in matches: + match = self._best_single_match(server_item) + if not match: + continue + client_item, quality = match + specificity = self._specificity(client_item) + if quality <= 0 or quality < best_quality: + continue + # better quality or same quality but more specific => better match + if quality > best_quality or specificity > best_specificity: + result = server_item + best_quality = quality + best_specificity = specificity + return result + + @property + def best(self): + """The best match as value.""" + if self: + return self[0][0] + + +class MIMEAccept(Accept): + """Like :class:`Accept` but with special methods and behavior for + mimetypes. + """ + + def _specificity(self, value): + return tuple(x != "*" for x in value.split("/", 1)) + + def _value_matches(self, value, item): + def _normalize(x): + x = x.lower() + return ("*", "*") if x == "*" else x.split("/", 1) + + # this is from the application which is trusted. 
to avoid developer + # frustration we actually check these for valid values + if "/" not in value: + raise ValueError("invalid mimetype %r" % value) + value_type, value_subtype = _normalize(value) + if value_type == "*" and value_subtype != "*": + raise ValueError("invalid mimetype %r" % value) + + if "/" not in item: + return False + item_type, item_subtype = _normalize(item) + if item_type == "*" and item_subtype != "*": + return False + return ( + item_type == item_subtype == "*" or value_type == value_subtype == "*" + ) or ( + item_type == value_type + and ( + item_subtype == "*" + or value_subtype == "*" + or item_subtype == value_subtype + ) + ) + + @property + def accept_html(self): + """True if this object accepts HTML.""" + return ( + "text/html" in self or "application/xhtml+xml" in self or self.accept_xhtml + ) + + @property + def accept_xhtml(self): + """True if this object accepts XHTML.""" + return "application/xhtml+xml" in self or "application/xml" in self + + @property + def accept_json(self): + """True if this object accepts JSON.""" + return "application/json" in self + + +class LanguageAccept(Accept): + """Like :class:`Accept` but with normalization for languages.""" + + def _value_matches(self, value, item): + def _normalize(language): + return _locale_delim_re.split(language.lower()) + + return item == "*" or _normalize(value) == _normalize(item) + + +class CharsetAccept(Accept): + """Like :class:`Accept` but with normalization for charsets.""" + + def _value_matches(self, value, item): + def _normalize(name): + try: + return codecs.lookup(name).name + except LookupError: + return name.lower() + + return item == "*" or _normalize(value) == _normalize(item) + + +def cache_property(key, empty, type): + """Return a new property object for a cache header. Useful if you + want to add support for a cache extension in a subclass.""" + return property( + lambda x: x._get_cache_value(key, empty, type), + lambda x, v: x._set_cache_value(key, v, type), + lambda x: x._del_cache_value(key), + "accessor for %r" % key, + ) + + +class _CacheControl(UpdateDictMixin, dict): + """Subclass of a dict that stores values for a Cache-Control header. It + has accessors for all the cache-control directives specified in RFC 2616. + The class does not differentiate between request and response directives. + + Because the cache-control directives in the HTTP header use dashes the + python descriptors use underscores for that. + + To get a header of the :class:`CacheControl` object again you can convert + the object into a string or call the :meth:`to_header` method. If you plan + to subclass it and add your own items have a look at the sourcecode for + that class. + + .. versionchanged:: 0.4 + + Setting `no_cache` or `private` to boolean `True` will set the implicit + none-value which is ``*``: + + >>> cc = ResponseCacheControl() + >>> cc.no_cache = True + >>> cc + <ResponseCacheControl 'no-cache'> + >>> cc.no_cache + '*' + >>> cc.no_cache = None + >>> cc + <ResponseCacheControl ''> + + In versions before 0.5 the behavior documented here affected the now + no longer existing `CacheControl` class. 
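+
+    A small usage sketch of the accessors defined below (illustrative
+    addition):
+
+    >>> cc = ResponseCacheControl()
+    >>> cc.max_age = 3600
+    >>> cc.to_header()
+    'max-age=3600'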
+ """ + + no_cache = cache_property("no-cache", "*", None) + no_store = cache_property("no-store", None, bool) + max_age = cache_property("max-age", -1, int) + no_transform = cache_property("no-transform", None, None) + + def __init__(self, values=(), on_update=None): + dict.__init__(self, values or ()) + self.on_update = on_update + self.provided = values is not None + + def _get_cache_value(self, key, empty, type): + """Used internally by the accessor properties.""" + if type is bool: + return key in self + if key in self: + value = self[key] + if value is None: + return empty + elif type is not None: + try: + value = type(value) + except ValueError: + pass + return value + + def _set_cache_value(self, key, value, type): + """Used internally by the accessor properties.""" + if type is bool: + if value: + self[key] = None + else: + self.pop(key, None) + else: + if value is None: + self.pop(key) + elif value is True: + self[key] = None + else: + self[key] = value + + def _del_cache_value(self, key): + """Used internally by the accessor properties.""" + if key in self: + del self[key] + + def to_header(self): + """Convert the stored values into a cache control header.""" + return dump_header(self) + + def __str__(self): + return self.to_header() + + def __repr__(self): + return "<%s %s>" % ( + self.__class__.__name__, + " ".join("%s=%r" % (k, v) for k, v in sorted(self.items())), + ) + + +class RequestCacheControl(ImmutableDictMixin, _CacheControl): + """A cache control for requests. This is immutable and gives access + to all the request-relevant cache control headers. + + To get a header of the :class:`RequestCacheControl` object again you can + convert the object into a string or call the :meth:`to_header` method. If + you plan to subclass it and add your own items have a look at the sourcecode + for that class. + + .. versionadded:: 0.5 + In previous versions a `CacheControl` class existed that was used + both for request and response. + """ + + max_stale = cache_property("max-stale", "*", int) + min_fresh = cache_property("min-fresh", "*", int) + no_transform = cache_property("no-transform", None, None) + only_if_cached = cache_property("only-if-cached", None, bool) + + +class ResponseCacheControl(_CacheControl): + """A cache control for responses. Unlike :class:`RequestCacheControl` + this is mutable and gives access to response-relevant cache control + headers. + + To get a header of the :class:`ResponseCacheControl` object again you can + convert the object into a string or call the :meth:`to_header` method. If + you plan to subclass it and add your own items have a look at the sourcecode + for that class. + + .. versionadded:: 0.5 + In previous versions a `CacheControl` class existed that was used + both for request and response. + """ + + public = cache_property("public", None, bool) + private = cache_property("private", "*", None) + must_revalidate = cache_property("must-revalidate", None, bool) + proxy_revalidate = cache_property("proxy-revalidate", None, bool) + s_maxage = cache_property("s-maxage", None, None) + + +# attach cache_property to the _CacheControl as staticmethod +# so that others can reuse it. +_CacheControl.cache_property = staticmethod(cache_property) + + +class CallbackDict(UpdateDictMixin, dict): + """A dict that calls a function passed every time something is changed. + The function is passed the dict instance. 
+ """ + + def __init__(self, initial=None, on_update=None): + dict.__init__(self, initial or ()) + self.on_update = on_update + + def __repr__(self): + return "<%s %s>" % (self.__class__.__name__, dict.__repr__(self)) + + +class HeaderSet(collections_abc.MutableSet): + """Similar to the :class:`ETags` class this implements a set-like structure. + Unlike :class:`ETags` this is case insensitive and used for vary, allow, and + content-language headers. + + If not constructed using the :func:`parse_set_header` function the + instantiation works like this: + + >>> hs = HeaderSet(['foo', 'bar', 'baz']) + >>> hs + HeaderSet(['foo', 'bar', 'baz']) + """ + + def __init__(self, headers=None, on_update=None): + self._headers = list(headers or ()) + self._set = set([x.lower() for x in self._headers]) + self.on_update = on_update + + def add(self, header): + """Add a new header to the set.""" + self.update((header,)) + + def remove(self, header): + """Remove a header from the set. This raises an :exc:`KeyError` if the + header is not in the set. + + .. versionchanged:: 0.5 + In older versions a :exc:`IndexError` was raised instead of a + :exc:`KeyError` if the object was missing. + + :param header: the header to be removed. + """ + key = header.lower() + if key not in self._set: + raise KeyError(header) + self._set.remove(key) + for idx, key in enumerate(self._headers): + if key.lower() == header: + del self._headers[idx] + break + if self.on_update is not None: + self.on_update(self) + + def update(self, iterable): + """Add all the headers from the iterable to the set. + + :param iterable: updates the set with the items from the iterable. + """ + inserted_any = False + for header in iterable: + key = header.lower() + if key not in self._set: + self._headers.append(header) + self._set.add(key) + inserted_any = True + if inserted_any and self.on_update is not None: + self.on_update(self) + + def discard(self, header): + """Like :meth:`remove` but ignores errors. + + :param header: the header to be discarded. + """ + try: + return self.remove(header) + except KeyError: + pass + + def find(self, header): + """Return the index of the header in the set or return -1 if not found. + + :param header: the header to be looked up. + """ + header = header.lower() + for idx, item in enumerate(self._headers): + if item.lower() == header: + return idx + return -1 + + def index(self, header): + """Return the index of the header in the set or raise an + :exc:`IndexError`. + + :param header: the header to be looked up. + """ + rv = self.find(header) + if rv < 0: + raise IndexError(header) + return rv + + def clear(self): + """Clear the set.""" + self._set.clear() + del self._headers[:] + if self.on_update is not None: + self.on_update(self) + + def as_set(self, preserve_casing=False): + """Return the set as real python set type. When calling this, all + the items are converted to lowercase and the ordering is lost. + + :param preserve_casing: if set to `True` the items in the set returned + will have the original case like in the + :class:`HeaderSet`, otherwise they will + be lowercase. 
+ """ + if preserve_casing: + return set(self._headers) + return set(self._set) + + def to_header(self): + """Convert the header set into an HTTP header string.""" + return ", ".join(map(quote_header_value, self._headers)) + + def __getitem__(self, idx): + return self._headers[idx] + + def __delitem__(self, idx): + rv = self._headers.pop(idx) + self._set.remove(rv.lower()) + if self.on_update is not None: + self.on_update(self) + + def __setitem__(self, idx, value): + old = self._headers[idx] + self._set.remove(old.lower()) + self._headers[idx] = value + self._set.add(value.lower()) + if self.on_update is not None: + self.on_update(self) + + def __contains__(self, header): + return header.lower() in self._set + + def __len__(self): + return len(self._set) + + def __iter__(self): + return iter(self._headers) + + def __nonzero__(self): + return bool(self._set) + + def __str__(self): + return self.to_header() + + def __repr__(self): + return "%s(%r)" % (self.__class__.__name__, self._headers) + + +class ETags(collections_abc.Container, collections_abc.Iterable): + """A set that can be used to check if one etag is present in a collection + of etags. + """ + + def __init__(self, strong_etags=None, weak_etags=None, star_tag=False): + self._strong = frozenset(not star_tag and strong_etags or ()) + self._weak = frozenset(weak_etags or ()) + self.star_tag = star_tag + + def as_set(self, include_weak=False): + """Convert the `ETags` object into a python set. Per default all the + weak etags are not part of this set.""" + rv = set(self._strong) + if include_weak: + rv.update(self._weak) + return rv + + def is_weak(self, etag): + """Check if an etag is weak.""" + return etag in self._weak + + def is_strong(self, etag): + """Check if an etag is strong.""" + return etag in self._strong + + def contains_weak(self, etag): + """Check if an etag is part of the set including weak and strong tags.""" + return self.is_weak(etag) or self.contains(etag) + + def contains(self, etag): + """Check if an etag is part of the set ignoring weak tags. + It is also possible to use the ``in`` operator. + """ + if self.star_tag: + return True + return self.is_strong(etag) + + def contains_raw(self, etag): + """When passed a quoted tag it will check if this tag is part of the + set. If the tag is weak it is checked against weak and strong tags, + otherwise strong only.""" + etag, weak = unquote_etag(etag) + if weak: + return self.contains_weak(etag) + return self.contains(etag) + + def to_header(self): + """Convert the etags set into a HTTP header string.""" + if self.star_tag: + return "*" + return ", ".join( + ['"%s"' % x for x in self._strong] + ['W/"%s"' % x for x in self._weak] + ) + + def __call__(self, etag=None, data=None, include_weak=False): + if [etag, data].count(None) != 1: + raise TypeError("either tag or data required, but at least one") + if etag is None: + etag = generate_etag(data) + if include_weak: + if etag in self._weak: + return True + return etag in self._strong + + def __bool__(self): + return bool(self.star_tag or self._strong or self._weak) + + __nonzero__ = __bool__ + + def __str__(self): + return self.to_header() + + def __iter__(self): + return iter(self._strong) + + def __contains__(self, etag): + return self.contains(etag) + + def __repr__(self): + return "<%s %r>" % (self.__class__.__name__, str(self)) + + +class IfRange(object): + """Very simple object that represents the `If-Range` header in parsed + form. It will either have neither a etag or date or one of either but + never both. + + .. 
versionadded:: 0.7 + """ + + def __init__(self, etag=None, date=None): + #: The etag parsed and unquoted. Ranges always operate on strong + #: etags so the weakness information is not necessary. + self.etag = etag + #: The date in parsed format or `None`. + self.date = date + + def to_header(self): + """Converts the object back into an HTTP header.""" + if self.date is not None: + return http_date(self.date) + if self.etag is not None: + return quote_etag(self.etag) + return "" + + def __str__(self): + return self.to_header() + + def __repr__(self): + return "<%s %r>" % (self.__class__.__name__, str(self)) + + +class Range(object): + """Represents a ``Range`` header. All methods only support only + bytes as the unit. Stores a list of ranges if given, but the methods + only work if only one range is provided. + + :raise ValueError: If the ranges provided are invalid. + + .. versionchanged:: 0.15 + The ranges passed in are validated. + + .. versionadded:: 0.7 + """ + + def __init__(self, units, ranges): + #: The units of this range. Usually "bytes". + self.units = units + #: A list of ``(begin, end)`` tuples for the range header provided. + #: The ranges are non-inclusive. + self.ranges = ranges + + for start, end in ranges: + if start is None or (end is not None and (start < 0 or start >= end)): + raise ValueError("{} is not a valid range.".format((start, end))) + + def range_for_length(self, length): + """If the range is for bytes, the length is not None and there is + exactly one range and it is satisfiable it returns a ``(start, stop)`` + tuple, otherwise `None`. + """ + if self.units != "bytes" or length is None or len(self.ranges) != 1: + return None + start, end = self.ranges[0] + if end is None: + end = length + if start < 0: + start += length + if is_byte_range_valid(start, end, length): + return start, min(end, length) + + def make_content_range(self, length): + """Creates a :class:`~werkzeug.datastructures.ContentRange` object + from the current range and given content length. + """ + rng = self.range_for_length(length) + if rng is not None: + return ContentRange(self.units, rng[0], rng[1], length) + + def to_header(self): + """Converts the object back into an HTTP header.""" + ranges = [] + for begin, end in self.ranges: + if end is None: + ranges.append("%s-" % begin if begin >= 0 else str(begin)) + else: + ranges.append("%s-%s" % (begin, end - 1)) + return "%s=%s" % (self.units, ",".join(ranges)) + + def to_content_range_header(self, length): + """Converts the object into `Content-Range` HTTP header, + based on given length + """ + range_for_length = self.range_for_length(length) + if range_for_length is not None: + return "%s %d-%d/%d" % ( + self.units, + range_for_length[0], + range_for_length[1] - 1, + length, + ) + return None + + def __str__(self): + return self.to_header() + + def __repr__(self): + return "<%s %r>" % (self.__class__.__name__, str(self)) + + +class ContentRange(object): + """Represents the content range header. + + .. 
versionadded:: 0.7 + """ + + def __init__(self, units, start, stop, length=None, on_update=None): + assert is_byte_range_valid(start, stop, length), "Bad range provided" + self.on_update = on_update + self.set(start, stop, length, units) + + def _callback_property(name): # noqa: B902 + def fget(self): + return getattr(self, name) + + def fset(self, value): + setattr(self, name, value) + if self.on_update is not None: + self.on_update(self) + + return property(fget, fset) + + #: The units to use, usually "bytes" + units = _callback_property("_units") + #: The start point of the range or `None`. + start = _callback_property("_start") + #: The stop point of the range (non-inclusive) or `None`. Can only be + #: `None` if also start is `None`. + stop = _callback_property("_stop") + #: The length of the range or `None`. + length = _callback_property("_length") + del _callback_property + + def set(self, start, stop, length=None, units="bytes"): + """Simple method to update the ranges.""" + assert is_byte_range_valid(start, stop, length), "Bad range provided" + self._units = units + self._start = start + self._stop = stop + self._length = length + if self.on_update is not None: + self.on_update(self) + + def unset(self): + """Sets the units to `None` which indicates that the header should + no longer be used. + """ + self.set(None, None, units=None) + + def to_header(self): + if self.units is None: + return "" + if self.length is None: + length = "*" + else: + length = self.length + if self.start is None: + return "%s */%s" % (self.units, length) + return "%s %s-%s/%s" % (self.units, self.start, self.stop - 1, length) + + def __nonzero__(self): + return self.units is not None + + __bool__ = __nonzero__ + + def __str__(self): + return self.to_header() + + def __repr__(self): + return "<%s %r>" % (self.__class__.__name__, str(self)) + + +class Authorization(ImmutableDictMixin, dict): + """Represents an `Authorization` header sent by the client. You should + not create this kind of object yourself but use it when it's returned by + the `parse_authorization_header` function. + + This object is a dict subclass and can be altered by setting dict items + but it should be considered immutable as it's returned by the client and + not meant for modifications. + + .. versionchanged:: 0.5 + This object became immutable. + """ + + def __init__(self, auth_type, data=None): + dict.__init__(self, data or {}) + self.type = auth_type + + username = property( + lambda self: self.get("username"), + doc=""" + The username transmitted. This is set for both basic and digest + auth all the time.""", + ) + password = property( + lambda self: self.get("password"), + doc=""" + When the authentication type is basic this is the password + transmitted by the client, else `None`.""", + ) + realm = property( + lambda self: self.get("realm"), + doc=""" + This is the server realm sent back for HTTP digest auth.""", + ) + nonce = property( + lambda self: self.get("nonce"), + doc=""" + The nonce the server sent for digest auth, sent back by the client. + A nonce should be unique for every 401 response for HTTP digest + auth.""", + ) + uri = property( + lambda self: self.get("uri"), + doc=""" + The URI from Request-URI of the Request-Line; duplicated because + proxies are allowed to change the Request-Line in transit. HTTP + digest auth only.""", + ) + nc = property( + lambda self: self.get("nc"), + doc=""" + The nonce count value transmitted by clients if a qop-header is + also transmitted. 
HTTP digest auth only.""", + ) + cnonce = property( + lambda self: self.get("cnonce"), + doc=""" + If the server sent a qop-header in the ``WWW-Authenticate`` + header, the client has to provide this value for HTTP digest auth. + See the RFC for more details.""", + ) + response = property( + lambda self: self.get("response"), + doc=""" + A string of 32 hex digits computed as defined in RFC 2617, which + proves that the user knows a password. Digest auth only.""", + ) + opaque = property( + lambda self: self.get("opaque"), + doc=""" + The opaque header from the server returned unchanged by the client. + It is recommended that this string be base64 or hexadecimal data. + Digest auth only.""", + ) + qop = property( + lambda self: self.get("qop"), + doc=""" + Indicates what "quality of protection" the client has applied to + the message for HTTP digest auth. Note that this is a single token, + not a quoted list of alternatives as in WWW-Authenticate.""", + ) + + +class WWWAuthenticate(UpdateDictMixin, dict): + """Provides simple access to `WWW-Authenticate` headers.""" + + #: list of keys that require quoting in the generated header + _require_quoting = frozenset(["domain", "nonce", "opaque", "realm", "qop"]) + + def __init__(self, auth_type=None, values=None, on_update=None): + dict.__init__(self, values or ()) + if auth_type: + self["__auth_type__"] = auth_type + self.on_update = on_update + + def set_basic(self, realm="authentication required"): + """Clear the auth info and enable basic auth.""" + dict.clear(self) + dict.update(self, {"__auth_type__": "basic", "realm": realm}) + if self.on_update: + self.on_update(self) + + def set_digest( + self, realm, nonce, qop=("auth",), opaque=None, algorithm=None, stale=False + ): + """Clear the auth info and enable digest auth.""" + d = { + "__auth_type__": "digest", + "realm": realm, + "nonce": nonce, + "qop": dump_header(qop), + } + if stale: + d["stale"] = "TRUE" + if opaque is not None: + d["opaque"] = opaque + if algorithm is not None: + d["algorithm"] = algorithm + dict.clear(self) + dict.update(self, d) + if self.on_update: + self.on_update(self) + + def to_header(self): + """Convert the stored values into a WWW-Authenticate header.""" + d = dict(self) + auth_type = d.pop("__auth_type__", None) or "basic" + return "%s %s" % ( + auth_type.title(), + ", ".join( + [ + "%s=%s" + % ( + key, + quote_header_value( + value, allow_token=key not in self._require_quoting + ), + ) + for key, value in iteritems(d) + ] + ), + ) + + def __str__(self): + return self.to_header() + + def __repr__(self): + return "<%s %r>" % (self.__class__.__name__, self.to_header()) + + def auth_property(name, doc=None): # noqa: B902 + """A static helper function for subclasses to add extra authentication + system properties onto a class:: + + class FooAuthenticate(WWWAuthenticate): + special_realm = auth_property('special_realm') + + For more information have a look at the sourcecode to see how the + regular properties (:attr:`realm` etc.) are implemented. 
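A short sketch of both uses, assuming the vendored werkzeug is importable; the ``TokenAuthenticate`` subclass and its ``error`` directive are invented here purely to illustrate the ``auth_property`` pattern described above::

    from werkzeug.datastructures import WWWAuthenticate

    challenge = WWWAuthenticate()
    challenge.set_basic(realm="login required")
    print(challenge.to_header())  # Basic realm="login required"

    class TokenAuthenticate(WWWAuthenticate):
        # hypothetical extra directive, added via the helper above
        error = WWWAuthenticate.auth_property("error")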
+ """ + + def _set_value(self, value): + if value is None: + self.pop(name, None) + else: + self[name] = str(value) + + return property(lambda x: x.get(name), _set_value, doc=doc) + + def _set_property(name, doc=None): # noqa: B902 + def fget(self): + def on_update(header_set): + if not header_set and name in self: + del self[name] + elif header_set: + self[name] = header_set.to_header() + + return parse_set_header(self.get(name), on_update) + + return property(fget, doc=doc) + + type = auth_property( + "__auth_type__", + doc="""The type of the auth mechanism. HTTP currently specifies + ``Basic`` and ``Digest``.""", + ) + realm = auth_property( + "realm", + doc="""A string to be displayed to users so they know which + username and password to use. This string should contain at + least the name of the host performing the authentication and + might additionally indicate the collection of users who might + have access.""", + ) + domain = _set_property( + "domain", + doc="""A list of URIs that define the protection space. If a URI + is an absolute path, it is relative to the canonical root URL of + the server being accessed.""", + ) + nonce = auth_property( + "nonce", + doc=""" + A server-specified data string which should be uniquely generated + each time a 401 response is made. It is recommended that this + string be base64 or hexadecimal data.""", + ) + opaque = auth_property( + "opaque", + doc="""A string of data, specified by the server, which should + be returned by the client unchanged in the Authorization header + of subsequent requests with URIs in the same protection space. + It is recommended that this string be base64 or hexadecimal + data.""", + ) + algorithm = auth_property( + "algorithm", + doc="""A string indicating a pair of algorithms used to produce + the digest and a checksum. If this is not present it is assumed + to be "MD5". If the algorithm is not understood, the challenge + should be ignored (and a different one used, if there is more + than one).""", + ) + qop = _set_property( + "qop", + doc="""A set of quality-of-privacy directives such as auth and + auth-int.""", + ) + + @property + def stale(self): + """A flag, indicating that the previous request from the client + was rejected because the nonce value was stale. + """ + val = self.get("stale") + if val is not None: + return val.lower() == "true" + + @stale.setter + def stale(self, value): + if value is None: + self.pop("stale", None) + else: + self["stale"] = "TRUE" if value else "FALSE" + + auth_property = staticmethod(auth_property) + del _set_property + + +class FileStorage(object): + """The :class:`FileStorage` class is a thin wrapper over incoming files. + It is used by the request object to represent uploaded files. All the + attributes of the wrapper stream are proxied by the file storage so + it's possible to do ``storage.read()`` instead of the long form + ``storage.stream.read()``. + """ + + def __init__( + self, + stream=None, + filename=None, + name=None, + content_type=None, + content_length=None, + headers=None, + ): + self.name = name + self.stream = stream or BytesIO() + + # if no filename is provided we can attempt to get the filename + # from the stream object passed. There we have to be careful to + # skip things like <fdopen>, <stderr> etc. Python marks these + # special filenames with angular brackets. 
+ if filename is None: + filename = getattr(stream, "name", None) + s = make_literal_wrapper(filename) + if filename and filename[0] == s("<") and filename[-1] == s(">"): + filename = None + + # On Python 3 we want to make sure the filename is always unicode. + # This might not be if the name attribute is bytes due to the + # file being opened from the bytes API. + if not PY2 and isinstance(filename, bytes): + filename = filename.decode(get_filesystem_encoding(), "replace") + + self.filename = filename + if headers is None: + headers = Headers() + self.headers = headers + if content_type is not None: + headers["Content-Type"] = content_type + if content_length is not None: + headers["Content-Length"] = str(content_length) + + def _parse_content_type(self): + if not hasattr(self, "_parsed_content_type"): + self._parsed_content_type = parse_options_header(self.content_type) + + @property + def content_type(self): + """The content-type sent in the header. Usually not available""" + return self.headers.get("content-type") + + @property + def content_length(self): + """The content-length sent in the header. Usually not available""" + return int(self.headers.get("content-length") or 0) + + @property + def mimetype(self): + """Like :attr:`content_type`, but without parameters (eg, without + charset, type etc.) and always lowercase. For example if the content + type is ``text/HTML; charset=utf-8`` the mimetype would be + ``'text/html'``. + + .. versionadded:: 0.7 + """ + self._parse_content_type() + return self._parsed_content_type[0].lower() + + @property + def mimetype_params(self): + """The mimetype parameters as dict. For example if the content + type is ``text/html; charset=utf-8`` the params would be + ``{'charset': 'utf-8'}``. + + .. versionadded:: 0.7 + """ + self._parse_content_type() + return self._parsed_content_type[1] + + def save(self, dst, buffer_size=16384): + """Save the file to a destination path or file object. If the + destination is a file object you have to close it yourself after the + call. The buffer size is the number of bytes held in memory during + the copy process. It defaults to 16KB. + + For secure file saving also have a look at :func:`secure_filename`. + + :param dst: a filename or open file object the uploaded file + is saved to. + :param buffer_size: the size of the buffer. This works the same as + the `length` parameter of + :func:`shutil.copyfileobj`. + """ + from shutil import copyfileobj + + close_dst = False + if isinstance(dst, string_types): + dst = open(dst, "wb") + close_dst = True + try: + copyfileobj(self.stream, dst, buffer_size) + finally: + if close_dst: + dst.close() + + def close(self): + """Close the underlying file if possible.""" + try: + self.stream.close() + except Exception: + pass + + def __nonzero__(self): + return bool(self.filename) + + __bool__ = __nonzero__ + + def __getattr__(self, name): + try: + return getattr(self.stream, name) + except AttributeError: + # SpooledTemporaryFile doesn't implement IOBase, get the + # attribute from its backing file instead. + # https://github.com/python/cpython/pull/3249 + if hasattr(self.stream, "_file"): + return getattr(self.stream._file, name) + raise + + def __iter__(self): + return iter(self.stream) + + def __repr__(self): + return "<%s: %r (%r)>" % ( + self.__class__.__name__, + self.filename, + self.content_type, + ) + + +# circular dependencies +from . 
import exceptions
+from .http import dump_header
+from .http import dump_options_header
+from .http import generate_etag
+from .http import http_date
+from .http import is_byte_range_valid
+from .http import parse_options_header
+from .http import parse_set_header
+from .http import quote_etag
+from .http import quote_header_value
+from .http import unquote_etag
diff --git a/python/werkzeug/exceptions.py b/python/werkzeug/exceptions.py
new file mode 100644
index 0000000..fb6528d
--- /dev/null
+++ b/python/werkzeug/exceptions.py
@@ -0,0 +1,774 @@
+# -*- coding: utf-8 -*-
+"""
+ werkzeug.exceptions
+ ~~~~~~~~~~~~~~~~~~~
+
+ This module implements a number of Python exceptions you can raise from
+ within your views to trigger a standard non-200 response.
+
+
+ Usage Example
+ -------------
+
+ ::
+
+ from werkzeug.wrappers import BaseRequest
+ from werkzeug.wsgi import responder
+ from werkzeug.exceptions import HTTPException, NotFound
+
+ def view(request):
+ raise NotFound()
+
+ @responder
+ def application(environ, start_response):
+ request = BaseRequest(environ)
+ try:
+ return view(request)
+ except HTTPException as e:
+ return e
+
+
+ As you can see from this example those exceptions are callable WSGI
+ applications. Because of Python 2.4 compatibility those do not extend
+ from the response objects but only from the Python exception class.
+
+ As a matter of fact they are not Werkzeug response objects. However you
+ can get a response object by calling ``get_response()`` on an HTTP
+ exception.
+
+ Keep in mind that you have to pass an environment to ``get_response()``
+ because some errors fetch additional information from the WSGI
+ environment.
+
+ If you want to hook in a different exception page to, say, a 404 status
+ code, you can add a second except for a specific subclass of an error::
+
+ @responder
+ def application(environ, start_response):
+ request = BaseRequest(environ)
+ try:
+ return view(request)
+ except NotFound as e:
+ return not_found(request)
+ except HTTPException as e:
+ return e
+
+
+ :copyright: 2007 Pallets
+ :license: BSD-3-Clause
+"""
+import sys
+
+import werkzeug
+
+# Because of bootstrapping reasons we need to manually patch ourselves
+# onto our parent module.
+werkzeug.exceptions = sys.modules[__name__]
+
+from ._compat import implements_to_string
+from ._compat import integer_types
+from ._compat import iteritems
+from ._compat import text_type
+from ._internal import _get_environ
+from .wrappers import Response
+
+
+@implements_to_string
+class HTTPException(Exception):
+ """Baseclass for all HTTP exceptions. This exception can be called as a
+ WSGI application to render a default error page or you can catch the
+ subclasses of it independently and render nicer error messages.
+ """
+
+ code = None
+ description = None
+
+ def __init__(self, description=None, response=None):
+ super(Exception, self).__init__()
+ if description is not None:
+ self.description = description
+ self.response = response
+
+ @classmethod
+ def wrap(cls, exception, name=None):
+ """Create an exception that is a subclass of the calling HTTP
+ exception and the ``exception`` argument.
+
+ The first argument to the class will be passed to the
+ wrapped ``exception``, the rest to the HTTP exception. If
+ ``self.args`` is not empty, the wrapped exception message is
+ added to the HTTP exception description.
+
+ .. versionchanged:: 0.15
+ The description includes the wrapped exception message.
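A hedged sketch of ``wrap`` in action; this mirrors how ``BadRequestKeyError`` is created at the bottom of this module, with Python 3's builtin ``TimeoutError`` standing in as the wrapped exception purely for illustration::

    from werkzeug.exceptions import BadRequest

    BadRequestTimeout = BadRequest.wrap(TimeoutError, name="BadRequestTimeout")
    try:
        raise BadRequestTimeout("upstream took too long")
    except BadRequest as exc:
        print(exc.code)  # 400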
+ """ + + class newcls(cls, exception): + def __init__(self, arg=None, *args, **kwargs): + super(cls, self).__init__(*args, **kwargs) + + if arg is None: + exception.__init__(self) + else: + exception.__init__(self, arg) + + def get_description(self, environ=None): + out = super(cls, self).get_description(environ=environ) + + if self.args: + out += "<p><pre><code>{}: {}</code></pre></p>".format( + exception.__name__, escape(exception.__str__(self)) + ) + + return out + + newcls.__module__ = sys._getframe(1).f_globals.get("__name__") + newcls.__name__ = name or cls.__name__ + exception.__name__ + return newcls + + @property + def name(self): + """The status name.""" + return HTTP_STATUS_CODES.get(self.code, "Unknown Error") + + def get_description(self, environ=None): + """Get the description.""" + return u"<p>%s</p>" % escape(self.description) + + def get_body(self, environ=None): + """Get the HTML body.""" + return text_type( + ( + u'<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">\n' + u"<title>%(code)s %(name)s</title>\n" + u"<h1>%(name)s</h1>\n" + u"%(description)s\n" + ) + % { + "code": self.code, + "name": escape(self.name), + "description": self.get_description(environ), + } + ) + + def get_headers(self, environ=None): + """Get a list of headers.""" + return [("Content-Type", "text/html")] + + def get_response(self, environ=None): + """Get a response object. If one was passed to the exception + it's returned directly. + + :param environ: the optional environ for the request. This + can be used to modify the response depending + on how the request looked like. + :return: a :class:`Response` object or a subclass thereof. + """ + if self.response is not None: + return self.response + if environ is not None: + environ = _get_environ(environ) + headers = self.get_headers(environ) + return Response(self.get_body(environ), self.code, headers) + + def __call__(self, environ, start_response): + """Call the exception as WSGI application. + + :param environ: the WSGI environment. + :param start_response: the response callable provided by the WSGI + server. + """ + response = self.get_response(environ) + return response(environ, start_response) + + def __str__(self): + code = self.code if self.code is not None else "???" + return "%s %s: %s" % (code, self.name, self.description) + + def __repr__(self): + code = self.code if self.code is not None else "???" + return "<%s '%s: %s'>" % (self.__class__.__name__, code, self.name) + + +class BadRequest(HTTPException): + """*400* `Bad Request` + + Raise if the browser sends something to the application the application + or server cannot handle. + """ + + code = 400 + description = ( + "The browser (or proxy) sent a request that this server could " + "not understand." + ) + + +class ClientDisconnected(BadRequest): + """Internal exception that is raised if Werkzeug detects a disconnected + client. Since the client is already gone at that point attempting to + send the error message to the client might not work and might ultimately + result in another exception in the server. Mainly this is here so that + it is silenced by default as far as Werkzeug is concerned. + + Since disconnections cannot be reliably detected and are unspecified + by WSGI to a large extent this might or might not be raised if a client + is gone. + + .. versionadded:: 0.8 + """ + + +class SecurityError(BadRequest): + """Raised if something triggers a security error. This is otherwise + exactly like a bad request error. + + .. 
versionadded:: 0.9
+ """
+
+
+class BadHost(BadRequest):
+ """Raised if the submitted host is badly formatted.
+
+ .. versionadded:: 0.11.2
+ """
+
+
+class Unauthorized(HTTPException):
+ """*401* ``Unauthorized``
+
+ Raise if the user is not authorized to access a resource.
+
+ The ``www_authenticate`` argument should be used to set the
+ ``WWW-Authenticate`` header. This is used for HTTP basic auth and
+ other schemes. Use :class:`~werkzeug.datastructures.WWWAuthenticate`
+ to create correctly formatted values. Strictly speaking a 401
+ response is invalid if it doesn't provide at least one value for
+ this header, although real clients typically don't care.
+
+ :param description: Override the default message used for the body
+ of the response.
+ :param www_authenticate: A single value, or list of values, for the
+ WWW-Authenticate header.
+
+ .. versionchanged:: 0.15.3
+ If the ``www_authenticate`` argument is not set, the
+ ``WWW-Authenticate`` header is not set.
+
+ .. versionchanged:: 0.15.3
+ The ``response`` argument was restored.
+
+ .. versionchanged:: 0.15.1
+ ``description`` was moved back as the first argument, restoring
+ its previous position.
+
+ .. versionchanged:: 0.15.0
+ ``www_authenticate`` was added as the first argument, ahead of
+ ``description``.
+ """
+
+ code = 401
+ description = (
+ "The server could not verify that you are authorized to access"
+ " the URL requested. You either supplied the wrong credentials"
+ " (e.g. a bad password), or your browser doesn't understand"
+ " how to supply the credentials required."
+ )
+
+ def __init__(self, description=None, response=None, www_authenticate=None):
+ HTTPException.__init__(self, description, response)
+
+ if www_authenticate is not None:
+ if not isinstance(www_authenticate, (tuple, list)):
+ www_authenticate = (www_authenticate,)
+
+ self.www_authenticate = www_authenticate
+
+ def get_headers(self, environ=None):
+ headers = HTTPException.get_headers(self, environ)
+ if self.www_authenticate:
+ headers.append(
+ ("WWW-Authenticate", ", ".join([str(x) for x in self.www_authenticate]))
+ )
+ return headers
+
+
+class Forbidden(HTTPException):
+ """*403* `Forbidden`
+
+ Raise if the user doesn't have permission for the requested resource
+ but was authenticated.
+ """
+
+ code = 403
+ description = (
+ "You don't have the permission to access the requested"
+ " resource. It is either read-protected or not readable by the"
+ " server."
+ )
+
+
+class NotFound(HTTPException):
+ """*404* `Not Found`
+
+ Raise if a resource does not exist and never existed.
+ """
+
+ code = 404
+ description = (
+ "The requested URL was not found on the server. If you entered"
+ " the URL manually please check your spelling and try again."
+ )
+
+
+class MethodNotAllowed(HTTPException):
+ """*405* `Method Not Allowed`
+
+ Raise if the client used a method the resource does not handle. For
+ example `POST` if the resource is view only. Especially useful for REST.
+
+ The first argument for this exception should be a list of allowed methods.
+ Strictly speaking the response would be invalid if you don't provide the
+ valid methods in the ``Allow`` header, which you can do with that list.
+ """
+
+ code = 405
+ description = "The method is not allowed for the requested URL."
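As the docstring above says, pass the allowed methods as the first argument (handled by the ``__init__`` just below) and they are emitted as the ``Allow`` header; a minimal sketch::

    from werkzeug.exceptions import MethodNotAllowed

    try:
        raise MethodNotAllowed(valid_methods=["GET", "HEAD"])
    except MethodNotAllowed as exc:
        print(dict(exc.get_headers())["Allow"])  # GET, HEAD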
+
+ def __init__(self, valid_methods=None, description=None):
+ """Takes an optional list of valid HTTP methods;
+ starting with Werkzeug 0.3 the list will be mandatory."""
+ HTTPException.__init__(self, description)
+ self.valid_methods = valid_methods
+
+ def get_headers(self, environ=None):
+ headers = HTTPException.get_headers(self, environ)
+ if self.valid_methods:
+ headers.append(("Allow", ", ".join(self.valid_methods)))
+ return headers
+
+
+class NotAcceptable(HTTPException):
+ """*406* `Not Acceptable`
+
+ Raise if the server can't return any content conforming to the
+ `Accept` headers of the client.
+ """
+
+ code = 406
+
+ description = (
+ "The resource identified by the request is only capable of"
+ " generating response entities which have content"
+ " characteristics not acceptable according to the accept"
+ " headers sent in the request."
+ )
+
+
+class RequestTimeout(HTTPException):
+ """*408* `Request Timeout`
+
+ Raise to signal a timeout.
+ """
+
+ code = 408
+ description = (
+ "The server closed the network connection because the browser"
+ " didn't finish the request within the specified time."
+ )
+
+
+class Conflict(HTTPException):
+ """*409* `Conflict`
+
+ Raise to signal that a request cannot be completed because it conflicts
+ with the current state on the server.
+
+ .. versionadded:: 0.7
+ """
+
+ code = 409
+ description = (
+ "A conflict happened while processing the request. The"
+ " resource might have been modified while the request was being"
+ " processed."
+ )
+
+
+class Gone(HTTPException):
+ """*410* `Gone`
+
+ Raise if a resource existed previously and went away without a new
+ location.
+ """
+
+ code = 410
+ description = (
+ "The requested URL is no longer available on this server and"
+ " there is no forwarding address. If you followed a link from a"
+ " foreign page, please contact the author of this page."
+ )
+
+
+class LengthRequired(HTTPException):
+ """*411* `Length Required`
+
+ Raise if the browser submitted data but no ``Content-Length`` header,
+ which is required for the kind of processing the server does.
+ """
+
+ code = 411
+ description = (
+ "A request with this method requires a valid <code>Content-"
+ "Length</code> header."
+ )
+
+
+class PreconditionFailed(HTTPException):
+ """*412* `Precondition Failed`
+
+ Status code used in combination with ``If-Match``, ``If-None-Match``, or
+ ``If-Unmodified-Since``.
+ """
+
+ code = 412
+ description = (
+ "The precondition on the request for the URL failed positive evaluation."
+ )
+
+
+class RequestEntityTooLarge(HTTPException):
+ """*413* `Request Entity Too Large`
+
+ The status code one should return if the data submitted exceeded a given
+ limit.
+ """
+
+ code = 413
+ description = "The data value transmitted exceeds the capacity limit."
+
+
+class RequestURITooLarge(HTTPException):
+ """*414* `Request URI Too Large`
+
+ Like *413* but for too long URLs.
+ """
+
+ code = 414
+ description = (
+ "The length of the requested URL exceeds the capacity limit for"
+ " this server. The request cannot be processed."
+ )
+
+
+class UnsupportedMediaType(HTTPException):
+ """*415* `Unsupported Media Type`
+
+ The status code returned if the server is unable to handle the media type
+ the client transmitted.
+ """
+
+ code = 415
+ description = (
+ "The server does not support the media type transmitted in the request."
+ )
+
+
+class RequestedRangeNotSatisfiable(HTTPException):
+ """*416* `Requested Range Not Satisfiable`
+
+ The client asked for an invalid part of the file.
+
+ ..
versionadded:: 0.7 + """ + + code = 416 + description = "The server cannot provide the requested range." + + def __init__(self, length=None, units="bytes", description=None): + """Takes an optional `Content-Range` header value based on ``length`` + parameter. + """ + HTTPException.__init__(self, description) + self.length = length + self.units = units + + def get_headers(self, environ=None): + headers = HTTPException.get_headers(self, environ) + if self.length is not None: + headers.append(("Content-Range", "%s */%d" % (self.units, self.length))) + return headers + + +class ExpectationFailed(HTTPException): + """*417* `Expectation Failed` + + The server cannot meet the requirements of the Expect request-header. + + .. versionadded:: 0.7 + """ + + code = 417 + description = "The server could not meet the requirements of the Expect header" + + +class ImATeapot(HTTPException): + """*418* `I'm a teapot` + + The server should return this if it is a teapot and someone attempted + to brew coffee with it. + + .. versionadded:: 0.7 + """ + + code = 418 + description = "This server is a teapot, not a coffee machine" + + +class UnprocessableEntity(HTTPException): + """*422* `Unprocessable Entity` + + Used if the request is well formed, but the instructions are otherwise + incorrect. + """ + + code = 422 + description = ( + "The request was well-formed but was unable to be followed due" + " to semantic errors." + ) + + +class Locked(HTTPException): + """*423* `Locked` + + Used if the resource that is being accessed is locked. + """ + + code = 423 + description = "The resource that is being accessed is locked." + + +class FailedDependency(HTTPException): + """*424* `Failed Dependency` + + Used if the method could not be performed on the resource + because the requested action depended on another action and that action failed. + """ + + code = 424 + description = ( + "The method could not be performed on the resource because the" + " requested action depended on another action and that action" + " failed." + ) + + +class PreconditionRequired(HTTPException): + """*428* `Precondition Required` + + The server requires this request to be conditional, typically to prevent + the lost update problem, which is a race condition between two or more + clients attempting to update a resource through PUT or DELETE. By requiring + each client to include a conditional header ("If-Match" or "If-Unmodified- + Since") with the proper value retained from a recent GET request, the + server ensures that each client has at least seen the previous revision of + the resource. + """ + + code = 428 + description = ( + "This request is required to be conditional; try using" + ' "If-Match" or "If-Unmodified-Since".' + ) + + +class TooManyRequests(HTTPException): + """*429* `Too Many Requests` + + The server is limiting the rate at which this user receives responses, and + this request exceeds that rate. (The server may use any convenient method + to identify users and their request rates). The server may include a + "Retry-After" header to indicate how long the user should wait before + retrying. + """ + + code = 429 + description = "This user has exceeded an allotted request count. Try again later." + + +class RequestHeaderFieldsTooLarge(HTTPException): + """*431* `Request Header Fields Too Large` + + The server refuses to process the request because the header fields are too + large. One or more individual fields may be too large, or the set of all + headers is too large. 
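Among the 4xx classes above, ``RequestedRangeNotSatisfiable`` carries extra wiring: its optional ``length`` argument is turned into a ``Content-Range`` header by ``get_headers``. A quick sketch::

    from werkzeug.exceptions import RequestedRangeNotSatisfiable

    exc = RequestedRangeNotSatisfiable(length=1000)
    print(dict(exc.get_headers())["Content-Range"])  # bytes */1000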
+ """ + + code = 431 + description = "One or more header fields exceeds the maximum size." + + +class UnavailableForLegalReasons(HTTPException): + """*451* `Unavailable For Legal Reasons` + + This status code indicates that the server is denying access to the + resource as a consequence of a legal demand. + """ + + code = 451 + description = "Unavailable for legal reasons." + + +class InternalServerError(HTTPException): + """*500* `Internal Server Error` + + Raise if an internal server error occurred. This is a good fallback if an + unknown error occurred in the dispatcher. + """ + + code = 500 + description = ( + "The server encountered an internal error and was unable to" + " complete your request. Either the server is overloaded or" + " there is an error in the application." + ) + + +class NotImplemented(HTTPException): + """*501* `Not Implemented` + + Raise if the application does not support the action requested by the + browser. + """ + + code = 501 + description = "The server does not support the action requested by the browser." + + +class BadGateway(HTTPException): + """*502* `Bad Gateway` + + If you do proxying in your application you should return this status code + if you received an invalid response from the upstream server it accessed + in attempting to fulfill the request. + """ + + code = 502 + description = ( + "The proxy server received an invalid response from an upstream server." + ) + + +class ServiceUnavailable(HTTPException): + """*503* `Service Unavailable` + + Status code you should return if a service is temporarily unavailable. + """ + + code = 503 + description = ( + "The server is temporarily unable to service your request due" + " to maintenance downtime or capacity problems. Please try" + " again later." + ) + + +class GatewayTimeout(HTTPException): + """*504* `Gateway Timeout` + + Status code you should return if a connection to an upstream server + times out. + """ + + code = 504 + description = "The connection to an upstream server timed out." + + +class HTTPVersionNotSupported(HTTPException): + """*505* `HTTP Version Not Supported` + + The server does not support the HTTP protocol version used in the request. + """ + + code = 505 + description = ( + "The server does not support the HTTP protocol version used in the request." + ) + + +default_exceptions = {} +__all__ = ["HTTPException"] + + +def _find_exceptions(): + for _name, obj in iteritems(globals()): + try: + is_http_exception = issubclass(obj, HTTPException) + except TypeError: + is_http_exception = False + if not is_http_exception or obj.code is None: + continue + __all__.append(obj.__name__) + old_obj = default_exceptions.get(obj.code, None) + if old_obj is not None and issubclass(obj, old_obj): + continue + default_exceptions[obj.code] = obj + + +_find_exceptions() +del _find_exceptions + + +class Aborter(object): + """When passed a dict of code -> exception items it can be used as + callable that raises exceptions. If the first argument to the + callable is an integer it will be looked up in the mapping, if it's + a WSGI application it will be raised in a proxy exception. + + The rest of the arguments are forwarded to the exception constructor. 
+ """ + + def __init__(self, mapping=None, extra=None): + if mapping is None: + mapping = default_exceptions + self.mapping = dict(mapping) + if extra is not None: + self.mapping.update(extra) + + def __call__(self, code, *args, **kwargs): + if not args and not kwargs and not isinstance(code, integer_types): + raise HTTPException(response=code) + if code not in self.mapping: + raise LookupError("no exception for %r" % code) + raise self.mapping[code](*args, **kwargs) + + +def abort(status, *args, **kwargs): + """Raises an :py:exc:`HTTPException` for the given status code or WSGI + application:: + + abort(404) # 404 Not Found + abort(Response('Hello World')) + + Can be passed a WSGI application or a status code. If a status code is + given it's looked up in the list of exceptions and will raise that + exception, if passed a WSGI application it will wrap it in a proxy WSGI + exception and raise that:: + + abort(404) + abort(Response('Hello World')) + + """ + return _aborter(status, *args, **kwargs) + + +_aborter = Aborter() + + +#: an exception that is used internally to signal both a key error and a +#: bad request. Used by a lot of the datastructures. +BadRequestKeyError = BadRequest.wrap(KeyError) + +# imported here because of circular dependencies of werkzeug.utils +from .http import HTTP_STATUS_CODES +from .utils import escape diff --git a/python/werkzeug/filesystem.py b/python/werkzeug/filesystem.py new file mode 100644 index 0000000..d016cae --- /dev/null +++ b/python/werkzeug/filesystem.py @@ -0,0 +1,64 @@ +# -*- coding: utf-8 -*- +""" + werkzeug.filesystem + ~~~~~~~~~~~~~~~~~~~ + + Various utilities for the local filesystem. + + :copyright: 2007 Pallets + :license: BSD-3-Clause +""" +import codecs +import sys +import warnings + +# We do not trust traditional unixes. +has_likely_buggy_unicode_filesystem = ( + sys.platform.startswith("linux") or "bsd" in sys.platform +) + + +def _is_ascii_encoding(encoding): + """Given an encoding this figures out if the encoding is actually ASCII (which + is something we don't actually want in most cases). This is necessary + because ASCII comes under many names such as ANSI_X3.4-1968. + """ + if encoding is None: + return False + try: + return codecs.lookup(encoding).name == "ascii" + except LookupError: + return False + + +class BrokenFilesystemWarning(RuntimeWarning, UnicodeWarning): + """The warning used by Werkzeug to signal a broken filesystem. Will only be + used once per runtime.""" + + +_warned_about_filesystem_encoding = False + + +def get_filesystem_encoding(): + """Returns the filesystem encoding that should be used. Note that this is + different from the Python understanding of the filesystem encoding which + might be deeply flawed. Do not use this value against Python's unicode APIs + because it might be different. See :ref:`filesystem-encoding` for the exact + behavior. + + The concept of a filesystem encoding in generally is not something you + should rely on. As such if you ever need to use this function except for + writing wrapper code reconsider. 
+ """ + global _warned_about_filesystem_encoding + rv = sys.getfilesystemencoding() + if has_likely_buggy_unicode_filesystem and not rv or _is_ascii_encoding(rv): + if not _warned_about_filesystem_encoding: + warnings.warn( + "Detected a misconfigured UNIX filesystem: Will use" + " UTF-8 as filesystem encoding instead of {0!r}".format(rv), + BrokenFilesystemWarning, + ) + _warned_about_filesystem_encoding = True + return "utf-8" + return rv diff --git a/python/werkzeug/formparser.py b/python/werkzeug/formparser.py new file mode 100644 index 0000000..0ddc5c8 --- /dev/null +++ b/python/werkzeug/formparser.py @@ -0,0 +1,586 @@ +# -*- coding: utf-8 -*- +""" + werkzeug.formparser + ~~~~~~~~~~~~~~~~~~~ + + This module implements the form parsing. It supports url-encoded forms + as well as non-nested multipart uploads. + + :copyright: 2007 Pallets + :license: BSD-3-Clause +""" +import codecs +import re +from functools import update_wrapper +from itertools import chain +from itertools import repeat +from itertools import tee + +from ._compat import BytesIO +from ._compat import text_type +from ._compat import to_native +from .datastructures import FileStorage +from .datastructures import Headers +from .datastructures import MultiDict +from .http import parse_options_header +from .urls import url_decode_stream +from .wsgi import get_content_length +from .wsgi import get_input_stream +from .wsgi import make_line_iter + +# there are some platforms where SpooledTemporaryFile is not available. +# In that case we need to provide a fallback. +try: + from tempfile import SpooledTemporaryFile +except ImportError: + from tempfile import TemporaryFile + + SpooledTemporaryFile = None + + +#: an iterator that yields empty strings +_empty_string_iter = repeat("") + +#: a regular expression for multipart boundaries +_multipart_boundary_re = re.compile("^[ -~]{0,200}[!-~]$") + +#: supported http encodings that are also available in python we support +#: for multipart messages. +_supported_multipart_encodings = frozenset(["base64", "quoted-printable"]) + + +def default_stream_factory( + total_content_length, filename, content_type, content_length=None +): + """The stream factory that is used per default.""" + max_size = 1024 * 500 + if SpooledTemporaryFile is not None: + return SpooledTemporaryFile(max_size=max_size, mode="wb+") + if total_content_length is None or total_content_length > max_size: + return TemporaryFile("wb+") + return BytesIO() + + +def parse_form_data( + environ, + stream_factory=None, + charset="utf-8", + errors="replace", + max_form_memory_size=None, + max_content_length=None, + cls=None, + silent=True, +): + """Parse the form data in the environ and return it as tuple in the form + ``(stream, form, files)``. You should only call this method if the + transport method is `POST`, `PUT`, or `PATCH`. + + If the mimetype of the data transmitted is `multipart/form-data` the + files multidict will be filled with `FileStorage` objects. If the + mimetype is unknown the input stream is wrapped and returned as first + argument, else the stream is empty. + + This is a shortcut for the common usage of :class:`FormDataParser`. + + Have a look at :ref:`dealing-with-request-data` for more details. + + .. versionadded:: 0.5 + The `max_form_memory_size`, `max_content_length` and + `cls` parameters were added. + + .. versionadded:: 0.5.1 + The optional `silent` flag was added. + + :param environ: the WSGI environment to be used for parsing. 
+ :param stream_factory: An optional callable that returns a new read and + writeable file descriptor. This callable works + the same as :meth:`~BaseResponse._get_file_stream`. + :param charset: The character set for URL and url encoded form data. + :param errors: The encoding error behavior. + :param max_form_memory_size: the maximum number of bytes to be accepted for + in-memory stored form data. If the data + exceeds the value specified an + :exc:`~exceptions.RequestEntityTooLarge` + exception is raised. + :param max_content_length: If this is provided and the transmitted data + is longer than this value an + :exc:`~exceptions.RequestEntityTooLarge` + exception is raised. + :param cls: an optional dict class to use. If this is not specified + or `None` the default :class:`MultiDict` is used. + :param silent: If set to False parsing errors will not be caught. + :return: A tuple in the form ``(stream, form, files)``. + """ + return FormDataParser( + stream_factory, + charset, + errors, + max_form_memory_size, + max_content_length, + cls, + silent, + ).parse_from_environ(environ) + + +def exhaust_stream(f): + """Helper decorator for methods that exhausts the stream on return.""" + + def wrapper(self, stream, *args, **kwargs): + try: + return f(self, stream, *args, **kwargs) + finally: + exhaust = getattr(stream, "exhaust", None) + if exhaust is not None: + exhaust() + else: + while 1: + chunk = stream.read(1024 * 64) + if not chunk: + break + + return update_wrapper(wrapper, f) + + +class FormDataParser(object): + """This class implements parsing of form data for Werkzeug. By itself + it can parse multipart and url encoded form data. It can be subclassed + and extended but for most mimetypes it is a better idea to use the + untouched stream and expose it as separate attributes on a request + object. + + .. versionadded:: 0.8 + + :param stream_factory: An optional callable that returns a new read and + writeable file descriptor. This callable works + the same as :meth:`~BaseResponse._get_file_stream`. + :param charset: The character set for URL and url encoded form data. + :param errors: The encoding error behavior. + :param max_form_memory_size: the maximum number of bytes to be accepted for + in-memory stored form data. If the data + exceeds the value specified an + :exc:`~exceptions.RequestEntityTooLarge` + exception is raised. + :param max_content_length: If this is provided and the transmitted data + is longer than this value an + :exc:`~exceptions.RequestEntityTooLarge` + exception is raised. + :param cls: an optional dict class to use. If this is not specified + or `None` the default :class:`MultiDict` is used. + :param silent: If set to False parsing errors will not be caught. + """ + + def __init__( + self, + stream_factory=None, + charset="utf-8", + errors="replace", + max_form_memory_size=None, + max_content_length=None, + cls=None, + silent=True, + ): + if stream_factory is None: + stream_factory = default_stream_factory + self.stream_factory = stream_factory + self.charset = charset + self.errors = errors + self.max_form_memory_size = max_form_memory_size + self.max_content_length = max_content_length + if cls is None: + cls = MultiDict + self.cls = cls + self.silent = silent + + def get_parse_func(self, mimetype, options): + return self.parse_functions.get(mimetype) + + def parse_from_environ(self, environ): + """Parses the information from the environment as form data. + + :param environ: the WSGI environment to be used for parsing. 
+ :return: A tuple in the form ``(stream, form, files)``. + """ + content_type = environ.get("CONTENT_TYPE", "") + content_length = get_content_length(environ) + mimetype, options = parse_options_header(content_type) + return self.parse(get_input_stream(environ), mimetype, content_length, options) + + def parse(self, stream, mimetype, content_length, options=None): + """Parses the information from the given stream, mimetype, + content length and mimetype parameters. + + :param stream: an input stream + :param mimetype: the mimetype of the data + :param content_length: the content length of the incoming data + :param options: optional mimetype parameters (used for + the multipart boundary for instance) + :return: A tuple in the form ``(stream, form, files)``. + """ + if ( + self.max_content_length is not None + and content_length is not None + and content_length > self.max_content_length + ): + raise exceptions.RequestEntityTooLarge() + if options is None: + options = {} + + parse_func = self.get_parse_func(mimetype, options) + if parse_func is not None: + try: + return parse_func(self, stream, mimetype, content_length, options) + except ValueError: + if not self.silent: + raise + + return stream, self.cls(), self.cls() + + @exhaust_stream + def _parse_multipart(self, stream, mimetype, content_length, options): + parser = MultiPartParser( + self.stream_factory, + self.charset, + self.errors, + max_form_memory_size=self.max_form_memory_size, + cls=self.cls, + ) + boundary = options.get("boundary") + if boundary is None: + raise ValueError("Missing boundary") + if isinstance(boundary, text_type): + boundary = boundary.encode("ascii") + form, files = parser.parse(stream, boundary, content_length) + return stream, form, files + + @exhaust_stream + def _parse_urlencoded(self, stream, mimetype, content_length, options): + if ( + self.max_form_memory_size is not None + and content_length is not None + and content_length > self.max_form_memory_size + ): + raise exceptions.RequestEntityTooLarge() + form = url_decode_stream(stream, self.charset, errors=self.errors, cls=self.cls) + return stream, form, self.cls() + + #: mapping of mimetypes to parsing functions + parse_functions = { + "multipart/form-data": _parse_multipart, + "application/x-www-form-urlencoded": _parse_urlencoded, + "application/x-url-encoded": _parse_urlencoded, + } + + +def is_valid_multipart_boundary(boundary): + """Checks if the string given is a valid multipart boundary.""" + return _multipart_boundary_re.match(boundary) is not None + + +def _line_parse(line): + """Removes line ending characters and returns a tuple (`stripped_line`, + `is_terminated`). + """ + if line[-2:] in ["\r\n", b"\r\n"]: + return line[:-2], True + elif line[-1:] in ["\r", "\n", b"\r", b"\n"]: + return line[:-1], True + return line, False + + +def parse_multipart_headers(iterable): + """Parses multipart headers from an iterable that yields lines (including + the trailing newline symbol). The iterable has to be newline terminated. + + The iterable will stop at the line where the headers ended so it can be + further consumed. 
+ + :param iterable: iterable of strings that are newline terminated + """ + result = [] + for line in iterable: + line = to_native(line) + line, line_terminated = _line_parse(line) + if not line_terminated: + raise ValueError("unexpected end of line in multipart header") + if not line: + break + elif line[0] in " \t" and result: + key, value = result[-1] + result[-1] = (key, value + "\n " + line[1:]) + else: + parts = line.split(":", 1) + if len(parts) == 2: + result.append((parts[0].strip(), parts[1].strip())) + + # we link the list to the headers, no need to create a copy, the + # list was not shared anyways. + return Headers(result) + + +_begin_form = "begin_form" +_begin_file = "begin_file" +_cont = "cont" +_end = "end" + + +class MultiPartParser(object): + def __init__( + self, + stream_factory=None, + charset="utf-8", + errors="replace", + max_form_memory_size=None, + cls=None, + buffer_size=64 * 1024, + ): + self.charset = charset + self.errors = errors + self.max_form_memory_size = max_form_memory_size + self.stream_factory = ( + default_stream_factory if stream_factory is None else stream_factory + ) + self.cls = MultiDict if cls is None else cls + + # make sure the buffer size is divisible by four so that we can base64 + # decode chunk by chunk + assert buffer_size % 4 == 0, "buffer size has to be divisible by 4" + # also the buffer size has to be at least 1024 bytes long or long headers + # will freak out the system + assert buffer_size >= 1024, "buffer size has to be at least 1KB" + + self.buffer_size = buffer_size + + def _fix_ie_filename(self, filename): + """Internet Explorer 6 transmits the full file name if a file is + uploaded. This function strips the full path if it thinks the + filename is Windows-like absolute. + """ + if filename[1:3] == ":\\" or filename[:2] == "\\\\": + return filename.split("\\")[-1] + return filename + + def _find_terminator(self, iterator): + """The terminator might have some additional newlines before it. + There is at least one application that sends additional newlines + before headers (the python setuptools package). 
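All of this multipart machinery is normally driven through ``parse_form_data`` rather than used directly. A hedged sketch with a hand-built urlencoded environ (the dict keys are standard WSGI, nothing specific to this module)::

    from io import BytesIO
    from werkzeug.formparser import parse_form_data

    body = b"name=flask&tag=web"
    environ = {
        "REQUEST_METHOD": "POST",
        "CONTENT_TYPE": "application/x-www-form-urlencoded",
        "CONTENT_LENGTH": str(len(body)),
        "wsgi.input": BytesIO(body),
    }
    stream, form, files = parse_form_data(environ)
    print(form["name"], len(files))  # flask 0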
+ """ + for line in iterator: + if not line: + break + line = line.strip() + if line: + return line + return b"" + + def fail(self, message): + raise ValueError(message) + + def get_part_encoding(self, headers): + transfer_encoding = headers.get("content-transfer-encoding") + if ( + transfer_encoding is not None + and transfer_encoding in _supported_multipart_encodings + ): + return transfer_encoding + + def get_part_charset(self, headers): + # Figure out input charset for current part + content_type = headers.get("content-type") + if content_type: + mimetype, ct_params = parse_options_header(content_type) + return ct_params.get("charset", self.charset) + return self.charset + + def start_file_streaming(self, filename, headers, total_content_length): + if isinstance(filename, bytes): + filename = filename.decode(self.charset, self.errors) + filename = self._fix_ie_filename(filename) + content_type = headers.get("content-type") + try: + content_length = int(headers["content-length"]) + except (KeyError, ValueError): + content_length = 0 + container = self.stream_factory( + total_content_length=total_content_length, + filename=filename, + content_type=content_type, + content_length=content_length, + ) + return filename, container + + def in_memory_threshold_reached(self, bytes): + raise exceptions.RequestEntityTooLarge() + + def validate_boundary(self, boundary): + if not boundary: + self.fail("Missing boundary") + if not is_valid_multipart_boundary(boundary): + self.fail("Invalid boundary: %s" % boundary) + if len(boundary) > self.buffer_size: # pragma: no cover + # this should never happen because we check for a minimum size + # of 1024 and boundaries may not be longer than 200. The only + # situation when this happens is for non debug builds where + # the assert is skipped. + self.fail("Boundary longer than buffer size") + + def parse_lines(self, file, boundary, content_length, cap_at_buffer=True): + """Generate parts of + ``('begin_form', (headers, name))`` + ``('begin_file', (headers, name, filename))`` + ``('cont', bytestring)`` + ``('end', None)`` + + Always obeys the grammar + parts = ( begin_form cont* end | + begin_file cont* end )* + """ + next_part = b"--" + boundary + last_part = next_part + b"--" + + iterator = chain( + make_line_iter( + file, + limit=content_length, + buffer_size=self.buffer_size, + cap_at_buffer=cap_at_buffer, + ), + _empty_string_iter, + ) + + terminator = self._find_terminator(iterator) + + if terminator == last_part: + return + elif terminator != next_part: + self.fail("Expected boundary at start of multipart data") + + while terminator != last_part: + headers = parse_multipart_headers(iterator) + + disposition = headers.get("content-disposition") + if disposition is None: + self.fail("Missing Content-Disposition header") + disposition, extra = parse_options_header(disposition) + transfer_encoding = self.get_part_encoding(headers) + name = extra.get("name") + filename = extra.get("filename") + + # if no content type is given we stream into memory. A list is + # used as a temporary container. + if filename is None: + yield _begin_form, (headers, name) + + # otherwise we parse the rest of the headers and ask the stream + # factory for something we can write in. 
+ else:
+ yield _begin_file, (headers, name, filename)
+
+ buf = b""
+ for line in iterator:
+ if not line:
+ self.fail("unexpected end of stream")
+
+ if line[:2] == b"--":
+ terminator = line.rstrip()
+ if terminator in (next_part, last_part):
+ break
+
+ if transfer_encoding is not None:
+ if transfer_encoding == "base64":
+ transfer_encoding = "base64_codec"
+ try:
+ line = codecs.decode(line, transfer_encoding)
+ except Exception:
+ self.fail("could not decode transfer encoded chunk")
+
+ # we have something in the buffer from the last iteration.
+ # this is usually a newline delimiter.
+ if buf:
+ yield _cont, buf
+ buf = b""
+
+ # If the line ends with Windows CRLF we write everything except
+ # the last two bytes. In all other cases however we write
+ # everything except the last byte. If it was a newline, that's
+ # fine, otherwise it does not matter because we will write it
+ # the next iteration. This ensures we do not write the
+ # final newline into the stream. That way we do not have to
+ # truncate the stream. However we do have to make sure that
+ # if something other than a newline is in there we write it
+ # out.
+ if line[-2:] == b"\r\n":
+ buf = b"\r\n"
+ cutoff = -2
+ else:
+ buf = line[-1:]
+ cutoff = -1
+ yield _cont, line[:cutoff]
+
+ else: # pragma: no cover
+ raise ValueError("unexpected end of part")
+
+ # if we have a leftover in the buffer that is not a newline
+ # character we have to flush it, otherwise we will chop off
+ # certain values.
+ if buf not in (b"", b"\r", b"\n", b"\r\n"):
+ yield _cont, buf
+
+ yield _end, None
+
+ def parse_parts(self, file, boundary, content_length):
+ """Generate ``('file', (name, val))`` and
+ ``('form', (name, val))`` parts.
+ """
+ in_memory = 0
+
+ for ellt, ell in self.parse_lines(file, boundary, content_length):
+ if ellt == _begin_file:
+ headers, name, filename = ell
+ is_file = True
+ guard_memory = False
+ filename, container = self.start_file_streaming(
+ filename, headers, content_length
+ )
+ _write = container.write
+
+ elif ellt == _begin_form:
+ headers, name = ell
+ is_file = False
+ container = []
+ _write = container.append
+ guard_memory = self.max_form_memory_size is not None
+
+ elif ellt == _cont:
+ _write(ell)
+ # if we write into memory and there is a memory size limit we
+ # count the number of bytes in memory and raise an exception if
+ # there is too much data in memory.
+ if guard_memory:
+ in_memory += len(ell)
+ if in_memory > self.max_form_memory_size:
+ self.in_memory_threshold_reached(in_memory)
+
+ elif ellt == _end:
+ if is_file:
+ container.seek(0)
+ yield (
+ "file",
+ (name, FileStorage(container, filename, name, headers=headers)),
+ )
+ else:
+ part_charset = self.get_part_charset(headers)
+ yield (
+ "form",
+ (name, b"".join(container).decode(part_charset, self.errors)),
+ )
+
+ def parse(self, file, boundary, content_length):
+ formstream, filestream = tee(
+ self.parse_parts(file, boundary, content_length), 2
+ )
+ form = (p[1] for p in formstream if p[0] == "form")
+ files = (p[1] for p in filestream if p[0] == "file")
+ return self.cls(form), self.cls(files)
+
+
+from . import exceptions
diff --git a/python/werkzeug/http.py b/python/werkzeug/http.py
new file mode 100644
index 0000000..af32007
--- /dev/null
+++ b/python/werkzeug/http.py
@@ -0,0 +1,1303 @@
+# -*- coding: utf-8 -*-
+"""
+ werkzeug.http
+ ~~~~~~~~~~~~~
+
+ Werkzeug comes with a bunch of utilities that help Werkzeug to deal with
+ HTTP data.
Most of the classes and functions provided by this module are + used by the wrappers, but they are useful on their own, too, especially if + the response and request objects are not used. + + This covers some of the more HTTP centric features of WSGI, some other + utilities such as cookie handling are documented in the `werkzeug.utils` + module. + + + :copyright: 2007 Pallets + :license: BSD-3-Clause +""" +import base64 +import re +import warnings +from datetime import datetime +from datetime import timedelta +from hashlib import md5 +from time import gmtime +from time import time + +from ._compat import integer_types +from ._compat import iteritems +from ._compat import PY2 +from ._compat import string_types +from ._compat import text_type +from ._compat import to_bytes +from ._compat import to_unicode +from ._compat import try_coerce_native +from ._internal import _cookie_parse_impl +from ._internal import _cookie_quote +from ._internal import _make_cookie_domain + +try: + from email.utils import parsedate_tz +except ImportError: + from email.Utils import parsedate_tz + +try: + from urllib.request import parse_http_list as _parse_list_header + from urllib.parse import unquote_to_bytes as _unquote +except ImportError: + from urllib2 import parse_http_list as _parse_list_header + from urllib2 import unquote as _unquote + +_cookie_charset = "latin1" +_basic_auth_charset = "utf-8" +# for explanation of "media-range", etc. see Sections 5.3.{1,2} of RFC 7231 +_accept_re = re.compile( + r""" + ( # media-range capturing-parenthesis + [^\s;,]+ # type/subtype + (?:[ \t]*;[ \t]* # ";" + (?: # parameter non-capturing-parenthesis + [^\s;,q][^\s;,]* # token that doesn't start with "q" + | # or + q[^\s;,=][^\s;,]* # token that is more than just "q" + ) + )* # zero or more parameters + ) # end of media-range + (?:[ \t]*;[ \t]*q= # weight is a "q" parameter + (\d*(?:\.\d+)?) # qvalue capturing-parentheses + [^,]* # "extension" accept params: who cares? + )? # accept params are optional + """, + re.VERBOSE, +) +_token_chars = frozenset( + "!#$%&'*+-.0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxyz|~" +) +_etag_re = re.compile(r'([Ww]/)?(?:"(.*?)"|(.*?))(?:\s*,\s*|$)') +_unsafe_header_chars = set('()<>@,;:"/[]?={} \t') +_option_header_piece_re = re.compile( + r""" + ;\s*,?\s* # newlines were replaced with commas + (?P<key> + "[^"\\]*(?:\\.[^"\\]*)*" # quoted string + | + [^\s;,=*]+ # token + ) + (?:\*(?P<count>\d+))? # *1, optional continuation index + \s* + (?: # optionally followed by =value + (?: # equals sign, possibly with encoding + \*\s*=\s* # * indicates extended notation + (?: # optional encoding + (?P<encoding>[^\s]+?) + '(?P<language>[^\s]*?)' + )? + | + =\s* # basic notation + ) + (?P<value> + "[^"\\]*(?:\\.[^"\\]*)*" # quoted string + | + [^;,]+ # token + )? + )? 
+ \s* + """, + flags=re.VERBOSE, +) +_option_header_start_mime_type = re.compile(r",\s*([^;,\s]+)([;,]\s*.+)?") + +_entity_headers = frozenset( + [ + "allow", + "content-encoding", + "content-language", + "content-length", + "content-location", + "content-md5", + "content-range", + "content-type", + "expires", + "last-modified", + ] +) +_hop_by_hop_headers = frozenset( + [ + "connection", + "keep-alive", + "proxy-authenticate", + "proxy-authorization", + "te", + "trailer", + "transfer-encoding", + "upgrade", + ] +) + + +HTTP_STATUS_CODES = { + 100: "Continue", + 101: "Switching Protocols", + 102: "Processing", + 200: "OK", + 201: "Created", + 202: "Accepted", + 203: "Non Authoritative Information", + 204: "No Content", + 205: "Reset Content", + 206: "Partial Content", + 207: "Multi Status", + 226: "IM Used", # see RFC 3229 + 300: "Multiple Choices", + 301: "Moved Permanently", + 302: "Found", + 303: "See Other", + 304: "Not Modified", + 305: "Use Proxy", + 307: "Temporary Redirect", + 308: "Permanent Redirect", + 400: "Bad Request", + 401: "Unauthorized", + 402: "Payment Required", # unused + 403: "Forbidden", + 404: "Not Found", + 405: "Method Not Allowed", + 406: "Not Acceptable", + 407: "Proxy Authentication Required", + 408: "Request Timeout", + 409: "Conflict", + 410: "Gone", + 411: "Length Required", + 412: "Precondition Failed", + 413: "Request Entity Too Large", + 414: "Request URI Too Long", + 415: "Unsupported Media Type", + 416: "Requested Range Not Satisfiable", + 417: "Expectation Failed", + 418: "I'm a teapot", # see RFC 2324 + 421: "Misdirected Request", # see RFC 7540 + 422: "Unprocessable Entity", + 423: "Locked", + 424: "Failed Dependency", + 426: "Upgrade Required", + 428: "Precondition Required", # see RFC 6585 + 429: "Too Many Requests", + 431: "Request Header Fields Too Large", + 449: "Retry With", # proprietary MS extension + 451: "Unavailable For Legal Reasons", + 500: "Internal Server Error", + 501: "Not Implemented", + 502: "Bad Gateway", + 503: "Service Unavailable", + 504: "Gateway Timeout", + 505: "HTTP Version Not Supported", + 507: "Insufficient Storage", + 510: "Not Extended", +} + + +def wsgi_to_bytes(data): + """coerce wsgi unicode represented bytes to real ones""" + if isinstance(data, bytes): + return data + return data.encode("latin1") # XXX: utf8 fallback? + + +def bytes_to_wsgi(data): + assert isinstance(data, bytes), "data must be bytes" + if isinstance(data, str): + return data + else: + return data.decode("latin1") + + +def quote_header_value(value, extra_chars="", allow_token=True): + """Quote a header value if necessary. + + .. versionadded:: 0.5 + + :param value: the value to quote. + :param extra_chars: a list of extra characters to skip quoting. + :param allow_token: if this is enabled token values are returned + unchanged. + """ + if isinstance(value, bytes): + value = bytes_to_wsgi(value) + value = str(value) + if allow_token: + token_chars = _token_chars | set(extra_chars) + if set(value).issubset(token_chars): + return value + return '"%s"' % value.replace("\\", "\\\\").replace('"', '\\"') + + +def unquote_header_value(value, is_filename=False): + r"""Unquotes a header value. (Reversal of :func:`quote_header_value`). + This does not use the real unquoting but what browsers are actually + using for quoting. + + .. versionadded:: 0.5 + + :param value: the header value to unquote. 
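Round-tripping the two helpers above, assuming the vendored werkzeug is importable: plain tokens pass through ``quote_header_value`` untouched, anything else gains quotes that ``unquote_header_value`` strips again::

    from werkzeug.http import quote_header_value, unquote_header_value

    print(quote_header_value("simple-token"))  # simple-token
    quoted = quote_header_value("a value with spaces")
    print(quoted)  # "a value with spaces"
    print(unquote_header_value(quoted))  # a value with spaces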
+ """ + if value and value[0] == value[-1] == '"': + # this is not the real unquoting, but fixing this so that the + # RFC is met will result in bugs with internet explorer and + # probably some other browsers as well. IE for example is + # uploading files with "C:\foo\bar.txt" as filename + value = value[1:-1] + + # if this is a filename and the starting characters look like + # a UNC path, then just return the value without quotes. Using the + # replace sequence below on a UNC path has the effect of turning + # the leading double slash into a single slash and then + # _fix_ie_filename() doesn't work correctly. See #458. + if not is_filename or value[:2] != "\\\\": + return value.replace("\\\\", "\\").replace('\\"', '"') + return value + + +def dump_options_header(header, options): + """The reverse function to :func:`parse_options_header`. + + :param header: the header to dump + :param options: a dict of options to append. + """ + segments = [] + if header is not None: + segments.append(header) + for key, value in iteritems(options): + if value is None: + segments.append(key) + else: + segments.append("%s=%s" % (key, quote_header_value(value))) + return "; ".join(segments) + + +def dump_header(iterable, allow_token=True): + """Dump an HTTP header again. This is the reversal of + :func:`parse_list_header`, :func:`parse_set_header` and + :func:`parse_dict_header`. This also quotes strings that include an + equals sign unless you pass it as dict of key, value pairs. + + >>> dump_header({'foo': 'bar baz'}) + 'foo="bar baz"' + >>> dump_header(('foo', 'bar baz')) + 'foo, "bar baz"' + + :param iterable: the iterable or dict of values to quote. + :param allow_token: if set to `False` tokens as values are disallowed. + See :func:`quote_header_value` for more details. + """ + if isinstance(iterable, dict): + items = [] + for key, value in iteritems(iterable): + if value is None: + items.append(key) + else: + items.append( + "%s=%s" % (key, quote_header_value(value, allow_token=allow_token)) + ) + else: + items = [quote_header_value(x, allow_token=allow_token) for x in iterable] + return ", ".join(items) + + +def parse_list_header(value): + """Parse lists as described by RFC 2068 Section 2. + + In particular, parse comma-separated lists where the elements of + the list may include quoted-strings. A quoted-string could + contain a comma. A non-quoted string could have quotes in the + middle. Quotes are removed automatically after parsing. + + It basically works like :func:`parse_set_header` just that items + may appear multiple times and case sensitivity is preserved. + + The return value is a standard :class:`list`: + + >>> parse_list_header('token, "quoted value"') + ['token', 'quoted value'] + + To create a header from the :class:`list` again, use the + :func:`dump_header` function. + + :param value: a string with a list header. 
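+        Quoted parts may contain commas; e.g. the assumed input
+        ``'a, "b, c", d'`` parses to ``['a', 'b, c', 'd']``.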
+ :return: :class:`list` + """ + result = [] + for item in _parse_list_header(value): + if item[:1] == item[-1:] == '"': + item = unquote_header_value(item[1:-1]) + result.append(item) + return result + + +def parse_dict_header(value, cls=dict): + """Parse lists of key, value pairs as described by RFC 2068 Section 2 and + convert them into a python dict (or any other mapping object created from + the type with a dict like interface provided by the `cls` argument): + + >>> d = parse_dict_header('foo="is a fish", bar="as well"') + >>> type(d) is dict + True + >>> sorted(d.items()) + [('bar', 'as well'), ('foo', 'is a fish')] + + If there is no value for a key it will be `None`: + + >>> parse_dict_header('key_without_value') + {'key_without_value': None} + + To create a header from the :class:`dict` again, use the + :func:`dump_header` function. + + .. versionchanged:: 0.9 + Added support for `cls` argument. + + :param value: a string with a dict header. + :param cls: callable to use for storage of parsed results. + :return: an instance of `cls` + """ + result = cls() + if not isinstance(value, text_type): + # XXX: validate + value = bytes_to_wsgi(value) + for item in _parse_list_header(value): + if "=" not in item: + result[item] = None + continue + name, value = item.split("=", 1) + if value[:1] == value[-1:] == '"': + value = unquote_header_value(value[1:-1]) + result[name] = value + return result + + +def parse_options_header(value, multiple=False): + """Parse a ``Content-Type`` like header into a tuple with the content + type and the options: + + >>> parse_options_header('text/html; charset=utf8') + ('text/html', {'charset': 'utf8'}) + + This should not be used to parse ``Cache-Control`` like headers that use + a slightly different format. For these headers use the + :func:`parse_dict_header` function. + + .. versionchanged:: 0.15 + :rfc:`2231` parameter continuations are handled. + + .. versionadded:: 0.5 + + :param value: the header to parse. + :param multiple: Whether try to parse and return multiple MIME types + :return: (mimetype, options) or (mimetype, options, mimetype, options, …) + if multiple=True + """ + if not value: + return "", {} + + result = [] + + value = "," + value.replace("\n", ",") + while value: + match = _option_header_start_mime_type.match(value) + if not match: + break + result.append(match.group(1)) # mimetype + options = {} + # Parse options + rest = match.group(2) + continued_encoding = None + while rest: + optmatch = _option_header_piece_re.match(rest) + if not optmatch: + break + option, count, encoding, language, option_value = optmatch.groups() + # Continuations don't have to supply the encoding after the + # first line. If we're in a continuation, track the current + # encoding to use for subsequent lines. Reset it when the + # continuation ends. + if not count: + continued_encoding = None + else: + if not encoding: + encoding = continued_encoding + continued_encoding = encoding + option = unquote_header_value(option) + if option_value is not None: + option_value = unquote_header_value(option_value, option == "filename") + if encoding is not None: + option_value = _unquote(option_value).decode(encoding) + if count: + # Continuations append to the existing value. For + # simplicity, this ignores the possibility of + # out-of-order indices, which shouldn't happen anyway. 
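+                    # Assumed example: parameters "filename*0" and
+                    # "filename*1" with values "ab" and "cd" accumulate
+                    # here to options["filename"] == "abcd".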
+ options[option] = options.get(option, "") + option_value + else: + options[option] = option_value + rest = rest[optmatch.end() :] + result.append(options) + if multiple is False: + return tuple(result) + value = rest + + return tuple(result) if result else ("", {}) + + +def parse_accept_header(value, cls=None): + """Parses an HTTP Accept-* header. This does not implement a complete + valid algorithm but one that supports at least value and quality + extraction. + + Returns a new :class:`Accept` object (basically a list of ``(value, quality)`` + tuples sorted by the quality with some additional accessor methods). + + The second parameter can be a subclass of :class:`Accept` that is created + with the parsed values and returned. + + :param value: the accept header string to be parsed. + :param cls: the wrapper class for the return value (can be + :class:`Accept` or a subclass thereof) + :return: an instance of `cls`. + """ + if cls is None: + cls = Accept + + if not value: + return cls(None) + + result = [] + for match in _accept_re.finditer(value): + quality = match.group(2) + if not quality: + quality = 1 + else: + quality = max(min(float(quality), 1), 0) + result.append((match.group(1), quality)) + return cls(result) + + +def parse_cache_control_header(value, on_update=None, cls=None): + """Parse a cache control header. The RFC differs between response and + request cache control, this method does not. It's your responsibility + to not use the wrong control statements. + + .. versionadded:: 0.5 + The `cls` was added. If not specified an immutable + :class:`~werkzeug.datastructures.RequestCacheControl` is returned. + + :param value: a cache control header to be parsed. + :param on_update: an optional callable that is called every time a value + on the :class:`~werkzeug.datastructures.CacheControl` + object is changed. + :param cls: the class for the returned object. By default + :class:`~werkzeug.datastructures.RequestCacheControl` is used. + :return: a `cls` object. + """ + if cls is None: + cls = RequestCacheControl + if not value: + return cls(None, on_update) + return cls(parse_dict_header(value), on_update) + + +def parse_set_header(value, on_update=None): + """Parse a set-like header and return a + :class:`~werkzeug.datastructures.HeaderSet` object: + + >>> hs = parse_set_header('token, "quoted value"') + + The return value is an object that treats the items case-insensitively + and keeps the order of the items: + + >>> 'TOKEN' in hs + True + >>> hs.index('quoted value') + 1 + >>> hs + HeaderSet(['token', 'quoted value']) + + To create a header from the :class:`HeaderSet` again, use the + :func:`dump_header` function. + + :param value: a set header to be parsed. + :param on_update: an optional callable that is called every time a + value on the :class:`~werkzeug.datastructures.HeaderSet` + object is changed. + :return: a :class:`~werkzeug.datastructures.HeaderSet` + """ + if not value: + return HeaderSet(None, on_update) + return HeaderSet(parse_list_header(value), on_update) + + +def parse_authorization_header(value): + """Parse an HTTP basic/digest authorization header transmitted by the web + browser. The return value is either `None` if the header was invalid or + not given, otherwise an :class:`~werkzeug.datastructures.Authorization` + object. + + :param value: the authorization header to parse. + :return: a :class:`~werkzeug.datastructures.Authorization` object or `None`. 
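+
+    A usage sketch (the base64 payload is an assumed example that
+    decodes to ``user:pass``)::
+
+        auth = parse_authorization_header("Basic dXNlcjpwYXNz")
+        # auth.username == u'user', auth.password == u'pass'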
+ """ + if not value: + return + value = wsgi_to_bytes(value) + try: + auth_type, auth_info = value.split(None, 1) + auth_type = auth_type.lower() + except ValueError: + return + if auth_type == b"basic": + try: + username, password = base64.b64decode(auth_info).split(b":", 1) + except Exception: + return + return Authorization( + "basic", + { + "username": to_unicode(username, _basic_auth_charset), + "password": to_unicode(password, _basic_auth_charset), + }, + ) + elif auth_type == b"digest": + auth_map = parse_dict_header(auth_info) + for key in "username", "realm", "nonce", "uri", "response": + if key not in auth_map: + return + if "qop" in auth_map: + if not auth_map.get("nc") or not auth_map.get("cnonce"): + return + return Authorization("digest", auth_map) + + +def parse_www_authenticate_header(value, on_update=None): + """Parse an HTTP WWW-Authenticate header into a + :class:`~werkzeug.datastructures.WWWAuthenticate` object. + + :param value: a WWW-Authenticate header to parse. + :param on_update: an optional callable that is called every time a value + on the :class:`~werkzeug.datastructures.WWWAuthenticate` + object is changed. + :return: a :class:`~werkzeug.datastructures.WWWAuthenticate` object. + """ + if not value: + return WWWAuthenticate(on_update=on_update) + try: + auth_type, auth_info = value.split(None, 1) + auth_type = auth_type.lower() + except (ValueError, AttributeError): + return WWWAuthenticate(value.strip().lower(), on_update=on_update) + return WWWAuthenticate(auth_type, parse_dict_header(auth_info), on_update) + + +def parse_if_range_header(value): + """Parses an if-range header which can be an etag or a date. Returns + a :class:`~werkzeug.datastructures.IfRange` object. + + .. versionadded:: 0.7 + """ + if not value: + return IfRange() + date = parse_date(value) + if date is not None: + return IfRange(date=date) + # drop weakness information + return IfRange(unquote_etag(value)[0]) + + +def parse_range_header(value, make_inclusive=True): + """Parses a range header into a :class:`~werkzeug.datastructures.Range` + object. If the header is missing or malformed `None` is returned. + `ranges` is a list of ``(start, stop)`` tuples where the ranges are + non-inclusive. + + .. versionadded:: 0.7 + """ + if not value or "=" not in value: + return None + + ranges = [] + last_end = 0 + units, rng = value.split("=", 1) + units = units.strip().lower() + + for item in rng.split(","): + item = item.strip() + if "-" not in item: + return None + if item.startswith("-"): + if last_end < 0: + return None + try: + begin = int(item) + except ValueError: + return None + end = None + last_end = -1 + elif "-" in item: + begin, end = item.split("-", 1) + begin = begin.strip() + end = end.strip() + if not begin.isdigit(): + return None + begin = int(begin) + if begin < last_end or last_end < 0: + return None + if end: + if not end.isdigit(): + return None + end = int(end) + 1 + if begin >= end: + return None + else: + end = None + last_end = end + ranges.append((begin, end)) + + return Range(units, ranges) + + +def parse_content_range_header(value, on_update=None): + """Parses a range header into a + :class:`~werkzeug.datastructures.ContentRange` object or `None` if + parsing is not possible. + + .. versionadded:: 0.7 + + :param value: a content range header to be parsed. + :param on_update: an optional callable that is called every time a value + on the :class:`~werkzeug.datastructures.ContentRange` + object is changed. 
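+
+    Example (an assumed header value; note that the returned ``stop``
+    is exclusive, i.e. one past the last byte)::
+
+        rng = parse_content_range_header("bytes 0-98/100")
+        # rng.start == 0, rng.stop == 99, rng.length == 100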
+ """ + if value is None: + return None + try: + units, rangedef = (value or "").strip().split(None, 1) + except ValueError: + return None + + if "/" not in rangedef: + return None + rng, length = rangedef.split("/", 1) + if length == "*": + length = None + elif length.isdigit(): + length = int(length) + else: + return None + + if rng == "*": + return ContentRange(units, None, None, length, on_update=on_update) + elif "-" not in rng: + return None + + start, stop = rng.split("-", 1) + try: + start = int(start) + stop = int(stop) + 1 + except ValueError: + return None + + if is_byte_range_valid(start, stop, length): + return ContentRange(units, start, stop, length, on_update=on_update) + + +def quote_etag(etag, weak=False): + """Quote an etag. + + :param etag: the etag to quote. + :param weak: set to `True` to tag it "weak". + """ + if '"' in etag: + raise ValueError("invalid etag") + etag = '"%s"' % etag + if weak: + etag = "W/" + etag + return etag + + +def unquote_etag(etag): + """Unquote a single etag: + + >>> unquote_etag('W/"bar"') + ('bar', True) + >>> unquote_etag('"bar"') + ('bar', False) + + :param etag: the etag identifier to unquote. + :return: a ``(etag, weak)`` tuple. + """ + if not etag: + return None, None + etag = etag.strip() + weak = False + if etag.startswith(("W/", "w/")): + weak = True + etag = etag[2:] + if etag[:1] == etag[-1:] == '"': + etag = etag[1:-1] + return etag, weak + + +def parse_etags(value): + """Parse an etag header. + + :param value: the tag header to parse + :return: an :class:`~werkzeug.datastructures.ETags` object. + """ + if not value: + return ETags() + strong = [] + weak = [] + end = len(value) + pos = 0 + while pos < end: + match = _etag_re.match(value, pos) + if match is None: + break + is_weak, quoted, raw = match.groups() + if raw == "*": + return ETags(star_tag=True) + elif quoted: + raw = quoted + if is_weak: + weak.append(raw) + else: + strong.append(raw) + pos = match.end() + return ETags(strong, weak) + + +def generate_etag(data): + """Generate an etag for some data.""" + return md5(data).hexdigest() + + +def parse_date(value): + """Parse one of the following date formats into a datetime object: + + .. sourcecode:: text + + Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123 + Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036 + Sun Nov 6 08:49:37 1994 ; ANSI C's asctime() format + + If parsing fails the return value is `None`. + + :param value: a string with a supported date format. + :return: a :class:`datetime.datetime` object. + """ + if value: + t = parsedate_tz(value.strip()) + if t is not None: + try: + year = t[0] + # unfortunately that function does not tell us if two digit + # years were part of the string, or if they were prefixed + # with two zeroes. 
So what we do is to assume that 69-99 + # refer to 1900, and everything below to 2000 + if year >= 0 and year <= 68: + year += 2000 + elif year >= 69 and year <= 99: + year += 1900 + return datetime(*((year,) + t[1:7])) - timedelta(seconds=t[-1] or 0) + except (ValueError, OverflowError): + return None + + +def _dump_date(d, delim): + """Used for `http_date` and `cookie_date`.""" + if d is None: + d = gmtime() + elif isinstance(d, datetime): + d = d.utctimetuple() + elif isinstance(d, (integer_types, float)): + d = gmtime(d) + return "%s, %02d%s%s%s%s %02d:%02d:%02d GMT" % ( + ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")[d.tm_wday], + d.tm_mday, + delim, + ( + "Jan", + "Feb", + "Mar", + "Apr", + "May", + "Jun", + "Jul", + "Aug", + "Sep", + "Oct", + "Nov", + "Dec", + )[d.tm_mon - 1], + delim, + str(d.tm_year), + d.tm_hour, + d.tm_min, + d.tm_sec, + ) + + +def cookie_date(expires=None): + """Formats the time to ensure compatibility with Netscape's cookie + standard. + + Accepts a floating point number expressed in seconds since the epoch in, a + datetime object or a timetuple. All times in UTC. The :func:`parse_date` + function can be used to parse such a date. + + Outputs a string in the format ``Wdy, DD-Mon-YYYY HH:MM:SS GMT``. + + :param expires: If provided that date is used, otherwise the current. + """ + return _dump_date(expires, "-") + + +def http_date(timestamp=None): + """Formats the time to match the RFC1123 date format. + + Accepts a floating point number expressed in seconds since the epoch in, a + datetime object or a timetuple. All times in UTC. The :func:`parse_date` + function can be used to parse such a date. + + Outputs a string in the format ``Wdy, DD Mon YYYY HH:MM:SS GMT``. + + :param timestamp: If provided that date is used, otherwise the current. + """ + return _dump_date(timestamp, " ") + + +def parse_age(value=None): + """Parses a base-10 integer count of seconds into a timedelta. + + If parsing fails, the return value is `None`. + + :param value: a string consisting of an integer represented in base-10 + :return: a :class:`datetime.timedelta` object or `None`. + """ + if not value: + return None + try: + seconds = int(value) + except ValueError: + return None + if seconds < 0: + return None + try: + return timedelta(seconds=seconds) + except OverflowError: + return None + + +def dump_age(age=None): + """Formats the duration as a base-10 integer. + + :param age: should be an integer number of seconds, + a :class:`datetime.timedelta` object, or, + if the age is unknown, `None` (default). + """ + if age is None: + return + if isinstance(age, timedelta): + # do the equivalent of Python 2.7's timedelta.total_seconds(), + # but disregarding fractional seconds + age = age.seconds + (age.days * 24 * 3600) + + age = int(age) + if age < 0: + raise ValueError("age cannot be negative") + + return str(age) + + +def is_resource_modified( + environ, etag=None, data=None, last_modified=None, ignore_if_range=True +): + """Convenience method for conditional requests. + + :param environ: the WSGI environment of the request to be checked. + :param etag: the etag for the response for comparison. + :param data: or alternatively the data of the response to automatically + generate an etag using :func:`generate_etag`. + :param last_modified: an optional date of the last modification. + :param ignore_if_range: If `False`, `If-Range` header will be taken into + account. + :return: `True` if the resource was modified, otherwise `False`. 
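+
+    A minimal sketch (``environ`` and ``start_response`` come from the
+    enclosing WSGI callable; the etag value is an assumed example)::
+
+        if not is_resource_modified(environ, etag="v1"):
+            start_response("304 Not Modified", [])
+            return [b""]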
+ """ + if etag is None and data is not None: + etag = generate_etag(data) + elif data is not None: + raise TypeError("both data and etag given") + if environ["REQUEST_METHOD"] not in ("GET", "HEAD"): + return False + + unmodified = False + if isinstance(last_modified, string_types): + last_modified = parse_date(last_modified) + + # ensure that microsecond is zero because the HTTP spec does not transmit + # that either and we might have some false positives. See issue #39 + if last_modified is not None: + last_modified = last_modified.replace(microsecond=0) + + if_range = None + if not ignore_if_range and "HTTP_RANGE" in environ: + # https://tools.ietf.org/html/rfc7233#section-3.2 + # A server MUST ignore an If-Range header field received in a request + # that does not contain a Range header field. + if_range = parse_if_range_header(environ.get("HTTP_IF_RANGE")) + + if if_range is not None and if_range.date is not None: + modified_since = if_range.date + else: + modified_since = parse_date(environ.get("HTTP_IF_MODIFIED_SINCE")) + + if modified_since and last_modified and last_modified <= modified_since: + unmodified = True + + if etag: + etag, _ = unquote_etag(etag) + if if_range is not None and if_range.etag is not None: + unmodified = parse_etags(if_range.etag).contains(etag) + else: + if_none_match = parse_etags(environ.get("HTTP_IF_NONE_MATCH")) + if if_none_match: + # https://tools.ietf.org/html/rfc7232#section-3.2 + # "A recipient MUST use the weak comparison function when comparing + # entity-tags for If-None-Match" + unmodified = if_none_match.contains_weak(etag) + + # https://tools.ietf.org/html/rfc7232#section-3.1 + # "Origin server MUST use the strong comparison function when + # comparing entity-tags for If-Match" + if_match = parse_etags(environ.get("HTTP_IF_MATCH")) + if if_match: + unmodified = not if_match.is_strong(etag) + + return not unmodified + + +def remove_entity_headers(headers, allowed=("expires", "content-location")): + """Remove all entity headers from a list or :class:`Headers` object. This + operation works in-place. `Expires` and `Content-Location` headers are + by default not removed. The reason for this is :rfc:`2616` section + 10.3.5 which specifies some entity headers that should be sent. + + .. versionchanged:: 0.5 + added `allowed` parameter. + + :param headers: a list or :class:`Headers` object. + :param allowed: a list of headers that should still be allowed even though + they are entity headers. + """ + allowed = set(x.lower() for x in allowed) + headers[:] = [ + (key, value) + for key, value in headers + if not is_entity_header(key) or key.lower() in allowed + ] + + +def remove_hop_by_hop_headers(headers): + """Remove all HTTP/1.1 "Hop-by-Hop" headers from a list or + :class:`Headers` object. This operation works in-place. + + .. versionadded:: 0.5 + + :param headers: a list or :class:`Headers` object. + """ + headers[:] = [ + (key, value) for key, value in headers if not is_hop_by_hop_header(key) + ] + + +def is_entity_header(header): + """Check if a header is an entity header. + + .. versionadded:: 0.5 + + :param header: the header to test. + :return: `True` if it's an entity header, `False` otherwise. + """ + return header.lower() in _entity_headers + + +def is_hop_by_hop_header(header): + """Check if a header is an HTTP/1.1 "Hop-by-Hop" header. + + .. versionadded:: 0.5 + + :param header: the header to test. + :return: `True` if it's an HTTP/1.1 "Hop-by-Hop" header, `False` otherwise. 
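+
+    For example:
+
+    >>> is_hop_by_hop_header('Connection')
+    True
+    >>> is_hop_by_hop_header('Content-Type')
+    False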
+ """ + return header.lower() in _hop_by_hop_headers + + +def parse_cookie(header, charset="utf-8", errors="replace", cls=None): + """Parse a cookie. Either from a string or WSGI environ. + + Per default encoding errors are ignored. If you want a different behavior + you can set `errors` to ``'replace'`` or ``'strict'``. In strict mode a + :exc:`HTTPUnicodeError` is raised. + + .. versionchanged:: 0.5 + This function now returns a :class:`TypeConversionDict` instead of a + regular dict. The `cls` parameter was added. + + :param header: the header to be used to parse the cookie. Alternatively + this can be a WSGI environment. + :param charset: the charset for the cookie values. + :param errors: the error behavior for the charset decoding. + :param cls: an optional dict class to use. If this is not specified + or `None` the default :class:`TypeConversionDict` is + used. + """ + if isinstance(header, dict): + header = header.get("HTTP_COOKIE", "") + elif header is None: + header = "" + + # If the value is an unicode string it's mangled through latin1. This + # is done because on PEP 3333 on Python 3 all headers are assumed latin1 + # which however is incorrect for cookies, which are sent in page encoding. + # As a result we + if isinstance(header, text_type): + header = header.encode("latin1", "replace") + + if cls is None: + cls = TypeConversionDict + + def _parse_pairs(): + for key, val in _cookie_parse_impl(header): + key = to_unicode(key, charset, errors, allow_none_charset=True) + if not key: + continue + val = to_unicode(val, charset, errors, allow_none_charset=True) + yield try_coerce_native(key), val + + return cls(_parse_pairs()) + + +def dump_cookie( + key, + value="", + max_age=None, + expires=None, + path="/", + domain=None, + secure=False, + httponly=False, + charset="utf-8", + sync_expires=True, + max_size=4093, + samesite=None, +): + """Creates a new Set-Cookie header without the ``Set-Cookie`` prefix + The parameters are the same as in the cookie Morsel object in the + Python standard library but it accepts unicode data, too. + + On Python 3 the return value of this function will be a unicode + string, on Python 2 it will be a native string. In both cases the + return value is usually restricted to ascii as the vast majority of + values are properly escaped, but that is no guarantee. If a unicode + string is returned it's tunneled through latin1 as required by + PEP 3333. + + The return value is not ASCII safe if the key contains unicode + characters. This is technically against the specification but + happens in the wild. It's strongly recommended to not use + non-ASCII values for the keys. + + :param max_age: should be a number of seconds, or `None` (default) if + the cookie should last only as long as the client's + browser session. Additionally `timedelta` objects + are accepted, too. + :param expires: should be a `datetime` object or unix timestamp. + :param path: limits the cookie to a given path, per default it will + span the whole domain. + :param domain: Use this if you want to set a cross-domain cookie. For + example, ``domain=".example.com"`` will set a cookie + that is readable by the domain ``www.example.com``, + ``foo.example.com`` etc. Otherwise, a cookie will only + be readable by the domain that set it. + :param secure: The cookie will only be available via HTTPS + :param httponly: disallow JavaScript to access the cookie. This is an + extension to the cookie standard and probably not + supported by all browsers. 
+ :param charset: the encoding for unicode values. + :param sync_expires: automatically set expires if max_age is defined + but expires not. + :param max_size: Warn if the final header value exceeds this size. The + default, 4093, should be safely `supported by most browsers + <cookie_>`_. Set to 0 to disable this check. + :param samesite: Limits the scope of the cookie such that it will only + be attached to requests if those requests are "same-site". + + .. _`cookie`: http://browsercookielimits.squawky.net/ + """ + key = to_bytes(key, charset) + value = to_bytes(value, charset) + + if path is not None: + path = iri_to_uri(path, charset) + domain = _make_cookie_domain(domain) + if isinstance(max_age, timedelta): + max_age = (max_age.days * 60 * 60 * 24) + max_age.seconds + if expires is not None: + if not isinstance(expires, string_types): + expires = cookie_date(expires) + elif max_age is not None and sync_expires: + expires = to_bytes(cookie_date(time() + max_age)) + + samesite = samesite.title() if samesite else None + if samesite not in ("Strict", "Lax", None): + raise ValueError("invalid SameSite value; must be 'Strict', 'Lax' or None") + + buf = [key + b"=" + _cookie_quote(value)] + + # XXX: In theory all of these parameters that are not marked with `None` + # should be quoted. Because stdlib did not quote it before I did not + # want to introduce quoting there now. + for k, v, q in ( + (b"Domain", domain, True), + (b"Expires", expires, False), + (b"Max-Age", max_age, False), + (b"Secure", secure, None), + (b"HttpOnly", httponly, None), + (b"Path", path, False), + (b"SameSite", samesite, False), + ): + if q is None: + if v: + buf.append(k) + continue + + if v is None: + continue + + tmp = bytearray(k) + if not isinstance(v, (bytes, bytearray)): + v = to_bytes(text_type(v), charset) + if q: + v = _cookie_quote(v) + tmp += b"=" + v + buf.append(bytes(tmp)) + + # The return value will be an incorrectly encoded latin1 header on + # Python 3 for consistency with the headers object and a bytestring + # on Python 2 because that's how the API makes more sense. + rv = b"; ".join(buf) + if not PY2: + rv = rv.decode("latin1") + + # Warn if the final value of the cookie is less than the limit. If the + # cookie is too large, then it may be silently ignored, which can be quite + # hard to debug. + cookie_size = len(rv) + + if max_size and cookie_size > max_size: + value_size = len(value) + warnings.warn( + 'The "{key}" cookie is too large: the value was {value_size} bytes' + " but the header required {extra_size} extra bytes. The final size" + " was {cookie_size} bytes but the limit is {max_size} bytes." + " Browsers may silently ignore cookies larger than this.".format( + key=key, + value_size=value_size, + extra_size=cookie_size - value_size, + cookie_size=cookie_size, + max_size=max_size, + ), + stacklevel=2, + ) + + return rv + + +def is_byte_range_valid(start, stop, length): + """Checks if a given byte content range is valid for the given length. + + .. 
versionadded:: 0.7 + """ + if (start is None) != (stop is None): + return False + elif start is None: + return length is None or length >= 0 + elif length is None: + return 0 <= start < stop + elif start >= stop: + return False + return 0 <= start < length + + +# circular dependency fun +from .datastructures import Accept +from .datastructures import Authorization +from .datastructures import ContentRange +from .datastructures import ETags +from .datastructures import HeaderSet +from .datastructures import IfRange +from .datastructures import Range +from .datastructures import RequestCacheControl +from .datastructures import TypeConversionDict +from .datastructures import WWWAuthenticate +from .urls import iri_to_uri + +# DEPRECATED +from .datastructures import CharsetAccept as _CharsetAccept +from .datastructures import Headers as _Headers +from .datastructures import LanguageAccept as _LanguageAccept +from .datastructures import MIMEAccept as _MIMEAccept + + +class MIMEAccept(_MIMEAccept): + def __init__(self, *args, **kwargs): + warnings.warn( + "'werkzeug.http.MIMEAccept' has moved to 'werkzeug" + ".datastructures.MIMEAccept' as of version 0.5. This old" + " import will be removed in version 1.0.", + DeprecationWarning, + stacklevel=2, + ) + super(MIMEAccept, self).__init__(*args, **kwargs) + + +class CharsetAccept(_CharsetAccept): + def __init__(self, *args, **kwargs): + warnings.warn( + "'werkzeug.http.CharsetAccept' has moved to 'werkzeug" + ".datastructures.CharsetAccept' as of version 0.5. This old" + " import will be removed in version 1.0.", + DeprecationWarning, + stacklevel=2, + ) + super(CharsetAccept, self).__init__(*args, **kwargs) + + +class LanguageAccept(_LanguageAccept): + def __init__(self, *args, **kwargs): + warnings.warn( + "'werkzeug.http.LanguageAccept' has moved to 'werkzeug" + ".datastructures.LanguageAccept' as of version 0.5. This" + " old import will be removed in version 1.0.", + DeprecationWarning, + stacklevel=2, + ) + super(LanguageAccept, self).__init__(*args, **kwargs) + + +class Headers(_Headers): + def __init__(self, *args, **kwargs): + warnings.warn( + "'werkzeug.http.Headers' has moved to 'werkzeug" + ".datastructures.Headers' as of version 0.5. This old" + " import will be removed in version 1.0.", + DeprecationWarning, + stacklevel=2, + ) + super(Headers, self).__init__(*args, **kwargs) diff --git a/python/werkzeug/local.py b/python/werkzeug/local.py new file mode 100644 index 0000000..9a6088c --- /dev/null +++ b/python/werkzeug/local.py @@ -0,0 +1,421 @@ +# -*- coding: utf-8 -*- +""" + werkzeug.local + ~~~~~~~~~~~~~~ + + This module implements context-local objects. + + :copyright: 2007 Pallets + :license: BSD-3-Clause +""" +import copy +from functools import update_wrapper + +from ._compat import implements_bool +from ._compat import PY2 +from .wsgi import ClosingIterator + +# since each thread has its own greenlet we can just use those as identifiers +# for the context. If greenlets are not available we fall back to the +# current thread ident depending on where it is. +try: + from greenlet import getcurrent as get_ident +except ImportError: + try: + from thread import get_ident + except ImportError: + from _thread import get_ident + + +def release_local(local): + """Releases the contents of the local for the current context. + This makes it possible to use locals without a manager. 
+ + Example:: + + >>> loc = Local() + >>> loc.foo = 42 + >>> release_local(loc) + >>> hasattr(loc, 'foo') + False + + With this function one can release :class:`Local` objects as well + as :class:`LocalStack` objects. However it is not possible to + release data held by proxies that way, one always has to retain + a reference to the underlying local object in order to be able + to release it. + + .. versionadded:: 0.6.1 + """ + local.__release_local__() + + +class Local(object): + __slots__ = ("__storage__", "__ident_func__") + + def __init__(self): + object.__setattr__(self, "__storage__", {}) + object.__setattr__(self, "__ident_func__", get_ident) + + def __iter__(self): + return iter(self.__storage__.items()) + + def __call__(self, proxy): + """Create a proxy for a name.""" + return LocalProxy(self, proxy) + + def __release_local__(self): + self.__storage__.pop(self.__ident_func__(), None) + + def __getattr__(self, name): + try: + return self.__storage__[self.__ident_func__()][name] + except KeyError: + raise AttributeError(name) + + def __setattr__(self, name, value): + ident = self.__ident_func__() + storage = self.__storage__ + try: + storage[ident][name] = value + except KeyError: + storage[ident] = {name: value} + + def __delattr__(self, name): + try: + del self.__storage__[self.__ident_func__()][name] + except KeyError: + raise AttributeError(name) + + +class LocalStack(object): + """This class works similar to a :class:`Local` but keeps a stack + of objects instead. This is best explained with an example:: + + >>> ls = LocalStack() + >>> ls.push(42) + >>> ls.top + 42 + >>> ls.push(23) + >>> ls.top + 23 + >>> ls.pop() + 23 + >>> ls.top + 42 + + They can be force released by using a :class:`LocalManager` or with + the :func:`release_local` function but the correct way is to pop the + item from the stack after using. When the stack is empty it will + no longer be bound to the current context (and as such released). + + By calling the stack without arguments it returns a proxy that resolves to + the topmost item on the stack. + + .. versionadded:: 0.6.1 + """ + + def __init__(self): + self._local = Local() + + def __release_local__(self): + self._local.__release_local__() + + def _get__ident_func__(self): + return self._local.__ident_func__ + + def _set__ident_func__(self, value): + object.__setattr__(self._local, "__ident_func__", value) + + __ident_func__ = property(_get__ident_func__, _set__ident_func__) + del _get__ident_func__, _set__ident_func__ + + def __call__(self): + def _lookup(): + rv = self.top + if rv is None: + raise RuntimeError("object unbound") + return rv + + return LocalProxy(_lookup) + + def push(self, obj): + """Pushes a new item to the stack""" + rv = getattr(self._local, "stack", None) + if rv is None: + self._local.stack = rv = [] + rv.append(obj) + return rv + + def pop(self): + """Removes the topmost item from the stack, will return the + old value or `None` if the stack was already empty. + """ + stack = getattr(self._local, "stack", None) + if stack is None: + return None + elif len(stack) == 1: + release_local(self._local) + return stack[-1] + else: + return stack.pop() + + @property + def top(self): + """The topmost item on the stack. If the stack is empty, + `None` is returned. + """ + try: + return self._local.stack[-1] + except (AttributeError, IndexError): + return None + + +class LocalManager(object): + """Local objects cannot manage themselves. For that you need a local + manager. 
You can pass a local manager multiple locals or add them later + by appending them to `manager.locals`. Every time the manager cleans up, + it will clean up all the data left in the locals for this context. + + The `ident_func` parameter can be added to override the default ident + function for the wrapped locals. + + .. versionchanged:: 0.6.1 + Instead of a manager the :func:`release_local` function can be used + as well. + + .. versionchanged:: 0.7 + `ident_func` was added. + """ + + def __init__(self, locals=None, ident_func=None): + if locals is None: + self.locals = [] + elif isinstance(locals, Local): + self.locals = [locals] + else: + self.locals = list(locals) + if ident_func is not None: + self.ident_func = ident_func + for local in self.locals: + object.__setattr__(local, "__ident_func__", ident_func) + else: + self.ident_func = get_ident + + def get_ident(self): + """Return the context identifier the local objects use internally for + this context. You cannot override this method to change the behavior + but use it to link other context local objects (such as SQLAlchemy's + scoped sessions) to the Werkzeug locals. + + .. versionchanged:: 0.7 + You can pass a different ident function to the local manager that + will then be propagated to all the locals passed to the + constructor. + """ + return self.ident_func() + + def cleanup(self): + """Manually clean up the data in the locals for this context. Call + this at the end of the request or use `make_middleware()`. + """ + for local in self.locals: + release_local(local) + + def make_middleware(self, app): + """Wrap a WSGI application so that cleaning up happens after + request end. + """ + + def application(environ, start_response): + return ClosingIterator(app(environ, start_response), self.cleanup) + + return application + + def middleware(self, func): + """Like `make_middleware` but for decorating functions. + + Example usage:: + + @manager.middleware + def application(environ, start_response): + ... + + The difference to `make_middleware` is that the function passed + will have all the arguments copied from the inner application + (name, docstring, module). + """ + return update_wrapper(self.make_middleware(func), func) + + def __repr__(self): + return "<%s storages: %d>" % (self.__class__.__name__, len(self.locals)) + + +@implements_bool +class LocalProxy(object): + """Acts as a proxy for a werkzeug local. Forwards all operations to + a proxied object. The only operations not supported for forwarding + are right handed operands and any kind of assignment. + + Example usage:: + + from werkzeug.local import Local + l = Local() + + # these are proxies + request = l('request') + user = l('user') + + + from werkzeug.local import LocalStack + _response_local = LocalStack() + + # this is a proxy + response = _response_local() + + Whenever something is bound to l.user / l.request the proxy objects + will forward all operations. If no object is bound a :exc:`RuntimeError` + will be raised. + + To create proxies to :class:`Local` or :class:`LocalStack` objects, + call the object as shown above. If you want to have a proxy to an + object looked up by a function, you can (as of Werkzeug 0.6.1) pass + a function to the :class:`LocalProxy` constructor:: + + session = LocalProxy(lambda: get_current_request().session) + + .. versionchanged:: 0.6.1 + The class can be instantiated with a callable as well now. 
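+
+    To get the real object behind a proxy at a given moment, call
+    ``_get_current_object()`` (a sketch, using the ``request`` proxy
+    from the example above)::
+
+        real_request = request._get_current_object()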
+ """ + + __slots__ = ("__local", "__dict__", "__name__", "__wrapped__") + + def __init__(self, local, name=None): + object.__setattr__(self, "_LocalProxy__local", local) + object.__setattr__(self, "__name__", name) + if callable(local) and not hasattr(local, "__release_local__"): + # "local" is a callable that is not an instance of Local or + # LocalManager: mark it as a wrapped function. + object.__setattr__(self, "__wrapped__", local) + + def _get_current_object(self): + """Return the current object. This is useful if you want the real + object behind the proxy at a time for performance reasons or because + you want to pass the object into a different context. + """ + if not hasattr(self.__local, "__release_local__"): + return self.__local() + try: + return getattr(self.__local, self.__name__) + except AttributeError: + raise RuntimeError("no object bound to %s" % self.__name__) + + @property + def __dict__(self): + try: + return self._get_current_object().__dict__ + except RuntimeError: + raise AttributeError("__dict__") + + def __repr__(self): + try: + obj = self._get_current_object() + except RuntimeError: + return "<%s unbound>" % self.__class__.__name__ + return repr(obj) + + def __bool__(self): + try: + return bool(self._get_current_object()) + except RuntimeError: + return False + + def __unicode__(self): + try: + return unicode(self._get_current_object()) # noqa + except RuntimeError: + return repr(self) + + def __dir__(self): + try: + return dir(self._get_current_object()) + except RuntimeError: + return [] + + def __getattr__(self, name): + if name == "__members__": + return dir(self._get_current_object()) + return getattr(self._get_current_object(), name) + + def __setitem__(self, key, value): + self._get_current_object()[key] = value + + def __delitem__(self, key): + del self._get_current_object()[key] + + if PY2: + __getslice__ = lambda x, i, j: x._get_current_object()[i:j] + + def __setslice__(self, i, j, seq): + self._get_current_object()[i:j] = seq + + def __delslice__(self, i, j): + del self._get_current_object()[i:j] + + __setattr__ = lambda x, n, v: setattr(x._get_current_object(), n, v) + __delattr__ = lambda x, n: delattr(x._get_current_object(), n) + __str__ = lambda x: str(x._get_current_object()) + __lt__ = lambda x, o: x._get_current_object() < o + __le__ = lambda x, o: x._get_current_object() <= o + __eq__ = lambda x, o: x._get_current_object() == o + __ne__ = lambda x, o: x._get_current_object() != o + __gt__ = lambda x, o: x._get_current_object() > o + __ge__ = lambda x, o: x._get_current_object() >= o + __cmp__ = lambda x, o: cmp(x._get_current_object(), o) # noqa + __hash__ = lambda x: hash(x._get_current_object()) + __call__ = lambda x, *a, **kw: x._get_current_object()(*a, **kw) + __len__ = lambda x: len(x._get_current_object()) + __getitem__ = lambda x, i: x._get_current_object()[i] + __iter__ = lambda x: iter(x._get_current_object()) + __contains__ = lambda x, i: i in x._get_current_object() + __add__ = lambda x, o: x._get_current_object() + o + __sub__ = lambda x, o: x._get_current_object() - o + __mul__ = lambda x, o: x._get_current_object() * o + __floordiv__ = lambda x, o: x._get_current_object() // o + __mod__ = lambda x, o: x._get_current_object() % o + __divmod__ = lambda x, o: x._get_current_object().__divmod__(o) + __pow__ = lambda x, o: x._get_current_object() ** o + __lshift__ = lambda x, o: x._get_current_object() << o + __rshift__ = lambda x, o: x._get_current_object() >> o + __and__ = lambda x, o: x._get_current_object() & o + __xor__ = 
lambda x, o: x._get_current_object() ^ o + __or__ = lambda x, o: x._get_current_object() | o + __div__ = lambda x, o: x._get_current_object().__div__(o) + __truediv__ = lambda x, o: x._get_current_object().__truediv__(o) + __neg__ = lambda x: -(x._get_current_object()) + __pos__ = lambda x: +(x._get_current_object()) + __abs__ = lambda x: abs(x._get_current_object()) + __invert__ = lambda x: ~(x._get_current_object()) + __complex__ = lambda x: complex(x._get_current_object()) + __int__ = lambda x: int(x._get_current_object()) + __long__ = lambda x: long(x._get_current_object()) # noqa + __float__ = lambda x: float(x._get_current_object()) + __oct__ = lambda x: oct(x._get_current_object()) + __hex__ = lambda x: hex(x._get_current_object()) + __index__ = lambda x: x._get_current_object().__index__() + __coerce__ = lambda x, o: x._get_current_object().__coerce__(x, o) + __enter__ = lambda x: x._get_current_object().__enter__() + __exit__ = lambda x, *a, **kw: x._get_current_object().__exit__(*a, **kw) + __radd__ = lambda x, o: o + x._get_current_object() + __rsub__ = lambda x, o: o - x._get_current_object() + __rmul__ = lambda x, o: o * x._get_current_object() + __rdiv__ = lambda x, o: o / x._get_current_object() + if PY2: + __rtruediv__ = lambda x, o: x._get_current_object().__rtruediv__(o) + else: + __rtruediv__ = __rdiv__ + __rfloordiv__ = lambda x, o: o // x._get_current_object() + __rmod__ = lambda x, o: o % x._get_current_object() + __rdivmod__ = lambda x, o: x._get_current_object().__rdivmod__(o) + __copy__ = lambda x: copy.copy(x._get_current_object()) + __deepcopy__ = lambda x, memo: copy.deepcopy(x._get_current_object(), memo) diff --git a/python/werkzeug/middleware/__init__.py b/python/werkzeug/middleware/__init__.py new file mode 100644 index 0000000..5e049f5 --- /dev/null +++ b/python/werkzeug/middleware/__init__.py @@ -0,0 +1,25 @@ +""" +Middleware +========== + +A WSGI middleware is a WSGI application that wraps another application +in order to observe or change its behavior. Werkzeug provides some +middleware for common use cases. + +.. toctree:: + :maxdepth: 1 + + proxy_fix + shared_data + dispatcher + http_proxy + lint + profiler + +The :doc:`interactive debugger </debug>` is also a middleware that can +be applied manually, although it is typically used automatically with +the :doc:`development server </serving>`. + +:copyright: 2007 Pallets +:license: BSD-3-Clause +""" diff --git a/python/werkzeug/middleware/dispatcher.py b/python/werkzeug/middleware/dispatcher.py new file mode 100644 index 0000000..2eb173e --- /dev/null +++ b/python/werkzeug/middleware/dispatcher.py @@ -0,0 +1,66 @@ +""" +Application Dispatcher +====================== + +This middleware creates a single WSGI application that dispatches to +multiple other WSGI applications mounted at different URL paths. + +A common example is writing a Single Page Application, where you have a +backend API and a frontend written in JavaScript that does the routing +in the browser rather than requesting different pages from the server. +The frontend is a single HTML and JS file that should be served for any +path besides "/api". + +This example dispatches to an API app under "/api", an admin app +under "/admin", and an app that serves frontend files for all other +requests:: + + app = DispatcherMiddleware(serve_frontend, { + '/api': api_app, + '/admin': admin_app, + }) + +In production, you might instead handle this at the HTTP server level, +serving files or proxying to application servers based on location. 
The +API and admin apps would each be deployed with a separate WSGI server, +and the static files would be served directly by the HTTP server. + +.. autoclass:: DispatcherMiddleware + +:copyright: 2007 Pallets +:license: BSD-3-Clause +""" + + +class DispatcherMiddleware(object): + """Combine multiple applications as a single WSGI application. + Requests are dispatched to an application based on the path it is + mounted under. + + :param app: The WSGI application to dispatch to if the request + doesn't match a mounted path. + :param mounts: Maps path prefixes to applications for dispatching. + """ + + def __init__(self, app, mounts=None): + self.app = app + self.mounts = mounts or {} + + def __call__(self, environ, start_response): + script = environ.get("PATH_INFO", "") + path_info = "" + + while "/" in script: + if script in self.mounts: + app = self.mounts[script] + break + + script, last_item = script.rsplit("/", 1) + path_info = "/%s%s" % (last_item, path_info) + else: + app = self.mounts.get(script, self.app) + + original_script_name = environ.get("SCRIPT_NAME", "") + environ["SCRIPT_NAME"] = original_script_name + script + environ["PATH_INFO"] = path_info + return app(environ, start_response) diff --git a/python/werkzeug/middleware/http_proxy.py b/python/werkzeug/middleware/http_proxy.py new file mode 100644 index 0000000..bfdc071 --- /dev/null +++ b/python/werkzeug/middleware/http_proxy.py @@ -0,0 +1,219 @@ +""" +Basic HTTP Proxy +================ + +.. autoclass:: ProxyMiddleware + +:copyright: 2007 Pallets +:license: BSD-3-Clause +""" +import socket + +from ..datastructures import EnvironHeaders +from ..http import is_hop_by_hop_header +from ..urls import url_parse +from ..urls import url_quote +from ..wsgi import get_input_stream + +try: + from http import client +except ImportError: + import httplib as client + + +class ProxyMiddleware(object): + """Proxy requests under a path to an external server, routing other + requests to the app. + + This middleware can only proxy HTTP requests, as that is the only + protocol handled by the WSGI server. Other protocols, such as + websocket requests, cannot be proxied at this layer. This should + only be used for development, in production a real proxying server + should be used. + + The middleware takes a dict that maps a path prefix to a dict + describing the host to be proxied to:: + + app = ProxyMiddleware(app, { + "/static/": { + "target": "http://127.0.0.1:5001/", + } + }) + + Each host has the following options: + + ``target``: + The target URL to dispatch to. This is required. + ``remove_prefix``: + Whether to remove the prefix from the URL before dispatching it + to the target. The default is ``False``. + ``host``: + ``"<auto>"`` (default): + The host header is automatically rewritten to the URL of the + target. + ``None``: + The host header is unmodified from the client request. + Any other value: + The host header is overwritten with the value. + ``headers``: + A dictionary of headers to be sent with the request to the + target. The default is ``{}``. + ``ssl_context``: + A :class:`ssl.SSLContext` defining how to verify requests if the + target is HTTPS. The default is ``None``. + + In the example above, everything under ``"/static/"`` is proxied to + the server on port 5001. The host header is rewritten to the target, + and the ``"/static/"`` prefix is removed from the URLs. + + :param app: The WSGI application to wrap. + :param targets: Proxy target configurations. See description above. 
+ :param chunk_size: Size of chunks to read from input stream and + write to target. + :param timeout: Seconds before an operation to a target fails. + + .. versionadded:: 0.14 + """ + + def __init__(self, app, targets, chunk_size=2 << 13, timeout=10): + def _set_defaults(opts): + opts.setdefault("remove_prefix", False) + opts.setdefault("host", "<auto>") + opts.setdefault("headers", {}) + opts.setdefault("ssl_context", None) + return opts + + self.app = app + self.targets = dict( + ("/%s/" % k.strip("/"), _set_defaults(v)) for k, v in targets.items() + ) + self.chunk_size = chunk_size + self.timeout = timeout + + def proxy_to(self, opts, path, prefix): + target = url_parse(opts["target"]) + + def application(environ, start_response): + headers = list(EnvironHeaders(environ).items()) + headers[:] = [ + (k, v) + for k, v in headers + if not is_hop_by_hop_header(k) + and k.lower() not in ("content-length", "host") + ] + headers.append(("Connection", "close")) + + if opts["host"] == "<auto>": + headers.append(("Host", target.ascii_host)) + elif opts["host"] is None: + headers.append(("Host", environ["HTTP_HOST"])) + else: + headers.append(("Host", opts["host"])) + + headers.extend(opts["headers"].items()) + remote_path = path + + if opts["remove_prefix"]: + remote_path = "%s/%s" % ( + target.path.rstrip("/"), + remote_path[len(prefix) :].lstrip("/"), + ) + + content_length = environ.get("CONTENT_LENGTH") + chunked = False + + if content_length not in ("", None): + headers.append(("Content-Length", content_length)) + elif content_length is not None: + headers.append(("Transfer-Encoding", "chunked")) + chunked = True + + try: + if target.scheme == "http": + con = client.HTTPConnection( + target.ascii_host, target.port or 80, timeout=self.timeout + ) + elif target.scheme == "https": + con = client.HTTPSConnection( + target.ascii_host, + target.port or 443, + timeout=self.timeout, + context=opts["ssl_context"], + ) + else: + raise RuntimeError( + "Target scheme must be 'http' or 'https', got '{}'.".format( + target.scheme + ) + ) + + con.connect() + remote_url = url_quote(remote_path) + querystring = environ["QUERY_STRING"] + + if querystring: + remote_url = remote_url + "?" 
+ querystring + + con.putrequest(environ["REQUEST_METHOD"], remote_url, skip_host=True) + + for k, v in headers: + if k.lower() == "connection": + v = "close" + + con.putheader(k, v) + + con.endheaders() + stream = get_input_stream(environ) + + while 1: + data = stream.read(self.chunk_size) + + if not data: + break + + if chunked: + con.send(b"%x\r\n%s\r\n" % (len(data), data)) + else: + con.send(data) + + resp = con.getresponse() + except socket.error: + from ..exceptions import BadGateway + + return BadGateway()(environ, start_response) + + start_response( + "%d %s" % (resp.status, resp.reason), + [ + (k.title(), v) + for k, v in resp.getheaders() + if not is_hop_by_hop_header(k) + ], + ) + + def read(): + while 1: + try: + data = resp.read(self.chunk_size) + except socket.error: + break + + if not data: + break + + yield data + + return read() + + return application + + def __call__(self, environ, start_response): + path = environ["PATH_INFO"] + app = self.app + + for prefix, opts in self.targets.items(): + if path.startswith(prefix): + app = self.proxy_to(opts, path, prefix) + break + + return app(environ, start_response) diff --git a/python/werkzeug/middleware/lint.py b/python/werkzeug/middleware/lint.py new file mode 100644 index 0000000..98f9581 --- /dev/null +++ b/python/werkzeug/middleware/lint.py @@ -0,0 +1,408 @@ +""" +WSGI Protocol Linter +==================== + +This module provides a middleware that performs sanity checks on the +behavior of the WSGI server and application. It checks that the +:pep:`3333` WSGI spec is properly implemented. It also warns on some +common HTTP errors such as non-empty responses for 304 status codes. + +.. autoclass:: LintMiddleware + +:copyright: 2007 Pallets +:license: BSD-3-Clause +""" +from warnings import warn + +from .._compat import implements_iterator +from .._compat import PY2 +from .._compat import string_types +from ..datastructures import Headers +from ..http import is_entity_header +from ..wsgi import FileWrapper + +try: + from urllib.parse import urlparse +except ImportError: + from urlparse import urlparse + + +class WSGIWarning(Warning): + """Warning class for WSGI warnings.""" + + +class HTTPWarning(Warning): + """Warning class for HTTP warnings.""" + + +def check_string(context, obj, stacklevel=3): + if type(obj) is not str: + warn( + "'%s' requires strings, got '%s'" % (context, type(obj).__name__), + WSGIWarning, + ) + + +class InputStream(object): + def __init__(self, stream): + self._stream = stream + + def read(self, *args): + if len(args) == 0: + warn( + "WSGI does not guarantee an EOF marker on the input stream, thus making" + " calls to 'wsgi.input.read()' unsafe. Conforming servers may never" + " return from this call.", + WSGIWarning, + stacklevel=2, + ) + elif len(args) != 1: + warn( + "Too many parameters passed to 'wsgi.input.read()'.", + WSGIWarning, + stacklevel=2, + ) + return self._stream.read(*args) + + def readline(self, *args): + if len(args) == 0: + warn( + "Calls to 'wsgi.input.readline()' without arguments are unsafe. Use" + " 'wsgi.input.read()' instead.", + WSGIWarning, + stacklevel=2, + ) + elif len(args) == 1: + warn( + "'wsgi.input.readline()' was called with a size hint. 
WSGI does not" + " support this, although it's available on all major servers.", + WSGIWarning, + stacklevel=2, + ) + else: + raise TypeError("Too many arguments passed to 'wsgi.input.readline()'.") + return self._stream.readline(*args) + + def __iter__(self): + try: + return iter(self._stream) + except TypeError: + warn("'wsgi.input' is not iterable.", WSGIWarning, stacklevel=2) + return iter(()) + + def close(self): + warn("The application closed the input stream!", WSGIWarning, stacklevel=2) + self._stream.close() + + +class ErrorStream(object): + def __init__(self, stream): + self._stream = stream + + def write(self, s): + check_string("wsgi.error.write()", s) + self._stream.write(s) + + def flush(self): + self._stream.flush() + + def writelines(self, seq): + for line in seq: + self.write(line) + + def close(self): + warn("The application closed the error stream!", WSGIWarning, stacklevel=2) + self._stream.close() + + +class GuardedWrite(object): + def __init__(self, write, chunks): + self._write = write + self._chunks = chunks + + def __call__(self, s): + check_string("write()", s) + self._write.write(s) + self._chunks.append(len(s)) + + +@implements_iterator +class GuardedIterator(object): + def __init__(self, iterator, headers_set, chunks): + self._iterator = iterator + if PY2: + self._next = iter(iterator).next + else: + self._next = iter(iterator).__next__ + self.closed = False + self.headers_set = headers_set + self.chunks = chunks + + def __iter__(self): + return self + + def __next__(self): + if self.closed: + warn("Iterated over closed 'app_iter'.", WSGIWarning, stacklevel=2) + + rv = self._next() + + if not self.headers_set: + warn( + "The application returned before it started the response.", + WSGIWarning, + stacklevel=2, + ) + + check_string("application iterator items", rv) + self.chunks.append(len(rv)) + return rv + + def close(self): + self.closed = True + + if hasattr(self._iterator, "close"): + self._iterator.close() + + if self.headers_set: + status_code, headers = self.headers_set + bytes_sent = sum(self.chunks) + content_length = headers.get("content-length", type=int) + + if status_code == 304: + for key, _value in headers: + key = key.lower() + if key not in ("expires", "content-location") and is_entity_header( + key + ): + warn( + "Entity header %r found in 304 response." % key, HTTPWarning + ) + if bytes_sent: + warn("304 responses must not have a body.", HTTPWarning) + elif 100 <= status_code < 200 or status_code == 204: + if content_length != 0: + warn( + "%r responses must have an empty content length." % status_code, + HTTPWarning, + ) + if bytes_sent: + warn( + "%r responses must not have a body." % status_code, HTTPWarning + ) + elif content_length is not None and content_length != bytes_sent: + warn( + "Content-Length and the number of bytes sent to the client do not" + " match.", + WSGIWarning, + ) + + def __del__(self): + if not self.closed: + try: + warn( + "Iterator was garbage collected before it was closed.", WSGIWarning + ) + except Exception: + pass + + +class LintMiddleware(object): + """Warns about common errors in the WSGI and HTTP behavior of the + server and wrapped application. Some of the issues it check are: + + - invalid status codes + - non-bytestrings sent to the WSGI server + - strings returned from the WSGI application + - non-empty conditional responses + - unquoted etags + - relative URLs in the Location header + - unsafe calls to wsgi.input + - unclosed iterators + + Error information is emitted using the :mod:`warnings` module. 
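+
+    During tests these warnings can be promoted to exceptions with the
+    standard :mod:`warnings` filters (an illustrative sketch)::
+
+        import warnings
+        from werkzeug.middleware.lint import WSGIWarning
+
+        warnings.simplefilter("error", WSGIWarning)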
+
+    :param app: The WSGI application to wrap.
+
+    .. code-block:: python
+
+        from werkzeug.middleware.lint import LintMiddleware
+        app = LintMiddleware(app)
+    """
+
+    def __init__(self, app):
+        self.app = app
+
+    def check_environ(self, environ):
+        if type(environ) is not dict:
+            warn(
+                "WSGI environment is not a standard Python dict.",
+                WSGIWarning,
+                stacklevel=4,
+            )
+        for key in (
+            "REQUEST_METHOD",
+            "SERVER_NAME",
+            "SERVER_PORT",
+            "wsgi.version",
+            "wsgi.input",
+            "wsgi.errors",
+            "wsgi.multithread",
+            "wsgi.multiprocess",
+            "wsgi.run_once",
+        ):
+            if key not in environ:
+                warn(
+                    "Required environment key %r not found" % key,
+                    WSGIWarning,
+                    stacklevel=3,
+                )
+        if environ["wsgi.version"] != (1, 0):
+            warn("Environ is not a WSGI 1.0 environ.", WSGIWarning, stacklevel=3)
+
+        script_name = environ.get("SCRIPT_NAME", "")
+        path_info = environ.get("PATH_INFO", "")
+
+        if script_name and script_name[0] != "/":
+            warn(
+                "'SCRIPT_NAME' does not start with a slash: %r" % script_name,
+                WSGIWarning,
+                stacklevel=3,
+            )
+
+        if path_info and path_info[0] != "/":
+            warn(
+                "'PATH_INFO' does not start with a slash: %r" % path_info,
+                WSGIWarning,
+                stacklevel=3,
+            )
+
+    def check_start_response(self, status, headers, exc_info):
+        check_string("status", status)
+        status_code = status.split(None, 1)[0]
+
+        if len(status_code) != 3 or not status_code.isdigit():
+            warn(WSGIWarning("Status code must be three digits"), stacklevel=3)
+
+        if len(status) < 4 or status[3] != " ":
+            warn(
+                WSGIWarning(
+                    "Invalid value for status %r. Valid "
+                    "status strings are three digits, a space "
+                    "and a status explanation"
+                ),
+                stacklevel=3,
+            )
+
+        status_code = int(status_code)
+
+        if status_code < 100:
+            warn(WSGIWarning("status code < 100 detected"), stacklevel=3)
+
+        if type(headers) is not list:
+            warn(WSGIWarning("header list is not a list"), stacklevel=3)
+
+        for item in headers:
+            if type(item) is not tuple or len(item) != 2:
+                warn(WSGIWarning("Headers must be 2-item tuples"), stacklevel=3)
+            name, value = item
+            if type(name) is not str or type(value) is not str:
+                warn(WSGIWarning("header items must be strings"), stacklevel=3)
+            if name.lower() == "status":
+                warn(
+                    WSGIWarning(
+                        "The status header is not supported due to "
+                        "conflicts with the CGI spec."
+                    ),
+                    stacklevel=3,
+                )
+
+        if exc_info is not None and not isinstance(exc_info, tuple):
+            warn(WSGIWarning("invalid value for exc_info"), stacklevel=3)
+
+        headers = Headers(headers)
+        self.check_headers(headers)
+
+        return status_code, headers
+
+    def check_headers(self, headers):
+        etag = headers.get("etag")
+
+        if etag is not None:
+            if etag.startswith(("W/", "w/")):
+                if etag.startswith("w/"):
+                    warn(
+                        HTTPWarning("weak etag indicator should be uppercase."),
+                        stacklevel=4,
+                    )
+
+                etag = etag[2:]
+
+            if not (etag[:1] == etag[-1:] == '"'):
+                warn(HTTPWarning("unquoted etag emitted."), stacklevel=4)
+
+        location = headers.get("location")
+
+        if location is not None:
+            if not urlparse(location).netloc:
+                warn(
+                    HTTPWarning("absolute URLs required for location header"),
+                    stacklevel=4,
+                )
+
+    def check_iterator(self, app_iter):
+        if isinstance(app_iter, string_types):
+            warn(
+                "The application returned a string. The response will send one character"
+                " at a time to the client, which will kill performance. 
Return a list" + " or iterable instead.", + WSGIWarning, + stacklevel=3, + ) + + def __call__(self, *args, **kwargs): + if len(args) != 2: + warn("A WSGI app takes two arguments.", WSGIWarning, stacklevel=2) + + if kwargs: + warn( + "A WSGI app does not take keyword arguments.", WSGIWarning, stacklevel=2 + ) + + environ, start_response = args + + self.check_environ(environ) + environ["wsgi.input"] = InputStream(environ["wsgi.input"]) + environ["wsgi.errors"] = ErrorStream(environ["wsgi.errors"]) + + # Hook our own file wrapper in so that applications will always + # iterate to the end and we can check the content length. + environ["wsgi.file_wrapper"] = FileWrapper + + headers_set = [] + chunks = [] + + def checking_start_response(*args, **kwargs): + if len(args) not in (2, 3): + warn( + "Invalid number of arguments: %s, expected 2 or 3." % len(args), + WSGIWarning, + stacklevel=2, + ) + + if kwargs: + warn("'start_response' does not take keyword arguments.", WSGIWarning) + + status, headers = args[:2] + + if len(args) == 3: + exc_info = args[2] + else: + exc_info = None + + headers_set[:] = self.check_start_response(status, headers, exc_info) + return GuardedWrite(start_response(status, headers, exc_info), chunks) + + app_iter = self.app(environ, checking_start_response) + self.check_iterator(app_iter) + return GuardedIterator(app_iter, headers_set, chunks) diff --git a/python/werkzeug/middleware/profiler.py b/python/werkzeug/middleware/profiler.py new file mode 100644 index 0000000..32a14d9 --- /dev/null +++ b/python/werkzeug/middleware/profiler.py @@ -0,0 +1,132 @@ +""" +Application Profiler +==================== + +This module provides a middleware that profiles each request with the +:mod:`cProfile` module. This can help identify bottlenecks in your code +that may be slowing down your application. + +.. autoclass:: ProfilerMiddleware + +:copyright: 2007 Pallets +:license: BSD-3-Clause +""" +from __future__ import print_function + +import os.path +import sys +import time +from pstats import Stats + +try: + from cProfile import Profile +except ImportError: + from profile import Profile + + +class ProfilerMiddleware(object): + """Wrap a WSGI application and profile the execution of each + request. Responses are buffered so that timings are more exact. + + If ``stream`` is given, :class:`pstats.Stats` are written to it + after each request. If ``profile_dir`` is given, :mod:`cProfile` + data files are saved to that directory, one file per request. + + The filename can be customized by passing ``filename_format``. If + it is a string, it will be formatted using :meth:`str.format` with + the following fields available: + + - ``{method}`` - The request method; GET, POST, etc. + - ``{path}`` - The request path or 'root' should one not exist. + - ``{elapsed}`` - The elapsed time of the request. + - ``{time}`` - The time of the request. + + If it is a callable, it will be called with the WSGI ``environ`` + dict and should return a filename. + + :param app: The WSGI application to wrap. + :param stream: Write stats to this stream. Disable with ``None``. + :param sort_by: A tuple of columns to sort stats by. See + :meth:`pstats.Stats.sort_stats`. + :param restrictions: A tuple of restrictions to filter stats by. See + :meth:`pstats.Stats.print_stats`. + :param profile_dir: Save profile data files to this directory. + :param filename_format: Format string for profile data file names, + or a callable returning a name. See explanation above. + + .. 
code-block:: python
+
+        from werkzeug.middleware.profiler import ProfilerMiddleware
+        app = ProfilerMiddleware(app)
+
+    .. versionchanged:: 0.15
+        Stats are written even if ``profile_dir`` is given, and can be
+        disabled by passing ``stream=None``.
+
+    .. versionadded:: 0.15
+        Added ``filename_format``.
+
+    .. versionadded:: 0.9
+        Added ``restrictions`` and ``profile_dir``.
+    """
+
+    def __init__(
+        self,
+        app,
+        stream=sys.stdout,
+        sort_by=("time", "calls"),
+        restrictions=(),
+        profile_dir=None,
+        filename_format="{method}.{path}.{elapsed:.0f}ms.{time:.0f}.prof",
+    ):
+        self._app = app
+        self._stream = stream
+        self._sort_by = sort_by
+        self._restrictions = restrictions
+        self._profile_dir = profile_dir
+        self._filename_format = filename_format
+
+    def __call__(self, environ, start_response):
+        response_body = []
+
+        def catching_start_response(status, headers, exc_info=None):
+            start_response(status, headers, exc_info)
+            return response_body.append
+
+        def runapp():
+            app_iter = self._app(environ, catching_start_response)
+            response_body.extend(app_iter)
+
+            if hasattr(app_iter, "close"):
+                app_iter.close()
+
+        profile = Profile()
+        start = time.time()
+        profile.runcall(runapp)
+        body = b"".join(response_body)
+        elapsed = time.time() - start
+
+        if self._profile_dir is not None:
+            if callable(self._filename_format):
+                filename = self._filename_format(environ)
+            else:
+                filename = self._filename_format.format(
+                    method=environ["REQUEST_METHOD"],
+                    path=(
+                        environ.get("PATH_INFO").strip("/").replace("/", ".") or "root"
+                    ),
+                    elapsed=elapsed * 1000.0,
+                    time=time.time(),
+                )
+            filename = os.path.join(self._profile_dir, filename)
+            profile.dump_stats(filename)
+
+        if self._stream is not None:
+            stats = Stats(profile, stream=self._stream)
+            stats.sort_stats(*self._sort_by)
+            print("-" * 80, file=self._stream)
+            print("PATH: {!r}".format(environ.get("PATH_INFO", "")), file=self._stream)
+            stats.print_stats(*self._restrictions)
+            print("-" * 80 + "\n", file=self._stream)
+
+        return [body]
diff --git a/python/werkzeug/middleware/proxy_fix.py b/python/werkzeug/middleware/proxy_fix.py
new file mode 100644
index 0000000..dc1dacc
--- /dev/null
+++ b/python/werkzeug/middleware/proxy_fix.py
@@ -0,0 +1,228 @@
+"""
+X-Forwarded-For Proxy Fix
+=========================
+
+This module provides a middleware that adjusts the WSGI environ based on
+``X-Forwarded-`` headers that proxies in front of an application may
+set.
+
+When an application is running behind a proxy server, WSGI may see the
+request as coming from that server rather than the real client. Proxies
+set various headers to track where the request actually came from.
+
+This middleware should only be applied if the application is actually
+behind such a proxy, and should be configured with the number of proxies
+that are chained in front of it. Not all proxies set all the headers.
+Since incoming headers can be faked, you must set how many proxies are
+setting each header so the middleware knows what to trust.
+
+.. autoclass:: ProxyFix
+
+:copyright: 2007 Pallets
+:license: BSD-3-Clause
+"""
+import warnings
+
+
+class ProxyFix(object):
+    """Adjust the WSGI environ based on ``X-Forwarded-`` headers that
+    proxies in front of the application may set.
+
+    - ``X-Forwarded-For`` sets ``REMOTE_ADDR``.
+    - ``X-Forwarded-Proto`` sets ``wsgi.url_scheme``.
+    - ``X-Forwarded-Host`` sets ``HTTP_HOST``, ``SERVER_NAME``, and
+      ``SERVER_PORT``.
+    - ``X-Forwarded-Port`` sets ``HTTP_HOST`` and ``SERVER_PORT``.
+    - ``X-Forwarded-Prefix`` sets ``SCRIPT_NAME``.
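A sketch of the trust model behind the list above (addresses and header values invented for illustration): with ``x_for=1`` only the right-most ``X-Forwarded-For`` value, the one appended by the single trusted proxy, is honored, so a client-supplied prefix cannot spoof ``REMOTE_ADDR``.

    from werkzeug.middleware.proxy_fix import ProxyFix

    app = ProxyFix(app, x_for=1)  # exactly one trusted proxy sets X-Forwarded-For

    # Incoming header after the trusted proxy appends the real peer address:
    #   X-Forwarded-For: spoofed, 203.0.113.7
    # Only the last (right-most) value is trusted, so the middleware sets
    #   environ["REMOTE_ADDR"] = "203.0.113.7"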
+
+    You must tell the middleware how many proxies set each header so it
+    knows what values to trust. It is a security issue to trust values
+    that came from the client rather than a proxy.
+
+    The original values of the headers are stored in the WSGI
+    environ as ``werkzeug.proxy_fix.orig``, a dict.
+
+    :param app: The WSGI application to wrap.
+    :param x_for: Number of values to trust for ``X-Forwarded-For``.
+    :param x_proto: Number of values to trust for ``X-Forwarded-Proto``.
+    :param x_host: Number of values to trust for ``X-Forwarded-Host``.
+    :param x_port: Number of values to trust for ``X-Forwarded-Port``.
+    :param x_prefix: Number of values to trust for
+        ``X-Forwarded-Prefix``.
+    :param num_proxies: Deprecated, use ``x_for`` instead.
+
+    .. code-block:: python
+
+        from werkzeug.middleware.proxy_fix import ProxyFix
+        # App is behind one proxy that sets the -For and -Host headers.
+        app = ProxyFix(app, x_for=1, x_host=1)
+
+    .. versionchanged:: 0.15
+        All headers support multiple values. The ``num_proxies``
+        argument is deprecated. Each header is configured with a
+        separate number of trusted proxies.
+
+    .. versionchanged:: 0.15
+        Original WSGI environ values are stored in the
+        ``werkzeug.proxy_fix.orig`` dict. ``orig_remote_addr``,
+        ``orig_wsgi_url_scheme``, and ``orig_http_host`` are deprecated
+        and will be removed in 1.0.
+
+    .. versionchanged:: 0.15
+        Support ``X-Forwarded-Port`` and ``X-Forwarded-Prefix``.
+
+    .. versionchanged:: 0.15
+        ``X-Forwarded-Host`` and ``X-Forwarded-Port`` modify
+        ``SERVER_NAME`` and ``SERVER_PORT``.
+    """
+
+    def __init__(
+        self, app, num_proxies=None, x_for=1, x_proto=0, x_host=0, x_port=0, x_prefix=0
+    ):
+        self.app = app
+        self.x_for = x_for
+        self.x_proto = x_proto
+        self.x_host = x_host
+        self.x_port = x_port
+        self.x_prefix = x_prefix
+        self.num_proxies = num_proxies
+
+    @property
+    def num_proxies(self):
+        """The number of proxies setting ``X-Forwarded-For`` in front
+        of the application.
+
+        .. deprecated:: 0.15
+            A separate number of trusted proxies is configured for each
+            header. ``num_proxies`` maps to ``x_for``. This method will
+            be removed in 1.0.
+
+        :internal:
+        """
+        warnings.warn(
+            "'num_proxies' is deprecated as of version 0.15 and will be"
+            " removed in version 1.0. Use 'x_for' instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        return self.x_for
+
+    @num_proxies.setter
+    def num_proxies(self, value):
+        if value is not None:
+            warnings.warn(
+                "'num_proxies' is deprecated as of version 0.15 and"
+                " will be removed in version 1.0. Use 'x_for' instead.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+            self.x_for = value
+
+    def get_remote_addr(self, forwarded_for):
+        """Get the real ``remote_addr`` by looking backwards ``x_for``
+        number of values in the ``X-Forwarded-For`` header.
+
+        :param forwarded_for: List of values parsed from the
+            ``X-Forwarded-For`` header.
+        :return: The real ``remote_addr``, or ``None`` if there were not
+            at least ``x_for`` values.
+
+        .. deprecated:: 0.15
+            This is handled internally for each header. This method will
+            be removed in 1.0.
+
+        .. versionchanged:: 0.9
+            Use ``num_proxies`` instead of always picking the first
+            value.
+
+        .. versionadded:: 0.8
+        """
+        warnings.warn(
+            "'get_remote_addr' is deprecated as of version 0.15 and"
+            " will be removed in version 1.0. 
It is now handled" + " internally for each header.", + DeprecationWarning, + ) + return self._get_trusted_comma(self.x_for, ",".join(forwarded_for)) + + def _get_trusted_comma(self, trusted, value): + """Get the real value from a comma-separated header based on the + configured number of trusted proxies. + + :param trusted: Number of values to trust in the header. + :param value: Header value to parse. + :return: The real value, or ``None`` if there are fewer values + than the number of trusted proxies. + + .. versionadded:: 0.15 + """ + if not (trusted and value): + return + values = [x.strip() for x in value.split(",")] + if len(values) >= trusted: + return values[-trusted] + + def __call__(self, environ, start_response): + """Modify the WSGI environ based on the various ``Forwarded`` + headers before calling the wrapped application. Store the + original environ values in ``werkzeug.proxy_fix.orig_{key}``. + """ + environ_get = environ.get + orig_remote_addr = environ_get("REMOTE_ADDR") + orig_wsgi_url_scheme = environ_get("wsgi.url_scheme") + orig_http_host = environ_get("HTTP_HOST") + environ.update( + { + "werkzeug.proxy_fix.orig": { + "REMOTE_ADDR": orig_remote_addr, + "wsgi.url_scheme": orig_wsgi_url_scheme, + "HTTP_HOST": orig_http_host, + "SERVER_NAME": environ_get("SERVER_NAME"), + "SERVER_PORT": environ_get("SERVER_PORT"), + "SCRIPT_NAME": environ_get("SCRIPT_NAME"), + }, + # todo: remove deprecated keys + "werkzeug.proxy_fix.orig_remote_addr": orig_remote_addr, + "werkzeug.proxy_fix.orig_wsgi_url_scheme": orig_wsgi_url_scheme, + "werkzeug.proxy_fix.orig_http_host": orig_http_host, + } + ) + + x_for = self._get_trusted_comma(self.x_for, environ_get("HTTP_X_FORWARDED_FOR")) + if x_for: + environ["REMOTE_ADDR"] = x_for + + x_proto = self._get_trusted_comma( + self.x_proto, environ_get("HTTP_X_FORWARDED_PROTO") + ) + if x_proto: + environ["wsgi.url_scheme"] = x_proto + + x_host = self._get_trusted_comma( + self.x_host, environ_get("HTTP_X_FORWARDED_HOST") + ) + if x_host: + environ["HTTP_HOST"] = x_host + parts = x_host.split(":", 1) + environ["SERVER_NAME"] = parts[0] + if len(parts) == 2: + environ["SERVER_PORT"] = parts[1] + + x_port = self._get_trusted_comma( + self.x_port, environ_get("HTTP_X_FORWARDED_PORT") + ) + if x_port: + host = environ.get("HTTP_HOST") + if host: + parts = host.split(":", 1) + host = parts[0] if len(parts) == 2 else host + environ["HTTP_HOST"] = "%s:%s" % (host, x_port) + environ["SERVER_PORT"] = x_port + + x_prefix = self._get_trusted_comma( + self.x_prefix, environ_get("HTTP_X_FORWARDED_PREFIX") + ) + if x_prefix: + environ["SCRIPT_NAME"] = x_prefix + + return self.app(environ, start_response) diff --git a/python/werkzeug/middleware/shared_data.py b/python/werkzeug/middleware/shared_data.py new file mode 100644 index 0000000..a902281 --- /dev/null +++ b/python/werkzeug/middleware/shared_data.py @@ -0,0 +1,260 @@ +""" +Serve Shared Static Files +========================= + +.. 
autoclass:: SharedDataMiddleware + :members: is_allowed + +:copyright: 2007 Pallets +:license: BSD-3-Clause +""" +import mimetypes +import os +import posixpath +from datetime import datetime +from io import BytesIO +from time import mktime +from time import time +from zlib import adler32 + +from .._compat import PY2 +from .._compat import string_types +from ..filesystem import get_filesystem_encoding +from ..http import http_date +from ..http import is_resource_modified +from ..wsgi import get_path_info +from ..wsgi import wrap_file + + +class SharedDataMiddleware(object): + + """A WSGI middleware that provides static content for development + environments or simple server setups. Usage is quite simple:: + + import os + from werkzeug.wsgi import SharedDataMiddleware + + app = SharedDataMiddleware(app, { + '/static': os.path.join(os.path.dirname(__file__), 'static') + }) + + The contents of the folder ``./shared`` will now be available on + ``http://example.com/shared/``. This is pretty useful during development + because a standalone media server is not required. One can also mount + files on the root folder and still continue to use the application because + the shared data middleware forwards all unhandled requests to the + application, even if the requests are below one of the shared folders. + + If `pkg_resources` is available you can also tell the middleware to serve + files from package data:: + + app = SharedDataMiddleware(app, { + '/static': ('myapplication', 'static') + }) + + This will then serve the ``static`` folder in the `myapplication` + Python package. + + The optional `disallow` parameter can be a list of :func:`~fnmatch.fnmatch` + rules for files that are not accessible from the web. If `cache` is set to + `False` no caching headers are sent. + + Currently the middleware does not support non ASCII filenames. If the + encoding on the file system happens to be the encoding of the URI it may + work but this could also be by accident. We strongly suggest using ASCII + only file names for static files. + + The middleware will guess the mimetype using the Python `mimetype` + module. If it's unable to figure out the charset it will fall back + to `fallback_mimetype`. + + .. versionchanged:: 0.5 + The cache timeout is configurable now. + + .. versionadded:: 0.6 + The `fallback_mimetype` parameter was added. + + :param app: the application to wrap. If you don't want to wrap an + application you can pass it :exc:`NotFound`. + :param exports: a list or dict of exported files and folders. + :param disallow: a list of :func:`~fnmatch.fnmatch` rules. + :param fallback_mimetype: the fallback mimetype for unknown files. + :param cache: enable or disable caching headers. + :param cache_timeout: the cache timeout in seconds for the headers. 
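A combined-usage sketch of the parameters above; ``mypackage`` is a made-up package name, and note that the constructor below treats ``disallow`` as a single fnmatch pattern:

    import os
    from werkzeug.middleware.shared_data import SharedDataMiddleware

    app = SharedDataMiddleware(
        app,
        {
            "/static": os.path.join(os.path.dirname(__file__), "static"),
            "/docs": ("mypackage", "docs"),  # package data; mypackage is hypothetical
        },
        disallow="*.py",        # hide source files; one fnmatch pattern
        cache_timeout=60 * 60,  # cache headers valid for one hour
    )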
+ """ + + def __init__( + self, + app, + exports, + disallow=None, + cache=True, + cache_timeout=60 * 60 * 12, + fallback_mimetype="text/plain", + ): + self.app = app + self.exports = [] + self.cache = cache + self.cache_timeout = cache_timeout + + if hasattr(exports, "items"): + exports = exports.items() + + for key, value in exports: + if isinstance(value, tuple): + loader = self.get_package_loader(*value) + elif isinstance(value, string_types): + if os.path.isfile(value): + loader = self.get_file_loader(value) + else: + loader = self.get_directory_loader(value) + else: + raise TypeError("unknown def %r" % value) + + self.exports.append((key, loader)) + + if disallow is not None: + from fnmatch import fnmatch + + self.is_allowed = lambda x: not fnmatch(x, disallow) + + self.fallback_mimetype = fallback_mimetype + + def is_allowed(self, filename): + """Subclasses can override this method to disallow the access to + certain files. However by providing `disallow` in the constructor + this method is overwritten. + """ + return True + + def _opener(self, filename): + return lambda: ( + open(filename, "rb"), + datetime.utcfromtimestamp(os.path.getmtime(filename)), + int(os.path.getsize(filename)), + ) + + def get_file_loader(self, filename): + return lambda x: (os.path.basename(filename), self._opener(filename)) + + def get_package_loader(self, package, package_path): + from pkg_resources import DefaultProvider, ResourceManager, get_provider + + loadtime = datetime.utcnow() + provider = get_provider(package) + manager = ResourceManager() + filesystem_bound = isinstance(provider, DefaultProvider) + + def loader(path): + if path is None: + return None, None + + path = posixpath.join(package_path, path) + + if not provider.has_resource(path): + return None, None + + basename = posixpath.basename(path) + + if filesystem_bound: + return ( + basename, + self._opener(provider.get_resource_filename(manager, path)), + ) + + s = provider.get_resource_string(manager, path) + return basename, lambda: (BytesIO(s), loadtime, len(s)) + + return loader + + def get_directory_loader(self, directory): + def loader(path): + if path is not None: + path = os.path.join(directory, path) + else: + path = directory + + if os.path.isfile(path): + return os.path.basename(path), self._opener(path) + + return None, None + + return loader + + def generate_etag(self, mtime, file_size, real_filename): + if not isinstance(real_filename, bytes): + real_filename = real_filename.encode(get_filesystem_encoding()) + + return "wzsdm-%d-%s-%s" % ( + mktime(mtime.timetuple()), + file_size, + adler32(real_filename) & 0xFFFFFFFF, + ) + + def __call__(self, environ, start_response): + cleaned_path = get_path_info(environ) + + if PY2: + cleaned_path = cleaned_path.encode(get_filesystem_encoding()) + + # sanitize the path for non unix systems + cleaned_path = cleaned_path.strip("/") + + for sep in os.sep, os.altsep: + if sep and sep != "/": + cleaned_path = cleaned_path.replace(sep, "/") + + path = "/" + "/".join(x for x in cleaned_path.split("/") if x and x != "..") + file_loader = None + + for search_path, loader in self.exports: + if search_path == path: + real_filename, file_loader = loader(None) + + if file_loader is not None: + break + + if not search_path.endswith("/"): + search_path += "/" + + if path.startswith(search_path): + real_filename, file_loader = loader(path[len(search_path) :]) + + if file_loader is not None: + break + + if file_loader is None or not self.is_allowed(real_filename): + return self.app(environ, 
start_response) + + guessed_type = mimetypes.guess_type(real_filename) + mime_type = guessed_type[0] or self.fallback_mimetype + f, mtime, file_size = file_loader() + + headers = [("Date", http_date())] + + if self.cache: + timeout = self.cache_timeout + etag = self.generate_etag(mtime, file_size, real_filename) + headers += [ + ("Etag", '"%s"' % etag), + ("Cache-Control", "max-age=%d, public" % timeout), + ] + + if not is_resource_modified(environ, etag, last_modified=mtime): + f.close() + start_response("304 Not Modified", headers) + return [] + + headers.append(("Expires", http_date(time() + timeout))) + else: + headers.append(("Cache-Control", "public")) + + headers.extend( + ( + ("Content-Type", mime_type), + ("Content-Length", str(file_size)), + ("Last-Modified", http_date(mtime)), + ) + ) + start_response("200 OK", headers) + return wrap_file(environ, f) diff --git a/python/werkzeug/posixemulation.py b/python/werkzeug/posixemulation.py new file mode 100644 index 0000000..696b456 --- /dev/null +++ b/python/werkzeug/posixemulation.py @@ -0,0 +1,117 @@ +# -*- coding: utf-8 -*- +r""" + werkzeug.posixemulation + ~~~~~~~~~~~~~~~~~~~~~~~ + + Provides a POSIX emulation for some features that are relevant to + web applications. The main purpose is to simplify support for + systems such as Windows NT that are not 100% POSIX compatible. + + Currently this only implements a :func:`rename` function that + follows POSIX semantics. Eg: if the target file already exists it + will be replaced without asking. + + This module was introduced in 0.6.1 and is not a public interface. + It might become one in later versions of Werkzeug. + + :copyright: 2007 Pallets + :license: BSD-3-Clause +""" +import errno +import os +import random +import sys +import time + +from ._compat import to_unicode +from .filesystem import get_filesystem_encoding + +can_rename_open_file = False + +if os.name == "nt": + try: + import ctypes + + _MOVEFILE_REPLACE_EXISTING = 0x1 + _MOVEFILE_WRITE_THROUGH = 0x8 + _MoveFileEx = ctypes.windll.kernel32.MoveFileExW + + def _rename(src, dst): + src = to_unicode(src, get_filesystem_encoding()) + dst = to_unicode(dst, get_filesystem_encoding()) + if _rename_atomic(src, dst): + return True + retry = 0 + rv = False + while not rv and retry < 100: + rv = _MoveFileEx( + src, dst, _MOVEFILE_REPLACE_EXISTING | _MOVEFILE_WRITE_THROUGH + ) + if not rv: + time.sleep(0.001) + retry += 1 + return rv + + # new in Vista and Windows Server 2008 + _CreateTransaction = ctypes.windll.ktmw32.CreateTransaction + _CommitTransaction = ctypes.windll.ktmw32.CommitTransaction + _MoveFileTransacted = ctypes.windll.kernel32.MoveFileTransactedW + _CloseHandle = ctypes.windll.kernel32.CloseHandle + can_rename_open_file = True + + def _rename_atomic(src, dst): + ta = _CreateTransaction(None, 0, 0, 0, 0, 1000, "Werkzeug rename") + if ta == -1: + return False + try: + retry = 0 + rv = False + while not rv and retry < 100: + rv = _MoveFileTransacted( + src, + dst, + None, + None, + _MOVEFILE_REPLACE_EXISTING | _MOVEFILE_WRITE_THROUGH, + ta, + ) + if rv: + rv = _CommitTransaction(ta) + break + else: + time.sleep(0.001) + retry += 1 + return rv + finally: + _CloseHandle(ta) + + except Exception: + + def _rename(src, dst): + return False + + def _rename_atomic(src, dst): + return False + + def rename(src, dst): + # Try atomic or pseudo-atomic rename + if _rename(src, dst): + return + # Fall back to "move away and replace" + try: + os.rename(src, dst) + except OSError as e: + if e.errno != errno.EEXIST: + raise + old = 
"%s-%08x" % (dst, random.randint(0, sys.maxsize)) + os.rename(dst, old) + os.rename(src, dst) + try: + os.unlink(old) + except Exception: + pass + + +else: + rename = os.rename + can_rename_open_file = True diff --git a/python/werkzeug/routing.py b/python/werkzeug/routing.py new file mode 100644 index 0000000..6b1dd98 --- /dev/null +++ b/python/werkzeug/routing.py @@ -0,0 +1,2026 @@ +# -*- coding: utf-8 -*- +""" + werkzeug.routing + ~~~~~~~~~~~~~~~~ + + When it comes to combining multiple controller or view functions (however + you want to call them) you need a dispatcher. A simple way would be + applying regular expression tests on the ``PATH_INFO`` and calling + registered callback functions that return the value then. + + This module implements a much more powerful system than simple regular + expression matching because it can also convert values in the URLs and + build URLs. + + Here a simple example that creates an URL map for an application with + two subdomains (www and kb) and some URL rules: + + >>> m = Map([ + ... # Static URLs + ... Rule('/', endpoint='static/index'), + ... Rule('/about', endpoint='static/about'), + ... Rule('/help', endpoint='static/help'), + ... # Knowledge Base + ... Subdomain('kb', [ + ... Rule('/', endpoint='kb/index'), + ... Rule('/browse/', endpoint='kb/browse'), + ... Rule('/browse/<int:id>/', endpoint='kb/browse'), + ... Rule('/browse/<int:id>/<int:page>', endpoint='kb/browse') + ... ]) + ... ], default_subdomain='www') + + If the application doesn't use subdomains it's perfectly fine to not set + the default subdomain and not use the `Subdomain` rule factory. The endpoint + in the rules can be anything, for example import paths or unique + identifiers. The WSGI application can use those endpoints to get the + handler for that URL. It doesn't have to be a string at all but it's + recommended. + + Now it's possible to create a URL adapter for one of the subdomains and + build URLs: + + >>> c = m.bind('example.com') + >>> c.build("kb/browse", dict(id=42)) + 'http://kb.example.com/browse/42/' + >>> c.build("kb/browse", dict()) + 'http://kb.example.com/browse/' + >>> c.build("kb/browse", dict(id=42, page=3)) + 'http://kb.example.com/browse/42/3' + >>> c.build("static/about") + '/about' + >>> c.build("static/index", force_external=True) + 'http://www.example.com/' + + >>> c = m.bind('example.com', subdomain='kb') + >>> c.build("static/about") + 'http://www.example.com/about' + + The first argument to bind is the server name *without* the subdomain. + Per default it will assume that the script is mounted on the root, but + often that's not the case so you can provide the real mount point as + second argument: + + >>> c = m.bind('example.com', '/applications/example') + + The third argument can be the subdomain, if not given the default + subdomain is used. For more details about binding have a look at the + documentation of the `MapAdapter`. + + And here is how you can match URLs: + + >>> c = m.bind('example.com') + >>> c.match("/") + ('static/index', {}) + >>> c.match("/about") + ('static/about', {}) + >>> c = m.bind('example.com', '/', 'kb') + >>> c.match("/") + ('kb/index', {}) + >>> c.match("/browse/42/23") + ('kb/browse', {'id': 42, 'page': 23}) + + If matching fails you get a `NotFound` exception, if the rule thinks + it's a good idea to redirect (for example because the URL was defined + to have a slash at the end but the request was missing that slash) it + will raise a `RequestRedirect` exception. 
Both are subclasses of the + `HTTPException` so you can use those errors as responses in the + application. + + If matching succeeded but the URL rule was incompatible to the given + method (for example there were only rules for `GET` and `HEAD` and + routing system tried to match a `POST` request) a `MethodNotAllowed` + exception is raised. + + + :copyright: 2007 Pallets + :license: BSD-3-Clause +""" +import ast +import difflib +import posixpath +import re +import uuid +from pprint import pformat +from threading import Lock + +from ._compat import implements_to_string +from ._compat import iteritems +from ._compat import itervalues +from ._compat import native_string_result +from ._compat import string_types +from ._compat import text_type +from ._compat import to_bytes +from ._compat import to_unicode +from ._compat import wsgi_decoding_dance +from ._internal import _encode_idna +from ._internal import _get_environ +from .datastructures import ImmutableDict +from .datastructures import MultiDict +from .exceptions import BadHost +from .exceptions import HTTPException +from .exceptions import MethodNotAllowed +from .exceptions import NotFound +from .urls import _fast_url_quote +from .urls import url_encode +from .urls import url_join +from .urls import url_quote +from .utils import cached_property +from .utils import format_string +from .utils import redirect +from .wsgi import get_host + +_rule_re = re.compile( + r""" + (?P<static>[^<]*) # static rule data + < + (?: + (?P<converter>[a-zA-Z_][a-zA-Z0-9_]*) # converter name + (?:\((?P<args>.*?)\))? # converter arguments + \: # variable delimiter + )? + (?P<variable>[a-zA-Z_][a-zA-Z0-9_]*) # variable name + > + """, + re.VERBOSE, +) +_simple_rule_re = re.compile(r"<([^>]+)>") +_converter_args_re = re.compile( + r""" + ((?P<name>\w+)\s*=\s*)? + (?P<value> + True|False| + \d+.\d+| + \d+.| + \d+| + [\w\d_.]+| + [urUR]?(?P<stringval>"[^"]*?"|'[^']*') + )\s*, + """, + re.VERBOSE | re.UNICODE, +) + + +_PYTHON_CONSTANTS = {"None": None, "True": True, "False": False} + + +def _pythonize(value): + if value in _PYTHON_CONSTANTS: + return _PYTHON_CONSTANTS[value] + for convert in int, float: + try: + return convert(value) + except ValueError: + pass + if value[:1] == value[-1:] and value[0] in "\"'": + value = value[1:-1] + return text_type(value) + + +def parse_converter_args(argstr): + argstr += "," + args = [] + kwargs = {} + + for item in _converter_args_re.finditer(argstr): + value = item.group("stringval") + if value is None: + value = item.group("value") + value = _pythonize(value) + if not item.group("name"): + args.append(value) + else: + name = item.group("name") + kwargs[name] = value + + return tuple(args), kwargs + + +def parse_rule(rule): + """Parse a rule and return it as generator. Each iteration yields tuples + in the form ``(converter, arguments, variable)``. If the converter is + `None` it's a static url part, otherwise it's a dynamic one. + + :internal: + """ + pos = 0 + end = len(rule) + do_match = _rule_re.match + used_names = set() + while pos < end: + m = do_match(rule, pos) + if m is None: + break + data = m.groupdict() + if data["static"]: + yield None, None, data["static"] + variable = data["variable"] + converter = data["converter"] or "default" + if variable in used_names: + raise ValueError("variable name %r used twice." 
% variable) + used_names.add(variable) + yield converter, data["args"] or None, variable + pos = m.end() + if pos < end: + remaining = rule[pos:] + if ">" in remaining or "<" in remaining: + raise ValueError("malformed url rule: %r" % rule) + yield None, None, remaining + + +class RoutingException(Exception): + """Special exceptions that require the application to redirect, notifying + about missing urls, etc. + + :internal: + """ + + +class RequestRedirect(HTTPException, RoutingException): + """Raise if the map requests a redirect. This is for example the case if + `strict_slashes` are activated and an url that requires a trailing slash. + + The attribute `new_url` contains the absolute destination url. + """ + + code = 308 + + def __init__(self, new_url): + RoutingException.__init__(self, new_url) + self.new_url = new_url + + def get_response(self, environ): + return redirect(self.new_url, self.code) + + +class RequestSlash(RoutingException): + """Internal exception.""" + + +class RequestAliasRedirect(RoutingException): # noqa: B903 + """This rule is an alias and wants to redirect to the canonical URL.""" + + def __init__(self, matched_values): + self.matched_values = matched_values + + +@implements_to_string +class BuildError(RoutingException, LookupError): + """Raised if the build system cannot find a URL for an endpoint with the + values provided. + """ + + def __init__(self, endpoint, values, method, adapter=None): + LookupError.__init__(self, endpoint, values, method) + self.endpoint = endpoint + self.values = values + self.method = method + self.adapter = adapter + + @cached_property + def suggested(self): + return self.closest_rule(self.adapter) + + def closest_rule(self, adapter): + def _score_rule(rule): + return sum( + [ + 0.98 + * difflib.SequenceMatcher( + None, rule.endpoint, self.endpoint + ).ratio(), + 0.01 * bool(set(self.values or ()).issubset(rule.arguments)), + 0.01 * bool(rule.methods and self.method in rule.methods), + ] + ) + + if adapter and adapter.map._rules: + return max(adapter.map._rules, key=_score_rule) + + def __str__(self): + message = [] + message.append("Could not build url for endpoint %r" % self.endpoint) + if self.method: + message.append(" (%r)" % self.method) + if self.values: + message.append(" with values %r" % sorted(self.values.keys())) + message.append(".") + if self.suggested: + if self.endpoint == self.suggested.endpoint: + if self.method and self.method not in self.suggested.methods: + message.append( + " Did you mean to use methods %r?" + % sorted(self.suggested.methods) + ) + missing_values = self.suggested.arguments.union( + set(self.suggested.defaults or ()) + ) - set(self.values.keys()) + if missing_values: + message.append( + " Did you forget to specify values %r?" % sorted(missing_values) + ) + else: + message.append(" Did you mean %r instead?" % self.suggested.endpoint) + return u"".join(message) + + +class ValidationError(ValueError): + """Validation error. If a rule converter raises this exception the rule + does not match the current URL and the next URL is tried. + """ + + +class RuleFactory(object): + """As soon as you have more complex URL setups it's a good idea to use rule + factories to avoid repetitive tasks. Some of them are builtin, others can + be added by subclassing `RuleFactory` and overriding `get_rules`. 
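A sketch of such a subclass; ``HostPin`` is invented here and mirrors the built-in factories that follow:

    class HostPin(RuleFactory):
        """Pin every wrapped rule to one host (hypothetical example)."""

        def __init__(self, host, rules):
            self.host = host
            self.rules = rules

        def get_rules(self, map):
            for rulefactory in self.rules:
                for rule in rulefactory.get_rules(map):
                    rule = rule.empty()
                    rule.host = self.host
                    yield rule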
+ """ + + def get_rules(self, map): + """Subclasses of `RuleFactory` have to override this method and return + an iterable of rules.""" + raise NotImplementedError() + + +class Subdomain(RuleFactory): + """All URLs provided by this factory have the subdomain set to a + specific domain. For example if you want to use the subdomain for + the current language this can be a good setup:: + + url_map = Map([ + Rule('/', endpoint='#select_language'), + Subdomain('<string(length=2):lang_code>', [ + Rule('/', endpoint='index'), + Rule('/about', endpoint='about'), + Rule('/help', endpoint='help') + ]) + ]) + + All the rules except for the ``'#select_language'`` endpoint will now + listen on a two letter long subdomain that holds the language code + for the current request. + """ + + def __init__(self, subdomain, rules): + self.subdomain = subdomain + self.rules = rules + + def get_rules(self, map): + for rulefactory in self.rules: + for rule in rulefactory.get_rules(map): + rule = rule.empty() + rule.subdomain = self.subdomain + yield rule + + +class Submount(RuleFactory): + """Like `Subdomain` but prefixes the URL rule with a given string:: + + url_map = Map([ + Rule('/', endpoint='index'), + Submount('/blog', [ + Rule('/', endpoint='blog/index'), + Rule('/entry/<entry_slug>', endpoint='blog/show') + ]) + ]) + + Now the rule ``'blog/show'`` matches ``/blog/entry/<entry_slug>``. + """ + + def __init__(self, path, rules): + self.path = path.rstrip("/") + self.rules = rules + + def get_rules(self, map): + for rulefactory in self.rules: + for rule in rulefactory.get_rules(map): + rule = rule.empty() + rule.rule = self.path + rule.rule + yield rule + + +class EndpointPrefix(RuleFactory): + """Prefixes all endpoints (which must be strings for this factory) with + another string. This can be useful for sub applications:: + + url_map = Map([ + Rule('/', endpoint='index'), + EndpointPrefix('blog/', [Submount('/blog', [ + Rule('/', endpoint='index'), + Rule('/entry/<entry_slug>', endpoint='show') + ])]) + ]) + """ + + def __init__(self, prefix, rules): + self.prefix = prefix + self.rules = rules + + def get_rules(self, map): + for rulefactory in self.rules: + for rule in rulefactory.get_rules(map): + rule = rule.empty() + rule.endpoint = self.prefix + rule.endpoint + yield rule + + +class RuleTemplate(object): + """Returns copies of the rules wrapped and expands string templates in + the endpoint, rule, defaults or subdomain sections. + + Here a small example for such a rule template:: + + from werkzeug.routing import Map, Rule, RuleTemplate + + resource = RuleTemplate([ + Rule('/$name/', endpoint='$name.list'), + Rule('/$name/<int:id>', endpoint='$name.show') + ]) + + url_map = Map([resource(name='user'), resource(name='page')]) + + When a rule template is called the keyword arguments are used to + replace the placeholders in all the string parameters. + """ + + def __init__(self, rules): + self.rules = list(rules) + + def __call__(self, *args, **kwargs): + return RuleTemplateFactory(self.rules, dict(*args, **kwargs)) + + +class RuleTemplateFactory(RuleFactory): + """A factory that fills in template variables into rules. Used by + `RuleTemplate` internally. 
+ + :internal: + """ + + def __init__(self, rules, context): + self.rules = rules + self.context = context + + def get_rules(self, map): + for rulefactory in self.rules: + for rule in rulefactory.get_rules(map): + new_defaults = subdomain = None + if rule.defaults: + new_defaults = {} + for key, value in iteritems(rule.defaults): + if isinstance(value, string_types): + value = format_string(value, self.context) + new_defaults[key] = value + if rule.subdomain is not None: + subdomain = format_string(rule.subdomain, self.context) + new_endpoint = rule.endpoint + if isinstance(new_endpoint, string_types): + new_endpoint = format_string(new_endpoint, self.context) + yield Rule( + format_string(rule.rule, self.context), + new_defaults, + subdomain, + rule.methods, + rule.build_only, + new_endpoint, + rule.strict_slashes, + ) + + +def _prefix_names(src): + """ast parse and prefix names with `.` to avoid collision with user vars""" + tree = ast.parse(src).body[0] + if isinstance(tree, ast.Expr): + tree = tree.value + for node in ast.walk(tree): + if isinstance(node, ast.Name): + node.id = "." + node.id + return tree + + +_CALL_CONVERTER_CODE_FMT = "self._converters[{elem!r}].to_url()" +_IF_KWARGS_URL_ENCODE_CODE = """\ +if kwargs: + q = '?' + params = self._encode_query_vars(kwargs) +else: + q = params = '' +""" +_IF_KWARGS_URL_ENCODE_AST = _prefix_names(_IF_KWARGS_URL_ENCODE_CODE) +_URL_ENCODE_AST_NAMES = (_prefix_names("q"), _prefix_names("params")) + + +@implements_to_string +class Rule(RuleFactory): + """A Rule represents one URL pattern. There are some options for `Rule` + that change the way it behaves and are passed to the `Rule` constructor. + Note that besides the rule-string all arguments *must* be keyword arguments + in order to not break the application on Werkzeug upgrades. + + `string` + Rule strings basically are just normal URL paths with placeholders in + the format ``<converter(arguments):name>`` where the converter and the + arguments are optional. If no converter is defined the `default` + converter is used which means `string` in the normal configuration. + + URL rules that end with a slash are branch URLs, others are leaves. + If you have `strict_slashes` enabled (which is the default), all + branch URLs that are matched without a trailing slash will trigger a + redirect to the same URL with the missing slash appended. + + The converters are defined on the `Map`. + + `endpoint` + The endpoint for this rule. This can be anything. A reference to a + function, a string, a number etc. The preferred way is using a string + because the endpoint is used for URL generation. + + `defaults` + An optional dict with defaults for other rules with the same endpoint. + This is a bit tricky but useful if you want to have unique URLs:: + + url_map = Map([ + Rule('/all/', defaults={'page': 1}, endpoint='all_entries'), + Rule('/all/page/<int:page>', endpoint='all_entries') + ]) + + If a user now visits ``http://example.com/all/page/1`` he will be + redirected to ``http://example.com/all/``. If `redirect_defaults` is + disabled on the `Map` instance this will only affect the URL + generation. + + `subdomain` + The subdomain rule string for this rule. If not specified the rule + only matches for the `default_subdomain` of the map. If the map is + not bound to a subdomain this feature is disabled. 
+ + Can be useful if you want to have user profiles on different subdomains + and all subdomains are forwarded to your application:: + + url_map = Map([ + Rule('/', subdomain='<username>', endpoint='user/homepage'), + Rule('/stats', subdomain='<username>', endpoint='user/stats') + ]) + + `methods` + A sequence of http methods this rule applies to. If not specified, all + methods are allowed. For example this can be useful if you want different + endpoints for `POST` and `GET`. If methods are defined and the path + matches but the method matched against is not in this list or in the + list of another rule for that path the error raised is of the type + `MethodNotAllowed` rather than `NotFound`. If `GET` is present in the + list of methods and `HEAD` is not, `HEAD` is added automatically. + + .. versionchanged:: 0.6.1 + `HEAD` is now automatically added to the methods if `GET` is + present. The reason for this is that existing code often did not + work properly in servers not rewriting `HEAD` to `GET` + automatically and it was not documented how `HEAD` should be + treated. This was considered a bug in Werkzeug because of that. + + `strict_slashes` + Override the `Map` setting for `strict_slashes` only for this rule. If + not specified the `Map` setting is used. + + `build_only` + Set this to True and the rule will never match but will create a URL + that can be build. This is useful if you have resources on a subdomain + or folder that are not handled by the WSGI application (like static data) + + `redirect_to` + If given this must be either a string or callable. In case of a + callable it's called with the url adapter that triggered the match and + the values of the URL as keyword arguments and has to return the target + for the redirect, otherwise it has to be a string with placeholders in + rule syntax:: + + def foo_with_slug(adapter, id): + # ask the database for the slug for the old id. this of + # course has nothing to do with werkzeug. + return 'foo/' + Foo.get_slug_for_id(id) + + url_map = Map([ + Rule('/foo/<slug>', endpoint='foo'), + Rule('/some/old/url/<slug>', redirect_to='foo/<slug>'), + Rule('/other/old/url/<int:id>', redirect_to=foo_with_slug) + ]) + + When the rule is matched the routing system will raise a + `RequestRedirect` exception with the target for the redirect. + + Keep in mind that the URL will be joined against the URL root of the + script so don't use a leading slash on the target URL unless you + really mean root of that domain. + + `alias` + If enabled this rule serves as an alias for another rule with the same + endpoint and arguments. + + `host` + If provided and the URL map has host matching enabled this can be + used to provide a match rule for the whole host. This also means + that the subdomain feature is disabled. + + .. versionadded:: 0.7 + The `alias` and `host` parameters were added. 
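A small worked example of the placeholder syntax described above, using converter arguments (endpoint name and values are illustrative):

    url_map = Map([
        Rule("/archive/<int(fixed_digits=4):year>/", endpoint="archive"),
    ])
    adapter = url_map.bind("example.com")
    adapter.match("/archive/2019/")  # -> ('archive', {'year': 2019})
    adapter.match("/archive/19/")    # raises NotFound: exactly four digits required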
+ """ + + def __init__( + self, + string, + defaults=None, + subdomain=None, + methods=None, + build_only=False, + endpoint=None, + strict_slashes=None, + redirect_to=None, + alias=False, + host=None, + ): + if not string.startswith("/"): + raise ValueError("urls must start with a leading slash") + self.rule = string + self.is_leaf = not string.endswith("/") + + self.map = None + self.strict_slashes = strict_slashes + self.subdomain = subdomain + self.host = host + self.defaults = defaults + self.build_only = build_only + self.alias = alias + if methods is None: + self.methods = None + else: + if isinstance(methods, str): + raise TypeError("param `methods` should be `Iterable[str]`, not `str`") + self.methods = set([x.upper() for x in methods]) + if "HEAD" not in self.methods and "GET" in self.methods: + self.methods.add("HEAD") + self.endpoint = endpoint + self.redirect_to = redirect_to + + if defaults: + self.arguments = set(map(str, defaults)) + else: + self.arguments = set() + self._trace = self._converters = self._regex = self._argument_weights = None + + def empty(self): + """ + Return an unbound copy of this rule. + + This can be useful if want to reuse an already bound URL for another + map. See ``get_empty_kwargs`` to override what keyword arguments are + provided to the new copy. + """ + return type(self)(self.rule, **self.get_empty_kwargs()) + + def get_empty_kwargs(self): + """ + Provides kwargs for instantiating empty copy with empty() + + Use this method to provide custom keyword arguments to the subclass of + ``Rule`` when calling ``some_rule.empty()``. Helpful when the subclass + has custom keyword arguments that are needed at instantiation. + + Must return a ``dict`` that will be provided as kwargs to the new + instance of ``Rule``, following the initial ``self.rule`` value which + is always provided as the first, required positional argument. + """ + defaults = None + if self.defaults: + defaults = dict(self.defaults) + return dict( + defaults=defaults, + subdomain=self.subdomain, + methods=self.methods, + build_only=self.build_only, + endpoint=self.endpoint, + strict_slashes=self.strict_slashes, + redirect_to=self.redirect_to, + alias=self.alias, + host=self.host, + ) + + def get_rules(self, map): + yield self + + def refresh(self): + """Rebinds and refreshes the URL. Call this if you modified the + rule in place. + + :internal: + """ + self.bind(self.map, rebind=True) + + def bind(self, map, rebind=False): + """Bind the url to a map and create a regular expression based on + the information from the rule itself and the defaults from the map. + + :internal: + """ + if self.map is not None and not rebind: + raise RuntimeError("url rule %r already bound to map %r" % (self, self.map)) + self.map = map + if self.strict_slashes is None: + self.strict_slashes = map.strict_slashes + if self.subdomain is None: + self.subdomain = map.default_subdomain + self.compile() + + def get_converter(self, variable_name, converter_name, args, kwargs): + """Looks up the converter for the given parameter. + + .. 
versionadded:: 0.9 + """ + if converter_name not in self.map.converters: + raise LookupError("the converter %r does not exist" % converter_name) + return self.map.converters[converter_name](self.map, *args, **kwargs) + + def _encode_query_vars(self, query_vars): + return url_encode( + query_vars, + charset=self.map.charset, + sort=self.map.sort_parameters, + key=self.map.sort_key, + ) + + def compile(self): + """Compiles the regular expression and stores it.""" + assert self.map is not None, "rule not bound" + + if self.map.host_matching: + domain_rule = self.host or "" + else: + domain_rule = self.subdomain or "" + + self._trace = [] + self._converters = {} + self._static_weights = [] + self._argument_weights = [] + regex_parts = [] + + def _build_regex(rule): + index = 0 + for converter, arguments, variable in parse_rule(rule): + if converter is None: + regex_parts.append(re.escape(variable)) + self._trace.append((False, variable)) + for part in variable.split("/"): + if part: + self._static_weights.append((index, -len(part))) + else: + if arguments: + c_args, c_kwargs = parse_converter_args(arguments) + else: + c_args = () + c_kwargs = {} + convobj = self.get_converter(variable, converter, c_args, c_kwargs) + regex_parts.append("(?P<%s>%s)" % (variable, convobj.regex)) + self._converters[variable] = convobj + self._trace.append((True, variable)) + self._argument_weights.append(convobj.weight) + self.arguments.add(str(variable)) + index = index + 1 + + _build_regex(domain_rule) + regex_parts.append("\\|") + self._trace.append((False, "|")) + _build_regex(self.rule if self.is_leaf else self.rule.rstrip("/")) + if not self.is_leaf: + self._trace.append((False, "/")) + + self._build = self._compile_builder(False).__get__(self, None) + self._build_unknown = self._compile_builder(True).__get__(self, None) + + if self.build_only: + return + regex = r"^%s%s$" % ( + u"".join(regex_parts), + (not self.is_leaf or not self.strict_slashes) + and "(?<!/)(?P<__suffix__>/?)" + or "", + ) + self._regex = re.compile(regex, re.UNICODE) + + def match(self, path, method=None): + """Check if the rule matches a given path. Path is a string in the + form ``"subdomain|/path"`` and is assembled by the map. If + the map is doing host matching the subdomain part will be the host + instead. + + If the rule matches a dict with the converted values is returned, + otherwise the return value is `None`. + + :internal: + """ + if not self.build_only: + m = self._regex.search(path) + if m is not None: + groups = m.groupdict() + # we have a folder like part of the url without a trailing + # slash and strict slashes enabled. 
raise an exception that + # tells the map to redirect to the same url but with a + # trailing slash + if ( + self.strict_slashes + and not self.is_leaf + and not groups.pop("__suffix__") + and ( + method is None or self.methods is None or method in self.methods + ) + ): + raise RequestSlash() + # if we are not in strict slashes mode we have to remove + # a __suffix__ + elif not self.strict_slashes: + del groups["__suffix__"] + + result = {} + for name, value in iteritems(groups): + try: + value = self._converters[name].to_python(value) + except ValidationError: + return + result[str(name)] = value + if self.defaults: + result.update(self.defaults) + + if self.alias and self.map.redirect_defaults: + raise RequestAliasRedirect(result) + + return result + + @staticmethod + def _get_func_code(code, name): + globs, locs = {}, {} + exec(code, globs, locs) + return locs[name] + + def _compile_builder(self, append_unknown=True): + defaults = self.defaults or {} + dom_ops = [] + url_ops = [] + + opl = dom_ops + for is_dynamic, data in self._trace: + if data == "|" and opl is dom_ops: + opl = url_ops + continue + # this seems like a silly case to ever come up but: + # if a default is given for a value that appears in the rule, + # resolve it to a constant ahead of time + if is_dynamic and data in defaults: + data = self._converters[data].to_url(defaults[data]) + opl.append((False, data)) + elif not is_dynamic: + opl.append( + (False, url_quote(to_bytes(data, self.map.charset), safe="/:|+")) + ) + else: + opl.append((True, data)) + + def _convert(elem): + ret = _prefix_names(_CALL_CONVERTER_CODE_FMT.format(elem=elem)) + ret.args = [ast.Name(str(elem), ast.Load())] # str for py2 + return ret + + def _parts(ops): + parts = [ + _convert(elem) if is_dynamic else ast.Str(s=elem) + for is_dynamic, elem in ops + ] + parts = parts or [ast.Str("")] + # constant fold + ret = [parts[0]] + for p in parts[1:]: + if isinstance(p, ast.Str) and isinstance(ret[-1], ast.Str): + ret[-1] = ast.Str(ret[-1].s + p.s) + else: + ret.append(p) + return ret + + dom_parts = _parts(dom_ops) + url_parts = _parts(url_ops) + if not append_unknown: + body = [] + else: + body = [_IF_KWARGS_URL_ENCODE_AST] + url_parts.extend(_URL_ENCODE_AST_NAMES) + + def _join(parts): + if len(parts) == 1: # shortcut + return parts[0] + elif hasattr(ast, "JoinedStr"): # py36+ + return ast.JoinedStr(parts) + else: + call = _prefix_names('"".join()') + call.args = [ast.Tuple(parts, ast.Load())] + return call + + body.append( + ast.Return(ast.Tuple([_join(dom_parts), _join(url_parts)], ast.Load())) + ) + + # str is necessary for python2 + pargs = [ + str(elem) + for is_dynamic, elem in dom_ops + url_ops + if is_dynamic and elem not in defaults + ] + kargs = [str(k) for k in defaults] + + func_ast = _prefix_names("def _(): pass") + func_ast.name = "<builder:{!r}>".format(self.rule) + if hasattr(ast, "arg"): # py3 + func_ast.args.args.append(ast.arg(".self", None)) + for arg in pargs + kargs: + func_ast.args.args.append(ast.arg(arg, None)) + func_ast.args.kwarg = ast.arg(".kwargs", None) + else: + func_ast.args.args.append(ast.Name(".self", ast.Load())) + for arg in pargs + kargs: + func_ast.args.args.append(ast.Name(arg, ast.Load())) + func_ast.args.kwarg = ".kwargs" + for _ in kargs: + func_ast.args.defaults.append(ast.Str("")) + func_ast.body = body + + module = ast.fix_missing_locations(ast.Module([func_ast])) + code = compile(module, "<werkzeug routing>", "exec") + return self._get_func_code(code, func_ast.name) + + def build(self, values, 
append_unknown=True): + """Assembles the relative url for that rule and the subdomain. + If building doesn't work for some reasons `None` is returned. + + :internal: + """ + try: + if append_unknown: + return self._build_unknown(**values) + else: + return self._build(**values) + except ValidationError: + return None + + def provides_defaults_for(self, rule): + """Check if this rule has defaults for a given rule. + + :internal: + """ + return ( + not self.build_only + and self.defaults + and self.endpoint == rule.endpoint + and self != rule + and self.arguments == rule.arguments + ) + + def suitable_for(self, values, method=None): + """Check if the dict of values has enough data for url generation. + + :internal: + """ + # if a method was given explicitly and that method is not supported + # by this rule, this rule is not suitable. + if ( + method is not None + and self.methods is not None + and method not in self.methods + ): + return False + + defaults = self.defaults or () + + # all arguments required must be either in the defaults dict or + # the value dictionary otherwise it's not suitable + for key in self.arguments: + if key not in defaults and key not in values: + return False + + # in case defaults are given we ensure that either the value was + # skipped or the value is the same as the default value. + if defaults: + for key, value in iteritems(defaults): + if key in values and value != values[key]: + return False + + return True + + def match_compare_key(self): + """The match compare key for sorting. + + Current implementation: + + 1. rules without any arguments come first for performance + reasons only as we expect them to match faster and some + common ones usually don't have any arguments (index pages etc.) + 2. rules with more static parts come first so the second argument + is the negative length of the number of the static weights. + 3. we order by static weights, which is a combination of index + and length + 4. The more complex rules come first so the next argument is the + negative length of the number of argument weights. + 5. lastly we order by the actual argument weights. + + :internal: + """ + return ( + bool(self.arguments), + -len(self._static_weights), + self._static_weights, + -len(self._argument_weights), + self._argument_weights, + ) + + def build_compare_key(self): + """The build compare key for sorting. 
+ + :internal: + """ + return 1 if self.alias else 0, -len(self.arguments), -len(self.defaults or ()) + + def __eq__(self, other): + return self.__class__ is other.__class__ and self._trace == other._trace + + __hash__ = None + + def __ne__(self, other): + return not self.__eq__(other) + + def __str__(self): + return self.rule + + @native_string_result + def __repr__(self): + if self.map is None: + return u"<%s (unbound)>" % self.__class__.__name__ + tmp = [] + for is_dynamic, data in self._trace: + if is_dynamic: + tmp.append(u"<%s>" % data) + else: + tmp.append(data) + return u"<%s %s%s -> %s>" % ( + self.__class__.__name__, + repr((u"".join(tmp)).lstrip(u"|")).lstrip(u"u"), + self.methods is not None and u" (%s)" % u", ".join(self.methods) or u"", + self.endpoint, + ) + + +class BaseConverter(object): + """Base class for all converters.""" + + regex = "[^/]+" + weight = 100 + + def __init__(self, map): + self.map = map + + def to_python(self, value): + return value + + def to_url(self, value): + if isinstance(value, (bytes, bytearray)): + return _fast_url_quote(value) + return _fast_url_quote(text_type(value).encode(self.map.charset)) + + +class UnicodeConverter(BaseConverter): + """This converter is the default converter and accepts any string but + only one path segment. Thus the string can not include a slash. + + This is the default validator. + + Example:: + + Rule('/pages/<page>'), + Rule('/<string(length=2):lang_code>') + + :param map: the :class:`Map`. + :param minlength: the minimum length of the string. Must be greater + or equal 1. + :param maxlength: the maximum length of the string. + :param length: the exact length of the string. + """ + + def __init__(self, map, minlength=1, maxlength=None, length=None): + BaseConverter.__init__(self, map) + if length is not None: + length = "{%d}" % int(length) + else: + if maxlength is None: + maxlength = "" + else: + maxlength = int(maxlength) + length = "{%s,%s}" % (int(minlength), maxlength) + self.regex = "[^/]" + length + + +class AnyConverter(BaseConverter): + """Matches one of the items provided. Items can either be Python + identifiers or strings:: + + Rule('/<any(about, help, imprint, class, "foo,bar"):page_name>') + + :param map: the :class:`Map`. + :param items: this function accepts the possible items as positional + arguments. + """ + + def __init__(self, map, *items): + BaseConverter.__init__(self, map) + self.regex = "(?:%s)" % "|".join([re.escape(x) for x in items]) + + +class PathConverter(BaseConverter): + """Like the default :class:`UnicodeConverter`, but it also matches + slashes. This is useful for wikis and similar applications:: + + Rule('/<path:wikipage>') + Rule('/<path:wikipage>/edit') + + :param map: the :class:`Map`. + """ + + regex = "[^/].*?" + weight = 200 + + +class NumberConverter(BaseConverter): + """Baseclass for `IntegerConverter` and `FloatConverter`. 
+ + :internal: + """ + + weight = 50 + + def __init__(self, map, fixed_digits=0, min=None, max=None, signed=False): + if signed: + self.regex = self.signed_regex + BaseConverter.__init__(self, map) + self.fixed_digits = fixed_digits + self.min = min + self.max = max + self.signed = signed + + def to_python(self, value): + if self.fixed_digits and len(value) != self.fixed_digits: + raise ValidationError() + value = self.num_convert(value) + if (self.min is not None and value < self.min) or ( + self.max is not None and value > self.max + ): + raise ValidationError() + return value + + def to_url(self, value): + value = self.num_convert(value) + if self.fixed_digits: + value = ("%%0%sd" % self.fixed_digits) % value + return str(value) + + @property + def signed_regex(self): + return r"-?" + self.regex + + +class IntegerConverter(NumberConverter): + """This converter only accepts integer values:: + + Rule("/page/<int:page>") + + By default it only accepts unsigned, positive values. The ``signed`` + parameter will enable signed, negative values. :: + + Rule("/page/<int(signed=True):page>") + + :param map: The :class:`Map`. + :param fixed_digits: The number of fixed digits in the URL. If you + set this to ``4`` for example, the rule will only match if the + URL looks like ``/0001/``. The default is variable length. + :param min: The minimal value. + :param max: The maximal value. + :param signed: Allow signed (negative) values. + + .. versionadded:: 0.15 + The ``signed`` parameter. + """ + + regex = r"\d+" + num_convert = int + + +class FloatConverter(NumberConverter): + """This converter only accepts floating point values:: + + Rule("/probability/<float:probability>") + + By default it only accepts unsigned, positive values. The ``signed`` + parameter will enable signed, negative values. :: + + Rule("/offset/<float(signed=True):offset>") + + :param map: The :class:`Map`. + :param min: The minimal value. + :param max: The maximal value. + :param signed: Allow signed (negative) values. + + .. versionadded:: 0.15 + The ``signed`` parameter. + """ + + regex = r"\d+\.\d+" + num_convert = float + + def __init__(self, map, min=None, max=None, signed=False): + NumberConverter.__init__(self, map, min=min, max=max, signed=signed) + + +class UUIDConverter(BaseConverter): + """This converter only accepts UUID strings:: + + Rule('/object/<uuid:identifier>') + + .. versionadded:: 0.10 + + :param map: the :class:`Map`. + """ + + regex = ( + r"[A-Fa-f0-9]{8}-[A-Fa-f0-9]{4}-" + r"[A-Fa-f0-9]{4}-[A-Fa-f0-9]{4}-[A-Fa-f0-9]{12}" + ) + + def to_python(self, value): + return uuid.UUID(value) + + def to_url(self, value): + return str(value) + + +#: the default converter mapping for the map. +DEFAULT_CONVERTERS = { + "default": UnicodeConverter, + "string": UnicodeConverter, + "any": AnyConverter, + "path": PathConverter, + "int": IntegerConverter, + "float": FloatConverter, + "uuid": UUIDConverter, +} + + +class Map(object): + """The map class stores all the URL rules and some configuration + parameters. Some of the configuration values are only stored on the + `Map` instance since those affect all rules, others are just defaults + and can be overridden for each rule. Note that you have to specify all + arguments besides the `rules` as keyword arguments! + + :param rules: sequence of url rules for this map. + :param default_subdomain: The default subdomain for rules without a + subdomain defined. + :param charset: charset of the url. defaults to ``"utf-8"`` + :param strict_slashes: Take care of trailing slashes. 
+ :param redirect_defaults: This will redirect to the default rule if it + wasn't visited that way. This helps creating + unique URLs. + :param converters: A dict of converters that adds additional converters + to the list of converters. If you redefine one + converter this will override the original one. + :param sort_parameters: If set to `True` the url parameters are sorted. + See `url_encode` for more details. + :param sort_key: The sort key function for `url_encode`. + :param encoding_errors: the error method to use for decoding + :param host_matching: if set to `True` it enables the host matching + feature and disables the subdomain one. If + enabled the `host` parameter to rules is used + instead of the `subdomain` one. + + .. versionadded:: 0.5 + `sort_parameters` and `sort_key` was added. + + .. versionadded:: 0.7 + `encoding_errors` and `host_matching` was added. + """ + + #: A dict of default converters to be used. + default_converters = ImmutableDict(DEFAULT_CONVERTERS) + + def __init__( + self, + rules=None, + default_subdomain="", + charset="utf-8", + strict_slashes=True, + redirect_defaults=True, + converters=None, + sort_parameters=False, + sort_key=None, + encoding_errors="replace", + host_matching=False, + ): + self._rules = [] + self._rules_by_endpoint = {} + self._remap = True + self._remap_lock = Lock() + + self.default_subdomain = default_subdomain + self.charset = charset + self.encoding_errors = encoding_errors + self.strict_slashes = strict_slashes + self.redirect_defaults = redirect_defaults + self.host_matching = host_matching + + self.converters = self.default_converters.copy() + if converters: + self.converters.update(converters) + + self.sort_parameters = sort_parameters + self.sort_key = sort_key + + for rulefactory in rules or (): + self.add(rulefactory) + + def is_endpoint_expecting(self, endpoint, *arguments): + """Iterate over all rules and check if the endpoint expects + the arguments provided. This is for example useful if you have + some URLs that expect a language code and others that do not and + you want to wrap the builder a bit so that the current language + code is automatically added if not provided but endpoints expect + it. + + :param endpoint: the endpoint to check. + :param arguments: this function accepts one or more arguments + as positional arguments. Each one of them is + checked. + """ + self.update() + arguments = set(arguments) + for rule in self._rules_by_endpoint[endpoint]: + if arguments.issubset(rule.arguments): + return True + return False + + def iter_rules(self, endpoint=None): + """Iterate over all rules or the rules of an endpoint. + + :param endpoint: if provided only the rules for that endpoint + are returned. + :return: an iterator + """ + self.update() + if endpoint is not None: + return iter(self._rules_by_endpoint[endpoint]) + return iter(self._rules) + + def add(self, rulefactory): + """Add a new rule or factory to the map and bind it. Requires that the + rule is not bound to another map. + + :param rulefactory: a :class:`Rule` or :class:`RuleFactory` + """ + for rule in rulefactory.get_rules(self): + rule.bind(self) + self._rules.append(rule) + self._rules_by_endpoint.setdefault(rule.endpoint, []).append(rule) + self._remap = True + + def bind( + self, + server_name, + script_name=None, + subdomain=None, + url_scheme="http", + default_method="GET", + path_info=None, + query_args=None, + ): + """Return a new :class:`MapAdapter` with the details specified to the + call. 
Note that `script_name` will default to ``'/'`` if not further + specified or `None`. The `server_name` at least is a requirement + because the HTTP RFC requires absolute URLs for redirects and so all + redirect exceptions raised by Werkzeug will contain the full canonical + URL. + + If no path_info is passed to :meth:`match` it will use the default path + info passed to bind. While this doesn't really make sense for + manual bind calls, it's useful if you bind a map to a WSGI + environment which already contains the path info. + + `subdomain` will default to the `default_subdomain` for this map if + no defined. If there is no `default_subdomain` you cannot use the + subdomain feature. + + .. versionadded:: 0.7 + `query_args` added + + .. versionadded:: 0.8 + `query_args` can now also be a string. + + .. versionchanged:: 0.15 + ``path_info`` defaults to ``'/'`` if ``None``. + """ + server_name = server_name.lower() + if self.host_matching: + if subdomain is not None: + raise RuntimeError("host matching enabled and a subdomain was provided") + elif subdomain is None: + subdomain = self.default_subdomain + if script_name is None: + script_name = "/" + if path_info is None: + path_info = "/" + try: + server_name = _encode_idna(server_name) + except UnicodeError: + raise BadHost() + return MapAdapter( + self, + server_name, + script_name, + subdomain, + url_scheme, + path_info, + default_method, + query_args, + ) + + def bind_to_environ(self, environ, server_name=None, subdomain=None): + """Like :meth:`bind` but you can pass it an WSGI environment and it + will fetch the information from that dictionary. Note that because of + limitations in the protocol there is no way to get the current + subdomain and real `server_name` from the environment. If you don't + provide it, Werkzeug will use `SERVER_NAME` and `SERVER_PORT` (or + `HTTP_HOST` if provided) as used `server_name` with disabled subdomain + feature. + + If `subdomain` is `None` but an environment and a server name is + provided it will calculate the current subdomain automatically. + Example: `server_name` is ``'example.com'`` and the `SERVER_NAME` + in the wsgi `environ` is ``'staging.dev.example.com'`` the calculated + subdomain will be ``'staging.dev'``. + + If the object passed as environ has an environ attribute, the value of + this attribute is used instead. This allows you to pass request + objects. Additionally `PATH_INFO` added as a default of the + :class:`MapAdapter` so that you don't have to pass the path info to + the match method. + + .. versionchanged:: 0.5 + previously this method accepted a bogus `calculate_subdomain` + parameter that did not have any effect. It was removed because + of that. + + .. versionchanged:: 0.8 + This will no longer raise a ValueError when an unexpected server + name was passed. + + :param environ: a WSGI environment. + :param server_name: an optional server name hint (see above). + :param subdomain: optionally the current subdomain (see above). + """ + environ = _get_environ(environ) + + wsgi_server_name = get_host(environ).lower() + + if server_name is None: + server_name = wsgi_server_name + else: + server_name = server_name.lower() + + if subdomain is None and not self.host_matching: + cur_server_name = wsgi_server_name.split(".") + real_server_name = server_name.split(".") + offset = -len(real_server_name) + if cur_server_name[offset:] != real_server_name: + # This can happen even with valid configs if the server was + # accesssed directly by IP address under some situations. 
+ # Instead of raising an exception like in Werkzeug 0.7 or + # earlier we go by an invalid subdomain which will result + # in a 404 error on matching. + subdomain = "<invalid>" + else: + subdomain = ".".join(filter(None, cur_server_name[:offset])) + + def _get_wsgi_string(name): + val = environ.get(name) + if val is not None: + return wsgi_decoding_dance(val, self.charset) + + script_name = _get_wsgi_string("SCRIPT_NAME") + path_info = _get_wsgi_string("PATH_INFO") + query_args = _get_wsgi_string("QUERY_STRING") + return Map.bind( + self, + server_name, + script_name, + subdomain, + environ["wsgi.url_scheme"], + environ["REQUEST_METHOD"], + path_info, + query_args=query_args, + ) + + def update(self): + """Called before matching and building to keep the compiled rules + in the correct order after things changed. + """ + if not self._remap: + return + + with self._remap_lock: + if not self._remap: + return + + self._rules.sort(key=lambda x: x.match_compare_key()) + for rules in itervalues(self._rules_by_endpoint): + rules.sort(key=lambda x: x.build_compare_key()) + self._remap = False + + def __repr__(self): + rules = self.iter_rules() + return "%s(%s)" % (self.__class__.__name__, pformat(list(rules))) + + +class MapAdapter(object): + + """Returned by :meth:`Map.bind` or :meth:`Map.bind_to_environ` and does + the URL matching and building based on runtime information. + """ + + def __init__( + self, + map, + server_name, + script_name, + subdomain, + url_scheme, + path_info, + default_method, + query_args=None, + ): + self.map = map + self.server_name = to_unicode(server_name) + script_name = to_unicode(script_name) + if not script_name.endswith(u"/"): + script_name += u"/" + self.script_name = script_name + self.subdomain = to_unicode(subdomain) + self.url_scheme = to_unicode(url_scheme) + self.path_info = to_unicode(path_info) + self.default_method = to_unicode(default_method) + self.query_args = query_args + + def dispatch( + self, view_func, path_info=None, method=None, catch_http_exceptions=False + ): + """Does the complete dispatching process. `view_func` is called with + the endpoint and a dict with the values for the view. It should + look up the view function, call it, and return a response object + or WSGI application. http exceptions are not caught by default + so that applications can display nicer error messages by just + catching them by hand. If you want to stick with the default + error messages you can pass it ``catch_http_exceptions=True`` and + it will catch the http exceptions. + + Here a small example for the dispatch usage:: + + from werkzeug.wrappers import Request, Response + from werkzeug.wsgi import responder + from werkzeug.routing import Map, Rule + + def on_index(request): + return Response('Hello from the index') + + url_map = Map([Rule('/', endpoint='index')]) + views = {'index': on_index} + + @responder + def application(environ, start_response): + request = Request(environ) + urls = url_map.bind_to_environ(environ) + return urls.dispatch(lambda e, v: views[e](request, **v), + catch_http_exceptions=True) + + Keep in mind that this method might return exception objects, too, so + use :class:`Response.force_type` to get a response object. + + :param view_func: a function that is called with the endpoint as + first argument and the value dict as second. Has + to dispatch to the actual view function with this + information. (see above) + :param path_info: the path info to use for matching. Overrides the + path info specified on binding. 
+ :param method: the HTTP method used for matching. Overrides the + method specified on binding. + :param catch_http_exceptions: set to `True` to catch any of the + werkzeug :class:`HTTPException`\\s. + """ + try: + try: + endpoint, args = self.match(path_info, method) + except RequestRedirect as e: + return e + return view_func(endpoint, args) + except HTTPException as e: + if catch_http_exceptions: + return e + raise + + def match(self, path_info=None, method=None, return_rule=False, query_args=None): + """The usage is simple: you just pass the match method the current + path info as well as the method (which defaults to `GET`). The + following things can then happen: + + - you receive a `NotFound` exception that indicates that no URL is + matching. A `NotFound` exception is also a WSGI application you + can call to get a default page not found page (happens to be the + same object as `werkzeug.exceptions.NotFound`) + + - you receive a `MethodNotAllowed` exception that indicates that there + is a match for this URL but not for the current request method. + This is useful for RESTful applications. + + - you receive a `RequestRedirect` exception with a `new_url` + attribute. This exception is used to notify you about a request + Werkzeug requests from your WSGI application. This is for example the + case if you request ``/foo`` although the correct URL is ``/foo/`` + You can use the `RequestRedirect` instance as response-like object + similar to all other subclasses of `HTTPException`. + + - you get a tuple in the form ``(endpoint, arguments)`` if there is + a match (unless `return_rule` is True, in which case you get a tuple + in the form ``(rule, arguments)``) + + If the path info is not passed to the match method the default path + info of the map is used (defaults to the root URL if not defined + explicitly). + + All of the exceptions raised are subclasses of `HTTPException` so they + can be used as WSGI responses. They will all render generic error or + redirect pages. + + Here is a small example for matching: + + >>> m = Map([ + ... Rule('/', endpoint='index'), + ... Rule('/downloads/', endpoint='downloads/index'), + ... Rule('/downloads/<int:id>', endpoint='downloads/show') + ... ]) + >>> urls = m.bind("example.com", "/") + >>> urls.match("/", "GET") + ('index', {}) + >>> urls.match("/downloads/42") + ('downloads/show', {'id': 42}) + + And here is what happens on redirect and missing URLs: + + >>> urls.match("/downloads") + Traceback (most recent call last): + ... + RequestRedirect: http://example.com/downloads/ + >>> urls.match("/missing") + Traceback (most recent call last): + ... + NotFound: 404 Not Found + + :param path_info: the path info to use for matching. Overrides the + path info specified on binding. + :param method: the HTTP method used for matching. Overrides the + method specified on binding. + :param return_rule: return the rule that matched instead of just the + endpoint (defaults to `False`). + :param query_args: optional query arguments that are used for + automatic redirects as string or dictionary. It's + currently not possible to use the query arguments + for URL matching. + + .. versionadded:: 0.6 + `return_rule` was added. + + .. versionadded:: 0.7 + `query_args` was added. + + .. versionchanged:: 0.8 + `query_args` can now also be a string. 
+ """ + self.map.update() + if path_info is None: + path_info = self.path_info + else: + path_info = to_unicode(path_info, self.map.charset) + if query_args is None: + query_args = self.query_args + method = (method or self.default_method).upper() + + path = u"%s|%s" % ( + self.map.host_matching and self.server_name or self.subdomain, + path_info and "/%s" % path_info.lstrip("/"), + ) + + have_match_for = set() + for rule in self.map._rules: + try: + rv = rule.match(path, method) + except RequestSlash: + raise RequestRedirect( + self.make_redirect_url( + url_quote(path_info, self.map.charset, safe="/:|+") + "/", + query_args, + ) + ) + except RequestAliasRedirect as e: + raise RequestRedirect( + self.make_alias_redirect_url( + path, rule.endpoint, e.matched_values, method, query_args + ) + ) + if rv is None: + continue + if rule.methods is not None and method not in rule.methods: + have_match_for.update(rule.methods) + continue + + if self.map.redirect_defaults: + redirect_url = self.get_default_redirect(rule, method, rv, query_args) + if redirect_url is not None: + raise RequestRedirect(redirect_url) + + if rule.redirect_to is not None: + if isinstance(rule.redirect_to, string_types): + + def _handle_match(match): + value = rv[match.group(1)] + return rule._converters[match.group(1)].to_url(value) + + redirect_url = _simple_rule_re.sub(_handle_match, rule.redirect_to) + else: + redirect_url = rule.redirect_to(self, **rv) + raise RequestRedirect( + str( + url_join( + "%s://%s%s%s" + % ( + self.url_scheme or "http", + self.subdomain + "." if self.subdomain else "", + self.server_name, + self.script_name, + ), + redirect_url, + ) + ) + ) + + if return_rule: + return rule, rv + else: + return rule.endpoint, rv + + if have_match_for: + raise MethodNotAllowed(valid_methods=list(have_match_for)) + raise NotFound() + + def test(self, path_info=None, method=None): + """Test if a rule would match. Works like `match` but returns `True` + if the URL matches, or `False` if it does not exist. + + :param path_info: the path info to use for matching. Overrides the + path info specified on binding. + :param method: the HTTP method used for matching. Overrides the + method specified on binding. + """ + try: + self.match(path_info, method) + except RequestRedirect: + pass + except HTTPException: + return False + return True + + def allowed_methods(self, path_info=None): + """Returns the valid methods that match for a given path. + + .. versionadded:: 0.7 + """ + try: + self.match(path_info, method="--") + except MethodNotAllowed as e: + return e.valid_methods + except HTTPException: + pass + return [] + + def get_host(self, domain_part): + """Figures out the full host name for the given domain part. The + domain part is a subdomain in case host matching is disabled or + a full host name. + """ + if self.map.host_matching: + if domain_part is None: + return self.server_name + return to_unicode(domain_part, "ascii") + subdomain = domain_part + if subdomain is None: + subdomain = self.subdomain + else: + subdomain = to_unicode(subdomain, "ascii") + return (subdomain + u"." if subdomain else u"") + self.server_name + + def get_default_redirect(self, rule, method, values, query_args): + """A helper that returns the URL to redirect to if it finds one. + This is used for default redirecting only. + + :internal: + """ + assert self.map.redirect_defaults + for r in self.map._rules_by_endpoint[rule.endpoint]: + # every rule that comes after this one, including ourself + # has a lower priority for the defaults. 
We order the ones + # with the highest priority up for building. + if r is rule: + break + if r.provides_defaults_for(rule) and r.suitable_for(values, method): + values.update(r.defaults) + domain_part, path = r.build(values) + return self.make_redirect_url(path, query_args, domain_part=domain_part) + + def encode_query_args(self, query_args): + if not isinstance(query_args, string_types): + query_args = url_encode(query_args, self.map.charset) + return query_args + + def make_redirect_url(self, path_info, query_args=None, domain_part=None): + """Creates a redirect URL. + + :internal: + """ + suffix = "" + if query_args: + suffix = "?" + self.encode_query_args(query_args) + return str( + "%s://%s/%s%s" + % ( + self.url_scheme or "http", + self.get_host(domain_part), + posixpath.join( + self.script_name[:-1].lstrip("/"), path_info.lstrip("/") + ), + suffix, + ) + ) + + def make_alias_redirect_url(self, path, endpoint, values, method, query_args): + """Internally called to make an alias redirect URL.""" + url = self.build( + endpoint, values, method, append_unknown=False, force_external=True + ) + if query_args: + url += "?" + self.encode_query_args(query_args) + assert url != path, "detected invalid alias setting. No canonical URL found" + return url + + def _partial_build(self, endpoint, values, method, append_unknown): + """Helper for :meth:`build`. Returns subdomain and path for the + rule that accepts this endpoint, values and method. + + :internal: + """ + # in case the method is none, try with the default method first + if method is None: + rv = self._partial_build( + endpoint, values, self.default_method, append_unknown + ) + if rv is not None: + return rv + + # default method did not match or a specific method is passed, + # check all and go with first result. + for rule in self.map._rules_by_endpoint.get(endpoint, ()): + if rule.suitable_for(values, method): + rv = rule.build(values, append_unknown) + if rv is not None: + return rv + + def build( + self, + endpoint, + values=None, + method=None, + force_external=False, + append_unknown=True, + ): + """Building URLs works pretty much the other way round. Instead of + `match` you call `build` and pass it the endpoint and a dict of + arguments for the placeholders. + + The `build` function also accepts an argument called `force_external` + which, if you set it to `True` will force external URLs. Per default + external URLs (include the server name) will only be used if the + target URL is on a different subdomain. + + >>> m = Map([ + ... Rule('/', endpoint='index'), + ... Rule('/downloads/', endpoint='downloads/index'), + ... Rule('/downloads/<int:id>', endpoint='downloads/show') + ... ]) + >>> urls = m.bind("example.com", "/") + >>> urls.build("index", {}) + '/' + >>> urls.build("downloads/show", {'id': 42}) + '/downloads/42' + >>> urls.build("downloads/show", {'id': 42}, force_external=True) + 'http://example.com/downloads/42' + + Because URLs cannot contain non ASCII data you will always get + bytestrings back. Non ASCII characters are urlencoded with the + charset defined on the map instance. 
+ + Additional values are converted to unicode and appended to the URL as + URL querystring parameters: + + >>> urls.build("index", {'q': 'My Searchstring'}) + '/?q=My+Searchstring' + + When processing those additional values, lists are furthermore + interpreted as multiple values (as per + :py:class:`werkzeug.datastructures.MultiDict`): + + >>> urls.build("index", {'q': ['a', 'b', 'c']}) + '/?q=a&q=b&q=c' + + Passing a ``MultiDict`` will also add multiple values: + + >>> urls.build("index", MultiDict((('p', 'z'), ('q', 'a'), ('q', 'b')))) + '/?p=z&q=a&q=b' + + If a rule does not exist when building a `BuildError` exception is + raised. + + The build method accepts an argument called `method` which allows you + to specify the method you want to have an URL built for if you have + different methods for the same endpoint specified. + + .. versionadded:: 0.6 + the `append_unknown` parameter was added. + + :param endpoint: the endpoint of the URL to build. + :param values: the values for the URL to build. Unhandled values are + appended to the URL as query parameters. + :param method: the HTTP method for the rule if there are different + URLs for different methods on the same endpoint. + :param force_external: enforce full canonical external URLs. If the URL + scheme is not provided, this will generate + a protocol-relative URL. + :param append_unknown: unknown parameters are appended to the generated + URL as query string argument. Disable this + if you want the builder to ignore those. + """ + self.map.update() + + if values: + if isinstance(values, MultiDict): + temp_values = {} + # iteritems(dict, values) is like `values.lists()` + # without the call or `list()` coercion overhead. + for key, value in iteritems(dict, values): + if not value: + continue + if len(value) == 1: # flatten single item lists + value = value[0] + if value is None: # drop None + continue + temp_values[key] = value + values = temp_values + else: + # drop None + values = dict(i for i in iteritems(values) if i[1] is not None) + else: + values = {} + + rv = self._partial_build(endpoint, values, method, append_unknown) + if rv is None: + raise BuildError(endpoint, values, method, self) + domain_part, path = rv + + host = self.get_host(domain_part) + + # shortcut this. + if not force_external and ( + (self.map.host_matching and host == self.server_name) + or (not self.map.host_matching and domain_part == self.subdomain) + ): + return "%s/%s" % (self.script_name.rstrip("/"), path.lstrip("/")) + return str( + "%s//%s%s/%s" + % ( + self.url_scheme + ":" if self.url_scheme else "", + host, + self.script_name[:-1], + path.lstrip("/"), + ) + ) diff --git a/python/werkzeug/security.py b/python/werkzeug/security.py new file mode 100644 index 0000000..1842afd --- /dev/null +++ b/python/werkzeug/security.py @@ -0,0 +1,241 @@ +# -*- coding: utf-8 -*- +""" + werkzeug.security + ~~~~~~~~~~~~~~~~~ + + Security related helpers such as secure password hashing tools. 
+ + :copyright: 2007 Pallets + :license: BSD-3-Clause +""" +import codecs +import hashlib +import hmac +import os +import posixpath +from random import SystemRandom +from struct import Struct + +from ._compat import izip +from ._compat import PY2 +from ._compat import range_type +from ._compat import text_type +from ._compat import to_bytes +from ._compat import to_native + +SALT_CHARS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" +DEFAULT_PBKDF2_ITERATIONS = 150000 + +_pack_int = Struct(">I").pack +_builtin_safe_str_cmp = getattr(hmac, "compare_digest", None) +_sys_rng = SystemRandom() +_os_alt_seps = list( + sep for sep in [os.path.sep, os.path.altsep] if sep not in (None, "/") +) + + +def pbkdf2_hex( + data, salt, iterations=DEFAULT_PBKDF2_ITERATIONS, keylen=None, hashfunc=None +): + """Like :func:`pbkdf2_bin`, but returns a hex-encoded string. + + .. versionadded:: 0.9 + + :param data: the data to derive. + :param salt: the salt for the derivation. + :param iterations: the number of iterations. + :param keylen: the length of the resulting key. If not provided, + the digest size will be used. + :param hashfunc: the hash function to use. This can either be the + string name of a known hash function, or a function + from the hashlib module. Defaults to sha256. + """ + rv = pbkdf2_bin(data, salt, iterations, keylen, hashfunc) + return to_native(codecs.encode(rv, "hex_codec")) + + +def pbkdf2_bin( + data, salt, iterations=DEFAULT_PBKDF2_ITERATIONS, keylen=None, hashfunc=None +): + """Returns a binary digest for the PBKDF2 hash algorithm of `data` + with the given `salt`. It iterates `iterations` times and produces a + key of `keylen` bytes. By default, SHA-256 is used as hash function; + a different hashlib `hashfunc` can be provided. + + .. versionadded:: 0.9 + + :param data: the data to derive. + :param salt: the salt for the derivation. + :param iterations: the number of iterations. + :param keylen: the length of the resulting key. If not provided + the digest size will be used. + :param hashfunc: the hash function to use. This can either be the + string name of a known hash function or a function + from the hashlib module. Defaults to sha256. + """ + if not hashfunc: + hashfunc = "sha256" + + data = to_bytes(data) + salt = to_bytes(salt) + + if callable(hashfunc): + _test_hash = hashfunc() + hash_name = getattr(_test_hash, "name", None) + else: + hash_name = hashfunc + return hashlib.pbkdf2_hmac(hash_name, data, salt, iterations, keylen) + + +def safe_str_cmp(a, b): + """This function compares strings in somewhat constant time. This + requires that the length of at least one string is known in advance. + + Returns `True` if the two strings are equal, or `False` if they are not. + + .. versionadded:: 0.7 + """ + if isinstance(a, text_type): + a = a.encode("utf-8") + if isinstance(b, text_type): + b = b.encode("utf-8") + + if _builtin_safe_str_cmp is not None: + return _builtin_safe_str_cmp(a, b) + + if len(a) != len(b): + return False + + rv = 0 + if PY2: + for x, y in izip(a, b): + rv |= ord(x) ^ ord(y) + else: + for x, y in izip(a, b): + rv |= x ^ y + + return rv == 0 + + +def gen_salt(length): + """Generate a random string of SALT_CHARS with specified ``length``.""" + if length <= 0: + raise ValueError("Salt length must be positive") + return "".join(_sys_rng.choice(SALT_CHARS) for _ in range_type(length)) + + +def _hash_internal(method, salt, password): + """Internal password hash helper. Supports plaintext without salt, + unsalted and salted passwords. 
In case salted passwords are used + hmac is used. + """ + if method == "plain": + return password, method + + if isinstance(password, text_type): + password = password.encode("utf-8") + + if method.startswith("pbkdf2:"): + args = method[7:].split(":") + if len(args) not in (1, 2): + raise ValueError("Invalid number of arguments for PBKDF2") + method = args.pop(0) + iterations = args and int(args[0] or 0) or DEFAULT_PBKDF2_ITERATIONS + is_pbkdf2 = True + actual_method = "pbkdf2:%s:%d" % (method, iterations) + else: + is_pbkdf2 = False + actual_method = method + + if is_pbkdf2: + if not salt: + raise ValueError("Salt is required for PBKDF2") + rv = pbkdf2_hex(password, salt, iterations, hashfunc=method) + elif salt: + if isinstance(salt, text_type): + salt = salt.encode("utf-8") + mac = _create_mac(salt, password, method) + rv = mac.hexdigest() + else: + rv = hashlib.new(method, password).hexdigest() + return rv, actual_method + + +def _create_mac(key, msg, method): + if callable(method): + return hmac.HMAC(key, msg, method) + + def hashfunc(d=b""): + return hashlib.new(method, d) + + # Python 2.7 used ``hasattr(digestmod, '__call__')`` + # to detect if hashfunc is callable + hashfunc.__call__ = hashfunc + return hmac.HMAC(key, msg, hashfunc) + + +def generate_password_hash(password, method="pbkdf2:sha256", salt_length=8): + """Hash a password with the given method and salt with a string of + the given length. The format of the string returned includes the method + that was used so that :func:`check_password_hash` can check the hash. + + The format for the hashed string looks like this:: + + method$salt$hash + + This method can **not** generate unsalted passwords but it is possible + to set param method='plain' in order to enforce plaintext passwords. + If a salt is used, hmac is used internally to salt the password. + + If PBKDF2 is wanted it can be enabled by setting the method to + ``pbkdf2:method:iterations`` where iterations is optional:: + + pbkdf2:sha256:80000$salt$hash + pbkdf2:sha256$salt$hash + + :param password: the password to hash. + :param method: the hash method to use (one that hashlib supports). Can + optionally be in the format ``pbkdf2:<method>[:iterations]`` + to enable PBKDF2. + :param salt_length: the length of the salt in letters. + """ + salt = gen_salt(salt_length) if method != "plain" else "" + h, actual_method = _hash_internal(method, salt, password) + return "%s$%s$%s" % (actual_method, salt, h) + + +def check_password_hash(pwhash, password): + """check a password against a given salted and hashed password value. + In order to support unsalted legacy passwords this method supports + plain text passwords, md5 and sha1 hashes (both salted and unsalted). + + Returns `True` if the password matched, `False` otherwise. + + :param pwhash: a hashed string like returned by + :func:`generate_password_hash`. + :param password: the plaintext password to compare against the hash. + """ + if pwhash.count("$") < 2: + return False + method, salt, hashval = pwhash.split("$", 2) + return safe_str_cmp(_hash_internal(method, salt, password)[0], hashval) + + +def safe_join(directory, *pathnames): + """Safely join `directory` and one or more untrusted `pathnames`. If this + cannot be done, this function returns ``None``. + + :param directory: the base directory. + :param pathnames: the untrusted pathnames relative to that directory. 
+ """ + parts = [directory] + for filename in pathnames: + if filename != "": + filename = posixpath.normpath(filename) + for sep in _os_alt_seps: + if sep in filename: + return None + if os.path.isabs(filename) or filename == ".." or filename.startswith("../"): + return None + parts.append(filename) + return posixpath.join(*parts) diff --git a/python/werkzeug/serving.py b/python/werkzeug/serving.py new file mode 100644 index 0000000..ff9f880 --- /dev/null +++ b/python/werkzeug/serving.py @@ -0,0 +1,1074 @@ +# -*- coding: utf-8 -*- +""" + werkzeug.serving + ~~~~~~~~~~~~~~~~ + + There are many ways to serve a WSGI application. While you're developing + it you usually don't want a full blown webserver like Apache but a simple + standalone one. From Python 2.5 onwards there is the `wsgiref`_ server in + the standard library. If you're using older versions of Python you can + download the package from the cheeseshop. + + However there are some caveats. Sourcecode won't reload itself when + changed and each time you kill the server using ``^C`` you get an + `KeyboardInterrupt` error. While the latter is easy to solve the first + one can be a pain in the ass in some situations. + + The easiest way is creating a small ``start-myproject.py`` that runs the + application:: + + #!/usr/bin/env python + # -*- coding: utf-8 -*- + from myproject import make_app + from werkzeug.serving import run_simple + + app = make_app(...) + run_simple('localhost', 8080, app, use_reloader=True) + + You can also pass it a `extra_files` keyword argument with a list of + additional files (like configuration files) you want to observe. + + For bigger applications you should consider using `click` + (http://click.pocoo.org) instead of a simple start file. + + + :copyright: 2007 Pallets + :license: BSD-3-Clause +""" +import io +import os +import signal +import socket +import sys + +import werkzeug +from ._compat import PY2 +from ._compat import reraise +from ._compat import WIN +from ._compat import wsgi_encoding_dance +from ._internal import _log +from .exceptions import InternalServerError +from .urls import uri_to_iri +from .urls import url_parse +from .urls import url_unquote + +try: + import socketserver + from http.server import BaseHTTPRequestHandler + from http.server import HTTPServer +except ImportError: + import SocketServer as socketserver + from BaseHTTPServer import HTTPServer + from BaseHTTPServer import BaseHTTPRequestHandler + +try: + import ssl +except ImportError: + + class _SslDummy(object): + def __getattr__(self, name): + raise RuntimeError("SSL support unavailable") + + ssl = _SslDummy() + +try: + import termcolor +except ImportError: + termcolor = None + + +def _get_openssl_crypto_module(): + try: + from OpenSSL import crypto + except ImportError: + raise TypeError("Using ad-hoc certificates requires the pyOpenSSL library.") + else: + return crypto + + +ThreadingMixIn = socketserver.ThreadingMixIn +can_fork = hasattr(os, "fork") + +if can_fork: + ForkingMixIn = socketserver.ForkingMixIn +else: + + class ForkingMixIn(object): + pass + + +try: + af_unix = socket.AF_UNIX +except AttributeError: + af_unix = None + + +LISTEN_QUEUE = 128 +can_open_by_fd = not WIN and hasattr(socket, "fromfd") + +# On Python 3, ConnectionError represents the same errnos as +# socket.error from Python 2, while socket.error is an alias for the +# more generic OSError. 
+if PY2: + _ConnectionError = socket.error +else: + _ConnectionError = ConnectionError + + +class DechunkedInput(io.RawIOBase): + """An input stream that handles Transfer-Encoding 'chunked'""" + + def __init__(self, rfile): + self._rfile = rfile + self._done = False + self._len = 0 + + def readable(self): + return True + + def read_chunk_len(self): + try: + line = self._rfile.readline().decode("latin1") + _len = int(line.strip(), 16) + except ValueError: + raise IOError("Invalid chunk header") + if _len < 0: + raise IOError("Negative chunk length not allowed") + return _len + + def readinto(self, buf): + read = 0 + while not self._done and read < len(buf): + if self._len == 0: + # This is the first chunk or we fully consumed the previous + # one. Read the next length of the next chunk + self._len = self.read_chunk_len() + + if self._len == 0: + # Found the final chunk of size 0. The stream is now exhausted, + # but there is still a final newline that should be consumed + self._done = True + + if self._len > 0: + # There is data (left) in this chunk, so append it to the + # buffer. If this operation fully consumes the chunk, this will + # reset self._len to 0. + n = min(len(buf), self._len) + buf[read : read + n] = self._rfile.read(n) + self._len -= n + read += n + + if self._len == 0: + # Skip the terminating newline of a chunk that has been fully + # consumed. This also applies to the 0-sized final chunk + terminator = self._rfile.readline() + if terminator not in (b"\n", b"\r\n", b"\r"): + raise IOError("Missing chunk terminating newline") + + return read + + +class WSGIRequestHandler(BaseHTTPRequestHandler, object): + + """A request handler that implements WSGI dispatching.""" + + @property + def server_version(self): + return "Werkzeug/" + werkzeug.__version__ + + def make_environ(self): + request_url = url_parse(self.path) + + def shutdown_server(): + self.server.shutdown_signal = True + + url_scheme = "http" if self.server.ssl_context is None else "https" + if not self.client_address: + self.client_address = "<local>" + if isinstance(self.client_address, str): + self.client_address = (self.client_address, 0) + else: + pass + path_info = url_unquote(request_url.path) + + environ = { + "wsgi.version": (1, 0), + "wsgi.url_scheme": url_scheme, + "wsgi.input": self.rfile, + "wsgi.errors": sys.stderr, + "wsgi.multithread": self.server.multithread, + "wsgi.multiprocess": self.server.multiprocess, + "wsgi.run_once": False, + "werkzeug.server.shutdown": shutdown_server, + "SERVER_SOFTWARE": self.server_version, + "REQUEST_METHOD": self.command, + "SCRIPT_NAME": "", + "PATH_INFO": wsgi_encoding_dance(path_info), + "QUERY_STRING": wsgi_encoding_dance(request_url.query), + # Non-standard, added by mod_wsgi, uWSGI + "REQUEST_URI": wsgi_encoding_dance(self.path), + # Non-standard, added by gunicorn + "RAW_URI": wsgi_encoding_dance(self.path), + "REMOTE_ADDR": self.address_string(), + "REMOTE_PORT": self.port_integer(), + "SERVER_NAME": self.server.server_address[0], + "SERVER_PORT": str(self.server.server_address[1]), + "SERVER_PROTOCOL": self.request_version, + } + + for key, value in self.get_header_items(): + key = key.upper().replace("-", "_") + value = value.replace("\r\n", "") + if key not in ("CONTENT_TYPE", "CONTENT_LENGTH"): + key = "HTTP_" + key + if key in environ: + value = "{},{}".format(environ[key], value) + environ[key] = value + + if environ.get("HTTP_TRANSFER_ENCODING", "").strip().lower() == "chunked": + environ["wsgi.input_terminated"] = True + environ["wsgi.input"] = 
DechunkedInput(environ["wsgi.input"]) + + if request_url.scheme and request_url.netloc: + environ["HTTP_HOST"] = request_url.netloc + + return environ + + def run_wsgi(self): + if self.headers.get("Expect", "").lower().strip() == "100-continue": + self.wfile.write(b"HTTP/1.1 100 Continue\r\n\r\n") + + self.environ = environ = self.make_environ() + headers_set = [] + headers_sent = [] + + def write(data): + assert headers_set, "write() before start_response" + if not headers_sent: + status, response_headers = headers_sent[:] = headers_set + try: + code, msg = status.split(None, 1) + except ValueError: + code, msg = status, "" + code = int(code) + self.send_response(code, msg) + header_keys = set() + for key, value in response_headers: + self.send_header(key, value) + key = key.lower() + header_keys.add(key) + if not ( + "content-length" in header_keys + or environ["REQUEST_METHOD"] == "HEAD" + or code < 200 + or code in (204, 304) + ): + self.close_connection = True + self.send_header("Connection", "close") + if "server" not in header_keys: + self.send_header("Server", self.version_string()) + if "date" not in header_keys: + self.send_header("Date", self.date_time_string()) + self.end_headers() + + assert isinstance(data, bytes), "applications must write bytes" + self.wfile.write(data) + self.wfile.flush() + + def start_response(status, response_headers, exc_info=None): + if exc_info: + try: + if headers_sent: + reraise(*exc_info) + finally: + exc_info = None + elif headers_set: + raise AssertionError("Headers already set") + headers_set[:] = [status, response_headers] + return write + + def execute(app): + application_iter = app(environ, start_response) + try: + for data in application_iter: + write(data) + if not headers_sent: + write(b"") + finally: + if hasattr(application_iter, "close"): + application_iter.close() + application_iter = None + + try: + execute(self.server.app) + except (_ConnectionError, socket.timeout) as e: + self.connection_dropped(e, environ) + except Exception: + if self.server.passthrough_errors: + raise + from .debug.tbtools import get_current_traceback + + traceback = get_current_traceback(ignore_system_exceptions=True) + try: + # if we haven't yet sent the headers but they are set + # we roll back to be able to set them again. + if not headers_sent: + del headers_set[:] + execute(InternalServerError()) + except Exception: + pass + self.server.log("error", "Error on request:\n%s", traceback.plaintext) + + def handle(self): + """Handles a request ignoring dropped connections.""" + rv = None + try: + rv = BaseHTTPRequestHandler.handle(self) + except (_ConnectionError, socket.timeout) as e: + self.connection_dropped(e) + except Exception as e: + if self.server.ssl_context is None or not is_ssl_error(e): + raise + if self.server.shutdown_signal: + self.initiate_shutdown() + return rv + + def initiate_shutdown(self): + """A horrible, horrible way to kill the server for Python 2.6 and + later. It's the best we can do. + """ + # Windows does not provide SIGKILL, go with SIGTERM then. + sig = getattr(signal, "SIGKILL", signal.SIGTERM) + # reloader active + if is_running_from_reloader(): + os.kill(os.getpid(), sig) + # python 2.7 + self.server._BaseServer__shutdown_request = True + # python 2.6 + self.server._BaseServer__serving = False + + def connection_dropped(self, error, environ=None): + """Called if the connection was closed by the client. By default + nothing happens. 
+ """ + + def handle_one_request(self): + """Handle a single HTTP request.""" + self.raw_requestline = self.rfile.readline() + if not self.raw_requestline: + self.close_connection = 1 + elif self.parse_request(): + return self.run_wsgi() + + def send_response(self, code, message=None): + """Send the response header and log the response code.""" + self.log_request(code) + if message is None: + message = code in self.responses and self.responses[code][0] or "" + if self.request_version != "HTTP/0.9": + hdr = "%s %d %s\r\n" % (self.protocol_version, code, message) + self.wfile.write(hdr.encode("ascii")) + + def version_string(self): + return BaseHTTPRequestHandler.version_string(self).strip() + + def address_string(self): + if getattr(self, "environ", None): + return self.environ["REMOTE_ADDR"] + elif not self.client_address: + return "<local>" + elif isinstance(self.client_address, str): + return self.client_address + else: + return self.client_address[0] + + def port_integer(self): + return self.client_address[1] + + def log_request(self, code="-", size="-"): + try: + path = uri_to_iri(self.path) + msg = "%s %s %s" % (self.command, path, self.request_version) + except AttributeError: + # path isn't set if the requestline was bad + msg = self.requestline + + code = str(code) + + if termcolor: + color = termcolor.colored + + if code[0] == "1": # 1xx - Informational + msg = color(msg, attrs=["bold"]) + elif code[0] == "2": # 2xx - Success + msg = color(msg, color="white") + elif code == "304": # 304 - Resource Not Modified + msg = color(msg, color="cyan") + elif code[0] == "3": # 3xx - Redirection + msg = color(msg, color="green") + elif code == "404": # 404 - Resource Not Found + msg = color(msg, color="yellow") + elif code[0] == "4": # 4xx - Client Error + msg = color(msg, color="red", attrs=["bold"]) + else: # 5xx, or any other response + msg = color(msg, color="magenta", attrs=["bold"]) + + self.log("info", '"%s" %s %s', msg, code, size) + + def log_error(self, *args): + self.log("error", *args) + + def log_message(self, format, *args): + self.log("info", format, *args) + + def log(self, type, message, *args): + _log( + type, + "%s - - [%s] %s\n" + % (self.address_string(), self.log_date_time_string(), message % args), + ) + + def get_header_items(self): + """ + Get an iterable list of key/value pairs representing headers. + + This function provides Python 2/3 compatibility as related to the + parsing of request headers. Python 2.7 is not compliant with + RFC 3875 Section 4.1.18 which requires multiple values for headers + to be provided or RFC 2616 which allows for folding of multi-line + headers. This function will return a matching list regardless + of Python version. It can be removed once Python 2.7 support + is dropped. + + :return: List of tuples containing header hey/value pairs + """ + if PY2: + # For Python 2, process the headers manually according to + # W3C RFC 2616 Section 4.2. + items = [] + for header in self.headers.headers: + # Remove "\r\n" from the header and split on ":" to get + # the field name and value. + try: + key, value = header[0:-2].split(":", 1) + except ValueError: + # If header could not be slit with : but starts with white + # space and it follows an existing header, it's a folded + # header. 
+ if header[0] in ("\t", " ") and items: + # Pop off the last header + key, value = items.pop() + # Append the current header to the value of the last + # header which will be placed back on the end of the + # list + value = value + header + # Otherwise it's just a bad header and should error + else: + # Re-raise the value error + raise + + # Add the key and the value once stripped of leading + # white space. The specification allows for stripping + # trailing white space but the Python 3 code does not + # strip trailing white space. Therefore, trailing space + # will be left as is to match the Python 3 behavior. + items.append((key, value.lstrip())) + else: + items = self.headers.items() + + return items + + +#: backwards compatible name if someone is subclassing it +BaseRequestHandler = WSGIRequestHandler + + +def generate_adhoc_ssl_pair(cn=None): + from random import random + + crypto = _get_openssl_crypto_module() + + # pretty damn sure that this is not actually accepted by anyone + if cn is None: + cn = "*" + + cert = crypto.X509() + cert.set_serial_number(int(random() * sys.maxsize)) + cert.gmtime_adj_notBefore(0) + cert.gmtime_adj_notAfter(60 * 60 * 24 * 365) + + subject = cert.get_subject() + subject.CN = cn + subject.O = "Dummy Certificate" # noqa: E741 + + issuer = cert.get_issuer() + issuer.CN = subject.CN + issuer.O = subject.O # noqa: E741 + + pkey = crypto.PKey() + pkey.generate_key(crypto.TYPE_RSA, 2048) + cert.set_pubkey(pkey) + cert.sign(pkey, "sha256") + + return cert, pkey + + +def make_ssl_devcert(base_path, host=None, cn=None): + """Creates an SSL key for development. This should be used instead of + the ``'adhoc'`` key which generates a new cert on each server start. + It accepts a path for where it should store the key and cert and + either a host or CN. If a host is given it will use the CN + ``*.host/CN=host``. + + For more information see :func:`run_simple`. + + .. versionadded:: 0.9 + + :param base_path: the path to the certificate and key. The extension + ``.crt`` is added for the certificate, ``.key`` is + added for the key. + :param host: the name of the host. This can be used as an alternative + for the `cn`. + :param cn: the `CN` to use. + """ + from OpenSSL import crypto + + if host is not None: + cn = "*.%s/CN=%s" % (host, host) + cert, pkey = generate_adhoc_ssl_pair(cn=cn) + + cert_file = base_path + ".crt" + pkey_file = base_path + ".key" + + with open(cert_file, "wb") as f: + f.write(crypto.dump_certificate(crypto.FILETYPE_PEM, cert)) + with open(pkey_file, "wb") as f: + f.write(crypto.dump_privatekey(crypto.FILETYPE_PEM, pkey)) + + return cert_file, pkey_file + + +def generate_adhoc_ssl_context(): + """Generates an adhoc SSL context for the development server.""" + crypto = _get_openssl_crypto_module() + import tempfile + import atexit + + cert, pkey = generate_adhoc_ssl_pair() + cert_handle, cert_file = tempfile.mkstemp() + pkey_handle, pkey_file = tempfile.mkstemp() + atexit.register(os.remove, pkey_file) + atexit.register(os.remove, cert_file) + + os.write(cert_handle, crypto.dump_certificate(crypto.FILETYPE_PEM, cert)) + os.write(pkey_handle, crypto.dump_privatekey(crypto.FILETYPE_PEM, pkey)) + os.close(cert_handle) + os.close(pkey_handle) + ctx = load_ssl_context(cert_file, pkey_file) + return ctx + + +def load_ssl_context(cert_file, pkey_file=None, protocol=None): + """Loads SSL context from cert/private key files and optional protocol. + Many parameters are directly taken from the API of + :py:class:`ssl.SSLContext`. 
+ + :param cert_file: Path of the certificate to use. + :param pkey_file: Path of the private key to use. If not given, the key + will be obtained from the certificate file. + :param protocol: One of the ``PROTOCOL_*`` constants in the stdlib ``ssl`` + module. Defaults to ``PROTOCOL_SSLv23``. + """ + if protocol is None: + protocol = ssl.PROTOCOL_SSLv23 + ctx = _SSLContext(protocol) + ctx.load_cert_chain(cert_file, pkey_file) + return ctx + + +class _SSLContext(object): + + """A dummy class with a small subset of Python3's ``ssl.SSLContext``, only + intended to be used with and by Werkzeug.""" + + def __init__(self, protocol): + self._protocol = protocol + self._certfile = None + self._keyfile = None + self._password = None + + def load_cert_chain(self, certfile, keyfile=None, password=None): + self._certfile = certfile + self._keyfile = keyfile or certfile + self._password = password + + def wrap_socket(self, sock, **kwargs): + return ssl.wrap_socket( + sock, + keyfile=self._keyfile, + certfile=self._certfile, + ssl_version=self._protocol, + **kwargs + ) + + +def is_ssl_error(error=None): + """Checks if the given error (or the current one) is an SSL error.""" + exc_types = (ssl.SSLError,) + try: + from OpenSSL.SSL import Error + + exc_types += (Error,) + except ImportError: + pass + + if error is None: + error = sys.exc_info()[1] + return isinstance(error, exc_types) + + +def select_address_family(host, port): + """Return ``AF_INET4``, ``AF_INET6``, or ``AF_UNIX`` depending on + the host and port.""" + # disabled due to problems with current ipv6 implementations + # and various operating systems. Probably this code also is + # not supposed to work, but I can't come up with any other + # ways to implement this. + # try: + # info = socket.getaddrinfo(host, port, socket.AF_UNSPEC, + # socket.SOCK_STREAM, 0, + # socket.AI_PASSIVE) + # if info: + # return info[0][0] + # except socket.gaierror: + # pass + if host.startswith("unix://"): + return socket.AF_UNIX + elif ":" in host and hasattr(socket, "AF_INET6"): + return socket.AF_INET6 + return socket.AF_INET + + +def get_sockaddr(host, port, family): + """Return a fully qualified socket address that can be passed to + :func:`socket.bind`.""" + if family == af_unix: + return host.split("://", 1)[1] + try: + res = socket.getaddrinfo( + host, port, family, socket.SOCK_STREAM, socket.IPPROTO_TCP + ) + except socket.gaierror: + return host, port + return res[0][4] + + +class BaseWSGIServer(HTTPServer, object): + + """Simple single-threaded, single-process WSGI server.""" + + multithread = False + multiprocess = False + request_queue_size = LISTEN_QUEUE + + def __init__( + self, + host, + port, + app, + handler=None, + passthrough_errors=False, + ssl_context=None, + fd=None, + ): + if handler is None: + handler = WSGIRequestHandler + + self.address_family = select_address_family(host, port) + + if fd is not None: + real_sock = socket.fromfd(fd, self.address_family, socket.SOCK_STREAM) + port = 0 + + server_address = get_sockaddr(host, int(port), self.address_family) + + # remove socket file if it already exists + if self.address_family == af_unix and os.path.exists(server_address): + os.unlink(server_address) + HTTPServer.__init__(self, server_address, handler) + + self.app = app + self.passthrough_errors = passthrough_errors + self.shutdown_signal = False + self.host = host + self.port = self.socket.getsockname()[1] + + # Patch in the original socket. 
+ if fd is not None: + self.socket.close() + self.socket = real_sock + self.server_address = self.socket.getsockname() + + if ssl_context is not None: + if isinstance(ssl_context, tuple): + ssl_context = load_ssl_context(*ssl_context) + if ssl_context == "adhoc": + ssl_context = generate_adhoc_ssl_context() + # If we are on Python 2 the return value from socket.fromfd + # is an internal socket object but what we need for ssl wrap + # is the wrapper around it :( + sock = self.socket + if PY2 and not isinstance(sock, socket.socket): + sock = socket.socket(sock.family, sock.type, sock.proto, sock) + self.socket = ssl_context.wrap_socket(sock, server_side=True) + self.ssl_context = ssl_context + else: + self.ssl_context = None + + def log(self, type, message, *args): + _log(type, message, *args) + + def serve_forever(self): + self.shutdown_signal = False + try: + HTTPServer.serve_forever(self) + except KeyboardInterrupt: + pass + finally: + self.server_close() + + def handle_error(self, request, client_address): + if self.passthrough_errors: + raise + # Python 2 still causes a socket.error after the earlier + # handling, so silence it here. + if isinstance(sys.exc_info()[1], _ConnectionError): + return + return HTTPServer.handle_error(self, request, client_address) + + def get_request(self): + con, info = self.socket.accept() + return con, info + + +class ThreadedWSGIServer(ThreadingMixIn, BaseWSGIServer): + + """A WSGI server that does threading.""" + + multithread = True + daemon_threads = True + + +class ForkingWSGIServer(ForkingMixIn, BaseWSGIServer): + + """A WSGI server that does forking.""" + + multiprocess = True + + def __init__( + self, + host, + port, + app, + processes=40, + handler=None, + passthrough_errors=False, + ssl_context=None, + fd=None, + ): + if not can_fork: + raise ValueError("Your platform does not support forking.") + BaseWSGIServer.__init__( + self, host, port, app, handler, passthrough_errors, ssl_context, fd + ) + self.max_children = processes + + +def make_server( + host=None, + port=None, + app=None, + threaded=False, + processes=1, + request_handler=None, + passthrough_errors=False, + ssl_context=None, + fd=None, +): + """Create a new server instance that is either threaded, or forks + or just processes one request after another. + """ + if threaded and processes > 1: + raise ValueError("cannot have a multithreaded and multi process server.") + elif threaded: + return ThreadedWSGIServer( + host, port, app, request_handler, passthrough_errors, ssl_context, fd=fd + ) + elif processes > 1: + return ForkingWSGIServer( + host, + port, + app, + processes, + request_handler, + passthrough_errors, + ssl_context, + fd=fd, + ) + else: + return BaseWSGIServer( + host, port, app, request_handler, passthrough_errors, ssl_context, fd=fd + ) + + +def is_running_from_reloader(): + """Checks if the application is running from within the Werkzeug + reloader subprocess. + + .. versionadded:: 0.10 + """ + return os.environ.get("WERKZEUG_RUN_MAIN") == "true" + + +def run_simple( + hostname, + port, + application, + use_reloader=False, + use_debugger=False, + use_evalex=True, + extra_files=None, + reloader_interval=1, + reloader_type="auto", + threaded=False, + processes=1, + request_handler=None, + static_files=None, + passthrough_errors=False, + ssl_context=None, +): + """Start a WSGI application. Optional features include a reloader, + multithreading and fork support. + + This function has a command-line interface too:: + + python -m werkzeug.serving --help + + .. 
versionadded:: 0.5
+       `static_files` was added to simplify serving of static files as well
+       as `passthrough_errors`.
+
+    .. versionadded:: 0.6
+       support for SSL was added.
+
+    .. versionadded:: 0.8
+       Added support for automatically loading an SSL context from a
+       certificate file and private key.
+
+    .. versionadded:: 0.9
+       Added command-line interface.
+
+    .. versionadded:: 0.10
+       Improved the reloader and added support for changing the backend
+       through the `reloader_type` parameter.  See :ref:`reloader`
+       for more information.
+
+    .. versionchanged:: 0.15
+        Bind to a Unix socket by passing a path that starts with
+        ``unix://`` as the ``hostname``.
+
+    :param hostname: The host to bind to, for example ``'localhost'``.
+        If the value is a path that starts with ``unix://`` it will bind
+        to a Unix socket instead of a TCP socket.
+    :param port: The port for the server, e.g. ``8080``.
+    :param application: the WSGI application to execute
+    :param use_reloader: should the server automatically restart the Python
+                         process if modules were changed?
+    :param use_debugger: should the werkzeug debugging system be used?
+    :param use_evalex: should the exception evaluation feature be enabled?
+    :param extra_files: a list of files the reloader should watch in
+                        addition to the modules.  For example configuration
+                        files.
+    :param reloader_interval: the interval for the reloader in seconds.
+    :param reloader_type: the type of reloader to use.  The default is
+                          auto detection.  Valid values are ``'stat'`` and
+                          ``'watchdog'``. See :ref:`reloader` for more
+                          information.
+    :param threaded: should the process handle each request in a separate
+                     thread?
+    :param processes: if greater than 1 then handle each request in a new process
+                      up to this maximum number of concurrent processes.
+    :param request_handler: optional parameter that can be used to replace
+                            the default one.  You can use this to replace it
+                            with a different
+                            :class:`~BaseHTTPServer.BaseHTTPRequestHandler`
+                            subclass.
+    :param static_files: a list or dict of paths for static files.  This works
+                         exactly like :class:`SharedDataMiddleware`; it actually
+                         just wraps the application in that middleware before
+                         serving.
+    :param passthrough_errors: set this to `True` to disable the error catching.
+                               This means that the server will die on errors but
+                               it can be useful to hook debuggers in (pdb etc.)
+    :param ssl_context: an SSL context for the connection. Either an
+                        :class:`ssl.SSLContext`, a tuple in the form
+                        ``(cert_file, pkey_file)``, the string ``'adhoc'`` if
+                        the server should automatically create one, or ``None``
+                        to disable SSL (which is the default).
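+
+    A minimal sketch of typical use (``application`` here is a stand-in for
+    any WSGI app)::
+
+        from werkzeug.serving import run_simple
+
+        def application(environ, start_response):
+            start_response("200 OK", [("Content-Type", "text/plain")])
+            return [b"Hello World!"]
+
+        run_simple("localhost", 5000, application, use_reloader=True)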
+ """ + if not isinstance(port, int): + raise TypeError("port must be an integer") + if use_debugger: + from .debug import DebuggedApplication + + application = DebuggedApplication(application, use_evalex) + if static_files: + from .middleware.shared_data import SharedDataMiddleware + + application = SharedDataMiddleware(application, static_files) + + def log_startup(sock): + display_hostname = hostname if hostname not in ("", "*") else "localhost" + quit_msg = "(Press CTRL+C to quit)" + if sock.family == af_unix: + _log("info", " * Running on %s %s", display_hostname, quit_msg) + else: + if ":" in display_hostname: + display_hostname = "[%s]" % display_hostname + port = sock.getsockname()[1] + _log( + "info", + " * Running on %s://%s:%d/ %s", + "http" if ssl_context is None else "https", + display_hostname, + port, + quit_msg, + ) + + def inner(): + try: + fd = int(os.environ["WERKZEUG_SERVER_FD"]) + except (LookupError, ValueError): + fd = None + srv = make_server( + hostname, + port, + application, + threaded, + processes, + request_handler, + passthrough_errors, + ssl_context, + fd=fd, + ) + if fd is None: + log_startup(srv.socket) + srv.serve_forever() + + if use_reloader: + # If we're not running already in the subprocess that is the + # reloader we want to open up a socket early to make sure the + # port is actually available. + if not is_running_from_reloader(): + if port == 0 and not can_open_by_fd: + raise ValueError( + "Cannot bind to a random port with enabled " + "reloader if the Python interpreter does " + "not support socket opening by fd." + ) + + # Create and destroy a socket so that any exceptions are + # raised before we spawn a separate Python interpreter and + # lose this ability. + address_family = select_address_family(hostname, port) + server_address = get_sockaddr(hostname, port, address_family) + s = socket.socket(address_family, socket.SOCK_STREAM) + s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + s.bind(server_address) + if hasattr(s, "set_inheritable"): + s.set_inheritable(True) + + # If we can open the socket by file descriptor, then we can just + # reuse this one and our socket will survive the restarts. + if can_open_by_fd: + os.environ["WERKZEUG_SERVER_FD"] = str(s.fileno()) + s.listen(LISTEN_QUEUE) + log_startup(s) + else: + s.close() + if address_family == af_unix: + _log("info", "Unlinking %s" % server_address) + os.unlink(server_address) + + # Do not use relative imports, otherwise "python -m werkzeug.serving" + # breaks. + from ._reloader import run_with_reloader + + run_with_reloader(inner, extra_files, reloader_interval, reloader_type) + else: + inner() + + +def run_with_reloader(*args, **kwargs): + # People keep using undocumented APIs. Do not use this function + # please, we do not guarantee that it continues working. 
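+    # This wrapper simply forwards to ``werkzeug._reloader.run_with_reloader``
+    # and is kept only for backwards compatibility.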
+    from ._reloader import run_with_reloader
+
+    return run_with_reloader(*args, **kwargs)
+
+
+def main():
+    """A simple command-line interface for :py:func:`run_simple`."""
+
+    # in contrast to argparse, this works at least under Python < 2.7
+    import optparse
+    from .utils import import_string
+
+    parser = optparse.OptionParser(usage="Usage: %prog [options] app_module:app_object")
+    parser.add_option(
+        "-b",
+        "--bind",
+        dest="address",
+        help="The hostname:port the app should listen on.",
+    )
+    parser.add_option(
+        "-d",
+        "--debug",
+        dest="use_debugger",
+        action="store_true",
+        default=False,
+        help="Use Werkzeug's debugger.",
+    )
+    parser.add_option(
+        "-r",
+        "--reload",
+        dest="use_reloader",
+        action="store_true",
+        default=False,
+        help="Reload Python process if modules change.",
+    )
+    options, args = parser.parse_args()
+
+    hostname, port = None, None
+    if options.address:
+        address = options.address.split(":")
+        hostname = address[0]
+        if len(address) > 1:
+            port = address[1]
+
+    if len(args) != 1:
+        sys.stdout.write("No application supplied, or too many arguments. See --help\n")
+        sys.exit(1)
+    app = import_string(args[0])
+
+    run_simple(
+        hostname=(hostname or "127.0.0.1"),
+        port=int(port or 5000),
+        application=app,
+        use_reloader=options.use_reloader,
+        use_debugger=options.use_debugger,
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/python/werkzeug/test.py b/python/werkzeug/test.py
new file mode 100644
index 0000000..6148665
--- /dev/null
+++ b/python/werkzeug/test.py
@@ -0,0 +1,1146 @@
+# -*- coding: utf-8 -*-
+"""
+    werkzeug.test
+    ~~~~~~~~~~~~~
+
+    This module implements a client to WSGI applications for testing.
+
+    :copyright: 2007 Pallets
+    :license: BSD-3-Clause
+"""
+import mimetypes
+import sys
+from io import BytesIO
+from itertools import chain
+from random import random
+from tempfile import TemporaryFile
+from time import time
+
+from ._compat import iteritems
+from ._compat import iterlists
+from ._compat import itervalues
+from ._compat import make_literal_wrapper
+from ._compat import reraise
+from ._compat import string_types
+from ._compat import text_type
+from ._compat import to_bytes
+from ._compat import wsgi_encoding_dance
+from ._internal import _get_environ
+from .datastructures import CallbackDict
+from .datastructures import CombinedMultiDict
+from .datastructures import EnvironHeaders
+from .datastructures import FileMultiDict
+from .datastructures import FileStorage
+from .datastructures import Headers
+from .datastructures import MultiDict
+from .http import dump_cookie
+from .http import dump_options_header
+from .http import parse_options_header
+from .urls import iri_to_uri
+from .urls import url_encode
+from .urls import url_fix
+from .urls import url_parse
+from .urls import url_unparse
+from .urls import url_unquote
+from .utils import get_content_type
+from .wrappers import BaseRequest
+from .wsgi import ClosingIterator
+from .wsgi import get_current_url
+
+try:
+    from urllib.request import Request as U2Request
+except ImportError:
+    from urllib2 import Request as U2Request
+
+try:
+    from http.cookiejar import CookieJar
+except ImportError:
+    from cookielib import CookieJar
+
+
+def stream_encode_multipart(
+    values, use_tempfile=True, threshold=1024 * 500, boundary=None, charset="utf-8"
+):
+    """Encode a dict of values (strings, file descriptors, or
+    :class:`FileStorage` objects) into a multipart encoded string stored
+    in a file descriptor.
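+
+    A short sketch (the field names and file are hypothetical)::
+
+        stream, length, boundary = stream_encode_multipart(
+            {"field": "value", "upload": open("data.bin", "rb")}
+        )
+        body = stream.read()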
+ """ + if boundary is None: + boundary = "---------------WerkzeugFormPart_%s%s" % (time(), random()) + _closure = [BytesIO(), 0, False] + + if use_tempfile: + + def write_binary(string): + stream, total_length, on_disk = _closure + if on_disk: + stream.write(string) + else: + length = len(string) + if length + _closure[1] <= threshold: + stream.write(string) + else: + new_stream = TemporaryFile("wb+") + new_stream.write(stream.getvalue()) + new_stream.write(string) + _closure[0] = new_stream + _closure[2] = True + _closure[1] = total_length + length + + else: + write_binary = _closure[0].write + + def write(string): + write_binary(string.encode(charset)) + + if not isinstance(values, MultiDict): + values = MultiDict(values) + + for key, values in iterlists(values): + for value in values: + write('--%s\r\nContent-Disposition: form-data; name="%s"' % (boundary, key)) + reader = getattr(value, "read", None) + if reader is not None: + filename = getattr(value, "filename", getattr(value, "name", None)) + content_type = getattr(value, "content_type", None) + if content_type is None: + content_type = ( + filename + and mimetypes.guess_type(filename)[0] + or "application/octet-stream" + ) + if filename is not None: + write('; filename="%s"\r\n' % filename) + else: + write("\r\n") + write("Content-Type: %s\r\n\r\n" % content_type) + while 1: + chunk = reader(16384) + if not chunk: + break + write_binary(chunk) + else: + if not isinstance(value, string_types): + value = str(value) + + value = to_bytes(value, charset) + write("\r\n\r\n") + write_binary(value) + write("\r\n") + write("--%s--\r\n" % boundary) + + length = int(_closure[0].tell()) + _closure[0].seek(0) + return _closure[0], length, boundary + + +def encode_multipart(values, boundary=None, charset="utf-8"): + """Like `stream_encode_multipart` but returns a tuple in the form + (``boundary``, ``data``) where data is a bytestring. + """ + stream, length, boundary = stream_encode_multipart( + values, use_tempfile=False, boundary=boundary, charset=charset + ) + return boundary, stream.read() + + +def File(fd, filename=None, mimetype=None): + """Backwards compat. + + .. deprecated:: 0.5 + """ + from warnings import warn + + warn( + "'werkzeug.test.File' is deprecated as of version 0.5 and will" + " be removed in version 1.0. Use 'EnvironBuilder' or" + " 'FileStorage' instead.", + DeprecationWarning, + stacklevel=2, + ) + return FileStorage(fd, filename=filename, content_type=mimetype) + + +class _TestCookieHeaders(object): + + """A headers adapter for cookielib + """ + + def __init__(self, headers): + self.headers = headers + + def getheaders(self, name): + headers = [] + name = name.lower() + for k, v in self.headers: + if k.lower() == name: + headers.append(v) + return headers + + def get_all(self, name, default=None): + rv = [] + for k, v in self.headers: + if k.lower() == name.lower(): + rv.append(v) + return rv or default or [] + + +class _TestCookieResponse(object): + + """Something that looks like a httplib.HTTPResponse, but is actually just an + adapter for our test responses to make them available for cookielib. + """ + + def __init__(self, headers): + self.headers = _TestCookieHeaders(headers) + + def info(self): + return self.headers + + +class _TestCookieJar(CookieJar): + + """A cookielib.CookieJar modified to inject and read cookie headers from + and to wsgi environments, and wsgi application responses. + """ + + def inject_wsgi(self, environ): + """Inject the cookies as client headers into the server's wsgi + environment. 
+ """ + cvals = ["%s=%s" % (c.name, c.value) for c in self] + + if cvals: + environ["HTTP_COOKIE"] = "; ".join(cvals) + else: + environ.pop("HTTP_COOKIE", None) + + def extract_wsgi(self, environ, headers): + """Extract the server's set-cookie headers as cookies into the + cookie jar. + """ + self.extract_cookies( + _TestCookieResponse(headers), U2Request(get_current_url(environ)) + ) + + +def _iter_data(data): + """Iterates over a `dict` or :class:`MultiDict` yielding all keys and + values. + This is used to iterate over the data passed to the + :class:`EnvironBuilder`. + """ + if isinstance(data, MultiDict): + for key, values in iterlists(data): + for value in values: + yield key, value + else: + for key, values in iteritems(data): + if isinstance(values, list): + for value in values: + yield key, value + else: + yield key, values + + +class EnvironBuilder(object): + """This class can be used to conveniently create a WSGI environment + for testing purposes. It can be used to quickly create WSGI environments + or request objects from arbitrary data. + + The signature of this class is also used in some other places as of + Werkzeug 0.5 (:func:`create_environ`, :meth:`BaseResponse.from_values`, + :meth:`Client.open`). Because of this most of the functionality is + available through the constructor alone. + + Files and regular form data can be manipulated independently of each + other with the :attr:`form` and :attr:`files` attributes, but are + passed with the same argument to the constructor: `data`. + + `data` can be any of these values: + + - a `str` or `bytes` object: The object is converted into an + :attr:`input_stream`, the :attr:`content_length` is set and you have to + provide a :attr:`content_type`. + - a `dict` or :class:`MultiDict`: The keys have to be strings. The values + have to be either any of the following objects, or a list of any of the + following objects: + + - a :class:`file`-like object: These are converted into + :class:`FileStorage` objects automatically. + - a `tuple`: The :meth:`~FileMultiDict.add_file` method is called + with the key and the unpacked `tuple` items as positional + arguments. + - a `str`: The string is set as form data for the associated key. + - a file-like object: The object content is loaded in memory and then + handled like a regular `str` or a `bytes`. + + :param path: the path of the request. In the WSGI environment this will + end up as `PATH_INFO`. If the `query_string` is not defined + and there is a question mark in the `path` everything after + it is used as query string. + :param base_url: the base URL is a URL that is used to extract the WSGI + URL scheme, host (server name + server port) and the + script root (`SCRIPT_NAME`). + :param query_string: an optional string or dict with URL parameters. + :param method: the HTTP method to use, defaults to `GET`. + :param input_stream: an optional input stream. Do not specify this and + `data`. As soon as an input stream is set you can't + modify :attr:`args` and :attr:`files` unless you + set the :attr:`input_stream` to `None` again. + :param content_type: The content type for the request. As of 0.5 you + don't have to provide this when specifying files + and form data via `data`. + :param content_length: The content length for the request. You don't + have to specify this when providing data via + `data`. + :param errors_stream: an optional error stream that is used for + `wsgi.errors`. Defaults to :data:`stderr`. + :param multithread: controls `wsgi.multithread`. Defaults to `False`. 
+ :param multiprocess: controls `wsgi.multiprocess`. Defaults to `False`. + :param run_once: controls `wsgi.run_once`. Defaults to `False`. + :param headers: an optional list or :class:`Headers` object of headers. + :param data: a string or dict of form data or a file-object. + See explanation above. + :param json: An object to be serialized and assigned to ``data``. + Defaults the content type to ``"application/json"``. + Serialized with the function assigned to :attr:`json_dumps`. + :param environ_base: an optional dict of environment defaults. + :param environ_overrides: an optional dict of environment overrides. + :param charset: the charset used to encode unicode data. + + .. versionadded:: 0.15 + The ``json`` param and :meth:`json_dumps` method. + + .. versionadded:: 0.15 + The environ has keys ``REQUEST_URI`` and ``RAW_URI`` containing + the path before perecent-decoding. This is not part of the WSGI + PEP, but many WSGI servers include it. + + .. versionchanged:: 0.6 + ``path`` and ``base_url`` can now be unicode strings that are + encoded with :func:`iri_to_uri`. + """ + + #: the server protocol to use. defaults to HTTP/1.1 + server_protocol = "HTTP/1.1" + + #: the wsgi version to use. defaults to (1, 0) + wsgi_version = (1, 0) + + #: the default request class for :meth:`get_request` + request_class = BaseRequest + + import json + + #: The serialization function used when ``json`` is passed. + json_dumps = staticmethod(json.dumps) + del json + + def __init__( + self, + path="/", + base_url=None, + query_string=None, + method="GET", + input_stream=None, + content_type=None, + content_length=None, + errors_stream=None, + multithread=False, + multiprocess=False, + run_once=False, + headers=None, + data=None, + environ_base=None, + environ_overrides=None, + charset="utf-8", + mimetype=None, + json=None, + ): + path_s = make_literal_wrapper(path) + if query_string is not None and path_s("?") in path: + raise ValueError("Query string is defined in the path and as an argument") + if query_string is None and path_s("?") in path: + path, query_string = path.split(path_s("?"), 1) + self.charset = charset + self.path = iri_to_uri(path) + if base_url is not None: + base_url = url_fix(iri_to_uri(base_url, charset), charset) + self.base_url = base_url + if isinstance(query_string, (bytes, text_type)): + self.query_string = query_string + else: + if query_string is None: + query_string = MultiDict() + elif not isinstance(query_string, MultiDict): + query_string = MultiDict(query_string) + self.args = query_string + self.method = method + if headers is None: + headers = Headers() + elif not isinstance(headers, Headers): + headers = Headers(headers) + self.headers = headers + if content_type is not None: + self.content_type = content_type + if errors_stream is None: + errors_stream = sys.stderr + self.errors_stream = errors_stream + self.multithread = multithread + self.multiprocess = multiprocess + self.run_once = run_once + self.environ_base = environ_base + self.environ_overrides = environ_overrides + self.input_stream = input_stream + self.content_length = content_length + self.closed = False + + if json is not None: + if data is not None: + raise TypeError("can't provide both json and data") + + data = self.json_dumps(json) + + if self.content_type is None: + self.content_type = "application/json" + + if data: + if input_stream is not None: + raise TypeError("can't provide input stream and data") + if hasattr(data, "read"): + data = data.read() + if isinstance(data, text_type): + data = 
data.encode(self.charset) + if isinstance(data, bytes): + self.input_stream = BytesIO(data) + if self.content_length is None: + self.content_length = len(data) + else: + for key, value in _iter_data(data): + if isinstance(value, (tuple, dict)) or hasattr(value, "read"): + self._add_file_from_data(key, value) + else: + self.form.setlistdefault(key).append(value) + + if mimetype is not None: + self.mimetype = mimetype + + @classmethod + def from_environ(cls, environ, **kwargs): + """Turn an environ dict back into a builder. Any extra kwargs + override the args extracted from the environ. + + .. versionadded:: 0.15 + """ + headers = Headers(EnvironHeaders(environ)) + out = { + "path": environ["PATH_INFO"], + "base_url": cls._make_base_url( + environ["wsgi.url_scheme"], headers.pop("Host"), environ["SCRIPT_NAME"] + ), + "query_string": environ["QUERY_STRING"], + "method": environ["REQUEST_METHOD"], + "input_stream": environ["wsgi.input"], + "content_type": headers.pop("Content-Type", None), + "content_length": headers.pop("Content-Length", None), + "errors_stream": environ["wsgi.errors"], + "multithread": environ["wsgi.multithread"], + "multiprocess": environ["wsgi.multiprocess"], + "run_once": environ["wsgi.run_once"], + "headers": headers, + } + out.update(kwargs) + return cls(**out) + + def _add_file_from_data(self, key, value): + """Called in the EnvironBuilder to add files from the data dict.""" + if isinstance(value, tuple): + self.files.add_file(key, *value) + elif isinstance(value, dict): + from warnings import warn + + warn( + "Passing a dict as file data is deprecated as of" + " version 0.5 and will be removed in version 1.0. Use" + " a tuple or 'FileStorage' object instead.", + DeprecationWarning, + stacklevel=2, + ) + value = dict(value) + mimetype = value.pop("mimetype", None) + if mimetype is not None: + value["content_type"] = mimetype + self.files.add_file(key, **value) + else: + self.files.add_file(key, value) + + @staticmethod + def _make_base_url(scheme, host, script_root): + return url_unparse((scheme, host, script_root, "", "")).rstrip("/") + "/" + + @property + def base_url(self): + """The base URL is used to extract the URL scheme, host name, + port, and root path. + """ + return self._make_base_url(self.url_scheme, self.host, self.script_root) + + @base_url.setter + def base_url(self, value): + if value is None: + scheme = "http" + netloc = "localhost" + script_root = "" + else: + scheme, netloc, script_root, qs, anchor = url_parse(value) + if qs or anchor: + raise ValueError("base url must not contain a query string or fragment") + self.script_root = script_root.rstrip("/") + self.host = netloc + self.url_scheme = scheme + + def _get_content_type(self): + ct = self.headers.get("Content-Type") + if ct is None and not self._input_stream: + if self._files: + return "multipart/form-data" + elif self._form: + return "application/x-www-form-urlencoded" + return None + return ct + + def _set_content_type(self, value): + if value is None: + self.headers.pop("Content-Type", None) + else: + self.headers["Content-Type"] = value + + content_type = property( + _get_content_type, + _set_content_type, + doc="""The content type for the request. Reflected from and to + the :attr:`headers`. 
Do not set if you set :attr:`files` or + :attr:`form` for auto detection.""", + ) + del _get_content_type, _set_content_type + + def _get_content_length(self): + return self.headers.get("Content-Length", type=int) + + def _get_mimetype(self): + ct = self.content_type + if ct: + return ct.split(";")[0].strip() + + def _set_mimetype(self, value): + self.content_type = get_content_type(value, self.charset) + + def _get_mimetype_params(self): + def on_update(d): + self.headers["Content-Type"] = dump_options_header(self.mimetype, d) + + d = parse_options_header(self.headers.get("content-type", ""))[1] + return CallbackDict(d, on_update) + + mimetype = property( + _get_mimetype, + _set_mimetype, + doc="""The mimetype (content type without charset etc.) + + .. versionadded:: 0.14 + """, + ) + mimetype_params = property( + _get_mimetype_params, + doc=""" The mimetype parameters as dict. For example if the + content type is ``text/html; charset=utf-8`` the params would be + ``{'charset': 'utf-8'}``. + + .. versionadded:: 0.14 + """, + ) + del _get_mimetype, _set_mimetype, _get_mimetype_params + + def _set_content_length(self, value): + if value is None: + self.headers.pop("Content-Length", None) + else: + self.headers["Content-Length"] = str(value) + + content_length = property( + _get_content_length, + _set_content_length, + doc="""The content length as integer. Reflected from and to the + :attr:`headers`. Do not set if you set :attr:`files` or + :attr:`form` for auto detection.""", + ) + del _get_content_length, _set_content_length + + def form_property(name, storage, doc): # noqa: B902 + key = "_" + name + + def getter(self): + if self._input_stream is not None: + raise AttributeError("an input stream is defined") + rv = getattr(self, key) + if rv is None: + rv = storage() + setattr(self, key, rv) + + return rv + + def setter(self, value): + self._input_stream = None + setattr(self, key, value) + + return property(getter, setter, doc=doc) + + form = form_property("form", MultiDict, doc="A :class:`MultiDict` of form values.") + files = form_property( + "files", + FileMultiDict, + doc="""A :class:`FileMultiDict` of uploaded files. You can use + the :meth:`~FileMultiDict.add_file` method to add new files to + the dict.""", + ) + del form_property + + def _get_input_stream(self): + return self._input_stream + + def _set_input_stream(self, value): + self._input_stream = value + self._form = self._files = None + + input_stream = property( + _get_input_stream, + _set_input_stream, + doc="""An optional input stream. If you set this it will clear + :attr:`form` and :attr:`files`.""", + ) + del _get_input_stream, _set_input_stream + + def _get_query_string(self): + if self._query_string is None: + if self._args is not None: + return url_encode(self._args, charset=self.charset) + return "" + return self._query_string + + def _set_query_string(self, value): + self._query_string = value + self._args = None + + query_string = property( + _get_query_string, + _set_query_string, + doc="""The query string. If you set this to a string + :attr:`args` will no longer be available.""", + ) + del _get_query_string, _set_query_string + + def _get_args(self): + if self._query_string is not None: + raise AttributeError("a query string is defined") + if self._args is None: + self._args = MultiDict() + return self._args + + def _set_args(self, value): + self._query_string = None + self._args = value + + args = property( + _get_args, _set_args, doc="The URL arguments as :class:`MultiDict`." 
+ ) + del _get_args, _set_args + + @property + def server_name(self): + """The server name (read-only, use :attr:`host` to set)""" + return self.host.split(":", 1)[0] + + @property + def server_port(self): + """The server port as integer (read-only, use :attr:`host` to set)""" + pieces = self.host.split(":", 1) + if len(pieces) == 2 and pieces[1].isdigit(): + return int(pieces[1]) + elif self.url_scheme == "https": + return 443 + return 80 + + def __del__(self): + try: + self.close() + except Exception: + pass + + def close(self): + """Closes all files. If you put real :class:`file` objects into the + :attr:`files` dict you can call this method to automatically close + them all in one go. + """ + if self.closed: + return + try: + files = itervalues(self.files) + except AttributeError: + files = () + for f in files: + try: + f.close() + except Exception: + pass + self.closed = True + + def get_environ(self): + """Return the built environ. + + .. versionchanged:: 0.15 + The content type and length headers are set based on + input stream detection. Previously this only set the WSGI + keys. + """ + input_stream = self.input_stream + content_length = self.content_length + + mimetype = self.mimetype + content_type = self.content_type + + if input_stream is not None: + start_pos = input_stream.tell() + input_stream.seek(0, 2) + end_pos = input_stream.tell() + input_stream.seek(start_pos) + content_length = end_pos - start_pos + elif mimetype == "multipart/form-data": + values = CombinedMultiDict([self.form, self.files]) + input_stream, content_length, boundary = stream_encode_multipart( + values, charset=self.charset + ) + content_type = mimetype + '; boundary="%s"' % boundary + elif mimetype == "application/x-www-form-urlencoded": + # XXX: py2v3 review + values = url_encode(self.form, charset=self.charset) + values = values.encode("ascii") + content_length = len(values) + input_stream = BytesIO(values) + else: + input_stream = BytesIO() + + result = {} + if self.environ_base: + result.update(self.environ_base) + + def _path_encode(x): + return wsgi_encoding_dance(url_unquote(x, self.charset), self.charset) + + qs = wsgi_encoding_dance(self.query_string) + + result.update( + { + "REQUEST_METHOD": self.method, + "SCRIPT_NAME": _path_encode(self.script_root), + "PATH_INFO": _path_encode(self.path), + "QUERY_STRING": qs, + # Non-standard, added by mod_wsgi, uWSGI + "REQUEST_URI": wsgi_encoding_dance(self.path), + # Non-standard, added by gunicorn + "RAW_URI": wsgi_encoding_dance(self.path), + "SERVER_NAME": self.server_name, + "SERVER_PORT": str(self.server_port), + "HTTP_HOST": self.host, + "SERVER_PROTOCOL": self.server_protocol, + "wsgi.version": self.wsgi_version, + "wsgi.url_scheme": self.url_scheme, + "wsgi.input": input_stream, + "wsgi.errors": self.errors_stream, + "wsgi.multithread": self.multithread, + "wsgi.multiprocess": self.multiprocess, + "wsgi.run_once": self.run_once, + } + ) + + headers = self.headers.copy() + + if content_type is not None: + result["CONTENT_TYPE"] = content_type + headers.set("Content-Type", content_type) + + if content_length is not None: + result["CONTENT_LENGTH"] = str(content_length) + headers.set("Content-Length", content_length) + + for key, value in headers.to_wsgi_list(): + result["HTTP_%s" % key.upper().replace("-", "_")] = value + + if self.environ_overrides: + result.update(self.environ_overrides) + + return result + + def get_request(self, cls=None): + """Returns a request with the data. 
If the request class is not
+        specified, :attr:`request_class` is used.
+
+        :param cls: The request wrapper to use.
+        """
+        if cls is None:
+            cls = self.request_class
+        return cls(self.get_environ())
+
+
+class ClientRedirectError(Exception):
+    """If a redirect loop is detected when using follow_redirects=True with
+    the :class:`Client`, then this exception is raised.
+    """
+
+
+class Client(object):
+    """This class allows you to send requests to a wrapped application.
+
+    The response wrapper can be a class or factory function that takes
+    three arguments: app_iter, status and headers.  The default response
+    wrapper just returns a tuple.
+
+    Example::
+
+        class ClientResponse(BaseResponse):
+            ...
+
+        client = Client(MyApplication(), response_wrapper=ClientResponse)
+
+    The use_cookies parameter indicates whether cookies should be stored and
+    sent for subsequent requests.  This is True by default, but passing False
+    will disable this behaviour.
+
+    If you want to request some subdomain of your application you may set
+    `allow_subdomain_redirects` to `True`; otherwise no external redirects
+    are allowed.
+
+    .. versionadded:: 0.5
+        `use_cookies` is new in this version.  Older versions did not provide
+        builtin cookie support.
+
+    .. versionadded:: 0.14
+       The `mimetype` parameter was added.
+
+    .. versionadded:: 0.15
+        The ``json`` parameter.
+    """
+
+    def __init__(
+        self,
+        application,
+        response_wrapper=None,
+        use_cookies=True,
+        allow_subdomain_redirects=False,
+    ):
+        self.application = application
+        self.response_wrapper = response_wrapper
+        if use_cookies:
+            self.cookie_jar = _TestCookieJar()
+        else:
+            self.cookie_jar = None
+        self.allow_subdomain_redirects = allow_subdomain_redirects
+
+    def set_cookie(
+        self,
+        server_name,
+        key,
+        value="",
+        max_age=None,
+        expires=None,
+        path="/",
+        domain=None,
+        secure=None,
+        httponly=False,
+        charset="utf-8",
+    ):
+        """Sets a cookie in the client's cookie jar.  The server name
+        is required and has to match the one that is also passed to
+        the open call.
+        """
+        assert self.cookie_jar is not None, "cookies disabled"
+        header = dump_cookie(
+            key, value, max_age, expires, path, domain, secure, httponly, charset
+        )
+        environ = create_environ(path, base_url="http://" + server_name)
+        headers = [("Set-Cookie", header)]
+        self.cookie_jar.extract_wsgi(environ, headers)
+
+    def delete_cookie(self, server_name, key, path="/", domain=None):
+        """Deletes a cookie in the test client."""
+        self.set_cookie(
+            server_name, key, expires=0, max_age=0, path=path, domain=domain
+        )
+
+    def run_wsgi_app(self, environ, buffered=False):
+        """Runs the wrapped WSGI app with the given environment."""
+        if self.cookie_jar is not None:
+            self.cookie_jar.inject_wsgi(environ)
+        rv = run_wsgi_app(self.application, environ, buffered=buffered)
+        if self.cookie_jar is not None:
+            self.cookie_jar.extract_wsgi(environ, rv[2])
+        return rv
+
+    def resolve_redirect(self, response, new_location, environ, buffered=False):
+        """Perform a new request to the location given by the redirect
+        response to the previous request.
+        """
+        scheme, netloc, path, qs, anchor = url_parse(new_location)
+        builder = EnvironBuilder.from_environ(environ, query_string=qs)
+
+        to_name_parts = netloc.split(":", 1)[0].split(".")
+        from_name_parts = builder.server_name.split(".")
+
+        if to_name_parts != [""]:
+            # The new location has a host, use it for the base URL.
+ builder.url_scheme = scheme + builder.host = netloc + else: + # A local redirect with autocorrect_location_header=False + # doesn't have a host, so use the request's host. + to_name_parts = from_name_parts + + # Explain why a redirect to a different server name won't be followed. + if to_name_parts != from_name_parts: + if to_name_parts[-len(from_name_parts) :] == from_name_parts: + if not self.allow_subdomain_redirects: + raise RuntimeError("Following subdomain redirects is not enabled.") + else: + raise RuntimeError("Following external redirects is not supported.") + + path_parts = path.split("/") + root_parts = builder.script_root.split("/") + + if path_parts[: len(root_parts)] == root_parts: + # Strip the script root from the path. + builder.path = path[len(builder.script_root) :] + else: + # The new location is not under the script root, so use the + # whole path and clear the previous root. + builder.path = path + builder.script_root = "" + + status_code = int(response[1].split(None, 1)[0]) + + # Only 307 and 308 preserve all of the original request. + if status_code not in {307, 308}: + # HEAD is preserved, everything else becomes GET. + if builder.method != "HEAD": + builder.method = "GET" + + # Clear the body and the headers that describe it. + builder.input_stream = None + builder.content_type = None + builder.content_length = None + builder.headers.pop("Transfer-Encoding", None) + + # Disable the response wrapper while handling redirects. Not + # thread safe, but the client should not be shared anyway. + old_response_wrapper = self.response_wrapper + self.response_wrapper = None + + try: + return self.open(builder, as_tuple=True, buffered=buffered) + finally: + self.response_wrapper = old_response_wrapper + + def open(self, *args, **kwargs): + """Takes the same arguments as the :class:`EnvironBuilder` class with + some additions: You can provide a :class:`EnvironBuilder` or a WSGI + environment as only argument instead of the :class:`EnvironBuilder` + arguments and two optional keyword arguments (`as_tuple`, `buffered`) + that change the type of the return value or the way the application is + executed. + + .. versionchanged:: 0.5 + If a dict is provided as file in the dict for the `data` parameter + the content type has to be called `content_type` now instead of + `mimetype`. This change was made for consistency with + :class:`werkzeug.FileWrapper`. + + The `follow_redirects` parameter was added to :func:`open`. + + Additional parameters: + + :param as_tuple: Returns a tuple in the form ``(environ, result)`` + :param buffered: Set this to True to buffer the application run. + This will automatically close the application for + you as well. + :param follow_redirects: Set this to True if the `Client` should + follow HTTP redirects. 
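+
+        A short sketch with the default (tuple) response wrapper; the path
+        and form field are hypothetical::
+
+            client = Client(application)
+            app_iter, status, headers = client.open(
+                "/login", method="POST", data={"user": "alice"},
+                follow_redirects=True,
+            )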
+ """ + as_tuple = kwargs.pop("as_tuple", False) + buffered = kwargs.pop("buffered", False) + follow_redirects = kwargs.pop("follow_redirects", False) + environ = None + if not kwargs and len(args) == 1: + if isinstance(args[0], EnvironBuilder): + environ = args[0].get_environ() + elif isinstance(args[0], dict): + environ = args[0] + if environ is None: + builder = EnvironBuilder(*args, **kwargs) + try: + environ = builder.get_environ() + finally: + builder.close() + + response = self.run_wsgi_app(environ.copy(), buffered=buffered) + + # handle redirects + redirect_chain = [] + while 1: + status_code = int(response[1].split(None, 1)[0]) + if ( + status_code not in {301, 302, 303, 305, 307, 308} + or not follow_redirects + ): + break + + # Exhaust intermediate response bodies to ensure middleware + # that returns an iterator runs any cleanup code. + if not buffered: + for _ in response[0]: + pass + + new_location = response[2]["location"] + new_redirect_entry = (new_location, status_code) + if new_redirect_entry in redirect_chain: + raise ClientRedirectError("loop detected") + redirect_chain.append(new_redirect_entry) + environ, response = self.resolve_redirect( + response, new_location, environ, buffered=buffered + ) + + if self.response_wrapper is not None: + response = self.response_wrapper(*response) + if as_tuple: + return environ, response + return response + + def get(self, *args, **kw): + """Like open but method is enforced to GET.""" + kw["method"] = "GET" + return self.open(*args, **kw) + + def patch(self, *args, **kw): + """Like open but method is enforced to PATCH.""" + kw["method"] = "PATCH" + return self.open(*args, **kw) + + def post(self, *args, **kw): + """Like open but method is enforced to POST.""" + kw["method"] = "POST" + return self.open(*args, **kw) + + def head(self, *args, **kw): + """Like open but method is enforced to HEAD.""" + kw["method"] = "HEAD" + return self.open(*args, **kw) + + def put(self, *args, **kw): + """Like open but method is enforced to PUT.""" + kw["method"] = "PUT" + return self.open(*args, **kw) + + def delete(self, *args, **kw): + """Like open but method is enforced to DELETE.""" + kw["method"] = "DELETE" + return self.open(*args, **kw) + + def options(self, *args, **kw): + """Like open but method is enforced to OPTIONS.""" + kw["method"] = "OPTIONS" + return self.open(*args, **kw) + + def trace(self, *args, **kw): + """Like open but method is enforced to TRACE.""" + kw["method"] = "TRACE" + return self.open(*args, **kw) + + def __repr__(self): + return "<%s %r>" % (self.__class__.__name__, self.application) + + +def create_environ(*args, **kwargs): + """Create a new WSGI environ dict based on the values passed. The first + parameter should be the path of the request which defaults to '/'. The + second one can either be an absolute path (in that case the host is + localhost:80) or a full path to the request with scheme, netloc port and + the path to the script. + + This accepts the same arguments as the :class:`EnvironBuilder` + constructor. + + .. versionchanged:: 0.5 + This function is now a thin wrapper over :class:`EnvironBuilder` which + was added in 0.5. The `headers`, `environ_base`, `environ_overrides` + and `charset` parameters were added. + """ + builder = EnvironBuilder(*args, **kwargs) + try: + return builder.get_environ() + finally: + builder.close() + + +def run_wsgi_app(app, environ, buffered=False): + """Return a tuple in the form (app_iter, status, headers) of the + application output. 
This works best if you pass it an application that + returns an iterator all the time. + + Sometimes applications may use the `write()` callable returned + by the `start_response` function. This tries to resolve such edge + cases automatically. But if you don't get the expected output you + should set `buffered` to `True` which enforces buffering. + + If passed an invalid WSGI application the behavior of this function is + undefined. Never pass non-conforming WSGI applications to this function. + + :param app: the application to execute. + :param buffered: set to `True` to enforce buffering. + :return: tuple in the form ``(app_iter, status, headers)`` + """ + environ = _get_environ(environ) + response = [] + buffer = [] + + def start_response(status, headers, exc_info=None): + if exc_info is not None: + reraise(*exc_info) + response[:] = [status, headers] + return buffer.append + + app_rv = app(environ, start_response) + close_func = getattr(app_rv, "close", None) + app_iter = iter(app_rv) + + # when buffering we emit the close call early and convert the + # application iterator into a regular list + if buffered: + try: + app_iter = list(app_iter) + finally: + if close_func is not None: + close_func() + + # otherwise we iterate the application iter until we have a response, chain + # the already received data with the already collected data and wrap it in + # a new `ClosingIterator` if we need to restore a `close` callable from the + # original return value. + else: + for item in app_iter: + buffer.append(item) + if response: + break + if buffer: + app_iter = chain(buffer, app_iter) + if close_func is not None and app_iter is not app_rv: + app_iter = ClosingIterator(app_iter, close_func) + + return app_iter, response[0], Headers(response[1]) diff --git a/python/werkzeug/testapp.py b/python/werkzeug/testapp.py new file mode 100644 index 0000000..8ea23be --- /dev/null +++ b/python/werkzeug/testapp.py @@ -0,0 +1,241 @@ +# -*- coding: utf-8 -*- +""" + werkzeug.testapp + ~~~~~~~~~~~~~~~~ + + Provide a small test application that can be used to test a WSGI server + and check it for WSGI compliance. 
+ + :copyright: 2007 Pallets + :license: BSD-3-Clause +""" +import base64 +import os +import sys +from textwrap import wrap + +import werkzeug +from .utils import escape +from .wrappers import BaseRequest as Request +from .wrappers import BaseResponse as Response + +logo = Response( + base64.b64decode( + """ +R0lGODlhoACgAOMIAAEDACwpAEpCAGdgAJaKAM28AOnVAP3rAP///////// +//////////////////////yH5BAEKAAgALAAAAACgAKAAAAT+EMlJq704680R+F0ojmRpnuj0rWnrv +nB8rbRs33gu0bzu/0AObxgsGn3D5HHJbCUFyqZ0ukkSDlAidctNFg7gbI9LZlrBaHGtzAae0eloe25 +7w9EDOX2fst/xenyCIn5/gFqDiVVDV4aGeYiKkhSFjnCQY5OTlZaXgZp8nJ2ekaB0SQOjqphrpnOiq +ncEn65UsLGytLVmQ6m4sQazpbtLqL/HwpnER8bHyLrLOc3Oz8PRONPU1crXN9na263dMt/g4SzjMeX +m5yDpLqgG7OzJ4u8lT/P69ej3JPn69kHzN2OIAHkB9RUYSFCFQYQJFTIkCDBiwoXWGnowaLEjRm7+G +p9A7Hhx4rUkAUaSLJlxHMqVMD/aSycSZkyTplCqtGnRAM5NQ1Ly5OmzZc6gO4d6DGAUKA+hSocWYAo +SlM6oUWX2O/o0KdaVU5vuSQLAa0ADwQgMEMB2AIECZhVSnTno6spgbtXmHcBUrQACcc2FrTrWS8wAf +78cMFBgwIBgbN+qvTt3ayikRBk7BoyGAGABAdYyfdzRQGV3l4coxrqQ84GpUBmrdR3xNIDUPAKDBSA +ADIGDhhqTZIWaDcrVX8EsbNzbkvCOxG8bN5w8ly9H8jyTJHC6DFndQydbguh2e/ctZJFXRxMAqqPVA +tQH5E64SPr1f0zz7sQYjAHg0In+JQ11+N2B0XXBeeYZgBZFx4tqBToiTCPv0YBgQv8JqA6BEf6RhXx +w1ENhRBnWV8ctEX4Ul2zc3aVGcQNC2KElyTDYyYUWvShdjDyMOGMuFjqnII45aogPhz/CodUHFwaDx +lTgsaOjNyhGWJQd+lFoAGk8ObghI0kawg+EV5blH3dr+digkYuAGSaQZFHFz2P/cTaLmhF52QeSb45 +Jwxd+uSVGHlqOZpOeJpCFZ5J+rkAkFjQ0N1tah7JJSZUFNsrkeJUJMIBi8jyaEKIhKPomnC91Uo+NB +yyaJ5umnnpInIFh4t6ZSpGaAVmizqjpByDegYl8tPE0phCYrhcMWSv+uAqHfgH88ak5UXZmlKLVJhd +dj78s1Fxnzo6yUCrV6rrDOkluG+QzCAUTbCwf9SrmMLzK6p+OPHx7DF+bsfMRq7Ec61Av9i6GLw23r +idnZ+/OO0a99pbIrJkproCQMA17OPG6suq3cca5ruDfXCCDoS7BEdvmJn5otdqscn+uogRHHXs8cbh +EIfYaDY1AkrC0cqwcZpnM6ludx72x0p7Fo/hZAcpJDjax0UdHavMKAbiKltMWCF3xxh9k25N/Viud8 +ba78iCvUkt+V6BpwMlErmcgc502x+u1nSxJSJP9Mi52awD1V4yB/QHONsnU3L+A/zR4VL/indx/y64 +gqcj+qgTeweM86f0Qy1QVbvmWH1D9h+alqg254QD8HJXHvjQaGOqEqC22M54PcftZVKVSQG9jhkv7C +JyTyDoAJfPdu8v7DRZAxsP/ky9MJ3OL36DJfCFPASC3/aXlfLOOON9vGZZHydGf8LnxYJuuVIbl83y +Az5n/RPz07E+9+zw2A2ahz4HxHo9Kt79HTMx1Q7ma7zAzHgHqYH0SoZWyTuOLMiHwSfZDAQTn0ajk9 +YQqodnUYjByQZhZak9Wu4gYQsMyEpIOAOQKze8CmEF45KuAHTvIDOfHJNipwoHMuGHBnJElUoDmAyX +c2Qm/R8Ah/iILCCJOEokGowdhDYc/yoL+vpRGwyVSCWFYZNljkhEirGXsalWcAgOdeAdoXcktF2udb +qbUhjWyMQxYO01o6KYKOr6iK3fE4MaS+DsvBsGOBaMb0Y6IxADaJhFICaOLmiWTlDAnY1KzDG4ambL +cWBA8mUzjJsN2KjSaSXGqMCVXYpYkj33mcIApyhQf6YqgeNAmNvuC0t4CsDbSshZJkCS1eNisKqlyG +cF8G2JeiDX6tO6Mv0SmjCa3MFb0bJaGPMU0X7c8XcpvMaOQmCajwSeY9G0WqbBmKv34DsMIEztU6Y2 +KiDlFdt6jnCSqx7Dmt6XnqSKaFFHNO5+FmODxMCWBEaco77lNDGXBM0ECYB/+s7nKFdwSF5hgXumQe +EZ7amRg39RHy3zIjyRCykQh8Zo2iviRKyTDn/zx6EefptJj2Cw+Ep2FSc01U5ry4KLPYsTyWnVGnvb +UpyGlhjBUljyjHhWpf8OFaXwhp9O4T1gU9UeyPPa8A2l0p1kNqPXEVRm1AOs1oAGZU596t6SOR2mcB +Oco1srWtkaVrMUzIErrKri85keKqRQYX9VX0/eAUK1hrSu6HMEX3Qh2sCh0q0D2CtnUqS4hj62sE/z +aDs2Sg7MBS6xnQeooc2R2tC9YrKpEi9pLXfYXp20tDCpSP8rKlrD4axprb9u1Df5hSbz9QU0cRpfgn +kiIzwKucd0wsEHlLpe5yHXuc6FrNelOl7pY2+11kTWx7VpRu97dXA3DO1vbkhcb4zyvERYajQgAADs +=""" + ), + mimetype="image/png", +) + + +TEMPLATE = u"""\ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" + "http://www.w3.org/TR/html4/loose.dtd"> +<title>WSGI Information</title> +<style type="text/css"> + @import url(https://fonts.googleapis.com/css?family=Ubuntu); + + body { font-family: 'Lucida Grande', 'Lucida Sans Unicode', 'Geneva', + 'Verdana', sans-serif; background-color: white; color: #000; + font-size: 15px; text-align: center; } + #logo { float: right; padding: 0 0 10px 10px; } + div.box { text-align: left; width: 45em; margin: auto; padding: 50px 0; + background-color: white; } + h1, 
h2 { font-family: 'Ubuntu', 'Lucida Grande', 'Lucida Sans Unicode', + 'Geneva', 'Verdana', sans-serif; font-weight: normal; } + h1 { margin: 0 0 30px 0; } + h2 { font-size: 1.4em; margin: 1em 0 0.5em 0; } + table { width: 100%%; border-collapse: collapse; border: 1px solid #AFC5C9 } + table th { background-color: #AFC1C4; color: white; font-size: 0.72em; + font-weight: normal; width: 18em; vertical-align: top; + padding: 0.5em 0 0.1em 0.5em; } + table td { border: 1px solid #AFC5C9; padding: 0.1em 0 0.1em 0.5em; } + code { font-family: 'Consolas', 'Monaco', 'Bitstream Vera Sans Mono', + monospace; font-size: 0.7em; } + ul li { line-height: 1.5em; } + ul.path { font-size: 0.7em; margin: 0 -30px; padding: 8px 30px; + list-style: none; background: #E8EFF0; } + ul.path li { line-height: 1.6em; } + li.virtual { color: #999; text-decoration: underline; } + li.exp { background: white; } +</style> +<div class="box"> + <img src="?resource=logo" id="logo" alt="[The Werkzeug Logo]" /> + <h1>WSGI Information</h1> + <p> + This page displays all available information about the WSGI server and + the underlying Python interpreter. + <h2 id="python-interpreter">Python Interpreter</h2> + <table> + <tr> + <th>Python Version + <td>%(python_version)s + <tr> + <th>Platform + <td>%(platform)s [%(os)s] + <tr> + <th>API Version + <td>%(api_version)s + <tr> + <th>Byteorder + <td>%(byteorder)s + <tr> + <th>Werkzeug Version + <td>%(werkzeug_version)s + </table> + <h2 id="wsgi-environment">WSGI Environment</h2> + <table>%(wsgi_env)s</table> + <h2 id="installed-eggs">Installed Eggs</h2> + <p> + The following python packages were installed on the system as + Python eggs: + <ul>%(python_eggs)s</ul> + <h2 id="sys-path">System Path</h2> + <p> + The following paths are the current contents of the load path. The + following entries are looked up for Python packages. Note that not + all items in this path are folders. Gray and underlined items are + entries pointing to invalid resources or used by custom import hooks + such as the zip importer. + <p> + Items with a bright background were expanded for display from a relative + path. If you encounter such paths in the output you might want to check + your setup as relative paths are usually problematic in multithreaded + environments. 
+ <ul class="path">%(sys_path)s</ul> +</div> +""" + + +def iter_sys_path(): + if os.name == "posix": + + def strip(x): + prefix = os.path.expanduser("~") + if x.startswith(prefix): + x = "~" + x[len(prefix) :] + return x + + else: + + def strip(x): + return x + + cwd = os.path.abspath(os.getcwd()) + for item in sys.path: + path = os.path.join(cwd, item or os.path.curdir) + yield strip(os.path.normpath(path)), not os.path.isdir(path), path != item + + +def render_testapp(req): + try: + import pkg_resources + except ImportError: + eggs = () + else: + eggs = sorted(pkg_resources.working_set, key=lambda x: x.project_name.lower()) + python_eggs = [] + for egg in eggs: + try: + version = egg.version + except (ValueError, AttributeError): + version = "unknown" + python_eggs.append( + "<li>%s <small>[%s]</small>" % (escape(egg.project_name), escape(version)) + ) + + wsgi_env = [] + sorted_environ = sorted(req.environ.items(), key=lambda x: repr(x[0]).lower()) + for key, value in sorted_environ: + wsgi_env.append( + "<tr><th>%s<td><code>%s</code>" + % (escape(str(key)), " ".join(wrap(escape(repr(value))))) + ) + + sys_path = [] + for item, virtual, expanded in iter_sys_path(): + class_ = [] + if virtual: + class_.append("virtual") + if expanded: + class_.append("exp") + sys_path.append( + "<li%s>%s" + % (' class="%s"' % " ".join(class_) if class_ else "", escape(item)) + ) + + return ( + TEMPLATE + % { + "python_version": "<br>".join(escape(sys.version).splitlines()), + "platform": escape(sys.platform), + "os": escape(os.name), + "api_version": sys.api_version, + "byteorder": sys.byteorder, + "werkzeug_version": werkzeug.__version__, + "python_eggs": "\n".join(python_eggs), + "wsgi_env": "\n".join(wsgi_env), + "sys_path": "\n".join(sys_path), + } + ).encode("utf-8") + + +def test_app(environ, start_response): + """Simple test application that dumps the environment. You can use + it to check if Werkzeug is working properly: + + .. sourcecode:: pycon + + >>> from werkzeug.serving import run_simple + >>> from werkzeug.testapp import test_app + >>> run_simple('localhost', 3000, test_app) + * Running on http://localhost:3000/ + + The application displays important information from the WSGI environment, + the Python interpreter and the installed libraries. + """ + req = Request(environ, populate_request=False) + if req.args.get("resource") == "logo": + response = logo + else: + response = Response(render_testapp(req), mimetype="text/html") + return response(environ, start_response) + + +if __name__ == "__main__": + from .serving import run_simple + + run_simple("localhost", 5000, test_app, use_reloader=True) diff --git a/python/werkzeug/urls.py b/python/werkzeug/urls.py new file mode 100644 index 0000000..38e9e5a --- /dev/null +++ b/python/werkzeug/urls.py @@ -0,0 +1,1134 @@ +# -*- coding: utf-8 -*- +""" + werkzeug.urls + ~~~~~~~~~~~~~ + + ``werkzeug.urls`` used to provide several wrapper functions for Python 2 + urlparse, whose main purpose were to work around the behavior of the Py2 + stdlib and its lack of unicode support. While this was already a somewhat + inconvenient situation, it got even more complicated because Python 3's + ``urllib.parse`` actually does handle unicode properly. In other words, + this module would wrap two libraries with completely different behavior. So + now this module contains a 2-and-3-compatible backport of Python 3's + ``urllib.parse``, which is mostly API-compatible. 
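+
+    A doctest-style sketch of the parsing API (the URL is an example)::
+
+        >>> from werkzeug.urls import url_parse
+        >>> url = url_parse("http://example.com/a?b=c")
+        >>> url.host, url.path, url.query
+        ('example.com', '/a', 'b=c')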
+ + :copyright: 2007 Pallets + :license: BSD-3-Clause +""" +import codecs +import os +import re +from collections import namedtuple + +from ._compat import fix_tuple_repr +from ._compat import implements_to_string +from ._compat import make_literal_wrapper +from ._compat import normalize_string_tuple +from ._compat import PY2 +from ._compat import text_type +from ._compat import to_native +from ._compat import to_unicode +from ._compat import try_coerce_native +from ._internal import _decode_idna +from ._internal import _encode_idna +from .datastructures import iter_multi_items +from .datastructures import MultiDict + +# A regular expression for what a valid schema looks like +_scheme_re = re.compile(r"^[a-zA-Z0-9+-.]+$") + +# Characters that are safe in any part of an URL. +_always_safe = frozenset( + bytearray( + b"abcdefghijklmnopqrstuvwxyz" + b"ABCDEFGHIJKLMNOPQRSTUVWXYZ" + b"0123456789" + b"-._~" + ) +) + +_hexdigits = "0123456789ABCDEFabcdef" +_hextobyte = dict( + ((a + b).encode(), int(a + b, 16)) for a in _hexdigits for b in _hexdigits +) +_bytetohex = [("%%%02X" % char).encode("ascii") for char in range(256)] + + +_URLTuple = fix_tuple_repr( + namedtuple("_URLTuple", ["scheme", "netloc", "path", "query", "fragment"]) +) + + +class BaseURL(_URLTuple): + """Superclass of :py:class:`URL` and :py:class:`BytesURL`.""" + + __slots__ = () + + def replace(self, **kwargs): + """Return an URL with the same values, except for those parameters + given new values by whichever keyword arguments are specified.""" + return self._replace(**kwargs) + + @property + def host(self): + """The host part of the URL if available, otherwise `None`. The + host is either the hostname or the IP address mentioned in the + URL. It will not contain the port. + """ + return self._split_host()[0] + + @property + def ascii_host(self): + """Works exactly like :attr:`host` but will return a result that + is restricted to ASCII. If it finds a netloc that is not ASCII + it will attempt to idna decode it. This is useful for socket + operations when the URL might include internationalized characters. + """ + rv = self.host + if rv is not None and isinstance(rv, text_type): + try: + rv = _encode_idna(rv) + except UnicodeError: + rv = rv.encode("ascii", "ignore") + return to_native(rv, "ascii", "ignore") + + @property + def port(self): + """The port in the URL as an integer if it was present, `None` + otherwise. This does not fill in default ports. + """ + try: + rv = int(to_native(self._split_host()[1])) + if 0 <= rv <= 65535: + return rv + except (ValueError, TypeError): + pass + + @property + def auth(self): + """The authentication part in the URL if available, `None` + otherwise. + """ + return self._split_netloc()[0] + + @property + def username(self): + """The username if it was part of the URL, `None` otherwise. + This undergoes URL decoding and will always be a unicode string. + """ + rv = self._split_auth()[0] + if rv is not None: + return _url_unquote_legacy(rv) + + @property + def raw_username(self): + """The username if it was part of the URL, `None` otherwise. + Unlike :attr:`username` this one is not being decoded. + """ + return self._split_auth()[0] + + @property + def password(self): + """The password if it was part of the URL, `None` otherwise. + This undergoes URL decoding and will always be a unicode string. + """ + rv = self._split_auth()[1] + if rv is not None: + return _url_unquote_legacy(rv) + + @property + def raw_password(self): + """The password if it was part of the URL, `None` otherwise. 
+        Unlike :attr:`password` this one is not being decoded.
+        """
+        return self._split_auth()[1]
+
+    def decode_query(self, *args, **kwargs):
+        """Decodes the query part of the URL.  This is a shortcut for
+        calling :func:`url_decode` on the query argument.  The arguments and
+        keyword arguments are forwarded to :func:`url_decode` unchanged.
+        """
+        return url_decode(self.query, *args, **kwargs)
+
+    def join(self, *args, **kwargs):
+        """Joins this URL with another one.  This is just a convenience
+        function for calling into :meth:`url_join` and then parsing the
+        return value again.
+        """
+        return url_parse(url_join(self, *args, **kwargs))
+
+    def to_url(self):
+        """Returns a URL string or bytes depending on the type of the
+        information stored.  This is just a convenience function
+        for calling :meth:`url_unparse` for this URL.
+        """
+        return url_unparse(self)
+
+    def decode_netloc(self):
+        """Decodes the netloc part into a string."""
+        rv = _decode_idna(self.host or "")
+
+        if ":" in rv:
+            rv = "[%s]" % rv
+        port = self.port
+        if port is not None:
+            rv = "%s:%d" % (rv, port)
+        auth = ":".join(
+            filter(
+                None,
+                [
+                    _url_unquote_legacy(self.raw_username or "", "/:%@"),
+                    _url_unquote_legacy(self.raw_password or "", "/:%@"),
+                ],
+            )
+        )
+        if auth:
+            rv = "%s@%s" % (auth, rv)
+        return rv
+
+    def to_uri_tuple(self):
+        """Returns a :class:`BytesURL` tuple that holds a URI.  This will
+        encode all the information in the URL properly to ASCII using the
+        rules a web browser would follow.
+
+        It's usually more interesting to directly call :meth:`iri_to_uri` which
+        will return a string.
+        """
+        return url_parse(iri_to_uri(self).encode("ascii"))
+
+    def to_iri_tuple(self):
+        """Returns a :class:`URL` tuple that holds an IRI.  This will try
+        to decode as much information as possible in the URL without
+        losing information similar to how a web browser does it for the
+        URL bar.
+
+        It's usually more interesting to directly call :meth:`uri_to_iri` which
+        will return a string.
+        """
+        return url_parse(uri_to_iri(self))
+
+    def get_file_location(self, pathformat=None):
+        """Returns a tuple with the location of the file in the form
+        ``(server, location)``.  If the netloc is empty in the URL or
+        points to localhost, it's represented as ``None``.
+
+        The `pathformat` is autodetected by default, but needs to be set
+        explicitly when working with URLs of a specific system.  The
+        supported values are ``'windows'`` when working with Windows or
+        DOS paths and ``'posix'`` when working with posix paths.
+
+        If the URL does not point to a local file, the server and location
+        are both represented as ``None``.
+
+        :param pathformat: The expected format of the path component.
+                           Currently ``'windows'`` and ``'posix'`` are
+                           supported.  Defaults to ``None`` which is
+                           autodetect.
+        """
+        if self.scheme != "file":
+            return None, None
+
+        path = url_unquote(self.path)
+        host = self.netloc or None
+
+        if pathformat is None:
+            if os.name == "nt":
+                pathformat = "windows"
+            else:
+                pathformat = "posix"
+
+        if pathformat == "windows":
+            if path[:1] == "/" and path[1:2].isalpha() and path[2:3] in "|:":
+                path = path[1:2] + ":" + path[3:]
+            windows_share = path[:3] in ("\\" * 3, "/" * 3)
+            import ntpath
+
+            path = ntpath.normpath(path)
+            # Windows shared drives are represented as ``\\host\\directory``.
+            # That results in a URL like ``file://///host/directory``, and a
+            # path like ``///host/directory``.  We need to special-case this
+            # because the path contains the hostname.
+ if windows_share and host is None: + parts = path.lstrip("\\").split("\\", 1) + if len(parts) == 2: + host, path = parts + else: + host = parts[0] + path = "" + elif pathformat == "posix": + import posixpath + + path = posixpath.normpath(path) + else: + raise TypeError("Invalid path format %s" % repr(pathformat)) + + if host in ("127.0.0.1", "::1", "localhost"): + host = None + + return host, path + + def _split_netloc(self): + if self._at in self.netloc: + return self.netloc.split(self._at, 1) + return None, self.netloc + + def _split_auth(self): + auth = self._split_netloc()[0] + if not auth: + return None, None + if self._colon not in auth: + return auth, None + return auth.split(self._colon, 1) + + def _split_host(self): + rv = self._split_netloc()[1] + if not rv: + return None, None + + if not rv.startswith(self._lbracket): + if self._colon in rv: + return rv.split(self._colon, 1) + return rv, None + + idx = rv.find(self._rbracket) + if idx < 0: + return rv, None + + host = rv[1:idx] + rest = rv[idx + 1 :] + if rest.startswith(self._colon): + return host, rest[1:] + return host, None + + +@implements_to_string +class URL(BaseURL): + """Represents a parsed URL. This behaves like a regular tuple but + also has some extra attributes that give further insight into the + URL. + """ + + __slots__ = () + _at = "@" + _colon = ":" + _lbracket = "[" + _rbracket = "]" + + def __str__(self): + return self.to_url() + + def encode_netloc(self): + """Encodes the netloc part to an ASCII safe URL as bytes.""" + rv = self.ascii_host or "" + if ":" in rv: + rv = "[%s]" % rv + port = self.port + if port is not None: + rv = "%s:%d" % (rv, port) + auth = ":".join( + filter( + None, + [ + url_quote(self.raw_username or "", "utf-8", "strict", "/:%"), + url_quote(self.raw_password or "", "utf-8", "strict", "/:%"), + ], + ) + ) + if auth: + rv = "%s@%s" % (auth, rv) + return to_native(rv) + + def encode(self, charset="utf-8", errors="replace"): + """Encodes the URL to a tuple made out of bytes. The charset is + only being used for the path, query and fragment. + """ + return BytesURL( + self.scheme.encode("ascii"), + self.encode_netloc(), + self.path.encode(charset, errors), + self.query.encode(charset, errors), + self.fragment.encode(charset, errors), + ) + + +class BytesURL(BaseURL): + """Represents a parsed URL in bytes.""" + + __slots__ = () + _at = b"@" + _colon = b":" + _lbracket = b"[" + _rbracket = b"]" + + def __str__(self): + return self.to_url().decode("utf-8", "replace") + + def encode_netloc(self): + """Returns the netloc unchanged as bytes.""" + return self.netloc + + def decode(self, charset="utf-8", errors="replace"): + """Decodes the URL to a tuple made out of strings. The charset is + only being used for the path, query and fragment. 
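For illustration, a small sketch of what the `_split_host` logic above does with bracketed IPv6 hosts (assuming the vendored werkzeug is importable):

    from werkzeug.urls import url_parse

    u = url_parse("http://[2001:db8::1]:8080/index")
    print(u.host)  # '2001:db8::1' -- brackets stripped
    print(u.port)  # 8080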
+ """ + return URL( + self.scheme.decode("ascii"), + self.decode_netloc(), + self.path.decode(charset, errors), + self.query.decode(charset, errors), + self.fragment.decode(charset, errors), + ) + + +_unquote_maps = {frozenset(): _hextobyte} + + +def _unquote_to_bytes(string, unsafe=""): + if isinstance(string, text_type): + string = string.encode("utf-8") + + if isinstance(unsafe, text_type): + unsafe = unsafe.encode("utf-8") + + unsafe = frozenset(bytearray(unsafe)) + groups = iter(string.split(b"%")) + result = bytearray(next(groups, b"")) + + try: + hex_to_byte = _unquote_maps[unsafe] + except KeyError: + hex_to_byte = _unquote_maps[unsafe] = { + h: b for h, b in _hextobyte.items() if b not in unsafe + } + + for group in groups: + code = group[:2] + + if code in hex_to_byte: + result.append(hex_to_byte[code]) + result.extend(group[2:]) + else: + result.append(37) # % + result.extend(group) + + return bytes(result) + + +def _url_encode_impl(obj, charset, encode_keys, sort, key): + iterable = iter_multi_items(obj) + if sort: + iterable = sorted(iterable, key=key) + for key, value in iterable: + if value is None: + continue + if not isinstance(key, bytes): + key = text_type(key).encode(charset) + if not isinstance(value, bytes): + value = text_type(value).encode(charset) + yield _fast_url_quote_plus(key) + "=" + _fast_url_quote_plus(value) + + +def _url_unquote_legacy(value, unsafe=""): + try: + return url_unquote(value, charset="utf-8", errors="strict", unsafe=unsafe) + except UnicodeError: + return url_unquote(value, charset="latin1", unsafe=unsafe) + + +def url_parse(url, scheme=None, allow_fragments=True): + """Parses a URL from a string into a :class:`URL` tuple. If the URL + is lacking a scheme it can be provided as second argument. Otherwise, + it is ignored. Optionally fragments can be stripped from the URL + by setting `allow_fragments` to `False`. + + The inverse of this function is :func:`url_unparse`. + + :param url: the URL to parse. + :param scheme: the default schema to use if the URL is schemaless. + :param allow_fragments: if set to `False` a fragment will be removed + from the URL. + """ + s = make_literal_wrapper(url) + is_text_based = isinstance(url, text_type) + + if scheme is None: + scheme = s("") + netloc = query = fragment = s("") + i = url.find(s(":")) + if i > 0 and _scheme_re.match(to_native(url[:i], errors="replace")): + # make sure "iri" is not actually a port number (in which case + # "scheme" is really part of the path) + rest = url[i + 1 :] + if not rest or any(c not in s("0123456789") for c in rest): + # not a port number + scheme, url = url[:i].lower(), rest + + if url[:2] == s("//"): + delim = len(url) + for c in s("/?#"): + wdelim = url.find(c, 2) + if wdelim >= 0: + delim = min(delim, wdelim) + netloc, url = url[2:delim], url[delim:] + if (s("[") in netloc and s("]") not in netloc) or ( + s("]") in netloc and s("[") not in netloc + ): + raise ValueError("Invalid IPv6 URL") + + if allow_fragments and s("#") in url: + url, fragment = url.split(s("#"), 1) + if s("?") in url: + url, query = url.split(s("?"), 1) + + result_type = URL if is_text_based else BytesURL + return result_type(scheme, netloc, url, query, fragment) + + +def _make_fast_url_quote(charset="utf-8", errors="strict", safe="/:", unsafe=""): + """Precompile the translation table for a URL encoding function. + + Unlike :func:`url_quote`, the generated function only takes the + string to quote. + + :param charset: The charset to encode the result with. 
+    :param errors: How to handle encoding errors.
+    :param safe: An optional sequence of safe characters to never encode.
+    :param unsafe: An optional sequence of unsafe characters to always encode.
+    """
+    if isinstance(safe, text_type):
+        safe = safe.encode(charset, errors)
+
+    if isinstance(unsafe, text_type):
+        unsafe = unsafe.encode(charset, errors)
+
+    safe = (frozenset(bytearray(safe)) | _always_safe) - frozenset(bytearray(unsafe))
+    table = [chr(c) if c in safe else "%%%02X" % c for c in range(256)]
+
+    if not PY2:
+
+        def quote(string):
+            return "".join([table[c] for c in string])
+
+    else:
+
+        def quote(string):
+            return "".join([table[c] for c in bytearray(string)])
+
+    return quote
+
+
+_fast_url_quote = _make_fast_url_quote()
+_fast_quote_plus = _make_fast_url_quote(safe=" ", unsafe="+")
+
+
+def _fast_url_quote_plus(string):
+    return _fast_quote_plus(string).replace(" ", "+")
+
+
+def url_quote(string, charset="utf-8", errors="strict", safe="/:", unsafe=""):
+    """URL encode a single string with a given encoding.
+
+    :param string: the string to quote.
+    :param charset: the charset to be used.
+    :param safe: an optional sequence of safe characters.
+    :param unsafe: an optional sequence of unsafe characters.
+
+    .. versionadded:: 0.9.2
+       The `unsafe` parameter was added.
+    """
+    if not isinstance(string, (text_type, bytes, bytearray)):
+        string = text_type(string)
+    if isinstance(string, text_type):
+        string = string.encode(charset, errors)
+    if isinstance(safe, text_type):
+        safe = safe.encode(charset, errors)
+    if isinstance(unsafe, text_type):
+        unsafe = unsafe.encode(charset, errors)
+    safe = (frozenset(bytearray(safe)) | _always_safe) - frozenset(bytearray(unsafe))
+    rv = bytearray()
+    for char in bytearray(string):
+        if char in safe:
+            rv.append(char)
+        else:
+            rv.extend(_bytetohex[char])
+    return to_native(bytes(rv))
+
+
+def url_quote_plus(string, charset="utf-8", errors="strict", safe=""):
+    """URL encode a single string with the given encoding and convert
+    whitespace to "+".
+
+    :param string: The string to quote.
+    :param charset: The charset to be used.
+    :param safe: An optional sequence of safe characters.
+    """
+    return url_quote(string, charset, errors, safe + " ", "+").replace(" ", "+")
+
+
+def url_unparse(components):
+    """The reverse operation to :meth:`url_parse`.  This accepts arbitrary
+    tuples as well as :class:`URL` tuples and returns a URL as a string.
+
+    :param components: the parsed URL as tuple which should be converted
+                       into a URL string.
+    """
+    scheme, netloc, path, query, fragment = normalize_string_tuple(components)
+    s = make_literal_wrapper(scheme)
+    url = s("")
+
+    # We generally treat file:///x and file:/x the same which is also
+    # what browsers seem to do.  This also allows us to avoid keeping a
+    # registry of which schemes use a netloc, and to avoid having to
+    # differentiate between an empty and a missing netloc.
+    if netloc or (scheme and path.startswith(s("/"))):
+        if path and path[:1] != s("/"):
+            path = s("/") + path
+        url = s("//") + (netloc or s("")) + path
+    elif path:
+        url += path
+    if scheme:
+        url = scheme + s(":") + url
+    if query:
+        url = url + s("?") + query
+    if fragment:
+        url = url + s("#") + fragment
+    return url
+
+
+def url_unquote(string, charset="utf-8", errors="replace", unsafe=""):
+    """URL decode a single string with a given encoding.  If the charset
+    is set to `None` no unicode decoding is performed and raw bytes
+    are returned.
+
+    :param string: the string to unquote.
+    :param charset: the charset of the query string.
If set to `None` + no unicode decoding will take place. + :param errors: the error handling for the charset decoding. + """ + rv = _unquote_to_bytes(string, unsafe) + if charset is not None: + rv = rv.decode(charset, errors) + return rv + + +def url_unquote_plus(s, charset="utf-8", errors="replace"): + """URL decode a single string with the given `charset` and decode "+" to + whitespace. + + Per default encoding errors are ignored. If you want a different behavior + you can set `errors` to ``'replace'`` or ``'strict'``. In strict mode a + :exc:`HTTPUnicodeError` is raised. + + :param s: The string to unquote. + :param charset: the charset of the query string. If set to `None` + no unicode decoding will take place. + :param errors: The error handling for the `charset` decoding. + """ + if isinstance(s, text_type): + s = s.replace(u"+", u" ") + else: + s = s.replace(b"+", b" ") + return url_unquote(s, charset, errors) + + +def url_fix(s, charset="utf-8"): + r"""Sometimes you get an URL by a user that just isn't a real URL because + it contains unsafe characters like ' ' and so on. This function can fix + some of the problems in a similar way browsers handle data entered by the + user: + + >>> url_fix(u'http://de.wikipedia.org/wiki/Elf (Begriffskl\xe4rung)') + 'http://de.wikipedia.org/wiki/Elf%20(Begriffskl%C3%A4rung)' + + :param s: the string with the URL to fix. + :param charset: The target charset for the URL if the url was given as + unicode string. + """ + # First step is to switch to unicode processing and to convert + # backslashes (which are invalid in URLs anyways) to slashes. This is + # consistent with what Chrome does. + s = to_unicode(s, charset, "replace").replace("\\", "/") + + # For the specific case that we look like a malformed windows URL + # we want to fix this up manually: + if s.startswith("file://") and s[7:8].isalpha() and s[8:10] in (":/", "|/"): + s = "file:///" + s[7:] + + url = url_parse(s) + path = url_quote(url.path, charset, safe="/%+$!*'(),") + qs = url_quote_plus(url.query, charset, safe=":&%=+$!*'(),") + anchor = url_quote_plus(url.fragment, charset, safe=":&%=+$!*'(),") + return to_native(url_unparse((url.scheme, url.encode_netloc(), path, qs, anchor))) + + +# not-unreserved characters remain quoted when unquoting to IRI +_to_iri_unsafe = "".join([chr(c) for c in range(128) if c not in _always_safe]) + + +def _codec_error_url_quote(e): + """Used in :func:`uri_to_iri` after unquoting to re-quote any + invalid bytes. + """ + out = _fast_url_quote(e.object[e.start : e.end]) + + if PY2: + out = out.decode("utf-8") + + return out, e.end + + +codecs.register_error("werkzeug.url_quote", _codec_error_url_quote) + + +def uri_to_iri(uri, charset="utf-8", errors="werkzeug.url_quote"): + """Convert a URI to an IRI. All valid UTF-8 characters are unquoted, + leaving all reserved and invalid characters quoted. If the URL has + a domain, it is decoded from Punycode. + + >>> uri_to_iri("http://xn--n3h.net/p%C3%A5th?q=%C3%A8ry%DF") + 'http://\\u2603.net/p\\xe5th?q=\\xe8ry%DF' + + :param uri: The URI to convert. + :param charset: The encoding to encode unquoted bytes with. + :param errors: Error handler to use during ``bytes.encode``. By + default, invalid bytes are left quoted. + + .. versionchanged:: 0.15 + All reserved and invalid characters remain quoted. Previously, + only some reserved characters were preserved, and invalid bytes + were replaced instead of left quoted. + + .. 
versionadded:: 0.6 + """ + if isinstance(uri, tuple): + uri = url_unparse(uri) + + uri = url_parse(to_unicode(uri, charset)) + path = url_unquote(uri.path, charset, errors, _to_iri_unsafe) + query = url_unquote(uri.query, charset, errors, _to_iri_unsafe) + fragment = url_unquote(uri.fragment, charset, errors, _to_iri_unsafe) + return url_unparse((uri.scheme, uri.decode_netloc(), path, query, fragment)) + + +# reserved characters remain unquoted when quoting to URI +_to_uri_safe = ":/?#[]@!$&'()*+,;=%" + + +def iri_to_uri(iri, charset="utf-8", errors="strict", safe_conversion=False): + """Convert an IRI to a URI. All non-ASCII and unsafe characters are + quoted. If the URL has a domain, it is encoded to Punycode. + + >>> iri_to_uri('http://\\u2603.net/p\\xe5th?q=\\xe8ry%DF') + 'http://xn--n3h.net/p%C3%A5th?q=%C3%A8ry%DF' + + :param iri: The IRI to convert. + :param charset: The encoding of the IRI. + :param errors: Error handler to use during ``bytes.encode``. + :param safe_conversion: Return the URL unchanged if it only contains + ASCII characters and no whitespace. See the explanation below. + + There is a general problem with IRI conversion with some protocols + that are in violation of the URI specification. Consider the + following two IRIs:: + + magnet:?xt=uri:whatever + itms-services://?action=download-manifest + + After parsing, we don't know if the scheme requires the ``//``, + which is dropped if empty, but conveys different meanings in the + final URL if it's present or not. In this case, you can use + ``safe_conversion``, which will return the URL unchanged if it only + contains ASCII characters and no whitespace. This can result in a + URI with unquoted characters if it was not already quoted correctly, + but preserves the URL's semantics. Werkzeug uses this for the + ``Location`` header for redirects. + + .. versionchanged:: 0.15 + All reserved characters remain unquoted. Previously, only some + reserved characters were left unquoted. + + .. versionchanged:: 0.9.6 + The ``safe_conversion`` parameter was added. + + .. versionadded:: 0.6 + """ + if isinstance(iri, tuple): + iri = url_unparse(iri) + + if safe_conversion: + # If we're not sure if it's safe to convert the URL, and it only + # contains ASCII characters, return it unconverted. + try: + native_iri = to_native(iri) + ascii_iri = native_iri.encode("ascii") + + # Only return if it doesn't have whitespace. (Why?) + if len(ascii_iri.split()) == 1: + return native_iri + except UnicodeError: + pass + + iri = url_parse(to_unicode(iri, charset, errors)) + path = url_quote(iri.path, charset, errors, _to_uri_safe) + query = url_quote(iri.query, charset, errors, _to_uri_safe) + fragment = url_quote(iri.fragment, charset, errors, _to_uri_safe) + return to_native( + url_unparse((iri.scheme, iri.encode_netloc(), path, query, fragment)) + ) + + +def url_decode( + s, + charset="utf-8", + decode_keys=False, + include_empty=True, + errors="replace", + separator="&", + cls=None, +): + """ + Parse a querystring and return it as :class:`MultiDict`. There is a + difference in key decoding on different Python versions. On Python 3 + keys will always be fully decoded whereas on Python 2, keys will + remain bytestrings if they fit into ASCII. On 2.x keys can be forced + to be unicode by setting `decode_keys` to `True`. + + If the charset is set to `None` no unicode decoding will happen and + raw bytes will be returned. + + Per default a missing value for a key will default to an empty key. 
If + you don't want that behavior you can set `include_empty` to `False`. + + Per default encoding errors are ignored. If you want a different behavior + you can set `errors` to ``'replace'`` or ``'strict'``. In strict mode a + `HTTPUnicodeError` is raised. + + .. versionchanged:: 0.5 + In previous versions ";" and "&" could be used for url decoding. + This changed in 0.5 where only "&" is supported. If you want to + use ";" instead a different `separator` can be provided. + + The `cls` parameter was added. + + :param s: a string with the query string to decode. + :param charset: the charset of the query string. If set to `None` + no unicode decoding will take place. + :param decode_keys: Used on Python 2.x to control whether keys should + be forced to be unicode objects. If set to `True` + then keys will be unicode in all cases. Otherwise, + they remain `str` if they fit into ASCII. + :param include_empty: Set to `False` if you don't want empty values to + appear in the dict. + :param errors: the decoding error behavior. + :param separator: the pair separator to be used, defaults to ``&`` + :param cls: an optional dict class to use. If this is not specified + or `None` the default :class:`MultiDict` is used. + """ + if cls is None: + cls = MultiDict + if isinstance(s, text_type) and not isinstance(separator, text_type): + separator = separator.decode(charset or "ascii") + elif isinstance(s, bytes) and not isinstance(separator, bytes): + separator = separator.encode(charset or "ascii") + return cls( + _url_decode_impl( + s.split(separator), charset, decode_keys, include_empty, errors + ) + ) + + +def url_decode_stream( + stream, + charset="utf-8", + decode_keys=False, + include_empty=True, + errors="replace", + separator="&", + cls=None, + limit=None, + return_iterator=False, +): + """Works like :func:`url_decode` but decodes a stream. The behavior + of stream and limit follows functions like + :func:`~werkzeug.wsgi.make_line_iter`. The generator of pairs is + directly fed to the `cls` so you can consume the data while it's + parsed. + + .. versionadded:: 0.8 + + :param stream: a stream with the encoded querystring + :param charset: the charset of the query string. If set to `None` + no unicode decoding will take place. + :param decode_keys: Used on Python 2.x to control whether keys should + be forced to be unicode objects. If set to `True`, + keys will be unicode in all cases. Otherwise, they + remain `str` if they fit into ASCII. + :param include_empty: Set to `False` if you don't want empty values to + appear in the dict. + :param errors: the decoding error behavior. + :param separator: the pair separator to be used, defaults to ``&`` + :param cls: an optional dict class to use. If this is not specified + or `None` the default :class:`MultiDict` is used. + :param limit: the content length of the URL data. Not necessary if + a limited stream is provided. 
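A short sketch of the query-string round trip described above, pairing `url_decode` with `url_encode` and the MultiDict semantics (values are illustrative):

    from werkzeug.urls import url_decode, url_encode

    d = url_decode("a=1&a=2&b=spam%20egg")
    print(d.getlist("a"))            # ['1', '2'] -- repeated keys survive
    print(d["b"])                    # 'spam egg'
    print(url_encode(d, sort=True))  # 'a=1&a=2&b=spam+egg'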
+ :param return_iterator: if set to `True` the `cls` argument is ignored + and an iterator over all decoded pairs is + returned + """ + from .wsgi import make_chunk_iter + + pair_iter = make_chunk_iter(stream, separator, limit) + decoder = _url_decode_impl(pair_iter, charset, decode_keys, include_empty, errors) + + if return_iterator: + return decoder + + if cls is None: + cls = MultiDict + + return cls(decoder) + + +def _url_decode_impl(pair_iter, charset, decode_keys, include_empty, errors): + for pair in pair_iter: + if not pair: + continue + s = make_literal_wrapper(pair) + equal = s("=") + if equal in pair: + key, value = pair.split(equal, 1) + else: + if not include_empty: + continue + key = pair + value = s("") + key = url_unquote_plus(key, charset, errors) + if charset is not None and PY2 and not decode_keys: + key = try_coerce_native(key) + yield key, url_unquote_plus(value, charset, errors) + + +def url_encode( + obj, charset="utf-8", encode_keys=False, sort=False, key=None, separator=b"&" +): + """URL encode a dict/`MultiDict`. If a value is `None` it will not appear + in the result string. Per default only values are encoded into the target + charset strings. If `encode_keys` is set to ``True`` unicode keys are + supported too. + + If `sort` is set to `True` the items are sorted by `key` or the default + sorting algorithm. + + .. versionadded:: 0.5 + `sort`, `key`, and `separator` were added. + + :param obj: the object to encode into a query string. + :param charset: the charset of the query string. + :param encode_keys: set to `True` if you have unicode keys. (Ignored on + Python 3.x) + :param sort: set to `True` if you want parameters to be sorted by `key`. + :param separator: the separator to be used for the pairs. + :param key: an optional function to be used for sorting. For more details + check out the :func:`sorted` documentation. + """ + separator = to_native(separator, "ascii") + return separator.join(_url_encode_impl(obj, charset, encode_keys, sort, key)) + + +def url_encode_stream( + obj, + stream=None, + charset="utf-8", + encode_keys=False, + sort=False, + key=None, + separator=b"&", +): + """Like :meth:`url_encode` but writes the results to a stream + object. If the stream is `None` a generator over all encoded + pairs is returned. + + .. versionadded:: 0.8 + + :param obj: the object to encode into a query string. + :param stream: a stream to write the encoded object into or `None` if + an iterator over the encoded pairs should be returned. In + that case the separator argument is ignored. + :param charset: the charset of the query string. + :param encode_keys: set to `True` if you have unicode keys. (Ignored on + Python 3.x) + :param sort: set to `True` if you want parameters to be sorted by `key`. + :param separator: the separator to be used for the pairs. + :param key: an optional function to be used for sorting. For more details + check out the :func:`sorted` documentation. + """ + separator = to_native(separator, "ascii") + gen = _url_encode_impl(obj, charset, encode_keys, sort, key) + if stream is None: + return gen + for idx, chunk in enumerate(gen): + if idx: + stream.write(separator) + stream.write(chunk) + + +def url_join(base, url, allow_fragments=True): + """Join a base URL and a possibly relative URL to form an absolute + interpretation of the latter. + + :param base: the base URL for the join operation. + :param url: the URL to join. + :param allow_fragments: indicates whether fragments should be allowed. 
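A sketch of the relative-reference resolution `url_join` implements below; "." and ".." segments are resolved against the base path, and mismatched schemes short-circuit (values are illustrative):

    from werkzeug.urls import url_join

    print(url_join("http://example.com/a/b/c", "../d"))  # 'http://example.com/a/d'
    print(url_join("http://example.com/a/b/c", "/x"))    # 'http://example.com/x'
    print(url_join("http://example.com/a", "https://other/"))  # 'https://other/'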
+    """
+    if isinstance(base, tuple):
+        base = url_unparse(base)
+    if isinstance(url, tuple):
+        url = url_unparse(url)
+
+    base, url = normalize_string_tuple((base, url))
+    s = make_literal_wrapper(base)
+
+    if not base:
+        return url
+    if not url:
+        return base
+
+    bscheme, bnetloc, bpath, bquery, bfragment = url_parse(
+        base, allow_fragments=allow_fragments
+    )
+    scheme, netloc, path, query, fragment = url_parse(url, bscheme, allow_fragments)
+    if scheme != bscheme:
+        return url
+    if netloc:
+        return url_unparse((scheme, netloc, path, query, fragment))
+    netloc = bnetloc
+
+    if path[:1] == s("/"):
+        segments = path.split(s("/"))
+    elif not path:
+        segments = bpath.split(s("/"))
+        if not query:
+            query = bquery
+    else:
+        segments = bpath.split(s("/"))[:-1] + path.split(s("/"))
+
+    # If the rightmost part is "./" we want to keep the slash but
+    # remove the dot.
+    if segments[-1] == s("."):
+        segments[-1] = s("")
+
+    # Resolve ".." and "."
+    segments = [segment for segment in segments if segment != s(".")]
+    while 1:
+        i = 1
+        n = len(segments) - 1
+        while i < n:
+            if segments[i] == s("..") and segments[i - 1] not in (s(""), s("..")):
+                del segments[i - 1 : i + 1]
+                break
+            i += 1
+        else:
+            break
+
+    # Remove trailing ".." if the URL is absolute
+    unwanted_marker = [s(""), s("..")]
+    while segments[:2] == unwanted_marker:
+        del segments[1]
+
+    path = s("/").join(segments)
+    return url_unparse((scheme, netloc, path, query, fragment))
+
+
+class Href(object):
+    """Implements a callable that constructs URLs with the given base. The
+    function can be called with any number of positional and keyword
+    arguments which are then used to assemble the URL.  Works with URLs
+    and posix paths.
+
+    Positional arguments are appended as individual segments to
+    the path of the URL:
+
+    >>> href = Href('/foo')
+    >>> href('bar', 23)
+    '/foo/bar/23'
+    >>> href('foo', bar=23)
+    '/foo/foo?bar=23'
+
+    If any of the arguments (positional or keyword) evaluates to `None` it
+    will be skipped.  If no keyword arguments are given the last argument
+    can be a :class:`dict` or :class:`MultiDict` (or any other dict subclass),
+    otherwise the keyword arguments are used for the query parameters, cutting
+    off the first trailing underscore of the parameter name:
+
+    >>> href(is_=42)
+    '/foo?is=42'
+    >>> href({'foo': 'bar'})
+    '/foo?foo=bar'
+
+    Combining both methods is not allowed:
+
+    >>> href({'foo': 'bar'}, bar=42)
+    Traceback (most recent call last):
+      ...
+    TypeError: keyword arguments and query-dicts can't be combined
+
+    Accessing attributes on the href object creates a new href object with
+    the attribute name as prefix:
+
+    >>> bar_href = href.bar
+    >>> bar_href("blub")
+    '/foo/bar/blub'
+
+    If `sort` is set to `True` the items are sorted by `key` or the default
+    sorting algorithm:
+
+    >>> href = Href("/", sort=True)
+    >>> href(a=1, b=2, c=3)
+    '/?a=1&b=2&c=3'
+
+    .. versionadded:: 0.5
+       `sort` and `key` were added.
+ """ + + def __init__(self, base="./", charset="utf-8", sort=False, key=None): + if not base: + base = "./" + self.base = base + self.charset = charset + self.sort = sort + self.key = key + + def __getattr__(self, name): + if name[:2] == "__": + raise AttributeError(name) + base = self.base + if base[-1:] != "/": + base += "/" + return Href(url_join(base, name), self.charset, self.sort, self.key) + + def __call__(self, *path, **query): + if path and isinstance(path[-1], dict): + if query: + raise TypeError("keyword arguments and query-dicts can't be combined") + query, path = path[-1], path[:-1] + elif query: + query = dict( + [(k.endswith("_") and k[:-1] or k, v) for k, v in query.items()] + ) + path = "/".join( + [ + to_unicode(url_quote(x, self.charset), "ascii") + for x in path + if x is not None + ] + ).lstrip("/") + rv = self.base + if path: + if not rv.endswith("/"): + rv += "/" + rv = url_join(rv, "./" + path) + if query: + rv += "?" + to_unicode( + url_encode(query, self.charset, sort=self.sort, key=self.key), "ascii" + ) + return to_native(rv) diff --git a/python/werkzeug/useragents.py b/python/werkzeug/useragents.py new file mode 100644 index 0000000..e265e09 --- /dev/null +++ b/python/werkzeug/useragents.py @@ -0,0 +1,220 @@ +# -*- coding: utf-8 -*- +""" + werkzeug.useragents + ~~~~~~~~~~~~~~~~~~~ + + This module provides a helper to inspect user agent strings. This module + is far from complete but should work for most of the currently available + browsers. + + + :copyright: 2007 Pallets + :license: BSD-3-Clause +""" +import re +import warnings + + +class UserAgentParser(object): + """A simple user agent parser. Used by the `UserAgent`.""" + + platforms = ( + ("cros", "chromeos"), + ("iphone|ios", "iphone"), + ("ipad", "ipad"), + (r"darwin|mac|os\s*x", "macos"), + ("win", "windows"), + (r"android", "android"), + ("netbsd", "netbsd"), + ("openbsd", "openbsd"), + ("freebsd", "freebsd"), + ("dragonfly", "dragonflybsd"), + ("(sun|i86)os", "solaris"), + (r"x11|lin(\b|ux)?", "linux"), + (r"nintendo\s+wii", "wii"), + ("irix", "irix"), + ("hp-?ux", "hpux"), + ("aix", "aix"), + ("sco|unix_sv", "sco"), + ("bsd", "bsd"), + ("amiga", "amiga"), + ("blackberry|playbook", "blackberry"), + ("symbian", "symbian"), + ) + browsers = ( + ("googlebot", "google"), + ("msnbot", "msn"), + ("yahoo", "yahoo"), + ("ask jeeves", "ask"), + (r"aol|america\s+online\s+browser", "aol"), + ("opera", "opera"), + ("edge", "edge"), + ("chrome|crios", "chrome"), + ("seamonkey", "seamonkey"), + ("firefox|firebird|phoenix|iceweasel", "firefox"), + ("galeon", "galeon"), + ("safari|version", "safari"), + ("webkit", "webkit"), + ("camino", "camino"), + ("konqueror", "konqueror"), + ("k-meleon", "kmeleon"), + ("netscape", "netscape"), + (r"msie|microsoft\s+internet\s+explorer|trident/.+? rv:", "msie"), + ("lynx", "lynx"), + ("links", "links"), + ("Baiduspider", "baidu"), + ("bingbot", "bing"), + ("mozilla", "mozilla"), + ) + + _browser_version_re = r"(?:%s)[/\sa-z(]*(\d+[.\da-z]+)?" 
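As a usage sketch for the parser being defined here (the user agent string below is an illustrative Firefox-on-Linux value; assuming the vendored werkzeug is importable):

    from werkzeug.useragents import UserAgent

    ua = UserAgent(
        "Mozilla/5.0 (X11; Linux x86_64; rv:68.0) Gecko/20100101 Firefox/68.0"
    )
    print(ua.platform, ua.browser, ua.version)  # linux firefox 68.0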
+ _language_re = re.compile( + r"(?:;\s*|\s+)(\b\w{2}\b(?:-\b\w{2}\b)?)\s*;|" + r"(?:\(|\[|;)\s*(\b\w{2}\b(?:-\b\w{2}\b)?)\s*(?:\]|\)|;)" + ) + + def __init__(self): + self.platforms = [(b, re.compile(a, re.I)) for a, b in self.platforms] + self.browsers = [ + (b, re.compile(self._browser_version_re % a, re.I)) + for a, b in self.browsers + ] + + def __call__(self, user_agent): + for platform, regex in self.platforms: # noqa: B007 + match = regex.search(user_agent) + if match is not None: + break + else: + platform = None + for browser, regex in self.browsers: # noqa: B007 + match = regex.search(user_agent) + if match is not None: + version = match.group(1) + break + else: + browser = version = None + match = self._language_re.search(user_agent) + if match is not None: + language = match.group(1) or match.group(2) + else: + language = None + return platform, browser, version, language + + +class UserAgent(object): + """Represents a user agent. Pass it a WSGI environment or a user agent + string and you can inspect some of the details from the user agent + string via the attributes. The following attributes exist: + + .. attribute:: string + + the raw user agent string + + .. attribute:: platform + + the browser platform. The following platforms are currently + recognized: + + - `aix` + - `amiga` + - `android` + - `blackberry` + - `bsd` + - `chromeos` + - `dragonflybsd` + - `freebsd` + - `hpux` + - `ipad` + - `iphone` + - `irix` + - `linux` + - `macos` + - `netbsd` + - `openbsd` + - `sco` + - `solaris` + - `symbian` + - `wii` + - `windows` + + .. attribute:: browser + + the name of the browser. The following browsers are currently + recognized: + + - `aol` * + - `ask` * + - `baidu` * + - `bing` * + - `camino` + - `chrome` + - `edge` + - `firefox` + - `galeon` + - `google` * + - `kmeleon` + - `konqueror` + - `links` + - `lynx` + - `mozilla` + - `msie` + - `msn` + - `netscape` + - `opera` + - `safari` + - `seamonkey` + - `webkit` + - `yahoo` * + + (Browsers marked with a star (``*``) are crawlers.) + + .. attribute:: version + + the version of the browser + + .. attribute:: language + + the language of the browser + """ + + _parser = UserAgentParser() + + def __init__(self, environ_or_string): + if isinstance(environ_or_string, dict): + environ_or_string = environ_or_string.get("HTTP_USER_AGENT", "") + self.string = environ_or_string + self.platform, self.browser, self.version, self.language = self._parser( + environ_or_string + ) + + def to_header(self): + return self.string + + def __str__(self): + return self.string + + def __nonzero__(self): + return bool(self.browser) + + __bool__ = __nonzero__ + + def __repr__(self): + return "<%s %r/%s>" % (self.__class__.__name__, self.browser, self.version) + + +# DEPRECATED +from .wrappers import UserAgentMixin as _UserAgentMixin + + +class UserAgentMixin(_UserAgentMixin): + @property + def user_agent(self, *args, **kwargs): + warnings.warn( + "'werkzeug.useragents.UserAgentMixin' should be imported" + " from 'werkzeug.wrappers.UserAgentMixin'. This old import" + " will be removed in version 1.0.", + DeprecationWarning, + stacklevel=2, + ) + return super(_UserAgentMixin, self).user_agent diff --git a/python/werkzeug/utils.py b/python/werkzeug/utils.py new file mode 100644 index 0000000..2062057 --- /dev/null +++ b/python/werkzeug/utils.py @@ -0,0 +1,836 @@ +# -*- coding: utf-8 -*- +""" + werkzeug.utils + ~~~~~~~~~~~~~~ + + This module implements various utilities for WSGI applications. 
Most of
+    them are used by the request and response wrappers, but it also makes
+    sense to use them without the wrappers, especially for middleware
+    development.
+
+    :copyright: 2007 Pallets
+    :license: BSD-3-Clause
+"""
+import codecs
+import os
+import pkgutil
+import re
+import sys
+import warnings
+
+from ._compat import iteritems
+from ._compat import PY2
+from ._compat import reraise
+from ._compat import string_types
+from ._compat import text_type
+from ._compat import unichr
+from ._internal import _DictAccessorProperty
+from ._internal import _missing
+from ._internal import _parse_signature
+
+try:
+    from html.entities import name2codepoint
+except ImportError:
+    from htmlentitydefs import name2codepoint
+
+
+_format_re = re.compile(r"\$(?:(%s)|\{(%s)\})" % (("[a-zA-Z_][a-zA-Z0-9_]*",) * 2))
+_entity_re = re.compile(r"&([^;]+);")
+_filename_ascii_strip_re = re.compile(r"[^A-Za-z0-9_.-]")
+_windows_device_files = (
+    "CON",
+    "AUX",
+    "COM1",
+    "COM2",
+    "COM3",
+    "COM4",
+    "LPT1",
+    "LPT2",
+    "LPT3",
+    "PRN",
+    "NUL",
+)
+
+
+class cached_property(property):
+    """A decorator that converts a function into a lazy property.  The
+    function wrapped is called the first time to retrieve the result
+    and then that calculated result is used the next time you access
+    the value::
+
+        class Foo(object):
+
+            @cached_property
+            def foo(self):
+                # calculate something important here
+                return 42
+
+    The class has to have a `__dict__` in order for this property to
+    work.
+    """
+
+    # implementation detail: A subclass of python's builtin property
+    # decorator, we override __get__ to check for a cached value. If one
+    # chooses to invoke __get__ by hand the property will still work as
+    # expected because the lookup logic is replicated in __get__ for
+    # manual invocation.
+
+    def __init__(self, func, name=None, doc=None):
+        self.__name__ = name or func.__name__
+        self.__module__ = func.__module__
+        self.__doc__ = doc or func.__doc__
+        self.func = func
+
+    def __set__(self, obj, value):
+        obj.__dict__[self.__name__] = value
+
+    def __get__(self, obj, type=None):
+        if obj is None:
+            return self
+        value = obj.__dict__.get(self.__name__, _missing)
+        if value is _missing:
+            value = self.func(obj)
+            obj.__dict__[self.__name__] = value
+        return value
+
+
+class environ_property(_DictAccessorProperty):
+    """Maps request attributes to environment variables.  This works not only
+    for the Werkzeug request object, but also any other class with an
+    environ attribute:
+
+    >>> class Test(object):
+    ...     environ = {'key': 'value'}
+    ...     test = environ_property('key')
+    >>> var = Test()
+    >>> var.test
+    'value'
+
+    If you pass it a second value it's used as a default if the key does not
+    exist; the third one can be a converter that takes a value and converts
+    it.  If it raises :exc:`ValueError` or :exc:`TypeError` the default value
+    is used.  If no default value is provided `None` is used.
+
+    Per default the property is read only.  You have to explicitly enable
+    write access by passing ``read_only=False`` to the constructor.
+    """
+
+    read_only = True
+
+    def lookup(self, obj):
+        return obj.environ
+
+
+class header_property(_DictAccessorProperty):
+    """Like `environ_property` but for headers."""
+
+    def lookup(self, obj):
+        return obj.headers
+
+
+class HTMLBuilder(object):
+    """Helper object for HTML generation.
+
+    Per default there are two instances of that class.  The `html` one, and
+    the `xhtml` one for those two dialects.  The class uses keyword parameters
+    and positional parameters to generate small snippets of HTML.
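A brief sketch of the `cached_property` semantics defined above: the wrapped function runs once per instance, and the cached value lives in the instance `__dict__` (the `Report` class is hypothetical):

    from werkzeug.utils import cached_property

    class Report(object):
        @cached_property
        def total(self):
            print("computing...")  # runs only on first access
            return 42

    r = Report()
    r.total                  # prints "computing..." and returns 42
    r.total                  # served from r.__dict__; no recomputation
    del r.__dict__["total"]  # drops the cached value for this instance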
+
+    Keyword parameters are converted to XML/SGML attributes, positional
+    arguments are used as children.  Because Python accepts positional
+    arguments before keyword arguments it's a good idea to use a list with the
+    star-syntax for some children:
+
+    >>> html.p(class_='foo', *[html.a('foo', href='foo.html'), ' ',
+    ...        html.a('bar', href='bar.html')])
+    u'<p class="foo"><a href="foo.html">foo</a> <a href="bar.html">bar</a></p>'
+
+    This class works around some browser limitations and cannot be used for
+    arbitrary SGML/XML generation.  For that purpose lxml and similar
+    libraries exist.
+
+    Calling the builder escapes the string passed:
+
+    >>> html.p(html("<foo>"))
+    u'<p>&lt;foo&gt;</p>'
+    """
+
+    _entity_re = re.compile(r"&([^;]+);")
+    _entities = name2codepoint.copy()
+    _entities["apos"] = 39
+    _empty_elements = {
+        "area",
+        "base",
+        "basefont",
+        "br",
+        "col",
+        "command",
+        "embed",
+        "frame",
+        "hr",
+        "img",
+        "input",
+        "keygen",
+        "isindex",
+        "link",
+        "meta",
+        "param",
+        "source",
+        "wbr",
+    }
+    _boolean_attributes = {
+        "selected",
+        "checked",
+        "compact",
+        "declare",
+        "defer",
+        "disabled",
+        "ismap",
+        "multiple",
+        "nohref",
+        "noresize",
+        "noshade",
+        "nowrap",
+    }
+    _plaintext_elements = {"textarea"}
+    _c_like_cdata = {"script", "style"}
+
+    def __init__(self, dialect):
+        self._dialect = dialect
+
+    def __call__(self, s):
+        return escape(s)
+
+    def __getattr__(self, tag):
+        if tag[:2] == "__":
+            raise AttributeError(tag)
+
+        def proxy(*children, **arguments):
+            buffer = "<" + tag
+            for key, value in iteritems(arguments):
+                if value is None:
+                    continue
+                if key[-1] == "_":
+                    key = key[:-1]
+                if key in self._boolean_attributes:
+                    if not value:
+                        continue
+                    if self._dialect == "xhtml":
+                        value = '="' + key + '"'
+                    else:
+                        value = ""
+                else:
+                    value = '="' + escape(value) + '"'
+                buffer += " " + key + value
+            if not children and tag in self._empty_elements:
+                if self._dialect == "xhtml":
+                    buffer += " />"
+                else:
+                    buffer += ">"
+                return buffer
+            buffer += ">"
+
+            children_as_string = "".join(
+                [text_type(x) for x in children if x is not None]
+            )
+
+            if children_as_string:
+                if tag in self._plaintext_elements:
+                    children_as_string = escape(children_as_string)
+                elif tag in self._c_like_cdata and self._dialect == "xhtml":
+                    children_as_string = (
+                        "/*<![CDATA[*/" + children_as_string + "/*]]>*/"
+                    )
+            buffer += children_as_string + "</" + tag + ">"
+            return buffer
+
+        return proxy
+
+    def __repr__(self):
+        return "<%s for %r>" % (self.__class__.__name__, self._dialect)
+
+
+html = HTMLBuilder("html")
+xhtml = HTMLBuilder("xhtml")
+
+# https://cgit.freedesktop.org/xdg/shared-mime-info/tree/freedesktop.org.xml.in
+# https://www.iana.org/assignments/media-types/media-types.xhtml
+# Types listed in the XDG mime info that have a charset in the IANA registration.
+_charset_mimetypes = {
+    "application/ecmascript",
+    "application/javascript",
+    "application/sql",
+    "application/xml",
+    "application/xml-dtd",
+    "application/xml-external-parsed-entity",
+}
+
+
+def get_content_type(mimetype, charset):
+    """Returns the full content type string with charset for a mimetype.
+
+    If the mimetype represents text, the charset parameter will be
+    appended, otherwise the mimetype is returned unchanged.
+
+    :param mimetype: The mimetype to be used as content type.
+    :param charset: The charset to be appended for text mimetypes.
+    :return: The content type.
+
+    .. versionchanged:: 0.15
+       Any type that ends with ``+xml`` gets a charset, not just those
+       that start with ``application/``.
Known text types such as + ``application/javascript`` are also given charsets. + """ + if ( + mimetype.startswith("text/") + or mimetype in _charset_mimetypes + or mimetype.endswith("+xml") + ): + mimetype += "; charset=" + charset + + return mimetype + + +def detect_utf_encoding(data): + """Detect which UTF encoding was used to encode the given bytes. + + The latest JSON standard (:rfc:`8259`) suggests that only UTF-8 is + accepted. Older documents allowed 8, 16, or 32. 16 and 32 can be big + or little endian. Some editors or libraries may prepend a BOM. + + :internal: + + :param data: Bytes in unknown UTF encoding. + :return: UTF encoding name + + .. versionadded:: 0.15 + """ + head = data[:4] + + if head[:3] == codecs.BOM_UTF8: + return "utf-8-sig" + + if b"\x00" not in head: + return "utf-8" + + if head in (codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE): + return "utf-32" + + if head[:2] in (codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE): + return "utf-16" + + if len(head) == 4: + if head[:3] == b"\x00\x00\x00": + return "utf-32-be" + + if head[::2] == b"\x00\x00": + return "utf-16-be" + + if head[1:] == b"\x00\x00\x00": + return "utf-32-le" + + if head[1::2] == b"\x00\x00": + return "utf-16-le" + + if len(head) == 2: + return "utf-16-be" if head.startswith(b"\x00") else "utf-16-le" + + return "utf-8" + + +def format_string(string, context): + """String-template format a string: + + >>> format_string('$foo and ${foo}s', dict(foo=42)) + '42 and 42s' + + This does not do any attribute lookup etc. For more advanced string + formattings have a look at the `werkzeug.template` module. + + :param string: the format string. + :param context: a dict with the variables to insert. + """ + + def lookup_arg(match): + x = context[match.group(1) or match.group(2)] + if not isinstance(x, string_types): + x = type(string)(x) + return x + + return _format_re.sub(lookup_arg, string) + + +def secure_filename(filename): + r"""Pass it a filename and it will return a secure version of it. This + filename can then safely be stored on a regular file system and passed + to :func:`os.path.join`. The filename returned is an ASCII only string + for maximum portability. + + On windows systems the function also makes sure that the file is not + named after one of the special device files. + + >>> secure_filename("My cool movie.mov") + 'My_cool_movie.mov' + >>> secure_filename("../../../etc/passwd") + 'etc_passwd' + >>> secure_filename(u'i contain cool \xfcml\xe4uts.txt') + 'i_contain_cool_umlauts.txt' + + The function might return an empty filename. It's your responsibility + to ensure that the filename is unique and that you abort or + generate a random filename if the function returned an empty one. + + .. versionadded:: 0.5 + + :param filename: the filename to secure + """ + if isinstance(filename, text_type): + from unicodedata import normalize + + filename = normalize("NFKD", filename).encode("ascii", "ignore") + if not PY2: + filename = filename.decode("ascii") + for sep in os.path.sep, os.path.altsep: + if sep: + filename = filename.replace(sep, " ") + filename = str(_filename_ascii_strip_re.sub("", "_".join(filename.split()))).strip( + "._" + ) + + # on nt a couple of special files are present in each folder. We + # have to ensure that the target file is not such a filename. 
In
+    # this case we prepend an underscore.
+    if (
+        os.name == "nt"
+        and filename
+        and filename.split(".")[0].upper() in _windows_device_files
+    ):
+        filename = "_" + filename
+
+    return filename
+
+
+def escape(s, quote=None):
+    """Replace special characters "&", "<", ">" and (") with HTML-safe
+    sequences.
+
+    There is a special handling for `None` which escapes to an empty string.
+
+    .. versionchanged:: 0.9
+       `quote` is now implicitly on.
+
+    :param s: the string to escape.
+    :param quote: ignored.
+    """
+    if s is None:
+        return ""
+    elif hasattr(s, "__html__"):
+        return text_type(s.__html__())
+    elif not isinstance(s, string_types):
+        s = text_type(s)
+    if quote is not None:
+        from warnings import warn
+
+        warn(
+            "The 'quote' parameter is no longer used as of version 0.9"
+            " and will be removed in version 1.0.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+    s = (
+        s.replace("&", "&amp;")
+        .replace("<", "&lt;")
+        .replace(">", "&gt;")
+        .replace('"', "&quot;")
+    )
+    return s
+
+
+def unescape(s):
+    """The reverse function of `escape`.  This unescapes all the HTML
+    entities, not only the XML entities inserted by `escape`.
+
+    :param s: the string to unescape.
+    """
+
+    def handle_match(m):
+        name = m.group(1)
+        if name in HTMLBuilder._entities:
+            return unichr(HTMLBuilder._entities[name])
+        try:
+            if name[:2] in ("#x", "#X"):
+                return unichr(int(name[2:], 16))
+            elif name.startswith("#"):
+                return unichr(int(name[1:]))
+        except ValueError:
+            pass
+        return u""
+
+    return _entity_re.sub(handle_match, s)
+
+
+def redirect(location, code=302, Response=None):
+    """Returns a response object (a WSGI application) that, if called,
+    redirects the client to the target location.  Supported codes are
+    301, 302, 303, 305, 307, and 308.  300 is not supported because
+    it's not a real redirect and 304 because it's the answer for a
+    request with defined If-Modified-Since headers.
+
+    .. versionadded:: 0.6
+       The location can now be a unicode string that is encoded using
+       the :func:`iri_to_uri` function.
+
+    .. versionadded:: 0.10
+       The class used for the Response object can now be passed in.
+
+    :param location: the location the response should redirect to.
+    :param code: the redirect status code; defaults to 302.
+    :param class Response: a Response class to use when instantiating a
+        response. The default is :class:`werkzeug.wrappers.Response` if
+        unspecified.
+    """
+    if Response is None:
+        from .wrappers import Response
+
+    display_location = escape(location)
+    if isinstance(location, text_type):
+        # Safe conversion is necessary here as we might redirect
+        # to a broken URI scheme (for instance itms-services).
+        from .urls import iri_to_uri
+
+        location = iri_to_uri(location, safe_conversion=True)
+    response = Response(
+        '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">\n'
+        "<title>Redirecting...</title>\n"
+        "<h1>Redirecting...</h1>\n"
+        "<p>You should be redirected automatically to target URL: "
+        '<a href="%s">%s</a>. If not, click the link.'
+        % (escape(location), display_location),
+        code,
+        mimetype="text/html",
+    )
+    response.headers["Location"] = location
+    return response
+
+
+def append_slash_redirect(environ, code=301):
+    """Redirects to the same URL but with a slash appended.  The behavior
+    of this function is undefined if the path ends with a slash already.
+
+    :param environ: the WSGI environment for the request that triggers
+                    the redirect.
+    :param code: the status code for the redirect.
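A sketch of the `redirect` helper above, showing the IRI-to-URI conversion applied to a unicode location (assuming the vendored werkzeug is importable):

    from werkzeug.utils import redirect

    resp = redirect(u"http://example.com/\u2603", code=301)
    print(resp.status_code)          # 301
    print(resp.headers["Location"])  # 'http://example.com/%E2%98%83'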
+    """
+    new_path = environ["PATH_INFO"].strip("/") + "/"
+    query_string = environ.get("QUERY_STRING")
+    if query_string:
+        new_path += "?" + query_string
+    return redirect(new_path, code)
+
+
+def import_string(import_name, silent=False):
+    """Imports an object based on a string.  This is useful if you want to
+    use import paths as endpoints or something similar.  An import path can
+    be specified either in dotted notation (``xml.sax.saxutils.escape``)
+    or with a colon as object delimiter (``xml.sax.saxutils:escape``).
+
+    If `silent` is True the return value will be `None` if the import fails.
+
+    :param import_name: the dotted name for the object to import.
+    :param silent: if set to `True` import errors are ignored and
+                   `None` is returned instead.
+    :return: imported object
+    """
+    # force the import name to automatically convert to strings
+    # __import__ is not able to handle unicode strings in the fromlist
+    # if the module is a package
+    import_name = str(import_name).replace(":", ".")
+    try:
+        try:
+            __import__(import_name)
+        except ImportError:
+            if "." not in import_name:
+                raise
+        else:
+            return sys.modules[import_name]
+
+        module_name, obj_name = import_name.rsplit(".", 1)
+        module = __import__(module_name, globals(), locals(), [obj_name])
+        try:
+            return getattr(module, obj_name)
+        except AttributeError as e:
+            raise ImportError(e)
+
+    except ImportError as e:
+        if not silent:
+            reraise(
+                ImportStringError, ImportStringError(import_name, e), sys.exc_info()[2]
+            )
+
+
+def find_modules(import_path, include_packages=False, recursive=False):
+    """Finds all the modules below a package.  This can be useful to
+    automatically import all views / controllers so that their metaclasses /
+    function decorators have a chance to register themselves on the
+    application.
+
+    Packages are not returned unless `include_packages` is `True`.  This can
+    also recursively list modules but in that case it will import all the
+    packages to get the correct load path of that module.
+
+    :param import_path: the dotted name for the package to find child modules.
+    :param include_packages: set to `True` if packages should be returned, too.
+    :param recursive: set to `True` if recursion should happen.
+    :return: generator
+    """
+    module = import_string(import_path)
+    path = getattr(module, "__path__", None)
+    if path is None:
+        raise ValueError("%r is not a package" % import_path)
+    basename = module.__name__ + "."
+    for _importer, modname, ispkg in pkgutil.iter_modules(path):
+        modname = basename + modname
+        if ispkg:
+            if include_packages:
+                yield modname
+            if recursive:
+                for item in find_modules(modname, include_packages, True):
+                    yield item
+        else:
+            yield modname
+
+
+def validate_arguments(func, args, kwargs, drop_extra=True):
+    """Checks if the function accepts the arguments and keyword arguments.
+    Returns a new ``(args, kwargs)`` tuple that can safely be passed to
+    the function without causing a `TypeError` because the function signature
+    is incompatible.  If `drop_extra` is set to `True` (which is the default)
+    any extra positional or keyword arguments are dropped automatically.
+
+    The exception raised provides three attributes:
+
+    `missing`
+        A set of argument names that the function expected but were
+        missing.
+
+    `extra`
+        A dict of keyword arguments that the function cannot handle but
+        were provided.
+
+    `extra_positional`
+        A list of values that were given as positional arguments but the
+        function cannot accept.
+ + This can be useful for decorators that forward user submitted data to + a view function:: + + from werkzeug.utils import ArgumentValidationError, validate_arguments + + def sanitize(f): + def proxy(request): + data = request.values.to_dict() + try: + args, kwargs = validate_arguments(f, (request,), data) + except ArgumentValidationError: + raise BadRequest('The browser failed to transmit all ' + 'the data expected.') + return f(*args, **kwargs) + return proxy + + :param func: the function the validation is performed against. + :param args: a tuple of positional arguments. + :param kwargs: a dict of keyword arguments. + :param drop_extra: set to `False` if you don't want extra arguments + to be silently dropped. + :return: tuple in the form ``(args, kwargs)``. + """ + parser = _parse_signature(func) + args, kwargs, missing, extra, extra_positional = parser(args, kwargs)[:5] + if missing: + raise ArgumentValidationError(tuple(missing)) + elif (extra or extra_positional) and not drop_extra: + raise ArgumentValidationError(None, extra, extra_positional) + return tuple(args), kwargs + + +def bind_arguments(func, args, kwargs): + """Bind the arguments provided into a dict. When passed a function, + a tuple of arguments and a dict of keyword arguments `bind_arguments` + returns a dict of names as the function would see it. This can be useful + to implement a cache decorator that uses the function arguments to build + the cache key based on the values of the arguments. + + :param func: the function the arguments should be bound for. + :param args: tuple of positional arguments. + :param kwargs: a dict of keyword arguments. + :return: a :class:`dict` of bound keyword arguments. + """ + ( + args, + kwargs, + missing, + extra, + extra_positional, + arg_spec, + vararg_var, + kwarg_var, + ) = _parse_signature(func)(args, kwargs) + values = {} + for (name, _has_default, _default), value in zip(arg_spec, args): + values[name] = value + if vararg_var is not None: + values[vararg_var] = tuple(extra_positional) + elif extra_positional: + raise TypeError("too many positional arguments") + if kwarg_var is not None: + multikw = set(extra) & set([x[0] for x in arg_spec]) + if multikw: + raise TypeError( + "got multiple values for keyword argument " + repr(next(iter(multikw))) + ) + values[kwarg_var] = extra + elif extra: + raise TypeError("got unexpected keyword argument " + repr(next(iter(extra)))) + return values + + +class ArgumentValidationError(ValueError): + + """Raised if :func:`validate_arguments` fails to validate""" + + def __init__(self, missing=None, extra=None, extra_positional=None): + self.missing = set(missing or ()) + self.extra = extra or {} + self.extra_positional = extra_positional or [] + ValueError.__init__( + self, + "function arguments invalid. (%d missing, %d additional)" + % (len(self.missing), len(self.extra) + len(self.extra_positional)), + ) + + +class ImportStringError(ImportError): + """Provides information about a failed :func:`import_string` attempt.""" + + #: String in dotted notation that failed to be imported. + import_name = None + #: Wrapped exception. + exception = None + + def __init__(self, import_name, exception): + self.import_name = import_name + self.exception = exception + + msg = ( + "import_string() failed for %r. 
Possible reasons are:\n\n"
+            "- missing __init__.py in a package;\n"
+            "- package or module path not included in sys.path;\n"
+            "- duplicated package or module name taking precedence in "
+            "sys.path;\n"
+            "- missing module, class, function or variable;\n\n"
+            "Debugged import:\n\n%s\n\n"
+            "Original exception:\n\n%s: %s"
+        )
+
+        name = ""
+        tracked = []
+        for part in import_name.replace(":", ".").split("."):
+            name += (name and ".") + part
+            imported = import_string(name, silent=True)
+            if imported:
+                tracked.append((name, getattr(imported, "__file__", None)))
+            else:
+                track = ["- %r found in %r." % (n, i) for n, i in tracked]
+                track.append("- %r not found." % name)
+                msg = msg % (
+                    import_name,
+                    "\n".join(track),
+                    exception.__class__.__name__,
+                    str(exception),
+                )
+                break
+
+        ImportError.__init__(self, msg)
+
+    def __repr__(self):
+        return "<%s(%r, %r)>" % (
+            self.__class__.__name__,
+            self.import_name,
+            self.exception,
+        )
+
+
+# DEPRECATED
+from .datastructures import CombinedMultiDict as _CombinedMultiDict
+from .datastructures import EnvironHeaders as _EnvironHeaders
+from .datastructures import Headers as _Headers
+from .datastructures import MultiDict as _MultiDict
+from .http import dump_cookie as _dump_cookie
+from .http import parse_cookie as _parse_cookie
+
+
+class MultiDict(_MultiDict):
+    def __init__(self, *args, **kwargs):
+        warnings.warn(
+            "'werkzeug.utils.MultiDict' has moved to 'werkzeug"
+            ".datastructures.MultiDict' as of version 0.5. This old"
+            " import will be removed in version 1.0.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        super(MultiDict, self).__init__(*args, **kwargs)
+
+
+class CombinedMultiDict(_CombinedMultiDict):
+    def __init__(self, *args, **kwargs):
+        warnings.warn(
+            "'werkzeug.utils.CombinedMultiDict' has moved to 'werkzeug"
+            ".datastructures.CombinedMultiDict' as of version 0.5. This"
+            " old import will be removed in version 1.0.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        super(CombinedMultiDict, self).__init__(*args, **kwargs)
+
+
+class Headers(_Headers):
+    def __init__(self, *args, **kwargs):
+        warnings.warn(
+            "'werkzeug.utils.Headers' has moved to 'werkzeug"
+            ".datastructures.Headers' as of version 0.5. This old"
+            " import will be removed in version 1.0.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        super(Headers, self).__init__(*args, **kwargs)
+
+
+class EnvironHeaders(_EnvironHeaders):
+    def __init__(self, *args, **kwargs):
+        warnings.warn(
+            "'werkzeug.utils.EnvironHeaders' has moved to 'werkzeug"
+            ".datastructures.EnvironHeaders' as of version 0.5. This"
+            " old import will be removed in version 1.0.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        super(EnvironHeaders, self).__init__(*args, **kwargs)
+
+
+def parse_cookie(*args, **kwargs):
+    warnings.warn(
+        "'werkzeug.utils.parse_cookie' has moved to 'werkzeug.http"
+        ".parse_cookie' as of version 0.5. This old import will be"
+        " removed in version 1.0.",
+        DeprecationWarning,
+        stacklevel=2,
+    )
+    return _parse_cookie(*args, **kwargs)
+
+
+def dump_cookie(*args, **kwargs):
+    warnings.warn(
+        "'werkzeug.utils.dump_cookie' has moved to 'werkzeug.http"
+        ".dump_cookie' as of version 0.5.
This old import will be"
+        " removed in version 1.0.",
+        DeprecationWarning,
+        stacklevel=2,
+    )
+    return _dump_cookie(*args, **kwargs)
diff --git a/python/werkzeug/wrappers/__init__.py b/python/werkzeug/wrappers/__init__.py
new file mode 100644
index 0000000..56c764a
--- /dev/null
+++ b/python/werkzeug/wrappers/__init__.py
@@ -0,0 +1,36 @@
+"""
+werkzeug.wrappers
+~~~~~~~~~~~~~~~~~
+
+The wrappers are simple request and response objects which you can
+subclass to do whatever you want them to do. The request object contains
+the information transmitted by the client (web browser) and the response
+object contains all the information sent back to the browser.
+
+An important detail is that the request object is created with the WSGI
+environ and will act as a high-level proxy, whereas the response object is
+an actual WSGI application.
+
+Like everything else in Werkzeug these objects will work correctly with
+unicode data. Incoming form data parsed by the request object will be
+decoded into a unicode object if possible and if it makes sense.
+
+:copyright: 2007 Pallets
+:license: BSD-3-Clause
+"""
+from .accept import AcceptMixin
+from .auth import AuthorizationMixin
+from .auth import WWWAuthenticateMixin
+from .base_request import BaseRequest
+from .base_response import BaseResponse
+from .common_descriptors import CommonRequestDescriptorsMixin
+from .common_descriptors import CommonResponseDescriptorsMixin
+from .etag import ETagRequestMixin
+from .etag import ETagResponseMixin
+from .request import PlainRequest
+from .request import Request
+from .request import StreamOnlyMixin
+from .response import Response
+from .response import ResponseStream
+from .response import ResponseStreamMixin
+from .user_agent import UserAgentMixin
diff --git a/python/werkzeug/wrappers/accept.py b/python/werkzeug/wrappers/accept.py
new file mode 100644
index 0000000..d0620a0
--- /dev/null
+++ b/python/werkzeug/wrappers/accept.py
@@ -0,0 +1,50 @@
+from ..datastructures import CharsetAccept
+from ..datastructures import LanguageAccept
+from ..datastructures import MIMEAccept
+from ..http import parse_accept_header
+from ..utils import cached_property
+
+
+class AcceptMixin(object):
+    """A mixin for classes with an :attr:`~BaseRequest.environ` attribute
+    to get all the HTTP accept headers as
+    :class:`~werkzeug.datastructures.Accept` objects (or subclasses
+    thereof).
+    """
+
+    @cached_property
+    def accept_mimetypes(self):
+        """List of mimetypes this client supports as a
+        :class:`~werkzeug.datastructures.MIMEAccept` object.
+        """
+        return parse_accept_header(self.environ.get("HTTP_ACCEPT"), MIMEAccept)
+
+    @cached_property
+    def accept_charsets(self):
+        """List of charsets this client supports as a
+        :class:`~werkzeug.datastructures.CharsetAccept` object.
+        """
+        return parse_accept_header(
+            self.environ.get("HTTP_ACCEPT_CHARSET"), CharsetAccept
+        )
+
+    @cached_property
+    def accept_encodings(self):
+        """List of encodings this client accepts. Encodings in HTTP terms
+        are compression encodings such as gzip. For charsets, have a look
+        at :attr:`accept_charsets`.
+        """
+        return parse_accept_header(self.environ.get("HTTP_ACCEPT_ENCODING"))
+
+    @cached_property
+    def accept_languages(self):
+        """List of languages this client accepts as a
+        :class:`~werkzeug.datastructures.LanguageAccept` object.
+
+        .. versionchanged:: 0.5
+           In previous versions this was a regular
+           :class:`~werkzeug.datastructures.Accept` object.
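+
+        A short usage sketch (``request`` here is any request class that
+        mixes this in)::
+
+            lang = request.accept_languages.best_match(['en', 'de'])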
+        """
+        return parse_accept_header(
+            self.environ.get("HTTP_ACCEPT_LANGUAGE"), LanguageAccept
+        )
diff --git a/python/werkzeug/wrappers/auth.py b/python/werkzeug/wrappers/auth.py
new file mode 100644
index 0000000..714f755
--- /dev/null
+++ b/python/werkzeug/wrappers/auth.py
@@ -0,0 +1,33 @@
+from ..http import parse_authorization_header
+from ..http import parse_www_authenticate_header
+from ..utils import cached_property
+
+
+class AuthorizationMixin(object):
+    """Adds an :attr:`authorization` property that represents the parsed
+    value of the `Authorization` header as a
+    :class:`~werkzeug.datastructures.Authorization` object.
+    """
+
+    @cached_property
+    def authorization(self):
+        """The `Authorization` object in parsed form."""
+        header = self.environ.get("HTTP_AUTHORIZATION")
+        return parse_authorization_header(header)
+
+
+class WWWAuthenticateMixin(object):
+    """Adds a :attr:`www_authenticate` property to a response object."""
+
+    @property
+    def www_authenticate(self):
+        """The `WWW-Authenticate` header in a parsed form."""
+
+        def on_update(www_auth):
+            if not www_auth and "www-authenticate" in self.headers:
+                del self.headers["www-authenticate"]
+            elif www_auth:
+                self.headers["WWW-Authenticate"] = www_auth.to_header()
+
+        header = self.headers.get("www-authenticate")
+        return parse_www_authenticate_header(header, on_update)
diff --git a/python/werkzeug/wrappers/base_request.py b/python/werkzeug/wrappers/base_request.py
new file mode 100644
index 0000000..41e8aad
--- /dev/null
+++ b/python/werkzeug/wrappers/base_request.py
@@ -0,0 +1,693 @@
+import warnings
+from functools import update_wrapper
+from io import BytesIO
+
+from .._compat import to_native
+from .._compat import to_unicode
+from .._compat import wsgi_decoding_dance
+from .._compat import wsgi_get_bytes
+from ..datastructures import CombinedMultiDict
+from ..datastructures import EnvironHeaders
+from ..datastructures import ImmutableList
+from ..datastructures import ImmutableMultiDict
+from ..datastructures import ImmutableTypeConversionDict
+from ..datastructures import iter_multi_items
+from ..datastructures import MultiDict
+from ..formparser import default_stream_factory
+from ..formparser import FormDataParser
+from ..http import parse_cookie
+from ..http import parse_options_header
+from ..urls import url_decode
+from ..utils import cached_property
+from ..utils import environ_property
+from ..wsgi import get_content_length
+from ..wsgi import get_current_url
+from ..wsgi import get_host
+from ..wsgi import get_input_stream
+
+
+class BaseRequest(object):
+    """Very basic request object. This does not implement advanced stuff like
+    entity tag parsing or cache controls. The request object is created with
+    the WSGI environment as first argument and will add itself to the WSGI
+    environment as ``'werkzeug.request'`` unless it's created with
+    `populate_request` set to False.
+
+    There are a couple of mixins available that add additional functionality
+    to the request object; there is also a class called `Request` which
+    subclasses `BaseRequest` and all the important mixins.
+
+    It's a good idea to create a custom subclass of the :class:`BaseRequest`
+    and add missing functionality either via mixins or direct implementation.
+    Here is an example of such a subclass::
+
+        from werkzeug.wrappers import BaseRequest, ETagRequestMixin
+
+        class Request(BaseRequest, ETagRequestMixin):
+            pass
+
+    Request objects are **read only**. As of 0.5 modifications are not
+    allowed in any place.
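+
+    For example, assuming a ``request`` built from a WSGI environ, the
+    following raises a ``TypeError`` because :attr:`args` is immutable::
+
+        request.args['answer'] = '42'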
+    Unlike the lower level parsing functions, the request object will use
+    immutable objects everywhere possible.
+
+    Per default the request object will assume all the text data is `utf-8`
+    encoded. Please refer to :doc:`the unicode chapter </unicode>` for more
+    details about customizing the behavior.
+
+    Per default the request object will be added to the WSGI
+    environment as `werkzeug.request` to support the debugging system.
+    If you don't want that, set `populate_request` to `False`.
+
+    If `shallow` is `True` the environment is initialized as a shallow
+    object around the environ. Every operation that would modify the
+    environ in any way (such as consuming form data) raises an exception
+    unless the `shallow` attribute is explicitly set to `False`. This
+    is useful for middlewares where you don't want to consume the form
+    data by accident. A shallow request is not populated to the WSGI
+    environment.
+
+    .. versionchanged:: 0.5
+        read-only mode was enforced by using immutable classes for all
+        data.
+    """
+
+    #: the charset for the request, defaults to utf-8
+    charset = "utf-8"
+
+    #: the error handling procedure for errors, defaults to 'replace'
+    encoding_errors = "replace"
+
+    #: the maximum content length. This is forwarded to the form data
+    #: parsing function (:func:`parse_form_data`). When set and the
+    #: :attr:`form` or :attr:`files` attribute is accessed and the
+    #: parsing fails because more than the specified value is transmitted
+    #: a :exc:`~werkzeug.exceptions.RequestEntityTooLarge` exception is raised.
+    #:
+    #: Have a look at :ref:`dealing-with-request-data` for more details.
+    #:
+    #: .. versionadded:: 0.5
+    max_content_length = None
+
+    #: the maximum form field size. This is forwarded to the form data
+    #: parsing function (:func:`parse_form_data`). When set and the
+    #: :attr:`form` or :attr:`files` attribute is accessed and the
+    #: data in memory for post data is longer than the specified value a
+    #: :exc:`~werkzeug.exceptions.RequestEntityTooLarge` exception is raised.
+    #:
+    #: Have a look at :ref:`dealing-with-request-data` for more details.
+    #:
+    #: .. versionadded:: 0.5
+    max_form_memory_size = None
+
+    #: the class to use for `args` and `form`. The default is an
+    #: :class:`~werkzeug.datastructures.ImmutableMultiDict` which supports
+    #: multiple values per key. Alternatively it makes sense to use an
+    #: :class:`~werkzeug.datastructures.ImmutableOrderedMultiDict` which
+    #: preserves order or a :class:`~werkzeug.datastructures.ImmutableDict`
+    #: which is the fastest but only remembers the last key. It is also
+    #: possible to use mutable structures, but this is not recommended.
+    #:
+    #: .. versionadded:: 0.6
+    parameter_storage_class = ImmutableMultiDict
+
+    #: the type to be used for list values from the incoming WSGI environment.
+    #: By default an :class:`~werkzeug.datastructures.ImmutableList` is used
+    #: (for example for :attr:`access_route`).
+    #:
+    #: .. versionadded:: 0.6
+    list_storage_class = ImmutableList
+
+    #: the type to be used for dict values from the incoming WSGI environment.
+    #: By default an
+    #: :class:`~werkzeug.datastructures.ImmutableTypeConversionDict` is used
+    #: (for example for :attr:`cookies`).
+    #:
+    #: .. versionadded:: 0.6
+    dict_storage_class = ImmutableTypeConversionDict
+
+    #: The form data parser that should be used. Can be replaced to customize
+    #: the form data parsing.
+    form_data_parser_class = FormDataParser
+
+    #: Optionally a list of hosts that is trusted by this request. By default
+    #: all hosts are trusted, which means that whatever host the client sends
+    #: will be accepted.
+    #:
+    #: Because `Host` and `X-Forwarded-Host` headers can be set to any value by
+    #: a malicious client, it is recommended to either set this property or
+    #: implement similar validation in the proxy (if the application is being
+    #: run behind one).
+    #:
+    #: .. versionadded:: 0.9
+    trusted_hosts = None
+
+    #: Indicates whether the data descriptor should be allowed to read and
+    #: buffer up the input stream. By default it's enabled.
+    #:
+    #: .. versionadded:: 0.9
+    disable_data_descriptor = False
+
+    def __init__(self, environ, populate_request=True, shallow=False):
+        self.environ = environ
+        if populate_request and not shallow:
+            self.environ["werkzeug.request"] = self
+        self.shallow = shallow
+
+    def __repr__(self):
+        # make sure the __repr__ even works if the request was created
+        # from an invalid WSGI environment. If we display the request
+        # in a debug session we don't want the repr to blow up.
+        args = []
+        try:
+            args.append("'%s'" % to_native(self.url, self.url_charset))
+            args.append("[%s]" % self.method)
+        except Exception:
+            args.append("(invalid WSGI environ)")
+
+        return "<%s %s>" % (self.__class__.__name__, " ".join(args))
+
+    @property
+    def url_charset(self):
+        """The charset that is assumed for URLs. Defaults to the value
+        of :attr:`charset`.
+
+        .. versionadded:: 0.6
+        """
+        return self.charset
+
+    @classmethod
+    def from_values(cls, *args, **kwargs):
+        """Create a new request object based on the values provided. If
+        environ is given missing values are filled from there. This method is
+        useful for small scripts when you need to simulate a request from a URL.
+        Do not use this method for unit testing; there is a full featured client
+        object (:class:`Client`) that allows you to create multipart requests,
+        with support for cookies etc.
+
+        This accepts the same options as the
+        :class:`~werkzeug.test.EnvironBuilder`.
+
+        .. versionchanged:: 0.5
+           This method now accepts the same arguments as
+           :class:`~werkzeug.test.EnvironBuilder`. Because of this the
+           `environ` parameter is now called `environ_overrides`.
+
+        :return: request object
+        """
+        from ..test import EnvironBuilder
+
+        charset = kwargs.pop("charset", cls.charset)
+        kwargs["charset"] = charset
+        builder = EnvironBuilder(*args, **kwargs)
+        try:
+            return builder.get_request(cls)
+        finally:
+            builder.close()
+
+    @classmethod
+    def application(cls, f):
+        """Decorate a function as responder that accepts the request as first
+        argument. This works like the :func:`responder` decorator but the
+        function is passed the request object as first argument and the
+        request object will be closed automatically::
+
+            @Request.application
+            def my_wsgi_app(request):
+                return Response('Hello World!')
+
+        As of Werkzeug 0.14 HTTP exceptions are automatically caught and
+        converted to responses instead of failing.
+
+        :param f: the WSGI callable to decorate
+        :return: a new WSGI callable
+        """
+        #: return a callable that wraps the -2nd argument with the request
+        #: and calls the function with all the arguments up to that one and
+        #: the request. The return value is then called with the last
+        #: two arguments. This makes it possible to use this decorator for
+        #: both methods and standalone WSGI functions.
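+        #:
+        #: For example, for a plain function ``def app(request)`` decorated
+        #: with ``Request.application``, calling ``app(environ,
+        #: start_response)`` builds the request from ``environ``, runs
+        #: ``f(request)`` and calls the resulting response object with
+        #: ``(environ, start_response)``.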
+ from ..exceptions import HTTPException + + def application(*args): + request = cls(args[-2]) + with request: + try: + resp = f(*args[:-2] + (request,)) + except HTTPException as e: + resp = e.get_response(args[-2]) + return resp(*args[-2:]) + + return update_wrapper(application, f) + + def _get_file_stream( + self, total_content_length, content_type, filename=None, content_length=None + ): + """Called to get a stream for the file upload. + + This must provide a file-like class with `read()`, `readline()` + and `seek()` methods that is both writeable and readable. + + The default implementation returns a temporary file if the total + content length is higher than 500KB. Because many browsers do not + provide a content length for the files only the total content + length matters. + + :param total_content_length: the total content length of all the + data in the request combined. This value + is guaranteed to be there. + :param content_type: the mimetype of the uploaded file. + :param filename: the filename of the uploaded file. May be `None`. + :param content_length: the length of this file. This value is usually + not provided because webbrowsers do not provide + this value. + """ + return default_stream_factory( + total_content_length=total_content_length, + filename=filename, + content_type=content_type, + content_length=content_length, + ) + + @property + def want_form_data_parsed(self): + """Returns True if the request method carries content. As of + Werkzeug 0.9 this will be the case if a content type is transmitted. + + .. versionadded:: 0.8 + """ + return bool(self.environ.get("CONTENT_TYPE")) + + def make_form_data_parser(self): + """Creates the form data parser. Instantiates the + :attr:`form_data_parser_class` with some parameters. + + .. versionadded:: 0.8 + """ + return self.form_data_parser_class( + self._get_file_stream, + self.charset, + self.encoding_errors, + self.max_form_memory_size, + self.max_content_length, + self.parameter_storage_class, + ) + + def _load_form_data(self): + """Method used internally to retrieve submitted data. After calling + this sets `form` and `files` on the request object to multi dicts + filled with the incoming form data. As a matter of fact the input + stream will be empty afterwards. You can also call this method to + force the parsing of the form data. + + .. versionadded:: 0.8 + """ + # abort early if we have already consumed the stream + if "form" in self.__dict__: + return + + _assert_not_shallow(self) + + if self.want_form_data_parsed: + content_type = self.environ.get("CONTENT_TYPE", "") + content_length = get_content_length(self.environ) + mimetype, options = parse_options_header(content_type) + parser = self.make_form_data_parser() + data = parser.parse( + self._get_stream_for_parsing(), mimetype, content_length, options + ) + else: + data = ( + self.stream, + self.parameter_storage_class(), + self.parameter_storage_class(), + ) + + # inject the values into the instance dict so that we bypass + # our cached_property non-data descriptor. + d = self.__dict__ + d["stream"], d["form"], d["files"] = data + + def _get_stream_for_parsing(self): + """This is the same as accessing :attr:`stream` with the difference + that if it finds cached data from calling :meth:`get_data` first it + will create a new stream out of the cached data. + + .. 
versionadded:: 0.9.3 + """ + cached_data = getattr(self, "_cached_data", None) + if cached_data is not None: + return BytesIO(cached_data) + return self.stream + + def close(self): + """Closes associated resources of this request object. This + closes all file handles explicitly. You can also use the request + object in a with statement which will automatically close it. + + .. versionadded:: 0.9 + """ + files = self.__dict__.get("files") + for _key, value in iter_multi_items(files or ()): + value.close() + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, tb): + self.close() + + @cached_property + def stream(self): + """ + If the incoming form data was not encoded with a known mimetype + the data is stored unmodified in this stream for consumption. Most + of the time it is a better idea to use :attr:`data` which will give + you that data as a string. The stream only returns the data once. + + Unlike :attr:`input_stream` this stream is properly guarded that you + can't accidentally read past the length of the input. Werkzeug will + internally always refer to this stream to read data which makes it + possible to wrap this object with a stream that does filtering. + + .. versionchanged:: 0.9 + This stream is now always available but might be consumed by the + form parser later on. Previously the stream was only set if no + parsing happened. + """ + _assert_not_shallow(self) + return get_input_stream(self.environ) + + input_stream = environ_property( + "wsgi.input", + """The WSGI input stream. + + In general it's a bad idea to use this one because you can + easily read past the boundary. Use the :attr:`stream` + instead.""", + ) + + @cached_property + def args(self): + """The parsed URL parameters (the part in the URL after the question + mark). + + By default an + :class:`~werkzeug.datastructures.ImmutableMultiDict` + is returned from this function. This can be changed by setting + :attr:`parameter_storage_class` to a different type. This might + be necessary if the order of the form data is important. + """ + return url_decode( + wsgi_get_bytes(self.environ.get("QUERY_STRING", "")), + self.url_charset, + errors=self.encoding_errors, + cls=self.parameter_storage_class, + ) + + @cached_property + def data(self): + """ + Contains the incoming request data as string in case it came with + a mimetype Werkzeug does not handle. + """ + + if self.disable_data_descriptor: + raise AttributeError("data descriptor is disabled") + # XXX: this should eventually be deprecated. + + # We trigger form data parsing first which means that the descriptor + # will not cache the data that would otherwise be .form or .files + # data. This restores the behavior that was there in Werkzeug + # before 0.9. New code should use :meth:`get_data` explicitly as + # this will make behavior explicit. + return self.get_data(parse_form_data=True) + + def get_data(self, cache=True, as_text=False, parse_form_data=False): + """This reads the buffered incoming data from the client into one + bytestring. By default this is cached but that behavior can be + changed by setting `cache` to `False`. + + Usually it's a bad idea to call this method without checking the + content length first as a client could send dozens of megabytes or more + to cause memory problems on the server. + + Note that if the form data was already parsed this method will not + return anything as form data parsing does not cache the data like + this method does. 
To implicitly invoke form data parsing function + set `parse_form_data` to `True`. When this is done the return value + of this method will be an empty string if the form parser handles + the data. This generally is not necessary as if the whole data is + cached (which is the default) the form parser will used the cached + data to parse the form data. Please be generally aware of checking + the content length first in any case before calling this method + to avoid exhausting server memory. + + If `as_text` is set to `True` the return value will be a decoded + unicode string. + + .. versionadded:: 0.9 + """ + rv = getattr(self, "_cached_data", None) + if rv is None: + if parse_form_data: + self._load_form_data() + rv = self.stream.read() + if cache: + self._cached_data = rv + if as_text: + rv = rv.decode(self.charset, self.encoding_errors) + return rv + + @cached_property + def form(self): + """The form parameters. By default an + :class:`~werkzeug.datastructures.ImmutableMultiDict` + is returned from this function. This can be changed by setting + :attr:`parameter_storage_class` to a different type. This might + be necessary if the order of the form data is important. + + Please keep in mind that file uploads will not end up here, but instead + in the :attr:`files` attribute. + + .. versionchanged:: 0.9 + + Previous to Werkzeug 0.9 this would only contain form data for POST + and PUT requests. + """ + self._load_form_data() + return self.form + + @cached_property + def values(self): + """A :class:`werkzeug.datastructures.CombinedMultiDict` that combines + :attr:`args` and :attr:`form`.""" + args = [] + for d in self.args, self.form: + if not isinstance(d, MultiDict): + d = MultiDict(d) + args.append(d) + return CombinedMultiDict(args) + + @cached_property + def files(self): + """:class:`~werkzeug.datastructures.MultiDict` object containing + all uploaded files. Each key in :attr:`files` is the name from the + ``<input type="file" name="">``. Each value in :attr:`files` is a + Werkzeug :class:`~werkzeug.datastructures.FileStorage` object. + + It basically behaves like a standard file object you know from Python, + with the difference that it also has a + :meth:`~werkzeug.datastructures.FileStorage.save` function that can + store the file on the filesystem. + + Note that :attr:`files` will only contain data if the request method was + POST, PUT or PATCH and the ``<form>`` that posted to the request had + ``enctype="multipart/form-data"``. It will be empty otherwise. + + See the :class:`~werkzeug.datastructures.MultiDict` / + :class:`~werkzeug.datastructures.FileStorage` documentation for + more details about the used data structure. + """ + self._load_form_data() + return self.files + + @cached_property + def cookies(self): + """A :class:`dict` with the contents of all cookies transmitted with + the request.""" + return parse_cookie( + self.environ, + self.charset, + self.encoding_errors, + cls=self.dict_storage_class, + ) + + @cached_property + def headers(self): + """The headers from the WSGI environ as immutable + :class:`~werkzeug.datastructures.EnvironHeaders`. + """ + return EnvironHeaders(self.environ) + + @cached_property + def path(self): + """Requested path as unicode. This works a bit like the regular path + info in the WSGI environment but will always include a leading slash, + even if the URL root is accessed. 
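+
+        For example, with the application mounted at ``/myapp``, a request
+        for ``http://example.com/myapp/page.html`` yields a :attr:`path` of
+        ``u'/page.html'``.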
+ """ + raw_path = wsgi_decoding_dance( + self.environ.get("PATH_INFO") or "", self.charset, self.encoding_errors + ) + return "/" + raw_path.lstrip("/") + + @cached_property + def full_path(self): + """Requested path as unicode, including the query string.""" + return self.path + u"?" + to_unicode(self.query_string, self.url_charset) + + @cached_property + def script_root(self): + """The root path of the script without the trailing slash.""" + raw_path = wsgi_decoding_dance( + self.environ.get("SCRIPT_NAME") or "", self.charset, self.encoding_errors + ) + return raw_path.rstrip("/") + + @cached_property + def url(self): + """The reconstructed current URL as IRI. + See also: :attr:`trusted_hosts`. + """ + return get_current_url(self.environ, trusted_hosts=self.trusted_hosts) + + @cached_property + def base_url(self): + """Like :attr:`url` but without the querystring + See also: :attr:`trusted_hosts`. + """ + return get_current_url( + self.environ, strip_querystring=True, trusted_hosts=self.trusted_hosts + ) + + @cached_property + def url_root(self): + """The full URL root (with hostname), this is the application + root as IRI. + See also: :attr:`trusted_hosts`. + """ + return get_current_url(self.environ, True, trusted_hosts=self.trusted_hosts) + + @cached_property + def host_url(self): + """Just the host with scheme as IRI. + See also: :attr:`trusted_hosts`. + """ + return get_current_url( + self.environ, host_only=True, trusted_hosts=self.trusted_hosts + ) + + @cached_property + def host(self): + """Just the host including the port if available. + See also: :attr:`trusted_hosts`. + """ + return get_host(self.environ, trusted_hosts=self.trusted_hosts) + + query_string = environ_property( + "QUERY_STRING", + "", + read_only=True, + load_func=wsgi_get_bytes, + doc="The URL parameters as raw bytestring.", + ) + method = environ_property( + "REQUEST_METHOD", + "GET", + read_only=True, + load_func=lambda x: x.upper(), + doc="The request method. (For example ``'GET'`` or ``'POST'``).", + ) + + @cached_property + def access_route(self): + """If a forwarded header exists this is a list of all ip addresses + from the client ip to the last proxy server. + """ + if "HTTP_X_FORWARDED_FOR" in self.environ: + addr = self.environ["HTTP_X_FORWARDED_FOR"].split(",") + return self.list_storage_class([x.strip() for x in addr]) + elif "REMOTE_ADDR" in self.environ: + return self.list_storage_class([self.environ["REMOTE_ADDR"]]) + return self.list_storage_class() + + @property + def remote_addr(self): + """The remote address of the client.""" + return self.environ.get("REMOTE_ADDR") + + remote_user = environ_property( + "REMOTE_USER", + doc="""If the server supports user authentication, and the + script is protected, this attribute contains the username the + user has authenticated as.""", + ) + + scheme = environ_property( + "wsgi.url_scheme", + doc=""" + URL scheme (http or https). + + .. versionadded:: 0.7""", + ) + + @property + def is_xhr(self): + """True if the request was triggered via a JavaScript XMLHttpRequest. + This only works with libraries that support the ``X-Requested-With`` + header and set it to "XMLHttpRequest". Libraries that do that are + prototype, jQuery and Mochikit and probably some more. + + .. deprecated:: 0.13 + ``X-Requested-With`` is not standard and is unreliable. You + may be able to use :attr:`AcceptMixin.accept_mimetypes` + instead. + """ + warnings.warn( + "'Request.is_xhr' is deprecated as of version 0.13 and will" + " be removed in version 1.0. 
The 'X-Requested-With' header" + " is not standard and is unreliable. You may be able to use" + " 'accept_mimetypes' instead.", + DeprecationWarning, + stacklevel=2, + ) + return self.environ.get("HTTP_X_REQUESTED_WITH", "").lower() == "xmlhttprequest" + + is_secure = property( + lambda self: self.environ["wsgi.url_scheme"] == "https", + doc="`True` if the request is secure.", + ) + is_multithread = environ_property( + "wsgi.multithread", + doc="""boolean that is `True` if the application is served by a + multithreaded WSGI server.""", + ) + is_multiprocess = environ_property( + "wsgi.multiprocess", + doc="""boolean that is `True` if the application is served by a + WSGI server that spawns multiple processes.""", + ) + is_run_once = environ_property( + "wsgi.run_once", + doc="""boolean that is `True` if the application will be + executed only once in a process lifetime. This is the case for + CGI for example, but it's not guaranteed that the execution only + happens one time.""", + ) + + +def _assert_not_shallow(request): + if request.shallow: + raise RuntimeError( + "A shallow request tried to consume form data. If you really" + " want to do that, set `shallow` to False." + ) diff --git a/python/werkzeug/wrappers/base_response.py b/python/werkzeug/wrappers/base_response.py new file mode 100644 index 0000000..d944a7d --- /dev/null +++ b/python/werkzeug/wrappers/base_response.py @@ -0,0 +1,702 @@ +import warnings + +from .._compat import integer_types +from .._compat import string_types +from .._compat import text_type +from .._compat import to_bytes +from .._compat import to_native +from ..datastructures import Headers +from ..http import dump_cookie +from ..http import HTTP_STATUS_CODES +from ..http import remove_entity_headers +from ..urls import iri_to_uri +from ..urls import url_join +from ..utils import get_content_type +from ..wsgi import ClosingIterator +from ..wsgi import get_current_url + + +def _run_wsgi_app(*args): + """This function replaces itself to ensure that the test module is not + imported unless required. DO NOT USE! + """ + global _run_wsgi_app + from ..test import run_wsgi_app as _run_wsgi_app + + return _run_wsgi_app(*args) + + +def _warn_if_string(iterable): + """Helper for the response objects to check if the iterable returned + to the WSGI server is not a string. + """ + if isinstance(iterable, string_types): + warnings.warn( + "Response iterable was set to a string. This will appear to" + " work but means that the server will send the data to the" + " client one character at a time. This is almost never" + " intended behavior, use 'response.data' to assign strings" + " to the response object.", + stacklevel=2, + ) + + +def _iter_encoded(iterable, charset): + for item in iterable: + if isinstance(item, text_type): + yield item.encode(charset) + else: + yield item + + +def _clean_accept_ranges(accept_ranges): + if accept_ranges is True: + return "bytes" + elif accept_ranges is False: + return "none" + elif isinstance(accept_ranges, text_type): + return to_native(accept_ranges) + raise ValueError("Invalid accept_ranges value") + + +class BaseResponse(object): + """Base response class. The most important fact about a response object + is that it's a regular WSGI application. It's initialized with a couple + of response parameters (headers, body, status code etc.) and will start a + valid WSGI response when called with the environ and start response + callable. 
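+
+    A response object can be created and inspected without any WSGI server
+    involved, for example (the header name here is arbitrary)::
+
+        resp = BaseResponse('Hello', status=200, mimetype='text/plain')
+        resp.headers['X-Example'] = 'yes'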
+ + Because it's a WSGI application itself processing usually ends before the + actual response is sent to the server. This helps debugging systems + because they can catch all the exceptions before responses are started. + + Here a small example WSGI application that takes advantage of the + response objects:: + + from werkzeug.wrappers import BaseResponse as Response + + def index(): + return Response('Index page') + + def application(environ, start_response): + path = environ.get('PATH_INFO') or '/' + if path == '/': + response = index() + else: + response = Response('Not Found', status=404) + return response(environ, start_response) + + Like :class:`BaseRequest` which object is lacking a lot of functionality + implemented in mixins. This gives you a better control about the actual + API of your response objects, so you can create subclasses and add custom + functionality. A full featured response object is available as + :class:`Response` which implements a couple of useful mixins. + + To enforce a new type of already existing responses you can use the + :meth:`force_type` method. This is useful if you're working with different + subclasses of response objects and you want to post process them with a + known interface. + + Per default the response object will assume all the text data is `utf-8` + encoded. Please refer to :doc:`the unicode chapter </unicode>` for more + details about customizing the behavior. + + Response can be any kind of iterable or string. If it's a string it's + considered being an iterable with one item which is the string passed. + Headers can be a list of tuples or a + :class:`~werkzeug.datastructures.Headers` object. + + Special note for `mimetype` and `content_type`: For most mime types + `mimetype` and `content_type` work the same, the difference affects + only 'text' mimetypes. If the mimetype passed with `mimetype` is a + mimetype starting with `text/`, the charset parameter of the response + object is appended to it. In contrast the `content_type` parameter is + always added as header unmodified. + + .. versionchanged:: 0.5 + the `direct_passthrough` parameter was added. + + :param response: a string or response iterable. + :param status: a string with a status or an integer with the status code. + :param headers: a list of headers or a + :class:`~werkzeug.datastructures.Headers` object. + :param mimetype: the mimetype for the response. See notice above. + :param content_type: the content type for the response. See notice above. + :param direct_passthrough: if set to `True` :meth:`iter_encoded` is not + called before iteration which makes it + possible to pass special iterators through + unchanged (see :func:`wrap_file` for more + details.) + """ + + #: the charset of the response. + charset = "utf-8" + + #: the default status if none is provided. + default_status = 200 + + #: the default mimetype if none is provided. + default_mimetype = "text/plain" + + #: if set to `False` accessing properties on the response object will + #: not try to consume the response iterator and convert it into a list. + #: + #: .. versionadded:: 0.6.2 + #: + #: That attribute was previously called `implicit_seqence_conversion`. + #: (Notice the typo). If you did use this feature, you have to adapt + #: your code to the name change. + implicit_sequence_conversion = True + + #: Should this response object correct the location header to be RFC + #: conformant? This is true by default. + #: + #: .. 
versionadded:: 0.8 + autocorrect_location_header = True + + #: Should this response object automatically set the content-length + #: header if possible? This is true by default. + #: + #: .. versionadded:: 0.8 + automatically_set_content_length = True + + #: Warn if a cookie header exceeds this size. The default, 4093, should be + #: safely `supported by most browsers <cookie_>`_. A cookie larger than + #: this size will still be sent, but it may be ignored or handled + #: incorrectly by some browsers. Set to 0 to disable this check. + #: + #: .. versionadded:: 0.13 + #: + #: .. _`cookie`: http://browsercookielimits.squawky.net/ + max_cookie_size = 4093 + + def __init__( + self, + response=None, + status=None, + headers=None, + mimetype=None, + content_type=None, + direct_passthrough=False, + ): + if isinstance(headers, Headers): + self.headers = headers + elif not headers: + self.headers = Headers() + else: + self.headers = Headers(headers) + + if content_type is None: + if mimetype is None and "content-type" not in self.headers: + mimetype = self.default_mimetype + if mimetype is not None: + mimetype = get_content_type(mimetype, self.charset) + content_type = mimetype + if content_type is not None: + self.headers["Content-Type"] = content_type + if status is None: + status = self.default_status + if isinstance(status, integer_types): + self.status_code = status + else: + self.status = status + + self.direct_passthrough = direct_passthrough + self._on_close = [] + + # we set the response after the headers so that if a class changes + # the charset attribute, the data is set in the correct charset. + if response is None: + self.response = [] + elif isinstance(response, (text_type, bytes, bytearray)): + self.set_data(response) + else: + self.response = response + + def call_on_close(self, func): + """Adds a function to the internal list of functions that should + be called as part of closing down the response. Since 0.7 this + function also returns the function that was passed so that this + can be used as a decorator. + + .. versionadded:: 0.6 + """ + self._on_close.append(func) + return func + + def __repr__(self): + if self.is_sequence: + body_info = "%d bytes" % sum(map(len, self.iter_encoded())) + else: + body_info = "streamed" if self.is_streamed else "likely-streamed" + return "<%s %s [%s]>" % (self.__class__.__name__, body_info, self.status) + + @classmethod + def force_type(cls, response, environ=None): + """Enforce that the WSGI response is a response object of the current + type. Werkzeug will use the :class:`BaseResponse` internally in many + situations like the exceptions. If you call :meth:`get_response` on an + exception you will get back a regular :class:`BaseResponse` object, even + if you are using a custom subclass. + + This method can enforce a given response type, and it will also + convert arbitrary WSGI callables into response objects if an environ + is provided:: + + # convert a Werkzeug response object into an instance of the + # MyResponseClass subclass. + response = MyResponseClass.force_type(response) + + # convert any WSGI application into a response object + response = MyResponseClass.force_type(response, environ) + + This is especially useful if you want to post-process responses in + the main dispatcher and use functionality provided by your subclass. + + Keep in mind that this will modify response objects in place if + possible! + + :param response: a response object or wsgi application. + :param environ: a WSGI environment object. 
+ :return: a response object. + """ + if not isinstance(response, BaseResponse): + if environ is None: + raise TypeError( + "cannot convert WSGI application into response" + " objects without an environ" + ) + response = BaseResponse(*_run_wsgi_app(response, environ)) + response.__class__ = cls + return response + + @classmethod + def from_app(cls, app, environ, buffered=False): + """Create a new response object from an application output. This + works best if you pass it an application that returns a generator all + the time. Sometimes applications may use the `write()` callable + returned by the `start_response` function. This tries to resolve such + edge cases automatically. But if you don't get the expected output + you should set `buffered` to `True` which enforces buffering. + + :param app: the WSGI application to execute. + :param environ: the WSGI environment to execute against. + :param buffered: set to `True` to enforce buffering. + :return: a response object. + """ + return cls(*_run_wsgi_app(app, environ, buffered)) + + def _get_status_code(self): + return self._status_code + + def _set_status_code(self, code): + self._status_code = code + try: + self._status = "%d %s" % (code, HTTP_STATUS_CODES[code].upper()) + except KeyError: + self._status = "%d UNKNOWN" % code + + status_code = property( + _get_status_code, _set_status_code, doc="The HTTP Status code as number" + ) + del _get_status_code, _set_status_code + + def _get_status(self): + return self._status + + def _set_status(self, value): + try: + self._status = to_native(value) + except AttributeError: + raise TypeError("Invalid status argument") + + try: + self._status_code = int(self._status.split(None, 1)[0]) + except ValueError: + self._status_code = 0 + self._status = "0 %s" % self._status + except IndexError: + raise ValueError("Empty status argument") + + status = property(_get_status, _set_status, doc="The HTTP Status code") + del _get_status, _set_status + + def get_data(self, as_text=False): + """The string representation of the request body. Whenever you call + this property the request iterable is encoded and flattened. This + can lead to unwanted behavior if you stream big data. + + This behavior can be disabled by setting + :attr:`implicit_sequence_conversion` to `False`. + + If `as_text` is set to `True` the return value will be a decoded + unicode string. + + .. versionadded:: 0.9 + """ + self._ensure_sequence() + rv = b"".join(self.iter_encoded()) + if as_text: + rv = rv.decode(self.charset) + return rv + + def set_data(self, value): + """Sets a new string as response. The value set must either by a + unicode or bytestring. If a unicode string is set it's encoded + automatically to the charset of the response (utf-8 by default). + + .. 
versionadded:: 0.9 + """ + # if an unicode string is set, it's encoded directly so that we + # can set the content length + if isinstance(value, text_type): + value = value.encode(self.charset) + else: + value = bytes(value) + self.response = [value] + if self.automatically_set_content_length: + self.headers["Content-Length"] = str(len(value)) + + data = property( + get_data, + set_data, + doc="A descriptor that calls :meth:`get_data` and :meth:`set_data`.", + ) + + def calculate_content_length(self): + """Returns the content length if available or `None` otherwise.""" + try: + self._ensure_sequence() + except RuntimeError: + return None + return sum(len(x) for x in self.iter_encoded()) + + def _ensure_sequence(self, mutable=False): + """This method can be called by methods that need a sequence. If + `mutable` is true, it will also ensure that the response sequence + is a standard Python list. + + .. versionadded:: 0.6 + """ + if self.is_sequence: + # if we need a mutable object, we ensure it's a list. + if mutable and not isinstance(self.response, list): + self.response = list(self.response) + return + if self.direct_passthrough: + raise RuntimeError( + "Attempted implicit sequence conversion but the" + " response object is in direct passthrough mode." + ) + if not self.implicit_sequence_conversion: + raise RuntimeError( + "The response object required the iterable to be a" + " sequence, but the implicit conversion was disabled." + " Call make_sequence() yourself." + ) + self.make_sequence() + + def make_sequence(self): + """Converts the response iterator in a list. By default this happens + automatically if required. If `implicit_sequence_conversion` is + disabled, this method is not automatically called and some properties + might raise exceptions. This also encodes all the items. + + .. versionadded:: 0.6 + """ + if not self.is_sequence: + # if we consume an iterable we have to ensure that the close + # method of the iterable is called if available when we tear + # down the response + close = getattr(self.response, "close", None) + self.response = list(self.iter_encoded()) + if close is not None: + self.call_on_close(close) + + def iter_encoded(self): + """Iter the response encoded with the encoding of the response. + If the response object is invoked as WSGI application the return + value of this method is used as application iterator unless + :attr:`direct_passthrough` was activated. + """ + if __debug__: + _warn_if_string(self.response) + # Encode in a separate function so that self.response is fetched + # early. This allows us to wrap the response with the return + # value from get_app_iter or iter_encoded. + return _iter_encoded(self.response, self.charset) + + def set_cookie( + self, + key, + value="", + max_age=None, + expires=None, + path="/", + domain=None, + secure=False, + httponly=False, + samesite=None, + ): + """Sets a cookie. The parameters are the same as in the cookie `Morsel` + object in the Python standard library but it accepts unicode data, too. + + A warning is raised if the size of the cookie header exceeds + :attr:`max_cookie_size`, but the header will still be set. + + :param key: the key (name) of the cookie to be set. + :param value: the value of the cookie. + :param max_age: should be a number of seconds, or `None` (default) if + the cookie should last only as long as the client's + browser session. + :param expires: should be a `datetime` object or UNIX timestamp. + :param path: limits the cookie to a given path, per default it will + span the whole domain. 
+ :param domain: if you want to set a cross-domain cookie. For example, + ``domain=".example.com"`` will set a cookie that is + readable by the domain ``www.example.com``, + ``foo.example.com`` etc. Otherwise, a cookie will only + be readable by the domain that set it. + :param secure: If `True`, the cookie will only be available via HTTPS + :param httponly: disallow JavaScript to access the cookie. This is an + extension to the cookie standard and probably not + supported by all browsers. + :param samesite: Limits the scope of the cookie such that it will only + be attached to requests if those requests are + "same-site". + """ + self.headers.add( + "Set-Cookie", + dump_cookie( + key, + value=value, + max_age=max_age, + expires=expires, + path=path, + domain=domain, + secure=secure, + httponly=httponly, + charset=self.charset, + max_size=self.max_cookie_size, + samesite=samesite, + ), + ) + + def delete_cookie(self, key, path="/", domain=None): + """Delete a cookie. Fails silently if key doesn't exist. + + :param key: the key (name) of the cookie to be deleted. + :param path: if the cookie that should be deleted was limited to a + path, the path has to be defined here. + :param domain: if the cookie that should be deleted was limited to a + domain, that domain has to be defined here. + """ + self.set_cookie(key, expires=0, max_age=0, path=path, domain=domain) + + @property + def is_streamed(self): + """If the response is streamed (the response is not an iterable with + a length information) this property is `True`. In this case streamed + means that there is no information about the number of iterations. + This is usually `True` if a generator is passed to the response object. + + This is useful for checking before applying some sort of post + filtering that should not take place for streamed responses. + """ + try: + len(self.response) + except (TypeError, AttributeError): + return True + return False + + @property + def is_sequence(self): + """If the iterator is buffered, this property will be `True`. A + response object will consider an iterator to be buffered if the + response attribute is a list or tuple. + + .. versionadded:: 0.6 + """ + return isinstance(self.response, (tuple, list)) + + def close(self): + """Close the wrapped response if possible. You can also use the object + in a with statement which will automatically close it. + + .. versionadded:: 0.9 + Can now be used in a with statement. + """ + if hasattr(self.response, "close"): + self.response.close() + for func in self._on_close: + func() + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, tb): + self.close() + + def freeze(self): + """Call this method if you want to make your response object ready for + being pickled. This buffers the generator if there is one. It will + also set the `Content-Length` header to the length of the body. + + .. versionchanged:: 0.6 + The `Content-Length` header is now set. + """ + # we explicitly set the length to a list of the *encoded* response + # iterator. Even if the implicit sequence conversion is disabled. + self.response = list(self.iter_encoded()) + self.headers["Content-Length"] = str(sum(map(len, self.response))) + + def get_wsgi_headers(self, environ): + """This is automatically called right before the response is started + and returns headers modified for the given environment. It returns a + copy of the headers from the response with some modifications applied + if necessary. 
+ + For example the location header (if present) is joined with the root + URL of the environment. Also the content length is automatically set + to zero here for certain status codes. + + .. versionchanged:: 0.6 + Previously that function was called `fix_headers` and modified + the response object in place. Also since 0.6, IRIs in location + and content-location headers are handled properly. + + Also starting with 0.6, Werkzeug will attempt to set the content + length if it is able to figure it out on its own. This is the + case if all the strings in the response iterable are already + encoded and the iterable is buffered. + + :param environ: the WSGI environment of the request. + :return: returns a new :class:`~werkzeug.datastructures.Headers` + object. + """ + headers = Headers(self.headers) + location = None + content_location = None + content_length = None + status = self.status_code + + # iterate over the headers to find all values in one go. Because + # get_wsgi_headers is used each response that gives us a tiny + # speedup. + for key, value in headers: + ikey = key.lower() + if ikey == u"location": + location = value + elif ikey == u"content-location": + content_location = value + elif ikey == u"content-length": + content_length = value + + # make sure the location header is an absolute URL + if location is not None: + old_location = location + if isinstance(location, text_type): + # Safe conversion is necessary here as we might redirect + # to a broken URI scheme (for instance itms-services). + location = iri_to_uri(location, safe_conversion=True) + + if self.autocorrect_location_header: + current_url = get_current_url(environ, strip_querystring=True) + if isinstance(current_url, text_type): + current_url = iri_to_uri(current_url) + location = url_join(current_url, location) + if location != old_location: + headers["Location"] = location + + # make sure the content location is a URL + if content_location is not None and isinstance(content_location, text_type): + headers["Content-Location"] = iri_to_uri(content_location) + + if 100 <= status < 200 or status == 204: + # Per section 3.3.2 of RFC 7230, "a server MUST NOT send a + # Content-Length header field in any response with a status + # code of 1xx (Informational) or 204 (No Content)." + headers.remove("Content-Length") + elif status == 304: + remove_entity_headers(headers) + + # if we can determine the content length automatically, we + # should try to do that. But only if this does not involve + # flattening the iterator or encoding of unicode strings in + # the response. We however should not do that if we have a 304 + # response. + if ( + self.automatically_set_content_length + and self.is_sequence + and content_length is None + and status not in (204, 304) + and not (100 <= status < 200) + ): + try: + content_length = sum(len(to_bytes(x, "ascii")) for x in self.response) + except UnicodeError: + # aha, something non-bytestringy in there, too bad, we + # can't safely figure out the length of the response. + pass + else: + headers["Content-Length"] = str(content_length) + + return headers + + def get_app_iter(self, environ): + """Returns the application iterator for the given environ. Depending + on the request method and the current status code the return value + might be an empty response rather than the one from the response. + + If the request method is `HEAD` or the status code is in a range + where the HTTP specification requires an empty response, an empty + iterable is returned. + + .. 
versionadded:: 0.6 + + :param environ: the WSGI environment of the request. + :return: a response iterable. + """ + status = self.status_code + if ( + environ["REQUEST_METHOD"] == "HEAD" + or 100 <= status < 200 + or status in (204, 304) + ): + iterable = () + elif self.direct_passthrough: + if __debug__: + _warn_if_string(self.response) + return self.response + else: + iterable = self.iter_encoded() + return ClosingIterator(iterable, self.close) + + def get_wsgi_response(self, environ): + """Returns the final WSGI response as tuple. The first item in + the tuple is the application iterator, the second the status and + the third the list of headers. The response returned is created + specially for the given environment. For example if the request + method in the WSGI environment is ``'HEAD'`` the response will + be empty and only the headers and status code will be present. + + .. versionadded:: 0.6 + + :param environ: the WSGI environment of the request. + :return: an ``(app_iter, status, headers)`` tuple. + """ + headers = self.get_wsgi_headers(environ) + app_iter = self.get_app_iter(environ) + return app_iter, self.status, headers.to_wsgi_list() + + def __call__(self, environ, start_response): + """Process this response as WSGI application. + + :param environ: the WSGI environment. + :param start_response: the response callable provided by the WSGI + server. + :return: an application iterator + """ + app_iter, status, headers = self.get_wsgi_response(environ) + start_response(status, headers) + return app_iter diff --git a/python/werkzeug/wrappers/common_descriptors.py b/python/werkzeug/wrappers/common_descriptors.py new file mode 100644 index 0000000..e4107ee --- /dev/null +++ b/python/werkzeug/wrappers/common_descriptors.py @@ -0,0 +1,322 @@ +from datetime import datetime +from datetime import timedelta + +from .._compat import string_types +from ..datastructures import CallbackDict +from ..http import dump_age +from ..http import dump_header +from ..http import dump_options_header +from ..http import http_date +from ..http import parse_age +from ..http import parse_date +from ..http import parse_options_header +from ..http import parse_set_header +from ..utils import cached_property +from ..utils import environ_property +from ..utils import get_content_type +from ..utils import header_property +from ..wsgi import get_content_length + + +class CommonRequestDescriptorsMixin(object): + """A mixin for :class:`BaseRequest` subclasses. Request objects that + mix this class in will automatically get descriptors for a couple of + HTTP headers with automatic type conversion. + + .. versionadded:: 0.5 + """ + + content_type = environ_property( + "CONTENT_TYPE", + doc="""The Content-Type entity-header field indicates the media + type of the entity-body sent to the recipient or, in the case of + the HEAD method, the media type that would have been sent had + the request been a GET.""", + ) + + @cached_property + def content_length(self): + """The Content-Length entity-header field indicates the size of the + entity-body in bytes or, in the case of the HEAD method, the size of + the entity-body that would have been sent had the request been a + GET. + """ + return get_content_length(self.environ) + + content_encoding = environ_property( + "HTTP_CONTENT_ENCODING", + doc="""The Content-Encoding entity-header field is used as a + modifier to the media-type. 
When present, its value indicates + what additional content codings have been applied to the + entity-body, and thus what decoding mechanisms must be applied + in order to obtain the media-type referenced by the Content-Type + header field. + + .. versionadded:: 0.9""", + ) + content_md5 = environ_property( + "HTTP_CONTENT_MD5", + doc="""The Content-MD5 entity-header field, as defined in + RFC 1864, is an MD5 digest of the entity-body for the purpose of + providing an end-to-end message integrity check (MIC) of the + entity-body. (Note: a MIC is good for detecting accidental + modification of the entity-body in transit, but is not proof + against malicious attacks.) + + .. versionadded:: 0.9""", + ) + referrer = environ_property( + "HTTP_REFERER", + doc="""The Referer[sic] request-header field allows the client + to specify, for the server's benefit, the address (URI) of the + resource from which the Request-URI was obtained (the + "referrer", although the header field is misspelled).""", + ) + date = environ_property( + "HTTP_DATE", + None, + parse_date, + doc="""The Date general-header field represents the date and + time at which the message was originated, having the same + semantics as orig-date in RFC 822.""", + ) + max_forwards = environ_property( + "HTTP_MAX_FORWARDS", + None, + int, + doc="""The Max-Forwards request-header field provides a + mechanism with the TRACE and OPTIONS methods to limit the number + of proxies or gateways that can forward the request to the next + inbound server.""", + ) + + def _parse_content_type(self): + if not hasattr(self, "_parsed_content_type"): + self._parsed_content_type = parse_options_header( + self.environ.get("CONTENT_TYPE", "") + ) + + @property + def mimetype(self): + """Like :attr:`content_type`, but without parameters (eg, without + charset, type etc.) and always lowercase. For example if the content + type is ``text/HTML; charset=utf-8`` the mimetype would be + ``'text/html'``. + """ + self._parse_content_type() + return self._parsed_content_type[0].lower() + + @property + def mimetype_params(self): + """The mimetype parameters as dict. For example if the content + type is ``text/html; charset=utf-8`` the params would be + ``{'charset': 'utf-8'}``. + """ + self._parse_content_type() + return self._parsed_content_type[1] + + @cached_property + def pragma(self): + """The Pragma general-header field is used to include + implementation-specific directives that might apply to any recipient + along the request/response chain. All pragma directives specify + optional behavior from the viewpoint of the protocol; however, some + systems MAY require that behavior be consistent with the directives. + """ + return parse_set_header(self.environ.get("HTTP_PRAGMA", "")) + + +class CommonResponseDescriptorsMixin(object): + """A mixin for :class:`BaseResponse` subclasses. Response objects that + mix this class in will automatically get descriptors for a couple of + HTTP headers with automatic type conversion. + """ + + @property + def mimetype(self): + """The mimetype (content type without charset etc.)""" + ct = self.headers.get("content-type") + if ct: + return ct.split(";")[0].strip() + + @mimetype.setter + def mimetype(self, value): + self.headers["Content-Type"] = get_content_type(value, self.charset) + + @property + def mimetype_params(self): + """The mimetype parameters as dict. For example if the + content type is ``text/html; charset=utf-8`` the params would be + ``{'charset': 'utf-8'}``. + + .. 
versionadded:: 0.5 + """ + + def on_update(d): + self.headers["Content-Type"] = dump_options_header(self.mimetype, d) + + d = parse_options_header(self.headers.get("content-type", ""))[1] + return CallbackDict(d, on_update) + + location = header_property( + "Location", + doc="""The Location response-header field is used to redirect + the recipient to a location other than the Request-URI for + completion of the request or identification of a new + resource.""", + ) + age = header_property( + "Age", + None, + parse_age, + dump_age, + doc="""The Age response-header field conveys the sender's + estimate of the amount of time since the response (or its + revalidation) was generated at the origin server. + + Age values are non-negative decimal integers, representing time + in seconds.""", + ) + content_type = header_property( + "Content-Type", + doc="""The Content-Type entity-header field indicates the media + type of the entity-body sent to the recipient or, in the case of + the HEAD method, the media type that would have been sent had + the request been a GET.""", + ) + content_length = header_property( + "Content-Length", + None, + int, + str, + doc="""The Content-Length entity-header field indicates the size + of the entity-body, in decimal number of OCTETs, sent to the + recipient or, in the case of the HEAD method, the size of the + entity-body that would have been sent had the request been a + GET.""", + ) + content_location = header_property( + "Content-Location", + doc="""The Content-Location entity-header field MAY be used to + supply the resource location for the entity enclosed in the + message when that entity is accessible from a location separate + from the requested resource's URI.""", + ) + content_encoding = header_property( + "Content-Encoding", + doc="""The Content-Encoding entity-header field is used as a + modifier to the media-type. When present, its value indicates + what additional content codings have been applied to the + entity-body, and thus what decoding mechanisms must be applied + in order to obtain the media-type referenced by the Content-Type + header field.""", + ) + content_md5 = header_property( + "Content-MD5", + doc="""The Content-MD5 entity-header field, as defined in + RFC 1864, is an MD5 digest of the entity-body for the purpose of + providing an end-to-end message integrity check (MIC) of the + entity-body. (Note: a MIC is good for detecting accidental + modification of the entity-body in transit, but is not proof + against malicious attacks.)""", + ) + date = header_property( + "Date", + None, + parse_date, + http_date, + doc="""The Date general-header field represents the date and + time at which the message was originated, having the same + semantics as orig-date in RFC 822.""", + ) + expires = header_property( + "Expires", + None, + parse_date, + http_date, + doc="""The Expires entity-header field gives the date/time after + which the response is considered stale. A stale cache entry may + not normally be returned by a cache.""", + ) + last_modified = header_property( + "Last-Modified", + None, + parse_date, + http_date, + doc="""The Last-Modified entity-header field indicates the date + and time at which the origin server believes the variant was + last modified.""", + ) + + @property + def retry_after(self): + """The Retry-After response-header field can be used with a + 503 (Service Unavailable) response to indicate how long the + service is expected to be unavailable to the requesting client. + + Time in seconds until expiration or date. 
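The ``header_property`` descriptors defined here convert between Python values and header strings using the parse/dump pairs shown above. A small illustrative sketch (the printed value is only an example):

    from datetime import datetime, timedelta
    from werkzeug.wrappers import Response

    resp = Response("cached body")
    resp.expires = datetime.utcnow() + timedelta(hours=1)  # dumped via http_date
    resp.mimetype = "text/html"                            # rewrites Content-Type
    print(resp.headers["Expires"])  # an HTTP date string, e.g. 'Fri, 09 Aug 2019 ...'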
+ """ + value = self.headers.get("retry-after") + if value is None: + return + elif value.isdigit(): + return datetime.utcnow() + timedelta(seconds=int(value)) + return parse_date(value) + + @retry_after.setter + def retry_after(self, value): + if value is None: + if "retry-after" in self.headers: + del self.headers["retry-after"] + return + elif isinstance(value, datetime): + value = http_date(value) + else: + value = str(value) + self.headers["Retry-After"] = value + + def _set_property(name, doc=None): # noqa: B902 + def fget(self): + def on_update(header_set): + if not header_set and name in self.headers: + del self.headers[name] + elif header_set: + self.headers[name] = header_set.to_header() + + return parse_set_header(self.headers.get(name), on_update) + + def fset(self, value): + if not value: + del self.headers[name] + elif isinstance(value, string_types): + self.headers[name] = value + else: + self.headers[name] = dump_header(value) + + return property(fget, fset, doc=doc) + + vary = _set_property( + "Vary", + doc="""The Vary field value indicates the set of request-header + fields that fully determines, while the response is fresh, + whether a cache is permitted to use the response to reply to a + subsequent request without revalidation.""", + ) + content_language = _set_property( + "Content-Language", + doc="""The Content-Language entity-header field describes the + natural language(s) of the intended audience for the enclosed + entity. Note that this might not be equivalent to all the + languages used within the entity-body.""", + ) + allow = _set_property( + "Allow", + doc="""The Allow entity-header field lists the set of methods + supported by the resource identified by the Request-URI. The + purpose of this field is strictly to inform the recipient of + valid methods associated with the resource. An Allow header + field MUST be present in a 405 (Method Not Allowed) + response.""", + ) + + del _set_property diff --git a/python/werkzeug/wrappers/etag.py b/python/werkzeug/wrappers/etag.py new file mode 100644 index 0000000..0733506 --- /dev/null +++ b/python/werkzeug/wrappers/etag.py @@ -0,0 +1,304 @@ +from .._compat import string_types +from .._internal import _get_environ +from ..datastructures import ContentRange +from ..datastructures import RequestCacheControl +from ..datastructures import ResponseCacheControl +from ..http import generate_etag +from ..http import http_date +from ..http import is_resource_modified +from ..http import parse_cache_control_header +from ..http import parse_content_range_header +from ..http import parse_date +from ..http import parse_etags +from ..http import parse_if_range_header +from ..http import parse_range_header +from ..http import quote_etag +from ..http import unquote_etag +from ..utils import cached_property +from ..utils import header_property +from ..wrappers.base_response import _clean_accept_ranges +from ..wsgi import _RangeWrapper + + +class ETagRequestMixin(object): + """Add entity tag and cache descriptors to a request object or object with + a WSGI environment available as :attr:`~BaseRequest.environ`. This not + only provides access to etags but also to the cache control header. + """ + + @cached_property + def cache_control(self): + """A :class:`~werkzeug.datastructures.RequestCacheControl` object + for the incoming cache control headers. 
+ """ + cache_control = self.environ.get("HTTP_CACHE_CONTROL") + return parse_cache_control_header(cache_control, None, RequestCacheControl) + + @cached_property + def if_match(self): + """An object containing all the etags in the `If-Match` header. + + :rtype: :class:`~werkzeug.datastructures.ETags` + """ + return parse_etags(self.environ.get("HTTP_IF_MATCH")) + + @cached_property + def if_none_match(self): + """An object containing all the etags in the `If-None-Match` header. + + :rtype: :class:`~werkzeug.datastructures.ETags` + """ + return parse_etags(self.environ.get("HTTP_IF_NONE_MATCH")) + + @cached_property + def if_modified_since(self): + """The parsed `If-Modified-Since` header as datetime object.""" + return parse_date(self.environ.get("HTTP_IF_MODIFIED_SINCE")) + + @cached_property + def if_unmodified_since(self): + """The parsed `If-Unmodified-Since` header as datetime object.""" + return parse_date(self.environ.get("HTTP_IF_UNMODIFIED_SINCE")) + + @cached_property + def if_range(self): + """The parsed `If-Range` header. + + .. versionadded:: 0.7 + + :rtype: :class:`~werkzeug.datastructures.IfRange` + """ + return parse_if_range_header(self.environ.get("HTTP_IF_RANGE")) + + @cached_property + def range(self): + """The parsed `Range` header. + + .. versionadded:: 0.7 + + :rtype: :class:`~werkzeug.datastructures.Range` + """ + return parse_range_header(self.environ.get("HTTP_RANGE")) + + +class ETagResponseMixin(object): + """Adds extra functionality to a response object for etag and cache + handling. This mixin requires an object with at least a `headers` + object that implements a dict like interface similar to + :class:`~werkzeug.datastructures.Headers`. + + If you want the :meth:`freeze` method to automatically add an etag, you + have to mixin this method before the response base class. The default + response class does not do that. + """ + + @property + def cache_control(self): + """The Cache-Control general-header field is used to specify + directives that MUST be obeyed by all caching mechanisms along the + request/response chain. + """ + + def on_update(cache_control): + if not cache_control and "cache-control" in self.headers: + del self.headers["cache-control"] + elif cache_control: + self.headers["Cache-Control"] = cache_control.to_header() + + return parse_cache_control_header( + self.headers.get("cache-control"), on_update, ResponseCacheControl + ) + + def _wrap_response(self, start, length): + """Wrap existing Response in case of Range Request context.""" + if self.status_code == 206: + self.response = _RangeWrapper(self.response, start, length) + + def _is_range_request_processable(self, environ): + """Return ``True`` if `Range` header is present and if underlying + resource is considered unchanged when compared with `If-Range` header. + """ + return ( + "HTTP_IF_RANGE" not in environ + or not is_resource_modified( + environ, + self.headers.get("etag"), + None, + self.headers.get("last-modified"), + ignore_if_range=False, + ) + ) and "HTTP_RANGE" in environ + + def _process_range_request(self, environ, complete_length=None, accept_ranges=None): + """Handle Range Request related headers (RFC7233). If `Accept-Ranges` + header is valid, and Range Request is processable, we set the headers + as described by the RFC, and wrap the underlying response in a + RangeWrapper. + + Returns ``True`` if Range Request can be fulfilled, ``False`` otherwise. + + :raises: :class:`~werkzeug.exceptions.RequestedRangeNotSatisfiable` + if `Range` header could not be parsed or satisfied. 
+ """ + from ..exceptions import RequestedRangeNotSatisfiable + + if accept_ranges is None: + return False + self.headers["Accept-Ranges"] = accept_ranges + if not self._is_range_request_processable(environ) or complete_length is None: + return False + parsed_range = parse_range_header(environ.get("HTTP_RANGE")) + if parsed_range is None: + raise RequestedRangeNotSatisfiable(complete_length) + range_tuple = parsed_range.range_for_length(complete_length) + content_range_header = parsed_range.to_content_range_header(complete_length) + if range_tuple is None or content_range_header is None: + raise RequestedRangeNotSatisfiable(complete_length) + content_length = range_tuple[1] - range_tuple[0] + # Be sure not to send 206 response + # if requested range is the full content. + if content_length != complete_length: + self.headers["Content-Length"] = content_length + self.content_range = content_range_header + self.status_code = 206 + self._wrap_response(range_tuple[0], content_length) + return True + return False + + def make_conditional( + self, request_or_environ, accept_ranges=False, complete_length=None + ): + """Make the response conditional to the request. This method works + best if an etag was defined for the response already. The `add_etag` + method can be used to do that. If called without etag just the date + header is set. + + This does nothing if the request method in the request or environ is + anything but GET or HEAD. + + For optimal performance when handling range requests, it's recommended + that your response data object implements `seekable`, `seek` and `tell` + methods as described by :py:class:`io.IOBase`. Objects returned by + :meth:`~werkzeug.wsgi.wrap_file` automatically implement those methods. + + It does not remove the body of the response because that's something + the :meth:`__call__` function does for us automatically. + + Returns self so that you can do ``return resp.make_conditional(req)`` + but modifies the object in-place. + + :param request_or_environ: a request object or WSGI environment to be + used to make the response conditional + against. + :param accept_ranges: This parameter dictates the value of + `Accept-Ranges` header. If ``False`` (default), + the header is not set. If ``True``, it will be set + to ``"bytes"``. If ``None``, it will be set to + ``"none"``. If it's a string, it will use this + value. + :param complete_length: Will be used only in valid Range Requests. + It will set `Content-Range` complete length + value and compute `Content-Length` real value. + This parameter is mandatory for successful + Range Requests completion. + :raises: :class:`~werkzeug.exceptions.RequestedRangeNotSatisfiable` + if `Range` header could not be parsed or satisfied. + """ + environ = _get_environ(request_or_environ) + if environ["REQUEST_METHOD"] in ("GET", "HEAD"): + # if the date is not in the headers, add it now. We however + # will not override an already existing header. Unfortunately + # this header will be overriden by many WSGI servers including + # wsgiref. 
+ if "date" not in self.headers: + self.headers["Date"] = http_date() + accept_ranges = _clean_accept_ranges(accept_ranges) + is206 = self._process_range_request(environ, complete_length, accept_ranges) + if not is206 and not is_resource_modified( + environ, + self.headers.get("etag"), + None, + self.headers.get("last-modified"), + ): + if parse_etags(environ.get("HTTP_IF_MATCH")): + self.status_code = 412 + else: + self.status_code = 304 + if ( + self.automatically_set_content_length + and "content-length" not in self.headers + ): + length = self.calculate_content_length() + if length is not None: + self.headers["Content-Length"] = length + return self + + def add_etag(self, overwrite=False, weak=False): + """Add an etag for the current response if there is none yet.""" + if overwrite or "etag" not in self.headers: + self.set_etag(generate_etag(self.get_data()), weak) + + def set_etag(self, etag, weak=False): + """Set the etag, and override the old one if there was one.""" + self.headers["ETag"] = quote_etag(etag, weak) + + def get_etag(self): + """Return a tuple in the form ``(etag, is_weak)``. If there is no + ETag the return value is ``(None, None)``. + """ + return unquote_etag(self.headers.get("ETag")) + + def freeze(self, no_etag=False): + """Call this method if you want to make your response object ready for + pickeling. This buffers the generator if there is one. This also + sets the etag unless `no_etag` is set to `True`. + """ + if not no_etag: + self.add_etag() + super(ETagResponseMixin, self).freeze() + + accept_ranges = header_property( + "Accept-Ranges", + doc="""The `Accept-Ranges` header. Even though the name would + indicate that multiple values are supported, it must be one + string token only. + + The values ``'bytes'`` and ``'none'`` are common. + + .. versionadded:: 0.7""", + ) + + def _get_content_range(self): + def on_update(rng): + if not rng: + del self.headers["content-range"] + else: + self.headers["Content-Range"] = rng.to_header() + + rv = parse_content_range_header(self.headers.get("content-range"), on_update) + # always provide a content range object to make the descriptor + # more user friendly. It provides an unset() method that can be + # used to remove the header quickly. + if rv is None: + rv = ContentRange(None, None, None, on_update=on_update) + return rv + + def _set_content_range(self, value): + if not value: + del self.headers["content-range"] + elif isinstance(value, string_types): + self.headers["Content-Range"] = value + else: + self.headers["Content-Range"] = value.to_header() + + content_range = property( + _get_content_range, + _set_content_range, + doc="""The ``Content-Range`` header as + :class:`~werkzeug.datastructures.ContentRange` object. Even if + the header is not set it wil provide such an object for easier + manipulation. + + .. 
versionadded:: 0.7""", + ) + del _get_content_range, _set_content_range diff --git a/python/werkzeug/wrappers/json.py b/python/werkzeug/wrappers/json.py new file mode 100644 index 0000000..6d5dc33 --- /dev/null +++ b/python/werkzeug/wrappers/json.py @@ -0,0 +1,145 @@ +from __future__ import absolute_import + +import datetime +import uuid + +from .._compat import text_type +from ..exceptions import BadRequest +from ..utils import detect_utf_encoding + +try: + import simplejson as _json +except ImportError: + import json as _json + + +class _JSONModule(object): + @staticmethod + def _default(o): + if isinstance(o, datetime.date): + return o.isoformat() + + if isinstance(o, uuid.UUID): + return str(o) + + if hasattr(o, "__html__"): + return text_type(o.__html__()) + + raise TypeError() + + @classmethod + def dumps(cls, obj, **kw): + kw.setdefault("separators", (",", ":")) + kw.setdefault("default", cls._default) + kw.setdefault("sort_keys", True) + return _json.dumps(obj, **kw) + + @staticmethod + def loads(s, **kw): + if isinstance(s, bytes): + # Needed for Python < 3.6 + encoding = detect_utf_encoding(s) + s = s.decode(encoding) + + return _json.loads(s, **kw) + + +class JSONMixin(object): + """Mixin to parse :attr:`data` as JSON. Can be mixed in for both + :class:`~werkzeug.wrappers.Request` and + :class:`~werkzeug.wrappers.Response` classes. + + If `simplejson`_ is installed it is preferred over Python's built-in + :mod:`json` module. + + .. _simplejson: https://simplejson.readthedocs.io/en/latest/ + """ + + #: A module or other object that has ``dumps`` and ``loads`` + #: functions that match the API of the built-in :mod:`json` module. + json_module = _JSONModule + + @property + def json(self): + """The parsed JSON data if :attr:`mimetype` indicates JSON + (:mimetype:`application/json`, see :meth:`is_json`). + + Calls :meth:`get_json` with default arguments. + """ + return self.get_json() + + @property + def is_json(self): + """Check if the mimetype indicates JSON data, either + :mimetype:`application/json` or :mimetype:`application/*+json`. + """ + mt = self.mimetype + return ( + mt == "application/json" + or mt.startswith("application/") + and mt.endswith("+json") + ) + + def _get_data_for_json(self, cache): + try: + return self.get_data(cache=cache) + except TypeError: + # Response doesn't have cache param. + return self.get_data() + + # Cached values for ``(silent=False, silent=True)``. Initialized + # with sentinel values. + _cached_json = (Ellipsis, Ellipsis) + + def get_json(self, force=False, silent=False, cache=True): + """Parse :attr:`data` as JSON. + + If the mimetype does not indicate JSON + (:mimetype:`application/json`, see :meth:`is_json`), this + returns ``None``. + + If parsing fails, :meth:`on_json_loading_failed` is called and + its return value is used as the return value. + + :param force: Ignore the mimetype and always try to parse JSON. + :param silent: Silence parsing errors and return ``None`` + instead. + :param cache: Store the parsed JSON to return for subsequent + calls. 
+ """ + if cache and self._cached_json[silent] is not Ellipsis: + return self._cached_json[silent] + + if not (force or self.is_json): + return None + + data = self._get_data_for_json(cache=cache) + + try: + rv = self.json_module.loads(data) + except ValueError as e: + if silent: + rv = None + + if cache: + normal_rv, _ = self._cached_json + self._cached_json = (normal_rv, rv) + else: + rv = self.on_json_loading_failed(e) + + if cache: + _, silent_rv = self._cached_json + self._cached_json = (rv, silent_rv) + else: + if cache: + self._cached_json = (rv, rv) + + return rv + + def on_json_loading_failed(self, e): + """Called if :meth:`get_json` parsing fails and isn't silenced. + If this method returns a value, it is used as the return value + for :meth:`get_json`. The default implementation raises + :exc:`~werkzeug.exceptions.BadRequest`. + """ + raise BadRequest("Failed to decode JSON object: {0}".format(e)) diff --git a/python/werkzeug/wrappers/request.py b/python/werkzeug/wrappers/request.py new file mode 100644 index 0000000..d1c71b6 --- /dev/null +++ b/python/werkzeug/wrappers/request.py @@ -0,0 +1,44 @@ +from .accept import AcceptMixin +from .auth import AuthorizationMixin +from .base_request import BaseRequest +from .common_descriptors import CommonRequestDescriptorsMixin +from .etag import ETagRequestMixin +from .user_agent import UserAgentMixin + + +class Request( + BaseRequest, + AcceptMixin, + ETagRequestMixin, + UserAgentMixin, + AuthorizationMixin, + CommonRequestDescriptorsMixin, +): + """Full featured request object implementing the following mixins: + + - :class:`AcceptMixin` for accept header parsing + - :class:`ETagRequestMixin` for etag and cache control handling + - :class:`UserAgentMixin` for user agent introspection + - :class:`AuthorizationMixin` for http auth handling + - :class:`CommonRequestDescriptorsMixin` for common headers + """ + + +class StreamOnlyMixin(object): + """If mixed in before the request object this will change the bahavior + of it to disable handling of form parsing. This disables the + :attr:`files`, :attr:`form` attributes and will just provide a + :attr:`stream` attribute that however is always available. + + .. versionadded:: 0.9 + """ + + disable_data_descriptor = True + want_form_data_parsed = False + + +class PlainRequest(StreamOnlyMixin, Request): + """A request object without special form parsing capabilities. + + .. versionadded:: 0.9 + """ diff --git a/python/werkzeug/wrappers/response.py b/python/werkzeug/wrappers/response.py new file mode 100644 index 0000000..cd86cac --- /dev/null +++ b/python/werkzeug/wrappers/response.py @@ -0,0 +1,78 @@ +from ..utils import cached_property +from .auth import WWWAuthenticateMixin +from .base_response import BaseResponse +from .common_descriptors import CommonResponseDescriptorsMixin +from .etag import ETagResponseMixin + + +class ResponseStream(object): + """A file descriptor like object used by the :class:`ResponseStreamMixin` to + represent the body of the stream. It directly pushes into the response + iterable of the response object. 
+ """ + + mode = "wb+" + + def __init__(self, response): + self.response = response + self.closed = False + + def write(self, value): + if self.closed: + raise ValueError("I/O operation on closed file") + self.response._ensure_sequence(mutable=True) + self.response.response.append(value) + self.response.headers.pop("Content-Length", None) + return len(value) + + def writelines(self, seq): + for item in seq: + self.write(item) + + def close(self): + self.closed = True + + def flush(self): + if self.closed: + raise ValueError("I/O operation on closed file") + + def isatty(self): + if self.closed: + raise ValueError("I/O operation on closed file") + return False + + def tell(self): + self.response._ensure_sequence() + return sum(map(len, self.response.response)) + + @property + def encoding(self): + return self.response.charset + + +class ResponseStreamMixin(object): + """Mixin for :class:`BaseRequest` subclasses. Classes that inherit from + this mixin will automatically get a :attr:`stream` property that provides + a write-only interface to the response iterable. + """ + + @cached_property + def stream(self): + """The response iterable as write-only stream.""" + return ResponseStream(self) + + +class Response( + BaseResponse, + ETagResponseMixin, + ResponseStreamMixin, + CommonResponseDescriptorsMixin, + WWWAuthenticateMixin, +): + """Full featured response object implementing the following mixins: + + - :class:`ETagResponseMixin` for etag and cache control handling + - :class:`ResponseStreamMixin` to add support for the `stream` property + - :class:`CommonResponseDescriptorsMixin` for various HTTP descriptors + - :class:`WWWAuthenticateMixin` for HTTP authentication support + """ diff --git a/python/werkzeug/wrappers/user_agent.py b/python/werkzeug/wrappers/user_agent.py new file mode 100644 index 0000000..72588dd --- /dev/null +++ b/python/werkzeug/wrappers/user_agent.py @@ -0,0 +1,15 @@ +from ..utils import cached_property + + +class UserAgentMixin(object): + """Adds a `user_agent` attribute to the request object which + contains the parsed user agent of the browser that triggered the + request as a :class:`~werkzeug.useragents.UserAgent` object. + """ + + @cached_property + def user_agent(self): + """The current user agent.""" + from ..useragents import UserAgent + + return UserAgent(self.environ) diff --git a/python/werkzeug/wsgi.py b/python/werkzeug/wsgi.py new file mode 100644 index 0000000..f069f2d --- /dev/null +++ b/python/werkzeug/wsgi.py @@ -0,0 +1,1067 @@ +# -*- coding: utf-8 -*- +""" + werkzeug.wsgi + ~~~~~~~~~~~~~ + + This module implements WSGI related helpers. + + :copyright: 2007 Pallets + :license: BSD-3-Clause +""" +import io +import re +import warnings +from functools import partial +from functools import update_wrapper +from itertools import chain + +from ._compat import BytesIO +from ._compat import implements_iterator +from ._compat import make_literal_wrapper +from ._compat import string_types +from ._compat import text_type +from ._compat import to_bytes +from ._compat import to_unicode +from ._compat import try_coerce_native +from ._compat import wsgi_get_bytes +from ._internal import _encode_idna +from .urls import uri_to_iri +from .urls import url_join +from .urls import url_parse +from .urls import url_quote + + +def responder(f): + """Marks a function as responder. Decorate a function with it and it + will automatically call the return value as WSGI application. 
+ + Example:: + + @responder + def application(environ, start_response): + return Response('Hello World!') + """ + return update_wrapper(lambda *a: f(*a)(*a[-2:]), f) + + + def get_current_url( + environ, + root_only=False, + strip_querystring=False, + host_only=False, + trusted_hosts=None, + ): + """A handy helper function that recreates the full URL as IRI for the + current request or parts of it. Here's an example: + + >>> from werkzeug.test import create_environ + >>> env = create_environ("/?param=foo", "http://localhost/script") + >>> get_current_url(env) + 'http://localhost/script/?param=foo' + >>> get_current_url(env, root_only=True) + 'http://localhost/script/' + >>> get_current_url(env, host_only=True) + 'http://localhost/' + >>> get_current_url(env, strip_querystring=True) + 'http://localhost/script/' + + Optionally, it verifies that the host is in a list of trusted hosts. + If the host is not in there it will raise a + :exc:`~werkzeug.exceptions.SecurityError`. + + Note that the string returned might contain unicode characters as the + representation is an IRI, not a URI. If you need an ASCII-only + representation you can use the :func:`~werkzeug.urls.iri_to_uri` + function: + + >>> from werkzeug.urls import iri_to_uri + >>> iri_to_uri(get_current_url(env)) + 'http://localhost/script/?param=foo' + + :param environ: the WSGI environment to get the current URL from. + :param root_only: set `True` if you only want the root URL. + :param strip_querystring: set to `True` if you don't want the querystring. + :param host_only: set to `True` if the host URL should be returned. + :param trusted_hosts: a list of trusted hosts, see :func:`host_is_trusted` + for more information. + """ + tmp = [environ["wsgi.url_scheme"], "://", get_host(environ, trusted_hosts)] + cat = tmp.append + if host_only: + return uri_to_iri("".join(tmp) + "/") + cat(url_quote(wsgi_get_bytes(environ.get("SCRIPT_NAME", ""))).rstrip("/")) + cat("/") + if not root_only: + cat(url_quote(wsgi_get_bytes(environ.get("PATH_INFO", "")).lstrip(b"/"))) + if not strip_querystring: + qs = get_query_string(environ) + if qs: + cat("?" + qs) + return uri_to_iri("".join(tmp)) + + + def host_is_trusted(hostname, trusted_list): + """Checks if a host is trusted against a list. This also takes care + of port normalization. + + .. versionadded:: 0.9 + + :param hostname: the hostname to check + :param trusted_list: a list of hostnames to check against. If a + hostname starts with a dot it will match against + all subdomains as well. + """ + if not hostname: + return False + + if isinstance(trusted_list, string_types): + trusted_list = [trusted_list] + + def _normalize(hostname): + if ":" in hostname: + hostname = hostname.rsplit(":", 1)[0] + return _encode_idna(hostname) + + try: + hostname = _normalize(hostname) + except UnicodeError: + return False + for ref in trusted_list: + if ref.startswith("."): + ref = ref[1:] + suffix_match = True + else: + suffix_match = False + try: + ref = _normalize(ref) + except UnicodeError: + return False + if ref == hostname: + return True + if suffix_match and hostname.endswith(b"." + ref): + return True + return False + + + def get_host(environ, trusted_hosts=None): + """Return the host for the given WSGI environment. This first checks + the ``Host`` header. If it's not present, then ``SERVER_NAME`` and + ``SERVER_PORT`` are used. The host will only contain the port if it + is different from the standard port for the protocol.
+ + Optionally, verify that the host is trusted using + :func:`host_is_trusted` and raise a + :exc:`~werkzeug.exceptions.SecurityError` if it is not. + + :param environ: The WSGI environment to get the host from. + :param trusted_hosts: A list of trusted hosts. + :return: Host, with port if necessary. + :raise ~werkzeug.exceptions.SecurityError: If the host is not + trusted. + """ + if "HTTP_HOST" in environ: + rv = environ["HTTP_HOST"] + if environ["wsgi.url_scheme"] == "http" and rv.endswith(":80"): + rv = rv[:-3] + elif environ["wsgi.url_scheme"] == "https" and rv.endswith(":443"): + rv = rv[:-4] + else: + rv = environ["SERVER_NAME"] + if (environ["wsgi.url_scheme"], environ["SERVER_PORT"]) not in ( + ("https", "443"), + ("http", "80"), + ): + rv += ":" + environ["SERVER_PORT"] + if trusted_hosts is not None: + if not host_is_trusted(rv, trusted_hosts): + from .exceptions import SecurityError + + raise SecurityError('Host "%s" is not trusted' % rv) + return rv + + +def get_content_length(environ): + """Returns the content length from the WSGI environment as + integer. If it's not available or chunked transfer encoding is used, + ``None`` is returned. + + .. versionadded:: 0.9 + + :param environ: the WSGI environ to fetch the content length from. + """ + if environ.get("HTTP_TRANSFER_ENCODING", "") == "chunked": + return None + + content_length = environ.get("CONTENT_LENGTH") + if content_length is not None: + try: + return max(0, int(content_length)) + except (ValueError, TypeError): + pass + + +def get_input_stream(environ, safe_fallback=True): + """Returns the input stream from the WSGI environment and wraps it + in the most sensible way possible. The stream returned is not the + raw WSGI stream in most cases but one that is safe to read from + without taking into account the content length. + + If content length is not set, the stream will be empty for safety reasons. + If the WSGI server supports chunked or infinite streams, it should set + the ``wsgi.input_terminated`` value in the WSGI environ to indicate that. + + .. versionadded:: 0.9 + + :param environ: the WSGI environ to fetch the stream from. + :param safe_fallback: use an empty stream as a safe fallback when the + content length is not set. Disabling this allows infinite streams, + which can be a denial-of-service risk. + """ + stream = environ["wsgi.input"] + content_length = get_content_length(environ) + + # A wsgi extension that tells us if the input is terminated. In + # that case we return the stream unchanged as we know we can safely + # read it until the end. + if environ.get("wsgi.input_terminated"): + return stream + + # If the request doesn't specify a content length, returning the stream is + # potentially dangerous because it could be infinite, malicious or not. If + # safe_fallback is true, return an empty stream instead for safety. + if content_length is None: + return BytesIO() if safe_fallback else stream + + # Otherwise limit the stream to the content length + return LimitedStream(stream, content_length) + + +def get_query_string(environ): + """Returns the `QUERY_STRING` from the WSGI environment. This also takes + care about the WSGI decoding dance on Python 3 environments as a + native string. The string returned will be restricted to ASCII + characters. + + .. versionadded:: 0.9 + + :param environ: the WSGI environment object to get the query string from. 
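Taken together, a small sketch of these environ helpers (values are illustrative):

    from werkzeug.test import create_environ
    from werkzeug.wsgi import get_content_length, get_host, get_input_stream

    env = create_environ("/upload", "http://example.com:80/", data=b"payload")
    print(get_host(env))                 # 'example.com' -- default port dropped
    print(get_content_length(env))       # 7
    print(get_input_stream(env).read())  # b'payload', via a LimitedStream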
+ """ + qs = wsgi_get_bytes(environ.get("QUERY_STRING", "")) + # QUERY_STRING really should be ascii safe but some browsers + # will send us some unicode stuff (I am looking at you IE). + # In that case we want to urllib quote it badly. + return try_coerce_native(url_quote(qs, safe=":&%=+$!*'(),")) + + +def get_path_info(environ, charset="utf-8", errors="replace"): + """Returns the `PATH_INFO` from the WSGI environment and properly + decodes it. This also takes care about the WSGI decoding dance + on Python 3 environments. if the `charset` is set to `None` a + bytestring is returned. + + .. versionadded:: 0.9 + + :param environ: the WSGI environment object to get the path from. + :param charset: the charset for the path info, or `None` if no + decoding should be performed. + :param errors: the decoding error handling. + """ + path = wsgi_get_bytes(environ.get("PATH_INFO", "")) + return to_unicode(path, charset, errors, allow_none_charset=True) + + +def get_script_name(environ, charset="utf-8", errors="replace"): + """Returns the `SCRIPT_NAME` from the WSGI environment and properly + decodes it. This also takes care about the WSGI decoding dance + on Python 3 environments. if the `charset` is set to `None` a + bytestring is returned. + + .. versionadded:: 0.9 + + :param environ: the WSGI environment object to get the path from. + :param charset: the charset for the path, or `None` if no + decoding should be performed. + :param errors: the decoding error handling. + """ + path = wsgi_get_bytes(environ.get("SCRIPT_NAME", "")) + return to_unicode(path, charset, errors, allow_none_charset=True) + + +def pop_path_info(environ, charset="utf-8", errors="replace"): + """Removes and returns the next segment of `PATH_INFO`, pushing it onto + `SCRIPT_NAME`. Returns `None` if there is nothing left on `PATH_INFO`. + + If the `charset` is set to `None` a bytestring is returned. + + If there are empty segments (``'/foo//bar``) these are ignored but + properly pushed to the `SCRIPT_NAME`: + + >>> env = {'SCRIPT_NAME': '/foo', 'PATH_INFO': '/a/b'} + >>> pop_path_info(env) + 'a' + >>> env['SCRIPT_NAME'] + '/foo/a' + >>> pop_path_info(env) + 'b' + >>> env['SCRIPT_NAME'] + '/foo/a/b' + + .. versionadded:: 0.5 + + .. versionchanged:: 0.9 + The path is now decoded and a charset and encoding + parameter can be provided. + + :param environ: the WSGI environment that is modified. + """ + path = environ.get("PATH_INFO") + if not path: + return None + + script_name = environ.get("SCRIPT_NAME", "") + + # shift multiple leading slashes over + old_path = path + path = path.lstrip("/") + if path != old_path: + script_name += "/" * (len(old_path) - len(path)) + + if "/" not in path: + environ["PATH_INFO"] = "" + environ["SCRIPT_NAME"] = script_name + path + rv = wsgi_get_bytes(path) + else: + segment, path = path.split("/", 1) + environ["PATH_INFO"] = "/" + path + environ["SCRIPT_NAME"] = script_name + segment + rv = wsgi_get_bytes(segment) + + return to_unicode(rv, charset, errors, allow_none_charset=True) + + +def peek_path_info(environ, charset="utf-8", errors="replace"): + """Returns the next segment on the `PATH_INFO` or `None` if there + is none. Works like :func:`pop_path_info` without modifying the + environment: + + >>> env = {'SCRIPT_NAME': '/foo', 'PATH_INFO': '/a/b'} + >>> peek_path_info(env) + 'a' + >>> peek_path_info(env) + 'a' + + If the `charset` is set to `None` a bytestring is returned. + + .. versionadded:: 0.5 + + .. 
versionchanged:: 0.9 + The path is now decoded and a charset and encoding + parameter can be provided. + + :param environ: the WSGI environment that is checked. + """ + segments = environ.get("PATH_INFO", "").lstrip("/").split("/", 1) + if segments: + return to_unicode( + wsgi_get_bytes(segments[0]), charset, errors, allow_none_charset=True + ) + + +def extract_path_info( + environ_or_baseurl, + path_or_url, + charset="utf-8", + errors="werkzeug.url_quote", + collapse_http_schemes=True, +): + """Extracts the path info from the given URL (or WSGI environment) and + path. The path info returned is a unicode string, not a bytestring + suitable for a WSGI environment. The URLs might also be IRIs. + + If the path info could not be determined, `None` is returned. + + Some examples: + + >>> extract_path_info('http://example.com/app', '/app/hello') + u'/hello' + >>> extract_path_info('http://example.com/app', + ... 'https://example.com/app/hello') + u'/hello' + >>> extract_path_info('http://example.com/app', + ... 'https://example.com/app/hello', + ... collapse_http_schemes=False) is None + True + + Instead of providing a base URL you can also pass a WSGI environment. + + :param environ_or_baseurl: a WSGI environment dict, a base URL or + base IRI. This is the root of the + application. + :param path_or_url: an absolute path from the server root, a + relative path (in which case it's the path info) + or a full URL. Also accepts IRIs and unicode + parameters. + :param charset: the charset for byte data in URLs + :param errors: the error handling on decode + :param collapse_http_schemes: if set to `False` the algorithm does + not assume that http and https on the + same server point to the same + resource. + + .. versionchanged:: 0.15 + The ``errors`` parameter defaults to leaving invalid bytes + quoted instead of replacing them. + + .. versionadded:: 0.6 + """ + + def _normalize_netloc(scheme, netloc): + parts = netloc.split(u"@", 1)[-1].split(u":", 1) + if len(parts) == 2: + netloc, port = parts + if (scheme == u"http" and port == u"80") or ( + scheme == u"https" and port == u"443" + ): + port = None + else: + netloc = parts[0] + port = None + if port is not None: + netloc += u":" + port + return netloc + + # make sure whatever we are working on is a IRI and parse it + path = uri_to_iri(path_or_url, charset, errors) + if isinstance(environ_or_baseurl, dict): + environ_or_baseurl = get_current_url(environ_or_baseurl, root_only=True) + base_iri = uri_to_iri(environ_or_baseurl, charset, errors) + base_scheme, base_netloc, base_path = url_parse(base_iri)[:3] + cur_scheme, cur_netloc, cur_path, = url_parse(url_join(base_iri, path))[:3] + + # normalize the network location + base_netloc = _normalize_netloc(base_scheme, base_netloc) + cur_netloc = _normalize_netloc(cur_scheme, cur_netloc) + + # is that IRI even on a known HTTP scheme? + if collapse_http_schemes: + for scheme in base_scheme, cur_scheme: + if scheme not in (u"http", u"https"): + return None + else: + if not (base_scheme in (u"http", u"https") and base_scheme == cur_scheme): + return None + + # are the netlocs compatible? + if base_netloc != cur_netloc: + return None + + # are we below the application path? 
+ base_path = base_path.rstrip(u"/") + if not cur_path.startswith(base_path): + return None + + return u"/" + cur_path[len(base_path) :].lstrip(u"/") + + +@implements_iterator +class ClosingIterator(object): + """The WSGI specification requires that all middlewares and gateways + respect the `close` callback of the iterable returned by the application. + Because it is useful to add another close action to a returned iterable + and adding a custom iterable is a boring task this class can be used for + that:: + + return ClosingIterator(app(environ, start_response), [cleanup_session, + cleanup_locals]) + + If there is just one close function it can be passed instead of the list. + + A closing iterator is not needed if the application uses response objects + and finishes the processing if the response is started:: + + try: + return response(environ, start_response) + finally: + cleanup_session() + cleanup_locals() + """ + + def __init__(self, iterable, callbacks=None): + iterator = iter(iterable) + self._next = partial(next, iterator) + if callbacks is None: + callbacks = [] + elif callable(callbacks): + callbacks = [callbacks] + else: + callbacks = list(callbacks) + iterable_close = getattr(iterable, "close", None) + if iterable_close: + callbacks.insert(0, iterable_close) + self._callbacks = callbacks + + def __iter__(self): + return self + + def __next__(self): + return self._next() + + def close(self): + for callback in self._callbacks: + callback() + + +def wrap_file(environ, file, buffer_size=8192): + """Wraps a file. This uses the WSGI server's file wrapper if available + or otherwise the generic :class:`FileWrapper`. + + .. versionadded:: 0.5 + + If the file wrapper from the WSGI server is used it's important to not + iterate over it from inside the application but to pass it through + unchanged. If you want to pass out a file wrapper inside a response + object you have to set :attr:`~BaseResponse.direct_passthrough` to `True`. + + More information about file wrappers are available in :pep:`333`. + + :param file: a :class:`file`-like object with a :meth:`~file.read` method. + :param buffer_size: number of bytes for one iteration. + """ + return environ.get("wsgi.file_wrapper", FileWrapper)(file, buffer_size) + + +@implements_iterator +class FileWrapper(object): + """This class can be used to convert a :class:`file`-like object into + an iterable. It yields `buffer_size` blocks until the file is fully + read. + + You should not use this class directly but rather use the + :func:`wrap_file` function that uses the WSGI server's file wrapper + support if it's available. + + .. versionadded:: 0.5 + + If you're using this object together with a :class:`BaseResponse` you have + to use the `direct_passthrough` mode. + + :param file: a :class:`file`-like object with a :meth:`~file.read` method. + :param buffer_size: number of bytes for one iteration. 
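The docstring above shows the intended pattern; here is the same idea as a complete, runnable WSGI callable (the cleanup function is illustrative):

    from werkzeug.wsgi import ClosingIterator

    def application(environ, start_response):
        def cleanup_session():
            print("session closed")  # runs when the server closes the iterable
        start_response("200 OK", [("Content-Type", "text/plain")])
        return ClosingIterator([b"Hello World!"], cleanup_session)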
+ """ + + def __init__(self, file, buffer_size=8192): + self.file = file + self.buffer_size = buffer_size + + def close(self): + if hasattr(self.file, "close"): + self.file.close() + + def seekable(self): + if hasattr(self.file, "seekable"): + return self.file.seekable() + if hasattr(self.file, "seek"): + return True + return False + + def seek(self, *args): + if hasattr(self.file, "seek"): + self.file.seek(*args) + + def tell(self): + if hasattr(self.file, "tell"): + return self.file.tell() + return None + + def __iter__(self): + return self + + def __next__(self): + data = self.file.read(self.buffer_size) + if data: + return data + raise StopIteration() + + +@implements_iterator +class _RangeWrapper(object): + # private for now, but should we make it public in the future ? + + """This class can be used to convert an iterable object into + an iterable that will only yield a piece of the underlying content. + It yields blocks until the underlying stream range is fully read. + The yielded blocks will have a size that can't exceed the original + iterator defined block size, but that can be smaller. + + If you're using this object together with a :class:`BaseResponse` you have + to use the `direct_passthrough` mode. + + :param iterable: an iterable object with a :meth:`__next__` method. + :param start_byte: byte from which read will start. + :param byte_range: how many bytes to read. + """ + + def __init__(self, iterable, start_byte=0, byte_range=None): + self.iterable = iter(iterable) + self.byte_range = byte_range + self.start_byte = start_byte + self.end_byte = None + if byte_range is not None: + self.end_byte = self.start_byte + self.byte_range + self.read_length = 0 + self.seekable = hasattr(iterable, "seekable") and iterable.seekable() + self.end_reached = False + + def __iter__(self): + return self + + def _next_chunk(self): + try: + chunk = next(self.iterable) + self.read_length += len(chunk) + return chunk + except StopIteration: + self.end_reached = True + raise + + def _first_iteration(self): + chunk = None + if self.seekable: + self.iterable.seek(self.start_byte) + self.read_length = self.iterable.tell() + contextual_read_length = self.read_length + else: + while self.read_length <= self.start_byte: + chunk = self._next_chunk() + if chunk is not None: + chunk = chunk[self.start_byte - self.read_length :] + contextual_read_length = self.start_byte + return chunk, contextual_read_length + + def _next(self): + if self.end_reached: + raise StopIteration() + chunk = None + contextual_read_length = self.read_length + if self.read_length == 0: + chunk, contextual_read_length = self._first_iteration() + if chunk is None: + chunk = self._next_chunk() + if self.end_byte is not None and self.read_length >= self.end_byte: + self.end_reached = True + return chunk[: self.end_byte - contextual_read_length] + return chunk + + def __next__(self): + chunk = self._next() + if chunk: + return chunk + self.end_reached = True + raise StopIteration() + + def close(self): + if hasattr(self.iterable, "close"): + self.iterable.close() + + +def _make_chunk_iter(stream, limit, buffer_size): + """Helper for the line and chunk iter functions.""" + if isinstance(stream, (bytes, bytearray, text_type)): + raise TypeError( + "Passed a string or byte object instead of true iterator or stream." 
+ ) + if not hasattr(stream, "read"): + for item in stream: + if item: + yield item + return + if not isinstance(stream, LimitedStream) and limit is not None: + stream = LimitedStream(stream, limit) + _read = stream.read + while 1: + item = _read(buffer_size) + if not item: + break + yield item + + + def make_line_iter(stream, limit=None, buffer_size=10 * 1024, cap_at_buffer=False): + """Safely iterates over an input stream line by line. If the input stream + is not a :class:`LimitedStream` the `limit` parameter is mandatory. + + This uses the stream's :meth:`~file.read` method internally as opposed + to the :meth:`~file.readline` method that is unsafe and can only be used + in violation of the WSGI specification. The same problem applies to the + `__iter__` function of the input stream which calls :meth:`~file.readline` + without arguments. + + If you need line-by-line processing it's strongly recommended to iterate + over the input stream using this helper function. + + .. versionchanged:: 0.8 + This function now ensures that the limit was reached. + + .. versionadded:: 0.9 + added support for iterators as input stream. + + .. versionadded:: 0.11.10 + added support for the `cap_at_buffer` parameter. + + :param stream: the stream or iterable to iterate over. + :param limit: the limit in bytes for the stream. (Usually + content length. Not necessary if the `stream` + is a :class:`LimitedStream`.) + :param buffer_size: The optional buffer size. + :param cap_at_buffer: if this is set chunks are split if they are longer + than the buffer size. Internally this is implemented + such that the buffer size might be exhausted by a factor + of two, however. + """ + _iter = _make_chunk_iter(stream, limit, buffer_size) + + first_item = next(_iter, "") + if not first_item: + return + + s = make_literal_wrapper(first_item) + empty = s("") + cr = s("\r") + lf = s("\n") + crlf = s("\r\n") + + _iter = chain((first_item,), _iter) + + def _iter_basic_lines(): + _join = empty.join + buffer = [] + while 1: + new_data = next(_iter, "") + if not new_data: + break + new_buf = [] + buf_size = 0 + for item in chain(buffer, new_data.splitlines(True)): + new_buf.append(item) + buf_size += len(item) + if item and item[-1:] in crlf: + yield _join(new_buf) + new_buf = [] + elif cap_at_buffer and buf_size >= buffer_size: + rv = _join(new_buf) + while len(rv) >= buffer_size: + yield rv[:buffer_size] + rv = rv[buffer_size:] + new_buf = [rv] + buffer = new_buf + if buffer: + yield _join(buffer) + + # This hackery is necessary to merge 'foo\r' and '\n' into one item + # of 'foo\r\n' if we were unlucky and we hit a chunk boundary. + previous = empty + for item in _iter_basic_lines(): + if item == lf and previous[-1:] == cr: + previous += item + item = empty + if previous: + yield previous + previous = item + if previous: + yield previous + + + def make_chunk_iter( + stream, separator, limit=None, buffer_size=10 * 1024, cap_at_buffer=False + ): + """Works like :func:`make_line_iter` but accepts a separator + which divides chunks. If you want newline based processing + you should use :func:`make_line_iter` instead as it + supports arbitrary newline markers. + + .. versionadded:: 0.8 + + .. versionadded:: 0.9 + added support for iterators as input stream. + + .. versionadded:: 0.11.10 + added support for the `cap_at_buffer` parameter. + + :param stream: the stream or iterable to iterate over. + :param separator: the separator that divides chunks. + :param limit: the limit in bytes for the stream. (Usually + content length.
Not necessary if the `stream` + is otherwise already limited). + :param buffer_size: The optional buffer size. + :param cap_at_buffer: if this is set chunks are split if they are longer + than the buffer size. Internally this is implemented + such that the buffer size might be exhausted by a factor + of two, however. + """ + _iter = _make_chunk_iter(stream, limit, buffer_size) + + first_item = next(_iter, "") + if not first_item: + return + + _iter = chain((first_item,), _iter) + if isinstance(first_item, text_type): + separator = to_unicode(separator) + _split = re.compile(r"(%s)" % re.escape(separator)).split + _join = u"".join + else: + separator = to_bytes(separator) + _split = re.compile(b"(" + re.escape(separator) + b")").split + _join = b"".join + + buffer = [] + while 1: + new_data = next(_iter, "") + if not new_data: + break + chunks = _split(new_data) + new_buf = [] + buf_size = 0 + for item in chain(buffer, chunks): + if item == separator: + yield _join(new_buf) + new_buf = [] + buf_size = 0 + else: + buf_size += len(item) + new_buf.append(item) + + if cap_at_buffer and buf_size >= buffer_size: + rv = _join(new_buf) + while len(rv) >= buffer_size: + yield rv[:buffer_size] + rv = rv[buffer_size:] + new_buf = [rv] + buf_size = len(rv) + + buffer = new_buf + if buffer: + yield _join(buffer) + + + @implements_iterator + class LimitedStream(io.IOBase): + """Wraps a stream so that it doesn't read more than n bytes. If the + stream is exhausted and the caller tries to get more bytes from it + :func:`on_exhausted` is called which by default returns an empty + string. The return value of that function is forwarded + to the reader function. So if it returns an empty string + :meth:`read` will return an empty string as well. + + The limit however must never be higher than what the stream can + output. Otherwise :meth:`readlines` will try to read past the + limit. + + .. admonition:: Note on WSGI compliance + + Calls to :meth:`readline` and :meth:`readlines` are not + WSGI compliant because they pass a size argument to the + readline methods. Unfortunately the WSGI PEP is not safely + implementable without a size argument to :meth:`readline` + because there is no EOF marker in the stream. As a result + of that the use of :meth:`readline` is discouraged. + + For the same reason iterating over the :class:`LimitedStream` + is not portable. It internally calls :meth:`readline`. + + We strongly suggest using :meth:`read` only or using + :func:`make_line_iter`, which safely iterates line by line + over a WSGI input stream. + + :param stream: the stream to wrap. + :param limit: the limit for the stream, must not be longer than + what the stream can provide if the stream does not + end with `EOF` (like `wsgi.input`) + """ + + def __init__(self, stream, limit): + self._read = stream.read + self._readline = stream.readline + self._pos = 0 + self.limit = limit + + def __iter__(self): + return self + + @property + def is_exhausted(self): + """If the stream is exhausted this attribute is `True`.""" + return self._pos >= self.limit + + def on_exhausted(self): + """This is called when the stream tries to read past the limit. + The return value of this function is returned from the reading + function. + """ + # Read null bytes from the stream so that we get the + # correct end of stream marker. + return self._read(0) + + def on_disconnect(self): + """What should happen if a disconnect is detected? The return + value of this function is returned from read functions in case + the client went away.
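A quick sketch of ``make_line_iter`` from above, which the chunk helpers generalize (the input is illustrative; ``limit`` is required because the stream is not a ``LimitedStream``):

    from io import BytesIO
    from werkzeug.wsgi import make_line_iter

    stream = BytesIO(b"first\r\nsecond\nlast")
    print(list(make_line_iter(stream, limit=18)))
    # [b'first\r\n', b'second\n', b'last']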
By default a + :exc:`~werkzeug.exceptions.ClientDisconnected` exception is raised. + """ + from .exceptions import ClientDisconnected + + raise ClientDisconnected() + + def exhaust(self, chunk_size=1024 * 64): + """Exhaust the stream. This consumes all the data left until the + limit is reached. + + :param chunk_size: the size for a chunk. It will read the chunk + until the stream is exhausted and throw away + the results. + """ + to_read = self.limit - self._pos + chunk = chunk_size + while to_read > 0: + chunk = min(to_read, chunk) + self.read(chunk) + to_read -= chunk + + def read(self, size=None): + """Read `size` bytes or if size is not provided everything is read. + + :param size: the number of bytes read. + """ + if self._pos >= self.limit: + return self.on_exhausted() + if size is None or size == -1: # -1 is for consistency with file + size = self.limit + to_read = min(self.limit - self._pos, size) + try: + read = self._read(to_read) + except (IOError, ValueError): + return self.on_disconnect() + if to_read and len(read) != to_read: + return self.on_disconnect() + self._pos += len(read) + return read + + def readline(self, size=None): + """Reads one line from the stream.""" + if self._pos >= self.limit: + return self.on_exhausted() + if size is None: + size = self.limit - self._pos + else: + size = min(size, self.limit - self._pos) + try: + line = self._readline(size) + except (ValueError, IOError): + return self.on_disconnect() + if size and not line: + return self.on_disconnect() + self._pos += len(line) + return line + + def readlines(self, size=None): + """Reads a file into a list of strings. It calls :meth:`readline` + until the file is read to the end. It does support the optional + `size` argument if the underlying stream supports it for + `readline`. + """ + last_pos = self._pos + result = [] + if size is not None: + end = min(self.limit, last_pos + size) + else: + end = self.limit + while 1: + if size is not None: + size -= last_pos - self._pos + if self._pos >= end: + break + result.append(self.readline(size)) + if size is not None: + last_pos = self._pos + return result + + def tell(self): + """Returns the position of the stream. + + .. versionadded:: 0.9 + """ + return self._pos + + def __next__(self): + line = self.readline() + if not line: + raise StopIteration() + return line + + def readable(self): + return True + + + # DEPRECATED + from .middleware.dispatcher import DispatcherMiddleware as _DispatcherMiddleware + from .middleware.http_proxy import ProxyMiddleware as _ProxyMiddleware + from .middleware.shared_data import SharedDataMiddleware as _SharedDataMiddleware + + + class ProxyMiddleware(_ProxyMiddleware): + """ + .. deprecated:: 0.15 + ``werkzeug.wsgi.ProxyMiddleware`` has moved to + :mod:`werkzeug.middleware.http_proxy`. This import will be + removed in 1.0. + """ + + def __init__(self, *args, **kwargs): + warnings.warn( + "'werkzeug.wsgi.ProxyMiddleware' has moved to 'werkzeug" + ".middleware.http_proxy.ProxyMiddleware'. This import is" + " deprecated as of version 0.15 and will be removed in" + " version 1.0.", + DeprecationWarning, + stacklevel=2, + ) + super(ProxyMiddleware, self).__init__(*args, **kwargs) + + + class SharedDataMiddleware(_SharedDataMiddleware): + """ + .. deprecated:: 0.15 + ``werkzeug.wsgi.SharedDataMiddleware`` has moved to + :mod:`werkzeug.middleware.shared_data`. This import will be + removed in 1.0.
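A short sketch of the ``LimitedStream`` behavior described above (the extra bytes are illustrative):

    from io import BytesIO
    from werkzeug.wsgi import LimitedStream

    # Pretend wsgi.input holds more data than Content-Length promises.
    stream = LimitedStream(BytesIO(b"0123456789EXTRA"), limit=10)
    print(stream.read(4))       # b'0123'
    print(stream.read())        # b'456789' -- stops at the limit
    print(stream.is_exhausted)  # True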
+ """ + + def __init__(self, *args, **kwargs): + warnings.warn( + "'werkzeug.wsgi.SharedDataMiddleware' has moved to" + " 'werkzeug.middleware.shared_data.SharedDataMiddleware'." + " This import is deprecated as of version 0.15 and will be" + " removed in version 1.0.", + DeprecationWarning, + stacklevel=2, + ) + super(SharedDataMiddleware, self).__init__(*args, **kwargs) + + +class DispatcherMiddleware(_DispatcherMiddleware): + """ + .. deprecated:: 0.15 + ``werkzeug.wsgi.DispatcherMiddleware`` has moved to + :mod:`werkzeug.middleware.dispatcher`. This import will be + removed in 1.0. + """ + + def __init__(self, *args, **kwargs): + warnings.warn( + "'werkzeug.wsgi.DispatcherMiddleware' has moved to" + " 'werkzeug.middleware.dispatcher.DispatcherMiddleware'." + " This import is deprecated as of version 0.15 and will be" + " removed in version 1.0.", + DeprecationWarning, + stacklevel=2, + ) + super(DispatcherMiddleware, self).__init__(*args, **kwargs) @@ -2,9 +2,12 @@ from gevent import monkey monkey.patch_all() import gevent.socket -from youtube.youtube import youtube +from youtube import yt_app from youtube import util -import http_errors + +# these are just so the files get run - they import yt_app and add routes to it +from youtube import watch, search, playlist, channel, local_playlist, comments, post_comment + import settings from gevent.pywsgi import WSGIServer @@ -22,7 +25,7 @@ def youtu_be(env, start_response): id = env['PATH_INFO'][1:] env['PATH_INFO'] = '/watch' env['QUERY_STRING'] = 'v=' + id - return youtube(env, start_response) + yield from yt_app(env, start_response) def proxy_site(env, start_response): headers = { @@ -41,10 +44,10 @@ def proxy_site(env, start_response): headers = headers.items() start_response('200 OK', headers ) - return content + yield content site_handlers = { - 'youtube.com':youtube, + 'youtube.com':yt_app, 'youtu.be':youtu_be, 'ytimg.com': proxy_site, 'yt3.ggpht.com': proxy_site, @@ -96,29 +99,11 @@ def site_dispatch(env, start_response): except KeyError: continue else: - yield handler(env, start_response) + yield from handler(env, start_response) break else: # did not break yield error_code('404 Not Found', start_response) return - - except http_errors.Code200 as e: # Raised in scenarios where a simple status message is to be returned, such as a terminated channel - start_response('200 OK', ()) - yield str(e).encode('utf-8') - - except http_errors.Error404 as e: - start_response('404 Not Found', ()) - yield str(e).encode('utf-8') - - except urllib.error.HTTPError as e: - start_response(str(e.code) + ' ' + e.reason, ()) - yield b'While fetching url, the following error occured:\n' + str(e).encode('utf-8') - - except socket.error as e: - start_response('502 Bad Gateway', ()) - print(str(e)) - yield b'502 Bad Gateway' - except Exception: start_response('500 Internal Server Error', ()) yield b'500 Internal Server Error' diff --git a/settings.py b/settings.py index 2a3885f..4aedd19 100644 --- a/settings.py +++ b/settings.py @@ -1,36 +1,139 @@ import ast import re import os +import collections -default_settings = '''route_tor = False -port_number = 8080 -allow_foreign_addresses = False +settings_info = collections.OrderedDict([ + ('route_tor', { + 'type': bool, + 'default': False, + 'comment': '', + }), -# 0 - off by default -# 1 - only manually created subtitles on by default -# 2 - enable even if automatically generated is all that's available -subtitles_mode = 0 + ('port_number', { + 'type': int, + 'default': 8080, + 'comment': '', + }), -# ISO 639 
language code: https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes -subtitles_language = "en" + ('allow_foreign_addresses', { + 'type': bool, + 'default': False, + 'comment': '''This will allow others to connect to your Youtube Local instance as a website. +For security reasons, enabling this is not recommended.''', + }), -enable_related_videos = True -enable_comments = True -enable_comment_avatars = True + ('subtitles_mode', { + 'type': int, + 'default': 0, + 'comment': '''0 - off by default +1 - only manually created subtitles on by default +2 - enable even if automatically generated is all that's available''', + }), -# 0 to sort by top -# 1 to sort by newest -default_comment_sorting = 0 + ('subtitles_language', { + 'type': str, + 'default': 'en', + 'comment': '''ISO 639 language code: https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes''', + }), -# developer use to debug 403s -gather_googlevideo_domains = False -''' -exec(default_settings) -allowed_targets = set(("route_tor", "port_number", "allow_foreign_addresses", "subtitles_mode", "subtitles_language", "enable_related_videos", "enable_comments", "enable_comment_avatars", "default_comment_sorting", "gather_googlevideo_domains")) + ('related_videos_mode', { + 'type': int, + 'default': 1, + 'comment': '''0 - Related videos disabled +1 - Related videos always shown +2 - Related videos hidden; shown by clicking a button''' + }), -def log_ignored_line(line_number, message): - print("settings.txt: Ignoring line " + str(node.lineno) + " (" + message + ")") + ('comments_mode', { + 'type': int, + 'default': 1, + 'comment': '''0 - Video comments disabled +1 - Video comments always shown +2 - Video comments hidden; shown by clicking a button''', + }), + + ('enable_comment_avatars', { + 'type': bool, + 'default': True, + 'comment': '', + }), + + ('default_comment_sorting', { + 'type': int, + 'default': 0, + 'comment': '''0 to sort by top +1 to sort by newest''', + }), + + ('gather_googlevideo_domains', { + 'type': bool, + 'default': False, + 'comment': '''Developer use to debug 403s''', + }), + + ('debugging_save_responses', { + 'type': bool, + 'default': False, + 'comment': '''Save all responses from youtube for debugging''', + }), + + ('settings_version', { + 'type': int, + 'default': 2, + 'comment': '''Do not change, remove, or comment out this value, or else your settings may be lost or corrupted''' + }), +]) + +acceptable_targets = settings_info.keys() | {'enable_comments', 'enable_related_videos'} + + +def comment_string(comment): + result = '' + for line in comment.splitlines(): + result += '# ' + line + '\n' + return result + + +def create_missing_settings_string(current_settings): + result = '' + for setting_name, setting_dict in settings_info.items(): + if setting_name not in current_settings: + result += comment_string(setting_dict['comment']) + setting_name + ' = ' + repr(setting_dict['default']) + '\n\n' + return result + +def create_default_settings_string(): + return settings_to_string({}) +def default_settings(): + return {key: setting_info['default'] for key, setting_info in settings_info.items()} + +def settings_to_string(settings): + '''Given a dictionary with the setting names/setting values for the keys/values, outputs a settings file string. 
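+    Each setting is written as its comment (rendered as '#' lines by comment_string) followed by setting_name = repr(value).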
+    Fills in missing values from the defaults.'''
+    result = ''
+    for setting_name, default_setting_dict in settings_info.items():
+        if setting_name in settings:
+            value = settings[setting_name]
+        else:
+            value = default_setting_dict['default']
+        result += comment_string(default_setting_dict['comment']) + setting_name + ' = ' + repr(value) + '\n\n'
+    return result
+
+
+def upgrade_to_2(current_settings):
+    '''Upgrade to settings version 2'''
+    new_settings = current_settings.copy()
+    if 'enable_comments' in current_settings:
+        new_settings['comments_mode'] = int(current_settings['enable_comments'])
+        del new_settings['enable_comments']
+    if 'enable_related_videos' in current_settings:
+        new_settings['related_videos_mode'] = int(current_settings['enable_related_videos'])
+        del new_settings['enable_related_videos']
+    return new_settings
+
+def log_ignored_line(line_number, message):
+    print("WARNING: Ignoring settings.txt line " + str(line_number) + " (" + message + ")")
 
 if os.path.isfile("settings.txt"):
     print("Running in portable mode")
@@ -43,18 +146,23 @@ else:
     if not os.path.exists(settings_dir):
         os.makedirs(settings_dir)
 
+settings_file_path = os.path.join(settings_dir, 'settings.txt')
+
+locals().update(default_settings())
 try:
-    with open(os.path.join(settings_dir, 'settings.txt'), 'r', encoding='utf-8') as file:
+    with open(settings_file_path, 'r', encoding='utf-8') as file:
         settings_text = file.read()
 except FileNotFoundError:
-    with open(os.path.join(settings_dir, 'settings.txt'), 'a', encoding='utf-8') as file:
-        file.write(default_settings)
+    with open(settings_file_path, 'w', encoding='utf-8') as file:
+        file.write(create_default_settings_string())
 else:
     if re.fullmatch(r'\s*', settings_text):     # blank file
-        with open(os.path.join(settings_dir, 'settings.txt'), 'a', encoding='utf-8') as file:
-            file.write(default_settings)
+        with open(settings_file_path, 'w', encoding='utf-8') as file:
+            file.write(create_default_settings_string())
     else:
+        # parse settings in a safe way, without exec
+        current_settings = {}
         attributes = {
             ast.NameConstant: 'value',
             ast.Num: 'n',
@@ -75,15 +183,37 @@ else:
                 log_ignored_line(node.lineno, "only simple single-variable assignments allowed")
                 continue
 
-            if target.id not in allowed_targets:
-                log_ignored_line(node.lineno, "target is not a valid setting")
+            if target.id not in acceptable_targets:
+                log_ignored_line(node.lineno, target.id + " is not a valid setting")
                 continue
 
             if type(node.value) not in (ast.NameConstant, ast.Num, ast.Str):
                 log_ignored_line(node.lineno, "only literals allowed for values")
                 continue
 
-            locals()[target.id] = node.value.__getattribute__(attributes[type(node.value)])
+            current_settings[target.id] = node.value.__getattribute__(attributes[type(node.value)])
+
+
+        if 'settings_version' not in current_settings:
+            print('Upgrading settings.txt')
+            new_settings = upgrade_to_2(current_settings)
+            locals().update(new_settings)
+            new_settings_string = settings_to_string(new_settings)
+            with open(settings_file_path, 'w', encoding='utf-8') as file:
+                file.write(new_settings_string)
+
+        # some settings are missing from the file; append them
+        elif len(settings_info.keys() - current_settings.keys()) != 0:
+            print('Adding missing settings to settings.txt')
+            append_text = create_missing_settings_string(current_settings)
+            with open(settings_file_path, 'a', encoding='utf-8') as file:
+                file.write('\n\n' + append_text)
+            locals().update(current_settings)
+        else:
+            locals().update(current_settings)
+
+
+
 if route_tor:
     print("Tor
routing is ON") diff --git a/youtube/__init__.py b/youtube/__init__.py new file mode 100644 index 0000000..e620827 --- /dev/null +++ b/youtube/__init__.py @@ -0,0 +1,7 @@ +import flask +yt_app = flask.Flask(__name__) +yt_app.url_map.strict_slashes = False + +@yt_app.route('/') +def homepage(): + return flask.render_template('base.html', title="Youtube local") diff --git a/youtube/accounts.py b/youtube/accounts.py index 375bf2a..d2e8a41 100644 --- a/youtube/accounts.py +++ b/youtube/accounts.py @@ -1,5 +1,6 @@ # Contains functions having to do with logging in -from youtube import util, html_common +from youtube import util +from youtube import yt_app import settings import urllib @@ -9,6 +10,9 @@ import http.cookiejar import io import os +import flask +from flask import request + try: with open(os.path.join(settings.data_dir, 'accounts.txt'), 'r', encoding='utf-8') as f: accounts = json.loads(f.read()) @@ -18,7 +22,7 @@ except FileNotFoundError: def account_list_data(): '''Returns iterable of (channel_id, account_display_name)''' - return ( (channel_id, account['display_name']) for channel_id, account in accounts.items() ) + return [ (channel_id, account['display_name']) for channel_id, account in accounts.items() ] def save_accounts(): to_save = {channel_id: account for channel_id, account in accounts.items() if account['save']} @@ -51,91 +55,20 @@ def _add_account(username, password, save, use_tor): return True return False -def add_account(env, start_response): - parameters = env['parameters'] - if 'save' in parameters and parameters['save'][0] == "on": - save_account = True - else: - save_account = False +@yt_app.route('/login', methods=['POST']) +def add_account(): + save_account = request.values.get('save', 'off') == 'on' + use_tor = request.values.get('use_tor', 'off') == 'on' - if 'use_tor' in parameters and parameters['use_tor'][0] == "on": - use_tor = True + if _add_account(request.values['username'], request.values['password'], save_account, use_tor ): + return 'Account successfully added' else: - use_tor = False + return 'Failed to add account' - if _add_account(parameters['username'][0], parameters['password'][0], save_account, use_tor ): - start_response('200 OK', [('Content-type', 'text/plain'),] ) - return b'Account successfully added' - else: - start_response('200 OK', [('Content-type', 'text/plain'),] ) - return b'Failed to add account' - -def get_account_login_page(env, start_response): - start_response('200 OK', [('Content-type','text/html'),] ) - - style = ''' - main{ - display: grid; - grid-template-columns: minmax(0px, 3fr) 640px 40px 500px minmax(0px,2fr); - align-content: start; - grid-row-gap: 40px; - } - - main form{ - margin-top:20px; - grid-column:2; - display:grid; - justify-items: start; - align-content: start; - grid-row-gap: 10px; - } - - #username, #password{ - grid-column:2; - width: 250px; - } - #add-account-button{ - margin-top:20px; - } - #tor-note{ - grid-row:2; - grid-column:2; - background-color: #dddddd; - padding: 10px; - } - ''' - - page = ''' - <form action="''' + util.URL_ORIGIN + '''/login" method="POST"> - <div class="form-field"> - <label for="username">Username:</label> - <input type="text" id="username" name="username"> - </div> - <div class="form-field"> - <label for="password">Password:</label> - <input type="password" id="password" name="password"> - </div> - <div id="save-account-checkbox"> - <input type="checkbox" id="save-account" name="save" checked> - <label for="save-account">Save account info to disk (password will not be saved, 
only the login cookie)</label> - </div> - <div> - <input type="checkbox" id="use-tor" name="use_tor"> - <label for="use-tor">Use Tor when logging in (WARNING: This will lock your Google account under normal circumstances, see note below)</label> - </div> - <input type="submit" value="Add account" id="add-account-button"> - </form> - <div id="tor-note"><b>Note on using Tor to log in</b><br> -Using Tor to log in should only be done if the account was created using a proxy/VPN/Tor to begin with and hasn't been logged in using your IP. Otherwise, it's pointless since Google already knows who the account belongs to. When logging into a google account, it must be logged in using an IP address geographically close to the area where the account was created or where it is logged into regularly. If the account was created using an IP address in America and is logged into from an IP in Russia, Google will block the Russian IP from logging in, assume someone knows your password, lock the account, and make you change your password. If creating an account using Tor, you must remember the IP (or geographic region) it was created in, and only log in using that geographic region for the exit node. This can be accomplished by <a href="https://tor.stackexchange.com/questions/733/can-i-exit-from-a-specific-country-or-node">putting the desired IP in the torrc file</a> to force Tor to use that exit node. Using the login cookie to post comments through Tor is perfectly safe, however. - </div> - ''' - - return html_common.yt_basic_template.substitute( - page_title = "Login", - style = style, - header = html_common.get_header(), - page = page, - ).encode('utf-8') + +@yt_app.route('/login', methods=['GET']) +def get_account_login_page(): + return flask.render_template('login.html') @@ -229,10 +162,8 @@ def _login(username, password, cookiejar, use_tor): Taken from youtube-dl """ - login_page = util.fetch_url(_LOGIN_URL, yt_dl_headers, report_text='Downloaded login page', cookiejar_receive=cookiejar, use_tor=use_tor).decode('utf-8') - '''with open('debug/login_page', 'w', encoding='utf-8') as f: - f.write(login_page)''' - #print(cookiejar.as_lwp_str()) + login_page = util.fetch_url(_LOGIN_URL, yt_dl_headers, report_text='Downloaded login page', cookiejar_receive=cookiejar, use_tor=use_tor, debug_name='login_page').decode('utf-8') + if login_page is False: return @@ -249,16 +180,14 @@ def _login(username, password, cookiejar, use_tor): 'f.req': json.dumps(f_req), 'flowName': 'GlifWebSignIn', 'flowEntry': 'ServiceLogin', + 'bgRequest': '["identifier",""]', }) headers={ 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8', 'Google-Accounts-XSRF': 1, } headers.update(yt_dl_headers) - result = util.fetch_url(url, headers, report_text=note, data=data, cookiejar_send=cookiejar, cookiejar_receive=cookiejar, use_tor=use_tor).decode('utf-8') - #print(cookiejar.as_lwp_str()) - '''with open('debug/' + note, 'w', encoding='utf-8') as f: - f.write(result)''' + result = util.fetch_url(url, headers, report_text=note, data=data, cookiejar_send=cookiejar, cookiejar_receive=cookiejar, use_tor=use_tor, debug_name=note).decode('utf-8') result = re.sub(r'^[^\[]*', '', result) return json.loads(result) @@ -387,12 +316,10 @@ def _login(username, password, cookiejar, use_tor): return False try: - check_cookie_results = util.fetch_url(check_cookie_url, headers=yt_dl_headers, report_text="Checked cookie", cookiejar_send=cookiejar, cookiejar_receive=cookiejar, use_tor=use_tor).decode('utf-8') + check_cookie_results = 
util.fetch_url(check_cookie_url, headers=yt_dl_headers, report_text="Checked cookie", cookiejar_send=cookiejar, cookiejar_receive=cookiejar, use_tor=use_tor, debug_name='check_cookie_results').decode('utf-8')
     except (urllib.error.URLError, compat_http_client.HTTPException, socket.error) as err:
         return False
 
-    '''with open('debug/check_cookie_results', 'w', encoding='utf-8') as f:
-        f.write(check_cookie_results)'''
 
     if 'https://myaccount.google.com/' not in check_cookie_results:
         warn('Unable to log in')
diff --git a/youtube/channel.py b/youtube/channel.py
index 1b345b5..4c7d380 100644
--- a/youtube/channel.py
+++ b/youtube/channel.py
@@ -1,7 +1,7 @@
 import base64
-from youtube import util, yt_data_extract, html_common, subscriptions
+from youtube import util, yt_data_extract, local_playlist
+from youtube import yt_app
-import http_errors
 import urllib
 import json
 from string import Template
@@ -12,11 +12,8 @@ import gevent
 import re
 import functools
 
-with open("yt_channel_items_template.html", "r") as file:
-    yt_channel_items_template = Template(file.read())
-
-with open("yt_channel_about_template.html", "r") as file:
-    yt_channel_about_template = Template(file.read())
+import flask
+from flask import request
 
 '''continuation = Proto(
     Field('optional', 'continuation', 80226972, Proto(
@@ -91,16 +88,10 @@ def get_channel_tab(channel_id, page="1", sort=3, tab='videos', view=1):
     url = "https://www.youtube.com/browse_ajax?ctoken=" + ctoken
 
     print("Sending channel tab ajax request")
-    content = util.fetch_url(url, util.desktop_ua + headers_1)
+    content = util.fetch_url(url, util.desktop_ua + headers_1, debug_name='channel_tab')
     print("Finished receiving channel tab response")
 
-    '''with open('debug/channel_debug', 'wb') as f:
-        f.write(content)'''
-    info = json.loads(content)
-    return info
-
-
-
+    return content
 
 def get_number_of_videos(channel_id):
     # Uploads playlist
@@ -110,15 +101,13 @@ def get_number_of_videos(channel_id):
     # Sometimes retrieving playlist info fails with 403 for no discernible reason
     try:
-        response = util.fetch_url(url, util.mobile_ua + headers_pbj)
+        response = util.fetch_url(url, util.mobile_ua + headers_pbj, debug_name='number_of_videos')
     except urllib.error.HTTPError as e:
         if e.code != 403:
             raise
         print("Couldn't retrieve number of videos")
         return 1000
 
-    '''with open('debug/playlist_debug_metadata', 'wb') as f:
-        f.write(response)'''
     response = response.decode('utf-8')
 
     print("Got response for number of videos")
@@ -136,71 +125,20 @@ def get_channel_id(username):
     response = util.fetch_url(url, util.mobile_ua + headers_1).decode('utf-8')
     return re.search(r'"channel_id":\s*"([a-zA-Z0-9_-]*)"', response).group(1)
 
-def grid_items_html(items, additional_info={}):
-    result = ''' <nav class="item-grid">\n'''
-    for item in items:
-        result += html_common.renderer_html(item, additional_info)
-    result += '''\n</nav>'''
-    return result
-
-def list_items_html(items, additional_info={}):
-    result = ''' <nav class="item-list">'''
-    for item in items:
-        result += html_common.renderer_html(item, additional_info)
-    result += '''\n</nav>'''
-    return result
-
-channel_tab_template = Template('''\n<a class="tab page-button"$href_attribute>$tab_name</a>''')
-channel_search_template = Template('''
-    <form class="channel-search" action="$action">
-        <input type="search" name="query" class="search-box" value="$search_box_value">
-        <button type="submit" value="Search" class="search-button">Search</button>
-    </form>''')
-
-tabs = ('Videos', 'Playlists', 'About')
-def channel_tabs_html(channel_id,
current_tab, search_box_value=''): - result = '' - for tab_name in tabs: - if tab_name == current_tab: - result += channel_tab_template.substitute( - href_attribute = '', - tab_name = tab_name, - ) - else: - result += channel_tab_template.substitute( - href_attribute = ' href="' + util.URL_ORIGIN + '/channel/' + channel_id + '/' + tab_name.lower() + '"', - tab_name = tab_name, - ) - result += channel_search_template.substitute( - action = util.URL_ORIGIN + "/channel/" + channel_id + "/search", - search_box_value = html.escape(search_box_value), - ) - return result - -channel_sort_button_template = Template('''\n<a class="sort-button"$href_attribute>$text</a>''') -sorts = { - "videos": (('1', 'views'), ('2', 'oldest'), ('3', 'newest'),), - "playlists": (('2', 'oldest'), ('3', 'newest'), ('4', 'last video added'),), -} -def channel_sort_buttons_html(channel_id, tab, current_sort): - result = '' - for sort_number, sort_name in sorts[tab]: - if sort_number == str(current_sort): - result += channel_sort_button_template.substitute( - href_attribute='', - text = 'Sorted by ' + sort_name - ) - else: - result += channel_sort_button_template.substitute( - href_attribute=' href="' + util.URL_ORIGIN + '/channel/' + channel_id + '/' + tab + '?sort=' + sort_number + '"', - text = 'Sort by ' + sort_name - ) - return result +def get_channel_search_json(channel_id, query, page): + params = proto.string(2, 'search') + proto.string(15, str(page)) + params = proto.percent_b64encode(params) + ctoken = proto.string(2, channel_id) + proto.string(3, params) + proto.string(11, query) + ctoken = base64.urlsafe_b64encode(proto.nested(80226972, ctoken)).decode('ascii') + + polymer_json = util.fetch_url("https://www.youtube.com/browse_ajax?ctoken=" + ctoken, util.desktop_ua + headers_1, debug_name='channel_search') + return polymer_json -def get_microformat(response): +def extract_info(polymer_json, tab): + response = polymer_json[1]['response'] try: - return response['microformat']['microformatDataRenderer'] + microformat = response['microformat']['microformatDataRenderer'] # channel doesn't exist or was terminated # example terminated channel: https://www.youtube.com/channel/UCnKJeK_r90jDdIuzHXC0Org @@ -209,227 +147,136 @@ def get_microformat(response): result = '' for alert in response['alerts']: result += alert['alertRenderer']['text']['simpleText'] + '\n' - raise http_errors.Code200(result) + flask.abort(200, result) elif 'errors' in response['responseContext']: for error in response['responseContext']['errors']['error']: if error['code'] == 'INVALID_VALUE' and error['location'] == 'browse_id': - raise http_errors.Error404('This channel does not exist') + flask.abort(404, 'This channel does not exist') raise -# example channel with no videos: https://www.youtube.com/user/jungleace -def get_grid_items(response): - try: - return response['continuationContents']['gridContinuation']['items'] - except KeyError: - try: - contents = response['contents'] - except KeyError: - return [] - - item_section = tab_with_content(contents['twoColumnBrowseResultsRenderer']['tabs'])['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0] - try: - return item_section['gridRenderer']['items'] - except KeyError: - if "messageRenderer" in item_section: - return [] - else: - raise + info = {} + info['current_tab'] = tab -def channel_videos_html(polymer_json, current_page=1, current_sort=3, number_of_videos = 1000, current_query_string=''): - response = polymer_json[1]['response'] - microformat = 
get_microformat(response) + + # stuff from microformat (info given by youtube for every page on channel) + info['short_description'] = microformat['description'] + info['channel_name'] = microformat['title'] + info['avatar'] = microformat['thumbnail']['thumbnails'][0]['url'] channel_url = microformat['urlCanonical'].rstrip('/') channel_id = channel_url[channel_url.rfind('/')+1:] - if subscriptions.is_subscribed(channel_id): - action_name = 'Unsubscribe' - action = 'unsubscribe' - else: - action_name = 'Subscribe' - action = 'subscribe' + info['channel_id'] = channel_id + info['channel_url'] = 'https://www.youtube.com/channel/' + channel_id + + info['items'] = [] + + # empty channel + if 'contents' not in response and 'continuationContents' not in response: + return info + + + # find the tab with content + # example channel where tabs do not have definite index: https://www.youtube.com/channel/UC4gQ8i3FD7YbhOgqUkeQEJg + # TODO: maybe use the 'selected' attribute for this? + if 'continuationContents' not in response: + tab_renderer = None + tab_content = None + for tab_json in response['contents']['twoColumnBrowseResultsRenderer']['tabs']: + try: + tab_renderer = tab_json['tabRenderer'] + except KeyError: + tab_renderer = tab_json['expandableTabRenderer'] + try: + tab_content = tab_renderer['content'] + break + except KeyError: + pass + else: # didn't break + raise Exception("No tabs found with content") + assert tab == tab_renderer['title'].lower() + + + # extract tab-specific info + if tab in ('videos', 'playlists', 'search'): # find the list of items + if 'continuationContents' in response: + try: + items = response['continuationContents']['gridContinuation']['items'] + except KeyError: + items = response['continuationContents']['sectionListContinuation']['contents'] # for search + else: + contents = tab_content['sectionListRenderer']['contents'] + if 'itemSectionRenderer' in contents[0]: + item_section = contents[0]['itemSectionRenderer']['contents'][0] + try: + items = item_section['gridRenderer']['items'] + except KeyError: + if "messageRenderer" in item_section: + items = [] + else: + raise Exception('gridRenderer missing but messageRenderer not found') + else: + items = contents # for search - items = get_grid_items(response) - items_html = grid_items_html(items, {'author': microformat['title']}) - - return yt_channel_items_template.substitute( - header = html_common.get_header(), - channel_title = microformat['title'], - channel_id = channel_id, - channel_tabs = channel_tabs_html(channel_id, 'Videos'), - sort_buttons = channel_sort_buttons_html(channel_id, 'videos', current_sort), - avatar = '/' + microformat['thumbnail']['thumbnails'][0]['url'], - page_title = microformat['title'] + ' - Channel', - items = items_html, - page_buttons = html_common.page_buttons_html(current_page, math.ceil(number_of_videos/30), util.URL_ORIGIN + "/channel/" + channel_id + "/videos", current_query_string), - number_of_results = '{:,}'.format(number_of_videos) + " videos", - action_name = action_name, - action = action, - ) + # TODO: Fix this URL prefixing shit + additional_info = {'author': info['channel_name'], 'author_url': '/channel/' + channel_id} + info['items'] = [yt_data_extract.renderer_info(renderer, additional_info) for renderer in items] -def channel_playlists_html(polymer_json, current_sort=3): - response = polymer_json[1]['response'] - microformat = get_microformat(response) - channel_url = microformat['urlCanonical'].rstrip('/') - channel_id = channel_url[channel_url.rfind('/')+1:] + 
elif tab == 'about': + channel_metadata = tab_content['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['channelAboutFullMetadataRenderer'] - if subscriptions.is_subscribed(channel_id): - action_name = 'Unsubscribe' - action = 'unsubscribe' - else: - action_name = 'Subscribe' - action = 'subscribe' - items = get_grid_items(response) - items_html = grid_items_html(items, {'author': microformat['title']}) - - return yt_channel_items_template.substitute( - header = html_common.get_header(), - channel_title = microformat['title'], - channel_id = channel_id, - channel_tabs = channel_tabs_html(channel_id, 'Playlists'), - sort_buttons = channel_sort_buttons_html(channel_id, 'playlists', current_sort), - avatar = '/' + microformat['thumbnail']['thumbnails'][0]['url'], - page_title = microformat['title'] + ' - Channel', - items = items_html, - page_buttons = '', - number_of_results = '', - action_name = action_name, - action = action, - ) + info['links'] = [] + for link_json in channel_metadata.get('primaryLinks', ()): + url = link_json['navigationEndpoint']['urlEndpoint']['url'] + if url.startswith('/redirect'): # youtube puts these on external links to do tracking + query_string = url[url.find('?')+1: ] + url = urllib.parse.parse_qs(query_string)['q'][0] -# Example channel where tabs do not have definite index: https://www.youtube.com/channel/UC4gQ8i3FD7YbhOgqUkeQEJg -def tab_with_content(tabs): - for tab in tabs: - try: - renderer = tab['tabRenderer'] - except KeyError: - renderer = tab['expandableTabRenderer'] - try: - return renderer['content'] - except KeyError: - pass - - raise Exception("No tabs found with content") - -channel_link_template = Template(''' -<li><a href="$url">$text</a></li>''') -stat_template = Template(''' -<li>$stat_value</li>''') -def channel_about_page(polymer_json): - microformat = get_microformat(polymer_json[1]['response']) - avatar = '/' + microformat['thumbnail']['thumbnails'][0]['url'] - # my goodness... 
- channel_metadata = tab_with_content(polymer_json[1]['response']['contents']['twoColumnBrowseResultsRenderer']['tabs'])['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['channelAboutFullMetadataRenderer'] - channel_links = '' - for link_json in channel_metadata.get('primaryLinks', ()): - url = link_json['navigationEndpoint']['urlEndpoint']['url'] - if url.startswith("/redirect"): - query_string = url[url.find('?')+1: ] - url = urllib.parse.parse_qs(query_string)['q'][0] - - channel_links += channel_link_template.substitute( - url = html.escape(url), - text = yt_data_extract.get_plain_text(link_json['title']), - ) + text = yt_data_extract.get_plain_text(link_json['title']) + + info['links'].append( (text, url) ) - stats = '' - for stat_name in ('subscriberCountText', 'joinedDateText', 'viewCountText', 'country'): - try: - stat_value = yt_data_extract.get_plain_text(channel_metadata[stat_name]) - except KeyError: - continue - else: - stats += stat_template.substitute(stat_value=stat_value) + info['stats'] = [] + for stat_name in ('subscriberCountText', 'joinedDateText', 'viewCountText', 'country'): + try: + stat = channel_metadata[stat_name] + except KeyError: + continue + info['stats'].append(yt_data_extract.get_plain_text(stat)) + + if 'description' in channel_metadata: + info['description'] = yt_data_extract.get_text(channel_metadata['description']) + else: + info['description'] = '' - channel_id = channel_metadata['channelId'] - if subscriptions.is_subscribed(channel_id): - action_name = 'Unsubscribe' - action = 'unsubscribe' else: - action_name = 'Subscribe' - action = 'subscribe' + raise NotImplementedError('Unknown or unsupported channel tab: ' + tab) - try: - description = yt_data_extract.format_text_runs(yt_data_extract.get_formatted_text(channel_metadata['description'])) - except KeyError: - description = '' - return yt_channel_about_template.substitute( - header = html_common.get_header(), - page_title = yt_data_extract.get_plain_text(channel_metadata['title']) + ' - About', - channel_title = yt_data_extract.get_plain_text(channel_metadata['title']), - avatar = html.escape(avatar), - description = description, - links = channel_links, - stats = stats, - channel_id = channel_id, - channel_tabs = channel_tabs_html(channel_metadata['channelId'], 'About'), - action_name = action_name, - action = action, - ) + return info -def channel_search_page(polymer_json, query, current_page=1, number_of_videos = 1000, current_query_string=''): - response = polymer_json[1]['response'] - microformat = get_microformat(response) - channel_url = microformat['urlCanonical'].rstrip('/') - channel_id = channel_url[channel_url.rfind('/')+1:] +def post_process_channel_info(info): + info['avatar'] = '/' + info['avatar'] + info['channel_url'] = '/' + info['channel_url'] + for item in info['items']: + yt_data_extract.prefix_urls(item) + yt_data_extract.add_extra_html_info(item) - if subscriptions.is_subscribed(channel_id): - action_name = 'Unsubscribe' - action = 'unsubscribe' - else: - action_name = 'Subscribe' - action = 'subscribe' - try: - items = tab_with_content(response['contents']['twoColumnBrowseResultsRenderer']['tabs'])['sectionListRenderer']['contents'] - except KeyError: - items = response['continuationContents']['sectionListContinuation']['contents'] - - items_html = list_items_html(items) - - return yt_channel_items_template.substitute( - header = html_common.get_header(), - channel_title = html.escape(microformat['title']), - channel_id = channel_id, - channel_tabs 
= channel_tabs_html(channel_id, '', query), - avatar = '/' + microformat['thumbnail']['thumbnails'][0]['url'], - page_title = html.escape(query + ' - Channel search'), - items = items_html, - page_buttons = html_common.page_buttons_html(current_page, math.ceil(number_of_videos/29), util.URL_ORIGIN + "/channel/" + channel_id + "/search", current_query_string), - number_of_results = '', - sort_buttons = '', - action_name = action_name, - action = action, - ) -def get_channel_search_json(channel_id, query, page): - params = proto.string(2, 'search') + proto.string(15, str(page)) - params = proto.percent_b64encode(params) - ctoken = proto.string(2, channel_id) + proto.string(3, params) + proto.string(11, query) - ctoken = base64.urlsafe_b64encode(proto.nested(80226972, ctoken)).decode('ascii') - polymer_json = util.fetch_url("https://www.youtube.com/browse_ajax?ctoken=" + ctoken, util.desktop_ua + headers_1) - '''with open('debug/channel_search_debug', 'wb') as f: - f.write(polymer_json)''' - polymer_json = json.loads(polymer_json) - return polymer_json - playlist_sort_codes = {'2': "da", '3': "dd", '4': "lad"} -def get_channel_page(env, start_response): - path_parts = env['path_parts'] - channel_id = path_parts[1] - try: - tab = path_parts[2] - except IndexError: - tab = 'videos' - - parameters = env['parameters'] - page_number = int(util.default_multi_get(parameters, 'page', 0, default='1')) - sort = util.default_multi_get(parameters, 'sort', 0, default='3') - view = util.default_multi_get(parameters, 'view', 0, default='1') - query = util.default_multi_get(parameters, 'query', 0, default='') + +@yt_app.route('/channel/<channel_id>/') +@yt_app.route('/channel/<channel_id>/<tab>') +def get_channel_page(channel_id, tab='videos'): + + page_number = int(request.args.get('page', 1)) + sort = request.args.get('sort', '3') + view = request.args.get('view', '1') + query = request.args.get('query', '') + if tab == 'videos': tasks = ( @@ -439,17 +286,10 @@ def get_channel_page(env, start_response): gevent.joinall(tasks) number_of_videos, polymer_json = tasks[0].value, tasks[1].value - result = channel_videos_html(polymer_json, page_number, sort, number_of_videos, env['QUERY_STRING']) elif tab == 'about': - polymer_json = util.fetch_url('https://www.youtube.com/channel/' + channel_id + '/about?pbj=1', util.desktop_ua + headers_1) - polymer_json = json.loads(polymer_json) - result = channel_about_page(polymer_json) + polymer_json = util.fetch_url('https://www.youtube.com/channel/' + channel_id + '/about?pbj=1', util.desktop_ua + headers_1, debug_name='channel_about') elif tab == 'playlists': - polymer_json = util.fetch_url('https://www.youtube.com/channel/' + channel_id + '/playlists?pbj=1&view=1&sort=' + playlist_sort_codes[sort], util.desktop_ua + headers_1) - '''with open('debug/channel_playlists_debug', 'wb') as f: - f.write(polymer_json)''' - polymer_json = json.loads(polymer_json) - result = channel_playlists_html(polymer_json, sort) + polymer_json = util.fetch_url('https://www.youtube.com/channel/' + channel_id + '/playlists?pbj=1&view=1&sort=' + playlist_sort_codes[sort], util.desktop_ua + headers_1, debug_name='channel_playlists') elif tab == 'search': tasks = ( gevent.spawn(get_number_of_videos, channel_id ), @@ -458,54 +298,80 @@ def get_channel_page(env, start_response): gevent.joinall(tasks) number_of_videos, polymer_json = tasks[0].value, tasks[1].value - result = channel_search_page(polymer_json, query, page_number, number_of_videos, env['QUERY_STRING']) - else: - start_response('404 Not 
Found', [('Content-type', 'text/plain'),]) - return b'Unknown channel tab: ' + tab.encode('utf-8') - - start_response('200 OK', [('Content-type','text/html'),]) - return result.encode('utf-8') - -# youtube.com/user/[username]/[page] -# youtube.com/c/[custom]/[page] -# youtube.com/[custom]/[page] -def get_channel_page_general_url(env, start_response): - path_parts = env['path_parts'] - - is_toplevel = not path_parts[0] in ('user', 'c') - - if len(path_parts) + int(is_toplevel) == 3: # has /[page] after it - page = path_parts[2] - base_url = 'https://www.youtube.com/' + '/'.join(path_parts[0:-1]) - elif len(path_parts) + int(is_toplevel) == 2: # does not have /[page] after it, use /videos by default - page = 'videos' - base_url = 'https://www.youtube.com/' + '/'.join(path_parts) else: - start_response('404 Not Found', [('Content-type', 'text/plain'),]) - return b'Invalid channel url' - - if page == 'videos': - polymer_json = util.fetch_url(base_url + '/videos?pbj=1&view=0', util.desktop_ua + headers_1) - '''with open('debug/user_page_videos', 'wb') as f: - f.write(polymer_json)''' - polymer_json = json.loads(polymer_json) - result = channel_videos_html(polymer_json) - elif page == 'about': - polymer_json = util.fetch_url(base_url + '/about?pbj=1', util.desktop_ua + headers_1) - polymer_json = json.loads(polymer_json) - result = channel_about_page(polymer_json) - elif page == 'playlists': - polymer_json = util.fetch_url(base_url+ '/playlists?pbj=1&view=1', util.desktop_ua + headers_1) - polymer_json = json.loads(polymer_json) - result = channel_playlists_html(polymer_json) - elif page == 'search': + flask.abort(404, 'Unknown channel tab: ' + tab) + + + info = extract_info(json.loads(polymer_json), tab) + post_process_channel_info(info) + if tab in ('videos', 'search'): + info['number_of_videos'] = number_of_videos + info['number_of_pages'] = math.ceil(number_of_videos/30) + info['header_playlist_names'] = local_playlist.get_playlist_names() + if tab in ('videos', 'playlists'): + info['current_sort'] = sort + elif tab == 'search': + info['search_box_value'] = query + + + return flask.render_template('channel.html', + parameters_dictionary = request.args, + **info + ) + + +# youtube.com/user/[username]/[tab] +# youtube.com/c/[custom]/[tab] +# youtube.com/[custom]/[tab] +def get_channel_page_general_url(base_url, tab, request): + + page_number = int(request.args.get('page', 1)) + sort = request.args.get('sort', '3') + view = request.args.get('view', '1') + query = request.args.get('query', '') + + if tab == 'videos': + polymer_json = util.fetch_url(base_url + '/videos?pbj=1&view=0', util.desktop_ua + headers_1, debug_name='gen_channel_videos') + elif tab == 'about': + polymer_json = util.fetch_url(base_url + '/about?pbj=1', util.desktop_ua + headers_1, debug_name='gen_channel_about') + elif tab == 'playlists': + polymer_json = util.fetch_url(base_url+ '/playlists?pbj=1&view=1', util.desktop_ua + headers_1, debug_name='gen_channel_playlists') + elif tab == 'search': raise NotImplementedError() - '''polymer_json = util.fetch_url('https://www.youtube.com/user' + username + '/search?pbj=1&' + query_string, util.desktop_ua + headers_1) - polymer_json = json.loads(polymer_json) - return channel_search_page(''' else: - start_response('404 Not Found', [('Content-type', 'text/plain'),]) - return b'Unknown channel page: ' + page.encode('utf-8') + flask.abort(404, 'Unknown channel tab: ' + tab) + + + info = extract_info(json.loads(polymer_json), tab) + post_process_channel_info(info) + if tab in ('videos', 
'search'): + info['number_of_videos'] = 1000 + info['number_of_pages'] = math.ceil(1000/30) + info['header_playlist_names'] = local_playlist.get_playlist_names() + if tab in ('videos', 'playlists'): + info['current_sort'] = sort + elif tab == 'search': + info['search_box_value'] = query + + + return flask.render_template('channel.html', + parameters_dictionary = request.args, + **info + ) + + +@yt_app.route('/user/<username>/') +@yt_app.route('/user/<username>/<tab>') +def get_user_page(username, tab='videos'): + return get_channel_page_general_url('https://www.youtube.com/user/' + username, tab, request) + +@yt_app.route('/c/<custom>/') +@yt_app.route('/c/<custom>/<tab>') +def get_custom_c_page(custom, tab='videos'): + return get_channel_page_general_url('https://www.youtube.com/c/' + custom, tab, request) + +@yt_app.route('/<custom>') +@yt_app.route('/<custom>/<tab>') +def get_toplevel_custom_page(custom, tab='videos'): + return get_channel_page_general_url('https://www.youtube.com/' + custom, tab, request) - start_response('200 OK', [('Content-type','text/html'),]) - return result.encode('utf-8') diff --git a/youtube/comments.py b/youtube/comments.py index 94b086e..3b1ef86 100644 --- a/youtube/comments.py +++ b/youtube/comments.py @@ -1,57 +1,14 @@ -from youtube import proto, util, html_common, yt_data_extract, accounts +from youtube import proto, util, yt_data_extract, accounts +from youtube import yt_app import settings import json import base64 -from string import Template -import urllib.request import urllib -import html import re -comment_area_template = Template(''' -<section class="comment-area"> -$video-metadata -$comment-links -$comment-box -$comments -$more-comments-button -</section> -''') -comment_template = Template(''' - <div class="comment-container"> - <div class="comment"> - <a class="author-avatar" href="$author_url" title="$author"> -$avatar - </a> - <address> - <a class="author" href="$author_url" title="$author">$author</a> - </address> - <a class="permalink" href="$permalink" title="permalink"> - <time datetime="$datetime">$published</time> - </a> - <span class="text">$text</span> - - <span class="likes">$likes</span> - <div class="bottom-row"> -$replies -$action_buttons - </div> - </div> - - </div> -''') -comment_avatar_template = Template(''' <img class="author-avatar-img" src="$author_avatar">''') - -reply_link_template = Template(''' - <a href="$url" class="replies">$view_replies_text</a> -''') -with open("yt_comments_template.html", "r") as file: - yt_comments_template = Template(file.read()) - - -# <a class="replies-link" href="$replies_url">$replies_link_text</a> - +import flask +from flask import request # Here's what I know about the secret key (starting with ASJN_i) # *The secret key definitely contains the following information (or perhaps the information is stored at youtube's servers): @@ -102,6 +59,7 @@ def ctoken_metadata(ctoken): result['is_replies'] = False if (3 in offset_information) and (2 in proto.parse(offset_information[3])): result['is_replies'] = True + result['sort'] = None else: try: result['sort'] = proto.parse(offset_information[4])[6] @@ -109,12 +67,6 @@ def ctoken_metadata(ctoken): result['sort'] = 0 return result -def get_ids(ctoken): - params = proto.parse(proto.b64_to_bytes(ctoken)) - video_id = proto.parse(params[2])[2] - params = proto.parse(params[6]) - params = proto.parse(params[3]) - return params[2].decode('ascii'), video_id.decode('ascii') mobile_headers = { 'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac 
OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1', @@ -131,7 +83,7 @@ def request_comments(ctoken, replies=False): url = base_url + ctoken.replace("=", "%3D") + "&pbj=1" for i in range(0,8): # don't retry more than 8 times - content = util.fetch_url(url, headers=mobile_headers, report_text="Retrieved comments") + content = util.fetch_url(url, headers=mobile_headers, report_text="Retrieved comments", debug_name='request_comments') if content[0:4] == b")]}'": # random closing characters included at beginning of response for some reason content = content[4:] elif content[0:10] == b'\n<!DOCTYPE': # occasionally returns html instead of json for no reason @@ -139,116 +91,67 @@ def request_comments(ctoken, replies=False): print("got <!DOCTYPE>, retrying") continue break - '''with open('debug/comments_debug', 'wb') as f: - f.write(content)''' return content + def single_comment_ctoken(video_id, comment_id): page_params = proto.string(2, video_id) + proto.string(6, proto.percent_b64encode(proto.string(15, comment_id))) result = proto.nested(2, page_params) + proto.uint(3,6) return base64.urlsafe_b64encode(result).decode('ascii') - -def parse_comments_ajax(content, replies=False): - try: - content = json.loads(util.uppercase_escape(content.decode('utf-8'))) - #print(content) - comments_raw = content['content']['continuation_contents']['contents'] - ctoken = util.default_multi_get(content, 'content', 'continuation_contents', 'continuations', 0, 'continuation', default='') - - comments = [] - for comment_raw in comments_raw: - replies_url = '' - if not replies: - if comment_raw['replies'] is not None: - reply_ctoken = comment_raw['replies']['continuations'][0]['continuation'] - comment_id, video_id = get_ids(reply_ctoken) - replies_url = util.URL_ORIGIN + '/comments?parent_id=' + comment_id + "&video_id=" + video_id - comment_raw = comment_raw['comment'] - comment = { - 'author': comment_raw['author']['runs'][0]['text'], - 'author_url': comment_raw['author_endpoint']['url'], - 'author_channel_id': '', - 'author_id': '', - 'author_avatar': comment_raw['author_thumbnail']['url'], - 'likes': comment_raw['like_count'], - 'published': comment_raw['published_time']['runs'][0]['text'], - 'text': comment_raw['content']['runs'], - 'reply_count': '', - 'replies_url': replies_url, - } - comments.append(comment) - except Exception as e: - print('Error parsing comments: ' + str(e)) - comments = () - ctoken = '' - return {'ctoken': ctoken, 'comments': comments} -reply_count_regex = re.compile(r'(\d+)') -def parse_comments_polymer(content, replies=False): +def parse_comments_polymer(content): try: video_title = '' content = json.loads(util.uppercase_escape(content.decode('utf-8'))) url = content[1]['url'] ctoken = urllib.parse.parse_qs(url[url.find('?')+1:])['ctoken'][0] - video_id = ctoken_metadata(ctoken)['video_id'] - #print(content) + metadata = ctoken_metadata(ctoken) + try: comments_raw = content[1]['response']['continuationContents']['commentSectionContinuation']['items'] except KeyError: comments_raw = content[1]['response']['continuationContents']['commentRepliesContinuation']['contents'] - replies = True ctoken = util.default_multi_get(content, 1, 'response', 'continuationContents', 'commentSectionContinuation', 'continuations', 0, 'nextContinuationData', 'continuation', default='') - + comments = [] - for comment_raw in comments_raw: - replies_url = '' - view_replies_text = '' + for comment_json in comments_raw: + number_of_replies = 0 try: - comment_raw = 
comment_raw['commentThreadRenderer'] + comment_thread = comment_json['commentThreadRenderer'] except KeyError: - pass + comment_renderer = comment_json['commentRenderer'] else: - if 'commentTargetTitle' in comment_raw: - video_title = comment_raw['commentTargetTitle']['runs'][0]['text'] - - parent_id = comment_raw['comment']['commentRenderer']['commentId'] - # TODO: move this stuff into the comments_html function - if 'replies' in comment_raw: - #reply_ctoken = comment_raw['replies']['commentRepliesRenderer']['continuations'][0]['nextContinuationData']['continuation'] - #comment_id, video_id = get_ids(reply_ctoken) - replies_url = util.URL_ORIGIN + '/comments?parent_id=' + parent_id + "&video_id=" + video_id - view_replies_text = yt_data_extract.get_plain_text(comment_raw['replies']['commentRepliesRenderer']['moreText']) - match = reply_count_regex.search(view_replies_text) + if 'commentTargetTitle' in comment_thread: + video_title = comment_thread['commentTargetTitle']['runs'][0]['text'] + + if 'replies' in comment_thread: + view_replies_text = yt_data_extract.get_plain_text(comment_thread['replies']['commentRepliesRenderer']['moreText']) + view_replies_text = view_replies_text.replace(',', '') + match = re.search(r'(\d+)', view_replies_text) if match is None: - view_replies_text = '1 reply' + number_of_replies = 1 else: - view_replies_text = match.group(1) + " replies" - elif not replies: - view_replies_text = "Reply" - replies_url = util.URL_ORIGIN + '/post_comment?parent_id=' + parent_id + "&video_id=" + video_id - comment_raw = comment_raw['comment'] - - comment_raw = comment_raw['commentRenderer'] + number_of_replies = int(match.group(1)) + comment_renderer = comment_thread['comment']['commentRenderer'] + comment = { - 'author_id': comment_raw.get('authorId', ''), - 'author_avatar': comment_raw['authorThumbnail']['thumbnails'][0]['url'], - 'likes': comment_raw['likeCount'], - 'published': yt_data_extract.get_plain_text(comment_raw['publishedTimeText']), - 'text': comment_raw['contentText'].get('runs', ''), - 'view_replies_text': view_replies_text, - 'replies_url': replies_url, - 'video_id': video_id, - 'comment_id': comment_raw['commentId'], + 'author_id': comment_renderer.get('authorId', ''), + 'author_avatar': comment_renderer['authorThumbnail']['thumbnails'][0]['url'], + 'likes': comment_renderer['likeCount'], + 'published': yt_data_extract.get_plain_text(comment_renderer['publishedTimeText']), + 'text': comment_renderer['contentText'].get('runs', ''), + 'number_of_replies': number_of_replies, + 'comment_id': comment_renderer['commentId'], } - if 'authorText' in comment_raw: # deleted channels have no name or channel link - comment['author'] = yt_data_extract.get_plain_text(comment_raw['authorText']) - comment['author_url'] = comment_raw['authorEndpoint']['commandMetadata']['webCommandMetadata']['url'] - comment['author_channel_id'] = comment_raw['authorEndpoint']['browseEndpoint']['browseId'] + if 'authorText' in comment_renderer: # deleted channels have no name or channel link + comment['author'] = yt_data_extract.get_plain_text(comment_renderer['authorText']) + comment['author_url'] = comment_renderer['authorEndpoint']['commandMetadata']['webCommandMetadata']['url'] + comment['author_channel_id'] = comment_renderer['authorEndpoint']['browseEndpoint']['browseId'] else: comment['author'] = '' comment['author_url'] = '' @@ -260,172 +163,109 @@ def parse_comments_polymer(content, replies=False): comments = () ctoken = '' - return {'ctoken': ctoken, 'comments': comments, 
'video_title': video_title} + return { + 'ctoken': ctoken, + 'comments': comments, + 'video_title': video_title, + 'video_id': metadata['video_id'], + 'offset': metadata['offset'], + 'is_replies': metadata['is_replies'], + 'sort': metadata['sort'], + } +def post_process_comments_info(comments_info): + for comment in comments_info['comments']: + comment['author_url'] = util.URL_ORIGIN + comment['author_url'] + comment['author_avatar'] = '/' + comment['author_avatar'] + comment['permalink'] = util.URL_ORIGIN + '/watch?v=' + comments_info['video_id'] + '&lc=' + comment['comment_id'] -def get_comments_html(comments): - html_result = '' - for comment in comments: - replies = '' - if comment['replies_url']: - replies = reply_link_template.substitute(url=comment['replies_url'], view_replies_text=html.escape(comment['view_replies_text'])) - if settings.enable_comment_avatars: - avatar = comment_avatar_template.substitute( - author_url = util.URL_ORIGIN + comment['author_url'], - author_avatar = '/' + comment['author_avatar'], - ) - else: - avatar = '' if comment['author_channel_id'] in accounts.accounts: - delete_url = (util.URL_ORIGIN + '/delete_comment?video_id=' - + comment['video_id'] + comment['delete_url'] = (util.URL_ORIGIN + '/delete_comment?video_id=' + + comments_info['video_id'] + '&channel_id='+ comment['author_channel_id'] + '&author_id=' + comment['author_id'] + '&comment_id=' + comment['comment_id']) - action_buttons = '''<a href="''' + delete_url + '''" target="_blank">Delete</a>''' + num_replies = comment['number_of_replies'] + if num_replies == 0: + comment['replies_url'] = util.URL_ORIGIN + '/post_comment?parent_id=' + comment['comment_id'] + "&video_id=" + comments_info['video_id'] else: - action_buttons = '' - - permalink = util.URL_ORIGIN + '/watch?v=' + comment['video_id'] + '&lc=' + comment['comment_id'] - html_result += comment_template.substitute( - author=comment['author'], - author_url = util.URL_ORIGIN + comment['author_url'], - avatar = avatar, - likes = str(comment['likes']) + ' likes' if str(comment['likes']) != '0' else '', - published = comment['published'], - text = yt_data_extract.format_text_runs(comment['text']), - datetime = '', #TODO - replies = replies, - action_buttons = action_buttons, - permalink = permalink, - ) - return html_result - + comment['replies_url'] = util.URL_ORIGIN + '/comments?parent_id=' + comment['comment_id'] + "&video_id=" + comments_info['video_id'] + + if num_replies == 0: + comment['view_replies_text'] = 'Reply' + elif num_replies == 1: + comment['view_replies_text'] = '1 reply' + else: + comment['view_replies_text'] = str(num_replies) + ' replies' + + + if comment['likes'] == 1: + comment['likes_text'] = '1 like' + else: + comment['likes_text'] = str(comment['likes']) + ' likes' + + comments_info['include_avatars'] = settings.enable_comment_avatars + if comments_info['ctoken'] != '': + comments_info['more_comments_url'] = util.URL_ORIGIN + '/comments?ctoken=' + comments_info['ctoken'] + + comments_info['page_number'] = page_number = str(int(comments_info['offset']/20) + 1) + + if not comments_info['is_replies']: + comments_info['sort_text'] = 'top' if comments_info['sort'] == 0 else 'newest' + + + comments_info['video_url'] = util.URL_ORIGIN + '/watch?v=' + comments_info['video_id'] + comments_info['video_thumbnail'] = '/i.ytimg.com/vi/'+ comments_info['video_id'] + '/mqdefault.jpg' + + def video_comments(video_id, sort=0, offset=0, lc='', secret_key=''): - if settings.enable_comments: - post_comment_url = util.URL_ORIGIN + 
"/post_comment?video_id=" + video_id - post_comment_link = '''<a class="sort-button" href="''' + post_comment_url + '''">Post comment</a>''' + if settings.comments_mode: + comments_info = parse_comments_polymer(request_comments(make_comment_ctoken(video_id, sort, offset, lc, secret_key))) + post_process_comments_info(comments_info) + post_comment_url = util.URL_ORIGIN + "/post_comment?video_id=" + video_id other_sort_url = util.URL_ORIGIN + '/comments?ctoken=' + make_comment_ctoken(video_id, sort=1 - sort, lc=lc) - other_sort_name = 'newest' if sort == 0 else 'top' - other_sort_link = '''<a class="sort-button" href="''' + other_sort_url + '''">Sort by ''' + other_sort_name + '''</a>''' - - comment_links = '''<div class="comment-links">\n''' - comment_links += other_sort_link + '\n' + post_comment_link + '\n' - comment_links += '''</div>''' - - comment_info = parse_comments_polymer(request_comments(make_comment_ctoken(video_id, sort, offset, lc, secret_key))) - ctoken = comment_info['ctoken'] - - if ctoken == '': - more_comments_button = '' - else: - more_comments_button = more_comments_template.substitute(url = util.URL_ORIGIN + '/comments?ctoken=' + ctoken) - - result = '''<section class="comments-area">\n''' - result += comment_links + '\n' - result += '<div class="comments">\n' - result += get_comments_html(comment_info['comments']) + '\n' - result += '</div>\n' - result += more_comments_button + '\n' - result += '''</section>''' - return result - return '' - -more_comments_template = Template('''<a class="page-button more-comments" href="$url">More comments</a>''') -video_metadata_template = Template('''<section class="video-metadata"> - <a class="video-metadata-thumbnail-box" href="$url" title="$title"> - <img class="video-metadata-thumbnail-img" src="$thumbnail" height="180px" width="320px"> - </a> - <a class="title" href="$url" title="$title">$title</a> - - <h2>Comments page $page_number</h2> - <span>Sorted by $sort</span> -</section> -''') -account_option_template = Template(''' - <option value="$channel_id">$display_name</option>''') - -def comment_box_account_options(): - return ''.join(account_option_template.substitute(channel_id=channel_id, display_name=display_name) for channel_id, display_name in accounts.account_list_data()) - -comment_box_template = Template(''' -<form action="$form_action" method="post" class="comment-form"> - <div id="comment-account-options"> - <label for="account-selection">Account:</label> - <select id="account-selection" name="channel_id"> -$options - </select> - <a href="''' + util.URL_ORIGIN + '''/login" target="_blank">Add account</a> - </div> - <textarea name="comment_text"></textarea> - $video_id_input - <button type="submit" class="post-comment-button">$post_text</button> -</form>''') -def get_comments_page(env, start_response): - start_response('200 OK', [('Content-type','text/html'),] ) - parameters = env['parameters'] - ctoken = util.default_multi_get(parameters, 'ctoken', 0, default='') + other_sort_text = 'Sort by ' + ('newest' if sort == 0 else 'top') + comments_info['comment_links'] = [('Post comment', post_comment_url), (other_sort_text, other_sort_url)] + + return comments_info + + return {} + + + +@yt_app.route('/comments') +def get_comments_page(): + ctoken = request.args.get('ctoken', '') replies = False if not ctoken: - video_id = parameters['video_id'][0] - parent_id = parameters['parent_id'][0] + video_id = request.args['video_id'] + parent_id = request.args['parent_id'] ctoken = comment_replies_ctoken(video_id, parent_id) 
replies = True - comment_info = parse_comments_polymer(request_comments(ctoken, replies), replies) + comments_info = parse_comments_polymer(request_comments(ctoken, replies)) + post_process_comments_info(comments_info) + + if not replies: + other_sort_url = util.URL_ORIGIN + '/comments?ctoken=' + make_comment_ctoken(comments_info['video_id'], sort=1 - comments_info['sort']) + other_sort_text = 'Sort by ' + ('newest' if comments_info['sort'] == 0 else 'top') + comments_info['comment_links'] = [(other_sort_text, other_sort_url)] + + + comment_posting_box_info = { + 'form_action': '' if replies else util.URL_ORIGIN + '/post_comment', + 'video_id': comments_info['video_id'], + 'accounts': accounts.account_list_data(), + 'include_video_id_input': not replies, + 'replying': replies, + } + + return flask.render_template('comments_page.html', + comments_info = comments_info, + comment_posting_box_info = comment_posting_box_info, + ) - metadata = ctoken_metadata(ctoken) - if replies: - page_title = 'Replies' - video_metadata = '' - comment_box = comment_box_template.substitute(form_action='', video_id_input='', post_text='Post reply', options=comment_box_account_options()) - comment_links = '' - else: - page_number = str(int(metadata['offset']/20) + 1) - page_title = 'Comments page ' + page_number - - video_metadata = video_metadata_template.substitute( - page_number = page_number, - sort = 'top' if metadata['sort'] == 0 else 'newest', - title = html.escape(comment_info['video_title']), - url = util.URL_ORIGIN + '/watch?v=' + metadata['video_id'], - thumbnail = '/i.ytimg.com/vi/'+ metadata['video_id'] + '/mqdefault.jpg', - ) - comment_box = comment_box_template.substitute( - form_action= util.URL_ORIGIN + '/post_comment', - video_id_input='''<input type="hidden" name="video_id" value="''' + metadata['video_id'] + '''">''', - post_text='Post comment', - options=comment_box_account_options(), - ) - - other_sort_url = util.URL_ORIGIN + '/comments?ctoken=' + make_comment_ctoken(metadata['video_id'], sort=1 - metadata['sort']) - other_sort_name = 'newest' if metadata['sort'] == 0 else 'top' - other_sort_link = '''<a class="sort-button" href="''' + other_sort_url + '''">Sort by ''' + other_sort_name + '''</a>''' - - - comment_links = '''<div class="comment-links">\n''' - comment_links += other_sort_link + '\n' - comment_links += '''</div>''' - - comments_html = get_comments_html(comment_info['comments']) - ctoken = comment_info['ctoken'] - if ctoken == '': - more_comments_button = '' - else: - more_comments_button = more_comments_template.substitute(url = util.URL_ORIGIN + '/comments?ctoken=' + ctoken) - comments_area = '<section class="comments-area">\n' - comments_area += video_metadata + comment_box + comment_links + '\n' - comments_area += '<div class="comments">\n' - comments_area += comments_html + '\n' - comments_area += '</div>\n' - comments_area += more_comments_button + '\n' - comments_area += '</section>\n' - return yt_comments_template.substitute( - header = html_common.get_header(), - comments_area = comments_area, - page_title = page_title, - ).encode('utf-8') diff --git a/youtube/html_common.py b/youtube/html_common.py deleted file mode 100644 index 8e65a1f..0000000 --- a/youtube/html_common.py +++ /dev/null @@ -1,379 +0,0 @@ -from youtube.template import Template -from youtube import local_playlist, yt_data_extract, util - -import json -import html - - -with open('yt_basic_template.html', 'r', encoding='utf-8') as file: - yt_basic_template = Template(file.read()) - - - - 
-page_button_template = Template('''<a class="page-button" href="$href">$page</a>''') -current_page_button_template = Template('''<div class="current-page-button">$page</a>''') - -medium_playlist_item_template = Template(''' - <div class="medium-item-box"> - <div class="medium-item"> - <a class="playlist-thumbnail-box" href="$url" title="$title"> - <img class="playlist-thumbnail-img" src="$thumbnail"> - <div class="playlist-thumbnail-info"> - <span>$size</span> - </div> - </a> - - <a class="title" href="$url" title="$title">$title</a> - - <div class="stats">$stats</div> - </div> - </div> -''') -medium_video_item_template = Template(''' - <div class="medium-item-box"> - <div class="medium-item"> - <a class="video-thumbnail-box" href="$url" title="$title"> - <img class="video-thumbnail-img" src="$thumbnail"> - <span class="video-duration">$duration</span> - </a> - - <a class="title" href="$url" title="$title">$title</a> - - <div class="stats">$stats</div> - - <span class="description">$description</span> - <span class="badges">$badges</span> - </div> - <input class="item-checkbox" type="checkbox" name="video_info_list" value="$video_info" form="playlist-edit"> - </div> -''') - -small_video_item_template = Template(''' - <div class="small-item-box"> - <div class="small-item"> - <a class="video-thumbnail-box" href="$url" title="$title"> - <img class="video-thumbnail-img" src="$thumbnail"> - <span class="video-duration">$duration</span> - </a> - <a class="title" href="$url" title="$title">$title</a> - - <address>$author</address> - <span class="views">$views</span> - - </div> - <input class="item-checkbox" type="checkbox" name="video_info_list" value="$video_info" form="playlist-edit"> - </div> -''') - -small_playlist_item_template = Template(''' - <div class="small-item-box"> - <div class="small-item"> - <a class="playlist-thumbnail-box" href="$url" title="$title"> - <img class="playlist-thumbnail-img" src="$thumbnail"> - <div class="playlist-thumbnail-info"> - <span>$size</span> - </div> - </a> - <a class="title" href="$url" title="$title">$title</a> - - <address>$author</address> - </div> - </div> -''') - -medium_channel_item_template = Template(''' - <div class="medium-item-box"> - <div class="medium-item"> - <a class="video-thumbnail-box" href="$url" title="$title"> - <img class="video-thumbnail-img" src="$thumbnail"> - <span class="video-duration">$duration</span> - </a> - - <a class="title" href="$url">$title</a> - - <span>$subscriber_count</span> - <span>$size</span> - - <span class="description">$description</span> - </div> - </div> -''') - - - - - - -header_template = Template(''' - <header> - - <form id="site-search" action="/youtube.com/search"> - <input type="search" name="query" class="search-box" value="$search_box_value"> - <button type="submit" value="Search" class="search-button">Search</button> - <div class="dropdown"> - <button class="dropdown-label">Options</button> - <div class="css-sucks"> - <div class="dropdown-content"> - <h3>Sort by</h3> - <input type="radio" id="sort_relevance" name="sort" value="0"> - <label for="sort_relevance">Relevance</label> - - <input type="radio" id="sort_upload_date" name="sort" value="2"> - <label for="sort_upload_date">Upload date</label> - - <input type="radio" id="sort_view_count" name="sort" value="3"> - <label for="sort_view_count">View count</label> - - <input type="radio" id="sort_rating" name="sort" value="1"> - <label for="sort_rating">Rating</label> - - - <h3>Upload date</h3> - <input type="radio" id="time_any" name="time" 
value="0"> - <label for="time_any">Any</label> - - <input type="radio" id="time_last_hour" name="time" value="1"> - <label for="time_last_hour">Last hour</label> - - <input type="radio" id="time_today" name="time" value="2"> - <label for="time_today">Today</label> - - <input type="radio" id="time_this_week" name="time" value="3"> - <label for="time_this_week">This week</label> - - <input type="radio" id="time_this_month" name="time" value="4"> - <label for="time_this_month">This month</label> - - <input type="radio" id="time_this_year" name="time" value="5"> - <label for="time_this_year">This year</label> - - <h3>Type</h3> - <input type="radio" id="type_any" name="type" value="0"> - <label for="type_any">Any</label> - - <input type="radio" id="type_video" name="type" value="1"> - <label for="type_video">Video</label> - - <input type="radio" id="type_channel" name="type" value="2"> - <label for="type_channel">Channel</label> - - <input type="radio" id="type_playlist" name="type" value="3"> - <label for="type_playlist">Playlist</label> - - <input type="radio" id="type_movie" name="type" value="4"> - <label for="type_movie">Movie</label> - - <input type="radio" id="type_show" name="type" value="5"> - <label for="type_show">Show</label> - - - <h3>Duration</h3> - <input type="radio" id="duration_any" name="duration" value="0"> - <label for="duration_any">Any</label> - - <input type="radio" id="duration_short" name="duration" value="1"> - <label for="duration_short">Short (< 4 minutes)</label> - - <input type="radio" id="duration_long" name="duration" value="2"> - <label for="duration_long">Long (> 20 minutes)</label> - - </div> - </div> - </div> - </form> - - <div id="header-right"> - <form id="playlist-edit" action="/youtube.com/edit_playlist" method="post" target="_self"> - <input name="playlist_name" id="playlist-name-selection" list="playlist-options" type="text"> - <datalist id="playlist-options"> -$playlists - </datalist> - <button type="submit" id="playlist-add-button" name="action" value="add">Add to playlist</button> - <button type="reset" id="item-selection-reset">Clear selection</button> - </form> - <a href="/youtube.com/playlists" id="local-playlists">Local playlists</a> - </div> - </header> -''') -playlist_option_template = Template('''<option value="$name">$name</option>''') -def get_header(search_box_value=""): - playlists = '' - for name in local_playlist.get_playlist_names(): - playlists += playlist_option_template.substitute(name = name) - return header_template.substitute(playlists = playlists, search_box_value = html.escape(search_box_value)) - - - - - - - - - - - -def badges_html(badges): - return ' | '.join(map(html.escape, badges)) - - -html_transform_dispatch = { - 'title': html.escape, - 'published': html.escape, - 'id': html.escape, - 'description': yt_data_extract.format_text_runs, - 'duration': html.escape, - 'thumbnail': lambda url: html.escape('/' + url.lstrip('/')), - 'size': html.escape, - 'author': html.escape, - 'author_url': lambda url: html.escape(util.URL_ORIGIN + url), - 'views': html.escape, - 'subscriber_count': html.escape, - 'badges': badges_html, - 'playlist_index': html.escape, -} - -def get_html_ready(item): - html_ready = {} - for key, value in item.items(): - try: - function = html_transform_dispatch[key] - except KeyError: - continue - html_ready[key] = function(value) - return html_ready - - -author_template_url = Template('''<address>By <a href="$author_url">$author</a></address>''') -author_template = 
Template('''<address><b>$author</b></address>''') -stat_templates = ( - Template('''<span class="views">$views</span>'''), - Template('''<time datetime="$datetime">$published</time>'''), -) -def get_stats(html_ready): - stats = [] - if 'author' in html_ready: - if 'author_url' in html_ready: - stats.append(author_template_url.substitute(html_ready)) - else: - stats.append(author_template.substitute(html_ready)) - for stat in stat_templates: - try: - stats.append(stat.strict_substitute(html_ready)) - except KeyError: - pass - return ' | '.join(stats) - -def video_item_html(item, template, html_exclude=set()): - - video_info = {} - for key in ('id', 'title', 'author'): - try: - video_info[key] = item[key] - except KeyError: - video_info[key] = '' - try: - video_info['duration'] = item['duration'] - except KeyError: - video_info['duration'] = 'Live' # livestreams don't have a duration - - html_ready = get_html_ready(item) - - html_ready['video_info'] = html.escape(json.dumps(video_info) ) - html_ready['url'] = util.URL_ORIGIN + "/watch?v=" + html_ready['id'] - html_ready['datetime'] = '' #TODO - - for key in html_exclude: - del html_ready[key] - html_ready['stats'] = get_stats(html_ready) - - return template.substitute(html_ready) - - -def playlist_item_html(item, template, html_exclude=set()): - html_ready = get_html_ready(item) - - html_ready['url'] = util.URL_ORIGIN + "/playlist?list=" + html_ready['id'] - html_ready['datetime'] = '' #TODO - - for key in html_exclude: - del html_ready[key] - html_ready['stats'] = get_stats(html_ready) - - return template.substitute(html_ready) - - - - - - - -page_button_template = Template('''<a class="page-button" href="$href">$page</a>''') -current_page_button_template = Template('''<div class="page-button">$page</div>''') - -def page_buttons_html(current_page, estimated_pages, url, current_query_string): - if current_page <= 5: - page_start = 1 - page_end = min(9, estimated_pages) - else: - page_start = current_page - 4 - page_end = min(current_page + 4, estimated_pages) - - result = "" - for page in range(page_start, page_end+1): - if page == current_page: - template = current_page_button_template - else: - template = page_button_template - result += template.substitute(page=page, href = url + "?" 
+ util.update_query_string(current_query_string, {'page': [str(page)]}) ) - return result - - - - - - - -showing_results_for = Template(''' - <div class="showing-results-for"> - <div>Showing results for <a>$corrected_query</a></div> - <div>Search instead for <a href="$original_query_url">$original_query</a></div> - </div> -''') - -did_you_mean = Template(''' - <div class="did-you-mean"> - <div>Did you mean <a href="$corrected_query_url">$corrected_query</a></div> - </div> -''') - -def renderer_html(renderer, additional_info={}, current_query_string=''): - type = list(renderer.keys())[0] - renderer = renderer[type] - if type == 'itemSectionRenderer': - return renderer_html(renderer['contents'][0], additional_info, current_query_string) - - if type == 'channelRenderer': - info = yt_data_extract.renderer_info(renderer) - html_ready = get_html_ready(info) - html_ready['url'] = util.URL_ORIGIN + "/channel/" + html_ready['id'] - return medium_channel_item_template.substitute(html_ready) - - if type in ('movieRenderer', 'clarificationRenderer'): - return '' - - info = yt_data_extract.renderer_info(renderer) - info.update(additional_info) - html_exclude = set(additional_info.keys()) - if type == 'compactVideoRenderer': - return video_item_html(info, small_video_item_template, html_exclude=html_exclude) - if type in ('compactPlaylistRenderer', 'compactRadioRenderer', 'compactShowRenderer'): - return playlist_item_html(info, small_playlist_item_template, html_exclude=html_exclude) - if type in ('videoRenderer', 'gridVideoRenderer'): - return video_item_html(info, medium_video_item_template, html_exclude=html_exclude) - if type in ('playlistRenderer', 'gridPlaylistRenderer', 'radioRenderer', 'gridRadioRenderer', 'gridShowRenderer', 'showRenderer'): - return playlist_item_html(info, medium_playlist_item_template, html_exclude=html_exclude) - - #print(renderer) - #raise NotImplementedError('Unknown renderer type: ' + type) - return ''
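Note: the deleted renderer_html above was the old dispatch layer: each item in YouTube's polymer JSON is a one-key dict mapping a renderer type to its content, and the function chose an HTML template from that key. The Flask version splits this between yt_data_extract.parse_info_prepare_for_html (parsing) and the Jinja macros in templates/common_elements.html (rendering). A self-contained sketch of the dispatch idea, with made-up data:

    # Hypothetical, simplified stand-in for the deleted renderer_html dispatch
    def parse_renderer(renderer):
        type_name = list(renderer.keys())[0]    # each renderer is {type: content}
        content = renderer[type_name]
        if type_name == 'itemSectionRenderer':  # wrapper type: unwrap and recurse
            return parse_renderer(content['contents'][0])
        if type_name in ('videoRenderer', 'gridVideoRenderer'):
            return {'type': 'video', 'title': content.get('title', '')}
        if type_name in ('playlistRenderer', 'gridPlaylistRenderer'):
            return {'type': 'playlist', 'title': content.get('title', '')}
        return {'type': 'unsupported'}          # movies, clarifications, etc.

    example = {'itemSectionRenderer': {'contents': [{'videoRenderer': {'title': 'test'}}]}}
    assert parse_renderer(example) == {'type': 'video', 'title': 'test'}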
\ No newline at end of file diff --git a/youtube/local_playlist.py b/youtube/local_playlist.py index d083e33..88d020f 100644 --- a/youtube/local_playlist.py +++ b/youtube/local_playlist.py @@ -1,5 +1,5 @@ -from youtube.template import Template -from youtube import util, html_common +from youtube import util, yt_data_extract +from youtube import yt_app import settings import os @@ -7,13 +7,14 @@ import json import html import gevent import urllib +import math + +import flask +from flask import request playlists_directory = os.path.join(settings.data_dir, "playlists") thumbnails_directory = os.path.join(settings.data_dir, "playlist_thumbnails") -with open('yt_local_playlist_template.html', 'r', encoding='utf-8') as file: - local_playlist_template = Template(file.read()) - def video_ids_in_playlist(name): try: with open(os.path.join(playlists_directory, name + ".txt"), 'r', encoding='utf-8') as file: @@ -36,36 +37,34 @@ def add_to_playlist(name, video_info_list): gevent.spawn(util.download_thumbnails, os.path.join(thumbnails_directory, name), missing_thumbnails) -def get_local_playlist_page(name): +def get_local_playlist_videos(name, offset=0, amount=50): try: thumbnails = set(os.listdir(os.path.join(thumbnails_directory, name))) except FileNotFoundError: thumbnails = set() missing_thumbnails = [] - videos_html = '' + videos = [] with open(os.path.join(playlists_directory, name + ".txt"), 'r', encoding='utf-8') as file: - videos = file.read() - videos = videos.splitlines() - for video in videos: + data = file.read() + videos_json = data.splitlines() + for video_json in videos_json: try: - info = json.loads(video) + info = json.loads(video_json) if info['id'] + ".jpg" in thumbnails: info['thumbnail'] = "/youtube.com/data/playlist_thumbnails/" + name + "/" + info['id'] + ".jpg" else: info['thumbnail'] = util.get_thumbnail_url(info['id']) missing_thumbnails.append(info['id']) - videos_html += html_common.video_item_html(info, html_common.small_video_item_template) + info['item_size'] = 'small' + info['type'] = 'video' + yt_data_extract.add_extra_html_info(info) + videos.append(info) except json.decoder.JSONDecodeError: - pass + if not video_json.strip() == '': + print('Corrupt playlist video entry: ' + video_json) gevent.spawn(util.download_thumbnails, os.path.join(thumbnails_directory, name), missing_thumbnails) - return local_playlist_template.substitute( - page_title = name + ' - Local playlist', - header = html_common.get_header(), - videos = videos_html, - title = name, - page_buttons = '' - ) + return videos[offset:offset+amount], len(videos) def get_playlist_names(): try: @@ -98,47 +97,47 @@ def remove_from_playlist(name, video_info_list): for file in to_delete: os.remove(os.path.join(thumbnails_directory, name, file)) -def get_playlists_list_page(): - page = '''<ul>\n''' - list_item_template = Template(''' <li><a href="$url">$name</a></li>\n''') - for name in get_playlist_names(): - page += list_item_template.substitute(url = html.escape(util.URL_ORIGIN + '/playlists/' + name), name = html.escape(name)) - page += '''</ul>\n''' - return html_common.yt_basic_template.substitute( - page_title = "Local playlists", - header = html_common.get_header(), - style = '', - page = page, - ) - - -def get_playlist_page(env, start_response): - start_response('200 OK', [('Content-type','text/html'),]) - path_parts = env['path_parts'] - if len(path_parts) == 1: - return get_playlists_list_page().encode('utf-8') - else: - return get_local_playlist_page(path_parts[1]).encode('utf-8') + return 
len(videos_out) -def path_edit_playlist(env, start_response): - '''Called when making changes to the playlist from that playlist's page''' - parameters = env['parameters'] - if parameters['action'][0] == 'remove': - playlist_name = env['path_parts'][1] - remove_from_playlist(playlist_name, parameters['video_info_list']) - start_response('303 See Other', [('Location', util.URL_ORIGIN + env['PATH_INFO']),] ) - return b'' +@yt_app.route('/playlists', methods=['GET']) +@yt_app.route('/playlists/<playlist_name>', methods=['GET']) +def get_local_playlist_page(playlist_name=None): + if playlist_name is None: + playlists = [(name, util.URL_ORIGIN + '/playlists/' + name) for name in get_playlist_names()] + return flask.render_template('local_playlists_list.html', playlists=playlists) + else: + page = int(request.args.get('page', 1)) + offset = 50*(page - 1) + videos, num_videos = get_local_playlist_videos(playlist_name, offset=offset, amount=50) + return flask.render_template('local_playlist.html', + playlist_name = playlist_name, + videos = videos, + num_pages = math.ceil(num_videos/50), + parameters_dictionary = request.args, + ) + +@yt_app.route('/playlists/<playlist_name>', methods=['POST']) +def path_edit_playlist(playlist_name): + '''Called when making changes to the playlist from that playlist's page''' + if request.values['action'] == 'remove': + videos_to_remove = request.values.getlist('video_info_list') + number_of_videos_remaining = remove_from_playlist(playlist_name, videos_to_remove) + redirect_page_number = min(int(request.values.get('page', 1)), math.ceil(number_of_videos_remaining/50)) + return flask.redirect(util.URL_ORIGIN + request.path + '?page=' + str(redirect_page_number)) else: - start_response('400 Bad Request', [('Content-type', 'text/plain'),]) - return b'400 Bad Request' + flask.abort(400) -def edit_playlist(env, start_response): +@yt_app.route('/edit_playlist', methods=['POST']) +def edit_playlist(): '''Called when adding videos to a playlist from elsewhere''' - parameters = env['parameters'] - if parameters['action'][0] == 'add': - add_to_playlist(parameters['playlist_name'][0], parameters['video_info_list']) - start_response('204 No Content', ()) + if request.values['action'] == 'add': + add_to_playlist(request.values['playlist_name'], request.values.getlist('video_info_list')) + return '', 204 else: - start_response('400 Bad Request', [('Content-type', 'text/plain'),]) - return b'400 Bad Request' + flask.abort(400) + +@yt_app.route('/data/playlist_thumbnails/<playlist_name>/<thumbnail>') +def serve_thumbnail(playlist_name, thumbnail): + # .. 
is necessary because flask always uses the application directory at ./youtube, not the working directory + return flask.send_from_directory(os.path.join('..', thumbnails_directory, playlist_name), thumbnail) diff --git a/youtube/opensearch.xml b/youtube/opensearch.xml index fce9d71..9f035a6 100644 --- a/youtube/opensearch.xml +++ b/youtube/opensearch.xml @@ -4,7 +4,7 @@ <InputEncoding>UTF-8</InputEncoding> <Image width="16" height="16">data:image/x-icon;base64,AAABAAEAEBAAAAEACAAlAgAAFgAAAIlQTkcNChoKAAAADUlIRFIAAAAQAAAAEAgGAAAAH/P/YQAAAexJREFUOI2lkzFPmlEUhp/73fshtCUCRtvQkJoKMrDQJvoHnBzUhc3EH0DUQf+As6tujo4M6mTiIDp0kGiMTRojTRNSW2o12iD4YYXv3g7Qr4O0ScM7npz7vOe+J0fk83lDF7K6eQygwkdHhI+P0bYNxmBXq5RmZui5vGQgn0f7fKi7O4oLC1gPD48BP9JpnpRKJFZXcQMB3m1u4vr9NHp76d/bo39/n4/z84ROThBa4/r91OJxMKb9BSn5mskAIOt1eq6uEFpjVyrEcjk+T0+TXlzkbTZLuFDAur9/nIFRipuREQCe7+zgBgK8mZvj/fIylVTKa/6UzXKbSnnuHkA0GnwbH/cA0a0takND3IyOEiwWAXBiMYTWjzLwtvB9bAyAwMUF8ZUVPiwtYTWbHqA6PIxoNv8OMLbN3eBga9TZWYQxaKX+AJJJhOv+AyAlT0slAG6TSX5n8+zszJugkzxA4PzcK9YSCQCk42DXaq1aGwqgfT5ebG9jpMQyUjKwu8vrtbWWqxC83NjAd31NsO2uleJnX58HCJ6eEjk8BGNQAA+RCOXJScpTU2AMwnUxlkXk4ACA+2iUSKGArNeRjkMsl6M8MYHQGtHpmIxSvFpfRzoORinQGqvZBCEwQoAxfMlkaIRCnQH/o66v8Re19MavaDNLfgAAAABJRU5ErkJggg==</Image> -<Url type="text/html" method="GET" template="http://localhost/youtube.com/search"> +<Url type="text/html" method="GET" template="http://localhost:$port_number/youtube.com/search"> <Param name="query" value="{searchTerms}"/> </Url> <SearchForm>http://localhost:$port_number/youtube.com/search</SearchForm> diff --git a/youtube/playlist.py b/youtube/playlist.py index fbe6448..5df7074 100644 --- a/youtube/playlist.py +++ b/youtube/playlist.py @@ -1,4 +1,5 @@ -from youtube import util, yt_data_extract, html_common, template, proto +from youtube import util, yt_data_extract, proto +from youtube import yt_app import base64 import urllib @@ -6,10 +7,8 @@ import json import string import gevent import math - -with open("yt_playlist_template.html", "r") as file: - yt_playlist_template = template.Template(file.read()) - +from flask import request +import flask @@ -48,9 +47,7 @@ headers_1 = ( def playlist_first_page(playlist_id, report_text = "Retrieved playlist"): url = 'https://m.youtube.com/playlist?list=' + playlist_id + '&pbj=1' - content = util.fetch_url(url, util.mobile_ua + headers_1, report_text=report_text) - '''with open('debug/playlist_debug', 'wb') as f: - f.write(content)''' + content = util.fetch_url(url, util.mobile_ua + headers_1, report_text=report_text, debug_name='playlist_first_page') content = json.loads(util.uppercase_escape(content.decode('utf-8'))) return content @@ -68,22 +65,21 @@ def get_videos(playlist_id, page): 'X-YouTube-Client-Version': '2.20180508', } - content = util.fetch_url(url, headers, report_text="Retrieved playlist") - '''with open('debug/playlist_debug', 'wb') as f: - f.write(content)''' + content = util.fetch_url(url, headers, report_text="Retrieved playlist", debug_name='playlist_videos') info = json.loads(util.uppercase_escape(content.decode('utf-8'))) return info -playlist_stat_template = string.Template(''' -<div>$stat</div>''') -def get_playlist_page(env, start_response): - start_response('200 OK', [('Content-type','text/html'),]) - parameters = env['parameters'] - playlist_id = parameters['list'][0] - page = parameters.get("page", "1")[0] - if page == "1": +@yt_app.route('/playlist') +def get_playlist_page(): + if 'list' not in request.args: + flask.abort(400) + + playlist_id = request.args.get('list') + page = request.args.get('page', '1') + + 
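Note on serve_thumbnail in the local_playlist.py hunk above: flask's send_from_directory (via send_file) resolves a relative directory against the application's root_path, which for this app is the ./youtube package directory rather than the process working directory, hence the leading '..' before joining the thumbnails directory. A minimal sketch of the same behavior, assuming a generic app layout (the route and directory names here are hypothetical):

    import os
    import flask

    app = flask.Flask(__name__)  # app.root_path is the package directory

    @app.route('/files/<name>')
    def serve_file(name):
        # A bare 'data' would be looked up under app.root_path; the '..'
        # climbs from the package directory to its parent first.
        return flask.send_from_directory(os.path.join('..', 'data'), name)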
if page == '1': first_page_json = playlist_first_page(playlist_id) this_page_json = first_page_json else: @@ -98,26 +94,20 @@ def get_playlist_page(env, start_response): video_list = this_page_json['response']['contents']['singleColumnBrowseResultsRenderer']['tabs'][0]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['playlistVideoListRenderer']['contents'] except KeyError: # other pages video_list = this_page_json['response']['continuationContents']['playlistVideoListContinuation']['contents'] - videos_html = '' - for video_json in video_list: - info = yt_data_extract.renderer_info(video_json['playlistVideoRenderer']) - videos_html += html_common.video_item_html(info, html_common.small_video_item_template) + parsed_video_list = [yt_data_extract.parse_info_prepare_for_html(video_json) for video_json in video_list] + + + metadata = yt_data_extract.renderer_info(first_page_json['response']['header']) + yt_data_extract.prefix_urls(metadata) - metadata = yt_data_extract.renderer_info(first_page_json['response']['header']['playlistHeaderRenderer']) video_count = int(metadata['size'].replace(',', '')) - page_buttons = html_common.page_buttons_html(int(page), math.ceil(video_count/20), util.URL_ORIGIN + "/playlist", env['QUERY_STRING']) - - html_ready = html_common.get_html_ready(metadata) - html_ready['page_title'] = html_ready['title'] + ' - Page ' + str(page) - - stats = '' - stats += playlist_stat_template.substitute(stat=html_ready['size'] + ' videos') - stats += playlist_stat_template.substitute(stat=html_ready['views']) - return yt_playlist_template.substitute( - header = html_common.get_header(), - videos = videos_html, - page_buttons = page_buttons, - stats = stats, - **html_ready + metadata['size'] += ' videos' + + return flask.render_template('playlist.html', + video_list = parsed_video_list, + num_pages = math.ceil(video_count/20), + parameters_dictionary = request.args, + + **metadata ).encode('utf-8') diff --git a/youtube/post_comment.py b/youtube/post_comment.py index 876a1c0..25d0e3a 100644 --- a/youtube/post_comment.py +++ b/youtube/post_comment.py @@ -1,5 +1,6 @@ # Contains functions having to do with posting/editing/deleting comments -from youtube import util, html_common, proto, comments, accounts +from youtube import util, proto, comments, accounts +from youtube import yt_app import settings import urllib @@ -8,6 +9,9 @@ import re import traceback import os +import flask +from flask import request + def _post_comment(text, video_id, session_token, cookiejar): headers = { 'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1', @@ -31,13 +35,11 @@ def _post_comment(text, video_id, session_token, cookiejar): data = urllib.parse.urlencode(data_dict).encode() - content = util.fetch_url("https://m.youtube.com/service_ajax?name=createCommentEndpoint", headers=headers, data=data, cookiejar_send=cookiejar) + content = util.fetch_url("https://m.youtube.com/service_ajax?name=createCommentEndpoint", headers=headers, data=data, cookiejar_send=cookiejar, debug_name='post_comment') code = json.loads(content)['code'] print("Comment posting code: " + code) return code - '''with open('debug/post_comment_response', 'wb') as f: - f.write(content)''' def _post_comment_reply(text, video_id, parent_comment_id, session_token, cookiejar): @@ -62,13 +64,11 @@ def _post_comment_reply(text, video_id, parent_comment_id, session_token, cookie } data = 
urllib.parse.urlencode(data_dict).encode() - content = util.fetch_url("https://m.youtube.com/service_ajax?name=createCommentReplyEndpoint", headers=headers, data=data, cookiejar_send=cookiejar) + content = util.fetch_url("https://m.youtube.com/service_ajax?name=createCommentReplyEndpoint", headers=headers, data=data, cookiejar_send=cookiejar, debug_name='post_reply') code = json.loads(content)['code'] print("Comment posting code: " + code) return code - '''with open('debug/post_comment_response', 'wb') as f: - f.write(content)''' def _delete_comment(video_id, comment_id, author_id, session_token, cookiejar): headers = { @@ -109,108 +109,73 @@ def get_session_token(video_id, cookiejar): else: raise Exception("Couldn't find xsrf_token") -def delete_comment(env, start_response): - parameters = env['parameters'] - video_id = parameters['video_id'][0] - cookiejar = accounts.account_cookiejar(parameters['channel_id'][0]) +@yt_app.route('/delete_comment', methods=['POST']) +def delete_comment(): + video_id = request.values['video_id'] + cookiejar = accounts.account_cookiejar(request.values['channel_id']) token = get_session_token(video_id, cookiejar) - code = _delete_comment(video_id, parameters['comment_id'][0], parameters['author_id'][0], token, cookiejar) + code = _delete_comment(video_id, request.values['comment_id'], request.values['author_id'], token, cookiejar) if code == "SUCCESS": - start_response('303 See Other', [('Location', util.URL_ORIGIN + '/comment_delete_success'),] ) + return flask.redirect(util.URL_ORIGIN + '/comment_delete_success', 303) else: - start_response('303 See Other', [('Location', util.URL_ORIGIN + '/comment_delete_fail'),] ) + return flask.redirect(util.URL_ORIGIN + '/comment_delete_fail', 303) + +@yt_app.route('/comment_delete_success') +def comment_delete_success(): + return flask.render_template('status.html', title='Success', message='Successfully deleted comment') -def post_comment(env, start_response): - parameters = env['parameters'] - video_id = parameters['video_id'][0] - channel_id = parameters['channel_id'][0] +@yt_app.route('/comment_delete_fail') +def comment_delete_fail(): + return flask.render_template('status.html', title='Error', message='Failed to delete comment') + +@yt_app.route('/post_comment', methods=['POST']) +@yt_app.route('/comments', methods=['POST']) +def post_comment(): + video_id = request.values['video_id'] + channel_id = request.values['channel_id'] cookiejar = accounts.account_cookiejar(channel_id) token = get_session_token(video_id, cookiejar) - if 'parent_id' in parameters: - code = _post_comment_reply(parameters['comment_text'][0], parameters['video_id'][0], parameters['parent_id'][0], token, cookiejar) - start_response('303 See Other', (('Location', util.URL_ORIGIN + '/comments?' + env['QUERY_STRING']),) ) - + if 'parent_id' in request.values: + code = _post_comment_reply(request.values['comment_text'], request.values['video_id'], request.values['parent_id'], token, cookiejar) + return flask.redirect(util.URL_ORIGIN + '/comments?' 
+ request.query_string.decode('utf-8'), 303) else: - code = _post_comment(parameters['comment_text'][0], parameters['video_id'][0], token, cookiejar) - start_response('303 See Other', (('Location', util.URL_ORIGIN + '/comments?ctoken=' + comments.make_comment_ctoken(video_id, sort=1)),) ) + code = _post_comment(request.values['comment_text'], request.values['video_id'], token, cookiejar) + return flask.redirect(util.URL_ORIGIN + '/comments?ctoken=' + comments.make_comment_ctoken(video_id, sort=1), 303) - return b'' +@yt_app.route('/delete_comment', methods=['GET']) +def get_delete_comment_page(): + parameters = [(parameter_name, request.args[parameter_name]) for parameter_name in ('video_id', 'channel_id', 'author_id', 'comment_id')] + return flask.render_template('delete_comment.html', parameters = parameters) -def get_delete_comment_page(env, start_response): - start_response('200 OK', [('Content-type','text/html'),]) - parameters = env['parameters'] - style = ''' - main{ - display: grid; - grid-template-columns: minmax(0px, 3fr) 640px 40px 500px minmax(0px,2fr); - align-content: start; - } - main > div, main > form{ - margin-top:20px; - grid-column:2; - } - ''' - - page = ''' - <div>Are you sure you want to delete this comment?</div> - <form action="" method="POST">''' - for parameter in ('video_id', 'channel_id', 'author_id', 'comment_id'): - page += '''\n <input type="hidden" name="''' + parameter + '''" value="''' + parameters[parameter][0] + '''">''' - page += ''' - <input type="submit" value="Yes, delete it"> - </form>''' - return html_common.yt_basic_template.substitute( - page_title = "Delete comment?", - style = style, - header = html_common.get_header(), - page = page, - ).encode('utf-8') - -def get_post_comment_page(env, start_response): - start_response('200 OK', [('Content-type','text/html'),]) - parameters = env['parameters'] - video_id = parameters['video_id'][0] - parent_id = util.default_multi_get(parameters, 'parent_id', 0, default='') +@yt_app.route('/post_comment', methods=['GET']) +def get_post_comment_page(): + video_id = request.args['video_id'] + parent_id = request.args.get('parent_id', '') - style = ''' main{ - display: grid; - grid-template-columns: 3fr 2fr; -} -.left{ - display:grid; - grid-template-columns: 1fr 640px; -} -textarea{ - width: 460px; - height: 85px; -} -.comment-form{ - grid-column:2; - justify-content:start; -}''' if parent_id: # comment reply - comment_box = comments.comment_box_template.substitute( - form_action = util.URL_ORIGIN + '/comments?parent_id=' + parent_id + "&video_id=" + video_id, - video_id_input = '', - post_text = "Post reply", - options=comments.comment_box_account_options(), - ) + form_action = util.URL_ORIGIN + '/comments?parent_id=' + parent_id + "&video_id=" + video_id + replying = True else: - comment_box = comments.comment_box_template.substitute( - form_action = util.URL_ORIGIN + '/post_comment', - video_id_input = '''<input type="hidden" name="video_id" value="''' + video_id + '''">''', - post_text = "Post comment", - options=comments.comment_box_account_options(), - ) - - page = '''<div class="left">\n''' + comment_box + '''</div>\n''' - return html_common.yt_basic_template.substitute( - page_title = "Post comment reply" if parent_id else "Post a comment", - style = style, - header = html_common.get_header(), - page = page, - ).encode('utf-8') + form_action = '' + replying = False + + + comment_posting_box_info = { + 'form_action': form_action, + 'video_id': video_id, + 'accounts': accounts.account_list_data(), + 
'include_video_id_input': not replying, + 'replying': replying, + } + return flask.render_template('post_comment.html', + comment_posting_box_info = comment_posting_box_info, + replying = replying, + ) + + + + diff --git a/youtube/search.py b/youtube/search.py index 0cef0f3..e35d0cb 100644 --- a/youtube/search.py +++ b/youtube/search.py @@ -1,16 +1,14 @@ -from youtube import util, html_common, yt_data_extract, proto +from youtube import util, yt_data_extract, proto, local_playlist +from youtube import yt_app +import settings import json import urllib -import html -from string import Template import base64 from math import ceil - - -with open("yt_search_results_template.html", "r") as file: - yt_search_results_template = file.read() - +import mimetypes +from flask import request +import flask # Sort: 1 # Upload date: 2 @@ -55,88 +53,81 @@ def get_search_json(query, page, autocorrect, sort, filters): 'X-YouTube-Client-Version': '2.20180418', } url += "&pbj=1&sp=" + page_number_to_sp_parameter(page, autocorrect, sort, filters).replace("=", "%3D") - content = util.fetch_url(url, headers=headers, report_text="Got search results") + content = util.fetch_url(url, headers=headers, report_text="Got search results", debug_name='search_results') info = json.loads(content) return info - -showing_results_for = Template(''' - <div>Showing results for <a>$corrected_query</a></div> - <div>Search instead for <a href="$original_query_url">$original_query</a></div> -''') -did_you_mean = Template(''' - <div>Did you mean <a href="$corrected_query_url">$corrected_query</a></div> -''') -def get_search_page(env, start_response): - start_response('200 OK', [('Content-type','text/html'),]) - parameters = env['parameters'] - if len(parameters) == 0: - return html_common.yt_basic_template.substitute( - page_title = "Search", - header = html_common.get_header(), - style = '', - page = '', - ).encode('utf-8') - query = parameters["query"][0] - page = parameters.get("page", "1")[0] - autocorrect = int(parameters.get("autocorrect", "1")[0]) - sort = int(parameters.get("sort", "0")[0]) + +@yt_app.route('/search') +def get_search_page(): + if len(request.args) == 0: + return flask.render_template('base.html', page_title="Search") + + if 'query' not in request.args: + flask.abort(400) + + query = request.args.get("query") + page = request.args.get("page", "1") + autocorrect = int(request.args.get("autocorrect", "1")) + sort = int(request.args.get("sort", "0")) filters = {} - filters['time'] = int(parameters.get("time", "0")[0]) - filters['type'] = int(parameters.get("type", "0")[0]) - filters['duration'] = int(parameters.get("duration", "0")[0]) + filters['time'] = int(request.args.get("time", "0")) + filters['type'] = int(request.args.get("type", "0")) + filters['duration'] = int(request.args.get("duration", "0")) info = get_search_json(query, page, autocorrect, sort, filters) estimated_results = int(info[1]['response']['estimatedResults']) estimated_pages = ceil(estimated_results/20) results = info[1]['response']['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'] - - corrections = '' - result_list_html = "" + + parsed_results = [] + corrections = {'type': None} for renderer in results: type = list(renderer.keys())[0] if type == 'shelfRenderer': continue if type == 'didYouMeanRenderer': renderer = renderer[type] - corrected_query_string = parameters.copy() + corrected_query_string = request.args.to_dict(flat=False) corrected_query_string['query'] = 
[renderer['correctedQueryEndpoint']['searchEndpoint']['query']] corrected_query_url = util.URL_ORIGIN + '/search?' + urllib.parse.urlencode(corrected_query_string, doseq=True) - corrections = did_you_mean.substitute( - corrected_query_url = corrected_query_url, - corrected_query = yt_data_extract.format_text_runs(renderer['correctedQuery']['runs']), - ) + + corrections = { + 'type': 'did_you_mean', + 'corrected_query': yt_data_extract.format_text_runs(renderer['correctedQuery']['runs']), + 'corrected_query_url': corrected_query_url, + } continue if type == 'showingResultsForRenderer': renderer = renderer[type] - no_autocorrect_query_string = parameters.copy() + no_autocorrect_query_string = request.args.to_dict(flat=False) no_autocorrect_query_string['autocorrect'] = ['0'] no_autocorrect_query_url = util.URL_ORIGIN + '/search?' + urllib.parse.urlencode(no_autocorrect_query_string, doseq=True) - corrections = showing_results_for.substitute( - corrected_query = yt_data_extract.format_text_runs(renderer['correctedQuery']['runs']), - original_query_url = no_autocorrect_query_url, - original_query = html.escape(renderer['originalQuery']['simpleText']), - ) + + corrections = { + 'type': 'showing_results_for', + 'corrected_query': yt_data_extract.format_text_runs(renderer['correctedQuery']['runs']), + 'original_query_url': no_autocorrect_query_url, + 'original_query': renderer['originalQuery']['simpleText'], + } continue - result_list_html += html_common.renderer_html(renderer, current_query_string=env['QUERY_STRING']) - - page = int(page) - if page <= 5: - page_start = 1 - page_end = min(9, estimated_pages) - else: - page_start = page - 4 - page_end = min(page + 4, estimated_pages) - - - result = Template(yt_search_results_template).substitute( - header = html_common.get_header(query), - results = result_list_html, - page_title = query + " - Search", - search_box_value = html.escape(query), - number_of_results = '{:,}'.format(estimated_results), - number_of_pages = '{:,}'.format(estimated_pages), - page_buttons = html_common.page_buttons_html(page, estimated_pages, util.URL_ORIGIN + "/search", env['QUERY_STRING']), - corrections = corrections - ) - return result.encode('utf-8') + + info = yt_data_extract.parse_info_prepare_for_html(renderer) + if info['type'] != 'unsupported': + parsed_results.append(info) + + return flask.render_template('search.html', + header_playlist_names = local_playlist.get_playlist_names(), + query = query, + estimated_results = estimated_results, + estimated_pages = estimated_pages, + corrections = corrections, + results = parsed_results, + parameters_dictionary = request.args, + ) + +@yt_app.route('/opensearch.xml') +def get_search_engine_xml(): + with open("youtube/opensearch.xml", 'rb') as f: + content = f.read().replace(b'$port_number', str(settings.port_number).encode()) + return flask.Response(content, mimetype='application/xml') diff --git a/youtube/comments.css b/youtube/static/comments.css index 4cec3e1..4cec3e1 100644 --- a/youtube/comments.css +++ b/youtube/static/comments.css diff --git a/youtube/favicon.ico b/youtube/static/favicon.ico Binary files differ index 9d6417c..9d6417c 100644 --- a/youtube/favicon.ico +++ b/youtube/static/favicon.ico diff --git a/youtube/shared.css b/youtube/static/shared.css index cd82164..848b8da 100644 --- a/youtube/shared.css +++ b/youtube/static/shared.css @@ -222,6 +222,12 @@ address{ max-height:2.4em; overflow:hidden; } + .medium-item .stats > *::after{ + content: " | "; + } + .medium-item .stats > *:last-child::after{ + 
content: ""; + } .medium-item .description{ grid-column: 2 / span 2; diff --git a/youtube/template.py b/youtube/template.py deleted file mode 100644 index d1a5e58..0000000 --- a/youtube/template.py +++ /dev/null @@ -1,132 +0,0 @@ - -import re as _re -from collections import ChainMap as _ChainMap - -class _TemplateMetaclass(type): - pattern = r""" - %(delim)s(?: - (?P<escaped>%(delim)s) | # Escape sequence of two delimiters - (?P<named>%(id)s) | # delimiter and a Python identifier - {(?P<braced>%(id)s)} | # delimiter and a braced identifier - (?P<invalid>) # Other ill-formed delimiter exprs - ) - """ - - def __init__(cls, name, bases, dct): - super(_TemplateMetaclass, cls).__init__(name, bases, dct) - if 'pattern' in dct: - pattern = cls.pattern - else: - pattern = _TemplateMetaclass.pattern % { - 'delim' : _re.escape(cls.delimiter), - 'id' : cls.idpattern, - } - cls.pattern = _re.compile(pattern, cls.flags | _re.VERBOSE) - - -class Template(metaclass=_TemplateMetaclass): - """A string class for supporting $-substitutions.""" - - delimiter = '$' - idpattern = r'[_a-z][_a-z0-9]*' - flags = _re.IGNORECASE - - def __init__(self, template): - self.template = template - - # Search for $$, $identifier, ${identifier}, and any bare $'s - - def _invalid(self, mo): - i = mo.start('invalid') - lines = self.template[:i].splitlines(keepends=True) - if not lines: - colno = 1 - lineno = 1 - else: - colno = i - len(''.join(lines[:-1])) - lineno = len(lines) - raise ValueError('Invalid placeholder in string: line %d, col %d' % - (lineno, colno)) - - def substitute(*args, **kws): - if not args: - raise TypeError("descriptor 'substitute' of 'Template' object " - "needs an argument") - self, *args = args # allow the "self" keyword be passed - if len(args) > 1: - raise TypeError('Too many positional arguments') - if not args: - mapping = kws - elif kws: - mapping = _ChainMap(kws, args[0]) - else: - mapping = args[0] - # Helper function for .sub() - def convert(mo): - # Check the most common path first. - named = mo.group('named') or mo.group('braced') - if named is not None: - return str(mapping.get(named,'')) - if mo.group('escaped') is not None: - return self.delimiter - if mo.group('invalid') is not None: - self._invalid(mo) - raise ValueError('Unrecognized named group in pattern', - self.pattern) - return self.pattern.sub(convert, self.template) - - def strict_substitute(*args, **kws): - if not args: - raise TypeError("descriptor 'substitute' of 'Template' object " - "needs an argument") - self, *args = args # allow the "self" keyword be passed - if len(args) > 1: - raise TypeError('Too many positional arguments') - if not args: - mapping = kws - elif kws: - mapping = _ChainMap(kws, args[0]) - else: - mapping = args[0] - # Helper function for .sub() - def convert(mo): - # Check the most common path first. 
- named = mo.group('named') or mo.group('braced') - if named is not None: - return str(mapping[named]) - if mo.group('escaped') is not None: - return self.delimiter - if mo.group('invalid') is not None: - self._invalid(mo) - raise ValueError('Unrecognized named group in pattern', - self.pattern) - return self.pattern.sub(convert, self.template) - - def safe_substitute(*args, **kws): - if not args: - raise TypeError("descriptor 'safe_substitute' of 'Template' object " - "needs an argument") - self, *args = args # allow the "self" keyword be passed - if len(args) > 1: - raise TypeError('Too many positional arguments') - if not args: - mapping = kws - elif kws: - mapping = _ChainMap(kws, args[0]) - else: - mapping = args[0] - # Helper function for .sub() - def convert(mo): - named = mo.group('named') or mo.group('braced') - if named is not None: - try: - return str(mapping[named]) - except KeyError: - return mo.group() - if mo.group('escaped') is not None: - return self.delimiter - if mo.group('invalid') is not None: - return mo.group() - raise ValueError('Unrecognized named group in pattern', - self.pattern) - return self.pattern.sub(convert, self.template) diff --git a/youtube/templates/base.html b/youtube/templates/base.html new file mode 100644 index 0000000..72e3691 --- /dev/null +++ b/youtube/templates/base.html @@ -0,0 +1,114 @@ +<!DOCTYPE html> +<html> + <head> + <meta charset="utf-8"> + <title>{{ page_title }}</title> + <meta http-equiv="Content-Security-Policy" content="default-src 'self' 'unsafe-inline'; script-src 'none'; media-src 'self' https://*.googlevideo.com"> + <link href="/youtube.com/static/shared.css" type="text/css" rel="stylesheet"> + <link href="/youtube.com/static/comments.css" type="text/css" rel="stylesheet"> + <link href="/youtube.com/static/favicon.ico" type="image/x-icon" rel="icon"> + <link title="Youtube local" href="/youtube.com/opensearch.xml" rel="search" type="application/opensearchdescription+xml"> + <style type="text/css"> +{% block style %} +{{ style }} +{% endblock %} + </style> + </head> + <body> + <header> + <form id="site-search" action="/youtube.com/search"> + <input type="search" name="query" class="search-box" value="{{ search_box_value }}"> + <button type="submit" value="Search" class="search-button">Search</button> + <div class="dropdown"> + <button class="dropdown-label">Options</button> + <div class="css-sucks"> + <div class="dropdown-content"> + <h3>Sort by</h3> + <input type="radio" id="sort_relevance" name="sort" value="0"> + <label for="sort_relevance">Relevance</label> + + <input type="radio" id="sort_upload_date" name="sort" value="2"> + <label for="sort_upload_date">Upload date</label> + + <input type="radio" id="sort_view_count" name="sort" value="3"> + <label for="sort_view_count">View count</label> + + <input type="radio" id="sort_rating" name="sort" value="1"> + <label for="sort_rating">Rating</label> + + + <h3>Upload date</h3> + <input type="radio" id="time_any" name="time" value="0"> + <label for="time_any">Any</label> + + <input type="radio" id="time_last_hour" name="time" value="1"> + <label for="time_last_hour">Last hour</label> + + <input type="radio" id="time_today" name="time" value="2"> + <label for="time_today">Today</label> + + <input type="radio" id="time_this_week" name="time" value="3"> + <label for="time_this_week">This week</label> + + <input type="radio" id="time_this_month" name="time" value="4"> + <label for="time_this_month">This month</label> + + <input type="radio" id="time_this_year" name="time" value="5"> + 
<label for="time_this_year">This year</label> + + <h3>Type</h3> + <input type="radio" id="type_any" name="type" value="0"> + <label for="type_any">Any</label> + + <input type="radio" id="type_video" name="type" value="1"> + <label for="type_video">Video</label> + + <input type="radio" id="type_channel" name="type" value="2"> + <label for="type_channel">Channel</label> + + <input type="radio" id="type_playlist" name="type" value="3"> + <label for="type_playlist">Playlist</label> + + <input type="radio" id="type_movie" name="type" value="4"> + <label for="type_movie">Movie</label> + + <input type="radio" id="type_show" name="type" value="5"> + <label for="type_show">Show</label> + + + <h3>Duration</h3> + <input type="radio" id="duration_any" name="duration" value="0"> + <label for="duration_any">Any</label> + + <input type="radio" id="duration_short" name="duration" value="1"> + <label for="duration_short">Short (< 4 minutes)</label> + + <input type="radio" id="duration_long" name="duration" value="2"> + <label for="duration_long">Long (> 20 minutes)</label> + + </div> + </div> + </div> + </form> + + <div id="header-right"> + <form id="playlist-edit" action="/youtube.com/edit_playlist" method="post" target="_self"> + <input name="playlist_name" id="playlist-name-selection" list="playlist-options" type="text"> + <datalist id="playlist-options"> + {% for playlist_name in header_playlist_names %} + <option value="{{ playlist_name }}">{{ playlist_name }}</option> + {% endfor %} + </datalist> + <button type="submit" id="playlist-add-button" name="action" value="add">Add to playlist</button> + <button type="reset" id="item-selection-reset">Clear selection</button> + </form> + <a href="/youtube.com/playlists" id="local-playlists">Local playlists</a> + </div> + </header> + <main> +{% block main %} +{{ main }} +{% endblock %} + </main> + </body> +</html> diff --git a/youtube/templates/channel.html b/youtube/templates/channel.html new file mode 100644 index 0000000..069e33b --- /dev/null +++ b/youtube/templates/channel.html @@ -0,0 +1,160 @@ +{% set page_title = channel_name + ' - Channel' %} +{% extends "base.html" %} +{% import "common_elements.html" as common_elements %} +{% block style %} + main{ + display:grid; +{% if current_tab == 'about' %} + grid-template-rows: 0fr 0fr 1fr; + grid-template-columns: 0fr 1fr; +{% else %} + grid-template-rows: repeat(5, 0fr); + grid-template-columns: auto 1fr; +{% endif %} + } + main .avatar{ + grid-row:1; + grid-column:1; + height:200px; + width:200px; + } + main .summary{ + grid-row:1; + grid-column:2; + margin-left: 5px; + } + main .channel-tabs{ + grid-row:2; + grid-column: 1 / span 2; + + display:grid; + grid-auto-flow: column; + justify-content:start; + + background-color: #aaaaaa; + padding: 3px; + padding-left: 6px; + } + #links-metadata{ + display: grid; + grid-auto-flow: column; + grid-column-gap: 10px; + grid-column: 1/span 2; + justify-content: start; + padding-top: 8px; + padding-bottom: 8px; + padding-left: 6px; + background-color: #bababa; + margin-bottom: 10px; + } + #number-of-results{ + font-weight:bold; + } + .item-grid{ + padding-left: 20px; + grid-row:4; + grid-column: 1 / span 2; + } + .item-list{ + width:1000px; + grid-column: 1 / span 2; + } + .page-button-row{ + grid-column: 1 / span 2; + } + .tab{ + padding: 5px 75px; + } + .channel-info{ + grid-row: 3; + grid-column: 1 / span 3; + } + .channel-info ul{ + padding-left: 40px; + } + .channel-info h3{ + margin-left: 40px; + } + .channel-info .description{ + white-space: pre-wrap; + 
min-width: 0; + margin-left: 40px; + } + .medium-item img{ + max-width: 168px; + } +{% endblock style %} + +{% block main %} + <img class="avatar" src="{{ avatar }}"> + <div class="summary"> + <h2 class="title">{{ channel_name }}</h2> + <p class="short-description">{{ short_description }}</p> + </div> + <nav class="channel-tabs"> + {% for tab_name in ('Videos', 'Playlists', 'About') %} + {% if tab_name.lower() == current_tab %} + <a class="tab page-button">{{ tab_name }}</a> + {% else %} + <a class="tab page-button" href="{{ channel_url + '/' + tab_name.lower() }}">{{ tab_name }}</a> + {% endif %} + {% endfor %} + + <form class="channel-search" action="{{ channel_url + '/search' }}"> + <input type="search" name="query" class="search-box" value="{{ search_box_value }}"> + <button type="submit" value="Search" class="search-button">Search</button> + </form> + </nav> + {% if current_tab == 'about' %} + <div class="channel-info"> + <ul> + {% for stat in stats %} + <li>{{ stat }}</li> + {% endfor %} + </ul> + <hr> + <h3>Description</h3> + <div class="description">{{ common_elements.text_runs(description) }}</div> + <hr> + <ul> + {% for text, url in links %} + <li><a href="{{ url }}">{{ text }}</a></li> + {% endfor %} + </ul> + </div> + {% else %} + <div id="links-metadata"> + {% if current_tab == 'videos' %} + {% set sorts = [('1', 'views'), ('2', 'oldest'), ('3', 'newest')] %} + <div id="number-of-results">{{ number_of_videos }} videos</div> + {% elif current_tab == 'playlists' %} + {% set sorts = [('2', 'oldest'), ('3', 'newest'), ('4', 'last video added')] %} + {% else %} + {% set sorts = [] %} + {% endif %} + + {% for sort_number, sort_name in sorts %} + {% if sort_number == current_sort.__str__() %} + <a class="sort-button">{{ 'Sorted by ' + sort_name }}</a> + {% else %} + <a class="sort-button" href="{{ channel_url + '/' + current_tab + '?sort=' + sort_number }}">{{ 'Sort by ' + sort_name }}</a> + {% endif %} + {% endfor %} + </div> + + {% if current_tab != 'about' %} + <nav class="{{ 'item-list' if current_tab == 'search' else 'item-grid' }}"> + {% for item_info in items %} + {{ common_elements.item(item_info, include_author=false) }} + {% endfor %} + </nav> + + {% if current_tab != 'playlists' %} + <nav class="page-button-row"> + {{ common_elements.page_buttons(number_of_pages, channel_url + '/' + current_tab, parameters_dictionary) }} + </nav> + {% endif %} + {% endif %} + + {% endif %} +{% endblock main %} diff --git a/youtube/templates/comments.html b/youtube/templates/comments.html new file mode 100644 index 0000000..82276b8 --- /dev/null +++ b/youtube/templates/comments.html @@ -0,0 +1,70 @@ +{% import "common_elements.html" as common_elements %} + +{% macro render_comment(comment, include_avatar) %} + <div class="comment-container"> + <div class="comment"> + <a class="author-avatar" href="{{ comment['author_url'] }}" title="{{ comment['author'] }}"> + {% if include_avatar %} + <img class="author-avatar-img" src="{{ comment['author_avatar'] }}"> + {% endif %} + </a> + <address> + <a class="author" href="{{ comment['author_url'] }}" title="{{ comment['author'] }}">{{ comment['author'] }}</a> + </address> + <a class="permalink" href="{{ comment['permalink'] }}" title="permalink"> + <time datetime="">{{ comment['published'] }}</time> + </a> + <span class="text">{{ common_elements.text_runs(comment['text']) }}</span> + + <span class="likes">{{ comment['likes_text'] if comment['likes'] else ''}}</span> + <div class="bottom-row"> + <a href="{{ comment['replies_url'] }}" class="replies">{{ 
comment['view_replies_text'] }}</a> + {% if 'delete_url' is in comment %} + <a href="{{ comment['delete_url'] }}" target="_blank">Delete</a> + {% endif %} + </div> + </div> + + </div> +{% endmacro %} + +{% macro video_comments(comments_info) %} + <section class="comments-area"> + <div class="comment-links"> + {% for link_text, link_url in comments_info['comment_links'] %} + <a class="sort-button" href="{{ link_url }}">{{ link_text }}</a> + {% endfor %} + </div> + <div class="comments"> + {% for comment in comments_info['comments'] %} + {{ render_comment(comment, comments_info['include_avatars']) }} + {% endfor %} + </div> + {% if 'more_comments_url' is in comments_info %} + <a class="page-button more-comments" href="{{ comments_info['more_comments_url'] }}">More comments</a> + {% endif %} + </section> +{% endmacro %} + +{% macro comment_posting_box(info) %} + <form action="{{ info['form_action'] }}" method="post" class="comment-form"> + <div id="comment-account-options"> + <label for="account-selection">Account:</label> + <select id="account-selection" name="channel_id"> + {% for account in info['accounts'] %} + <option value="{{ account[0] }}">{{ account[1] }}</option> + {% endfor %} + </select> + <a href="/https://youtube.com/login" target="_blank">Add account</a> + </div> + <textarea name="comment_text"></textarea> + {% if info['include_video_id_input'] %} + <input type="hidden" name="video_id" value="{{ info['video_id'] }}"> + {% endif %} + <button type="submit" class="post-comment-button">{{ 'Post reply' if info['replying'] else 'Post comment' }}</button> + </form> +{% endmacro %} + + + + diff --git a/youtube/templates/comments_page.html b/youtube/templates/comments_page.html new file mode 100644 index 0000000..68c8537 --- /dev/null +++ b/youtube/templates/comments_page.html @@ -0,0 +1,65 @@ +{% set page_title = ('Replies' if comments_info['is_replies'] else 'Comments page ' + comments_info['page_number']) %} +{% extends "base.html" %} +{% import "comments.html" as comments %} + +{% block style %} + main{ + display:grid; + grid-template-columns: 3fr 2fr; + } + #left{ + background-color:#bcbcbc; + + display: grid; + grid-column: 1; + grid-row: 1; + grid-template-columns: 1fr 640px; + grid-template-rows: 0fr 0fr 0fr; + } + .comments-area{ + grid-column:2; + } + .comment{ + width:640px; + } +{% endblock style %} + + +{% block main %} + <div id="left"> + <section class="comments-area"> + {% if not comments_info['is_replies'] %} + <section class="video-metadata"> + <a class="video-metadata-thumbnail-box" href="{{ comments_info['video_url'] }}" title="{{ comments_info['video_title'] }}"> + <img class="video-metadata-thumbnail-img" src="{{ comments_info['video_thumbnail'] }}" height="180px" width="320px"> + </a> + <a class="title" href="{{ comments_info['video_url'] }}" title="{{ comments_info['video_title'] }}">{{ comments_info['video_title'] }}</a> + + <h2>Comments page {{ comments_info['page_number'] }}</h2> + <span>Sorted by {{ comments_info['sort_text'] }}</span> + </section> + {% endif %} + + {{ comments.comment_posting_box(comment_posting_box_info) }} + + {% if not comments_info['is_replies'] %} + <div class="comment-links"> + {% for link_text, link_url in comments_info['comment_links'] %} + <a class="sort-button" href="{{ link_url }}">{{ link_text }}</a> + {% endfor %} + </div> + {% endif %} + + <div class="comments"> + {% for comment in comments_info['comments'] %} + {{ comments.render_comment(comment, comments_info['include_avatars']) }} + {% endfor %} + </div> + {% if 
'more_comments_url' is in comments_info %} + <a class="page-button more-comments" href="{{ comments_info['more_comments_url'] }}">More comments</a> + {% endif %} + </section> + </div> +{% endblock main %} + + diff --git a/youtube/templates/common_elements.html b/youtube/templates/common_elements.html new file mode 100644 index 0000000..49e2fad --- /dev/null +++ b/youtube/templates/common_elements.html @@ -0,0 +1,154 @@ +{% macro text_runs(runs) %} + {%- if runs[0] is mapping -%} + {%- for text_run in runs -%} + {%- if text_run.get("bold", false) -%} + <b>{{ text_run["text"] }}</b> + {%- elif text_run.get('italics', false) -%} + <i>{{ text_run["text"] }}</i> + {%- else -%} + {{ text_run["text"] }} + {%- endif -%} + {%- endfor -%} + {%- else -%} + {{ runs }} + {%- endif -%} +{% endmacro %} + +{% macro small_item(info, include_author=true) %} + <div class="small-item-box"> + <div class="small-item"> + {% if info['type'] == 'video' %} + <a class="video-thumbnail-box" href="{{ info['url'] }}" title="{{ info['title'] }}"> + <img class="video-thumbnail-img" src="{{ info['thumbnail'] }}"> + <span class="video-duration">{{ info['duration'] }}</span> + </a> + <a class="title" href="{{ info['url'] }}" title="{{ info['title'] }}">{{ info['title'] }}</a> + + <address>{{ info['author'] }}</address> + <span class="views">{{ info['views'] }}</span> + + {% elif info['type'] == 'playlist' %} + <a class="playlist-thumbnail-box" href="{{ info['url'] }}" title="{{ info['title'] }}"> + <img class="playlist-thumbnail-img" src="{{ info['thumbnail'] }}"> + <div class="playlist-thumbnail-info"> + <span>{{ info['size'] }}</span> + </div> + </a> + <a class="title" href="{{ info['url'] }}" title="{{ info['title'] }}">{{ info['title'] }}</a> + + <address>{{ info['author'] }}</address> + {% else %} + Error: unsupported item type + {% endif %} + </div> + {% if info['type'] == 'video' %} + <input class="item-checkbox" type="checkbox" name="video_info_list" value="{{ info['video_info'] }}" form="playlist-edit"> + {% endif %} + </div> +{% endmacro %} + +{% macro get_stats(info, include_author=true) %} + {% if include_author %} + {% if 'author_url' is in(info) %} + <address>By <a href="{{ info['author_url'] }}">{{ info['author'] }}</a></address> + {% else %} + <address><b>{{ info['author'] }}</b></address> + {% endif %} + {% endif %} + {% if 'views' is in(info) %} + <span class="views">{{ info['views'] }}</span> + {% endif %} + {% if 'published' is in(info) %} + <time>{{ info['published'] }}</time> + {% endif %} +{% endmacro %} + + + +{% macro medium_item(info, include_author=true) %} + <div class="medium-item-box"> + <div class="medium-item"> + {% if info['type'] == 'video' %} + <a class="video-thumbnail-box" href="{{ info['url'] }}" title="{{ info['title'] }}"> + <img class="video-thumbnail-img" src="{{ info['thumbnail'] }}"> + <span class="video-duration">{{ info['duration'] }}</span> + </a> + + <a class="title" href="{{ info['url'] }}" title="{{ info['title'] }}">{{ info['title'] }}</a> + + <div class="stats"> + {{ get_stats(info, include_author) }} + </div> + + <span class="description">{{ text_runs(info.get('description', '')) }}</span> + <span class="badges">{{ info['badges']|join(' | ') }}</span> + {% elif info['type'] == 'playlist' %} + <a class="playlist-thumbnail-box" href="{{ info['url'] }}" title="{{ info['title'] }}"> + <img class="playlist-thumbnail-img" src="{{ info['thumbnail'] }}"> + <div class="playlist-thumbnail-info"> + <span>{{ info['size'] }}</span> + </div> + </a> + + <a class="title" href="{{ 
info['url'] }}" title="{{ info['title'] }}">{{ info['title'] }}</a> + + <div class="stats"> + {{ get_stats(info, include_author) }} + </div> + {% elif info['type'] == 'channel' %} + <a class="video-thumbnail-box" href="{{ info['url'] }}" title="{{ info['title'] }}"> + <img class="video-thumbnail-img" src="{{ info['thumbnail'] }}"> + </a> + + <a class="title" href="{{ info['url'] }}">{{ info['title'] }}</a> + + <span>{{ info['subscriber_count'] }} subscribers</span> + <span>{{ info['size'] }} videos</span> + + <span class="description">{{ text_runs(info.get('description', '')) }}</span> + {% else %} + Error: unsupported item type + {% endif %} + </div> + {% if info['type'] == 'video' %} + <input class="item-checkbox" type="checkbox" name="video_info_list" value="{{ info['video_info'] }}" form="playlist-edit"> + {% endif %} + </div> +{% endmacro %} + + +{% macro item(info, include_author=true) %} + {% if info['item_size'] == 'small' %} + {{ small_item(info, include_author) }} + {% elif info['item_size'] == 'medium' %} + {{ medium_item(info, include_author) }} + {% else %} + Error: Unknown item size + {% endif %} +{% endmacro %} + + + +{% macro page_buttons(estimated_pages, url, parameters_dictionary) %} + {% set current_page = parameters_dictionary.get('page', 1)|int %} + {% set parameters_dictionary = parameters_dictionary.to_dict() %} + {% if current_page is le(5) %} + {% set page_start = 1 %} + {% set page_end = [9, estimated_pages]|min %} + {% else %} + {% set page_start = current_page - 4 %} + {% set page_end = [current_page + 4, estimated_pages]|min %} + {% endif %} + + {% for page in range(page_start, page_end+1) %} + {% if page == current_page %} + <div class="page-button">{{ page }}</div> + {% else %} + {# IMPORTANT: Jinja SUCKS #} + {# https://stackoverflow.com/questions/36886650/how-to-add-a-new-entry-into-a-dictionary-object-while-using-jinja2 #} + {% set _ = parameters_dictionary.__setitem__('page', page) %} + <a class="page-button" href="{{ url + '?' + parameters_dictionary|urlencode }}">{{ page }}</a> + {% endif %} + {% endfor %} + +{% endmacro %} diff --git a/youtube/templates/delete_comment.html b/youtube/templates/delete_comment.html new file mode 100644 index 0000000..71555ee --- /dev/null +++ b/youtube/templates/delete_comment.html @@ -0,0 +1,25 @@ +{% set page_title = 'Delete comment?' 
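
The __setitem__ call in the page_buttons macro above is the standard workaround for two Jinja limits: there is no statement for adding a key to an existing dict, and {% set %} bindings made inside a {% for %} do not survive the iteration. The pattern in isolation (names hypothetical):

    {% set params = {'page': 1} %}
    {% for page in range(1, 4) %}
        {# a plain set would only rebind a loop-local name; __setitem__ mutates params in place #}
        {% set _ = params.__setitem__('page', page) %}
        <a href="/search?{{ params|urlencode }}">{{ page }}</a>
    {% endfor %}
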
%} +{% extends "base.html" %} + +{% block style %} + main{ + display: grid; + grid-template-columns: minmax(0px, 3fr) 640px 40px 500px minmax(0px,2fr); + align-content: start; + } + main > div, main > form{ + margin-top:20px; + grid-column:2; + } +{% endblock style %} + +{% block main %} + <div>Are you sure you want to delete this comment?</div> + <form action="" method="POST"> + {% for parameter_name, parameter_value in parameters %} + <input type="hidden" name="{{ parameter_name }}" value="{{ parameter_value }}"> + {% endfor %} + <input type="submit" value="Yes, delete it"> + </form> +{% endblock %} + diff --git a/youtube/templates/error.html b/youtube/templates/error.html new file mode 100644 index 0000000..e77c92c --- /dev/null +++ b/youtube/templates/error.html @@ -0,0 +1,7 @@ +{% set page_title = 'Error' %} +{% extends "base.html" %} + +{% block main %} + {{ error_message }} +{% endblock %} + diff --git a/youtube/templates/local_playlist.html b/youtube/templates/local_playlist.html new file mode 100644 index 0000000..f8e6f01 --- /dev/null +++ b/youtube/templates/local_playlist.html @@ -0,0 +1,60 @@ +{% set page_title = playlist_name + ' - Local playlist' %} +{% extends "base.html" %} +{% import "common_elements.html" as common_elements %} +{% block style %} + main{ + display:grid; + grid-template-columns: 3fr 1fr; + } + + + + #left{ + grid-column: 1; + grid-row: 1; + + display: grid; + grid-template-columns: 1fr 800px auto; + grid-template-rows: 0fr 1fr 0fr; + } + .playlist-title{ + grid-column:2; + } + #playlist-remove-button{ + grid-column:3; + align-self: center; + white-space: nowrap; + } + #results{ + + grid-row: 2; + grid-column: 2 / span 2; + + + display: grid; + grid-auto-rows: 0fr; + grid-row-gap: 10px; + + } + .page-button-row{ + grid-row: 3; + grid-column: 2; + justify-self: center; + } +{% endblock style %} + +{% block main %} + <div id="left"> + <h2 class="playlist-title">{{ playlist_name }}</h2> + <input type="hidden" name="playlist_page" value="{{ playlist_name }}" form="playlist-edit"> + <button type="submit" id="playlist-remove-button" name="action" value="remove" form="playlist-edit" formaction="">Remove from playlist</button> + <div id="results"> + {% for video_info in videos %} + {{ common_elements.item(video_info) }} + {% endfor %} + </div> + <nav class="page-button-row"> + {{ common_elements.page_buttons(num_pages, '/https://www.youtube.com/playlists/' + playlist_name, parameters_dictionary) }} + </nav> + </div> +{% endblock main %} diff --git a/youtube/templates/local_playlists_list.html b/youtube/templates/local_playlists_list.html new file mode 100644 index 0000000..9b5f510 --- /dev/null +++ b/youtube/templates/local_playlists_list.html @@ -0,0 +1,16 @@ +{% set page_title = 'Local playlists' %} +{% extends "base.html" %} + +{% block main %} + <ul> + {% for playlist_name, playlist_url in playlists %} + <li><a href="{{ playlist_url }}">{{ playlist_name }}</a></li> + {% endfor %} + </ul> +{% endblock main %} + + + + + + diff --git a/youtube/templates/login.html b/youtube/templates/login.html new file mode 100644 index 0000000..0f09a62 --- /dev/null +++ b/youtube/templates/login.html @@ -0,0 +1,60 @@ +{% set page_title = 'Login' %} +{% extends "base.html" %} + +{% block style %} + main{ + display: grid; + grid-template-columns: minmax(0px, 3fr) 640px 40px 500px minmax(0px,2fr); + align-content: start; + grid-row-gap: 40px; + } + + main form{ + margin-top:20px; + grid-column:2; + display:grid; + justify-items: start; + align-content: start; + grid-row-gap: 10px; + 
} + + #username, #password{ + grid-column:2; + width: 250px; + } + #add-account-button{ + margin-top:20px; + } + #tor-note{ + grid-row:2; + grid-column:2; + background-color: #dddddd; + padding: 10px; + } +{% endblock style %} + +{% block main %} + <form action="" method="POST"> + <div class="form-field"> + <label for="username">Username:</label> + <input type="text" id="username" name="username"> + </div> + <div class="form-field"> + <label for="password">Password:</label> + <input type="password" id="password" name="password"> + </div> + <div id="save-account-checkbox"> + <input type="checkbox" id="save-account" name="save" checked> + <label for="save-account">Save account info to disk (password will not be saved, only the login cookie)</label> + </div> + <div> + <input type="checkbox" id="use-tor" name="use_tor"> + <label for="use-tor">Use Tor when logging in (WARNING: This will lock your Google account under normal circumstances, see note below)</label> + </div> + <input type="submit" value="Add account" id="add-account-button"> + </form> + <div id="tor-note"><b>Note on using Tor to log in</b><br> +Using Tor to log in should only be done if the account was created using a proxy/VPN/Tor to begin with and has never been logged into from your own IP. Otherwise, it's pointless, since Google already knows who the account belongs to. A Google account must be logged into from an IP address geographically close to the area where the account was created or where it is regularly used. If the account was created using an IP address in America and is logged into from an IP in Russia, Google will block the Russian IP from logging in, assume someone knows your password, lock the account, and make you change your password. If you create an account using Tor, you must remember the IP (or geographic region) it was created in, and only log in through exit nodes in that geographic region. This can be accomplished by <a href="https://tor.stackexchange.com/questions/733/can-i-exit-from-a-specific-country-or-node">putting the desired IP in the torrc file</a> to force Tor to use that exit node. Using the login cookie to post comments through Tor is perfectly safe, however.
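
For reference, the torrc change the note above links to is plain Tor configuration, nothing this project manages itself; restricting circuits to exits in one region looks roughly like this (illustrative values):

    # torrc: only exit through nodes in the given country (a fingerprint or IP also works)
    ExitNodes {us}
    StrictNodes 1
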
+ </div> +{% endblock main %} + diff --git a/youtube/templates/playlist.html b/youtube/templates/playlist.html new file mode 100644 index 0000000..371b51b --- /dev/null +++ b/youtube/templates/playlist.html @@ -0,0 +1,106 @@ +{% set page_title = title + ' - Page ' + parameters_dictionary.get('page', '1') %} +{% extends "base.html" %} +{% import "common_elements.html" as common_elements %} +{% block style %} + main{ + display:grid; + grid-template-columns: 3fr 1fr; + } + + + + #left{ + grid-column: 1; + grid-row: 1; + + display: grid; + grid-template-columns: 1fr 800px; + grid-template-rows: 0fr 1fr 0fr; + } + .playlist-metadata{ + grid-column:2; + grid-row:1; + + display:grid; + grid-template-columns: 0fr 1fr; + } + .playlist-thumbnail{ + grid-row: 1 / span 5; + grid-column:1; + justify-self:start; + width:250px; + margin-right: 10px; + } + .playlist-title{ + grid-row: 1; + grid-column:2; + } + .playlist-author{ + grid-row:2; + grid-column:2; + } + .playlist-stats{ + grid-row:3; + grid-column:2; + } + + .playlist-description{ + grid-row:4; + grid-column:2; + min-width:0px; + white-space: pre-line; + } + .page-button-row{ + grid-row: 3; + grid-column: 2; + justify-self: center; + } + + + #right{ + grid-column: 2; + grid-row: 1; + + } + #results{ + + grid-row: 2; + grid-column: 2; + margin-top:10px; + + display: grid; + grid-auto-rows: 0fr; + grid-row-gap: 10px; + + } +{% endblock style %} + +{% block main %} + <div id="left"> + <div class="playlist-metadata"> + <img class="playlist-thumbnail" src="{{ thumbnail }}"> + <h2 class="playlist-title">{{ title }}</h2> + <a class="playlist-author" href="{{ author_url }}">{{ author }}</a> + <div class="playlist-stats"> + <div>{{ views }}</div> + <div>{{ size }}</div> + </div> + <div class="playlist-description">{{ common_elements.text_runs(description) }}</div> + </div> + + <div id="results"> + {% for info in video_list %} + {{ common_elements.item(info) }} + {% endfor %} + </div> + <nav class="page-button-row"> + {{ common_elements.page_buttons(num_pages, '/https://www.youtube.com/playlist', parameters_dictionary) }} + </nav> + </div> +{% endblock main %} + + + + + + diff --git a/youtube/templates/post_comment.html b/youtube/templates/post_comment.html new file mode 100644 index 0000000..67c54f1 --- /dev/null +++ b/youtube/templates/post_comment.html @@ -0,0 +1,29 @@ +{% set page_title = 'Post reply' if replying else 'Post comment' %} +{% extends "base.html" %} +{% import "comments.html" as comments %} + +{% block style %} + main{ + display: grid; + grid-template-columns: 3fr 2fr; + } + .left{ + display:grid; + grid-template-columns: 1fr 640px; + } + textarea{ + width: 460px; + height: 85px; + } + .comment-form{ + grid-column:2; + justify-content:start; + } +{% endblock style %} + +{% block main %} + <div class="left"> + {{ comments.comment_posting_box(comment_posting_box_info) }} + </div> +{% endblock %} + diff --git a/youtube/templates/search.html b/youtube/templates/search.html new file mode 100644 index 0000000..782a85e --- /dev/null +++ b/youtube/templates/search.html @@ -0,0 +1,54 @@ +{% set search_box_value = query %} +{% set page_title = query + ' - Search' %} +{% extends "base.html" %} +{% import "common_elements.html" as common_elements %} +{% block style %} + main{ + display:grid; + grid-template-columns: minmax(0px, 1fr) 800px minmax(0px,2fr); + max-width:100vw; + } + + + #number-of-results{ + font-weight:bold; + } + #result-info{ + grid-row: 1; + grid-column:2; + align-self:center; + } + .page-button-row{ + grid-column: 2; + 
justify-self: center; + } + + + .item-list{ + grid-row: 2; + grid-column: 2; + } + .badge{ + background-color:#cccccc; + } +{% endblock style %} + +{% block main %} + <div id="result-info"> + <div id="number-of-results">Approximately {{ '{:,}'.format(estimated_results) }} results ({{ '{:,}'.format(estimated_pages) }} pages)</div> +{% if corrections['type'] == 'showing_results_for' %} + <div>Showing results for <a>{{ corrections['corrected_query']|safe }}</a></div> + <div>Search instead for <a href="{{ corrections['original_query_url'] }}">{{ corrections['original_query'] }}</a></div> +{% elif corrections['type'] == 'did_you_mean' %} + <div>Did you mean <a href="{{ corrections['corrected_query_url'] }}">{{ corrections['corrected_query']|safe }}</a></div> +{% endif %} + </div> + <div class="item-list"> + {% for info in results %} + {{ common_elements.item(info) }} + {% endfor %} + </div> + <nav class="page-button-row"> + {{ common_elements.page_buttons(estimated_pages, '/https://www.youtube.com/search', parameters_dictionary) }} + </nav> +{% endblock main %} diff --git a/youtube/templates/status.html b/youtube/templates/status.html new file mode 100644 index 0000000..901aa5b --- /dev/null +++ b/youtube/templates/status.html @@ -0,0 +1,7 @@ +{% set page_title = (title if (title is defined) else 'Status') %} +{% extends "base.html" %} + +{% block main %} + {{ message }} +{% endblock %} + diff --git a/yt_watch_template.html b/youtube/templates/watch.html index 59b588a..82c1a97 100644 --- a/yt_watch_template.html +++ b/youtube/templates/watch.html @@ -1,13 +1,8 @@ -<!DOCTYPE html> -<html> - <head> - <meta charset="utf-8"> - <title>$page_title</title> - <link href="/youtube.com/shared.css" type="text/css" rel="stylesheet"> - <link href="/youtube.com/comments.css" type="text/css" rel="stylesheet"> - <link href="/youtube.com/favicon.ico" type="image/x-icon" rel="icon"> - <link title="Youtube local" href="/youtube.com/opensearch.xml" rel="search" type="application/opensearchdescription+xml"> - <style type="text/css"> +{% set page_title = title %} +{% extends "base.html" %} +{% import "common_elements.html" as common_elements %} +{% import "comments.html" as comments %} +{% block style %} main{ display:grid; grid-template-columns: minmax(0px, 3fr) 640px 40px 500px minmax(0px,2fr); @@ -150,55 +145,86 @@ grid-template-columns: 60px 90px auto; max-height: 1.2em; } - </style> - </head> - <body> -$header - <main> +{% endblock style %} + +{% block main %} <div id="left"> </div> - <article class="full-item"> + <article class="full-item"> <video width="640" height="360" controls autofocus> -$video_sources - </video> +{% for video_source in video_sources %} + <source src="{{ video_source['src'] }}" type="{{ video_source['type'] }}"> +{% endfor %} + +{% for source in subtitle_sources %} + {% if source['on'] %} + <track label="{{ source['label'] }}" src="{{ source['url'] }}" kind="subtitles" srclang="{{ source['srclang'] }}" default> + {% else %} + <track label="{{ source['label'] }}" src="{{ source['url'] }}" kind="subtitles" srclang="{{ source['srclang'] }}"> + {% endif %} +{% endfor %} - <h2 class="title">$video_title</h2> -$is_unlisted + </video> - <address>Uploaded by <a href="$uploader_channel_url">$uploader</a></address> - <span class="views">$views views</span> + <h2 class="title">{{ title }}</h2> +{% if unlisted %} + <span class="is-unlisted">Unlisted</span> +{% endif %} + <address>Uploaded by <a href="{{ uploader_channel_url }}">{{ uploader }}</a></address> + <span class="views">{{ views }} 
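
The result count in the search header above is formatted with ordinary Python str.format called from Jinja; the ',' option inserts thousands separators:

    >>> '{:,}'.format(1234567)   # the same call the template makes on estimated_results
    '1,234,567'
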
views</span> - <time datetime="$upload_date">Published on $upload_date</time> - <span class="likes-dislikes">$likes likes $dislikes dislikes</span> + <time datetime="{{ upload_date }}">Published on {{ upload_date }}</time> + <span class="likes-dislikes">{{ likes }} likes {{ dislikes }} dislikes</span> <div class="download-dropdown"> <button class="download-dropdown-label">Download</button> <div class="download-dropdown-content"> -$download_options +{% for format in download_formats %} + <a href="{{ format['url'] }}"> + <span>{{ format['ext'] }}</span> + <span>{{ format['resolution'] }}</span> + <span>{{ format['note'] }}</span> + </a> +{% endfor %} </div> </div> - <input class="checkbox" name="video_info_list" value="$video_info" form="playlist-edit" type="checkbox"> + <input class="checkbox" name="video_info_list" value="{{ video_info }}" form="playlist-edit" type="checkbox"> - <span class="description">$description</span> + <span class="description">{{ description }}</span> <div class="music-list"> -$music_list + {% if music_list.__len__() != 0 %} + <hr> + <table> + <caption>Music</caption> + <tr> + {% for attribute in music_attributes %} + <th>{{ attribute }}</th> + {% endfor %} + </tr> + {% for track in music_list %} + <tr> + {% for attribute in music_attributes %} + <td>{{ track.get(attribute.lower(), '') }}</td> + {% endfor %} + </tr> + {% endfor %} + </table> + {% endif %} </div> -$comments + + {% if comments_info %} + {{ comments.video_comments(comments_info) }} + {% endif %} </article> - - + + <nav id="related"> -$related + {% for info in related %} + {{ common_elements.item(info) }} + {% endfor %} </nav> - </main> - - - - - - </body> -</html> +{% endblock main %} diff --git a/youtube/util.py b/youtube/util.py index c4e1aff..2205645 100644 --- a/youtube/util.py +++ b/youtube/util.py @@ -107,7 +107,7 @@ def decode_content(content, encoding_header): content = gzip.decompress(content) return content -def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookiejar_send=None, cookiejar_receive=None, use_tor=True, return_response=False): +def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookiejar_send=None, cookiejar_receive=None, use_tor=True, return_response=False, debug_name=None): ''' When cookiejar_send is set to a CookieJar object, those cookies will be sent in the request (but cookies in response will not be merged into it) @@ -164,6 +164,14 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookieja print(report_text, ' Latency:', round(response_time - start_time,3), ' Read time:', round(read_finish - response_time,3)) content = decode_content(content, response.getheader('Content-Encoding', default='identity')) + if settings.debugging_save_responses and debug_name is not None: + save_dir = os.path.join(settings.data_dir, 'debug') + if not os.path.exists(save_dir): + os.makedirs(save_dir) + + with open(os.path.join(save_dir, debug_name), 'wb') as f: + f.write(content) + if return_response: return content, response return content @@ -308,4 +316,4 @@ def update_query_string(query_string, items): def uppercase_escape(s): return re.sub( r'\\U([0-9a-fA-F]{8})', - lambda m: chr(int(m.group(1), base=16)), s)
\ No newline at end of file + lambda m: chr(int(m.group(1), base=16)), s) diff --git a/youtube/watch.py b/youtube/watch.py index 06b525a..5487dd4 100644 --- a/youtube/watch.py +++ b/youtube/watch.py @@ -1,138 +1,29 @@ -from youtube import util, html_common, comments +from youtube import yt_app +from youtube import util, comments, local_playlist, yt_data_extract +import settings + +from flask import request +import flask from youtube_dl.YoutubeDL import YoutubeDL from youtube_dl.extractor.youtube import YoutubeError import json -import urllib -from string import Template import html - import gevent -import settings import os -video_height_priority = (360, 480, 240, 720, 1080) - - -_formats = { - '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'}, - '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'}, - '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'}, - '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'}, - '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'}, - '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'}, - '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'}, - '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'}, - # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well - '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'}, - '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'}, - '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'}, - '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'}, - '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'}, - '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'}, - '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'}, - '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'}, - '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'}, - - - # 3D videos - '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20}, - '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20}, - '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20}, - '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20}, - '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20}, - '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20}, - '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20}, - - # Apple HTTP Live Streaming - '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10}, - '92': {'ext': 'mp4', 'height': 240, 
'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10}, - '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10}, - '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10}, - '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10}, - '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10}, - '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10}, - '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10}, - - # DASH mp4 video - '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'}, - '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'}, - '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'}, - '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'}, - '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'}, - '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559) - '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'}, - '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'}, - '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'}, - '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60}, - '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60}, - '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'}, - - # Dash mp4 audio - '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'}, - '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'}, - '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'}, - '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'}, - '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'}, - '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'}, - '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'}, - - # Dash webm - '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'}, - '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'}, - '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'}, - '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'}, - '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'}, - '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'}, - '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 
'webm', 'vcodec': 'vp9'}, - '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'}, - '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'}, - '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'}, - '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'}, - '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'}, - '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'}, - '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'}, - '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'}, - # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug) - '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'}, - '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60}, - '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60}, - '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60}, - '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'}, - '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60}, - - # Dash webm audio - '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128}, - '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256}, - - # Dash webm audio with opus inside - '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50}, - '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70}, - '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160}, - - # RTMP (unnamed) - '_rtmp': {'protocol': 'rtmp'}, -} - - - - - - -with open("yt_watch_template.html", "r") as file: - yt_watch_template = Template(file.read()) - -def get_related_items_html(info): - result = "" +def get_related_items(info): + results = [] for item in info['related_vids']: if 'list' in item: # playlist: - item = watch_page_related_playlist_info(item) - result += html_common.playlist_item_html(item, html_common.small_playlist_item_template) + result = watch_page_related_playlist_info(item) else: - item = watch_page_related_video_info(item) - result += html_common.video_item_html(item, html_common.small_video_item_template) - return result + result = watch_page_related_video_info(item) + yt_data_extract.prefix_urls(result) + yt_data_extract.add_extra_html_info(result) + results.append(result) + return results # json of related items retrieved directly from the watch page has different names for everything @@ -145,6 +36,8 @@ def watch_page_related_video_info(item): except KeyError: result['views'] = '' result['thumbnail'] = util.get_thumbnail_url(item['id']) + result['item_size'] = 'small' + result['type'] = 'video' return result def watch_page_related_playlist_info(item): @@ -154,53 +47,49 @@ def watch_page_related_playlist_info(item): 'id': item['list'], 'first_video_id': item['video_id'], 'thumbnail': util.get_thumbnail_url(item['video_id']), + 'item_size': 'small', + 'type': 'playlist', } - -def sort_formats(info): - sorted_formats = info['formats'].copy() - sorted_formats.sort(key=lambda x: util.default_multi_get(_formats, x['format_id'], 'height', default=0)) - for index, format in enumerate(sorted_formats): - if 
util.default_multi_get(_formats, format['format_id'], 'height', default=0) >= 360: - break - sorted_formats = sorted_formats[index:] + sorted_formats[0:index] - sorted_formats = [format for format in info['formats'] if format['acodec'] != 'none' and format['vcodec'] != 'none'] - return sorted_formats - -source_tag_template = Template(''' -<source src="$src" type="$type">''') -def formats_html(formats): - result = '' - for format in formats: - result += source_tag_template.substitute( - src=format['url'], - type='audio/' + format['ext'] if format['vcodec'] == "none" else 'video/' + format['ext'], - ) - return result +def get_video_sources(info): + video_sources = [] + for format in info['formats']: + if format['acodec'] != 'none' and format['vcodec'] != 'none': + video_sources.append({ + 'src': format['url'], + 'type': 'video/' + format['ext'], + }) + return video_sources -subtitles_tag_template = Template(''' -<track label="$label" src="$src" kind="subtitles" srclang="$srclang" $default>''') -def subtitles_html(info): - result = '' +def get_subtitle_sources(info): + sources = [] default_found = False - default = '' + default = None for language, formats in info['subtitles'].items(): for format in formats: if format['ext'] == 'vtt': - append = subtitles_tag_template.substitute( - src = html.escape('/' + format['url']), - label = html.escape(language), - srclang = html.escape(language), - default = 'default' if language == settings.subtitles_language and settings.subtitles_mode > 0 else '', - ) + source = { + 'url': '/' + format['url'], + 'label': language, + 'srclang': language, + + # set as on by default if this is the preferred language and a default-on subtitles mode is in settings + 'on': language == settings.subtitles_language and settings.subtitles_mode > 0, + } + if language == settings.subtitles_language: default_found = True - default = append + default = source else: - result += append + sources.append(source) break - result += default + + # Put it at the end to avoid browser bug when there are too many languages + # (in Firefox, it is impossible to select a language near the top of the list because it is cut off) + if default_found: + sources.append(default) + try: formats = info['automatic_captions'][settings.subtitles_language] except KeyError: @@ -208,19 +97,34 @@ def subtitles_html(info): else: for format in formats: if format['ext'] == 'vtt': - result += subtitles_tag_template.substitute( - src = html.escape('/' + format['url']), - label = settings.subtitles_language + ' - Automatic', - srclang = settings.subtitles_language, - default = 'default' if settings.subtitles_mode == 2 and not default_found else '', - ) - return result + sources.append({ + 'url': '/' + format['url'], + 'label': settings.subtitles_language + ' - Automatic', + 'srclang': settings.subtitles_language, + + # set as on by default if this is the preferred language and a default-on subtitles mode is in settings + 'on': settings.subtitles_mode == 2 and not default_found, + + }) + + return sources + +def get_ordered_music_list_attributes(music_list): + # get the set of attributes which are used by at least 1 track + # so there isn't an empty, extraneous album column which no tracks use, for example + used_attributes = set() + for track in music_list: + used_attributes = used_attributes | track.keys() -more_comments_template = Template('''<a class="page-button more-comments" href="$url">More comments</a>''') + # now put them in the right order + ordered_attributes = [] + for attribute in ('Artist', 'Title',
'Album'): + if attribute.lower() in used_attributes: + ordered_attributes.append(attribute) + + return ordered_attributes -download_link_template = Template(''' -<a href="$url"> <span>$ext</span> <span>$resolution</span> <span>$note</span></a>''') def extract_info(downloader, *args, **kwargs): try: @@ -228,136 +132,96 @@ def extract_info(downloader, *args, **kwargs): except YoutubeError as e: return str(e) -music_list_table_row = Template('''<tr> - <td>$attribute</td> - <td>$value</td> -''') -def get_watch_page(env, start_response): - video_id = env['parameters']['v'][0] - if len(video_id) < 11: - start_response('404 Not Found', [('Content-type', 'text/plain'),]) - return b'Incomplete video id (too short): ' + video_id.encode('ascii') - - start_response('200 OK', [('Content-type','text/html'),]) - - lc = util.default_multi_get(env['parameters'], 'lc', 0, default='') - if settings.route_tor: - proxy = 'socks5://127.0.0.1:9150/' - else: - proxy = '' - downloader = YoutubeDL(params={'youtube_include_dash_manifest':False, 'proxy':proxy}) - tasks = ( - gevent.spawn(comments.video_comments, video_id, int(settings.default_comment_sorting), lc=lc ), - gevent.spawn(extract_info, downloader, "https://www.youtube.com/watch?v=" + video_id, download=False) - ) - gevent.joinall(tasks) - comments_html, info = tasks[0].value, tasks[1].value - - - #comments_html = comments.comments_html(video_id(url)) - #info = YoutubeDL().extract_info(url, download=False) - - #chosen_format = choose_format(info) - - if isinstance(info, str): # youtube error - return html_common.yt_basic_template.substitute( - page_title = "Error", - style = "", - header = html_common.get_header(), - page = html.escape(info), - ).encode('utf-8') - - sorted_formats = sort_formats(info) - - video_info = { - "duration": util.seconds_to_timestamp(info["duration"]), - "id": info['id'], - "title": info['title'], - "author": info['uploader'], - } - - upload_year = info["upload_date"][0:4] - upload_month = info["upload_date"][4:6] - upload_day = info["upload_date"][6:8] - upload_date = upload_month + "/" + upload_day + "/" + upload_year - - if settings.enable_related_videos: - related_videos_html = get_related_items_html(info) - else: - related_videos_html = '' - music_list = info['music_list'] - if len(music_list) == 0: - music_list_html = '' - else: - # get the set of attributes which are used by atleast 1 track - # so there isn't an empty, extraneous album column which no tracks use, for example - used_attributes = set() - for track in music_list: - used_attributes = used_attributes | track.keys() - - # now put them in the right order - ordered_attributes = [] - for attribute in ('Artist', 'Title', 'Album'): - if attribute.lower() in used_attributes: - ordered_attributes.append(attribute) - - music_list_html = '''<hr> -<table> - <caption>Music</caption> - <tr> -''' - # table headings - for attribute in ordered_attributes: - music_list_html += "<th>" + attribute + "</th>\n" - music_list_html += '''</tr>\n''' - - for track in music_list: - music_list_html += '''<tr>\n''' - for attribute in ordered_attributes: - try: - value = track[attribute.lower()] - except KeyError: - music_list_html += '''<td></td>''' - else: - music_list_html += '''<td>''' + html.escape(value) + '''</td>''' - music_list_html += '''</tr>\n''' - music_list_html += '''</table>\n''' - if settings.gather_googlevideo_domains: - with open(os.path.join(settings.data_dir, 'googlevideo-domains.txt'), 'a+', encoding='utf-8') as f: - url = info['formats'][0]['url'] - subdomain = 
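
A quick worked example of the attribute ordering above, with hypothetical tracks: no track carries an album, so no empty Album column is produced:

    music_list = [{'artist': 'Some Artist', 'title': 'Song A'},
                  {'title': 'Song B'}]
    get_ordered_music_list_attributes(music_list)  # -> ['Artist', 'Title']
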
url[0:url.find(".googlevideo.com")] - f.write(subdomain + "\n") - - download_options = '' - for format in info['formats']: - download_options += download_link_template.substitute( - url = html.escape(format['url']), - ext = html.escape(format['ext']), - resolution = html.escape(downloader.format_resolution(format)), - note = html.escape(downloader._format_note(format)), - ) - - - page = yt_watch_template.substitute( - video_title = html.escape(info["title"]), - page_title = html.escape(info["title"]), - header = html_common.get_header(), - uploader = html.escape(info["uploader"]), - uploader_channel_url = '/' + info["uploader_url"], - upload_date = upload_date, - views = (lambda x: '{:,}'.format(x) if x is not None else "")(info.get("view_count", None)), - likes = (lambda x: '{:,}'.format(x) if x is not None else "")(info.get("like_count", None)), - dislikes = (lambda x: '{:,}'.format(x) if x is not None else "")(info.get("dislike_count", None)), - download_options = download_options, - video_info = html.escape(json.dumps(video_info)), - description = html.escape(info["description"]), - video_sources = formats_html(sorted_formats) + subtitles_html(info), - related = related_videos_html, - - comments = comments_html, - - music_list = music_list_html, - is_unlisted = '<span class="is-unlisted">Unlisted</span>' if info['unlisted'] else '', - ) - return page.encode('utf-8') + + +@yt_app.route('/watch') +def get_watch_page(): + video_id = request.args['v'] + if len(video_id) < 11: + flask.abort(404) + flask.abort(flask.Response('Incomplete video id (too short): ' + video_id)) + + lc = request.args.get('lc', '') + if settings.route_tor: + proxy = 'socks5://127.0.0.1:9150/' + else: + proxy = '' + yt_dl_downloader = YoutubeDL(params={'youtube_include_dash_manifest':False, 'proxy':proxy}) + tasks = ( + gevent.spawn(comments.video_comments, video_id, int(settings.default_comment_sorting), lc=lc ), + gevent.spawn(extract_info, yt_dl_downloader, "https://www.youtube.com/watch?v=" + video_id, download=False) + ) + gevent.joinall(tasks) + comments_info, info = tasks[0].value, tasks[1].value + + if isinstance(info, str): # youtube error + return flask.render_template('error.html', error_message = info) + + video_info = { + "duration": util.seconds_to_timestamp(info["duration"]), + "id": info['id'], + "title": info['title'], + "author": info['uploader'], + } + + upload_year = info["upload_date"][0:4] + upload_month = info["upload_date"][4:6] + upload_day = info["upload_date"][6:8] + upload_date = upload_month + "/" + upload_day + "/" + upload_year + + if settings.related_videos_mode: + related_videos = get_related_items(info) + else: + related_videos = [] + + + if settings.gather_googlevideo_domains: + with open(os.path.join(settings.data_dir, 'googlevideo-domains.txt'), 'a+', encoding='utf-8') as f: + url = info['formats'][0]['url'] + subdomain = url[0:url.find(".googlevideo.com")] + f.write(subdomain + "\n") + + + download_formats = [] + + for format in info['formats']: + download_formats.append({ + 'url': format['url'], + 'ext': format['ext'], + 'resolution': yt_dl_downloader.format_resolution(format), + 'note': yt_dl_downloader._format_note(format), + }) + + return flask.render_template('watch.html', + header_playlist_names = local_playlist.get_playlist_names(), + uploader_channel_url = '/' + info['uploader_url'], + upload_date = upload_date, + views = (lambda x: '{:,}'.format(x) if x is not None else "")(info.get("view_count", None)), + likes = (lambda x: '{:,}'.format(x) if x is not None else 
"")(info.get("like_count", None)), + dislikes = (lambda x: '{:,}'.format(x) if x is not None else "")(info.get("dislike_count", None)), + download_formats = download_formats, + video_info = json.dumps(video_info), + video_sources = get_video_sources(info), + subtitle_sources = get_subtitle_sources(info), + related = related_videos, + music_list = info['music_list'], + music_attributes = get_ordered_music_list_attributes(info['music_list']), + comments_info = comments_info, + + title = info['title'], + uploader = info['uploader'], + description = info['description'], + unlisted = info['unlisted'], + ) + + +@yt_app.route('/api/<path:dummy>') +def get_captions(dummy): + result = util.fetch_url('https://www.youtube.com' + request.full_path) + result = result.replace(b"align:start position:0%", b"") + return result + + + + diff --git a/youtube/youtube.py b/youtube/youtube.py deleted file mode 100644 index c0be4fe..0000000 --- a/youtube/youtube.py +++ /dev/null @@ -1,160 +0,0 @@ -import mimetypes -import urllib.parse -import os -import re -from youtube import local_playlist, watch, search, playlist, channel, comments, post_comment, accounts, util, subscriptions -import settings -YOUTUBE_FILES = ( - "/shared.css", - '/comments.css', - '/favicon.ico', -) -get_handlers = { - 'search': search.get_search_page, - '': search.get_search_page, - 'watch': watch.get_watch_page, - 'playlist': playlist.get_playlist_page, - - 'channel': channel.get_channel_page, - 'user': channel.get_channel_page_general_url, - 'c': channel.get_channel_page_general_url, - - 'playlists': local_playlist.get_playlist_page, - - 'comments': comments.get_comments_page, - 'post_comment': post_comment.get_post_comment_page, - 'delete_comment': post_comment.get_delete_comment_page, - 'login': accounts.get_account_login_page, - - 'subscriptions': subscriptions.get_subscriptions_page, - 'subscription_manager': subscriptions.get_subscription_manager_page, -} -post_handlers = { - 'edit_playlist': local_playlist.edit_playlist, - 'playlists': local_playlist.path_edit_playlist, - - 'login': accounts.add_account, - 'comments': post_comment.post_comment, - 'post_comment': post_comment.post_comment, - 'delete_comment': post_comment.delete_comment, - - 'subscriptions': subscriptions.post_subscriptions_page, - 'subscription_manager': subscriptions.post_subscription_manager_page, - 'import_subscriptions': subscriptions.import_subscriptions, -} - -def youtube(env, start_response): - path, method, query_string = env['PATH_INFO'], env['REQUEST_METHOD'], env['QUERY_STRING'] - env['qs_parameters'] = urllib.parse.parse_qs(query_string) - env['parameters'] = dict(env['qs_parameters']) - - path_parts = path.rstrip('/').lstrip('/').split('/') - env['path_parts'] = path_parts - - if method == "GET": - try: - handler = get_handlers[path_parts[0]] - except KeyError: - pass - else: - return handler(env, start_response) - - if path in YOUTUBE_FILES: - with open("youtube" + path, 'rb') as f: - mime_type = mimetypes.guess_type(path)[0] or 'application/octet-stream' - start_response('200 OK', (('Content-type',mime_type),) ) - return f.read() - - elif path.startswith('/data/playlist_thumbnails/') or path.startswith('/data/subscription_thumbnails/'): - with open(os.path.join(settings.data_dir, os.path.normpath(path[6:])), 'rb') as f: - start_response('200 OK', (('Content-type', "image/jpeg"),) ) - return f.read() - - elif path.startswith("/api/"): - start_response('200 OK', [('Content-type', 'text/vtt'),] ) - result = util.fetch_url('https://www.youtube.com' + path 
+ ('?' + query_string if query_string else '')) - result = result.replace(b"align:start position:0%", b"") - return result - - elif path == "/opensearch.xml": - with open("youtube" + path, 'rb') as f: - mime_type = mimetypes.guess_type(path)[0] or 'application/octet-stream' - start_response('200 OK', (('Content-type',mime_type),) ) - return f.read().replace(b'$port_number', str(settings.port_number).encode()) - - elif path == "/comment_delete_success": - start_response('200 OK', [('Content-type', 'text/plain'),] ) - return b'Successfully deleted comment' - - elif path == "/comment_delete_fail": - start_response('200 OK', [('Content-type', 'text/plain'),] ) - return b'Failed to deleted comment' - - else: - return channel.get_channel_page_general_url(env, start_response) - - elif method == "POST": - content_type = env['CONTENT_TYPE'] - if content_type == 'application/x-www-form-urlencoded': - post_parameters = urllib.parse.parse_qs(env['wsgi.input'].read().decode()) - env['post_parameters'] = post_parameters - env['parameters'].update(post_parameters) - - # Ugly hack that will be removed once I clean up this trainwreck and switch to a microframework - # Only supports a single file with no other fields - elif content_type.startswith('multipart/form-data'): - content = env['wsgi.input'].read() - - # find double line break - file_start = content.find(b'\r\n\r\n') - if file_start == -1: - start_response('400 Bad Request', ()) - return b'400 Bad Request' - - file_start += 4 - - lines = content[0:file_start].splitlines() - boundary = lines[0] - - file_end = content.find(boundary, file_start) - if file_end == -1: - start_response('400 Bad Request', ()) - return b'400 Bad Request' - file_end -= 2 # Subtract newlines - file = content[file_start:file_end] - - properties = dict() - for line in lines[1:]: - line = line.decode('utf-8') - colon = line.find(':') - if colon == -1: - continue - properties[line[0:colon]] = line[colon+2:] - - mime_type = properties['Content-Type'] - field_name = re.search(r'name="([^"]*)"' , properties['Content-Disposition']) - if field_name is None: - start_response('400 Bad Request', ()) - return b'400 Bad Request' - field_name = field_name.group(1) - - env['post_parameters'] = {field_name: (mime_type, file)} - env['parameters'][field_name] = (mime_type, file) - - else: - start_response('400 Bad Request', ()) - return b'400 Bad Request' - - try: - handler = post_handlers[path_parts[0]] - except KeyError: - pass - else: - return handler(env, start_response) - - start_response('404 Not Found', [('Content-type', 'text/plain'),]) - return b'404 Not Found' - - else: - start_response('501 Not Implemented', [('Content-type', 'text/plain'),]) - return b'501 Not Implemented' diff --git a/youtube/yt_data_extract.py b/youtube/yt_data_extract.py index 5483911..c236c2f 100644 --- a/youtube/yt_data_extract.py +++ b/youtube/yt_data_extract.py @@ -1,4 +1,7 @@ +from youtube import util + import html +import json # videos (all of type str): @@ -33,19 +36,11 @@ import html - - def get_plain_text(node): try: - return html.escape(node['simpleText']) + return node['simpleText'] except KeyError: - return unformmated_text_runs(node['runs']) - -def unformmated_text_runs(runs): - result = '' - for text_run in runs: - result += html.escape(text_run["text"]) - return result + return ''.join(text_run['text'] for text_run in node['runs']) def format_text_runs(runs): if isinstance(runs, str): @@ -75,14 +70,19 @@ def get_url(node): def get_text(node): + if node == {}: + return '' try: return 
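
The two node shapes these helpers normalize appear all over YouTube's JSON; minimal hand-written samples run against the new get_plain_text:

    get_plain_text({'simpleText': 'Hello world'})                      # -> 'Hello world'
    get_plain_text({'runs': [{'text': 'Hello '}, {'text': 'world'}]})  # -> 'Hello world'
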
node['simpleText'] except KeyError: - pass + pass try: return node['runs'][0]['text'] except IndexError: # empty text runs return '' + except KeyError: + print(node) + raise def get_formatted_text(node): try: @@ -138,9 +138,85 @@ dispatch = { } -def renderer_info(renderer): +def ajax_info(item_json): + try: + info = {} + for key, node in item_json.items(): + try: + simple_key, function = dispatch[key] + except KeyError: + continue + info[simple_key] = function(node) + return info + except KeyError: + print(item_json) + raise + + + +def prefix_urls(item): + try: + item['thumbnail'] = '/' + item['thumbnail'].lstrip('/') + except KeyError: + pass + + try: + item['author_url'] = util.URL_ORIGIN + item['author_url'] + except KeyError: + pass + +def add_extra_html_info(item): + if item['type'] == 'video': + item['url'] = util.URL_ORIGIN + '/watch?v=' + item['id'] + + video_info = {} + for key in ('id', 'title', 'author', 'duration'): + try: + video_info[key] = item[key] + except KeyError: + video_info[key] = '' + + item['video_info'] = json.dumps(video_info) + + elif item['type'] == 'playlist': + item['url'] = util.URL_ORIGIN + '/playlist?list=' + item['id'] + elif item['type'] == 'channel': + item['url'] = util.URL_ORIGIN + "/channel/" + item['id'] + + +def renderer_info(renderer, additional_info={}): + type = list(renderer.keys())[0] + renderer = renderer[type] + info = {} + if type == 'itemSectionRenderer': + return renderer_info(renderer['contents'][0], additional_info) + + if type in ('movieRenderer', 'clarificationRenderer'): + info['type'] = 'unsupported' + return info + + info.update(additional_info) + + if type.startswith('compact') or (type.startswith('playlist') and type != 'playlistRenderer'): + info['item_size'] = 'small' + else: + info['item_size'] = 'medium' + + if type in ('compactVideoRenderer', 'videoRenderer', 'playlistVideoRenderer', 'gridVideoRenderer'): + info['type'] = 'video' + elif type in ('playlistRenderer', 'compactPlaylistRenderer', 'gridPlaylistRenderer', + 'radioRenderer', 'compactRadioRenderer', 'gridRadioRenderer', + 'showRenderer', 'compactShowRenderer', 'gridShowRenderer'): + info['type'] = 'playlist' + elif type == 'channelRenderer': + info['type'] = 'channel' + elif type == 'playlistHeaderRenderer': + info['type'] = 'playlist_metadata' + else: + info['type'] = 'unsupported' + return info + try: - info = {} if 'viewCountText' in renderer: # prefer this one as it contains all the digits info['views'] = get_text(renderer['viewCountText']) elif 'shortViewCountText' in renderer: @@ -183,23 +259,20 @@ def renderer_info(renderer): except KeyError: continue info[simple_key] = function(node) + if info['type'] == 'video' and 'duration' not in info: + info['duration'] = 'Live' + return info except KeyError: print(renderer) raise - -def ajax_info(item_json): - try: - info = {} - for key, node in item_json.items(): - try: - simple_key, function = dispatch[key] - except KeyError: - continue - info[simple_key] = function(node) - return info - except KeyError: - print(item_json) - raise - + + +def parse_info_prepare_for_html(renderer, additional_info={}): + item = renderer_info(renderer, additional_info) + prefix_urls(item) + add_extra_html_info(item) + + return item + diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 52c8731..18b240a 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -17,6 +17,7 @@ from ..jsinterp import JSInterpreter from ..swfinterp import SWFInterpreter from ..compat import ( 
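
renderer_info above derives the item type and size purely from the top-level renderer name. A heavily trimmed example (a real renderer carries title, thumbnail, length text and so on, which feed the dispatch loop):

    renderer = {'compactVideoRenderer': {'videoId': 'dQw4w9WgXcQ'}}
    info = renderer_info(renderer)
    # the 'compact' prefix gives info['item_size'] == 'small';
    # the renderer name maps to info['type'] == 'video'
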
compat_chr, + compat_HTTPError, compat_kwargs, compat_parse_qs, compat_urllib_parse_unquote, @@ -28,6 +29,7 @@ from ..compat import ( ) from ..utils import ( clean_html, + dict_get, error_to_compat_str, ExtractorError, float_or_none, @@ -289,10 +291,25 @@ class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor): if not mobj: break - more = self._download_json( - 'https://youtube.com/%s' % mobj.group('more'), playlist_id, - 'Downloading page #%s' % page_num, - transform_source=uppercase_escape) + count = 0 + retries = 3 + while count <= retries: + try: + # Downloading page may result in intermittent 5xx HTTP error + # that is usually worked around with a retry + more = self._download_json( + 'https://youtube.com/%s' % mobj.group('more'), playlist_id, + 'Downloading page #%s%s' + % (page_num, ' (retry #%d)' % count if count else ''), + transform_source=uppercase_escape) + break + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503): + count += 1 + if count <= retries: + continue + raise + content_html = more['content_html'] if not content_html.strip(): # Some webpages show a "Load more" button but they don't @@ -353,7 +370,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): (?:www\.)?hooktube\.com/| (?:www\.)?yourepeat\.com/| tube\.majestyc\.net/| - (?:www\.)?invidio\.us/| + (?:(?:www|dev)\.)?invidio\.us/| + (?:www\.)?invidiou\.sh/| + (?:www\.)?invidious\.snopyta\.org/| + (?:www\.)?invidious\.kabi\.tk/| + (?:www\.)?vid\.wxzm\.sx/| youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains (?:.*?\#/)? # handle anchor (#/) redirect urls (?: # the various things that can precede the ID: @@ -429,7 +450,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'}, '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'}, '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'}, - '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559) + '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559) '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'}, '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'}, '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'}, @@ -481,8 +502,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # RTMP (unnamed) '_rtmp': {'protocol': 'rtmp'}, + + # av01 video only formats sometimes served with "unknown" codecs + '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'}, + '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'}, + '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'}, + '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'}, } - _SUBTITLE_FORMATS = ('ttml', 'vtt') + _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt') _GEO_BYPASS = False @@ -694,7 +721,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'age_limit': 18, }, }, - # video_info is None (https://github.com/rg3/youtube-dl/issues/4421) + # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421) # YouTube Red ad is not captured for creator { 'url': '__2ABJjxzNo', @@ -715,7 +742,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'DASH manifest missing', ] }, - # Olympics (https://github.com/rg3/youtube-dl/issues/4431) + # Olympics 
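
The retry loop added above is the usual shape for a flaky endpoint: a bounded number of attempts, retrying only on status codes known to be transient (here 500 and 503). A self-contained skeleton of the same logic, with hypothetical names:

    class TransientServerError(Exception):
        """Stand-in for the HTTP 500/503 case the extractor detects via e.cause.code."""

    def fetch_with_retries(fetch, retries=3):
        count = 0
        while count <= retries:   # one initial attempt plus up to `retries` retries
            try:
                return fetch()
            except TransientServerError:
                count += 1
                if count > retries:
                    raise
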
(https://github.com/ytdl-org/youtube-dl/issues/4431) { 'url': 'lqQg6PlCWgI', 'info_dict': { @@ -766,7 +793,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): }, 'skip': 'This live event has ended.', }, - # Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097) + # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097) { 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y', 'info_dict': { @@ -869,7 +896,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'skip': 'This video is not available.', }, { - # Multifeed video with comma in title (see https://github.com/rg3/youtube-dl/issues/8536) + # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536) 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo', 'info_dict': { 'id': 'gVfLd0zydlo', @@ -887,10 +914,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'only_matching': True, }, { - # Title with JS-like syntax "};" (see https://github.com/rg3/youtube-dl/issues/7468) + # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468) # Also tests cut-off URL expansion in video description (see - # https://github.com/rg3/youtube-dl/issues/1892, - # https://github.com/rg3/youtube-dl/issues/8164) + # https://github.com/ytdl-org/youtube-dl/issues/1892, + # https://github.com/ytdl-org/youtube-dl/issues/8164) 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg', 'info_dict': { 'id': 'lsguqyKfVQg', @@ -906,13 +933,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan', 'track': 'Dark Walk - Position Music', 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan', + 'album': 'Position Music - Production Music Vol. 143 - Dark Walk', }, 'params': { 'skip_download': True, }, }, { - # Tags with '};' (see https://github.com/rg3/youtube-dl/issues/7468) + # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468) 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8', 'only_matching': True, }, @@ -976,7 +1004,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'only_matching': True, }, { - # YouTube Red paid video (https://github.com/rg3/youtube-dl/issues/10059) + # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059) 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo', 'only_matching': True, }, @@ -1084,7 +1112,95 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'skip_download': True, 'youtube_include_dash_manifest': False, }, - } + }, + { + # Youtube Music Auto-generated description + 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs', + 'info_dict': { + 'id': 'MgNrAu2pzNs', + 'ext': 'mp4', + 'title': 'Voyeur Girl', + 'description': 'md5:7ae382a65843d6df2685993e90a8628f', + 'upload_date': '20190312', + 'uploader': 'Various Artists - Topic', + 'uploader_id': 'UCVWKBi1ELZn0QX2CBLSkiyw', + 'artist': 'Stephen', + 'track': 'Voyeur Girl', + 'album': 'it\'s too much love to know my dear', + 'release_date': '20190313', + 'release_year': 2019, + }, + 'params': { + 'skip_download': True, + }, + }, + { + # Youtube Music Auto-generated description + # Retrieve 'artist' field from 'Artist:' in video description + # when it is present on youtube music video + 'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY', + 'info_dict': { + 'id': 'k0jLE7tTwjY', + 'ext': 'mp4', + 'title': 'Latch Feat. 
Sam Smith', + 'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335', + 'upload_date': '20150110', + 'uploader': 'Various Artists - Topic', + 'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w', + 'artist': 'Disclosure', + 'track': 'Latch Feat. Sam Smith', + 'album': 'Latch Featuring Sam Smith', + 'release_date': '20121008', + 'release_year': 2012, + }, + 'params': { + 'skip_download': True, + }, + }, + { + # Youtube Music Auto-generated description + # handle multiple artists on youtube music video + 'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA', + 'info_dict': { + 'id': '74qn0eJSjpA', + 'ext': 'mp4', + 'title': 'Eastside', + 'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2', + 'upload_date': '20180710', + 'uploader': 'Benny Blanco - Topic', + 'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A', + 'artist': 'benny blanco, Halsey, Khalid', + 'track': 'Eastside', + 'album': 'Eastside', + 'release_date': '20180713', + 'release_year': 2018, + }, + 'params': { + 'skip_download': True, + }, + }, + { + # Youtube Music Auto-generated description + # handle youtube music video with release_year and no release_date + 'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M', + 'info_dict': { + 'id': '-hcAI0g-f5M', + 'ext': 'mp4', + 'title': 'Put It On Me', + 'description': 'md5:93c55acc682ae7b0c668f2e34e1c069e', + 'upload_date': '20180426', + 'uploader': 'Matt Maeson - Topic', + 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ', + 'artist': 'Matt Maeson', + 'track': 'Put It On Me', + 'album': 'The Hearse', + 'release_date': None, + 'release_year': 2018, + }, + 'params': { + 'skip_download': True, + }, + }, ] def __init__(self, *args, **kwargs): @@ -1198,11 +1314,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _parse_sig_js(self, jscode): funcname = self._search_regex( - (r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', + (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(', + r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(', + r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)', + # Obsolete patterns + r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(', - r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?(?P<sig>[a-zA-Z0-9$]+)\(', - r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(', - r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('), + r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(', + r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', + r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', + r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(', + r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(', + r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('), jscode, 'Initial JS player signature function name', group='sig') jsi = JSInterpreter(jscode) @@ -1282,8 +1405,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # regex won't capture the whole JSON. 
Yet working around by trying more # concrete regex first keeping in mind proper quoted string handling # to be implemented in future that will replace this workaround (see - # https://github.com/rg3/youtube-dl/issues/7468, - # https://github.com/rg3/youtube-dl/pull/7599) + # https://github.com/ytdl-org/youtube-dl/issues/7468, + # https://github.com/ytdl-org/youtube-dl/pull/7599) r';ytplayer\.config\s*=\s*({.+?});ytplayer', r';ytplayer\.config\s*=\s*({.+?});', ) @@ -1467,8 +1590,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor): return video_id def _extract_annotations(self, video_id): - url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id - return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.') + return self._download_webpage( + 'https://www.youtube.com/annotations_invideo', video_id, + note='Downloading annotations', + errnote='Unable to download video annotations', fatal=False, + query={ + 'features': 1, + 'legacy': 1, + 'video_id': video_id, + }) @staticmethod def _extract_chapters(description, duration): @@ -1563,12 +1693,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def extract_view_count(v_info): return int_or_none(try_get(v_info, lambda x: x['view_count'][0])) + def extract_token(v_info): + return dict_get(v_info, ('account_playback_token', 'accountPlaybackToken', 'token')) + player_response = {} # Is it unlisted? - unlisted = (self._search_regex('''<meta itemprop="unlisted" content="(\w*)">''', video_webpage, 'is_unlisted', default='False') == "True") + unlisted = ('<span id="watch-privacy-icon"' in video_webpage) # Related videos related_vid_info = self._search_regex(r"""'RELATED_PLAYER_ARGS':\s*(\{.*?\})""", video_webpage, "related_player_args", default='') @@ -1623,8 +1756,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): - - # Get video info embed_webpage = None if re.search(r'player-age-gate-content">', video_webpage) is not None: @@ -1660,7 +1791,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): add_dash_mpd(video_info) # Rental video is not rented but preview is available (e.g. # https://www.youtube.com/watch?v=yYr8q0y5Jfg, - # https://github.com/rg3/youtube-dl/issues/10532) + # https://github.com/ytdl-org/youtube-dl/issues/10532) if not video_info and args.get('ypc_vid'): return self.url_result( args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid']) @@ -1680,9 +1811,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH # manifest pointed by get_video_info's dashmpd). # The general idea is to take a union of itags of both DASH manifests (for example - # video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093) + # video with such 'manifest behavior' see https://github.com/ytdl-org/youtube-dl/issues/6093) self.report_video_info_webpage_download(video_id) - for el in ('info', 'embedded', 'detailpage', 'vevo', ''): + for el in ('embedded', 'detailpage', 'vevo', ''): query = { 'video_id': video_id, 'ps': 'default', @@ -1712,18 +1843,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor): view_count = extract_view_count(get_video_info) if not video_info: video_info = get_video_info - get_token = get_video_info.get('token') or get_video_info.get('account_playback_token') + get_token = extract_token(get_video_info) if get_token: # Different get_video_info requests may report different results, e.g. 
# some may report video unavailability, but some may serve it without - # any complaint (see https://github.com/rg3/youtube-dl/issues/7362, + # any complaint (see https://github.com/ytdl-org/youtube-dl/issues/7362, # the original webpage as well as el=info and el=embedded get_video_info # requests report video unavailability due to geo restriction while # el=detailpage succeeds and returns valid data). This is probably # due to YouTube measures against IP ranges of hosting providers. # Working around by preferring the first succeeded video_info containing # the token if no such video_info yet was found. - token = video_info.get('token') or video_info.get('account_playback_token') + token = extract_token(video_info) if not token: video_info = get_video_info break @@ -1733,40 +1864,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor): r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>', video_webpage, 'unavailable message', default=None) - token = video_info.get('token') or video_info.get('account_playback_token') - if not token: - if 'reason' in video_info: - if 'The uploader has not made this video available in your country.' in video_info['reason']: - regions_allowed = self._html_search_meta( - 'regionsAllowed', video_webpage, default=None) - countries = regions_allowed.split(',') if regions_allowed else None - self.raise_geo_restricted( - msg=video_info['reason'][0], countries=countries) - reason = video_info['reason'][0] - if 'Invalid parameters' in reason: - unavailable_message = extract_unavailable_message() - if unavailable_message: - reason = unavailable_message - raise YoutubeError( - 'YouTube said: %s' % reason, - expected=True, video_id=video_id) - else: - raise ExtractorError( - '"token" parameter not in video info for unknown reason', - video_id=video_id) - - if video_info.get('license_info'): - raise ExtractorError('This video is DRM protected.', expected=True) + if not video_info: + unavailable_message = extract_unavailable_message() + if not unavailable_message: + unavailable_message = 'Unable to extract video data' + raise ExtractorError( + 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id) video_details = try_get( player_response, lambda x: x['videoDetails'], dict) or {} - # title - if 'title' in video_info: - video_title = video_info['title'][0] - elif 'title' in player_response: - video_title = video_details['title'] - else: + video_title = video_info.get('title', [None])[0] or video_details.get('title') + if not video_title: self._downloader.report_warning('Unable to extract video title') video_title = '_' @@ -1795,11 +1904,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): ''', replace_url, video_description) video_description = clean_html(video_description) else: - fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage) - if fd_mobj: - video_description = unescapeHTML(fd_mobj.group(1)) - else: - video_description = '' + video_description = self._html_search_meta('description', video_webpage) or video_details.get('shortDescription') if not smuggled_data.get('force_singlefeed', False): if not self._downloader.params.get('noplaylist'): @@ -1814,7 +1919,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): for feed in multifeed_metadata_list.split(','): # Unquote should take place before split on comma (,) since textual # fields may contain comma as well (see - # https://github.com/rg3/youtube-dl/issues/8536) + # https://github.com/ytdl-org/youtube-dl/issues/8536) feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed)) 
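The multifeed entries handled above are themselves percent-encoded query strings, so each one is unquoted only after the outer split on ','; a comma inside a textual field stays escaped until that per-entry unquote. A minimal standalone sketch of the same ordering, using stdlib urllib.parse in place of the compat_* shims and an invented two-camera blob:

from urllib.parse import parse_qs, unquote_plus

# Invented multifeed blob: two feed entries separated by a literal comma,
# each entry a percent-encoded query string whose title field is encoded
# one level deeper (%252C -> %2C -> ',').
multifeed_metadata_list = (
    'id%3Dabc123%26title%3DMain%252C%2520wide%2520angle,'
    'id%3Ddef456%26title%3DStage%2520cam'
)

for feed in multifeed_metadata_list.split(','):
    # Unquote per entry, then parse: the comma inside the first title only
    # becomes literal after the outer split has already happened.
    feed_data = parse_qs(unquote_plus(feed))
    print(feed_data['id'][0], '->', feed_data['title'][0])
# abc123 -> Main, wide angle
# def456 -> Stage cam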
entries.append({ '_type': 'url_transparent', @@ -1839,7 +1944,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # Check for "rental" videos if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info: - raise ExtractorError('"rental" videos not supported. See https://github.com/rg3/youtube-dl/issues/359 for more information.', expected=True) + raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True) def _extract_filesize(media_url): return int_or_none(self._search_regex( @@ -1856,7 +1961,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): elif not is_live and (len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1): encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0] if 'rtmpe%3Dyes' in encoded_url_map: - raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True) + raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True) formats_spec = {} fmt_list = video_info.get('fmt_list', [''])[0] if fmt_list: @@ -1893,7 +1998,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): formats = [] for url_data_str in encoded_url_map.split(','): url_data = compat_parse_qs(url_data_str) - if 'itag' not in url_data or 'url' not in url_data: + if 'itag' not in url_data or 'url' not in url_data or url_data.get('drm_families'): continue stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0])) # Unsupported FORMAT_STREAM_TYPE_OTF @@ -1953,7 +2058,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): signature = self._decrypt_signature( encrypted_sig, video_id, player_url, age_gate) - url += '&signature=' + signature + sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature' + url += '&%s=%s' % (sp, signature) if 'ratebypass' not in url: url += '&ratebypass=yes' @@ -1968,7 +2074,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): dct.update(formats_spec[format_id]) # Some itags are not included in DASH manifest thus corresponding formats will - # lack metadata (see https://github.com/rg3/youtube-dl/pull/5993). + # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993). # Trying to extract metadata from url_encoded_fmt_stream_map entry. 
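The signature hunk above stops hard-coding '&signature=' and instead appends the decrypted signature under whatever parameter name the stream data supplies in its 'sp' field (commonly 'sig' on newer players), falling back to the legacy name. A minimal sketch of that fallback using stdlib types only; the URL and signature value are invented:

# url_data is assumed to come from parse_qs, so every value is a list.
def append_signature(url, url_data, signature):
    sp = (url_data.get('sp') or ['signature'])[0]
    return '%s&%s=%s' % (url, sp, signature)

url_data = {'sp': ['sig']}
print(append_signature('https://example.com/videoplayback?itag=22',
                       url_data, 'DECRYPTED'))
# https://example.com/videoplayback?itag=22&sig=DECRYPTED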
mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0]) width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None) @@ -2017,8 +2123,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): url_or_none(try_get( player_response, lambda x: x['streamingData']['hlsManifestUrl'], - compat_str)) or - url_or_none(try_get( + compat_str)) + or url_or_none(try_get( video_info, lambda x: x['hlsvp'][0], compat_str))) if manifest_url: formats = [] @@ -2038,7 +2144,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True' formats.append(a_format) else: - error_message = extract_unavailable_message() + error_message = clean_html(video_info.get('reason', [None])[0]) if not error_message: @@ -2068,8 +2174,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor): else: self._downloader.report_warning('unable to extract uploader nickname') - channel_id = self._html_search_meta( - 'channelId', video_webpage, 'channel id') + channel_id = ( + str_or_none(video_details.get('channelId')) + or self._html_search_meta( + 'channelId', video_webpage, 'channel id', default=None) + or self._search_regex( + r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1', + video_webpage, 'channel id', default=None, group='id')) channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None # thumbnail image @@ -2128,6 +2239,27 @@ class YoutubeIE(YoutubeBaseInfoExtractor): track = extract_meta('Song') artist = extract_meta('Artist') + album = extract_meta('Album') + + # Youtube Music Auto-generated description + release_date = release_year = None + if video_description: + mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description) + if mobj: + if not track: + track = mobj.group('track').strip() + if not artist: + artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')) + if not album: + album = mobj.group('album').strip() + release_year = mobj.group('release_year') + release_date = mobj.group('release_date') + if release_date: + release_date = release_date.replace('-', '') + if not release_year: + release_year = int(release_date[:4]) + if release_year: + release_year = int(release_year) m_episode = re.search( r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>', @@ -2168,6 +2300,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage, 'view count', default=None)) + average_rating = ( + float_or_none(video_details.get('averageRating')) + or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0]))) + # subtitles video_subtitles = self._get_subtitles(video_id, video_webpage) automatic_captions = self._get_automatic_captions(video_id, video_webpage) @@ -2221,7 +2357,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # Remove the formats we found through non-DASH, they # contain less info and it can be wrong, because we use # fixed values (for example the resolution). See - # https://github.com/rg3/youtube-dl/issues/5774 for an + # https://github.com/ytdl-org/youtube-dl/issues/5774 for an # example.
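The 'Provided to YouTube by …' parser added above fills in track, artist and album from YouTube Music's auto-generated descriptions and normalizes the release date to YYYYMMDD. A self-contained sketch of the same regex ((?s) lets the lazy .+? groups cross newlines) against an invented description; only the stdlib is used:

import re

description = (
    'Provided to YouTube by SomeLabel\n\n'
    'Voyeur Girl · Stephen\n\n'
    "it's too much love to know my dear\n\n"
    '℗ 2019 SomeLabel\n\n'
    'Released on: 2019-03-13\n\n'
    'Auto-generated by YouTube.'
)

mobj = re.search(
    r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+'
    r'(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?'
    r'(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?'
    r'(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', description)

track = mobj.group('track').strip()        # 'Voyeur Girl'
artist = ', '.join(a.strip() for a in mobj.group('artist').split('·'))
album = mobj.group('album').strip()
release_date = mobj.group('release_date')  # '2019-03-13'
if release_date:
    release_date = release_date.replace('-', '')
release_year = int(mobj.group('release_year') or release_date[:4])
print(track, '|', artist, '|', album, '|', release_date, '|', release_year)
# Voyeur Girl | Stephen | it's too much love to know my dear | 20190313 | 2019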
formats = [f for f in formats if f['format_id'] not in dash_formats.keys()] formats.extend(dash_formats.values()) @@ -2241,6 +2377,32 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if f.get('vcodec') != 'none': f['stretched_ratio'] = ratio + if not formats: + token = extract_token(video_info) + if not token: + if 'reason' in video_info: + if 'The uploader has not made this video available in your country.' in video_info['reason']: + regions_allowed = self._html_search_meta( + 'regionsAllowed', video_webpage, default=None) + countries = regions_allowed.split(',') if regions_allowed else None + self.raise_geo_restricted( + msg=video_info['reason'][0], countries=countries) + reason = video_info['reason'][0] + if 'Invalid parameters' in reason: + unavailable_message = extract_unavailable_message() + if unavailable_message: + reason = unavailable_message + raise YoutubeError( + 'YouTube said: %s' % reason, + expected=True, video_id=video_id) + else: + raise ExtractorError( + '"token" parameter not in video info for unknown reason', + video_id=video_id) + + if not formats and (video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos'])): + raise ExtractorError('This video is DRM protected.', expected=True) + self._sort_formats(formats) self.mark_watched(video_id, video_info, player_response) @@ -2271,7 +2433,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'view_count': view_count, 'like_count': like_count, 'dislike_count': dislike_count, - 'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]), + 'average_rating': average_rating, 'formats': formats, 'is_live': is_live, 'start_time': start_time, @@ -2281,6 +2443,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'episode_number': episode_number, 'track': track, 'artist': artist, + 'album': album, + 'release_date': release_date, + 'release_year': release_year, 'related_vids': related_vids, 'music_list': music_list, 'unlisted': unlisted, @@ -2482,9 +2647,9 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage) title_span = ( - search_title('playlist-title') or - search_title('title long-title') or - search_title('title')) + search_title('playlist-title') + or search_title('title long-title') + or search_title('title')) title = clean_html(title_span) return self.playlist_result(url_results, playlist_id, title) @@ -2493,7 +2658,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): url = self._TEMPLATE_URL % playlist_id page = self._download_webpage(url, playlist_id) - # the yt-alert-message now has tabindex attribute (see https://github.com/rg3/youtube-dl/issues/11604) + # the yt-alert-message now has tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604) for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page): match = match.strip() # Check if the playlist exists or is private @@ -2586,7 +2751,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): return playlist # Some playlist URLs don't actually serve a playlist (see - # https://github.com/rg3/youtube-dl/issues/10537). + # https://github.com/ytdl-org/youtube-dl/issues/10537). # Fallback to plain video extraction if there is a video id # along with playlist id. 
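The fallback noted in the comment above relies on the video id having been read from the playlist URL itself; a minimal sketch of pulling v= out of a watch URL that also carries list= (URL invented):

from urllib.parse import parse_qs, urlparse

def video_id_from_playlist_url(url):
    # When the playlist page serves no playlist, a v= parameter riding
    # along in the same URL still identifies a playable video.
    qs = parse_qs(urlparse(url).query)
    return (qs.get('v') or [None])[0]

print(video_id_from_playlist_url(
    'https://www.youtube.com/watch?v=dQw4w9WgXcQ&list=RDdQw4w9WgXcQ'))
# dQw4w9WgXcQ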
return self.url_result(video_id, 'Youtube', video_id=video_id) diff --git a/youtube_dl/extractor/youtube_unmodified_reference.py b/youtube_dl/extractor/youtube_unmodified_reference.py index c12c417..f76a6e7 100644 --- a/youtube_dl/extractor/youtube_unmodified_reference.py +++ b/youtube_dl/extractor/youtube_unmodified_reference.py @@ -16,6 +16,7 @@ from ..jsinterp import JSInterpreter from ..swfinterp import SWFInterpreter from ..compat import ( compat_chr, + compat_HTTPError, compat_kwargs, compat_parse_qs, compat_urllib_parse_unquote, @@ -27,6 +28,7 @@ from ..compat import ( ) from ..utils import ( clean_html, + dict_get, error_to_compat_str, ExtractorError, float_or_none, @@ -287,10 +289,25 @@ class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor): if not mobj: break - more = self._download_json( - 'https://youtube.com/%s' % mobj.group('more'), playlist_id, - 'Downloading page #%s' % page_num, - transform_source=uppercase_escape) + count = 0 + retries = 3 + while count <= retries: + try: + # Downloading page may result in intermittent 5xx HTTP error + # that is usually worked around with a retry + more = self._download_json( + 'https://youtube.com/%s' % mobj.group('more'), playlist_id, + 'Downloading page #%s%s' + % (page_num, ' (retry #%d)' % count if count else ''), + transform_source=uppercase_escape) + break + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503): + count += 1 + if count <= retries: + continue + raise + content_html = more['content_html'] if not content_html.strip(): # Some webpages show a "Load more" button but they don't @@ -351,7 +368,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): (?:www\.)?hooktube\.com/| (?:www\.)?yourepeat\.com/| tube\.majestyc\.net/| - (?:www\.)?invidio\.us/| + (?:(?:www|dev)\.)?invidio\.us/| + (?:www\.)?invidiou\.sh/| + (?:www\.)?invidious\.snopyta\.org/| + (?:www\.)?invidious\.kabi\.tk/| + (?:www\.)?vid\.wxzm\.sx/| youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains (?:.*?\#/)? 
# handle anchor (#/) redirect urls (?: # the various things that can precede the ID: @@ -427,7 +448,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'}, '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'}, '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'}, - '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559) + '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559) '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'}, '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'}, '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'}, @@ -479,8 +500,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # RTMP (unnamed) '_rtmp': {'protocol': 'rtmp'}, + + # av01 video only formats sometimes served with "unknown" codecs + '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'}, + '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'}, + '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'}, + '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'}, } - _SUBTITLE_FORMATS = ('ttml', 'vtt') + _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt') _GEO_BYPASS = False @@ -692,7 +719,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'age_limit': 18, }, }, - # video_info is None (https://github.com/rg3/youtube-dl/issues/4421) + # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421) # YouTube Red ad is not captured for creator { 'url': '__2ABJjxzNo', @@ -713,7 +740,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'DASH manifest missing', ] }, - # Olympics (https://github.com/rg3/youtube-dl/issues/4431) + # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431) { 'url': 'lqQg6PlCWgI', 'info_dict': { @@ -764,7 +791,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): }, 'skip': 'This live event has ended.', }, - # Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097) + # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097) { 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y', 'info_dict': { @@ -867,7 +894,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'skip': 'This video is not available.', }, { - # Multifeed video with comma in title (see https://github.com/rg3/youtube-dl/issues/8536) + # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536) 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo', 'info_dict': { 'id': 'gVfLd0zydlo', @@ -885,10 +912,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'only_matching': True, }, { - # Title with JS-like syntax "};" (see https://github.com/rg3/youtube-dl/issues/7468) + # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468) # Also tests cut-off URL expansion in video description (see - # https://github.com/rg3/youtube-dl/issues/1892, - # https://github.com/rg3/youtube-dl/issues/8164) + # https://github.com/ytdl-org/youtube-dl/issues/1892, + # https://github.com/ytdl-org/youtube-dl/issues/8164) 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg', 'info_dict': { 'id': 'lsguqyKfVQg', @@ -904,13 +931,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan', 'track': 
'Dark Walk - Position Music', 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan', + 'album': 'Position Music - Production Music Vol. 143 - Dark Walk', }, 'params': { 'skip_download': True, }, }, { - # Tags with '};' (see https://github.com/rg3/youtube-dl/issues/7468) + # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468) 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8', 'only_matching': True, }, @@ -974,7 +1002,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'only_matching': True, }, { - # YouTube Red paid video (https://github.com/rg3/youtube-dl/issues/10059) + # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059) 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo', 'only_matching': True, }, @@ -1082,7 +1110,95 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'skip_download': True, 'youtube_include_dash_manifest': False, }, - } + }, + { + # Youtube Music Auto-generated description + 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs', + 'info_dict': { + 'id': 'MgNrAu2pzNs', + 'ext': 'mp4', + 'title': 'Voyeur Girl', + 'description': 'md5:7ae382a65843d6df2685993e90a8628f', + 'upload_date': '20190312', + 'uploader': 'Various Artists - Topic', + 'uploader_id': 'UCVWKBi1ELZn0QX2CBLSkiyw', + 'artist': 'Stephen', + 'track': 'Voyeur Girl', + 'album': 'it\'s too much love to know my dear', + 'release_date': '20190313', + 'release_year': 2019, + }, + 'params': { + 'skip_download': True, + }, + }, + { + # Youtube Music Auto-generated description + # Retrieve 'artist' field from 'Artist:' in video description + # when it is present on youtube music video + 'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY', + 'info_dict': { + 'id': 'k0jLE7tTwjY', + 'ext': 'mp4', + 'title': 'Latch Feat. Sam Smith', + 'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335', + 'upload_date': '20150110', + 'uploader': 'Various Artists - Topic', + 'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w', + 'artist': 'Disclosure', + 'track': 'Latch Feat. 
Sam Smith', + 'album': 'Latch Featuring Sam Smith', + 'release_date': '20121008', + 'release_year': 2012, + }, + 'params': { + 'skip_download': True, + }, + }, + { + # Youtube Music Auto-generated description + # handle multiple artists on youtube music video + 'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA', + 'info_dict': { + 'id': '74qn0eJSjpA', + 'ext': 'mp4', + 'title': 'Eastside', + 'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2', + 'upload_date': '20180710', + 'uploader': 'Benny Blanco - Topic', + 'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A', + 'artist': 'benny blanco, Halsey, Khalid', + 'track': 'Eastside', + 'album': 'Eastside', + 'release_date': '20180713', + 'release_year': 2018, + }, + 'params': { + 'skip_download': True, + }, + }, + { + # Youtube Music Auto-generated description + # handle youtube music video with release_year and no release_date + 'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M', + 'info_dict': { + 'id': '-hcAI0g-f5M', + 'ext': 'mp4', + 'title': 'Put It On Me', + 'description': 'md5:93c55acc682ae7b0c668f2e34e1c069e', + 'upload_date': '20180426', + 'uploader': 'Matt Maeson - Topic', + 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ', + 'artist': 'Matt Maeson', + 'track': 'Put It On Me', + 'album': 'The Hearse', + 'release_date': None, + 'release_year': 2018, + }, + 'params': { + 'skip_download': True, + }, + }, ] def __init__(self, *args, **kwargs): @@ -1196,11 +1312,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _parse_sig_js(self, jscode): funcname = self._search_regex( - (r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', + (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(', + r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(', + r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)', + # Obsolete patterns + r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(', - r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?(?P<sig>[a-zA-Z0-9$]+)\(', - r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(', - r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('), + r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(', + r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', + r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', + r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(', + r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(', + r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('), jscode, 'Initial JS player signature function name', group='sig') jsi = JSInterpreter(jscode) @@ -1280,8 +1403,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # regex won't capture the whole JSON. 
Yet working around by trying more # concrete regex first keeping in mind proper quoted string handling # to be implemented in future that will replace this workaround (see - # https://github.com/rg3/youtube-dl/issues/7468, - # https://github.com/rg3/youtube-dl/pull/7599) + # https://github.com/ytdl-org/youtube-dl/issues/7468, + # https://github.com/ytdl-org/youtube-dl/pull/7599) r';ytplayer\.config\s*=\s*({.+?});ytplayer', r';ytplayer\.config\s*=\s*({.+?});', ) @@ -1465,8 +1588,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor): return video_id def _extract_annotations(self, video_id): - url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id - return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.') + return self._download_webpage( + 'https://www.youtube.com/annotations_invideo', video_id, + note='Downloading annotations', + errnote='Unable to download video annotations', fatal=False, + query={ + 'features': 1, + 'legacy': 1, + 'video_id': video_id, + }) @staticmethod def _extract_chapters(description, duration): @@ -1559,6 +1689,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def extract_view_count(v_info): return int_or_none(try_get(v_info, lambda x: x['view_count'][0])) + def extract_token(v_info): + return dict_get(v_info, ('account_playback_token', 'accountPlaybackToken', 'token')) + player_response = {} # Get video info @@ -1596,7 +1729,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): add_dash_mpd(video_info) # Rental video is not rented but preview is available (e.g. # https://www.youtube.com/watch?v=yYr8q0y5Jfg, - # https://github.com/rg3/youtube-dl/issues/10532) + # https://github.com/ytdl-org/youtube-dl/issues/10532) if not video_info and args.get('ypc_vid'): return self.url_result( args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid']) @@ -1616,9 +1749,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH # manifest pointed by get_video_info's dashmpd). # The general idea is to take a union of itags of both DASH manifests (for example - # video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093) + # video with such 'manifest behavior' see https://github.com/ytdl-org/youtube-dl/issues/6093) self.report_video_info_webpage_download(video_id) - for el in ('info', 'embedded', 'detailpage', 'vevo', ''): + for el in ('embedded', 'detailpage', 'vevo', ''): query = { 'video_id': video_id, 'ps': 'default', @@ -1648,18 +1781,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor): view_count = extract_view_count(get_video_info) if not video_info: video_info = get_video_info - get_token = get_video_info.get('token') or get_video_info.get('account_playback_token') + get_token = extract_token(get_video_info) if get_token: # Different get_video_info requests may report different results, e.g. # some may report video unavailability, but some may serve it without - # any complaint (see https://github.com/rg3/youtube-dl/issues/7362, + # any complaint (see https://github.com/ytdl-org/youtube-dl/issues/7362, # the original webpage as well as el=info and el=embedded get_video_info # requests report video unavailability due to geo restriction while # el=detailpage succeeds and returns valid data). This is probably # due to YouTube measures against IP ranges of hosting providers. 
# Working around by preferring the first succeeded video_info containing # the token if no such video_info yet was found. - token = video_info.get('token') or video_info.get('account_playback_token') + token = extract_token(video_info) if not token: video_info = get_video_info break @@ -1669,44 +1802,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor): r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>', video_webpage, 'unavailable message', default=None) - token = video_info.get('token') or video_info.get('account_playback_token') - if not token: - if 'reason' in video_info: - if 'The uploader has not made this video available in your country.' in video_info['reason']: - regions_allowed = self._html_search_meta( - 'regionsAllowed', video_webpage, default=None) - countries = regions_allowed.split(',') if regions_allowed else None - self.raise_geo_restricted( - msg=video_info['reason'][0], countries=countries) - reason = video_info['reason'][0] - if 'Invalid parameters' in reason: - unavailable_message = extract_unavailable_message() - if unavailable_message: - reason = unavailable_message - raise ExtractorError( - 'YouTube said: %s' % reason, - expected=True, video_id=video_id) - else: - raise ExtractorError( - '"token" parameter not in video info for unknown reason', - video_id=video_id) - - if video_info.get('license_info'): - raise ExtractorError('This video is DRM protected.', expected=True) + if not video_info: + unavailable_message = extract_unavailable_message() + if not unavailable_message: + unavailable_message = 'Unable to extract video data' + raise ExtractorError( + 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id) video_details = try_get( player_response, lambda x: x['videoDetails'], dict) or {} - # title - if 'title' in video_info: - video_title = video_info['title'][0] - elif 'title' in player_response: - video_title = video_details['title'] - else: + video_title = video_info.get('title', [None])[0] or video_details.get('title') + if not video_title: self._downloader.report_warning('Unable to extract video title') video_title = '_' - # description description_original = video_description = get_element_by_id("eow-description", video_webpage) if video_description: @@ -1731,11 +1841,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): ''', replace_url, video_description) video_description = clean_html(video_description) else: - fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage) - if fd_mobj: - video_description = unescapeHTML(fd_mobj.group(1)) - else: - video_description = '' + video_description = self._html_search_meta('description', video_webpage) or video_details.get('shortDescription') if not smuggled_data.get('force_singlefeed', False): if not self._downloader.params.get('noplaylist'): @@ -1750,7 +1856,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): for feed in multifeed_metadata_list.split(','): # Unquote should take place before split on comma (,) since textual # fields may contain comma as well (see - # https://github.com/rg3/youtube-dl/issues/8536) + # https://github.com/ytdl-org/youtube-dl/issues/8536) feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed)) entries.append({ '_type': 'url_transparent', @@ -1775,7 +1881,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # Check for "rental" videos if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info: - raise ExtractorError('"rental" videos not supported. 
See https://github.com/rg3/youtube-dl/issues/359 for more information.', expected=True) + raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True) def _extract_filesize(media_url): return int_or_none(self._search_regex( @@ -1792,7 +1898,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): elif not is_live and (len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1): encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0] if 'rtmpe%3Dyes' in encoded_url_map: - raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True) + raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True) formats_spec = {} fmt_list = video_info.get('fmt_list', [''])[0] if fmt_list: @@ -1829,7 +1935,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): formats = [] for url_data_str in encoded_url_map.split(','): url_data = compat_parse_qs(url_data_str) - if 'itag' not in url_data or 'url' not in url_data: + if 'itag' not in url_data or 'url' not in url_data or url_data.get('drm_families'): continue stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0])) # Unsupported FORMAT_STREAM_TYPE_OTF @@ -1889,7 +1995,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): signature = self._decrypt_signature( encrypted_sig, video_id, player_url, age_gate) - url += '&signature=' + signature + sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature' + url += '&%s=%s' % (sp, signature) if 'ratebypass' not in url: url += '&ratebypass=yes' @@ -1904,7 +2011,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): dct.update(formats_spec[format_id]) # Some itags are not included in DASH manifest thus corresponding formats will - # lack metadata (see https://github.com/rg3/youtube-dl/pull/5993). + # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993). # Trying to extract metadata from url_encoded_fmt_stream_map entry. 
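The per-stream filter in the hunk above drops map entries that lack an itag or url and, new in this diff, entries that advertise drm_families. A standalone sketch of that filter over an invented url_encoded_fmt_stream_map:

from urllib.parse import parse_qs

encoded_url_map = (
    'itag=22&url=https%3A%2F%2Fexample.com%2Fa,'
    'itag=137&url=https%3A%2F%2Fexample.com%2Fb&drm_families=Widevine,'
    'quality=hd720'
)

for url_data_str in encoded_url_map.split(','):
    url_data = parse_qs(url_data_str)
    # Skip unusable entries: no itag, no url, or DRM'd streams.
    if 'itag' not in url_data or 'url' not in url_data or url_data.get('drm_families'):
        continue
    print(url_data['itag'][0], url_data['url'][0])
# 22 https://example.com/a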
mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0]) width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None) @@ -1953,8 +2060,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): url_or_none(try_get( player_response, lambda x: x['streamingData']['hlsManifestUrl'], - compat_str)) or - url_or_none(try_get( + compat_str)) + or url_or_none(try_get( video_info, lambda x: x['hlsvp'][0], compat_str))) if manifest_url: formats = [] @@ -2002,8 +2109,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor): else: self._downloader.report_warning('unable to extract uploader nickname') - channel_id = self._html_search_meta( - 'channelId', video_webpage, 'channel id') + channel_id = ( + str_or_none(video_details.get('channelId')) + or self._html_search_meta( + 'channelId', video_webpage, 'channel id', default=None) + or self._search_regex( + r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1', + video_webpage, 'channel id', default=None, group='id')) channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None # thumbnail image @@ -2062,6 +2174,27 @@ class YoutubeIE(YoutubeBaseInfoExtractor): track = extract_meta('Song') artist = extract_meta('Artist') + album = extract_meta('Album') + + # Youtube Music Auto-generated description + release_date = release_year = None + if video_description: + mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description) + if mobj: + if not track: + track = mobj.group('track').strip() + if not artist: + artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')) + if not album: + album = mobj.group('album'.strip()) + release_year = mobj.group('release_year') + release_date = mobj.group('release_date') + if release_date: + release_date = release_date.replace('-', '') + if not release_year: + release_year = int(release_date[:4]) + if release_year: + release_year = int(release_year) m_episode = re.search( r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>', @@ -2102,6 +2235,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage, 'view count', default=None)) + average_rating = ( + float_or_none(video_details.get('averageRating')) + or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0]))) + # subtitles video_subtitles = self.extract_subtitles(video_id, video_webpage) automatic_captions = self.extract_automatic_captions(video_id, video_webpage) @@ -2155,7 +2292,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # Remove the formats we found through non-DASH, they # contain less info and it can be wrong, because we use # fixed values (for example the resolution). See - # https://github.com/rg3/youtube-dl/issues/5774 for an + # https://github.com/ytdl-org/youtube-dl/issues/5774 for an # example. 
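The channel id extraction above now walks three sources in order: the JSON player response's videoDetails, the channelId meta tag, and the data-channel-external-id attribute. A condensed sketch of the same chain using bare re in place of the extractor helpers; the page snippet and id are invented:

import re

def extract_channel_id(video_details, webpage):
    channel_id = video_details.get('channelId')
    if not channel_id:
        mobj = re.search(
            r'<meta[^>]+itemprop=["\']channelId["\'][^>]+content=["\']([^"\']+)',
            webpage)
        channel_id = mobj.group(1) if mobj else None
    if not channel_id:
        mobj = re.search(
            r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage)
        channel_id = mobj.group('id') if mobj else None
    return channel_id

page = '<div data-channel-external-id="UCxxxxxxxxxxxxxxxxxxxxxx"></div>'
print(extract_channel_id({}, page))  # UCxxxxxxxxxxxxxxxxxxxxxx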
formats = [f for f in formats if f['format_id'] not in dash_formats.keys()] formats.extend(dash_formats.values()) @@ -2175,6 +2312,32 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if f.get('vcodec') != 'none': f['stretched_ratio'] = ratio + if not formats: + token = extract_token(video_info) + if not token: + if 'reason' in video_info: + if 'The uploader has not made this video available in your country.' in video_info['reason']: + regions_allowed = self._html_search_meta( + 'regionsAllowed', video_webpage, default=None) + countries = regions_allowed.split(',') if regions_allowed else None + self.raise_geo_restricted( + msg=video_info['reason'][0], countries=countries) + reason = video_info['reason'][0] + if 'Invalid parameters' in reason: + unavailable_message = extract_unavailable_message() + if unavailable_message: + reason = unavailable_message + raise ExtractorError( + 'YouTube said: %s' % reason, + expected=True, video_id=video_id) + else: + raise ExtractorError( + '"token" parameter not in video info for unknown reason', + video_id=video_id) + + if not formats and (video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos'])): + raise ExtractorError('This video is DRM protected.', expected=True) + self._sort_formats(formats) self.mark_watched(video_id, video_info, player_response) @@ -2205,7 +2368,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'view_count': view_count, 'like_count': like_count, 'dislike_count': dislike_count, - 'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]), + 'average_rating': average_rating, 'formats': formats, 'is_live': is_live, 'start_time': start_time, @@ -2215,6 +2378,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'episode_number': episode_number, 'track': track, 'artist': artist, + 'album': album, + 'release_date': release_date, + 'release_year': release_year, } @@ -2413,9 +2579,9 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage) title_span = ( - search_title('playlist-title') or - search_title('title long-title') or - search_title('title')) + search_title('playlist-title') + or search_title('title long-title') + or search_title('title')) title = clean_html(title_span) return self.playlist_result(url_results, playlist_id, title) @@ -2424,7 +2590,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): url = self._TEMPLATE_URL % playlist_id page = self._download_webpage(url, playlist_id) - # the yt-alert-message now has tabindex attribute (see https://github.com/rg3/youtube-dl/issues/11604) + # the yt-alert-message now has tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604) for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page): match = match.strip() # Check if the playlist exists or is private @@ -2517,7 +2683,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): return playlist # Some playlist URLs don't actually serve a playlist (see - # https://github.com/rg3/youtube-dl/issues/10537). + # https://github.com/ytdl-org/youtube-dl/issues/10537). # Fallback to plain video extraction if there is a video id # along with playlist id. 
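The DRM check added above merges two signals into one raise: the legacy license_info field from get_video_info and licenseInfos under the player response's streamingData. A minimal predicate of the same idea, with invented inputs:

def is_drm_protected(video_info, player_response):
    # Either signal alone marks the video as DRM protected.
    if video_info.get('license_info'):
        return True
    streaming_data = player_response.get('streamingData') or {}
    return bool(streaming_data.get('licenseInfos'))

print(is_drm_protected({}, {'streamingData': {'licenseInfos': [{}]}}))  # True
print(is_drm_protected({}, {}))  # False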
return self.url_result(video_id, 'Youtube', video_id=video_id) diff --git a/yt_basic_template.html b/yt_basic_template.html deleted file mode 100644 index d2290d8..0000000 --- a/yt_basic_template.html +++ /dev/null @@ -1,20 +0,0 @@ -<!DOCTYPE html> -<html> - <head> - <meta charset="utf-8"> - <title>$page_title</title> - <link href="/youtube.com/shared.css" type="text/css" rel="stylesheet"> - <link href="/youtube.com/comments.css" type="text/css" rel="stylesheet"> - <link href="/youtube.com/favicon.ico" type="image/x-icon" rel="icon"> - <link title="Youtube local" href="/youtube.com/opensearch.xml" rel="search" type="application/opensearchdescription+xml"> - <style type="text/css"> -$style - </style> - </head> - <body> -$header - <main> -$page - </main> - </body> -</html> diff --git a/yt_channel_about_template.html b/yt_channel_about_template.html deleted file mode 100644 index e8e721e..0000000 --- a/yt_channel_about_template.html +++ /dev/null @@ -1,89 +0,0 @@ -<!DOCTYPE html> -<html> - <head> - <meta charset="utf-8"> - <title>$page_title</title> - <link href="/youtube.com/shared.css" type="text/css" rel="stylesheet"> - <link href="/youtube.com/favicon.ico" type="image/x-icon" rel="icon"> - <link title="Youtube local" href="/youtube.com/opensearch.xml" rel="search" type="application/opensearchdescription+xml"> - <style type="text/css"> - main{ - display:grid; - grid-template-rows: 0fr 0fr 1fr; - grid-template-columns: 0fr 1fr; - } - main .avatar{ - grid-row:1; - grid-column:1; - height:200px; - width:200px; - } - .metadata{ - grid-row:1; - grid-column:2; - margin-left: 10px; - display:grid; - align-content: start; - grid-row-gap:10px; - } - - main .channel-tabs{ - grid-column: 1 / span 2; - - display:grid; - grid-auto-flow: column; - justify-content:start; - - background-color: #aaaaaa; - padding: 3px; - } - main .channel-info{ - grid-column: 1 / span 3; - } - .tab{ - padding: 5px 75px; - } - .description{ - white-space: pre-wrap; - min-width: 0; - - } - </style> - </head> - <body> -$header - <main> - <img class="avatar" src="$avatar"> - <div class="metadata"> - <h2 class="title">$channel_title</h2> - <form method="POST" action="/youtube.com/subscriptions" class="subscribe-unsubscribe"> - <input type="submit" value="$action_name"> - <input type="hidden" name="channel_id" value="$channel_id"> - <input type="hidden" name="channel_name" value="$channel_title"> - <input type="hidden" name="action" value="$action"> - </form> - </div> - <nav class="channel-tabs"> -$channel_tabs - </nav> - <div class="channel-info"> - <ul> -$stats - - </ul> - <hr> - <h3>Description</h3> - <span class="description">$description</span> - <hr> - <ul> -$links - </ul> - </div> - </main> - - - - - - </body> -</html> diff --git a/yt_channel_items_template.html b/yt_channel_items_template.html deleted file mode 100644 index fc85dd5..0000000 --- a/yt_channel_items_template.html +++ /dev/null @@ -1,100 +0,0 @@ -<!DOCTYPE html> -<html> - <head> - <meta charset="utf-8"> - <title>$page_title</title> - <link href="/youtube.com/shared.css" type="text/css" rel="stylesheet"> - <link href="/youtube.com/favicon.ico" type="image/x-icon" rel="icon"> - <link title="Youtube local" href="/youtube.com/opensearch.xml" rel="search" type="application/opensearchdescription+xml"> - <style type="text/css"> - main{ - display:grid; - grid-template-rows: repeat(5, 0fr); - grid-template-columns: auto 1fr; - } - main .avatar{ - grid-row:1; - grid-column:1; - height:200px; - width:200px; - } - .metadata{ - grid-row:1; - grid-column:2; - margin-left: 
10px; - display:grid; - align-content: start; - grid-row-gap:10px; - } - main .channel-tabs{ - grid-column: 1 / span 2; - - display:grid; - grid-auto-flow: column; - justify-content:start; - - background-color: #aaaaaa; - padding: 3px; - } - #links-metadata{ - display: grid; - grid-auto-flow: column; - grid-column-gap: 10px; - grid-column: 1/span 2; - justify-content: start; - padding-top: 8px; - padding-bottom: 8px; - background-color: #bababa; - margin-bottom: 10px; - } - #number-of-results{ - font-weight:bold; - } - .item-grid{ - grid-column: 1 / span 2; - } - .item-list{ - width:1000px; - grid-column: 1 / span 2; - } - .page-button-row{ - grid-column: 1 / span 2; - } - .tab{ - padding: 5px 75px; - } - - </style> - </head> - <body> -$header - <main> - <img class="avatar" src="$avatar"> - <div class="metadata"> - <h2 class="title">$channel_title</h2> - <form method="POST" action="/youtube.com/subscriptions" class="subscribe-unsubscribe"> - <input type="submit" value="$action_name"> - <input type="hidden" name="channel_id" value="$channel_id"> - <input type="hidden" name="channel_name" value="$channel_title"> - <input type="hidden" name="action" value="$action"> - </form> - </div> - <nav class="channel-tabs"> -$channel_tabs - </nav> - <div id="links-metadata"> - <div id="number-of-results">$number_of_results</div> -$sort_buttons - </div> -$items - <nav class="page-button-row"> -$page_buttons - </nav> - </main> - - - - - - </body> -</html> diff --git a/yt_comments_template.html b/yt_comments_template.html deleted file mode 100644 index cdb11d0..0000000 --- a/yt_comments_template.html +++ /dev/null @@ -1,40 +0,0 @@ -<!DOCTYPE html> -<html> - <head> - <meta charset="utf-8"> - <title>$page_title</title> - <link href="/youtube.com/shared.css" type="text/css" rel="stylesheet"> - <link href="/youtube.com/comments.css" type="text/css" rel="stylesheet"> - <link href="/youtube.com/favicon.ico" type="image/x-icon" rel="icon"> - <link title="Youtube local" href="/youtube.com/opensearch.xml" rel="search" type="application/opensearchdescription+xml"> - <style type="text/css"> - main{ - display:grid; - grid-template-columns: 3fr 2fr; - } - #left{ - background-color:#bcbcbc; - - display: grid; - grid-column: 1; - grid-row: 1; - grid-template-columns: 1fr 640px; - grid-template-rows: 0fr 0fr 0fr; - } - .comments-area{ - grid-column:2; - } - .comment{ - width:640px; - } - </style> - </head> - <body> -$header - <main> - <div id="left"> -$comments_area - </div> - </main> - </body> -</html> diff --git a/yt_local_playlist_template.html b/yt_local_playlist_template.html deleted file mode 100644 index 55287fc..0000000 --- a/yt_local_playlist_template.html +++ /dev/null @@ -1,84 +0,0 @@ -<!DOCTYPE html> -<html> - <head> - <meta charset="utf-8"> - <title>$page_title</title> - <link href="/youtube.com/shared.css" type="text/css" rel="stylesheet"> - <link href="/youtube.com/favicon.ico" type="image/x-icon" rel="icon"> - <link title="Youtube local" href="/youtube.com/opensearch.xml" rel="search" type="application/opensearchdescription+xml"> - <style type="text/css"> - main{ - display:grid; - grid-template-columns: 3fr 1fr; - } - - - - #left{ - grid-column: 1; - grid-row: 1; - - display: grid; - grid-template-columns: 1fr 800px auto; - grid-template-rows: 0fr 1fr 0fr; - } - .playlist-title{ - grid-column:2; - } - #playlist-remove-button{ - grid-column:3; - align-self: center; - white-space: nowrap; - } - #results{ - - grid-row: 2; - grid-column: 2 / span 2; - - - display: grid; - grid-auto-rows: 0fr; - grid-row-gap: 
10px; - - } - .page-button-row{ - grid-row: 3; - grid-column: 2; - justify-self: center; - } - - - #right{ - grid-column: 2; - grid-row: 1; - - } - - </style> - </head> - <body> -$header - <main> - <div id="left"> - <h2 class="playlist-title">$title</h2> - <input type="hidden" name="playlist_page" value="$title" form="playlist-edit"> - <button type="submit" id="playlist-remove-button" name="action" value="remove" form="playlist-edit" formaction="">Remove from playlist</button> - <div id="results"> -$videos - </div> - <nav class="page-button-row"> -$page_buttons - </nav> - </div> - - - <div id="right"> - - </div> - </main> - - - - - </body> -</html> diff --git a/yt_playlist_template.html b/yt_playlist_template.html deleted file mode 100644 index 0e33261..0000000 --- a/yt_playlist_template.html +++ /dev/null @@ -1,115 +0,0 @@ -<!DOCTYPE html> -<html> - <head> - <meta charset="utf-8"> - <title>$page_title</title> - <link href="/youtube.com/shared.css" type="text/css" rel="stylesheet"> - <link href="/youtube.com/favicon.ico" type="image/x-icon" rel="icon"> - <link title="Youtube local" href="/youtube.com/opensearch.xml" rel="search" type="application/opensearchdescription+xml"> - <style type="text/css"> - main{ - display:grid; - grid-template-columns: 3fr 1fr; - } - - - - #left{ - grid-column: 1; - grid-row: 1; - - display: grid; - grid-template-columns: 1fr 800px; - grid-template-rows: 0fr 1fr 0fr; - } - .playlist-metadata{ - grid-column:2; - grid-row:1; - - display:grid; - grid-template-columns: 0fr 1fr; - } - .playlist-thumbnail{ - grid-row: 1 / span 5; - grid-column:1; - justify-self:start; - width:250px; - margin-right: 10px; - } - .playlist-title{ - grid-row: 1; - grid-column:2; - } - .playlist-author{ - grid-row:2; - grid-column:2; - } - .playlist-stats{ - grid-row:3; - grid-column:2; - } - - .playlist-description{ - grid-row:4; - grid-column:2; - min-width:0px; - white-space: pre-line; - } - .page-button-row{ - grid-row: 3; - grid-column: 2; - justify-self: center; - } - - - #right{ - grid-column: 2; - grid-row: 1; - - } - #results{ - - grid-row: 2; - grid-column: 2; - margin-top:10px; - - display: grid; - grid-auto-rows: 0fr; - grid-row-gap: 10px; - - } - </style> - </head> - <body> -$header - <main> - <div id="left"> - <div class="playlist-metadata"> - <img class="playlist-thumbnail" src="$thumbnail"> - <h2 class="playlist-title">$title</h2> - <a class="playlist-author" href="$author_url">$author</a> - <div class="playlist-stats"> -$stats - </div> - <div class="playlist-description">$description</div> - </div> - - <div id="results"> -$videos - </div> - <nav class="page-button-row"> -$page_buttons - </nav> - </div> - - - <div id="right"> - - </div> - </main> - - - - - </body> -</html> diff --git a/yt_search_results_template.html b/yt_search_results_template.html deleted file mode 100644 index d73fb9e..0000000 --- a/yt_search_results_template.html +++ /dev/null @@ -1,60 +0,0 @@ -<!DOCTYPE html> -<html> - <head> - <meta charset="utf-8"> - <title>$page_title</title> - <link href="/youtube.com/shared.css" type="text/css" rel="stylesheet"> - <link href="/youtube.com/favicon.ico" type="image/x-icon" rel="icon"> - <link title="Youtube local" href="/youtube.com/opensearch.xml" rel="search" type="application/opensearchdescription+xml"> - <style type="text/css"> - main{ - display:grid; - grid-template-columns: minmax(0px, 1fr) 800px minmax(0px,2fr); - max-width:100vw; - } - - - #number-of-results{ - font-weight:bold; - } - #result-info{ - grid-row: 1; - grid-column:2; - align-self:center; - } - 
.page-button-row{ - grid-column: 2; - justify-self: center; - } - - - .item-list{ - grid-row: 2; - grid-column: 2; - } - .badge{ - background-color:#cccccc; - } - - </style> - </head> - <body> -$header - <main> - <div id="result-info"> - <div id="number-of-results">Approximately $number_of_results results ($number_of_pages pages)</div> -$corrections - </div> - <div class="item-list"> -$results - </div> - <nav class="page-button-row"> -$page_buttons - </nav> - </main> - - - - - </body> -</html>
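The $page_title/$header placeholders throughout the deleted templates above follow the substitution syntax of Python's stdlib string.Template; a plausible sketch of how such a file would have been rendered, with a trimmed-down stand-in template and invented values:

from string import Template

# Stand-in for the deleted yt_basic_template.html, keeping only the
# placeholders; the real file carried the full <head> and CSS.
basic_template = Template(
    '<!DOCTYPE html>\n<html>\n<head><title>$page_title</title>'
    '<style>$style</style></head>\n'
    '<body>$header<main>$page</main></body>\n</html>'
)

print(basic_template.substitute(
    page_title='Example video',
    style='main{display:grid;}',
    header='<nav>subscriptions</nav>',
    page='<p>content</p>',
))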