diff options
Diffstat (limited to 'python/defusedxml')
-rw-r--r-- | python/defusedxml/ElementTree.py | 112 | ||||
-rw-r--r-- | python/defusedxml/__init__.py | 45 | ||||
-rw-r--r-- | python/defusedxml/cElementTree.py | 30 | ||||
-rw-r--r-- | python/defusedxml/common.py | 120 | ||||
-rw-r--r-- | python/defusedxml/expatbuilder.py | 110 | ||||
-rw-r--r-- | python/defusedxml/expatreader.py | 59 | ||||
-rw-r--r-- | python/defusedxml/lxml.py | 153 | ||||
-rw-r--r-- | python/defusedxml/minidom.py | 42 | ||||
-rw-r--r-- | python/defusedxml/pulldom.py | 34 | ||||
-rw-r--r-- | python/defusedxml/sax.py | 49 | ||||
-rw-r--r-- | python/defusedxml/xmlrpc.py | 157 |
11 files changed, 911 insertions, 0 deletions
diff --git a/python/defusedxml/ElementTree.py b/python/defusedxml/ElementTree.py new file mode 100644 index 0000000..41b2ea8 --- /dev/null +++ b/python/defusedxml/ElementTree.py @@ -0,0 +1,112 @@ +# defusedxml +# +# Copyright (c) 2013 by Christian Heimes <christian@python.org> +# Licensed to PSF under a Contributor Agreement. +# See http://www.python.org/psf/license for licensing details. +"""Defused xml.etree.ElementTree facade +""" +from __future__ import print_function, absolute_import + +import sys +from xml.etree.ElementTree import TreeBuilder as _TreeBuilder +from xml.etree.ElementTree import parse as _parse +from xml.etree.ElementTree import tostring + +from .common import PY3 + + +if PY3: + import importlib +else: + from xml.etree.ElementTree import XMLParser as _XMLParser + from xml.etree.ElementTree import iterparse as _iterparse + from xml.etree.ElementTree import ParseError + + +from .common import (DTDForbidden, EntitiesForbidden, + ExternalReferenceForbidden, _generate_etree_functions) + +__origin__ = "xml.etree.ElementTree" + + +def _get_py3_cls(): + """Python 3.3 hides the pure Python code but defusedxml requires it. + + The code is based on test.support.import_fresh_module(). 
+ """ + pymodname = "xml.etree.ElementTree" + cmodname = "_elementtree" + + pymod = sys.modules.pop(pymodname, None) + cmod = sys.modules.pop(cmodname, None) + + sys.modules[cmodname] = None + pure_pymod = importlib.import_module(pymodname) + if cmod is not None: + sys.modules[cmodname] = cmod + else: + sys.modules.pop(cmodname) + sys.modules[pymodname] = pymod + + _XMLParser = pure_pymod.XMLParser + _iterparse = pure_pymod.iterparse + ParseError = pure_pymod.ParseError + + return _XMLParser, _iterparse, ParseError + + +if PY3: + _XMLParser, _iterparse, ParseError = _get_py3_cls() + + +class DefusedXMLParser(_XMLParser): + + def __init__(self, html=0, target=None, encoding=None, + forbid_dtd=False, forbid_entities=True, + forbid_external=True): + # Python 2.x old style class + _XMLParser.__init__(self, html, target, encoding) + self.forbid_dtd = forbid_dtd + self.forbid_entities = forbid_entities + self.forbid_external = forbid_external + if PY3: + parser = self.parser + else: + parser = self._parser + if self.forbid_dtd: + parser.StartDoctypeDeclHandler = self.defused_start_doctype_decl + if self.forbid_entities: + parser.EntityDeclHandler = self.defused_entity_decl + parser.UnparsedEntityDeclHandler = self.defused_unparsed_entity_decl + if self.forbid_external: + parser.ExternalEntityRefHandler = self.defused_external_entity_ref_handler + + def defused_start_doctype_decl(self, name, sysid, pubid, + has_internal_subset): + raise DTDForbidden(name, sysid, pubid) + + def defused_entity_decl(self, name, is_parameter_entity, value, base, + sysid, pubid, notation_name): + raise EntitiesForbidden(name, value, base, sysid, pubid, notation_name) + + def defused_unparsed_entity_decl(self, name, base, sysid, pubid, + notation_name): + # expat 1.2 + raise EntitiesForbidden(name, None, base, sysid, pubid, notation_name) + + def defused_external_entity_ref_handler(self, context, base, sysid, + pubid): + raise ExternalReferenceForbidden(context, base, sysid, pubid) + + +# 
aliases +XMLTreeBuilder = XMLParse = DefusedXMLParser + +parse, iterparse, fromstring = _generate_etree_functions(DefusedXMLParser, + _TreeBuilder, _parse, + _iterparse) +XML = fromstring + + +__all__ = ['XML', 'XMLParse', 'XMLTreeBuilder', 'fromstring', 'iterparse', + 'parse', 'tostring'] diff --git a/python/defusedxml/__init__.py b/python/defusedxml/__init__.py new file mode 100644 index 0000000..590a5a9 --- /dev/null +++ b/python/defusedxml/__init__.py @@ -0,0 +1,45 @@ +# defusedxml +# +# Copyright (c) 2013 by Christian Heimes <christian@python.org> +# Licensed to PSF under a Contributor Agreement. +# See http://www.python.org/psf/license for licensing details. +"""Defuse XML bomb denial of service vulnerabilities +""" +from __future__ import print_function, absolute_import + +from .common import (DefusedXmlException, DTDForbidden, EntitiesForbidden, + ExternalReferenceForbidden, NotSupportedError, + _apply_defusing) + + +def defuse_stdlib(): + """Monkey patch and defuse all stdlib packages + + :warning: The monkey patch is an EXPERIMENTAL feature. + """ + defused = {} + + from . import cElementTree + from . import ElementTree + from . import minidom + from . import pulldom + from . import sax + from . import expatbuilder + from . import expatreader + from . 
import xmlrpc + + xmlrpc.monkey_patch() + defused[xmlrpc] = None + + for defused_mod in [cElementTree, ElementTree, minidom, pulldom, sax, + expatbuilder, expatreader]: + stdlib_mod = _apply_defusing(defused_mod) + defused[defused_mod] = stdlib_mod + + return defused + + +__version__ = "0.5.0" + +__all__ = ['DefusedXmlException', 'DTDForbidden', 'EntitiesForbidden', + 'ExternalReferenceForbidden', 'NotSupportedError'] diff --git a/python/defusedxml/cElementTree.py b/python/defusedxml/cElementTree.py new file mode 100644 index 0000000..cc13689 --- /dev/null +++ b/python/defusedxml/cElementTree.py @@ -0,0 +1,30 @@ +# defusedxml +# +# Copyright (c) 2013 by Christian Heimes <christian@python.org> +# Licensed to PSF under a Contributor Agreement. +# See http://www.python.org/psf/license for licensing details. +"""Defused xml.etree.cElementTree +""" +from __future__ import absolute_import + +from xml.etree.cElementTree import TreeBuilder as _TreeBuilder +from xml.etree.cElementTree import parse as _parse +from xml.etree.cElementTree import tostring +# iterparse from ElementTree! +from xml.etree.ElementTree import iterparse as _iterparse + +from .ElementTree import DefusedXMLParser +from .common import _generate_etree_functions + +__origin__ = "xml.etree.cElementTree" + + +XMLTreeBuilder = XMLParse = DefusedXMLParser + +parse, iterparse, fromstring = _generate_etree_functions(DefusedXMLParser, + _TreeBuilder, _parse, + _iterparse) +XML = fromstring + +__all__ = ['XML', 'XMLParse', 'XMLTreeBuilder', 'fromstring', 'iterparse', + 'parse', 'tostring'] diff --git a/python/defusedxml/common.py b/python/defusedxml/common.py new file mode 100644 index 0000000..668b609 --- /dev/null +++ b/python/defusedxml/common.py @@ -0,0 +1,120 @@ +# defusedxml +# +# Copyright (c) 2013 by Christian Heimes <christian@python.org> +# Licensed to PSF under a Contributor Agreement. +# See http://www.python.org/psf/license for licensing details. 
+"""Common constants, exceptions and helper functions +""" +import sys + +PY3 = sys.version_info[0] == 3 + + +class DefusedXmlException(ValueError): + """Base exception + """ + + def __repr__(self): + return str(self) + + +class DTDForbidden(DefusedXmlException): + """Document type definition is forbidden + """ + + def __init__(self, name, sysid, pubid): + super(DTDForbidden, self).__init__() + self.name = name + self.sysid = sysid + self.pubid = pubid + + def __str__(self): + tpl = "DTDForbidden(name='{}', system_id={!r}, public_id={!r})" + return tpl.format(self.name, self.sysid, self.pubid) + + +class EntitiesForbidden(DefusedXmlException): + """Entity definition is forbidden + """ + + def __init__(self, name, value, base, sysid, pubid, notation_name): + super(EntitiesForbidden, self).__init__() + self.name = name + self.value = value + self.base = base + self.sysid = sysid + self.pubid = pubid + self.notation_name = notation_name + + def __str__(self): + tpl = "EntitiesForbidden(name='{}', system_id={!r}, public_id={!r})" + return tpl.format(self.name, self.sysid, self.pubid) + + +class ExternalReferenceForbidden(DefusedXmlException): + """Resolving an external reference is forbidden + """ + + def __init__(self, context, base, sysid, pubid): + super(ExternalReferenceForbidden, self).__init__() + self.context = context + self.base = base + self.sysid = sysid + self.pubid = pubid + + def __str__(self): + tpl = "ExternalReferenceForbidden(system_id='{}', public_id={})" + return tpl.format(self.sysid, self.pubid) + + +class NotSupportedError(DefusedXmlException): + """The operation is not supported + """ + + +def _apply_defusing(defused_mod): + assert defused_mod is sys.modules[defused_mod.__name__] + stdlib_name = defused_mod.__origin__ + __import__(stdlib_name, {}, {}, ["*"]) + stdlib_mod = sys.modules[stdlib_name] + stdlib_names = set(dir(stdlib_mod)) + for name, obj in vars(defused_mod).items(): + if name.startswith("_") or name not in stdlib_names: + continue
setattr(stdlib_mod, name, obj) + return stdlib_mod + + +def _generate_etree_functions(DefusedXMLParser, _TreeBuilder, + _parse, _iterparse): + """Factory for functions needed by etree, dependent on whether + cElementTree or ElementTree is used.""" + + def parse(source, parser=None, forbid_dtd=False, forbid_entities=True, + forbid_external=True): + if parser is None: + parser = DefusedXMLParser(target=_TreeBuilder(), + forbid_dtd=forbid_dtd, + forbid_entities=forbid_entities, + forbid_external=forbid_external) + return _parse(source, parser) + + def iterparse(source, events=None, parser=None, forbid_dtd=False, + forbid_entities=True, forbid_external=True): + if parser is None: + parser = DefusedXMLParser(target=_TreeBuilder(), + forbid_dtd=forbid_dtd, + forbid_entities=forbid_entities, + forbid_external=forbid_external) + return _iterparse(source, events, parser) + + def fromstring(text, forbid_dtd=False, forbid_entities=True, + forbid_external=True): + parser = DefusedXMLParser(target=_TreeBuilder(), + forbid_dtd=forbid_dtd, + forbid_entities=forbid_entities, + forbid_external=forbid_external) + parser.feed(text) + return parser.close() + + return parse, iterparse, fromstring diff --git a/python/defusedxml/expatbuilder.py b/python/defusedxml/expatbuilder.py new file mode 100644 index 0000000..0eb6b91 --- /dev/null +++ b/python/defusedxml/expatbuilder.py @@ -0,0 +1,110 @@ +# defusedxml +# +# Copyright (c) 2013 by Christian Heimes <christian@python.org> +# Licensed to PSF under a Contributor Agreement. +# See http://www.python.org/psf/license for licensing details. 
+"""Defused xml.dom.expatbuilder +""" +from __future__ import print_function, absolute_import + +from xml.dom.expatbuilder import ExpatBuilder as _ExpatBuilder +from xml.dom.expatbuilder import Namespaces as _Namespaces + +from .common import (DTDForbidden, EntitiesForbidden, + ExternalReferenceForbidden) + +__origin__ = "xml.dom.expatbuilder" + + +class DefusedExpatBuilder(_ExpatBuilder): + """Defused document builder""" + + def __init__(self, options=None, forbid_dtd=False, forbid_entities=True, + forbid_external=True): + _ExpatBuilder.__init__(self, options) + self.forbid_dtd = forbid_dtd + self.forbid_entities = forbid_entities + self.forbid_external = forbid_external + + def defused_start_doctype_decl(self, name, sysid, pubid, + has_internal_subset): + raise DTDForbidden(name, sysid, pubid) + + def defused_entity_decl(self, name, is_parameter_entity, value, base, + sysid, pubid, notation_name): + raise EntitiesForbidden(name, value, base, sysid, pubid, notation_name) + + def defused_unparsed_entity_decl(self, name, base, sysid, pubid, + notation_name): + # expat 1.2 + raise EntitiesForbidden(name, None, base, sysid, pubid, notation_name) + + def defused_external_entity_ref_handler(self, context, base, sysid, + pubid): + raise ExternalReferenceForbidden(context, base, sysid, pubid) + + def install(self, parser): + _ExpatBuilder.install(self, parser) + + if self.forbid_dtd: + parser.StartDoctypeDeclHandler = self.defused_start_doctype_decl + if self.forbid_entities: + # if self._options.entities: + parser.EntityDeclHandler = self.defused_entity_decl + parser.UnparsedEntityDeclHandler = self.defused_unparsed_entity_decl + if self.forbid_external: + parser.ExternalEntityRefHandler = self.defused_external_entity_ref_handler + + +class DefusedExpatBuilderNS(_Namespaces, DefusedExpatBuilder): + """Defused document builder that supports namespaces.""" + + def install(self, parser): + DefusedExpatBuilder.install(self, parser) + if self._options.namespace_declarations: 
+ parser.StartNamespaceDeclHandler = ( + self.start_namespace_decl_handler) + + def reset(self): + DefusedExpatBuilder.reset(self) + self._initNamespaces() + + +def parse(file, namespaces=True, forbid_dtd=False, forbid_entities=True, + forbid_external=True): + """Parse a document, returning the resulting Document node. + + 'file' may be either a file name or an open file object. + """ + if namespaces: + build_builder = DefusedExpatBuilderNS + else: + build_builder = DefusedExpatBuilder + builder = build_builder(forbid_dtd=forbid_dtd, + forbid_entities=forbid_entities, + forbid_external=forbid_external) + + if isinstance(file, str): + fp = open(file, 'rb') + try: + result = builder.parseFile(fp) + finally: + fp.close() + else: + result = builder.parseFile(file) + return result + + +def parseString(string, namespaces=True, forbid_dtd=False, + forbid_entities=True, forbid_external=True): + """Parse a document from a string, returning the resulting + Document node. + """ + if namespaces: + build_builder = DefusedExpatBuilderNS + else: + build_builder = DefusedExpatBuilder + builder = build_builder(forbid_dtd=forbid_dtd, + forbid_entities=forbid_entities, + forbid_external=forbid_external) + return builder.parseString(string) diff --git a/python/defusedxml/expatreader.py b/python/defusedxml/expatreader.py new file mode 100644 index 0000000..ef6bc39 --- /dev/null +++ b/python/defusedxml/expatreader.py @@ -0,0 +1,59 @@ +# defusedxml +# +# Copyright (c) 2013 by Christian Heimes <christian@python.org> +# Licensed to PSF under a Contributor Agreement. +# See http://www.python.org/psf/license for licensing details. 
+"""Defused xml.sax.expatreader +""" +from __future__ import print_function, absolute_import + +from xml.sax.expatreader import ExpatParser as _ExpatParser + +from .common import (DTDForbidden, EntitiesForbidden, + ExternalReferenceForbidden) + +__origin__ = "xml.sax.expatreader" + + +class DefusedExpatParser(_ExpatParser): + """Defused SAX driver for the pyexpat C module.""" + + def __init__(self, namespaceHandling=0, bufsize=2 ** 16 - 20, + forbid_dtd=False, forbid_entities=True, + forbid_external=True): + _ExpatParser.__init__(self, namespaceHandling, bufsize) + self.forbid_dtd = forbid_dtd + self.forbid_entities = forbid_entities + self.forbid_external = forbid_external + + def defused_start_doctype_decl(self, name, sysid, pubid, + has_internal_subset): + raise DTDForbidden(name, sysid, pubid) + + def defused_entity_decl(self, name, is_parameter_entity, value, base, + sysid, pubid, notation_name): + raise EntitiesForbidden(name, value, base, sysid, pubid, notation_name) + + def defused_unparsed_entity_decl(self, name, base, sysid, pubid, + notation_name): + # expat 1.2 + raise EntitiesForbidden(name, None, base, sysid, pubid, notation_name) + + def defused_external_entity_ref_handler(self, context, base, sysid, + pubid): + raise ExternalReferenceForbidden(context, base, sysid, pubid) + + def reset(self): + _ExpatParser.reset(self) + parser = self._parser + if self.forbid_dtd: + parser.StartDoctypeDeclHandler = self.defused_start_doctype_decl + if self.forbid_entities: + parser.EntityDeclHandler = self.defused_entity_decl + parser.UnparsedEntityDeclHandler = self.defused_unparsed_entity_decl + if self.forbid_external: + parser.ExternalEntityRefHandler = self.defused_external_entity_ref_handler + + +def create_parser(*args, **kwargs): + return DefusedExpatParser(*args, **kwargs) diff --git a/python/defusedxml/lxml.py b/python/defusedxml/lxml.py new file mode 100644 index 0000000..7f3ee0b --- /dev/null +++ b/python/defusedxml/lxml.py @@ -0,0 +1,153 @@ +# 
defusedxml +# +# Copyright (c) 2013 by Christian Heimes <christian@python.org> +# Licensed to PSF under a Contributor Agreement. +# See http://www.python.org/psf/license for licensing details. +"""Example code for lxml.etree protection + +The code has NO protection against decompression bombs. +""" +from __future__ import print_function, absolute_import + +import threading +from lxml import etree as _etree + +from .common import DTDForbidden, EntitiesForbidden, NotSupportedError + +LXML3 = _etree.LXML_VERSION[0] >= 3 + +__origin__ = "lxml.etree" + +tostring = _etree.tostring + + +class RestrictedElement(_etree.ElementBase): + """A restricted Element class that filters out instances of some classes + """ + __slots__ = () + # blacklist = (etree._Entity, etree._ProcessingInstruction, etree._Comment) + blacklist = _etree._Entity + + def _filter(self, iterator): + blacklist = self.blacklist + for child in iterator: + if isinstance(child, blacklist): + continue + yield child + + def __iter__(self): + iterator = super(RestrictedElement, self).__iter__() + return self._filter(iterator) + + def iterchildren(self, tag=None, reversed=False): + iterator = super(RestrictedElement, self).iterchildren( + tag=tag, reversed=reversed) + return self._filter(iterator) + + def iter(self, tag=None, *tags): + iterator = super(RestrictedElement, self).iter(tag=tag, *tags) + return self._filter(iterator) + + def iterdescendants(self, tag=None, *tags): + iterator = super(RestrictedElement, + self).iterdescendants(tag=tag, *tags) + return self._filter(iterator) + + def itersiblings(self, tag=None, preceding=False): + iterator = super(RestrictedElement, self).itersiblings( + tag=tag, preceding=preceding) + return self._filter(iterator) + + def getchildren(self): + iterator = super(RestrictedElement, self).__iter__() + return list(self._filter(iterator)) + + def getiterator(self, tag=None): + iterator = super(RestrictedElement, self).getiterator(tag) + return self._filter(iterator) + + +class 
GlobalParserTLS(threading.local): + """Thread local context for custom parser instances + """ + parser_config = { + 'resolve_entities': False, + # 'remove_comments': True, + # 'remove_pis': True, + } + + element_class = RestrictedElement + + def createDefaultParser(self): + parser = _etree.XMLParser(**self.parser_config) + element_class = self.element_class + if self.element_class is not None: + lookup = _etree.ElementDefaultClassLookup(element=element_class) + parser.set_element_class_lookup(lookup) + return parser + + def setDefaultParser(self, parser): + self._default_parser = parser + + def getDefaultParser(self): + parser = getattr(self, "_default_parser", None) + if parser is None: + parser = self.createDefaultParser() + self.setDefaultParser(parser) + return parser + + +_parser_tls = GlobalParserTLS() +getDefaultParser = _parser_tls.getDefaultParser + + +def check_docinfo(elementtree, forbid_dtd=False, forbid_entities=True): + """Check docinfo of an element tree for DTD and entity declarations + + The check for entity declarations needs lxml 3 or newer. lxml 2.x does + not support dtd.iterentities(). 
+ """ + docinfo = elementtree.docinfo + if docinfo.doctype: + if forbid_dtd: + raise DTDForbidden(docinfo.doctype, + docinfo.system_url, + docinfo.public_id) + if forbid_entities and not LXML3: + # lxml < 3 has no iterentities() + raise NotSupportedError("Unable to check for entity declarations " + "in lxml 2.x") + + if forbid_entities: + for dtd in docinfo.internalDTD, docinfo.externalDTD: + if dtd is None: + continue + for entity in dtd.iterentities(): + raise EntitiesForbidden(entity.name, entity.content, None, + None, None, None) + + +def parse(source, parser=None, base_url=None, forbid_dtd=False, + forbid_entities=True): + if parser is None: + parser = getDefaultParser() + elementtree = _etree.parse(source, parser, base_url=base_url) + check_docinfo(elementtree, forbid_dtd, forbid_entities) + return elementtree + + +def fromstring(text, parser=None, base_url=None, forbid_dtd=False, + forbid_entities=True): + if parser is None: + parser = getDefaultParser() + rootelement = _etree.fromstring(text, parser, base_url=base_url) + elementtree = rootelement.getroottree() + check_docinfo(elementtree, forbid_dtd, forbid_entities) + return rootelement + + +XML = fromstring + + +def iterparse(*args, **kwargs): + raise NotSupportedError("defused lxml.etree.iterparse not available") diff --git a/python/defusedxml/minidom.py b/python/defusedxml/minidom.py new file mode 100644 index 0000000..0fd8684 --- /dev/null +++ b/python/defusedxml/minidom.py @@ -0,0 +1,42 @@ +# defusedxml +# +# Copyright (c) 2013 by Christian Heimes <christian@python.org> +# Licensed to PSF under a Contributor Agreement. +# See http://www.python.org/psf/license for licensing details. +"""Defused xml.dom.minidom +""" +from __future__ import print_function, absolute_import + +from xml.dom.minidom import _do_pulldom_parse +from . import expatbuilder as _expatbuilder +from . 
import pulldom as _pulldom + +__origin__ = "xml.dom.minidom" + + +def parse(file, parser=None, bufsize=None, forbid_dtd=False, + forbid_entities=True, forbid_external=True): + """Parse a file into a DOM by filename or file object.""" + if parser is None and not bufsize: + return _expatbuilder.parse(file, forbid_dtd=forbid_dtd, + forbid_entities=forbid_entities, + forbid_external=forbid_external) + else: + return _do_pulldom_parse(_pulldom.parse, (file,), + {'parser': parser, 'bufsize': bufsize, + 'forbid_dtd': forbid_dtd, 'forbid_entities': forbid_entities, + 'forbid_external': forbid_external}) + + +def parseString(string, parser=None, forbid_dtd=False, + forbid_entities=True, forbid_external=True): + """Parse a file into a DOM from a string.""" + if parser is None: + return _expatbuilder.parseString(string, forbid_dtd=forbid_dtd, + forbid_entities=forbid_entities, + forbid_external=forbid_external) + else: + return _do_pulldom_parse(_pulldom.parseString, (string,), + {'parser': parser, 'forbid_dtd': forbid_dtd, + 'forbid_entities': forbid_entities, + 'forbid_external': forbid_external}) diff --git a/python/defusedxml/pulldom.py b/python/defusedxml/pulldom.py new file mode 100644 index 0000000..fc9e466 --- /dev/null +++ b/python/defusedxml/pulldom.py @@ -0,0 +1,34 @@ +# defusedxml +# +# Copyright (c) 2013 by Christian Heimes <christian@python.org> +# Licensed to PSF under a Contributor Agreement. +# See http://www.python.org/psf/license for licensing details. 
+"""Defused xml.dom.pulldom +""" +from __future__ import print_function, absolute_import + +from xml.dom.pulldom import parse as _parse +from xml.dom.pulldom import parseString as _parseString +from .sax import make_parser + +__origin__ = "xml.dom.pulldom" + + +def parse(stream_or_string, parser=None, bufsize=None, forbid_dtd=False, + forbid_entities=True, forbid_external=True): + if parser is None: + parser = make_parser() + parser.forbid_dtd = forbid_dtd + parser.forbid_entities = forbid_entities + parser.forbid_external = forbid_external + return _parse(stream_or_string, parser, bufsize) + + +def parseString(string, parser=None, forbid_dtd=False, + forbid_entities=True, forbid_external=True): + if parser is None: + parser = make_parser() + parser.forbid_dtd = forbid_dtd + parser.forbid_entities = forbid_entities + parser.forbid_external = forbid_external + return _parseString(string, parser) diff --git a/python/defusedxml/sax.py b/python/defusedxml/sax.py new file mode 100644 index 0000000..534d0ca --- /dev/null +++ b/python/defusedxml/sax.py @@ -0,0 +1,49 @@ +# defusedxml +# +# Copyright (c) 2013 by Christian Heimes <christian@python.org> +# Licensed to PSF under a Contributor Agreement. +# See http://www.python.org/psf/license for licensing details. +"""Defused xml.sax +""" +from __future__ import print_function, absolute_import + +from xml.sax import InputSource as _InputSource +from xml.sax import ErrorHandler as _ErrorHandler + +from . 
import expatreader + +__origin__ = "xml.sax" + + +def parse(source, handler, errorHandler=_ErrorHandler(), forbid_dtd=False, + forbid_entities=True, forbid_external=True): + parser = make_parser() + parser.setContentHandler(handler) + parser.setErrorHandler(errorHandler) + parser.forbid_dtd = forbid_dtd + parser.forbid_entities = forbid_entities + parser.forbid_external = forbid_external + parser.parse(source) + + +def parseString(string, handler, errorHandler=_ErrorHandler(), + forbid_dtd=False, forbid_entities=True, + forbid_external=True): + from io import BytesIO + + if errorHandler is None: + errorHandler = _ErrorHandler() + parser = make_parser() + parser.setContentHandler(handler) + parser.setErrorHandler(errorHandler) + parser.forbid_dtd = forbid_dtd + parser.forbid_entities = forbid_entities + parser.forbid_external = forbid_external + + inpsrc = _InputSource() + inpsrc.setByteStream(BytesIO(string)) + parser.parse(inpsrc) + + +def make_parser(parser_list=[]): + return expatreader.create_parser() diff --git a/python/defusedxml/xmlrpc.py b/python/defusedxml/xmlrpc.py new file mode 100644 index 0000000..2a456e6 --- /dev/null +++ b/python/defusedxml/xmlrpc.py @@ -0,0 +1,157 @@ +# defusedxml +# +# Copyright (c) 2013 by Christian Heimes <christian@python.org> +# Licensed to PSF under a Contributor Agreement. +# See http://www.python.org/psf/license for licensing details. 
+"""Defused xmlrpclib + +Also defuses gzip bomb +""" +from __future__ import print_function, absolute_import + +import io + +from .common import ( + DTDForbidden, EntitiesForbidden, ExternalReferenceForbidden, PY3) + +if PY3: + __origin__ = "xmlrpc.client" + from xmlrpc.client import ExpatParser + from xmlrpc import client as xmlrpc_client + from xmlrpc import server as xmlrpc_server + from xmlrpc.client import gzip_decode as _orig_gzip_decode + from xmlrpc.client import GzipDecodedResponse as _OrigGzipDecodedResponse +else: + __origin__ = "xmlrpclib" + from xmlrpclib import ExpatParser + import xmlrpclib as xmlrpc_client + xmlrpc_server = None + from xmlrpclib import gzip_decode as _orig_gzip_decode + from xmlrpclib import GzipDecodedResponse as _OrigGzipDecodedResponse + +try: + import gzip +except ImportError: + gzip = None + + +# Limit maximum request size to prevent resource exhaustion DoS +# Also used to limit maximum amount of gzip decoded data in order to prevent +# decompression bombs +# A value of -1 or smaller disables the limit +MAX_DATA = 30 * 1024 * 1024 # 30 MB + + +def defused_gzip_decode(data, limit=None): + """gzip encoded data -> unencoded data + + Decode data using the gzip content encoding as described in RFC 1952 + """ + if not gzip: + raise NotImplementedError + if limit is None: + limit = MAX_DATA + f = io.BytesIO(data) + gzf = gzip.GzipFile(mode="rb", fileobj=f) + try: + if limit < 0: # no limit + decoded = gzf.read() + else: + decoded = gzf.read(limit + 1) + except IOError: + raise ValueError("invalid data") + f.close() + gzf.close() + if limit >= 0 and len(decoded) > limit: + raise ValueError("max gzipped payload length exceeded") + return decoded + + +class DefusedGzipDecodedResponse(gzip.GzipFile if gzip else object): + """a file-like object to decode a response encoded with the gzip + method, as described in RFC 1952. 
+ """ + + def __init__(self, response, limit=None): + # response doesn't support tell() and read(), required by + # GzipFile + if not gzip: + raise NotImplementedError + self.limit = limit = limit if limit is not None else MAX_DATA + if limit < 0: # no limit + data = response.read() + self.readlength = None + else: + data = response.read(limit + 1) + self.readlength = 0 + if limit >= 0 and len(data) > limit: + raise ValueError("max payload length exceeded") + self.stringio = io.BytesIO(data) + gzip.GzipFile.__init__(self, mode="rb", fileobj=self.stringio) + + def read(self, n): + if self.limit >= 0: + left = self.limit - self.readlength + n = min(n, left + 1) + data = gzip.GzipFile.read(self, n) + self.readlength += len(data) + if self.readlength > self.limit: + raise ValueError("max payload length exceeded") + return data + else: + return gzip.GzipFile.read(self, n) + + def close(self): + gzip.GzipFile.close(self) + self.stringio.close() + + +class DefusedExpatParser(ExpatParser): + + def __init__(self, target, forbid_dtd=False, forbid_entities=True, + forbid_external=True): + ExpatParser.__init__(self, target) + self.forbid_dtd = forbid_dtd + self.forbid_entities = forbid_entities + self.forbid_external = forbid_external + parser = self._parser + if self.forbid_dtd: + parser.StartDoctypeDeclHandler = self.defused_start_doctype_decl + if self.forbid_entities: + parser.EntityDeclHandler = self.defused_entity_decl + parser.UnparsedEntityDeclHandler = self.defused_unparsed_entity_decl + if self.forbid_external: + parser.ExternalEntityRefHandler = self.defused_external_entity_ref_handler + + def defused_start_doctype_decl(self, name, sysid, pubid, + has_internal_subset): + raise DTDForbidden(name, sysid, pubid) + + def defused_entity_decl(self, name, is_parameter_entity, value, base, + sysid, pubid, notation_name): + raise EntitiesForbidden(name, value, base, sysid, pubid, notation_name) + + def defused_unparsed_entity_decl(self, name, base, sysid, pubid, + 
notation_name): + # expat 1.2 + raise EntitiesForbidden(name, None, base, sysid, pubid, notation_name) + + def defused_external_entity_ref_handler(self, context, base, sysid, + pubid): + raise ExternalReferenceForbidden(context, base, sysid, pubid) + + +def monkey_patch(): + xmlrpc_client.FastParser = DefusedExpatParser + xmlrpc_client.GzipDecodedResponse = DefusedGzipDecodedResponse + xmlrpc_client.gzip_decode = defused_gzip_decode + if xmlrpc_server: + xmlrpc_server.gzip_decode = defused_gzip_decode + + +def unmonkey_patch(): + xmlrpc_client.FastParser = None + xmlrpc_client.GzipDecodedResponse = _OrigGzipDecodedResponse + xmlrpc_client.gzip_decode = _orig_gzip_decode + if xmlrpc_server: + xmlrpc_server.gzip_decode = _orig_gzip_decode |