aboutsummaryrefslogtreecommitdiffstats
path: root/python/defusedxml
diff options
context:
space:
mode:
Diffstat (limited to 'python/defusedxml')
-rw-r--r--python/defusedxml/ElementTree.py112
-rw-r--r--python/defusedxml/__init__.py45
-rw-r--r--python/defusedxml/cElementTree.py30
-rw-r--r--python/defusedxml/common.py120
-rw-r--r--python/defusedxml/expatbuilder.py110
-rw-r--r--python/defusedxml/expatreader.py59
-rw-r--r--python/defusedxml/lxml.py153
-rw-r--r--python/defusedxml/minidom.py42
-rw-r--r--python/defusedxml/pulldom.py34
-rw-r--r--python/defusedxml/sax.py49
-rw-r--r--python/defusedxml/xmlrpc.py157
11 files changed, 911 insertions, 0 deletions
diff --git a/python/defusedxml/ElementTree.py b/python/defusedxml/ElementTree.py
new file mode 100644
index 0000000..41b2ea8
--- /dev/null
+++ b/python/defusedxml/ElementTree.py
@@ -0,0 +1,112 @@
+# defusedxml
+#
+# Copyright (c) 2013 by Christian Heimes <christian@python.org>
+# Licensed to PSF under a Contributor Agreement.
+# See http://www.python.org/psf/license for licensing details.
+"""Defused xml.etree.ElementTree facade
+"""
+from __future__ import print_function, absolute_import
+
+import sys
+from xml.etree.ElementTree import TreeBuilder as _TreeBuilder
+from xml.etree.ElementTree import parse as _parse
+from xml.etree.ElementTree import tostring
+
+from .common import PY3
+
+
+if PY3:
+ import importlib
+else:
+ from xml.etree.ElementTree import XMLParser as _XMLParser
+ from xml.etree.ElementTree import iterparse as _iterparse
+ from xml.etree.ElementTree import ParseError
+
+
+from .common import (DTDForbidden, EntitiesForbidden,
+ ExternalReferenceForbidden, _generate_etree_functions)
+
+__origin__ = "xml.etree.ElementTree"
+
+
+def _get_py3_cls():
+    """Return the pure Python XMLParser, iterparse and ParseError.
+
+    Python 3.3 hides the pure Python code but defusedxml requires it.
+    The module is therefore imported a second time while the C accelerator
+    ``_elementtree`` is blocked.
+
+    The code is based on test.support.import_fresh_module().
+
+    :returns: tuple ``(XMLParser, iterparse, ParseError)`` taken from the
+              freshly imported pure Python module
+    """
+    pymodname = "xml.etree.ElementTree"
+    cmodname = "_elementtree"
+
+    pymod = sys.modules.pop(pymodname, None)
+    cmod = sys.modules.pop(cmodname, None)
+
+    # A None entry makes "import _elementtree" fail, so the import below
+    # falls back to the pure Python implementation.
+    sys.modules[cmodname] = None
+    try:
+        pure_pymod = importlib.import_module(pymodname)
+    finally:
+        # Restore sys.modules even when the import fails; otherwise the
+        # accelerator would stay blocked for the rest of the process.
+        if cmod is not None:
+            sys.modules[cmodname] = cmod
+        else:
+            sys.modules.pop(cmodname, None)
+        if pymod is not None:
+            sys.modules[pymodname] = pymod
+        else:
+            # Nothing to restore: drop the private copy instead of
+            # registering None, which would block later imports.
+            sys.modules.pop(pymodname, None)
+
+    return pure_pymod.XMLParser, pure_pymod.iterparse, pure_pymod.ParseError
+
+
+if PY3:
+ _XMLParser, _iterparse, ParseError = _get_py3_cls()
+
+
+class DefusedXMLParser(_XMLParser):
+    """XMLParser that refuses DTDs, entities and external references.
+
+    Each ``forbid_*`` flag installs expat handlers that raise the matching
+    defusedxml exception as soon as the construct is seen.
+    """
+
+    def __init__(self, html=0, target=None, encoding=None,
+                 forbid_dtd=False, forbid_entities=True,
+                 forbid_external=True):
+        # Explicit base call: the base is an old style class on Python 2.x.
+        _XMLParser.__init__(self, html, target, encoding)
+        self.forbid_dtd = forbid_dtd
+        self.forbid_entities = forbid_entities
+        self.forbid_external = forbid_external
+        # The expat parser object is stored under a different attribute
+        # name depending on the major Python version.
+        expat = self.parser if PY3 else self._parser
+        if self.forbid_dtd:
+            expat.StartDoctypeDeclHandler = self.defused_start_doctype_decl
+        if self.forbid_entities:
+            expat.EntityDeclHandler = self.defused_entity_decl
+            expat.UnparsedEntityDeclHandler = (
+                self.defused_unparsed_entity_decl)
+        if self.forbid_external:
+            expat.ExternalEntityRefHandler = (
+                self.defused_external_entity_ref_handler)
+
+    def defused_start_doctype_decl(self, name, sysid, pubid,
+                                   has_internal_subset):
+        """Doctype handler: always raises DTDForbidden."""
+        raise DTDForbidden(name, sysid, pubid)
+
+    def defused_entity_decl(self, name, is_parameter_entity, value, base,
+                            sysid, pubid, notation_name):
+        """Entity declaration handler: always raises EntitiesForbidden."""
+        raise EntitiesForbidden(name, value, base, sysid, pubid,
+                                notation_name)
+
+    def defused_unparsed_entity_decl(self, name, base, sysid, pubid,
+                                     notation_name):
+        """Unparsed entity handler (expat 1.2): raises EntitiesForbidden."""
+        # Unparsed entities carry no value, hence None.
+        raise EntitiesForbidden(name, None, base, sysid, pubid,
+                                notation_name)
+
+    def defused_external_entity_ref_handler(self, context, base, sysid,
+                                            pubid):
+        """External reference handler: raises ExternalReferenceForbidden."""
+        raise ExternalReferenceForbidden(context, base, sysid, pubid)
+
+
+# aliases
+# XMLParse is a historical typo and is kept for backwards compatibility.
+# XMLParser is the name the stdlib module exports; without this alias
+# _apply_defusing() finds no matching name and leaves the stdlib parser
+# class unpatched.
+XMLTreeBuilder = XMLParse = XMLParser = DefusedXMLParser
+
+parse, iterparse, fromstring = _generate_etree_functions(DefusedXMLParser,
+                                                         _TreeBuilder, _parse,
+                                                         _iterparse)
+XML = fromstring
+
+
+__all__ = ['XML', 'XMLParse', 'XMLParser', 'XMLTreeBuilder', 'fromstring',
+           'iterparse', 'parse', 'tostring']
diff --git a/python/defusedxml/__init__.py b/python/defusedxml/__init__.py
new file mode 100644
index 0000000..590a5a9
--- /dev/null
+++ b/python/defusedxml/__init__.py
@@ -0,0 +1,45 @@
+# defusedxml
+#
+# Copyright (c) 2013 by Christian Heimes <christian@python.org>
+# Licensed to PSF under a Contributor Agreement.
+# See http://www.python.org/psf/license for licensing details.
+"""Defuse XML bomb denial of service vulnerabilities
+"""
+from __future__ import print_function, absolute_import
+
+from .common import (DefusedXmlException, DTDForbidden, EntitiesForbidden,
+ ExternalReferenceForbidden, NotSupportedError,
+ _apply_defusing)
+
+
+def defuse_stdlib():
+    """Monkey patch and defuse all stdlib packages
+
+    :warning: The monkey patch is an EXPERIMENTAL feature.
+    :returns: dict mapping every defused module to the stdlib module it
+              patched; the xmlrpc entry maps to None
+    """
+    from . import cElementTree
+    from . import ElementTree
+    from . import minidom
+    from . import pulldom
+    from . import sax
+    from . import expatbuilder
+    from . import expatreader
+    from . import xmlrpc
+
+    # xmlrpc brings its own patch routine and has no single stdlib target.
+    xmlrpc.monkey_patch()
+    patched = {xmlrpc: None}
+
+    generic_modules = (cElementTree, ElementTree, minidom, pulldom, sax,
+                       expatbuilder, expatreader)
+    for module in generic_modules:
+        patched[module] = _apply_defusing(module)
+
+    return patched
+
+
+__version__ = "0.5.0"
+
+__all__ = ['DefusedXmlException', 'DTDForbidden', 'EntitiesForbidden',
+ 'ExternalReferenceForbidden', 'NotSupportedError']
diff --git a/python/defusedxml/cElementTree.py b/python/defusedxml/cElementTree.py
new file mode 100644
index 0000000..cc13689
--- /dev/null
+++ b/python/defusedxml/cElementTree.py
@@ -0,0 +1,30 @@
+# defusedxml
+#
+# Copyright (c) 2013 by Christian Heimes <christian@python.org>
+# Licensed to PSF under a Contributor Agreement.
+# See http://www.python.org/psf/license for licensing details.
+"""Defused xml.etree.cElementTree
+"""
+from __future__ import absolute_import
+
+from xml.etree.cElementTree import TreeBuilder as _TreeBuilder
+from xml.etree.cElementTree import parse as _parse
+from xml.etree.cElementTree import tostring
+# iterparse from ElementTree!
+from xml.etree.ElementTree import iterparse as _iterparse
+
+from .ElementTree import DefusedXMLParser
+from .common import _generate_etree_functions
+
+__origin__ = "xml.etree.cElementTree"
+
+
+# XMLParse is a historical typo and is kept for backwards compatibility.
+# XMLParser is the name the stdlib module exports, so it has to exist here
+# for the stdlib monkey patch to replace the parser class.
+XMLTreeBuilder = XMLParse = XMLParser = DefusedXMLParser
+
+parse, iterparse, fromstring = _generate_etree_functions(DefusedXMLParser,
+                                                         _TreeBuilder, _parse,
+                                                         _iterparse)
+XML = fromstring
+
+__all__ = ['XML', 'XMLParse', 'XMLParser', 'XMLTreeBuilder', 'fromstring',
+           'iterparse', 'parse', 'tostring']
diff --git a/python/defusedxml/common.py b/python/defusedxml/common.py
new file mode 100644
index 0000000..668b609
--- /dev/null
+++ b/python/defusedxml/common.py
@@ -0,0 +1,120 @@
+# defusedxml
+#
+# Copyright (c) 2013 by Christian Heimes <christian@python.org>
+# Licensed to PSF under a Contributor Agreement.
+# See http://www.python.org/psf/license for licensing details.
+"""Common constants, exceptions and helper functions
+"""
+import sys
+
+PY3 = sys.version_info[0] == 3
+
+
+class DefusedXmlException(ValueError):
+    """Root of the defusedxml exception hierarchy
+    """
+
+    def __repr__(self):
+        # repr() intentionally yields the same text as str().
+        text = str(self)
+        return text
+
+
+class DTDForbidden(DefusedXmlException):
+    """Raised when a document type definition is found but not allowed
+    """
+
+    def __init__(self, name, sysid, pubid):
+        super(DTDForbidden, self).__init__()
+        # Keep the doctype details for callers that inspect the error.
+        self.name, self.sysid, self.pubid = name, sysid, pubid
+
+    def __str__(self):
+        return "DTDForbidden(name='{}', system_id={!r}, public_id={!r})".format(
+            self.name, self.sysid, self.pubid)
+
+
+class EntitiesForbidden(DefusedXmlException):
+    """Raised when an entity definition is found but not allowed
+    """
+
+    def __init__(self, name, value, base, sysid, pubid, notation_name):
+        super(EntitiesForbidden, self).__init__()
+        # Keep every detail of the declaration on the instance.
+        self.name, self.value, self.base = name, value, base
+        self.sysid, self.pubid = sysid, pubid
+        self.notation_name = notation_name
+
+    def __str__(self):
+        # Only name and identifiers are shown; value, base and
+        # notation_name are available as attributes.
+        return ("EntitiesForbidden(name='{}', system_id={!r}, "
+                "public_id={!r})").format(self.name, self.sysid, self.pubid)
+
+
+class ExternalReferenceForbidden(DefusedXmlException):
+    """Raised when an external reference would have to be resolved
+    """
+
+    def __init__(self, context, base, sysid, pubid):
+        super(ExternalReferenceForbidden, self).__init__()
+        self.context, self.base = context, base
+        self.sysid, self.pubid = sysid, pubid
+
+    def __str__(self):
+        return "ExternalReferenceForbidden(system_id='{}', public_id={})".format(
+            self.sysid, self.pubid)
+
+
+class NotSupportedError(DefusedXmlException):
+ """The operation is not supported
+
+ Adds no attributes or behaviour of its own to DefusedXmlException.
+ """
+
+
+def _apply_defusing(defused_mod):
+    """Copy the public names of a defused module over its stdlib original.
+
+    The stdlib module is named by ``defused_mod.__origin__``. Only names
+    that do not start with an underscore and that already exist in the
+    stdlib module are replaced.
+
+    :returns: the patched stdlib module
+    """
+    assert defused_mod is sys.modules[defused_mod.__name__]
+    origin = defused_mod.__origin__
+    # Make sure the target is imported; the module object itself is then
+    # taken from sys.modules.
+    __import__(origin, {}, {}, ["*"])
+    target = sys.modules[origin]
+    known = set(dir(target))
+    for attr, replacement in vars(defused_mod).items():
+        if not attr.startswith("_") and attr in known:
+            setattr(target, attr, replacement)
+    return target
+
+
+def _generate_etree_functions(DefusedXMLParser, _TreeBuilder,
+                              _parse, _iterparse):
+    """Build parse(), iterparse() and fromstring() for an etree flavour.
+
+    The same factory serves cElementTree and ElementTree; the caller passes
+    in the parser class, tree builder and original functions to wrap.
+
+    :returns: tuple ``(parse, iterparse, fromstring)``
+    """
+
+    def _new_parser(forbid_dtd, forbid_entities, forbid_external):
+        # A fresh defused parser that feeds a fresh tree builder.
+        return DefusedXMLParser(target=_TreeBuilder(),
+                                forbid_dtd=forbid_dtd,
+                                forbid_entities=forbid_entities,
+                                forbid_external=forbid_external)
+
+    def parse(source, parser=None, forbid_dtd=False, forbid_entities=True,
+              forbid_external=True):
+        # The forbid_* flags only apply when no parser is supplied.
+        if parser is None:
+            parser = _new_parser(forbid_dtd, forbid_entities, forbid_external)
+        return _parse(source, parser)
+
+    def iterparse(source, events=None, parser=None, forbid_dtd=False,
+                  forbid_entities=True, forbid_external=True):
+        # The forbid_* flags only apply when no parser is supplied.
+        if parser is None:
+            parser = _new_parser(forbid_dtd, forbid_entities, forbid_external)
+        return _iterparse(source, events, parser)
+
+    def fromstring(text, forbid_dtd=False, forbid_entities=True,
+                   forbid_external=True):
+        defused = _new_parser(forbid_dtd, forbid_entities, forbid_external)
+        defused.feed(text)
+        return defused.close()
+
+    return parse, iterparse, fromstring
diff --git a/python/defusedxml/expatbuilder.py b/python/defusedxml/expatbuilder.py
new file mode 100644
index 0000000..0eb6b91
--- /dev/null
+++ b/python/defusedxml/expatbuilder.py
@@ -0,0 +1,110 @@
+# defusedxml
+#
+# Copyright (c) 2013 by Christian Heimes <christian@python.org>
+# Licensed to PSF under a Contributor Agreement.
+# See http://www.python.org/psf/license for licensing details.
+"""Defused xml.dom.expatbuilder
+"""
+from __future__ import print_function, absolute_import
+
+from xml.dom.expatbuilder import ExpatBuilder as _ExpatBuilder
+from xml.dom.expatbuilder import Namespaces as _Namespaces
+
+from .common import (DTDForbidden, EntitiesForbidden,
+ ExternalReferenceForbidden)
+
+__origin__ = "xml.dom.expatbuilder"
+
+
+class DefusedExpatBuilder(_ExpatBuilder):
+    """Document builder that rejects DTDs, entities and external refs."""
+
+    def __init__(self, options=None, forbid_dtd=False, forbid_entities=True,
+                 forbid_external=True):
+        _ExpatBuilder.__init__(self, options)
+        self.forbid_dtd = forbid_dtd
+        self.forbid_entities = forbid_entities
+        self.forbid_external = forbid_external
+
+    def defused_start_doctype_decl(self, name, sysid, pubid,
+                                   has_internal_subset):
+        """Doctype handler: always raises DTDForbidden."""
+        raise DTDForbidden(name, sysid, pubid)
+
+    def defused_entity_decl(self, name, is_parameter_entity, value, base,
+                            sysid, pubid, notation_name):
+        """Entity declaration handler: always raises EntitiesForbidden."""
+        raise EntitiesForbidden(name, value, base, sysid, pubid,
+                                notation_name)
+
+    def defused_unparsed_entity_decl(self, name, base, sysid, pubid,
+                                     notation_name):
+        """Unparsed entity handler (expat 1.2): raises EntitiesForbidden."""
+        # Unparsed entities carry no value, hence None.
+        raise EntitiesForbidden(name, None, base, sysid, pubid,
+                                notation_name)
+
+    def defused_external_entity_ref_handler(self, context, base, sysid,
+                                            pubid):
+        """External reference handler: raises ExternalReferenceForbidden."""
+        raise ExternalReferenceForbidden(context, base, sysid, pubid)
+
+    def install(self, parser):
+        """Install the stock handlers, then override the forbidden ones."""
+        _ExpatBuilder.install(self, parser)
+
+        # The defused handlers are set after the base class ran so that
+        # they replace whatever it installed.
+        if self.forbid_dtd:
+            parser.StartDoctypeDeclHandler = self.defused_start_doctype_decl
+        if self.forbid_entities:
+            parser.EntityDeclHandler = self.defused_entity_decl
+            parser.UnparsedEntityDeclHandler = (
+                self.defused_unparsed_entity_decl)
+        if self.forbid_external:
+            parser.ExternalEntityRefHandler = (
+                self.defused_external_entity_ref_handler)
+
+
+class DefusedExpatBuilderNS(_Namespaces, DefusedExpatBuilder):
+ """Defused document builder that supports namespaces."""
+
+ def install(self, parser):
+ # Install the defused handlers first, then add the namespace
+ # declaration handler when the builder options ask for it.
+ DefusedExpatBuilder.install(self, parser)
+ if self._options.namespace_declarations:
+ parser.StartNamespaceDeclHandler = (
+ self.start_namespace_decl_handler)
+
+ def reset(self):
+ # Reset the builder state, then re-initialise the namespace
+ # bookkeeping provided by the _Namespaces mixin.
+ DefusedExpatBuilder.reset(self)
+ self._initNamespaces()
+
+
+def parse(file, namespaces=True, forbid_dtd=False, forbid_entities=True,
+          forbid_external=True):
+    """Parse a document, returning the resulting Document node.
+
+    'file' may be either a file name or an open file object.
+    'namespaces' selects the namespace-aware builder; the forbid_* flags
+    are passed on to the builder.
+    """
+    if namespaces:
+        build_builder = DefusedExpatBuilderNS
+    else:
+        build_builder = DefusedExpatBuilder
+    builder = build_builder(forbid_dtd=forbid_dtd,
+                            forbid_entities=forbid_entities,
+                            forbid_external=forbid_external)
+
+    # On Python 2 a file name may be str or unicode; basestring covers both.
+    # The name does not exist on Python 3, where str is the only text type.
+    try:
+        string_types = basestring
+    except NameError:
+        string_types = str
+
+    if isinstance(file, string_types):
+        # The with block closes the file even when parsing raises.
+        with open(file, 'rb') as fp:
+            return builder.parseFile(fp)
+    return builder.parseFile(file)
+
+
+def parseString(string, namespaces=True, forbid_dtd=False,
+                forbid_entities=True, forbid_external=True):
+    """Parse a document from a string, returning the resulting
+    Document node.
+
+    'namespaces' selects the namespace-aware builder; the forbid_* flags
+    are passed on to the builder.
+    """
+    builder_cls = DefusedExpatBuilderNS if namespaces else DefusedExpatBuilder
+    builder = builder_cls(forbid_dtd=forbid_dtd,
+                          forbid_entities=forbid_entities,
+                          forbid_external=forbid_external)
+    return builder.parseString(string)
diff --git a/python/defusedxml/expatreader.py b/python/defusedxml/expatreader.py
new file mode 100644
index 0000000..ef6bc39
--- /dev/null
+++ b/python/defusedxml/expatreader.py
@@ -0,0 +1,59 @@
+# defusedxml
+#
+# Copyright (c) 2013 by Christian Heimes <christian@python.org>
+# Licensed to PSF under a Contributor Agreement.
+# See http://www.python.org/psf/license for licensing details.
+"""Defused xml.sax.expatreader
+"""
+from __future__ import print_function, absolute_import
+
+from xml.sax.expatreader import ExpatParser as _ExpatParser
+
+from .common import (DTDForbidden, EntitiesForbidden,
+ ExternalReferenceForbidden)
+
+__origin__ = "xml.sax.expatreader"
+
+
+class DefusedExpatParser(_ExpatParser):
+    """SAX driver for pyexpat that rejects DTDs, entities and external
+    references according to its forbid_* flags."""
+
+    def __init__(self, namespaceHandling=0, bufsize=2 ** 16 - 20,
+                 forbid_dtd=False, forbid_entities=True,
+                 forbid_external=True):
+        _ExpatParser.__init__(self, namespaceHandling, bufsize)
+        self.forbid_dtd = forbid_dtd
+        self.forbid_entities = forbid_entities
+        self.forbid_external = forbid_external
+
+    def defused_start_doctype_decl(self, name, sysid, pubid,
+                                   has_internal_subset):
+        """Doctype handler: always raises DTDForbidden."""
+        raise DTDForbidden(name, sysid, pubid)
+
+    def defused_entity_decl(self, name, is_parameter_entity, value, base,
+                            sysid, pubid, notation_name):
+        """Entity declaration handler: always raises EntitiesForbidden."""
+        raise EntitiesForbidden(name, value, base, sysid, pubid,
+                                notation_name)
+
+    def defused_unparsed_entity_decl(self, name, base, sysid, pubid,
+                                     notation_name):
+        """Unparsed entity handler (expat 1.2): raises EntitiesForbidden."""
+        # Unparsed entities carry no value, hence None.
+        raise EntitiesForbidden(name, None, base, sysid, pubid,
+                                notation_name)
+
+    def defused_external_entity_ref_handler(self, context, base, sysid,
+                                            pubid):
+        """External reference handler: raises ExternalReferenceForbidden."""
+        raise ExternalReferenceForbidden(context, base, sysid, pubid)
+
+    def reset(self):
+        """Reset the base parser, then install the defused handlers."""
+        _ExpatParser.reset(self)
+        # The handlers are installed on self._parser after every reset and
+        # read the forbid_* flags at that moment.
+        expat = self._parser
+        if self.forbid_dtd:
+            expat.StartDoctypeDeclHandler = self.defused_start_doctype_decl
+        if self.forbid_entities:
+            expat.EntityDeclHandler = self.defused_entity_decl
+            expat.UnparsedEntityDeclHandler = (
+                self.defused_unparsed_entity_decl)
+        if self.forbid_external:
+            expat.ExternalEntityRefHandler = (
+                self.defused_external_entity_ref_handler)
+
+
+def create_parser(*args, **kwargs):
+ """Return a new DefusedExpatParser; all arguments are forwarded to it."""
+ return DefusedExpatParser(*args, **kwargs)
diff --git a/python/defusedxml/lxml.py b/python/defusedxml/lxml.py
new file mode 100644
index 0000000..7f3ee0b
--- /dev/null
+++ b/python/defusedxml/lxml.py
@@ -0,0 +1,153 @@
+# defusedxml
+#
+# Copyright (c) 2013 by Christian Heimes <christian@python.org>
+# Licensed to PSF under a Contributor Agreement.
+# See http://www.python.org/psf/license for licensing details.
+"""Example code for lxml.etree protection
+
+The code has NO protection against decompression bombs.
+"""
+from __future__ import print_function, absolute_import
+
+import threading
+from lxml import etree as _etree
+
+from .common import DTDForbidden, EntitiesForbidden, NotSupportedError
+
+LXML3 = _etree.LXML_VERSION[0] >= 3
+
+__origin__ = "lxml.etree"
+
+tostring = _etree.tostring
+
+
+class RestrictedElement(_etree.ElementBase):
+    """A restricted Element class that filters out instances of some classes
+
+    Every iteration method delegates to the lxml base class and drops the
+    nodes whose type is listed in ``blacklist``.
+    """
+    __slots__ = ()
+    # blacklist = (etree._Entity, etree._ProcessingInstruction, etree._Comment)
+    blacklist = _etree._Entity
+
+    def _filter(self, iterator):
+        """Yield the items of iterator that are not blacklisted."""
+        blacklist = self.blacklist
+        for child in iterator:
+            if isinstance(child, blacklist):
+                continue
+            yield child
+
+    def __iter__(self):
+        iterator = super(RestrictedElement, self).__iter__()
+        return self._filter(iterator)
+
+    def iterchildren(self, tag=None, reversed=False):
+        iterator = super(RestrictedElement, self).iterchildren(
+            tag=tag, reversed=reversed)
+        return self._filter(iterator)
+
+    def iter(self, tag=None, *tags):
+        # tag must be passed positionally: combining tag=tag with *tags
+        # binds the first extra tag to the same parameter and raises
+        # TypeError as soon as more than one tag is given.
+        iterator = super(RestrictedElement, self).iter(tag, *tags)
+        return self._filter(iterator)
+
+    def iterdescendants(self, tag=None, *tags):
+        # Positional tag for the same reason as in iter().
+        iterator = super(RestrictedElement,
+                         self).iterdescendants(tag, *tags)
+        return self._filter(iterator)
+
+    def itersiblings(self, tag=None, preceding=False):
+        iterator = super(RestrictedElement, self).itersiblings(
+            tag=tag, preceding=preceding)
+        return self._filter(iterator)
+
+    def getchildren(self):
+        """Return the filtered direct children as a list."""
+        iterator = super(RestrictedElement, self).__iter__()
+        return list(self._filter(iterator))
+
+    def getiterator(self, tag=None):
+        iterator = super(RestrictedElement, self).getiterator(tag)
+        return self._filter(iterator)
+
+
+class GlobalParserTLS(threading.local):
+    """Thread local holder of the default lxml parser instance
+    """
+    # Keyword arguments handed to lxml's XMLParser.
+    parser_config = {
+        'resolve_entities': False,
+        # 'remove_comments': True,
+        # 'remove_pis': True,
+    }
+
+    # Element class used by the default parser; None keeps lxml's default.
+    element_class = RestrictedElement
+
+    def createDefaultParser(self):
+        """Build a new XMLParser from parser_config and element_class."""
+        new_parser = _etree.XMLParser(**self.parser_config)
+        cls = self.element_class
+        if cls is None:
+            return new_parser
+        lookup = _etree.ElementDefaultClassLookup(element=cls)
+        new_parser.set_element_class_lookup(lookup)
+        return new_parser
+
+    def setDefaultParser(self, parser):
+        """Store parser as the default parser of the current thread."""
+        self._default_parser = parser
+
+    def getDefaultParser(self):
+        """Return this thread's default parser, creating it on first use."""
+        current = getattr(self, "_default_parser", None)
+        if current is not None:
+            return current
+        current = self.createDefaultParser()
+        self.setDefaultParser(current)
+        return current
+
+
+_parser_tls = GlobalParserTLS()
+getDefaultParser = _parser_tls.getDefaultParser
+
+
+def check_docinfo(elementtree, forbid_dtd=False, forbid_entities=True):
+    """Check docinfo of an element tree for DTD and entity declarations
+
+    The check for entity declarations needs lxml 3 or newer. lxml 2.x does
+    not support dtd.iterentities().
+
+    Raises DTDForbidden, EntitiesForbidden or NotSupportedError; returns
+    None when the tree passes.
+    """
+    docinfo = elementtree.docinfo
+    has_doctype = bool(docinfo.doctype)
+    if has_doctype and forbid_dtd:
+        raise DTDForbidden(docinfo.doctype,
+                           docinfo.system_url,
+                           docinfo.public_id)
+    if has_doctype and forbid_entities and not LXML3:
+        # lxml < 3 has no iterentities()
+        raise NotSupportedError("Unable to check for entity declarations "
+                                "in lxml 2.x")
+
+    if not forbid_entities:
+        return
+    for dtd in (docinfo.internalDTD, docinfo.externalDTD):
+        if dtd is not None:
+            # The first declared entity is enough to reject the document.
+            for entity in dtd.iterentities():
+                raise EntitiesForbidden(entity.name, entity.content, None,
+                                        None, None, None)
+
+
+def parse(source, parser=None, base_url=None, forbid_dtd=False,
+          forbid_entities=True):
+    """Parse source with lxml and check the result with check_docinfo().
+
+    Without an explicit parser the thread's default parser is used.
+    Returns the element tree.
+    """
+    active_parser = getDefaultParser() if parser is None else parser
+    tree = _etree.parse(source, active_parser, base_url=base_url)
+    check_docinfo(tree, forbid_dtd, forbid_entities)
+    return tree
+
+
+def fromstring(text, parser=None, base_url=None, forbid_dtd=False,
+               forbid_entities=True):
+    """Parse text with lxml and check its tree with check_docinfo().
+
+    Without an explicit parser the thread's default parser is used.
+    Returns the root element.
+    """
+    active_parser = getDefaultParser() if parser is None else parser
+    root = _etree.fromstring(text, active_parser, base_url=base_url)
+    # docinfo lives on the tree, not on the root element.
+    check_docinfo(root.getroottree(), forbid_dtd, forbid_entities)
+    return root
+
+
+XML = fromstring
+
+
+def iterparse(*args, **kwargs):
+ """Placeholder: a defused iterparse is not available for lxml.
+
+ Always raises NotSupportedError; the arguments are ignored.
+ """
+ raise NotSupportedError("defused lxml.etree.iterparse not available")
diff --git a/python/defusedxml/minidom.py b/python/defusedxml/minidom.py
new file mode 100644
index 0000000..0fd8684
--- /dev/null
+++ b/python/defusedxml/minidom.py
@@ -0,0 +1,42 @@
+# defusedxml
+#
+# Copyright (c) 2013 by Christian Heimes <christian@python.org>
+# Licensed to PSF under a Contributor Agreement.
+# See http://www.python.org/psf/license for licensing details.
+"""Defused xml.dom.minidom
+"""
+from __future__ import print_function, absolute_import
+
+from xml.dom.minidom import _do_pulldom_parse
+from . import expatbuilder as _expatbuilder
+from . import pulldom as _pulldom
+
+__origin__ = "xml.dom.minidom"
+
+
+def parse(file, parser=None, bufsize=None, forbid_dtd=False,
+          forbid_entities=True, forbid_external=True):
+    """Parse a file into a DOM by filename or file object."""
+    if parser is not None or bufsize:
+        # A custom parser or buffer size selects the pulldom route.
+        options = {'parser': parser, 'bufsize': bufsize,
+                   'forbid_dtd': forbid_dtd,
+                   'forbid_entities': forbid_entities,
+                   'forbid_external': forbid_external}
+        return _do_pulldom_parse(_pulldom.parse, (file,), options)
+    return _expatbuilder.parse(file, forbid_dtd=forbid_dtd,
+                               forbid_entities=forbid_entities,
+                               forbid_external=forbid_external)
+
+
+def parseString(string, parser=None, forbid_dtd=False,
+                forbid_entities=True, forbid_external=True):
+    """Parse a file into a DOM from a string."""
+    if parser is not None:
+        # A custom parser selects the pulldom route.
+        options = {'parser': parser, 'forbid_dtd': forbid_dtd,
+                   'forbid_entities': forbid_entities,
+                   'forbid_external': forbid_external}
+        return _do_pulldom_parse(_pulldom.parseString, (string,), options)
+    return _expatbuilder.parseString(string, forbid_dtd=forbid_dtd,
+                                     forbid_entities=forbid_entities,
+                                     forbid_external=forbid_external)
diff --git a/python/defusedxml/pulldom.py b/python/defusedxml/pulldom.py
new file mode 100644
index 0000000..fc9e466
--- /dev/null
+++ b/python/defusedxml/pulldom.py
@@ -0,0 +1,34 @@
+# defusedxml
+#
+# Copyright (c) 2013 by Christian Heimes <christian@python.org>
+# Licensed to PSF under a Contributor Agreement.
+# See http://www.python.org/psf/license for licensing details.
+"""Defused xml.dom.pulldom
+"""
+from __future__ import print_function, absolute_import
+
+from xml.dom.pulldom import parse as _parse
+from xml.dom.pulldom import parseString as _parseString
+from .sax import make_parser
+
+__origin__ = "xml.dom.pulldom"
+
+
+def parse(stream_or_string, parser=None, bufsize=None, forbid_dtd=False,
+          forbid_entities=True, forbid_external=True):
+    """Defused counterpart of xml.dom.pulldom.parse().
+
+    The forbid_* flags are only applied to the parser created here; a
+    parser passed in by the caller is used unchanged.
+    """
+    if parser is None:
+        parser = make_parser()
+        flags = (("forbid_dtd", forbid_dtd),
+                 ("forbid_entities", forbid_entities),
+                 ("forbid_external", forbid_external))
+        for attr, value in flags:
+            setattr(parser, attr, value)
+    return _parse(stream_or_string, parser, bufsize)
+
+
+def parseString(string, parser=None, forbid_dtd=False,
+                forbid_entities=True, forbid_external=True):
+    """Defused counterpart of xml.dom.pulldom.parseString().
+
+    The forbid_* flags are only applied to the parser created here; a
+    parser passed in by the caller is used unchanged.
+    """
+    if parser is None:
+        parser = make_parser()
+        flags = (("forbid_dtd", forbid_dtd),
+                 ("forbid_entities", forbid_entities),
+                 ("forbid_external", forbid_external))
+        for attr, value in flags:
+            setattr(parser, attr, value)
+    return _parseString(string, parser)
diff --git a/python/defusedxml/sax.py b/python/defusedxml/sax.py
new file mode 100644
index 0000000..534d0ca
--- /dev/null
+++ b/python/defusedxml/sax.py
@@ -0,0 +1,49 @@
+# defusedxml
+#
+# Copyright (c) 2013 by Christian Heimes <christian@python.org>
+# Licensed to PSF under a Contributor Agreement.
+# See http://www.python.org/psf/license for licensing details.
+"""Defused xml.sax
+"""
+from __future__ import print_function, absolute_import
+
+from xml.sax import InputSource as _InputSource
+from xml.sax import ErrorHandler as _ErrorHandler
+
+from . import expatreader
+
+__origin__ = "xml.sax"
+
+
+def parse(source, handler, errorHandler=_ErrorHandler(), forbid_dtd=False,
+          forbid_entities=True, forbid_external=True):
+    """Parse source with a defused SAX parser.
+
+    handler receives the content events and errorHandler the errors. The
+    forbid_* flags are set on the parser before parsing starts.
+    """
+    if errorHandler is None:
+        # Same fallback as parseString(): never install None as the
+        # error handler.
+        errorHandler = _ErrorHandler()
+    parser = make_parser()
+    parser.setContentHandler(handler)
+    parser.setErrorHandler(errorHandler)
+    parser.forbid_dtd = forbid_dtd
+    parser.forbid_entities = forbid_entities
+    parser.forbid_external = forbid_external
+    parser.parse(source)
+
+
+def parseString(string, handler, errorHandler=_ErrorHandler(),
+                forbid_dtd=False, forbid_entities=True,
+                forbid_external=True):
+    """Parse a byte string with a defused SAX parser.
+
+    handler receives the content events and errorHandler the errors; None
+    is replaced by a default ErrorHandler.
+    """
+    from io import BytesIO
+
+    error_sink = _ErrorHandler() if errorHandler is None else errorHandler
+    reader = make_parser()
+    reader.setContentHandler(handler)
+    reader.setErrorHandler(error_sink)
+    reader.forbid_dtd = forbid_dtd
+    reader.forbid_entities = forbid_entities
+    reader.forbid_external = forbid_external
+
+    # The parser reads from an InputSource, so wrap the data in one.
+    source = _InputSource()
+    source.setByteStream(BytesIO(string))
+    reader.parse(source)
+
+
+def make_parser(parser_list=[]):
+ """Return a new defused expat SAX parser.
+
+ parser_list is accepted but never used; the parser always comes from
+ expatreader.create_parser().
+ """
+ return expatreader.create_parser()
diff --git a/python/defusedxml/xmlrpc.py b/python/defusedxml/xmlrpc.py
new file mode 100644
index 0000000..2a456e6
--- /dev/null
+++ b/python/defusedxml/xmlrpc.py
@@ -0,0 +1,157 @@
+# defusedxml
+#
+# Copyright (c) 2013 by Christian Heimes <christian@python.org>
+# Licensed to PSF under a Contributor Agreement.
+# See http://www.python.org/psf/license for licensing details.
+"""Defused xmlrpclib
+
+Also defuses gzip bomb
+"""
+from __future__ import print_function, absolute_import
+
+import io
+
+from .common import (
+ DTDForbidden, EntitiesForbidden, ExternalReferenceForbidden, PY3)
+
+if PY3:
+ __origin__ = "xmlrpc.client"
+ from xmlrpc.client import ExpatParser
+ from xmlrpc import client as xmlrpc_client
+ from xmlrpc import server as xmlrpc_server
+ from xmlrpc.client import gzip_decode as _orig_gzip_decode
+ from xmlrpc.client import GzipDecodedResponse as _OrigGzipDecodedResponse
+else:
+ __origin__ = "xmlrpclib"
+ from xmlrpclib import ExpatParser
+ import xmlrpclib as xmlrpc_client
+ xmlrpc_server = None
+ from xmlrpclib import gzip_decode as _orig_gzip_decode
+ from xmlrpclib import GzipDecodedResponse as _OrigGzipDecodedResponse
+
+try:
+ import gzip
+except ImportError:
+ gzip = None
+
+
+# Limit maximum request size to prevent resource exhaustion DoS
+# Also used to limit maximum amount of gzip decoded data in order to prevent
+# decompression bombs
+# A value of -1 or smaller disables the limit
+MAX_DATA = 30 * 1024 * 1024 # 30 MB
+
+
+def defused_gzip_decode(data, limit=None):
+    """gzip encoded data -> unencoded data
+
+    Decode data using the gzip content encoding as described in RFC 1952
+
+    :param data: gzip compressed bytes
+    :param limit: maximum size of the decoded data; None selects MAX_DATA,
+                  a negative value disables the check
+    :raises NotImplementedError: gzip support is not available
+    :raises ValueError: the data is invalid or decodes to more than limit
+    """
+    if not gzip:
+        raise NotImplementedError
+    if limit is None:
+        limit = MAX_DATA
+    f = io.BytesIO(data)
+    gzf = gzip.GzipFile(mode="rb", fileobj=f)
+    try:
+        if limit < 0:  # no limit
+            decoded = gzf.read()
+        else:
+            # One byte more than allowed is enough to detect an overrun
+            # without decoding the whole payload.
+            decoded = gzf.read(limit + 1)
+    except IOError:
+        raise ValueError("invalid data")
+    finally:
+        # Release both streams on the error path as well.
+        gzf.close()
+        f.close()
+    if limit >= 0 and len(decoded) > limit:
+        raise ValueError("max gzipped payload length exceeded")
+    return decoded
+
+
+class DefusedGzipDecodedResponse(gzip.GzipFile if gzip else object):
+    """a file-like object to decode a response encoded with the gzip
+    method, as described in RFC 1952.
+
+    The limit applies both to the compressed data read from the response
+    and to the total amount of decoded data returned by read(). None
+    selects MAX_DATA, a negative value disables the limit.
+
+    NOTE(review): only read() is overridden here; other read paths of
+    GzipFile (e.g. readline) are not limited by this class - confirm that
+    callers only use read().
+    """
+
+    def __init__(self, response, limit=None):
+        # response doesn't support tell() and read(), required by
+        # GzipFile
+        if not gzip:
+            raise NotImplementedError
+        self.limit = limit = limit if limit is not None else MAX_DATA
+        if limit < 0:  # no limit
+            data = response.read()
+            self.readlength = None
+        else:
+            # One byte more than allowed is enough to detect an overrun.
+            data = response.read(limit + 1)
+            self.readlength = 0
+        if limit >= 0 and len(data) > limit:
+            raise ValueError("max payload length exceeded")
+        self.stringio = io.BytesIO(data)
+        gzip.GzipFile.__init__(self, mode="rb", fileobj=self.stringio)
+
+    def read(self, n=-1):
+        """Read up to n decoded bytes; a negative n or None reads the rest.
+
+        :raises ValueError: more than limit bytes were decoded in total
+        """
+        if self.limit < 0:
+            return gzip.GzipFile.read(self, n)
+        left = self.limit - self.readlength
+        # Never decode more than one byte past the limit. A "read
+        # everything" request must be capped as well, otherwise the whole
+        # payload is decompressed before the check below can fire.
+        if n is None or n < 0 or n > left + 1:
+            n = left + 1
+        data = gzip.GzipFile.read(self, n)
+        self.readlength += len(data)
+        if self.readlength > self.limit:
+            raise ValueError("max payload length exceeded")
+        return data
+
+    def close(self):
+        gzip.GzipFile.close(self)
+        self.stringio.close()
+
+
+class DefusedExpatParser(ExpatParser):
+    """xmlrpc ExpatParser that rejects DTDs, entities and external
+    references according to its forbid_* flags."""
+
+    def __init__(self, target, forbid_dtd=False, forbid_entities=True,
+                 forbid_external=True):
+        ExpatParser.__init__(self, target)
+        self.forbid_dtd = forbid_dtd
+        self.forbid_entities = forbid_entities
+        self.forbid_external = forbid_external
+        # Handlers go on the expat parser object set up by the base class.
+        expat = self._parser
+        if self.forbid_dtd:
+            expat.StartDoctypeDeclHandler = self.defused_start_doctype_decl
+        if self.forbid_entities:
+            expat.EntityDeclHandler = self.defused_entity_decl
+            expat.UnparsedEntityDeclHandler = (
+                self.defused_unparsed_entity_decl)
+        if self.forbid_external:
+            expat.ExternalEntityRefHandler = (
+                self.defused_external_entity_ref_handler)
+
+    def defused_start_doctype_decl(self, name, sysid, pubid,
+                                   has_internal_subset):
+        """Doctype handler: always raises DTDForbidden."""
+        raise DTDForbidden(name, sysid, pubid)
+
+    def defused_entity_decl(self, name, is_parameter_entity, value, base,
+                            sysid, pubid, notation_name):
+        """Entity declaration handler: always raises EntitiesForbidden."""
+        raise EntitiesForbidden(name, value, base, sysid, pubid,
+                                notation_name)
+
+    def defused_unparsed_entity_decl(self, name, base, sysid, pubid,
+                                     notation_name):
+        """Unparsed entity handler (expat 1.2): raises EntitiesForbidden."""
+        # Unparsed entities carry no value, hence None.
+        raise EntitiesForbidden(name, None, base, sysid, pubid,
+                                notation_name)
+
+    def defused_external_entity_ref_handler(self, context, base, sysid,
+                                            pubid):
+        """External reference handler: raises ExternalReferenceForbidden."""
+        raise ExternalReferenceForbidden(context, base, sysid, pubid)
+
+
+def monkey_patch():
+    """Install the defused parser and gzip helpers into the xmlrpc modules.
+    """
+    replacements = (
+        ("FastParser", DefusedExpatParser),
+        ("GzipDecodedResponse", DefusedGzipDecodedResponse),
+        ("gzip_decode", defused_gzip_decode),
+    )
+    for attr, value in replacements:
+        setattr(xmlrpc_client, attr, value)
+    # xmlrpc_server is None when there is no separate server module.
+    if xmlrpc_server:
+        setattr(xmlrpc_server, "gzip_decode", defused_gzip_decode)
+
+
+def unmonkey_patch():
+    """Undo monkey_patch(): put the original gzip helpers back and reset
+    FastParser to None.
+    """
+    originals = (
+        ("FastParser", None),
+        ("GzipDecodedResponse", _OrigGzipDecodedResponse),
+        ("gzip_decode", _orig_gzip_decode),
+    )
+    for attr, value in originals:
+        setattr(xmlrpc_client, attr, value)
+    # xmlrpc_server is None when there is no separate server module.
+    if xmlrpc_server:
+        setattr(xmlrpc_server, "gzip_decode", _orig_gzip_decode)