"""Sanitize SVG by removing any script calls of any sort.

Reads an SVG document from standard input, removes script elements,
event-handler attributes and anything from an unrecognised namespace, and
writes the result to standard output as UTF-8.

Exits with a non-zero status if any changes were made.

WARNING:
  * Does not validate the SVG against a DTD (or schema or whatever).
  * Pieces of non-SVG XML are mostly not sanitized, but must come from a
    short list of namespaces.
  * Reformats even documents that need no changes (but leaves the XML
    semantically identical).
  * NOTE(review): only element and attribute *names* are checked; attribute
    values (for example a ``javascript:`` URL in ``xlink:href``) are never
    inspected by this script.
  * NOTE(review): the input is parsed with xml.dom.minidom, which the Python
    documentation describes as not hardened against maliciously constructed
    XML -- confirm this is acceptable for the inputs this tool receives.
"""

import re
import sys
import xml.dom
import xml.dom.minidom


class Namespace:
    """A namespace URI; the evil tag/attribute lists are attached below."""

    def __init__(self, name):
        self.name = name


# SVG itself
svg = Namespace("http://www.w3.org/2000/svg")

# This is the complete list of event attributes from
# http://www.w3.org/TR/SVG/interact.html#SVGEvents
svg.event_attributes = [
    "onfocusin",
    "onfocusout",
    "onactivate",
    "onclick",
    "onmousedown",
    "onmouseup",
    "onmouseover",
    "onmousemove",
    "onmouseout",
    "onload",
    "onunload",
    "onabort",
    "onerror",
    "onresize",
    "onscroll",
    "onzoom",
    "onbegin",
    "onend",
    "onrepeat",
]

# From http://www.w3.org/TR/SVG/script.html
svg.script_attributes = [
    "contentScriptType",
]
svg.script_tags = [
    "script",
]

svg.evil_attributes = svg.script_attributes + svg.event_attributes
svg.evil_tags = svg.script_tags

svgns = [
    "http://www.w3.org/2000/svg",
]
adobens = [
    "http://ns.adobe.com/Extensibility/1.0/",
    "http://ns.adobe.com/Flows/1.0/",
    "http://ns.adobe.com/AdobeIllustrator/10.0/",
    "http://ns.adobe.com/AdobeSVGViewerExtensions/3.0/",
]
metans = [
    "http://web.resource.org/cc/",
    "http://purl.org/dc/elements/1.1/",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
    "http://www.w3.org/2000/xmlns/",
    "http://www.w3.org/XML/1998/namespace",
    "http://www.w3.org/1999/xlink",
]
inkns = [
    "http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd",
    "http://inkscape.sourceforge.net/DTD/sodipodi-0.dtd",
    "http://www.inkscape.org/namespaces/inkscape",
]
msns = [
    "http://schemas.microsoft.com/visio/2003/SVGExtensions/",
]

# None stands for "no namespace at all".
acceptable_namespaces = [None] + svgns + adobens + metans + inkns + msns

# Lookup table of acceptable namespaces; only the keys matter.
namespaces = dict.fromkeys(acceptable_namespaces)

# Some namespaces get sanitized as if they were SVG
special_namespaces = {None: svg}
for ns in svgns + adobens + inkns + msns:
    special_namespaces[ns] = svg

# Set to True once an element or attribute from the corresponding vendor
# namespace has been seen.
adobe_extensions = False
ink_extensions = False


def message(s):
    """Write the diagnostic *s* plus a newline to stderr and flush."""
    sys.stderr.write(s)
    sys.stderr.write("\n")
    sys.stderr.flush()


def element_is_acceptable(node):
    """Return True if *node* may stay in the document.

    A node is rejected when its namespace is not in ``namespaces`` or when
    its local name is an evil tag of a namespace that is sanitized like SVG.
    As a side effect the module-level vendor-extension flags are updated.
    """
    global adobe_extensions
    global ink_extensions
    if node.namespaceURI in adobens:
        adobe_extensions = True
    if node.namespaceURI in inkns:
        ink_extensions = True
    if node.namespaceURI not in namespaces:
        message("Namespace '%s'not found; element '%s' unacceptable."
                % (node.namespaceURI, node))
        return False
    if node.namespaceURI in special_namespaces:
        if node.localName in special_namespaces[node.namespaceURI].evil_tags:
            message("Element '%s' unacceptable." % node)
            return False
    return True


def attribute_is_acceptable(node, attribute):
    """Return True if *attribute* may stay on the element *node*.

    An attribute without a namespace of its own is judged in the namespace
    of its element.  As a side effect the module-level vendor-extension
    flags are updated.
    """
    # Without these declarations the assignments below would only create
    # locals and the module flags would never change.
    global adobe_extensions
    global ink_extensions
    nsURI = attribute.namespaceURI or node.namespaceURI
    if nsURI in adobens:
        adobe_extensions = True
    if nsURI in inkns:
        ink_extensions = True
    if nsURI not in namespaces:
        message("Namespace '%s'not found; attribute '%s' unacceptable."
                % (nsURI, attribute))
        return False
    if (nsURI in special_namespaces
            and attribute.localName in special_namespaces[nsURI].evil_attributes):
        message("Attribute '%s' unacceptable." % attribute)
        return False
    return True


# Generic DOM function
def walk_tree(node):
    """Yield *node* and then all of its descendants in document order.

    The children are copied before they are visited, so the caller may
    remove the node it has just been handed without a sibling being skipped.
    """
    yield node
    for child in list(node.childNodes):
        for descendant in walk_tree(child):
            yield descendant


def check_is_svg(doc):
    """Raise ValueError unless *doc* looks like an SVG document.

    With a doctype, its name must be "svg" and its public identifier must be
    a W3C SVG DTD of any version.  Without a doctype, the root element must
    be ``svg`` in the SVG namespace or in no namespace.
    """
    if doc.doctype:
        # Accept all versions of SVG.  publicId is None when the doctype has
        # no public identifier; treat that as "does not match".
        public_id = doc.doctype.publicId
        if (doc.doctype.name != "svg"
                or not re.match(r"-//W3C//DTD SVG [0-9.]+//.*", public_id or "")):
            raise ValueError(
                'Document does not appear to be SVG; doctype is "%s"'
                % doc.doctype.publicId)
    else:
        # No doctype definition; accept as SVG anyway
        root = doc.documentElement
        if root.namespaceURI not in [None, svg.name] or root.localName != "svg":
            raise ValueError(
                'Document does not appear to be SVG; no doctype and root tag '
                'is "%s" in namespace "%s".' % (root, root.namespaceURI))


def sanitize(doc):
    """Strip unacceptable elements and attributes from *doc* in place.

    Returns True if anything was removed, False otherwise.
    """
    changed = False
    for node in walk_tree(doc):
        # Eradicate anything from other namespaces
        if not element_is_acceptable(node):
            if node.parentNode is not None:
                node.parentNode.removeChild(node)
                changed = True
        # Eradicate evil attributes.  The detached subtree of a removed node
        # is still visited; that is harmless because it is no longer part of
        # the document.
        attrs = node.attributes
        if attrs is not None:
            # Snapshot first: removing while indexing would skip attributes.
            for attr in [attrs.item(i) for i in range(attrs.length)]:
                if not attribute_is_acceptable(node, attr):
                    node.removeAttributeNode(attr)
                    changed = True
    return changed


def main():
    """Sanitize stdin to stdout; exit with 1 if changes were made, else 0."""
    # Use the byte streams where they exist (Python 3) so the XML parser sees
    # the raw bytes and the UTF-8 output can be written unchanged.
    source = getattr(sys.stdin, "buffer", sys.stdin)
    sink = getattr(sys.stdout, "buffer", sys.stdout)

    doc = xml.dom.minidom.parse(source)
    check_is_svg(doc)

    # Begin cleansing
    changes = sanitize(doc)

    #if adobe_extensions: message("File contains Adobe extensions to SVG.")
    #if ink_extensions: message("File contains Inkscape/Sodipodi extensions to SVG.")

    sink.write(doc.toxml("utf-8"))
    sink.write(b"\n")  # newline at end of file
    sink.flush()

    if changes:
        sys.exit(1)
    else:
        sys.exit(0)


if __name__ == "__main__":
    main()