From f18b39c79273cf19c4a2384617901900a110b0ce Mon Sep 17 00:00:00 2001 From: Jesper Reenberg Date: Tue, 2 Sep 2014 12:46:48 +0200 Subject: [PATCH 1/2] Removed mandatory=False from entities in MaltegoTransformRequestMessage When parsing the following valid XML from Maltego, the 'entities' field is actually not parsed but silently ignored, due to the fact that it was marked optional. Now the following code will throw an ParseError, which needs to be fixed. To aid this, I have added a debug version of 'safedexml.py' in the xmltools folder, which may be copied into your path, and thus output debug info when calling the 'parse' function. This debug output is also included at the end. --------------------------------------------------------------------------- In [1]: import canari.maltego.message as msg In [2]: mmsg = msg.MaltegoMessage.parse(''' paterva.com 0 paterva.com ''') --------------------------------------------------------------------------- ParseError Traceback (most recent call last) in () 14 15 ---> 16 ''') /usr/local/lib/python2.7/dist-packages/safedexml/__init__.py in parse(cls, xml) 343 print nest(), 'self._fields:', str(self._fields) 344 print nest(), 'fields found:', fields_found --> 345 self._parse_children_unordered(node, self._fields, fields_found) 346 # Check that all required fields have been found 347 /usr/local/lib/python2.7/dist-packages/safedexml/__init__.py in _parse_children_unordered(self, node, fields, fields_found) 402 continue 403 field = fields[idx] --> 404 res = field.parse_child_node(self, child) 405 if res is PARSE_DONE: 406 done_fields[idx] = True /usr/local/lib/python2.7/dist-packages/safedexml/fields.pyc in parse_child_node(self, obj, node) 809 field.field_name = self.field_name 810 field.model_class = self.model_class --> 811 res = field.parse_child_node(obj, node) 812 if res is dexml.PARSE_MORE: 813 raise ValueError("items in a Choice cannot return PARSE_MORE") /usr/local/lib/python2.7/dist-packages/safedexml/fields.pyc in 
parse_child_node(self, obj, node) 474 return dexml.PARSE_SKIP 475 else: --> 476 inst = typeclass.parse(node) 477 self.__set__(obj, inst) 478 return dexml.PARSE_DONE /usr/local/lib/python2.7/dist-packages/safedexml/__init__.py in parse(cls, xml) 351 if field.required and field not in fields_found: 352 err = "required field not found: '%s'" % (field.field_name,) --> 353 raise ParseError(err) 354 field.parse_done(self) 355 # All done, return the instance so created ParseError: required field not found: 'entities' --------------------------------------------------------------------------- For reference, I have added some debug output from safedexml, which may help to diagnose the issue. It is seen that when it constructs the class MaltegoTransformRequestMessage, then it goes straight to creating the Limits class. --------------------------------------------------------------------------- parse(....) ------------------------------------------------------------------------------------ Constructing class: MaltegoMessage Generating node from XML: '\n\n \n \n \n \n paterva.com\n \n 0\n paterva.com\n \n \n \n \n\n' node: attrs: [] Current node has the following defined fields: field: (tagname: None) (attrname: None) (field name: message) (model class: ) Has unused_attrs: [] parsing order_sensitive = True parse(....) ------------------------------------------------------------------------------------ Constructing class: MaltegoTransformRequestMessage Generating node from XML: node: attrs: [] Current node has the following defined fields: field: (tagname: Entities) (attrname: None) (field name: entities) (model class: ) Has unused_attrs: [] field: (tagname: TransformFields) (attrname: None) (field name: parameters) (model class: ) Has unused_attrs: [] field: (tagname: None) (attrname: None) (field name: limits) (model class: ) Has unused_attrs: [] parsing order_sensitive = True parse(....) 
------------------------------------------------------------------------------------ Constructing class: Limits Generating node from XML: node: attrs: [, ] Current node has the following defined fields: field: (tagname: None) (attrname: SoftLimit) (field name: soft) (model class: ) Has unused_attrs: [] adding field to list of found field: (tagname: None) (attrname: HardLimit) (field name: hard) (model class: ) Has unused_attrs: [] adding field to list of found parsing order_sensitive = True parse() check that required fields have been found looping field: looping field: Created: Limits parse(....) ----------------------------------------- END -------------------------------------- parse() check that required fields have been found looping field: ERROR: An unexpected error occurred while tokenizing input The following traceback may be corrupted or invalid The error message is: ('EOF in multi-line string', (1, 0)) --------------------------------------------------------------------------- --- src/canari/maltego/message.py | 2 +- src/canari/xmltools/safedexml-debug/README | 14 + .../xmltools/safedexml-debug/__init__.py | 718 ++++++++++++++ src/canari/xmltools/safedexml-debug/fields.py | 901 ++++++++++++++++++ 4 files changed, 1634 insertions(+), 1 deletion(-) create mode 100644 src/canari/xmltools/safedexml-debug/README create mode 100644 src/canari/xmltools/safedexml-debug/__init__.py create mode 100644 src/canari/xmltools/safedexml-debug/fields.py diff --git a/src/canari/maltego/message.py b/src/canari/maltego/message.py index da40d39..6886367 100644 --- a/src/canari/maltego/message.py +++ b/src/canari/maltego/message.py @@ -543,7 +543,7 @@ def __setattr__(self, key, value): class MaltegoTransformRequestMessage(MaltegoElement): - entities = fields_.List(_Entity, tagname='Entities', required=False) + entities = fields_.List(_Entity, tagname='Entities') parameters = fields_.Dict(Field, tagname='TransformFields', key='name', required=False) limits = 
fields_.Model(Limits, required=False) diff --git a/src/canari/xmltools/safedexml-debug/README b/src/canari/xmltools/safedexml-debug/README new file mode 100644 index 0000000..71d083e --- /dev/null +++ b/src/canari/xmltools/safedexml-debug/README @@ -0,0 +1,14 @@ +Copy this folder into path and rename it to safedexml or if you have installed +safedexml from pip, overwrite the dist-packages path +/usr/local/lib/python2.7/dist-packages/safedexml/ with the content. + +This will generate some debug output. + +It is advised to only copy one of the files at a time (e.g., only __init__.py or +only fields.py) as their output will mingle and it will be near to impossible to +tell them apart. Yest the output is wery simple and could probably have been +made smarter... + +Currently the output of __init__.py is the 'best', in the sense that it tries to +nest the output and indent it so, it is easier to follow function calls, and the +'parsing logic' within the parse() function. diff --git a/src/canari/xmltools/safedexml-debug/__init__.py b/src/canari/xmltools/safedexml-debug/__init__.py new file mode 100644 index 0000000..a2d51cf --- /dev/null +++ b/src/canari/xmltools/safedexml-debug/__init__.py @@ -0,0 +1,718 @@ +""" + +dexml: a dead-simple Object-XML mapper for Python +================================================== + +Let's face it: xml is a fact of modern life. I'd even go so far as to say +that it's *good* at what is does. But that doesn't mean it's easy to work +with and it doesn't mean that we have to like it. Most of the time, XML +just needs to get out of the way and let you do some actual work instead +of writing code to traverse and manipulate yet another DOM. + +The dexml module takes the obvious mapping between XML tags and Python objects +and lets you capture that as cleanly as possible. Loosely inspired by Django's +ORM, you write simple class definitions to define the expected structure of +your XML document. 
Like so:: + + >>> import dexml + >>> from dexml import fields + >>> class Person(dexml.Model): + ... name = fields.String() + ... age = fields.Integer(tagname='age') + +Then you can parse an XML document into an object like this:: + + >>> p = Person.parse("42") + >>> p.name + u'Foo McBar' + >>> p.age + 42 + +And you can render an object into an XML document like this:: + + >>> p = Person(name="Handsome B. Wonderful",age=36) + >>> p.render() + '36' + +Malformed documents will raise a ParseError:: + + >>> p = Person.parse("92") + Traceback (most recent call last): + ... + ParseError: required field not found: 'name' + +Of course, it gets more interesting when you nest Model definitions, like this:: + + >>> class Group(dexml.Model): + ... name = fields.String(attrname="name") + ... members = fields.List(Person) + ... + >>> g = Group(name="Monty Python") + >>> g.members.append(Person(name="John Cleese",age=69)) + >>> g.members.append(Person(name="Terry Jones",age=67)) + >>> g.render(fragment=True) + '6967' + +There's support for XML namespaces, default field values, case-insensitive +parsing, and more fun stuff. 
Check out the documentation on the following +classes for more details: + + :Model: the base class for objects that map into XML + :Field: the base class for individual model fields + :Meta: meta-information about how to parse/render a model + +""" + +__ver_major__ = 0 +__ver_minor__ = 5 +__ver_patch__ = 1 +__ver_sub__ = "" +__version__ = "%d.%d.%d%s" % (__ver_major__, __ver_minor__, __ver_patch__, __ver_sub__) + +import sys +import re +import copy +from defusedxml import minidom +from safedexml import fields + + +if sys.version_info >= (3,): + str = str #pragma: no cover + unicode = str #pragma: no cover + bytes = bytes #pragma: no cover + basestring = (str, bytes) #pragma: no cover +else: + str = str #pragma: no cover + unicode = unicode #pragma: no cover + bytes = str #pragma: no cover + basestring = basestring #pragma: no cover + + +_nest = [] +def nest(): + return ''.join(_nest) +def nest_more(): + _nest.append(' ') +def nest_less(): + _nest.pop() + + +class Error(Exception): + """Base exception class for the dexml module.""" + pass + + +class ParseError(Error): + """Exception raised when XML could not be parsed into objects.""" + pass + + +class RenderError(Error): + """Exception raised when object could not be rendered into XML.""" + pass + + +class XmlError(Error): + """Exception raised to encapsulate errors from underlying XML parser.""" + pass + + +class PARSE_DONE(object): + """Constant returned by a Field when it has finished parsing.""" + pass + + +class PARSE_MORE(object): + """Constant returned by a Field when it wants additional nodes to parse.""" + pass + + +class PARSE_SKIP(object): + """Constant returned by a Field when it cannot parse the given node.""" + pass + + +class PARSE_CHILDREN(object): + """Constant returned by a Field to parse children from its container tag.""" + pass + + +class Meta(object): + """Class holding meta-information about a dexml.Model subclass. 
+ + Each dexml.Model subclass has an attribute 'meta' which is an instance + of this class. That instance holds information about how the model + corresponds to XML, such as its tagname, namespace, and error handling + semantics. You would not ordinarily create an instance of this class; + instead let the ModelMetaclass create one automatically. + + These attributes control how the model corresponds to the XML: + + * tagname: the name of the tag representing this model + * namespace: the XML namespace in which this model lives + + These attributes control parsing/rendering behaviour: + + * namespace_prefix: the prefix to use for rendering namespaced tags + * ignore_unknown_elements: ignore unknown elements when parsing + * case_sensitive: match tag/attr names case-sensitively + * order_sensitive: match child tags in order of field definition + + """ + + _defaults = {"tagname": None, + "namespace": None, + "namespace_prefix": None, + "ignore_unknown_elements": True, + "case_sensitive": True, + "order_sensitive": True} + + def __init__(self, name, meta_attrs): + for (attr, default) in self._defaults.items(): + setattr(self, attr, meta_attrs.get(attr, default)) + if self.tagname is None: + self.tagname = name + + +def _meta_attributes(meta): + """Extract attributes from a "meta" object.""" + meta_attrs = {} + if meta: + for attr in dir(meta): + if not attr.startswith("_"): + meta_attrs[attr] = getattr(meta, attr) + return meta_attrs + + +class ModelMetaclass(type): + """Metaclass for dexml.Model and subclasses. + + This metaclass is responsible for introspecting Model class definitions + and setting up appropriate default behaviours. For example, this metaclass + sets a Model's default tagname to be equal to the declared class name. 
+ """ + + instances_by_tagname = {} + instances_by_classname = {} + + def __new__(mcls, name, bases, attrs): + cls = super(ModelMetaclass, mcls).__new__(mcls, name, bases, attrs) + # Don't do anything if it's not a subclass of Model + parents = [b for b in bases if isinstance(b, ModelMetaclass)] + if not parents: + return cls + # Set up the cls.meta object, inheriting from base classes + meta_attrs = {} + for base in reversed(bases): + if isinstance(base, ModelMetaclass) and hasattr(base, "meta"): + meta_attrs.update(_meta_attributes(base.meta)) + meta_attrs.pop("tagname", None) + meta_attrs.update(_meta_attributes(attrs.get("meta", None))) + cls.meta = Meta(name, meta_attrs) + # Create ordered list of field objects, telling each about their + # name and containing class. Inherit fields from base classes + # only if not overridden on the class itself. + base_fields = {} + for base in bases: + if not isinstance(base, ModelMetaclass): + continue + for field in base._fields: + if field.field_name not in base_fields: + field = copy.copy(field) + field.model_class = cls + base_fields[field.field_name] = field + cls_fields = [] + for (name, value) in attrs.iteritems(): + if isinstance(value, fields.Field): + base_fields.pop(name, None) + value.field_name = name + value.model_class = cls + cls_fields.append(value) + cls._fields = base_fields.values() + cls_fields + cls._fields.sort(key=lambda f: f._order_counter) + # Register the new class so we can find it by name later on + tagname = (cls.meta.namespace, cls.meta.tagname) + mcls.instances_by_tagname[tagname] = cls + mcls.instances_by_classname[cls.__name__] = cls + return cls + + @classmethod + def find_class(mcls, tagname, namespace=None): + """Find dexml.Model subclass for the given tagname and namespace.""" + try: + return mcls.instances_by_tagname[(namespace, tagname)] + except KeyError: + if namespace is None: + try: + return mcls.instances_by_classname[tagname] + except KeyError: + pass + return None + + +# You 
can use this re to extract the encoding declaration from the XML +# document string. Hopefully you won't have to, but you might need to... +_XML_ENCODING_RE = re.compile("<\\?xml [^>]*encoding=[\"']([a-zA-Z0-9\\.\\-\\_]+)[\"'][^>]*?>") + + +class Model(object): + """Base class for dexml Model objects. + + Subclasses of Model represent a concrete type of object that can parsed + from or rendered to an XML document. The mapping to/from XML is controlled + by two things: + + * attributes declared on an inner class named 'meta' + * fields declared using instances of fields.Field + + Here's a quick example: + + class Person(dexml.Model): + # This overrides the default tagname of 'Person' + class meta + tagname = "person" + # This maps to a 'name' attributr on the tag + name = fields.String() + # This maps to an tag within the tag + age = fields.Integer(tagname='age') + + See the 'Meta' class in this module for available meta options, and the + 'fields' submodule for available field types. + """ + + __metaclass__ = ModelMetaclass + _fields = [] + + def __init__(self, **kwds): + """Default Model constructor. + + Keyword arguments that correspond to declared fields are processed + and assigned to that field. + """ + for f in self._fields: + try: + setattr(self, f.field_name, kwds[f.field_name]) + except KeyError: + pass + + + + @classmethod + def parse(cls, xml): + """Produce an instance of this model from some xml. + + The given xml can be a string, a readable file-like object, or + a DOM node; we might add support for more types in the future. + """ + + print nest(), 'parse(....) 
------------------------------------------------------------------------------------' + nest_more() + + print nest(), 'Constructing class:', cls.__name__ + self = cls() + print nest(), 'Calling _make_xml_node with:', repr(xml) + node = self._make_xml_node(xml) + print nest(), 'Got node back:', str(node) + + self.validate_xml_node(node) + # Keep track of fields that have successfully parsed something + fields_found = [] + # Try to consume all the node's attributes + attrs = node.attributes.values() + print nest(), 'XML node has the following attrs:', str(attrs) + + print nest(), 'Current class instance has the following defined fields:' + for field in self._fields: + nest_more() + print nest(), 'field:', str(field), \ + '(tagname: %s)' % getattr(field, 'tagname', None), \ + '(attrname: %s)' % getattr(field, 'attrname', None), \ + '(field name: %s)' % getattr(field, 'field_name', None), \ + '(model class: %s)' % getattr(field, 'model_class', None) + + unused_attrs = field.parse_attributes(self, attrs) + print nest(), ' |- unused attrs, after consuming by (attribute) field:', str(unused_attrs) + + if len(unused_attrs) < len(attrs): + print nest(), ' |- An attribute was consumed, so the current field must be an' \ + 'attribute field. Store it in the list of found fields.' + fields_found.append(field) + else: + print nest(), ' |- No attributes was consumes, this field must be a tag field.' + + # Store resulting unused attributes, for next loop iteration. 
+ attrs = unused_attrs + nest_less() + + for attr in attrs: + print nest(), 'handle unparsed attr:', str(attr) + self._handle_unparsed_node(attr) + + # Try to consume all child nodes + print nest(), 'Try to consume child nodes' + if self.meta.order_sensitive: + print nest(), 'parsing order_sensitive = True' + self._parse_children_ordered(node, self._fields, fields_found) + else: + print nest(), 'parsing order_sensitive = False' + print nest(), 'Current fields matched (attribute fields):', fields_found + self._parse_children_unordered(node, self._fields, fields_found) + # Check that all required fields have been found + + print nest(), 'parse() check that required fields have been found' + for field in self._fields: + nest_more() + print nest(), 'looping field:', str(field), \ + '(tagname: %s)' % getattr(field, 'tagname', None), \ + '(attrname: %s)' % getattr(field, 'attrname', None), \ + '(field name: %s)' % getattr(field, 'field_name', None), \ + '(model class: %s)' % getattr(field, 'model_class', None) + if field.required and field not in fields_found: + err = "required field not found: '%s'" % (field.field_name,) + raise ParseError(err) + field.parse_done(self) + nest_less() + # All done, return the instance so created + print nest(), 'Created:', type(self).__name__ + + nest_less() + print nest(), 'parse(....) ----------------------------------------- END --------------------------------------' + return self + + def _parse_children_ordered(self, node, fields, fields_found): + """Parse the children of the given node using strict field ordering.""" + cur_field_idx = 0 + for child in node.childNodes: + idx = cur_field_idx + # If we successfully break out of this loop, one of our + # fields has consumed the node. 
+ while idx < len(fields): + field = fields[idx] + res = field.parse_child_node(self, child) + if res is PARSE_DONE: + if field not in fields_found: + fields_found.append(field) + cur_field_idx = idx + 1 + break + if res is PARSE_MORE: + if field not in fields_found: + fields_found.append(field) + cur_field_idx = idx + break + if res is PARSE_CHILDREN: + if field not in fields_found: + fields_found.append(field) + self._parse_children_ordered(child, [field], fields_found) + cur_field_idx = idx + break + idx += 1 + else: + self._handle_unparsed_node(child) + + def _parse_children_unordered(self, node, fields, fields_found): + """Parse the children of the given node using loose field ordering.""" + print nest(), '_parse_children_unordered(....) ----------------------------------------------------------------' + nest_more() + + done_fields = {} + print nest(), 'Looping through XML child nodes:', str(node.childNodes) + for child in node.childNodes: + nest_more() + print nest(), 'Current child:', child, 'Trying to match to one of the fields...' + + idx = 0 + # If we successfully break out of this loop, one of our + # fields has consumed the node. 
+ + while idx < len(fields): + nest_more() + print nest(), 'Looping', idx+1, 'of', len(fields) + if idx in done_fields: + print nest(), 'Skipping index %s as it is in done_fields: ' % idx, done_fields + idx += 1 + nest_less() # remember to nest less :) + continue + field = fields[idx] + print nest(), 'Trying field:', str(field), \ + '(tagname: %s)' % getattr(field, 'tagname', None), \ + '(attrname: %s)' % getattr(field, 'attrname', None), \ + '(field name: %s)' % getattr(field, 'field_name', None), \ + '(model class: %s)' % getattr(field, 'model_class', None) + nest_more() + print nest(), 'Calling parse_child_node of the current field, with the current child' + res = field.parse_child_node(self, child) + + if res is PARSE_DONE: + print nest(), 'got PARSE_DONE' + done_fields[idx] = True + if field not in fields_found: + print nest(), '\t Adding field to fields_found' + fields_found.append(field) + break + if res is PARSE_MORE: + print nest(), 'got PARSE_MORE' + if field not in fields_found: + print nest(), '\t Adding field to fields_found' + fields_found.append(field) + break + if res is PARSE_CHILDREN: + print nest(), 'got PARSE_CHILDREN' + if field not in fields_found: + print nest(), '\t Adding field to fields_found' + fields_found.append(field) + + print nest(), '\t Adding field to fields_found' + self._parse_children_unordered(child, [field], fields_found) + break + else: + print nest(), '--------> Got unknown and unhandled parse result:', res, ' <--------------' + idx += 1 + nest_less() + nest_less() + else: + print nest(), 'Done looping. Hit else part of while loop. Calling _handle_unparsed_node()' + self._handle_unparsed_node(child) + nest_less() + + nest_less() + print nest(), '_parse_children_unordered(....) 
------------------------------- END ----------------------------' + + def _handle_unparsed_node(self, node): + if not self.meta.ignore_unknown_elements: + if node.nodeType == node.ELEMENT_NODE: + err = "unknown element: %s" % (node.nodeName,) + raise ParseError(err) + elif node.nodeType in (node.TEXT_NODE, node.CDATA_SECTION_NODE): + if node.nodeValue.strip(): + err = "unparsed text node: %s" % (node.nodeValue,) + raise ParseError(err) + elif node.nodeType == node.ATTRIBUTE_NODE: + if not node.nodeName.startswith("xml"): + err = "unknown attribute: %s" % (node.name,) + raise ParseError(err) + + def render(self, encoding=None, fragment=False, pretty=False, nsmap=None): + """Produce XML from this model's instance data. + + A unicode string will be returned if any of the objects contain + unicode values; specifying the 'encoding' argument forces generation + of a bytestring. + + By default a complete XML document is produced, including the + leading "" declaration. To generate an XML fragment set + the 'fragment' argument to True. + """ + if nsmap is None: + nsmap = {} + data = [] + if not fragment: + if encoding: + s = '' % (encoding,) + data.append(s) + else: + data.append('') + data.extend(self._render(nsmap)) + xml = "".join(data) + if pretty: + xml = minidom.parseString(xml).toprettyxml() + if encoding: + xml = xml.encode(encoding) + return xml + + def irender(self, encoding=None, fragment=False, nsmap=None): + """Generator producing XML from this model's instance data. + + If any of the objects contain unicode values, the resulting output + stream will be a mix of bytestrings and unicode; specify the 'encoding' + arugment to force generation of bytestrings. + + By default a complete XML document is produced, including the + leading "" declaration. To generate an XML fragment set + the 'fragment' argument to True. 
+ """ + if nsmap is None: + nsmap = {} + if not fragment: + if encoding: + decl = '' % (encoding,) + yield decl.encode(encoding) + else: + yield '' + if encoding: + for data in self._render(nsmap): + if isinstance(data, unicode): + data = data.encode(encoding) + yield data + else: + for data in self._render(nsmap): + yield data + + def _render(self, nsmap): + """Generator rendering this model as an XML fragment.""" + # Determine opening and closing tags + pushed_ns = False + if self.meta.namespace: + namespace = self.meta.namespace + prefix = self.meta.namespace_prefix + try: + cur_ns = nsmap[prefix] + except KeyError: + cur_ns = [] + nsmap[prefix] = cur_ns + if prefix: + tagname = "%s:%s" % (prefix, self.meta.tagname) + open_tag_contents = [tagname] + if not cur_ns or cur_ns[0] != namespace: + cur_ns.insert(0, namespace) + pushed_ns = True + open_tag_contents.append('xmlns:%s="%s"' % (prefix, namespace)) + close_tag_contents = tagname + else: + open_tag_contents = [self.meta.tagname] + if not cur_ns or cur_ns[0] != namespace: + cur_ns.insert(0, namespace) + pushed_ns = True + open_tag_contents.append('xmlns="%s"' % (namespace,)) + close_tag_contents = self.meta.tagname + else: + open_tag_contents = [self.meta.tagname] + close_tag_contents = self.meta.tagname + used_fields = set() + open_tag_contents.extend(self._render_attributes(used_fields, nsmap)) + # Render each child node + children = self._render_children(used_fields, nsmap) + try: + first_child = children.next() + except StopIteration: + yield "<%s />" % (" ".join(open_tag_contents),) + else: + yield "<%s>" % (" ".join(open_tag_contents),) + yield first_child + for child in children: + yield child + yield "" % (close_tag_contents,) + # Check that all required fields actually rendered something + for f in self._fields: + if f.required and f not in used_fields: + raise RenderError("Field '%s' is missing" % (f.field_name,)) + # Clean up + if pushed_ns: + nsmap[prefix].pop(0) + + def _render_attributes(self, 
used_fields, nsmap): + for f in self._fields: + val = getattr(self, f.field_name) + datas = iter(f.render_attributes(self, val, nsmap)) + try: + data = datas.next() + except StopIteration: + pass + else: + used_fields.add(f) + yield data + for data in datas: + yield data + + def _render_children(self, used_fields, nsmap): + for f in self._fields: + val = getattr(self, f.field_name) + datas = iter(f.render_children(self, val, nsmap)) + try: + data = datas.next() + except StopIteration: + pass + else: + used_fields.add(f) + yield data + for data in datas: + yield data + + @staticmethod + def _make_xml_node(xml): + """Transform a variety of input formats to an XML DOM node.""" + + print nest(), '_make_xml_node(....) -----------------------------------------------------------------------------' + nest_more() + + try: + print nest(), "Trying to see if the given 'xml' has a 'nodeType'" + ntype = xml.nodeType + except AttributeError: + print nest(), "Got 'AttributeError', so this must be a string with xml to be parsed" + if isinstance(xml, bytes): + print nest(), "The 'xml' is a 'bytes' instance" + try: + xml = minidom.parseString(xml) + except Exception, e: + raise XmlError(e) + elif isinstance(xml, unicode): + print nest(), "The 'xml' is a 'unicode' instance" + try: + # Try to grab the "encoding" attribute from the XML. + # It probably won't exist, so default to utf8. 
+ encoding = _XML_ENCODING_RE.match(xml) + if encoding is None: + encoding = "utf8" + else: + encoding = encoding.group(1) + xml = minidom.parseString(xml.encode(encoding)) + except Exception, e: + raise XmlError(e) + elif hasattr(xml, "read"): + print nest(), "The 'xml' is a file like instance, with a 'read' attribute" + try: + xml = minidom.parse(xml) + except Exception, e: + raise XmlError(e) + else: + raise ValueError("Can't convert that to an XML DOM node") + print nest(), "Getting the 'documentElement' from the parsed 'xml'" + node = xml.documentElement + else: + if ntype == xml.DOCUMENT_NODE: + print nest(), "The given 'xml' is a document node, returning its 'documentElement'" + node = xml.documentElement + else: + print nest(), "The given 'xml' is a 'documentElement', returning it directly" + node = xml + + nest_less() + print nest(), '_make_xml_node(....) ---------------------------------- END --------------------------------------' + return node + + @classmethod + def validate_xml_node(cls, node): + """Check that the given xml node is valid for this object. + + Here 'valid' means that it is the right tag, in the right + namespace. We might add more eventually... 
+ """ + if node.nodeType != node.ELEMENT_NODE: + err = "Class '%s' got a non-element node" + err = err % (cls.__name__,) + raise ParseError(err) + if cls.meta.case_sensitive: + if node.localName != cls.meta.tagname: + err = "Class '%s' got tag '%s' (expected '%s')" + err = err % (cls.__name__, node.localName, + cls.meta.tagname) + raise ParseError(err) + else: + if node.localName.lower() != cls.meta.tagname.lower(): + err = "Class '%s' got tag '%s' (expected '%s')" + err = err % (cls.__name__, node.localName, + cls.meta.tagname) + raise ParseError(err) + if cls.meta.namespace: + if node.namespaceURI != cls.meta.namespace: + err = "Class '%s' got namespace '%s' (expected '%s')" + err = err % (cls.__name__, node.namespaceURI, + cls.meta.namespace) + raise ParseError(err) + else: + if node.namespaceURI: + err = "Class '%s' got namespace '%s' (expected no namespace)" + err = err % (cls.__name__, node.namespaceURI,) + raise ParseError(err) + + diff --git a/src/canari/xmltools/safedexml-debug/fields.py b/src/canari/xmltools/safedexml-debug/fields.py new file mode 100644 index 0000000..4b571fb --- /dev/null +++ b/src/canari/xmltools/safedexml-debug/fields.py @@ -0,0 +1,901 @@ +""" + +dexml.fields: basic field type definitions for dexml +===================================================== + +""" + +import safedexml as dexml +import random +from xml.sax.saxutils import escape, quoteattr + + + +class _AttrBucket(object): + """A simple class used only to hold attributes.""" + pass + + +class Field(object): + """Base class for all dexml Field classes. + + Field classes are responsible for parsing and rendering individual + components to the XML. They also act as descriptors on dexml Model + instances, to get/set the corresponding properties. 
+ + Each field instance will magically be given the following properties: + + * model_class: the Model subclass to which it is attached + * field_name: the name under which is appears on that class + + The following methods are required for interaction with the parsing + and rendering machinery: + + * parse_attributes: parse info out of XML node attributes + * parse_child_node: parse into out of an XML child node + * render_attributes: render XML for node attributes + * render_children: render XML for child nodes + + """ + + # Global counter tracking the order in which fields are declared. + _order_counters_ = 0 + + + class arguments: + required = True + + def __init__(self, **kwds): + """Default Field constructor. + + This constructor keeps track of the order in which Field instances + are created, since this information can have semantic meaning in + XML. It also merges any keyword arguments with the defaults + defined on the 'arguments' inner class, and assigned these attributes + to the Field instance. + """ + self._order_counter = Field._order_counters_ = Field._order_counters_ + 1 + args = self.__class__.arguments + for argnm in dir(args): + if not argnm.startswith("__"): + setattr(self, argnm, kwds.get(argnm, getattr(args, argnm))) + + def parse_attributes(self, obj, attrs): + """Parse any attributes for this field from the given list. + + This method will be called with the Model instance being parsed and + a list of attribute nodes from its XML tag. Any attributes of + interest to this field should be processed, and a list of the unused + attribute nodes returned. + """ + return attrs + + def parse_child_node(self, obj, node): + """Parse a child node for this field. + + This method will be called with the Model instance being parsed and + the current child node of that model's XML tag. There are three + options for processing this node: + + * return PARSE_DONE, indicating that it was consumed and this + field now has all the necessary data. 
+ * return PARSE_MORE, indicating that it was consumed but this + field will accept more nodes. + * return PARSE_SKIP, indicating that it was not consumed by + this field. + + Any other return value will be taken as a parse error. + """ + print "parse_child_node called on the base 'Field' class" + return dexml.PARSE_SKIP + + def parse_done(self, obj): + """Finalize parsing for the given object. + + This method is called as a simple indicator that no more data will + be forthcoming. No return value is expected. + """ + pass + + def render_attributes(self, obj, val, nsmap): + """Render any attributes that this field manages.""" + return [] + + def render_children(self, obj, nsmap, val): + """Render any child nodes that this field manages.""" + return [] + + def __get__(self, instance, owner=None): + print 'Field.__get__:', 'self:', self, 'instance:', instance, 'owner:', owner + if instance is None: + print '\t instance was None, returning self' + return self + + print '\t getting return value' + res = instance.__dict__.get(self.field_name) + print '\t instance __dict__:', instance.__dict__ + print '\t returning:', res, 'from field name:', self.field_name + return res + + def __set__(self, instance, value): + print 'Field.__set__', self, instance, self.field_name, '<-', value + print '\t Instance dict (before):', instance.__dict__ + instance.__dict__[self.field_name] = value + print '\t Instance dict (after):', instance.__dict__ + + + def _check_tagname(self, node, tagname): + if node.nodeType != node.ELEMENT_NODE: + return False + if isinstance(tagname, basestring): + if node.localName != tagname: + return False + if node.namespaceURI: + if node.namespaceURI != self.model_class.meta.namespace: + return False + else: + (tagns, tagname) = tagname + if node.localName != tagname: + return False + if node.namespaceURI != tagns: + return False + return True + + +class Value(Field): + """Field subclass that holds a simple scalar value. 
+ + This Field subclass contains the common logic to parse/render simple + scalar value fields - fields that don't required any recursive parsing. + Individual subclasses should provide the parse_value() and render_value() + methods to do type coercion of the value. + + Value fields can also have a default value, specified by the 'default' + keyword argument. + + By default, the field maps to an attribute of the model's XML node with + the same name as the field declaration. Consider: + + class MyModel(Model): + my_field = fields.Value(default="test") + + + This corresponds to the XML fragment "". + To use a different name specify the 'attrname' kwd argument. To use + a subtag instead of an attribute specify the 'tagname' kwd argument. + + Namespaced attributes or subtags are also supported, by specifying a + (namespace,tagname) pair for 'attrname' or 'tagname' respectively. + """ + + class arguments(Field.arguments): + tagname = None + attrname = None + default = None + + def __init__(self, **kwds): + super(Value, self).__init__(**kwds) + if self.default is not None: + self.required = False + + def _get_attrname(self): + if self.__dict__["tagname"]: + return None + attrname = self.__dict__['attrname'] + if not attrname: + attrname = self.field_name + return attrname + + def _set_attrname(self, attrname): + self.__dict__['attrname'] = attrname + + attrname = property(_get_attrname, _set_attrname) + + def _get_tagname(self): + if self.__dict__["attrname"]: + return None + tagname = self.__dict__['tagname'] + if tagname and not isinstance(tagname, (basestring, tuple)): + tagname = self.field_name + return tagname + + def _set_tagname(self, tagname): + self.__dict__['tagname'] = tagname + + tagname = property(_get_tagname, _set_tagname) + + def __get__(self, instance, owner=None): + val = super(Value, self).__get__(instance, owner) + if val is None: + return self.default + return val + + def parse_attributes(self, obj, attrs): + # Bail out if we're attached to a 
subtag rather than an attr. + if self.tagname: + return attrs + unused = [] + attrname = self.attrname + if isinstance(attrname, basestring): + ns = None + else: + (ns, attrname) = attrname + for attr in attrs: + if attr.localName == attrname: + if attr.namespaceURI == ns: + self.__set__(obj, self.parse_value(attr.nodeValue)) + else: + unused.append(attr) + else: + unused.append(attr) + return unused + + def parse_child_node(self, obj, node): + if not self.tagname: + return dexml.PARSE_SKIP + if self.tagname == ".": + node = node.parentNode + else: + if not self._check_tagname(node, self.tagname): + return dexml.PARSE_SKIP + vals = [] + # Merge all text nodes into a single value + for child in node.childNodes: + if child.nodeType not in (child.TEXT_NODE, child.CDATA_SECTION_NODE): + raise dexml.ParseError("non-text value node") + vals.append(child.nodeValue) + self.__set__(obj, self.parse_value("".join(vals))) + return dexml.PARSE_DONE + + def render_attributes(self, obj, val, nsmap): + if val is not None and self.attrname: + qaval = quoteattr(self.render_value(val)) + if isinstance(self.attrname, basestring): + yield '%s=%s' % (self.attrname, qaval,) + else: + m_meta = self.model_class.meta + (ns, nm) = self.attrname + if ns == m_meta.namespace and m_meta.namespace_prefix: + prefix = m_meta.namespace_prefix + yield '%s:%s=%s' % (prefix, nm, qaval,) + elif ns is None: + yield '%s=%s' % (nm, qaval,) + else: + for (p, n) in nsmap.iteritems(): + if ns == n[0]: + prefix = p + break + else: + prefix = "p" + str(random.randint(0, 10000)) + while prefix in nsmap: + prefix = "p" + str(random.randint(0, 10000)) + yield 'xmlns:%s="%s"' % (prefix, ns,) + yield '%s:%s=%s' % (prefix, nm, qaval,) + + def render_children(self, obj, val, nsmap): + if val is not None and self.tagname: + val = self._esc_render_value(val) + if self.tagname == ".": + yield val + else: + attrs = "" + # By default, tag values inherit the namespace of their + # containing model class. 
+ if isinstance(self.tagname, basestring): + prefix = self.model_class.meta.namespace_prefix + localName = self.tagname + else: + m_meta = self.model_class.meta + (ns, localName) = self.tagname + if not ns: + # If we have an explicitly un-namespaced tag, + # we need to be careful. The model tag might have + # set the default namespace, which we need to undo. + prefix = None + if m_meta.namespace and not m_meta.namespace_prefix: + attrs = ' xmlns=""' + elif ns == m_meta.namespace: + prefix = m_meta.namespace_prefix + else: + for (p, n) in nsmap.iteritems(): + if ns == n[0]: + prefix = p + break + else: + prefix = "p" + str(random.randint(0, 10000)) + while prefix in nsmap: + prefix = "p" + str(random.randint(0, 10000)) + attrs = ' xmlns:%s="%s"' % (prefix, ns) + yield self._render_tag(val, prefix, localName, attrs) + + def _render_tag(self, val, prefix, localName, attrs): + if val: + if prefix: + args = (prefix, localName, attrs, val, prefix, localName) + return "<%s:%s%s>%s" % args + else: + return "<%s%s>%s" % (localName, attrs, val, localName) + else: + if prefix: + return "<%s:%s%s />" % (prefix, localName, attrs,) + else: + return "<%s%s />" % (localName, attrs) + + def parse_value(self, val): + return val + + def render_value(self, val): + if not isinstance(val, basestring): + val = str(val) + return val + + def _esc_render_value(self, val): + return escape(self.render_value(val)) + + +class String(Value): + """Field representing a simple string value.""" + # actually, the base Value() class will do this automatically. 
+ pass + + +class CDATA(Value): + """String field rendered as CDATA.""" + + def __init__(self, **kwds): + super(CDATA, self).__init__(**kwds) + if self.__dict__.get("tagname", None) is None: + raise ValueError("CDATA fields must have a tagname") + + def _esc_render_value(self, val): + val = self.render_value(val) + val = val.replace("]]>", "]]]]>") + return "" + + +class Integer(Value): + """Field representing a simple integer value.""" + + def parse_value(self, val): + return int(val) + + +class Float(Value): + """Field representing a simple float value.""" + + def parse_value(self, val): + return float(val) + + +class Boolean(Value): + """Field representing a simple boolean value. + + The strings corresponding to false are 'no', 'off', 'false' and '0', + compared case-insensitively. Note that this means an empty tag or + attribute is considered True - this is usually what you want, since + a completely missing attribute or tag can be interpreted as False. + + To enforce that the presence of a tag indicates True and the absence of + a tag indicates False, pass the keyword argument "empty_only". 
+ """ + + class arguments(Value.arguments): + empty_only = False + + def __init__(self, **kwds): + super(Boolean, self).__init__(**kwds) + if self.empty_only: + self.required = False + + def __set__(self, instance, value): + instance.__dict__[self.field_name] = bool(value) + + def parse_value(self, val): + if self.empty_only and val != "": + raise ValueError("non-empty value in empty_only Boolean") + if val.lower() in ("no", "off", "false", "0"): + return False + return True + + def render_children(self, obj, val, nsmap): + if not val and self.empty_only: + return [] + return super(Boolean, self).render_children(obj, val, nsmap) + + def render_attributes(self, obj, val, nsmap): + if not val and self.empty_only: + return [] + return super(Boolean, self).render_attributes(obj, val, nsmap) + + def render_value(self, val): + if not val: + return "false" + if self.empty_only: + return "" + return "true" + + +class Model(Field): + """Field subclass referencing another Model instance. + + This field sublcass allows Models to contain other Models recursively. + The first argument to the field constructor must be either a Model + class, or the name or tagname of a Model class. + """ + + class arguments(Field.arguments): + type = None + + def __init__(self, type=None, **kwds): + kwds["type"] = type + super(Model, self).__init__(**kwds) + + def _get_type(self): + return self.__dict__.get("type") + + def _set_type(self, value): + if value is not None: + self.__dict__["type"] = value + + type = property(_get_type, _set_type) + + def __set__(self, instance, value): + typeclass = self.typeclass + if value and not isinstance(value, typeclass): + raise ValueError("Invalid value type %s. 
Model field requires %s instance" % + (value.__class__.__name__, typeclass.__name__)) + super(Model, self).__set__(instance, value) + + @property + def typeclass(self): + try: + return self.__dict__['typeclass'] + except KeyError: + self.__dict__['typeclass'] = self._load_typeclass() + return self.__dict__['typeclass'] + + def _load_typeclass(self): + typ = self.type + if isinstance(typ, dexml.ModelMetaclass): + return typ + if typ is None: + typ = self.field_name + typeclass = None + if isinstance(typ, basestring): + if self.model_class.meta.namespace: + ns = self.model_class.meta.namespace + typeclass = dexml.ModelMetaclass.find_class(typ, ns) + if typeclass is None: + typeclass = dexml.ModelMetaclass.find_class(typ, None) + if typeclass is None: + raise ValueError("Unknown Model class: %s" % (typ,)) + else: + (ns, typ) = typ + if isinstance(typ, dexml.ModelMetaclass): + return typ + typeclass = dexml.ModelMetaclass.find_class(typ, ns) + if typeclass is None: + raise ValueError("Unknown Model class: (%s,%s)" % (ns, typ)) + return typeclass + + def parse_child_node(self, obj, node): + typeclass = self.typeclass + try: + typeclass.validate_xml_node(node) + except dexml.ParseError: + return dexml.PARSE_SKIP + else: + inst = typeclass.parse(node) + self.__set__(obj, inst) + return dexml.PARSE_DONE + + def render_attributes(self, obj, val, nsmap): + return [] + + def render_children(self, obj, val, nsmap): + if val is not None: + for data in val._render(nsmap): + yield data + + +class List(Field): + """Field subclass representing a list of fields. + + This field corresponds to a homogenous list of other fields. You would + declare it like so: + + class MyModel(Model): + items = fields.List(fields.String(tagname="item")) + + Corresponding to XML such as: + + onetwo + + + The properties 'minlength' and 'maxlength' control the allowable length + of the list. 
+ + The 'tagname' property sets an optional wrapper tag which acts as container + for list items, for example: + + class MyModel(Model): + items = fields.List(fields.String(tagname="item"), + tagname='list') + + Corresponding to XML such as: + + onetwo + + This wrapper tag is always rendered, even if the list is empty. It is + transparently removed when parsing. + """ + + class arguments(Field.arguments): + field = None + minlength = None + maxlength = None + tagname = None + + def __init__(self, field, **kwds): + if isinstance(field, Field): + kwds["field"] = field + else: + kwds["field"] = Model(field, **kwds) + super(List, self).__init__(**kwds) + if not self.minlength and not self.tagname: + self.required = False + if self.minlength and not self.required: + raise ValueError("List must be required if it has minlength") + + def _get_field(self): + field = self.__dict__["field"] + if not hasattr(field, "field_name"): + field.field_name = self.field_name + if not hasattr(field, "model_class"): + field.model_class = self.model_class + return field + + def _set_field(self, field): + print 'List._set_field:', self, field + self.__dict__["field"] = field + + field = property(_get_field, _set_field) + + def __get__(self, instance, owner=None): + print 'List.__get__', 'self:', self, 'instance:', instance, 'owner:', owner + + print '\t calling super(List, self).__get__(instance, owner):', super(List, self).__get__ + val = super(List, self).__get__(instance, owner) + print 'Back in Field.__get__' + print '\t got return value:', val + + if val is not None: + print '\t value was not None, returning it' + return val + + print '\t Value was None, setting default value: []' + self.__set__(instance, []) # Set the default value + print 'Back in Field.__get__, from self.__set__' + print '\t calling self.__get__ to get the default value back' + res = self.__get__(instance, owner) # return the default value + print 'Back in Field.__get__', 'returning result:', res + return res + + 
def parse_child_node(self, obj, node): + print "parse_child_node called on the 'List' Class" + print 'self:', self + print 'obj:', obj + print 'node:', node + # If our children are inside a grouping tag, parse + # that first. The presence of this is indicated by + # setting the empty list on the target object. + if self.tagname: + print 'self has tagname:', self.tagname + print 'Calling super(List, self).__get__(obj):', super(List, self).__get__, obj + val = super(List, self).__get__(obj) + # ^^^ Why not call own __get__ ?? + # Anywhays requires this to return None before it tries to parse further on. + print 'Got back value:', val + if val is None: + print 'node.nodeType:', node.nodeType + if node.nodeType != node.ELEMENT_NODE: + print 'Was an ELEMENT_NODE, returning PARSE_SKIP' + return dexml.PARSE_SKIP + elif node.tagName == self.tagname: + print 'node.tagName:', node.tagName + print 'Setting default value on obj:', obj + self.__set__(obj, []) + print 'Returning dexml.PARSE_CHILDREN' + return dexml.PARSE_CHILDREN + else: + print 'returning parse_skip 1' + return dexml.PARSE_SKIP + # Now we just parse each child node. 
+ tmpobj = _AttrBucket() + print 'Calling parse_child_node:', tmpobj, node + res = self.field.parse_child_node(tmpobj, node) + print 'Got result back:', res + + if res is dexml.PARSE_MORE: + raise ValueError("items in a list cannot return PARSE_MORE") + if res is dexml.PARSE_DONE: + items = self.__get__(obj) + val = getattr(tmpobj, self.field_name) + items.append(val) + return dexml.PARSE_MORE + else: + print 'returning parse_skip 2' + return dexml.PARSE_SKIP + + def parse_done(self, obj): + items = self.__get__(obj) + if self.minlength is not None and len(items) < self.minlength: + raise dexml.ParseError("Field '%s': not enough items" % (self.field_name,)) + if self.maxlength is not None and len(items) > self.maxlength: + raise dexml.ParseError("Field '%s': too many items" % (self.field_name,)) + + def render_children(self, obj, items, nsmap): + # Create a generator that yields child data chunks, and validates + # the number of items in the list as it goes. It allows any + # iterable to be passed in, not just a list. + def child_chunks(): + num_items = 0 + for item in items: + num_items += 1 + if self.maxlength is not None and num_items > self.maxlength: + msg = "Field '%s': too many items" % (self.field_name,) + raise dexml.RenderError(msg) + for data in self.field.render_children(obj, item, nsmap): + yield data + if self.minlength is not None and num_items < self.minlength: + msg = "Field '%s': not enough items" % (self.field_name,) + raise dexml.RenderError(msg) + + chunks = child_chunks() + # Render each chunk, but suppress the wrapper tag if there's no data. + try: + data = chunks.next() + except StopIteration: + if self.tagname and self.required: + yield "<%s />" % (self.tagname,) + else: + if self.tagname: + yield "<%s>" % (self.tagname,) + yield data + for data in chunks: + yield data + if self.tagname: + yield "" % (self.tagname,) + + +class Dict(Field): + """Field subclass representing a dict of fields keyed by unique attribute value. 
+ + This field corresponds to an indexed dict of other fields. You would + declare it like so: + + class MyObject(Model): + name = fields.String(tagname = 'name') + attr = fields.String(tagname = 'attr') + + class MyModel(Model): + items = fields.Dict(fields.Model(MyObject), key = 'name') + + Corresponding to XML such as: + + obj1val1 + + + The properties 'minlength' and 'maxlength' control the allowable size + of the dict as in the List class. + + If 'unique' property is set to True, parsing will raise exception on + non-unique key values. + + The 'dictclass' property controls the internal dict-like class used by + the fielt. By default it is the standard dict class. + + The 'tagname' property sets the 'wrapper' tag which acts as container + for dict items, for example: + + from collections import defaultdict + class MyObject(Model): + name = fields.String() + attr = fields.String() + + class MyDict(defaultdict): + def __init__(self): + super(MyDict, self).__init__(MyObject) + + class MyModel(Model): + objects = fields.Dict('MyObject', key = 'name', + tagname = 'dict', dictclass = MyDict) + + xml = ''\ + val1' + mymodel = MyModel.parse(xml) + obj2 = mymodel['obj2'] + print(obj2.name) + print(mymodel.render(fragment = True)) + + This wrapper tag is always rendered, even if the dict is empty. It is + transparently removed when parsing. 
+ """ + + class arguments(Field.arguments): + field = None + minlength = None + maxlength = None + unique = False + tagname = None + dictclass = dict + + def __init__(self, field, key, **kwds): + if isinstance(field, Field): + kwds["field"] = field + else: + kwds["field"] = Model(field, **kwds) + super(Dict, self).__init__(**kwds) + if not self.minlength and not self.tagname: + self.required = False + if self.minlength and not self.required: + raise ValueError("Dict must be required if it has minlength") + self.key = key + + def _get_field(self): + field = self.__dict__["field"] + if not hasattr(field, "field_name"): + field.field_name = self.field_name + if not hasattr(field, "model_class"): + field.model_class = self.model_class + return field + + def _set_field(self, field): + self.__dict__["field"] = field + + field = property(_get_field, _set_field) + + def __get__(self, instance, owner=None): + val = super(Dict, self).__get__(instance, owner) + if val is not None: + return val + + class dictclass(self.dictclass): + key = self.key + + def __setitem__(self, key, value): + keyval = getattr(value, self.key) + if keyval and keyval != key: + raise ValueError('Key field value does not match dict key') + setattr(value, self.key, key) + super(dictclass, self).__setitem__(key, value) + + self.__set__(instance, dictclass()) + return self.__get__(instance, owner) + + def parse_child_node(self, obj, node): + # If our children are inside a grouping tag, parse + # that first. The presence of this is indicated by + # setting an empty dict on the target object. + if self.tagname: + val = super(Dict, self).__get__(obj) + if val is None: + if node.nodeType != node.ELEMENT_NODE: + return dexml.PARSE_SKIP + elif node.tagName == self.tagname: + self.__get__(obj) + return dexml.PARSE_CHILDREN + else: + return dexml.PARSE_SKIP + # Now we just parse each child node. 
+ tmpobj = _AttrBucket() + res = self.field.parse_child_node(tmpobj, node) + if res is dexml.PARSE_MORE: + raise ValueError("items in a dict cannot return PARSE_MORE") + if res is dexml.PARSE_DONE: + items = self.__get__(obj) + val = getattr(tmpobj, self.field_name) + try: + key = getattr(val, self.key) + except AttributeError: + raise dexml.ParseError("Key field '%s' required but not found in dict value" % (self.key, )) + if self.unique and key in items: + raise dexml.ParseError("Key '%s' already exists in dict" % (key,)) + items[key] = val + return dexml.PARSE_MORE + else: + return dexml.PARSE_SKIP + + def parse_done(self, obj): + items = self.__get__(obj) + if self.minlength is not None and len(items) < self.minlength: + raise dexml.ParseError("Field '%s': not enough items" % (self.field_name,)) + if self.maxlength is not None and len(items) > self.maxlength: + raise dexml.ParseError("Field '%s': too many items" % (self.field_name,)) + + def render_children(self, obj, items, nsmap): + if self.minlength is not None and len(items) < self.minlength: + raise dexml.RenderError("Field '%s': not enough items" % (self.field_name,)) + if self.maxlength is not None and len(items) > self.maxlength: + raise dexml.RenderError("too many items") + if self.tagname: + children = "".join(data for item in items.values() for data in self.field.render_children(obj, item, nsmap)) + if not children: + if self.required: + yield "<%s />" % (self.tagname,) + else: + yield children.join(('<%s>' % self.tagname, '' % self.tagname)) + else: + for item in items.values(): + for data in self.field.render_children(obj, item, nsmap): + yield data + + +class Choice(Field): + """Field subclass accepting any one of a given set of Model fields.""" + + class arguments(Field.arguments): + fields = [] + + def __init__(self, *fields, **kwds): + real_fields = [] + for field in fields: + if isinstance(field, Model): + real_fields.append(field) + elif isinstance(field, basestring): + 
real_fields.append(Model(field)) + else: + raise ValueError("only Model fields are allowed within a Choice field") + kwds["fields"] = real_fields + super(Choice, self).__init__(**kwds) + + def parse_child_node(self, obj, node): + for field in self.fields: + field.field_name = self.field_name + field.model_class = self.model_class + res = field.parse_child_node(obj, node) + if res is dexml.PARSE_MORE: + raise ValueError("items in a Choice cannot return PARSE_MORE") + if res is dexml.PARSE_DONE: + return dexml.PARSE_DONE + else: + return dexml.PARSE_SKIP + + def render_children(self, obj, item, nsmap): + if item is None: + if self.required: + raise dexml.RenderError("Field '%s': required field is missing" % (self.field_name,)) + else: + for data in item._render(nsmap=nsmap): + yield data + + +class XmlNode(Field): + class arguments(Field.arguments): + tagname = None + encoding = None + + def __set__(self, instance, value): + if isinstance(value, basestring): + if isinstance(value, unicode) and self.encoding: + value = value.encode(self.encoding) + doc = dexml.minidom.parseString(value) + value = doc.documentElement + if value is not None and value.namespaceURI is not None: + nsattr = "xmlns" + if value.prefix: + nsattr = ":".join((nsattr, value.prefix,)) + value.attributes[nsattr] = value.namespaceURI + return super(XmlNode, self).__set__(instance, value) + + def parse_child_node(self, obj, node): + if self.tagname is None or self._check_tagname(node, self.tagname): + self.__set__(obj, node) + return dexml.PARSE_DONE + return dexml.PARSE_SKIP + + @classmethod + def render_children(cls, obj, val, nsmap): + if val is not None: + yield val.toxml() + From 435cbbb61b571e1071faa673628eeaea606668b7 Mon Sep 17 00:00:00 2001 From: Jesper Reenberg Date: Wed, 3 Sep 2014 12:36:47 +0200 Subject: [PATCH 2/2] Removal of dead code and elimination of a bug. 
Dead code: self._canari_fields = dict([(f.name, f.value) for f in self.entity.fields.values()]) We can remove this line of code from the constructor of MaltegoTransformRequestMessage, as it is basically a no-op: 1. Trying to read from self.entity in __init__ will ALWAYS end up returning a 'none-entity' (return Entity('')), as the read from self.entities will fail due to the fact that it has not been populated with anything yet. a. This read from self.entities has the side effect of reading from the safedexml List field type, which initialises it to the empty list. When we later on try to actually parse anything into the field, it will fail as the call: val = super(List, self).__get__(obj) inside safedexml.fields.List.parse_child_node will return '[]' instead of None. Because of this, the if statement will skip the 'parsing logic' and eventually end up returning PARSE_SKIP, instead of PARSE_CHILDREN. b. This has the effect that the <Entities>...</Entities> tag is not parsed, and thus the entities List field of the request message is never populated. 2. '_canari_fields' is not referenced anywhere else in the canari code. So even if the call did return anything, it would never be used. 3. To the best of my knowledge the code doesn't produce any side effects that are needed or even desired. With the dead code, the following valid XML from Maltego will not populate the entities field. This is an issue, as the request message is sent to the transform, from where it is not possible to access any of the information about the entity (most importantly the value). Worse, if the previous bugfix has been applied, where the entities field is no longer optional, then it will fail with a ParseError due to the required field not being found.
-------------------------------------------------------- In [1]: import canari.maltego.message as msg In [2]: mmsg = msg.MaltegoMessage.parse(''' paterva.com 0 paterva.com ''') -------------------------------------------------------- Now, when trying to access the entity property of the request message, we will get a non-entity back, as seen by the empty value and the empty list of entities. -------------------------------------------------------- In [3]: mmsg.message.entity.value Out[3]: '' In [4]: mmsg.message.entities Out[4]: [] -------------------------------------------------------- This can be further documented by adding debug statements inside the 'entity' property. However, with the dead code eliminated, the correct information is returned as seen below. -------------------------------------------------------- In [3]: mmsg.message.entity.value Out[3]: u'paterva.com' In [4]: mmsg.message.entities Out[4]: [] -------------------------------------------------------- All in all, this seems to be a bug in safedexml. Currently I have only noticed it on the List field type, however it may or may not be present on other field types as well. This issue is solved by not reading from the entities field before the user has had a chance to call the parse function or to explicitly add instances to it (by appendelement) for use when rendering to xml. --- src/canari/maltego/message.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/canari/maltego/message.py b/src/canari/maltego/message.py index 6886367..a5359dd 100644 --- a/src/canari/maltego/message.py +++ b/src/canari/maltego/message.py @@ -549,7 +549,6 @@ class MaltegoTransformRequestMessage(MaltegoElement): def __init__(self, **kwargs): super(MaltegoTransformRequestMessage, self).__init__(**kwargs) - self._canari_fields = dict([(f.name, f.value) for f in self.entity.fields.values()]) @property def entity(self):