diff --git a/src/canari/maltego/message.py b/src/canari/maltego/message.py index da40d39..a5359dd 100644 --- a/src/canari/maltego/message.py +++ b/src/canari/maltego/message.py @@ -543,13 +543,12 @@ def __setattr__(self, key, value): class MaltegoTransformRequestMessage(MaltegoElement): - entities = fields_.List(_Entity, tagname='Entities', required=False) + entities = fields_.List(_Entity, tagname='Entities') parameters = fields_.Dict(Field, tagname='TransformFields', key='name', required=False) limits = fields_.Model(Limits, required=False) def __init__(self, **kwargs): super(MaltegoTransformRequestMessage, self).__init__(**kwargs) - self._canari_fields = dict([(f.name, f.value) for f in self.entity.fields.values()]) @property def entity(self): diff --git a/src/canari/xmltools/safedexml-debug/README b/src/canari/xmltools/safedexml-debug/README new file mode 100644 index 0000000..71d083e --- /dev/null +++ b/src/canari/xmltools/safedexml-debug/README @@ -0,0 +1,14 @@ +Copy this folder into your Python path and rename it to safedexml, or, if you +have installed safedexml from pip, overwrite the dist-packages path +/usr/local/lib/python2.7/dist-packages/safedexml/ with the content. + +This will generate some debug output. + +It is advised to only copy one of the files at a time (e.g., only __init__.py or +only fields.py) as their output will mingle and it will be nearly impossible to +tell them apart. Yes, the output is very simple and could probably have been +made smarter... + +Currently the output of __init__.py is the 'best', in the sense that it tries to +nest the output and indent it, so it is easier to follow function calls and the +'parsing logic' within the parse() function. 
diff --git a/src/canari/xmltools/safedexml-debug/__init__.py b/src/canari/xmltools/safedexml-debug/__init__.py new file mode 100644 index 0000000..a2d51cf --- /dev/null +++ b/src/canari/xmltools/safedexml-debug/__init__.py @@ -0,0 +1,718 @@ +""" + +dexml: a dead-simple Object-XML mapper for Python +================================================== + +Let's face it: xml is a fact of modern life. I'd even go so far as to say +that it's *good* at what it does. But that doesn't mean it's easy to work +with and it doesn't mean that we have to like it. Most of the time, XML +just needs to get out of the way and let you do some actual work instead +of writing code to traverse and manipulate yet another DOM. + +The dexml module takes the obvious mapping between XML tags and Python objects +and lets you capture that as cleanly as possible. Loosely inspired by Django's +ORM, you write simple class definitions to define the expected structure of +your XML document. Like so:: + + >>> import dexml + >>> from dexml import fields + >>> class Person(dexml.Model): + ... name = fields.String() + ... age = fields.Integer(tagname='age') + +Then you can parse an XML document into an object like this:: + + >>> p = Person.parse("<Person name='Foo McBar'><age>42</age></Person>") + >>> p.name + u'Foo McBar' + >>> p.age + 42 + +And you can render an object into an XML document like this:: + + >>> p = Person(name="Handsome B. Wonderful",age=36) + >>> p.render() + '<?xml version="1.0" ?><Person name="Handsome B. Wonderful"><age>36</age></Person>' + +Malformed documents will raise a ParseError:: + + >>> p = Person.parse("<Person><age>92</age></Person>") + Traceback (most recent call last): + ... + ParseError: required field not found: 'name' + +Of course, it gets more interesting when you nest Model definitions, like this:: + + >>> class Group(dexml.Model): + ... name = fields.String(attrname="name") + ... members = fields.List(Person) + ... 
+ >>> g = Group(name="Monty Python") + >>> g.members.append(Person(name="John Cleese",age=69)) + >>> g.members.append(Person(name="Terry Jones",age=67)) + >>> g.render(fragment=True) + '6967' + +There's support for XML namespaces, default field values, case-insensitive +parsing, and more fun stuff. Check out the documentation on the following +classes for more details: + + :Model: the base class for objects that map into XML + :Field: the base class for individual model fields + :Meta: meta-information about how to parse/render a model + +""" + +__ver_major__ = 0 +__ver_minor__ = 5 +__ver_patch__ = 1 +__ver_sub__ = "" +__version__ = "%d.%d.%d%s" % (__ver_major__, __ver_minor__, __ver_patch__, __ver_sub__) + +import sys +import re +import copy +from defusedxml import minidom +from safedexml import fields + + +if sys.version_info >= (3,): + str = str #pragma: no cover + unicode = str #pragma: no cover + bytes = bytes #pragma: no cover + basestring = (str, bytes) #pragma: no cover +else: + str = str #pragma: no cover + unicode = unicode #pragma: no cover + bytes = str #pragma: no cover + basestring = basestring #pragma: no cover + + +_nest = [] +def nest(): + return ''.join(_nest) +def nest_more(): + _nest.append(' ') +def nest_less(): + _nest.pop() + + +class Error(Exception): + """Base exception class for the dexml module.""" + pass + + +class ParseError(Error): + """Exception raised when XML could not be parsed into objects.""" + pass + + +class RenderError(Error): + """Exception raised when object could not be rendered into XML.""" + pass + + +class XmlError(Error): + """Exception raised to encapsulate errors from underlying XML parser.""" + pass + + +class PARSE_DONE(object): + """Constant returned by a Field when it has finished parsing.""" + pass + + +class PARSE_MORE(object): + """Constant returned by a Field when it wants additional nodes to parse.""" + pass + + +class PARSE_SKIP(object): + """Constant returned by a Field when it cannot parse the given 
node.""" + pass + + +class PARSE_CHILDREN(object): + """Constant returned by a Field to parse children from its container tag.""" + pass + + +class Meta(object): + """Class holding meta-information about a dexml.Model subclass. + + Each dexml.Model subclass has an attribute 'meta' which is an instance + of this class. That instance holds information about how the model + corresponds to XML, such as its tagname, namespace, and error handling + semantics. You would not ordinarily create an instance of this class; + instead let the ModelMetaclass create one automatically. + + These attributes control how the model corresponds to the XML: + + * tagname: the name of the tag representing this model + * namespace: the XML namespace in which this model lives + + These attributes control parsing/rendering behaviour: + + * namespace_prefix: the prefix to use for rendering namespaced tags + * ignore_unknown_elements: ignore unknown elements when parsing + * case_sensitive: match tag/attr names case-sensitively + * order_sensitive: match child tags in order of field definition + + """ + + _defaults = {"tagname": None, + "namespace": None, + "namespace_prefix": None, + "ignore_unknown_elements": True, + "case_sensitive": True, + "order_sensitive": True} + + def __init__(self, name, meta_attrs): + for (attr, default) in self._defaults.items(): + setattr(self, attr, meta_attrs.get(attr, default)) + if self.tagname is None: + self.tagname = name + + +def _meta_attributes(meta): + """Extract attributes from a "meta" object.""" + meta_attrs = {} + if meta: + for attr in dir(meta): + if not attr.startswith("_"): + meta_attrs[attr] = getattr(meta, attr) + return meta_attrs + + +class ModelMetaclass(type): + """Metaclass for dexml.Model and subclasses. + + This metaclass is responsible for introspecting Model class definitions + and setting up appropriate default behaviours. For example, this metaclass + sets a Model's default tagname to be equal to the declared class name. 
+ """ + + instances_by_tagname = {} + instances_by_classname = {} + + def __new__(mcls, name, bases, attrs): + cls = super(ModelMetaclass, mcls).__new__(mcls, name, bases, attrs) + # Don't do anything if it's not a subclass of Model + parents = [b for b in bases if isinstance(b, ModelMetaclass)] + if not parents: + return cls + # Set up the cls.meta object, inheriting from base classes + meta_attrs = {} + for base in reversed(bases): + if isinstance(base, ModelMetaclass) and hasattr(base, "meta"): + meta_attrs.update(_meta_attributes(base.meta)) + meta_attrs.pop("tagname", None) + meta_attrs.update(_meta_attributes(attrs.get("meta", None))) + cls.meta = Meta(name, meta_attrs) + # Create ordered list of field objects, telling each about their + # name and containing class. Inherit fields from base classes + # only if not overridden on the class itself. + base_fields = {} + for base in bases: + if not isinstance(base, ModelMetaclass): + continue + for field in base._fields: + if field.field_name not in base_fields: + field = copy.copy(field) + field.model_class = cls + base_fields[field.field_name] = field + cls_fields = [] + for (name, value) in attrs.iteritems(): + if isinstance(value, fields.Field): + base_fields.pop(name, None) + value.field_name = name + value.model_class = cls + cls_fields.append(value) + cls._fields = base_fields.values() + cls_fields + cls._fields.sort(key=lambda f: f._order_counter) + # Register the new class so we can find it by name later on + tagname = (cls.meta.namespace, cls.meta.tagname) + mcls.instances_by_tagname[tagname] = cls + mcls.instances_by_classname[cls.__name__] = cls + return cls + + @classmethod + def find_class(mcls, tagname, namespace=None): + """Find dexml.Model subclass for the given tagname and namespace.""" + try: + return mcls.instances_by_tagname[(namespace, tagname)] + except KeyError: + if namespace is None: + try: + return mcls.instances_by_classname[tagname] + except KeyError: + pass + return None + + +# You 
can use this re to extract the encoding declaration from the XML +# document string. Hopefully you won't have to, but you might need to... +_XML_ENCODING_RE = re.compile("<\\?xml [^>]*encoding=[\"']([a-zA-Z0-9\\.\\-\\_]+)[\"'][^>]*?>") + + +class Model(object): + """Base class for dexml Model objects. + + Subclasses of Model represent a concrete type of object that can parsed + from or rendered to an XML document. The mapping to/from XML is controlled + by two things: + + * attributes declared on an inner class named 'meta' + * fields declared using instances of fields.Field + + Here's a quick example: + + class Person(dexml.Model): + # This overrides the default tagname of 'Person' + class meta + tagname = "person" + # This maps to a 'name' attributr on the tag + name = fields.String() + # This maps to an tag within the tag + age = fields.Integer(tagname='age') + + See the 'Meta' class in this module for available meta options, and the + 'fields' submodule for available field types. + """ + + __metaclass__ = ModelMetaclass + _fields = [] + + def __init__(self, **kwds): + """Default Model constructor. + + Keyword arguments that correspond to declared fields are processed + and assigned to that field. + """ + for f in self._fields: + try: + setattr(self, f.field_name, kwds[f.field_name]) + except KeyError: + pass + + + + @classmethod + def parse(cls, xml): + """Produce an instance of this model from some xml. + + The given xml can be a string, a readable file-like object, or + a DOM node; we might add support for more types in the future. + """ + + print nest(), 'parse(....) 
------------------------------------------------------------------------------------' + nest_more() + + print nest(), 'Constructing class:', cls.__name__ + self = cls() + print nest(), 'Calling _make_xml_node with:', repr(xml) + node = self._make_xml_node(xml) + print nest(), 'Got node back:', str(node) + + self.validate_xml_node(node) + # Keep track of fields that have successfully parsed something + fields_found = [] + # Try to consume all the node's attributes + attrs = node.attributes.values() + print nest(), 'XML node has the following attrs:', str(attrs) + + print nest(), 'Current class instance has the following defined fields:' + for field in self._fields: + nest_more() + print nest(), 'field:', str(field), \ + '(tagname: %s)' % getattr(field, 'tagname', None), \ + '(attrname: %s)' % getattr(field, 'attrname', None), \ + '(field name: %s)' % getattr(field, 'field_name', None), \ + '(model class: %s)' % getattr(field, 'model_class', None) + + unused_attrs = field.parse_attributes(self, attrs) + print nest(), ' |- unused attrs, after consuming by (attribute) field:', str(unused_attrs) + + if len(unused_attrs) < len(attrs): + print nest(), ' |- An attribute was consumed, so the current field must be an' \ + 'attribute field. Store it in the list of found fields.' + fields_found.append(field) + else: + print nest(), ' |- No attributes was consumes, this field must be a tag field.' + + # Store resulting unused attributes, for next loop iteration. 
+ attrs = unused_attrs + nest_less() + + for attr in attrs: + print nest(), 'handle unparsed attr:', str(attr) + self._handle_unparsed_node(attr) + + # Try to consume all child nodes + print nest(), 'Try to consume child nodes' + if self.meta.order_sensitive: + print nest(), 'parsing order_sensitive = True' + self._parse_children_ordered(node, self._fields, fields_found) + else: + print nest(), 'parsing order_sensitive = False' + print nest(), 'Current fields matched (attribute fields):', fields_found + self._parse_children_unordered(node, self._fields, fields_found) + # Check that all required fields have been found + + print nest(), 'parse() check that required fields have been found' + for field in self._fields: + nest_more() + print nest(), 'looping field:', str(field), \ + '(tagname: %s)' % getattr(field, 'tagname', None), \ + '(attrname: %s)' % getattr(field, 'attrname', None), \ + '(field name: %s)' % getattr(field, 'field_name', None), \ + '(model class: %s)' % getattr(field, 'model_class', None) + if field.required and field not in fields_found: + err = "required field not found: '%s'" % (field.field_name,) + raise ParseError(err) + field.parse_done(self) + nest_less() + # All done, return the instance so created + print nest(), 'Created:', type(self).__name__ + + nest_less() + print nest(), 'parse(....) ----------------------------------------- END --------------------------------------' + return self + + def _parse_children_ordered(self, node, fields, fields_found): + """Parse the children of the given node using strict field ordering.""" + cur_field_idx = 0 + for child in node.childNodes: + idx = cur_field_idx + # If we successfully break out of this loop, one of our + # fields has consumed the node. 
+ while idx < len(fields): + field = fields[idx] + res = field.parse_child_node(self, child) + if res is PARSE_DONE: + if field not in fields_found: + fields_found.append(field) + cur_field_idx = idx + 1 + break + if res is PARSE_MORE: + if field not in fields_found: + fields_found.append(field) + cur_field_idx = idx + break + if res is PARSE_CHILDREN: + if field not in fields_found: + fields_found.append(field) + self._parse_children_ordered(child, [field], fields_found) + cur_field_idx = idx + break + idx += 1 + else: + self._handle_unparsed_node(child) + + def _parse_children_unordered(self, node, fields, fields_found): + """Parse the children of the given node using loose field ordering.""" + print nest(), '_parse_children_unordered(....) ----------------------------------------------------------------' + nest_more() + + done_fields = {} + print nest(), 'Looping through XML child nodes:', str(node.childNodes) + for child in node.childNodes: + nest_more() + print nest(), 'Current child:', child, 'Trying to match to one of the fields...' + + idx = 0 + # If we successfully break out of this loop, one of our + # fields has consumed the node. 
+ + while idx < len(fields): + nest_more() + print nest(), 'Looping', idx+1, 'of', len(fields) + if idx in done_fields: + print nest(), 'Skipping index %s as it is in done_fields: ' % idx, done_fields + idx += 1 + nest_less() # remember to nest less :) + continue + field = fields[idx] + print nest(), 'Trying field:', str(field), \ + '(tagname: %s)' % getattr(field, 'tagname', None), \ + '(attrname: %s)' % getattr(field, 'attrname', None), \ + '(field name: %s)' % getattr(field, 'field_name', None), \ + '(model class: %s)' % getattr(field, 'model_class', None) + nest_more() + print nest(), 'Calling parse_child_node of the current field, with the current child' + res = field.parse_child_node(self, child) + + if res is PARSE_DONE: + print nest(), 'got PARSE_DONE' + done_fields[idx] = True + if field not in fields_found: + print nest(), '\t Adding field to fields_found' + fields_found.append(field) + break + if res is PARSE_MORE: + print nest(), 'got PARSE_MORE' + if field not in fields_found: + print nest(), '\t Adding field to fields_found' + fields_found.append(field) + break + if res is PARSE_CHILDREN: + print nest(), 'got PARSE_CHILDREN' + if field not in fields_found: + print nest(), '\t Adding field to fields_found' + fields_found.append(field) + + print nest(), '\t Adding field to fields_found' + self._parse_children_unordered(child, [field], fields_found) + break + else: + print nest(), '--------> Got unknown and unhandled parse result:', res, ' <--------------' + idx += 1 + nest_less() + nest_less() + else: + print nest(), 'Done looping. Hit else part of while loop. Calling _handle_unparsed_node()' + self._handle_unparsed_node(child) + nest_less() + + nest_less() + print nest(), '_parse_children_unordered(....) 
------------------------------- END ----------------------------' + + def _handle_unparsed_node(self, node): + if not self.meta.ignore_unknown_elements: + if node.nodeType == node.ELEMENT_NODE: + err = "unknown element: %s" % (node.nodeName,) + raise ParseError(err) + elif node.nodeType in (node.TEXT_NODE, node.CDATA_SECTION_NODE): + if node.nodeValue.strip(): + err = "unparsed text node: %s" % (node.nodeValue,) + raise ParseError(err) + elif node.nodeType == node.ATTRIBUTE_NODE: + if not node.nodeName.startswith("xml"): + err = "unknown attribute: %s" % (node.name,) + raise ParseError(err) + + def render(self, encoding=None, fragment=False, pretty=False, nsmap=None): + """Produce XML from this model's instance data. + + A unicode string will be returned if any of the objects contain + unicode values; specifying the 'encoding' argument forces generation + of a bytestring. + + By default a complete XML document is produced, including the + leading "" declaration. To generate an XML fragment set + the 'fragment' argument to True. + """ + if nsmap is None: + nsmap = {} + data = [] + if not fragment: + if encoding: + s = '' % (encoding,) + data.append(s) + else: + data.append('') + data.extend(self._render(nsmap)) + xml = "".join(data) + if pretty: + xml = minidom.parseString(xml).toprettyxml() + if encoding: + xml = xml.encode(encoding) + return xml + + def irender(self, encoding=None, fragment=False, nsmap=None): + """Generator producing XML from this model's instance data. + + If any of the objects contain unicode values, the resulting output + stream will be a mix of bytestrings and unicode; specify the 'encoding' + arugment to force generation of bytestrings. + + By default a complete XML document is produced, including the + leading "" declaration. To generate an XML fragment set + the 'fragment' argument to True. 
+ """ + if nsmap is None: + nsmap = {} + if not fragment: + if encoding: + decl = '' % (encoding,) + yield decl.encode(encoding) + else: + yield '' + if encoding: + for data in self._render(nsmap): + if isinstance(data, unicode): + data = data.encode(encoding) + yield data + else: + for data in self._render(nsmap): + yield data + + def _render(self, nsmap): + """Generator rendering this model as an XML fragment.""" + # Determine opening and closing tags + pushed_ns = False + if self.meta.namespace: + namespace = self.meta.namespace + prefix = self.meta.namespace_prefix + try: + cur_ns = nsmap[prefix] + except KeyError: + cur_ns = [] + nsmap[prefix] = cur_ns + if prefix: + tagname = "%s:%s" % (prefix, self.meta.tagname) + open_tag_contents = [tagname] + if not cur_ns or cur_ns[0] != namespace: + cur_ns.insert(0, namespace) + pushed_ns = True + open_tag_contents.append('xmlns:%s="%s"' % (prefix, namespace)) + close_tag_contents = tagname + else: + open_tag_contents = [self.meta.tagname] + if not cur_ns or cur_ns[0] != namespace: + cur_ns.insert(0, namespace) + pushed_ns = True + open_tag_contents.append('xmlns="%s"' % (namespace,)) + close_tag_contents = self.meta.tagname + else: + open_tag_contents = [self.meta.tagname] + close_tag_contents = self.meta.tagname + used_fields = set() + open_tag_contents.extend(self._render_attributes(used_fields, nsmap)) + # Render each child node + children = self._render_children(used_fields, nsmap) + try: + first_child = children.next() + except StopIteration: + yield "<%s />" % (" ".join(open_tag_contents),) + else: + yield "<%s>" % (" ".join(open_tag_contents),) + yield first_child + for child in children: + yield child + yield "" % (close_tag_contents,) + # Check that all required fields actually rendered something + for f in self._fields: + if f.required and f not in used_fields: + raise RenderError("Field '%s' is missing" % (f.field_name,)) + # Clean up + if pushed_ns: + nsmap[prefix].pop(0) + + def _render_attributes(self, 
used_fields, nsmap): + for f in self._fields: + val = getattr(self, f.field_name) + datas = iter(f.render_attributes(self, val, nsmap)) + try: + data = datas.next() + except StopIteration: + pass + else: + used_fields.add(f) + yield data + for data in datas: + yield data + + def _render_children(self, used_fields, nsmap): + for f in self._fields: + val = getattr(self, f.field_name) + datas = iter(f.render_children(self, val, nsmap)) + try: + data = datas.next() + except StopIteration: + pass + else: + used_fields.add(f) + yield data + for data in datas: + yield data + + @staticmethod + def _make_xml_node(xml): + """Transform a variety of input formats to an XML DOM node.""" + + print nest(), '_make_xml_node(....) -----------------------------------------------------------------------------' + nest_more() + + try: + print nest(), "Trying to see if the given 'xml' has a 'nodeType'" + ntype = xml.nodeType + except AttributeError: + print nest(), "Got 'AttributeError', so this must be a string with xml to be parsed" + if isinstance(xml, bytes): + print nest(), "The 'xml' is a 'bytes' instance" + try: + xml = minidom.parseString(xml) + except Exception, e: + raise XmlError(e) + elif isinstance(xml, unicode): + print nest(), "The 'xml' is a 'unicode' instance" + try: + # Try to grab the "encoding" attribute from the XML. + # It probably won't exist, so default to utf8. 
+ encoding = _XML_ENCODING_RE.match(xml) + if encoding is None: + encoding = "utf8" + else: + encoding = encoding.group(1) + xml = minidom.parseString(xml.encode(encoding)) + except Exception, e: + raise XmlError(e) + elif hasattr(xml, "read"): + print nest(), "The 'xml' is a file like instance, with a 'read' attribute" + try: + xml = minidom.parse(xml) + except Exception, e: + raise XmlError(e) + else: + raise ValueError("Can't convert that to an XML DOM node") + print nest(), "Getting the 'documentElement' from the parsed 'xml'" + node = xml.documentElement + else: + if ntype == xml.DOCUMENT_NODE: + print nest(), "The given 'xml' is a document node, returning its 'documentElement'" + node = xml.documentElement + else: + print nest(), "The given 'xml' is a 'documentElement', returning it directly" + node = xml + + nest_less() + print nest(), '_make_xml_node(....) ---------------------------------- END --------------------------------------' + return node + + @classmethod + def validate_xml_node(cls, node): + """Check that the given xml node is valid for this object. + + Here 'valid' means that it is the right tag, in the right + namespace. We might add more eventually... 
+ """ + if node.nodeType != node.ELEMENT_NODE: + err = "Class '%s' got a non-element node" + err = err % (cls.__name__,) + raise ParseError(err) + if cls.meta.case_sensitive: + if node.localName != cls.meta.tagname: + err = "Class '%s' got tag '%s' (expected '%s')" + err = err % (cls.__name__, node.localName, + cls.meta.tagname) + raise ParseError(err) + else: + if node.localName.lower() != cls.meta.tagname.lower(): + err = "Class '%s' got tag '%s' (expected '%s')" + err = err % (cls.__name__, node.localName, + cls.meta.tagname) + raise ParseError(err) + if cls.meta.namespace: + if node.namespaceURI != cls.meta.namespace: + err = "Class '%s' got namespace '%s' (expected '%s')" + err = err % (cls.__name__, node.namespaceURI, + cls.meta.namespace) + raise ParseError(err) + else: + if node.namespaceURI: + err = "Class '%s' got namespace '%s' (expected no namespace)" + err = err % (cls.__name__, node.namespaceURI,) + raise ParseError(err) + + diff --git a/src/canari/xmltools/safedexml-debug/fields.py b/src/canari/xmltools/safedexml-debug/fields.py new file mode 100644 index 0000000..4b571fb --- /dev/null +++ b/src/canari/xmltools/safedexml-debug/fields.py @@ -0,0 +1,901 @@ +""" + +dexml.fields: basic field type definitions for dexml +===================================================== + +""" + +import safedexml as dexml +import random +from xml.sax.saxutils import escape, quoteattr + + + +class _AttrBucket(object): + """A simple class used only to hold attributes.""" + pass + + +class Field(object): + """Base class for all dexml Field classes. + + Field classes are responsible for parsing and rendering individual + components to the XML. They also act as descriptors on dexml Model + instances, to get/set the corresponding properties. 
+ + Each field instance will magically be given the following properties: + + * model_class: the Model subclass to which it is attached + * field_name: the name under which is appears on that class + + The following methods are required for interaction with the parsing + and rendering machinery: + + * parse_attributes: parse info out of XML node attributes + * parse_child_node: parse into out of an XML child node + * render_attributes: render XML for node attributes + * render_children: render XML for child nodes + + """ + + # Global counter tracking the order in which fields are declared. + _order_counters_ = 0 + + + class arguments: + required = True + + def __init__(self, **kwds): + """Default Field constructor. + + This constructor keeps track of the order in which Field instances + are created, since this information can have semantic meaning in + XML. It also merges any keyword arguments with the defaults + defined on the 'arguments' inner class, and assigned these attributes + to the Field instance. + """ + self._order_counter = Field._order_counters_ = Field._order_counters_ + 1 + args = self.__class__.arguments + for argnm in dir(args): + if not argnm.startswith("__"): + setattr(self, argnm, kwds.get(argnm, getattr(args, argnm))) + + def parse_attributes(self, obj, attrs): + """Parse any attributes for this field from the given list. + + This method will be called with the Model instance being parsed and + a list of attribute nodes from its XML tag. Any attributes of + interest to this field should be processed, and a list of the unused + attribute nodes returned. + """ + return attrs + + def parse_child_node(self, obj, node): + """Parse a child node for this field. + + This method will be called with the Model instance being parsed and + the current child node of that model's XML tag. There are three + options for processing this node: + + * return PARSE_DONE, indicating that it was consumed and this + field now has all the necessary data. 
+ * return PARSE_MORE, indicating that it was consumed but this + field will accept more nodes. + * return PARSE_SKIP, indicating that it was not consumed by + this field. + + Any other return value will be taken as a parse error. + """ + print "parse_child_node called on the base 'Field' class" + return dexml.PARSE_SKIP + + def parse_done(self, obj): + """Finalize parsing for the given object. + + This method is called as a simple indicator that no more data will + be forthcoming. No return value is expected. + """ + pass + + def render_attributes(self, obj, val, nsmap): + """Render any attributes that this field manages.""" + return [] + + def render_children(self, obj, nsmap, val): + """Render any child nodes that this field manages.""" + return [] + + def __get__(self, instance, owner=None): + print 'Field.__get__:', 'self:', self, 'instance:', instance, 'owner:', owner + if instance is None: + print '\t instance was None, returning self' + return self + + print '\t getting return value' + res = instance.__dict__.get(self.field_name) + print '\t instance __dict__:', instance.__dict__ + print '\t returning:', res, 'from field name:', self.field_name + return res + + def __set__(self, instance, value): + print 'Field.__set__', self, instance, self.field_name, '<-', value + print '\t Instance dict (before):', instance.__dict__ + instance.__dict__[self.field_name] = value + print '\t Instance dict (after):', instance.__dict__ + + + def _check_tagname(self, node, tagname): + if node.nodeType != node.ELEMENT_NODE: + return False + if isinstance(tagname, basestring): + if node.localName != tagname: + return False + if node.namespaceURI: + if node.namespaceURI != self.model_class.meta.namespace: + return False + else: + (tagns, tagname) = tagname + if node.localName != tagname: + return False + if node.namespaceURI != tagns: + return False + return True + + +class Value(Field): + """Field subclass that holds a simple scalar value. 
+ + This Field subclass contains the common logic to parse/render simple + scalar value fields - fields that don't required any recursive parsing. + Individual subclasses should provide the parse_value() and render_value() + methods to do type coercion of the value. + + Value fields can also have a default value, specified by the 'default' + keyword argument. + + By default, the field maps to an attribute of the model's XML node with + the same name as the field declaration. Consider: + + class MyModel(Model): + my_field = fields.Value(default="test") + + + This corresponds to the XML fragment "". + To use a different name specify the 'attrname' kwd argument. To use + a subtag instead of an attribute specify the 'tagname' kwd argument. + + Namespaced attributes or subtags are also supported, by specifying a + (namespace,tagname) pair for 'attrname' or 'tagname' respectively. + """ + + class arguments(Field.arguments): + tagname = None + attrname = None + default = None + + def __init__(self, **kwds): + super(Value, self).__init__(**kwds) + if self.default is not None: + self.required = False + + def _get_attrname(self): + if self.__dict__["tagname"]: + return None + attrname = self.__dict__['attrname'] + if not attrname: + attrname = self.field_name + return attrname + + def _set_attrname(self, attrname): + self.__dict__['attrname'] = attrname + + attrname = property(_get_attrname, _set_attrname) + + def _get_tagname(self): + if self.__dict__["attrname"]: + return None + tagname = self.__dict__['tagname'] + if tagname and not isinstance(tagname, (basestring, tuple)): + tagname = self.field_name + return tagname + + def _set_tagname(self, tagname): + self.__dict__['tagname'] = tagname + + tagname = property(_get_tagname, _set_tagname) + + def __get__(self, instance, owner=None): + val = super(Value, self).__get__(instance, owner) + if val is None: + return self.default + return val + + def parse_attributes(self, obj, attrs): + # Bail out if we're attached to a 
subtag rather than an attr. + if self.tagname: + return attrs + unused = [] + attrname = self.attrname + if isinstance(attrname, basestring): + ns = None + else: + (ns, attrname) = attrname + for attr in attrs: + if attr.localName == attrname: + if attr.namespaceURI == ns: + self.__set__(obj, self.parse_value(attr.nodeValue)) + else: + unused.append(attr) + else: + unused.append(attr) + return unused + + def parse_child_node(self, obj, node): + if not self.tagname: + return dexml.PARSE_SKIP + if self.tagname == ".": + node = node.parentNode + else: + if not self._check_tagname(node, self.tagname): + return dexml.PARSE_SKIP + vals = [] + # Merge all text nodes into a single value + for child in node.childNodes: + if child.nodeType not in (child.TEXT_NODE, child.CDATA_SECTION_NODE): + raise dexml.ParseError("non-text value node") + vals.append(child.nodeValue) + self.__set__(obj, self.parse_value("".join(vals))) + return dexml.PARSE_DONE + + def render_attributes(self, obj, val, nsmap): + if val is not None and self.attrname: + qaval = quoteattr(self.render_value(val)) + if isinstance(self.attrname, basestring): + yield '%s=%s' % (self.attrname, qaval,) + else: + m_meta = self.model_class.meta + (ns, nm) = self.attrname + if ns == m_meta.namespace and m_meta.namespace_prefix: + prefix = m_meta.namespace_prefix + yield '%s:%s=%s' % (prefix, nm, qaval,) + elif ns is None: + yield '%s=%s' % (nm, qaval,) + else: + for (p, n) in nsmap.iteritems(): + if ns == n[0]: + prefix = p + break + else: + prefix = "p" + str(random.randint(0, 10000)) + while prefix in nsmap: + prefix = "p" + str(random.randint(0, 10000)) + yield 'xmlns:%s="%s"' % (prefix, ns,) + yield '%s:%s=%s' % (prefix, nm, qaval,) + + def render_children(self, obj, val, nsmap): + if val is not None and self.tagname: + val = self._esc_render_value(val) + if self.tagname == ".": + yield val + else: + attrs = "" + # By default, tag values inherit the namespace of their + # containing model class. 
+ if isinstance(self.tagname, basestring): + prefix = self.model_class.meta.namespace_prefix + localName = self.tagname + else: + m_meta = self.model_class.meta + (ns, localName) = self.tagname + if not ns: + # If we have an explicitly un-namespaced tag, + # we need to be careful. The model tag might have + # set the default namespace, which we need to undo. + prefix = None + if m_meta.namespace and not m_meta.namespace_prefix: + attrs = ' xmlns=""' + elif ns == m_meta.namespace: + prefix = m_meta.namespace_prefix + else: + for (p, n) in nsmap.iteritems(): + if ns == n[0]: + prefix = p + break + else: + prefix = "p" + str(random.randint(0, 10000)) + while prefix in nsmap: + prefix = "p" + str(random.randint(0, 10000)) + attrs = ' xmlns:%s="%s"' % (prefix, ns) + yield self._render_tag(val, prefix, localName, attrs) + + def _render_tag(self, val, prefix, localName, attrs): + if val: + if prefix: + args = (prefix, localName, attrs, val, prefix, localName) + return "<%s:%s%s>%s" % args + else: + return "<%s%s>%s" % (localName, attrs, val, localName) + else: + if prefix: + return "<%s:%s%s />" % (prefix, localName, attrs,) + else: + return "<%s%s />" % (localName, attrs) + + def parse_value(self, val): + return val + + def render_value(self, val): + if not isinstance(val, basestring): + val = str(val) + return val + + def _esc_render_value(self, val): + return escape(self.render_value(val)) + + +class String(Value): + """Field representing a simple string value.""" + # actually, the base Value() class will do this automatically. 
+ pass + + +class CDATA(Value): + """String field rendered as CDATA.""" + + def __init__(self, **kwds): + super(CDATA, self).__init__(**kwds) + if self.__dict__.get("tagname", None) is None: + raise ValueError("CDATA fields must have a tagname") + + def _esc_render_value(self, val): + val = self.render_value(val) + val = val.replace("]]>", "]]]]>") + return "" + + +class Integer(Value): + """Field representing a simple integer value.""" + + def parse_value(self, val): + return int(val) + + +class Float(Value): + """Field representing a simple float value.""" + + def parse_value(self, val): + return float(val) + + +class Boolean(Value): + """Field representing a simple boolean value. + + The strings corresponding to false are 'no', 'off', 'false' and '0', + compared case-insensitively. Note that this means an empty tag or + attribute is considered True - this is usually what you want, since + a completely missing attribute or tag can be interpreted as False. + + To enforce that the presence of a tag indicates True and the absence of + a tag indicates False, pass the keyword argument "empty_only". 
+ """ + + class arguments(Value.arguments): + empty_only = False + + def __init__(self, **kwds): + super(Boolean, self).__init__(**kwds) + if self.empty_only: + self.required = False + + def __set__(self, instance, value): + instance.__dict__[self.field_name] = bool(value) + + def parse_value(self, val): + if self.empty_only and val != "": + raise ValueError("non-empty value in empty_only Boolean") + if val.lower() in ("no", "off", "false", "0"): + return False + return True + + def render_children(self, obj, val, nsmap): + if not val and self.empty_only: + return [] + return super(Boolean, self).render_children(obj, val, nsmap) + + def render_attributes(self, obj, val, nsmap): + if not val and self.empty_only: + return [] + return super(Boolean, self).render_attributes(obj, val, nsmap) + + def render_value(self, val): + if not val: + return "false" + if self.empty_only: + return "" + return "true" + + +class Model(Field): + """Field subclass referencing another Model instance. + + This field sublcass allows Models to contain other Models recursively. + The first argument to the field constructor must be either a Model + class, or the name or tagname of a Model class. + """ + + class arguments(Field.arguments): + type = None + + def __init__(self, type=None, **kwds): + kwds["type"] = type + super(Model, self).__init__(**kwds) + + def _get_type(self): + return self.__dict__.get("type") + + def _set_type(self, value): + if value is not None: + self.__dict__["type"] = value + + type = property(_get_type, _set_type) + + def __set__(self, instance, value): + typeclass = self.typeclass + if value and not isinstance(value, typeclass): + raise ValueError("Invalid value type %s. 
Model field requires %s instance" % + (value.__class__.__name__, typeclass.__name__)) + super(Model, self).__set__(instance, value) + + @property + def typeclass(self): + try: + return self.__dict__['typeclass'] + except KeyError: + self.__dict__['typeclass'] = self._load_typeclass() + return self.__dict__['typeclass'] + + def _load_typeclass(self): + typ = self.type + if isinstance(typ, dexml.ModelMetaclass): + return typ + if typ is None: + typ = self.field_name + typeclass = None + if isinstance(typ, basestring): + if self.model_class.meta.namespace: + ns = self.model_class.meta.namespace + typeclass = dexml.ModelMetaclass.find_class(typ, ns) + if typeclass is None: + typeclass = dexml.ModelMetaclass.find_class(typ, None) + if typeclass is None: + raise ValueError("Unknown Model class: %s" % (typ,)) + else: + (ns, typ) = typ + if isinstance(typ, dexml.ModelMetaclass): + return typ + typeclass = dexml.ModelMetaclass.find_class(typ, ns) + if typeclass is None: + raise ValueError("Unknown Model class: (%s,%s)" % (ns, typ)) + return typeclass + + def parse_child_node(self, obj, node): + typeclass = self.typeclass + try: + typeclass.validate_xml_node(node) + except dexml.ParseError: + return dexml.PARSE_SKIP + else: + inst = typeclass.parse(node) + self.__set__(obj, inst) + return dexml.PARSE_DONE + + def render_attributes(self, obj, val, nsmap): + return [] + + def render_children(self, obj, val, nsmap): + if val is not None: + for data in val._render(nsmap): + yield data + + +class List(Field): + """Field subclass representing a list of fields. + + This field corresponds to a homogenous list of other fields. You would + declare it like so: + + class MyModel(Model): + items = fields.List(fields.String(tagname="item")) + + Corresponding to XML such as: + + onetwo + + + The properties 'minlength' and 'maxlength' control the allowable length + of the list. 
+ + The 'tagname' property sets an optional wrapper tag which acts as container + for list items, for example: + + class MyModel(Model): + items = fields.List(fields.String(tagname="item"), + tagname='list') + + Corresponding to XML such as: + + onetwo + + This wrapper tag is always rendered, even if the list is empty. It is + transparently removed when parsing. + """ + + class arguments(Field.arguments): + field = None + minlength = None + maxlength = None + tagname = None + + def __init__(self, field, **kwds): + if isinstance(field, Field): + kwds["field"] = field + else: + kwds["field"] = Model(field, **kwds) + super(List, self).__init__(**kwds) + if not self.minlength and not self.tagname: + self.required = False + if self.minlength and not self.required: + raise ValueError("List must be required if it has minlength") + + def _get_field(self): + field = self.__dict__["field"] + if not hasattr(field, "field_name"): + field.field_name = self.field_name + if not hasattr(field, "model_class"): + field.model_class = self.model_class + return field + + def _set_field(self, field): + print 'List._set_field:', self, field + self.__dict__["field"] = field + + field = property(_get_field, _set_field) + + def __get__(self, instance, owner=None): + print 'List.__get__', 'self:', self, 'instance:', instance, 'owner:', owner + + print '\t calling super(List, self).__get__(instance, owner):', super(List, self).__get__ + val = super(List, self).__get__(instance, owner) + print 'Back in Field.__get__' + print '\t got return value:', val + + if val is not None: + print '\t value was not None, returning it' + return val + + print '\t Value was None, setting default value: []' + self.__set__(instance, []) # Set the default value + print 'Back in Field.__get__, from self.__set__' + print '\t calling self.__get__ to get the default value back' + res = self.__get__(instance, owner) # return the default value + print 'Back in Field.__get__', 'returning result:', res + return res + + 
def parse_child_node(self, obj, node): + print "parse_child_node called on the 'List' Class" + print 'self:', self + print 'obj:', obj + print 'node:', node + # If our children are inside a grouping tag, parse + # that first. The presence of this is indicated by + # setting the empty list on the target object. + if self.tagname: + print 'self has tagname:', self.tagname + print 'Calling super(List, self).__get__(obj):', super(List, self).__get__, obj + val = super(List, self).__get__(obj) + # ^^^ Why not call own __get__ ?? + # Anywhays requires this to return None before it tries to parse further on. + print 'Got back value:', val + if val is None: + print 'node.nodeType:', node.nodeType + if node.nodeType != node.ELEMENT_NODE: + print 'Was an ELEMENT_NODE, returning PARSE_SKIP' + return dexml.PARSE_SKIP + elif node.tagName == self.tagname: + print 'node.tagName:', node.tagName + print 'Setting default value on obj:', obj + self.__set__(obj, []) + print 'Returning dexml.PARSE_CHILDREN' + return dexml.PARSE_CHILDREN + else: + print 'returning parse_skip 1' + return dexml.PARSE_SKIP + # Now we just parse each child node. 
+ tmpobj = _AttrBucket() + print 'Calling parse_child_node:', tmpobj, node + res = self.field.parse_child_node(tmpobj, node) + print 'Got result back:', res + + if res is dexml.PARSE_MORE: + raise ValueError("items in a list cannot return PARSE_MORE") + if res is dexml.PARSE_DONE: + items = self.__get__(obj) + val = getattr(tmpobj, self.field_name) + items.append(val) + return dexml.PARSE_MORE + else: + print 'returning parse_skip 2' + return dexml.PARSE_SKIP + + def parse_done(self, obj): + items = self.__get__(obj) + if self.minlength is not None and len(items) < self.minlength: + raise dexml.ParseError("Field '%s': not enough items" % (self.field_name,)) + if self.maxlength is not None and len(items) > self.maxlength: + raise dexml.ParseError("Field '%s': too many items" % (self.field_name,)) + + def render_children(self, obj, items, nsmap): + # Create a generator that yields child data chunks, and validates + # the number of items in the list as it goes. It allows any + # iterable to be passed in, not just a list. + def child_chunks(): + num_items = 0 + for item in items: + num_items += 1 + if self.maxlength is not None and num_items > self.maxlength: + msg = "Field '%s': too many items" % (self.field_name,) + raise dexml.RenderError(msg) + for data in self.field.render_children(obj, item, nsmap): + yield data + if self.minlength is not None and num_items < self.minlength: + msg = "Field '%s': not enough items" % (self.field_name,) + raise dexml.RenderError(msg) + + chunks = child_chunks() + # Render each chunk, but suppress the wrapper tag if there's no data. + try: + data = chunks.next() + except StopIteration: + if self.tagname and self.required: + yield "<%s />" % (self.tagname,) + else: + if self.tagname: + yield "<%s>" % (self.tagname,) + yield data + for data in chunks: + yield data + if self.tagname: + yield "" % (self.tagname,) + + +class Dict(Field): + """Field subclass representing a dict of fields keyed by unique attribute value. 
+ + This field corresponds to an indexed dict of other fields. You would + declare it like so: + + class MyObject(Model): + name = fields.String(tagname = 'name') + attr = fields.String(tagname = 'attr') + + class MyModel(Model): + items = fields.Dict(fields.Model(MyObject), key = 'name') + + Corresponding to XML such as: + + obj1val1 + + + The properties 'minlength' and 'maxlength' control the allowable size + of the dict as in the List class. + + If 'unique' property is set to True, parsing will raise exception on + non-unique key values. + + The 'dictclass' property controls the internal dict-like class used by + the fielt. By default it is the standard dict class. + + The 'tagname' property sets the 'wrapper' tag which acts as container + for dict items, for example: + + from collections import defaultdict + class MyObject(Model): + name = fields.String() + attr = fields.String() + + class MyDict(defaultdict): + def __init__(self): + super(MyDict, self).__init__(MyObject) + + class MyModel(Model): + objects = fields.Dict('MyObject', key = 'name', + tagname = 'dict', dictclass = MyDict) + + xml = ''\ + val1' + mymodel = MyModel.parse(xml) + obj2 = mymodel['obj2'] + print(obj2.name) + print(mymodel.render(fragment = True)) + + This wrapper tag is always rendered, even if the dict is empty. It is + transparently removed when parsing. 
+ """ + + class arguments(Field.arguments): + field = None + minlength = None + maxlength = None + unique = False + tagname = None + dictclass = dict + + def __init__(self, field, key, **kwds): + if isinstance(field, Field): + kwds["field"] = field + else: + kwds["field"] = Model(field, **kwds) + super(Dict, self).__init__(**kwds) + if not self.minlength and not self.tagname: + self.required = False + if self.minlength and not self.required: + raise ValueError("Dict must be required if it has minlength") + self.key = key + + def _get_field(self): + field = self.__dict__["field"] + if not hasattr(field, "field_name"): + field.field_name = self.field_name + if not hasattr(field, "model_class"): + field.model_class = self.model_class + return field + + def _set_field(self, field): + self.__dict__["field"] = field + + field = property(_get_field, _set_field) + + def __get__(self, instance, owner=None): + val = super(Dict, self).__get__(instance, owner) + if val is not None: + return val + + class dictclass(self.dictclass): + key = self.key + + def __setitem__(self, key, value): + keyval = getattr(value, self.key) + if keyval and keyval != key: + raise ValueError('Key field value does not match dict key') + setattr(value, self.key, key) + super(dictclass, self).__setitem__(key, value) + + self.__set__(instance, dictclass()) + return self.__get__(instance, owner) + + def parse_child_node(self, obj, node): + # If our children are inside a grouping tag, parse + # that first. The presence of this is indicated by + # setting an empty dict on the target object. + if self.tagname: + val = super(Dict, self).__get__(obj) + if val is None: + if node.nodeType != node.ELEMENT_NODE: + return dexml.PARSE_SKIP + elif node.tagName == self.tagname: + self.__get__(obj) + return dexml.PARSE_CHILDREN + else: + return dexml.PARSE_SKIP + # Now we just parse each child node. 
+ tmpobj = _AttrBucket() + res = self.field.parse_child_node(tmpobj, node) + if res is dexml.PARSE_MORE: + raise ValueError("items in a dict cannot return PARSE_MORE") + if res is dexml.PARSE_DONE: + items = self.__get__(obj) + val = getattr(tmpobj, self.field_name) + try: + key = getattr(val, self.key) + except AttributeError: + raise dexml.ParseError("Key field '%s' required but not found in dict value" % (self.key, )) + if self.unique and key in items: + raise dexml.ParseError("Key '%s' already exists in dict" % (key,)) + items[key] = val + return dexml.PARSE_MORE + else: + return dexml.PARSE_SKIP + + def parse_done(self, obj): + items = self.__get__(obj) + if self.minlength is not None and len(items) < self.minlength: + raise dexml.ParseError("Field '%s': not enough items" % (self.field_name,)) + if self.maxlength is not None and len(items) > self.maxlength: + raise dexml.ParseError("Field '%s': too many items" % (self.field_name,)) + + def render_children(self, obj, items, nsmap): + if self.minlength is not None and len(items) < self.minlength: + raise dexml.RenderError("Field '%s': not enough items" % (self.field_name,)) + if self.maxlength is not None and len(items) > self.maxlength: + raise dexml.RenderError("too many items") + if self.tagname: + children = "".join(data for item in items.values() for data in self.field.render_children(obj, item, nsmap)) + if not children: + if self.required: + yield "<%s />" % (self.tagname,) + else: + yield children.join(('<%s>' % self.tagname, '' % self.tagname)) + else: + for item in items.values(): + for data in self.field.render_children(obj, item, nsmap): + yield data + + +class Choice(Field): + """Field subclass accepting any one of a given set of Model fields.""" + + class arguments(Field.arguments): + fields = [] + + def __init__(self, *fields, **kwds): + real_fields = [] + for field in fields: + if isinstance(field, Model): + real_fields.append(field) + elif isinstance(field, basestring): + 
real_fields.append(Model(field)) + else: + raise ValueError("only Model fields are allowed within a Choice field") + kwds["fields"] = real_fields + super(Choice, self).__init__(**kwds) + + def parse_child_node(self, obj, node): + for field in self.fields: + field.field_name = self.field_name + field.model_class = self.model_class + res = field.parse_child_node(obj, node) + if res is dexml.PARSE_MORE: + raise ValueError("items in a Choice cannot return PARSE_MORE") + if res is dexml.PARSE_DONE: + return dexml.PARSE_DONE + else: + return dexml.PARSE_SKIP + + def render_children(self, obj, item, nsmap): + if item is None: + if self.required: + raise dexml.RenderError("Field '%s': required field is missing" % (self.field_name,)) + else: + for data in item._render(nsmap=nsmap): + yield data + + +class XmlNode(Field): + class arguments(Field.arguments): + tagname = None + encoding = None + + def __set__(self, instance, value): + if isinstance(value, basestring): + if isinstance(value, unicode) and self.encoding: + value = value.encode(self.encoding) + doc = dexml.minidom.parseString(value) + value = doc.documentElement + if value is not None and value.namespaceURI is not None: + nsattr = "xmlns" + if value.prefix: + nsattr = ":".join((nsattr, value.prefix,)) + value.attributes[nsattr] = value.namespaceURI + return super(XmlNode, self).__set__(instance, value) + + def parse_child_node(self, obj, node): + if self.tagname is None or self._check_tagname(node, self.tagname): + self.__set__(obj, node) + return dexml.PARSE_DONE + return dexml.PARSE_SKIP + + @classmethod + def render_children(cls, obj, val, nsmap): + if val is not None: + yield val.toxml() +