diff --git a/Examples/DataFormats/Microsoft.Analytics.Samples.Formats/Xml/XmlApplier.cs b/Examples/DataFormats/Microsoft.Analytics.Samples.Formats/Xml/XmlApplier.cs
index 12af7a8..c7111be 100644
--- a/Examples/DataFormats/Microsoft.Analytics.Samples.Formats/Xml/XmlApplier.cs
+++ b/Examples/DataFormats/Microsoft.Analytics.Samples.Formats/Xml/XmlApplier.cs
@@ -51,6 +51,12 @@ public class XmlApplier : IApplier
/// For each column, map from the XML path to the column name
private SqlMap columnPaths;
+ /// Map namespace prefixes to namespace URIs
+ /// If you have a default namespace (without prefix) in your XML document,
+ /// provide a prefix in the map for that namespace URI and use that prefix in the
+ /// XPath expression to select the nodes that are in the default namespace.
+ private SqlMap namespaceDecls;
+
/// New instances are constructed at least once per vertex
/// In the input row, the name of the column containing XML. The column must be a string.
/// Path of the XML element that contains rows.
@@ -58,11 +64,18 @@ public class XmlApplier : IApplier
/// It is specified relative to the row element.
/// Arguments to appliers must not be column references.
/// The arguments must be able to be calculated at compile time.
- public XmlApplier(string xmlColumnName, string rowPath, SqlMap columnPaths)
+ /// Maps each namespace prefix used in the XPath expressions to its namespace URI, for every namespace in the document that you want to query.
+ /// If you have a default namespace (without prefix) in your XML document,
+ /// provide a prefix in the map for that namespace URI and use that prefix in the
+ /// XPath expression to select the nodes that are in the default namespace.
+ /// If the document does not use any XML namespaces, this argument can be omitted or left null.
+ /// Do not rely on static fields because their values will not cross vertices.
+ public XmlApplier(string xmlColumnName, string rowPath, SqlMap columnPaths, SqlMap namespaceDecls = null)
{
this.xmlColumnName = xmlColumnName;
this.rowPath = rowPath;
this.columnPaths = columnPaths;
+ this.namespaceDecls = namespaceDecls;
}
/// Apply is called at least once per instance
@@ -81,17 +94,28 @@ public override IEnumerable Apply(IRow input, IUpdatableRow output)
{
throw new ArgumentException(string.Format("Column '{0}' must be of type 'string', not '{1}'", column.Name, column.Type.Name));
}
- // TODO: Add XML Namespace support and allow document fragments (should also be supported on XmlDomExtractor!).
+
XmlDocument xmlDocument = new XmlDocument();
xmlDocument.LoadXml(input.Get(this.xmlColumnName));
- foreach (XmlNode xmlNode in xmlDocument.DocumentElement.SelectNodes(this.rowPath))
+ XmlNamespaceManager nsmanager = new XmlNamespaceManager(xmlDocument.NameTable);
+
+ // If namespace declarations have been provided, add them to the namespace manager
+ if (this.namespaceDecls != null)
+ {
+ foreach (var namespaceDecl in this.namespaceDecls)
+ {
+ nsmanager.AddNamespace(namespaceDecl.Key, namespaceDecl.Value);
+ }
+ }
+
+ foreach (XmlNode xmlNode in xmlDocument.DocumentElement.SelectNodes(this.rowPath, nsmanager))
{
// IUpdatableRow implements a builder pattern to save memory allocations,
// so call output.Set in a loop
foreach(IColumn col in output.Schema)
{
var explicitColumnMapping = this.columnPaths.FirstOrDefault(columnPath => columnPath.Value == col.Name);
- XmlNode xml = xmlNode.SelectSingleNode(explicitColumnMapping.Key ?? col.Name);
+ XmlNode xml = xmlNode.SelectSingleNode(explicitColumnMapping.Key ?? col.Name, nsmanager);
output.Set(explicitColumnMapping.Value ?? col.Name, xml == null ? null : xml.InnerXml);
}