001package org.hl7.fhir.r5.elementmodel;
002
003import java.io.ByteArrayInputStream;
004
005/*
006  Copyright (c) 2011+, HL7, Inc.
007  All rights reserved.
008
009  Redistribution and use in source and binary forms, with or without modification, 
010  are permitted provided that the following conditions are met:
011
012 * Redistributions of source code must retain the above copyright notice, this 
013     list of conditions and the following disclaimer.
014 * Redistributions in binary form must reproduce the above copyright notice, 
015     this list of conditions and the following disclaimer in the documentation 
016     and/or other materials provided with the distribution.
017 * Neither the name of HL7 nor the names of its contributors may be used to 
018     endorse or promote products derived from this software without specific 
019     prior written permission.
020
021  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
022  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
023  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
024  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
025  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 
026  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
027  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
028  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
029  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
030  POSSIBILITY OF SUCH DAMAGE.
031
032 */
033
034
035import java.io.IOException;
036import java.io.InputStream;
037import java.io.OutputStream;
038import java.util.ArrayList;
039import java.util.Collections;
040import java.util.Comparator;
041import java.util.HashSet;
042import java.util.List;
043import java.util.Set;
044
045import javax.xml.parsers.DocumentBuilder;
046import javax.xml.parsers.DocumentBuilderFactory;
047import javax.xml.parsers.SAXParser;
048import javax.xml.parsers.SAXParserFactory;
049import javax.xml.transform.Transformer;
050import javax.xml.transform.TransformerFactory;
051import javax.xml.transform.dom.DOMResult;
052import javax.xml.transform.sax.SAXSource;
053
054import org.hl7.fhir.exceptions.DefinitionException;
055import org.hl7.fhir.exceptions.FHIRException;
056import org.hl7.fhir.exceptions.FHIRFormatError;
057import org.hl7.fhir.r5.conformance.profile.ProfileUtilities;
058import org.hl7.fhir.r5.context.IWorkerContext;
059import org.hl7.fhir.r5.elementmodel.Element.SpecialElement;
060import org.hl7.fhir.r5.elementmodel.Manager.FhirFormat;
061import org.hl7.fhir.r5.formats.FormatUtilities;
062import org.hl7.fhir.r5.formats.IParser.OutputStyle;
063import org.hl7.fhir.r5.model.Constants;
064import org.hl7.fhir.r5.model.DateTimeType;
065import org.hl7.fhir.r5.model.ElementDefinition;
066import org.hl7.fhir.r5.model.ElementDefinition.PropertyRepresentation;
067import org.hl7.fhir.r5.model.Enumeration;
068import org.hl7.fhir.r5.model.StructureDefinition;
069import org.hl7.fhir.r5.utils.ToolingExtensions;
070import org.hl7.fhir.r5.utils.formats.XmlLocationAnnotator;
071import org.hl7.fhir.r5.utils.formats.XmlLocationData;
072import org.hl7.fhir.utilities.ElementDecoration;
073import org.hl7.fhir.utilities.StringPair;
074import org.hl7.fhir.utilities.TextFile;
075import org.hl7.fhir.utilities.Utilities;
076import org.hl7.fhir.utilities.i18n.I18nConstants;
077import org.hl7.fhir.utilities.validation.ValidationMessage;
078import org.hl7.fhir.utilities.validation.ValidationMessage.IssueSeverity;
079import org.hl7.fhir.utilities.validation.ValidationMessage.IssueType;
080import org.hl7.fhir.utilities.xhtml.CDANarrativeFormat;
081import org.hl7.fhir.utilities.xhtml.XhtmlComposer;
082import org.hl7.fhir.utilities.xhtml.XhtmlNode;
083import org.hl7.fhir.utilities.xhtml.XhtmlParser;
084import org.hl7.fhir.utilities.xml.IXMLWriter;
085import org.hl7.fhir.utilities.xml.XMLUtil;
086import org.hl7.fhir.utilities.xml.XMLWriter;
087import org.w3c.dom.Document;
088import org.w3c.dom.Node;
089import org.xml.sax.ErrorHandler;
090import org.xml.sax.InputSource;
091import org.xml.sax.SAXParseException;
092import org.xml.sax.XMLReader;
093
094public class XmlParser extends ParserBase {
095  private boolean allowXsiLocation;
096  private String version;
097
098  public XmlParser(IWorkerContext context) {
099    super(context);
100  }
101
102  private String schemaPath;
103
104  public String getSchemaPath() {
105    return schemaPath;
106  }
107  public void setSchemaPath(String schemaPath) {
108    this.schemaPath = schemaPath;
109  }
110
111  public boolean isAllowXsiLocation() {
112    return allowXsiLocation;
113  }
114
115  public void setAllowXsiLocation(boolean allowXsiLocation) {
116    this.allowXsiLocation = allowXsiLocation;
117  }
118
119  public List<ValidatedFragment> parse(InputStream inStream) throws FHIRFormatError, DefinitionException, FHIRException, IOException {
120    
121    byte[] content = TextFile.streamToBytes(inStream);
122    ValidatedFragment focusFragment = new ValidatedFragment(ValidatedFragment.FOCUS_NAME, "xml", content, false);
123    
124    ByteArrayInputStream stream = new ByteArrayInputStream(content);
125    Document doc = null;
126    try {
127      DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
128      // xxe protection
129      factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
130      factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
131      factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
132      factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
133      factory.setXIncludeAware(false);
134      factory.setExpandEntityReferences(false);
135
136      factory.setNamespaceAware(true);
137      if (policy == ValidationPolicy.EVERYTHING) {
138        // The SAX interface appears to not work when reporting the correct version/encoding.
139        // if we can, we'll inspect the header/encoding ourselves 
140
141        stream.mark(1024);
142        version = checkHeader(focusFragment.getErrors(), stream);
143        stream.reset();
144
145        // use a slower parser that keeps location data
146        TransformerFactory transformerFactory = TransformerFactory.newInstance();
147        Transformer nullTransformer = transformerFactory.newTransformer();
148        DocumentBuilder docBuilder = factory.newDocumentBuilder();
149        doc = docBuilder.newDocument();
150        DOMResult domResult = new DOMResult(doc);
151        SAXParserFactory spf = SAXParserFactory.newInstance();
152        spf.setNamespaceAware(true);
153        spf.setValidating(false);
154        // xxe protection
155        spf.setFeature("http://xml.org/sax/features/external-general-entities", false);
156        spf.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
157        SAXParser saxParser = spf.newSAXParser();
158        XMLReader xmlReader = saxParser.getXMLReader();
159        // xxe protection
160        xmlReader.setFeature("http://xml.org/sax/features/external-general-entities", false);
161        xmlReader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
162
163        XmlLocationAnnotator locationAnnotator = new XmlLocationAnnotator(xmlReader, doc);
164        InputSource inputSource = new InputSource(stream);
165        SAXSource saxSource = new SAXSource(locationAnnotator, inputSource);
166        nullTransformer.transform(saxSource, domResult);
167      } else {
168        DocumentBuilder builder = factory.newDocumentBuilder();
169        builder.setErrorHandler(new NullErrorHandler());
170        doc = builder.parse(stream);
171      }
172    } catch (Exception e) {
173      if (e.getMessage().contains("lineNumber:") && e.getMessage().contains("columnNumber:")) {
174        int line = Utilities.parseInt(extractVal(e.getMessage(), "lineNumber"), 0); 
175        int col = Utilities.parseInt(extractVal(e.getMessage(), "columnNumber"), 0); 
176        logError(focusFragment.getErrors(), ValidationMessage.NO_RULE_DATE, line, col, "(xml)", IssueType.INVALID, e.getMessage().substring(e.getMessage().lastIndexOf(";")+1).trim(), IssueSeverity.FATAL);
177      } else {
178        logError(focusFragment.getErrors(), ValidationMessage.NO_RULE_DATE, 0, 0, "(xml)", IssueType.INVALID, e.getMessage(), IssueSeverity.FATAL);
179      }
180      doc = null;
181    }
182    if (doc != null) {
183      focusFragment.setElement(parse(focusFragment.getErrors(), doc));
184    }
185    List<ValidatedFragment> res = new ArrayList<>();
186    res.add(focusFragment);
187    return res;
188  }
189
190
191  private String extractVal(String src, String name) {
192    src = src.substring(src.indexOf(name)+name.length()+1);
193    src = src.substring(0, src.indexOf(";")).trim();
194    return src;
195  }
196  private void checkForProcessingInstruction(List<ValidationMessage> errors, Document document) throws FHIRFormatError {
197    if (policy == ValidationPolicy.EVERYTHING && FormatUtilities.FHIR_NS.equals(document.getDocumentElement().getNamespaceURI())) {
198      Node node = document.getFirstChild();
199      while (node != null) {
200        if (node.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE)
201          logError(errors, ValidationMessage.NO_RULE_DATE, line(document, false), col(document, false), "(document)", IssueType.INVALID, context.formatMessage(
202              I18nConstants.NO_PROCESSING_INSTRUCTIONS_ALLOWED_IN_RESOURCES), IssueSeverity.ERROR);
203        node = node.getNextSibling();
204      }
205    }
206  }
207
208
209  private int line(Node node, boolean end) {
210    XmlLocationData loc = node == null ? null : (XmlLocationData) node.getUserData(XmlLocationData.LOCATION_DATA_KEY);
211    return loc == null ? 0 : end ? loc.getEndLine() : loc.getStartLine();
212  }
213
214  private int col(Node node, boolean end) {
215    XmlLocationData loc = node == null ? null : (XmlLocationData) node.getUserData(XmlLocationData.LOCATION_DATA_KEY);
216    return loc == null ? 0 : end ? loc.getEndColumn() : loc.getStartColumn();
217  }
218
219  public Element parse(List<ValidationMessage> errors, Document doc) throws FHIRFormatError, DefinitionException, FHIRException, IOException {
220    checkForProcessingInstruction(errors, doc);
221    org.w3c.dom.Element element = doc.getDocumentElement();
222    return parse(errors, element);
223  }
224
225  public Element parse(List<ValidationMessage> errors, org.w3c.dom.Element element) throws FHIRFormatError, DefinitionException, FHIRException, IOException {
226    String ns = element.getNamespaceURI();
227    String name = element.getLocalName();
228    String path = "/"+pathPrefix(ns)+name;
229
230    StructureDefinition sd = getDefinition(errors, line(element, false), col(element, false), (ns == null ? "noNamespace" : ns), name);
231    if (sd == null)
232      return null;
233
234    Element result = new Element(element.getLocalName(), new Property(context, sd.getSnapshot().getElement().get(0), sd, getProfileUtilities(), getContextUtilities())).setFormat(FhirFormat.XML);
235    result.setPath(element.getLocalName());
236    checkElement(errors, element, result, path, result.getProperty(), false);
237    result.markLocation(line(element, false), col(element, false));
238    result.setType(element.getLocalName());
239    parseChildren(errors, path, element, result);
240    result.numberChildren();
241    return result;
242  }
243
244  private String pathPrefix(String ns) {
245    if (Utilities.noString(ns))
246      return "";
247    if (ns.equals(FormatUtilities.FHIR_NS))
248      return "f:";
249    if (ns.equals(FormatUtilities.XHTML_NS))
250      return "h:";
251    if (ns.equals("urn:hl7-org:v3"))
252      return "v3:";
253    if (ns.equals("urn:hl7-org:sdtc")) 
254      return "sdtc:";
255    if (ns.equals("urn:ihe:pharm"))
256      return "pharm:";
257    return "?:";
258  }
259
260  private boolean empty(org.w3c.dom.Element element) {
261    for (int i = 0; i < element.getAttributes().getLength(); i++) {
262      String n = element.getAttributes().item(i).getNodeName();
263      if (!n.equals("xmlns") && !n.startsWith("xmlns:"))
264        return false;
265    }
266    if (!Utilities.noString(element.getTextContent().trim()))
267      return false;
268
269    Node n = element.getFirstChild();
270    while (n != null) {
271      if (n.getNodeType() == Node.ELEMENT_NODE)
272        return false;
273      n = n.getNextSibling();
274    }
275    return true;
276  }
277
278  private void checkElement(List<ValidationMessage> errors, org.w3c.dom.Element element, Element e, String path, Property prop, boolean xsiTypeChecked) throws FHIRFormatError {
279    if (policy == ValidationPolicy.EVERYTHING) {
280      if (empty(element) && FormatUtilities.FHIR_NS.equals(element.getNamespaceURI())) // this rule only applies to FHIR Content
281        logError(errors, ValidationMessage.NO_RULE_DATE, line(element, false), col(element, false), path, IssueType.INVALID, context.formatMessage(I18nConstants.ELEMENT_MUST_HAVE_SOME_CONTENT), IssueSeverity.ERROR);
282      String ns = prop.getXmlNamespace();
283      String elementNs = element.getNamespaceURI();
284      if (elementNs == null) {
285        elementNs = "noNamespace";
286      }
287      if (!elementNs.equals(ns)) {
288        logError(errors, ValidationMessage.NO_RULE_DATE, line(element, false), col(element, false), path, IssueType.INVALID, context.formatMessage(I18nConstants.WRONG_NAMESPACE__EXPECTED_, ns), IssueSeverity.ERROR);
289      }
290      if (!xsiTypeChecked) {
291        String xsiType = element.getAttributeNS(FormatUtilities.NS_XSI, "type");
292        if (!Utilities.noString(xsiType)) {
293          String actualType = prop.getXmlTypeName();
294          if (xsiType.equals(actualType)) {
295            logError(errors, "2023-10-12", line(element, false), col(element, false), path, IssueType.INVALID, context.formatMessage(I18nConstants.XSI_TYPE_UNNECESSARY), IssueSeverity.INFORMATION);            
296          } else {
297            StructureDefinition sd = findLegalConstraint(xsiType, actualType);
298            if (sd != null) {
299              e.setType(sd.getType());
300              e.setExplicitType(xsiType);
301            } else {
302              logError(errors, "2023-10-12", line(element, false), col(element, false), path, IssueType.INVALID, context.formatMessage(I18nConstants.XSI_TYPE_WRONG, xsiType, actualType), IssueSeverity.ERROR);           
303            }  
304          }
305        }
306      }
307    }
308  }
309
310  private StructureDefinition findLegalConstraint(String xsiType, String actualType) {
311    StructureDefinition sdA = context.fetchTypeDefinition(actualType);
312    StructureDefinition sd = context.fetchTypeDefinition(xsiType);
313    while (sd != null) {
314      if (sd == sdA) {
315        return sd;
316      }
317      sd = context.fetchResource(StructureDefinition.class, sd.getBaseDefinition());
318    }
319    return null;
320  }
321  
322  public Element parse(List<ValidationMessage> errors, org.w3c.dom.Element base, String type) throws Exception {
323    StructureDefinition sd = getDefinition(errors, 0, 0, FormatUtilities.FHIR_NS, type);
324    Element result = new Element(base.getLocalName(), new Property(context, sd.getSnapshot().getElement().get(0), sd, getProfileUtilities(), getContextUtilities())).setFormat(FhirFormat.XML).setNativeObject(base);
325    result.setPath(base.getLocalName());
326    String path = "/"+pathPrefix(base.getNamespaceURI())+base.getLocalName();
327    checkElement(errors, base, result, path, result.getProperty(), false);
328    result.setType(base.getLocalName());
329    parseChildren(errors, path, base, result);
330    result.numberChildren();
331    return result;
332  }
333
334  private void parseChildren(List<ValidationMessage> errors, String path, org.w3c.dom.Element node, Element element) throws FHIRFormatError, FHIRException, IOException, DefinitionException {
335    // this parsing routine retains the original order in a the XML file, to support validation
336    reapComments(node, element);
337    List<Property> properties = element.getProperty().getChildProperties(element.getName(), XMLUtil.getXsiType(node));
338    Property cgProp = getChoiceGroupProp(properties);
339    Property mtProp = cgProp == null ? null : getTextProp(cgProp.getChildProperties(null, null));
340
341    String text = mtProp == null ? XMLUtil.getDirectText(node).trim() : null;
342    int line = line(node, false);
343    int col = col(node, false);
344    if (!Utilities.noString(text)) {
345      Property property = getTextProp(properties);
346      if (property != null) {
347        if ("ED.data[x]".equals(property.getDefinition().getId()) || (property.getDefinition()!=null && property.getDefinition().getBase()!=null && "ED.data[x]".equals(property.getDefinition().getBase().getPath()))) {
348          if ("B64".equals(node.getAttribute("representation"))) {
349            Element n = new Element("dataBase64Binary", property, "base64Binary", text).markLocation(line, col).setFormat(FhirFormat.XML);
350            n.setPath(element.getPath()+"."+property.getName());
351            element.getChildren().add(n);
352          } else {
353            Element n = new Element("dataString", property, "string", text).markLocation(line, col).setFormat(FhirFormat.XML);
354            n.setPath(element.getPath()+"."+property.getName());
355            element.getChildren().add(n);
356          }
357        } else {
358          Element n = new Element(property.getName(), property, property.getType(), text).markLocation(line, col).setFormat(FhirFormat.XML);
359          n.setPath(element.getPath()+"."+property.getName());
360          element.getChildren().add(n);
361        }
362      } else {
363        Node n = node.getFirstChild();
364        while (n != null) {
365          if (n.getNodeType() == Node.TEXT_NODE && !Utilities.noString(n.getTextContent().trim())) {
366            Node nt = n; // try to find the nearest element for a line/col location
367            boolean end = false;
368            while (nt.getPreviousSibling() != null && nt.getNodeType() != Node.ELEMENT_NODE) {
369              nt = nt.getPreviousSibling();
370              end = true;
371            }
372            while (nt.getNextSibling() != null && nt.getNodeType() != Node.ELEMENT_NODE) {
373              nt = nt.getNextSibling();
374              end = false;
375            }
376            line = line(nt, end);
377            col = col(nt, end);
378            logError(errors, ValidationMessage.NO_RULE_DATE, line, col, path, IssueType.STRUCTURE, context.formatMessage(I18nConstants.TEXT_SHOULD_NOT_BE_PRESENT, Utilities.makeSingleLine(n.getTextContent().trim())), IssueSeverity.ERROR);
379          }
380          n = n.getNextSibling();
381        }
382      }                 
383    }
384
385    for (int i = 0; i < node.getAttributes().getLength(); i++) {
386      Node attr = node.getAttributes().item(i);
387      String value = attr.getNodeValue();
388      if (!validAttrValue(value)) {
389        logError(errors, ValidationMessage.NO_RULE_DATE, line, col, path, IssueType.STRUCTURE, context.formatMessage(I18nConstants.XML_ATTR_VALUE_INVALID, attr.getNodeName()), IssueSeverity.ERROR);
390      }
391      if (!(attr.getNodeName().equals("xmlns") || attr.getNodeName().startsWith("xmlns:"))) {
392        Property property = getAttrProp(properties, attr.getLocalName(), attr.getNamespaceURI());
393        if (property != null) {
394          String av = attr.getNodeValue();
395          if (ToolingExtensions.hasExtension(property.getDefinition(), ToolingExtensions.EXT_DATE_FORMAT))
396            av = convertForDateFormatFromExternal(ToolingExtensions.readStringExtension(property.getDefinition(), ToolingExtensions.EXT_DATE_FORMAT), av);          
397          if (property.getName().equals("value") && element.isPrimitive())
398            element.setValue(av);
399          else {
400            String[] vl = {av};
401            if (property.isList() && av.contains(" ")) {
402              vl = av.split(" ");
403            }
404            for (String v : vl) {
405              Element n = new Element(property.getName(), property, property.getType(), v).markLocation(line, col).setFormat(FhirFormat.XML);
406              n.setPath(element.getPath()+"."+property.getName());
407              element.getChildren().add(n);
408            }
409          }
410        } else {
411          boolean ok = false;
412          if (FormatUtilities.FHIR_NS.equals(node.getNamespaceURI())) {
413            if (attr.getLocalName().equals("schemaLocation") && FormatUtilities.NS_XSI.equals(attr.getNamespaceURI())) {
414              ok = ok || allowXsiLocation; 
415            }
416          } else
417            ok = ok || (attr.getLocalName().equals("schemaLocation")); // xsi:schemalocation allowed for non FHIR content
418          ok = ok || (hasTypeAttr(element) && attr.getLocalName().equals("type") && FormatUtilities.NS_XSI.equals(attr.getNamespaceURI())); // xsi:type allowed if element says so
419          if (!ok) { 
420            logError(errors, ValidationMessage.NO_RULE_DATE, line(node, false), col(node, false), path, IssueType.STRUCTURE, context.formatMessage(I18nConstants.UNDEFINED_ATTRIBUTE__ON__FOR_TYPE__PROPERTIES__, attr.getNodeName(), node.getNodeName(), element.fhirType(), properties), IssueSeverity.ERROR);
421          }
422        }
423      }
424    }
425
426    String lastName = null;
427    int repeatCount = 0;
428    Node child = node.getFirstChild();
429    while (child != null) {
430      if (child.getNodeType() == Node.ELEMENT_NODE) {
431        Property property = getElementProp(properties, child.getLocalName(), child.getNamespaceURI());
432        
433        if (property != null) {
434          if (property.getName().equals(lastName)) {
435            repeatCount++;
436          } else {
437            lastName = property.getName();
438            repeatCount = 0;
439          }
440          if (!property.isChoice() && "xhtml".equals(property.getType())) {
441            XhtmlNode xhtml;
442            if (property.getDefinition().hasRepresentation(PropertyRepresentation.CDATEXT))
443              xhtml = new CDANarrativeFormat().convert((org.w3c.dom.Element) child);
444            else {
445              XhtmlParser xp = new XhtmlParser();
446              xhtml = xp.parseHtmlNode((org.w3c.dom.Element) child);
447              if (policy == ValidationPolicy.EVERYTHING) {
448                for (StringPair s : xp.getValidationIssues()) {
449                  logError(errors, "2022-11-17", line(child, false), col(child, false), path, IssueType.INVALID, context.formatMessage(s.getName(), s.getValue()), IssueSeverity.ERROR);                
450                }
451              }
452            }
453            Element n = new Element(property.getName(), property, "xhtml", new XhtmlComposer(XhtmlComposer.XML, false).compose(xhtml)).setXhtml(xhtml).markLocation(line(child, false), col(child, false)).setFormat(FhirFormat.XML).setNativeObject(child);
454            n.setPath(element.getPath()+"."+property.getName());
455            element.getChildren().add(n);
456          } else {
457            String npath = path+"/"+pathPrefix(child.getNamespaceURI())+child.getLocalName();
458            String name = child.getLocalName();
459            if (!property.isChoice() && !name.equals(property.getName())) {
460              name = property.getName();
461            }
462            Element n = new Element(name, property).markLocation(line(child, false), col(child, false)).setFormat(FhirFormat.XML).setNativeObject(child);
463            if (property.isList()) {
464              n.setPath(element.getPath()+"."+property.getName()+"["+repeatCount+"]");                                    
465            } else {
466              n.setPath(element.getPath()+"."+property.getName());
467            }
468            boolean xsiTypeChecked = false;
469            boolean ok = true;
470            if (property.isChoice()) {
471              if (property.getDefinition().hasRepresentation(PropertyRepresentation.TYPEATTR)) {
472                String xsiType = ((org.w3c.dom.Element) child).getAttributeNS(FormatUtilities.NS_XSI, "type");
473                if (Utilities.noString(xsiType)) {
474                  if (ToolingExtensions.hasExtension(property.getDefinition(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-defaulttype")) {
475                    xsiType = ToolingExtensions.readStringExtension(property.getDefinition(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-defaulttype");
476                    n.setType(xsiType);
477                  } else {
478                    logError(errors, ValidationMessage.NO_RULE_DATE, line(child, false), col(child, false), path, IssueType.STRUCTURE, context.formatMessage(I18nConstants.NO_TYPE_FOUND_ON_, child.getLocalName()), IssueSeverity.ERROR);
479                    ok = false;
480                  }
481                } else {
482                  if (xsiType.contains(":"))
483                    xsiType = xsiType.substring(xsiType.indexOf(":")+1);
484                  n.setType(xsiType);
485                  n.setExplicitType(xsiType);
486                }
487                xsiTypeChecked = true;
488              } else
489                n.setType(n.getType());
490            }
491            checkElement(errors, (org.w3c.dom.Element) child, n, npath, n.getProperty(), xsiTypeChecked);
492            element.getChildren().add(n);
493            if (ok) {
494              if (property.isResource())
495                parseResource(errors, npath, (org.w3c.dom.Element) child, n, property);
496              else
497                parseChildren(errors, npath, (org.w3c.dom.Element) child, n);
498            }
499          }
500        } else {
501          if (cgProp != null) {
502            property = getElementProp(cgProp.getChildProperties(null, null), child.getLocalName(), child.getNamespaceURI());
503            if (property != null) {
504              if (cgProp.getName().equals(lastName)) {
505                repeatCount++;
506              } else {
507                lastName = cgProp.getName();
508                repeatCount = 0;
509              }
510              
511              String npath = path+"/"+pathPrefix(cgProp.getXmlNamespace())+cgProp.getName();
512              String name = cgProp.getName();
513              Element cgn = new Element(cgProp.getName(), cgProp).setFormat(FhirFormat.XML);
514              cgn.setPath(element.getPath()+"."+cgProp.getName()+"["+repeatCount+"]"); 
515              element.getChildren().add(cgn);
516              
517              npath = npath+"/"+pathPrefix(child.getNamespaceURI())+child.getLocalName();
518              name = child.getLocalName();
519              Element n = new Element(name, property).markLocation(line(child, false), col(child, false)).setFormat(FhirFormat.XML).setNativeObject(child);
520              cgn.getChildren().add(n);
521              n.setPath(element.getPath()+"."+property.getName());
522              checkElement(errors, (org.w3c.dom.Element) child, n, npath, n.getProperty(), false);
523              parseChildren(errors, npath, (org.w3c.dom.Element) child, n);
524            }
525          }
526          if (property == null) {
527            logError(errors, ValidationMessage.NO_RULE_DATE, line(child, false), col(child, false), path, IssueType.STRUCTURE, context.formatMessage(I18nConstants.UNDEFINED_ELEMENT_, child.getLocalName(), path), IssueSeverity.ERROR);
528          }
529        }
530      } else if (child.getNodeType() == Node.TEXT_NODE && !Utilities.noString(child.getTextContent().trim()) && mtProp != null) {
531        if (cgProp.getName().equals(lastName)) {
532          repeatCount++;
533        } else {
534          lastName = cgProp.getName();
535          repeatCount = 0;
536        }
537        
538        String npath = path+"/"+pathPrefix(cgProp.getXmlNamespace())+cgProp.getName();
539        String name = cgProp.getName();
540        Element cgn = new Element(cgProp.getName(), cgProp).setFormat(FhirFormat.XML);
541        cgn.setPath(element.getPath()+"."+cgProp.getName()+"["+repeatCount+"]"); 
542        element.getChildren().add(cgn);
543        
544        npath = npath+"/text()";
545        name = mtProp.getName();
546        Element n = new Element(name, mtProp, mtProp.getType(), child.getTextContent().trim()).markLocation(line(child, false), col(child, false)).setFormat(FhirFormat.XML).setNativeObject(child);
547        cgn.getChildren().add(n);
548        n.setPath(element.getPath()+"."+mtProp.getName());
549
550        
551      } else if (child.getNodeType() == Node.CDATA_SECTION_NODE) {
552        logError(errors, ValidationMessage.NO_RULE_DATE, line(child, false), col(child, false), path, IssueType.STRUCTURE, context.formatMessage(I18nConstants.CDATA_IS_NOT_ALLOWED), IssueSeverity.ERROR);
553      } else if (!Utilities.existsInList(child.getNodeType(), 3, 8)) {
554        logError(errors, ValidationMessage.NO_RULE_DATE, line(child, false), col(child, false), path, IssueType.STRUCTURE, context.formatMessage(I18nConstants.NODE_TYPE__IS_NOT_ALLOWED, Integer.toString(child.getNodeType())), IssueSeverity.ERROR);
555      }
556      child = child.getNextSibling();
557    }
558  }
559
560  private Property getChoiceGroupProp(List<Property> properties) {
561    for (Property p : properties) {
562      if (p.getDefinition().hasExtension(ToolingExtensions.EXT_ID_CHOICE_GROUP)) {
563        return p;
564      }
565    }
566    return null;
567  }
568  
569  private boolean validAttrValue(String value) {
570    if (version == null) {
571      return true;
572    }
573    if (version.equals("1.0")) {
574      boolean ok = true;
575      for (char ch : value.toCharArray()) {
576        if (ch <= 0x1F && !Utilities.existsInList(ch, '\r', '\n', '\t')) {
577          ok = false;
578        }
579      }
580      return ok;
581    } else
582      return true;
583  }
584
585
586  private Property getElementProp(List<Property> properties, String nodeName, String namespace) {
587    List<Property> propsSortedByLongestFirst = new ArrayList<Property>(properties);
588    // sort properties according to their name longest first, so .requestOrganizationReference comes first before .request[x]
589    // and therefore the longer property names get evaluated first
590    Collections.sort(propsSortedByLongestFirst, new Comparator<Property>() {
591      @Override
592      public int compare(Property o1, Property o2) {
593        return o2.getName().length() - o1.getName().length();
594      }
595    });
596    // first scan, by namespace
597    for (Property p : propsSortedByLongestFirst) {
598      if (!p.getDefinition().hasRepresentation(PropertyRepresentation.XMLATTR) && !p.getDefinition().hasRepresentation(PropertyRepresentation.XMLTEXT)) {
599        if (p.getXmlName().equals(nodeName) && p.getXmlNamespace().equals(namespace)) 
600          return p;
601      }
602    }
603    for (Property p : propsSortedByLongestFirst) {
604      if (!p.getDefinition().hasRepresentation(PropertyRepresentation.XMLATTR) && !p.getDefinition().hasRepresentation(PropertyRepresentation.XMLTEXT)) {
605        if (p.getXmlName().equals(nodeName)) 
606          return p;
607        if (p.getName().endsWith("[x]") && nodeName.length() > p.getName().length()-3 && p.getName().substring(0, p.getName().length()-3).equals(nodeName.substring(0, p.getName().length()-3))) 
608          return p;
609      }
610    }
611    
612
613    return null;
614  }
615
616  private Property getAttrProp(List<Property> properties, String nodeName, String namespace) {
617    for (Property p : properties) {
618      if (p.getXmlName().equals(nodeName) && p.getDefinition().hasRepresentation(PropertyRepresentation.XMLATTR) && p.getXmlNamespace().equals(namespace)) {
619        return p;
620      }
621    }
622    if (namespace == null) {
623      for (Property p : properties) {
624        if (p.getXmlName().equals(nodeName) && p.getDefinition().hasRepresentation(PropertyRepresentation.XMLATTR)) {
625          return p;
626        }    
627      }
628    }
629    return null;
630  }
631
632  private Property getTextProp(List<Property> properties) {
633    for (Property p : properties)
634      if (p.getDefinition().hasRepresentation(PropertyRepresentation.XMLTEXT)) 
635        return p;
636    return null;
637  }
638
639  private String convertForDateFormatFromExternal(String fmt, String av) throws FHIRException {
640    if ("v3".equals(fmt) || "YYYYMMDDHHMMSS.UUUU[+|-ZZzz]".equals(fmt)) {
641      try {
642        DateTimeType d = DateTimeType.parseV3(av);
643        return d.asStringValue();
644      } catch (Exception e) {
645        return av; // not at all clear what to do in this case.
646      }
647    }
648    throw new FHIRException(context.formatMessage(I18nConstants.UNKNOWN_DATA_FORMAT_, fmt));
649  }
650
651  private String convertForDateFormatToExternal(String fmt, String av) throws FHIRException {
652    if ("v3".equals(fmt) || "YYYYMMDDHHMMSS.UUUU[+|-ZZzz]".equals(fmt)) {
653      DateTimeType d = new DateTimeType(av);
654      return d.getAsV3();
655    } else
656      throw new FHIRException(context.formatMessage(I18nConstants.UNKNOWN_DATE_FORMAT_, fmt));
657  }
658
659  private void parseResource(List<ValidationMessage> errors, String string, org.w3c.dom.Element container, Element parent, Property elementProperty) throws FHIRFormatError, DefinitionException, FHIRException, IOException {
660    org.w3c.dom.Element res = XMLUtil.getFirstChild(container);
661    String name = res.getLocalName();
662    StructureDefinition sd = context.fetchResource(StructureDefinition.class, ProfileUtilities.sdNs(name, null));
663    if (sd == null)
664      throw new FHIRFormatError(context.formatMessage(I18nConstants.CONTAINED_RESOURCE_DOES_NOT_APPEAR_TO_BE_A_FHIR_RESOURCE_UNKNOWN_NAME_, res.getLocalName()));
665    parent.updateProperty(new Property(context, sd.getSnapshot().getElement().get(0), sd, getProfileUtilities(), getContextUtilities()), SpecialElement.fromProperty(parent.getProperty()), elementProperty);
666    parent.setType(name);
667    parseChildren(errors, res.getLocalName(), res, parent);
668  }
669
670  private void reapComments(org.w3c.dom.Element element, Element context) {
671    Node node = element.getPreviousSibling();
672    while (node != null && node.getNodeType() != Node.ELEMENT_NODE) {
673      if (node.getNodeType() == Node.COMMENT_NODE)
674        context.getComments().add(0, node.getTextContent());
675      node = node.getPreviousSibling();
676    }
677    node = element.getLastChild();
678    while (node != null && node.getNodeType() != Node.ELEMENT_NODE) {
679      node = node.getPreviousSibling();
680    }
681    while (node != null) {
682      if (node.getNodeType() == Node.COMMENT_NODE)
683        context.getComments().add(node.getTextContent());
684      node = node.getNextSibling();
685    }
686  }
687
688  private boolean isAttr(Property property) {
689    for (Enumeration<PropertyRepresentation> r : property.getDefinition().getRepresentation()) {
690      if (r.getValue() == PropertyRepresentation.XMLATTR) {
691        return true;
692      }
693    }
694    return false;
695  }
696
697  private boolean isCdaText(Property property) {
698    for (Enumeration<PropertyRepresentation> r : property.getDefinition().getRepresentation()) {
699      if (r.getValue() == PropertyRepresentation.CDATEXT) {
700        return true;
701      }
702    }
703    return false;
704  }
705
706  private boolean isTypeAttr(Property property) {
707    for (Enumeration<PropertyRepresentation> r : property.getDefinition().getRepresentation()) {
708      if (r.getValue() == PropertyRepresentation.TYPEATTR) {
709        return true;
710      }
711    }
712    return false;
713  }
714
715  private boolean isText(Property property) {
716    for (Enumeration<PropertyRepresentation> r : property.getDefinition().getRepresentation()) {
717      if (r.getValue() == PropertyRepresentation.XMLTEXT) {
718        return true;
719      }
720    }
721    return false;
722  }
723
724  @Override
725  public void compose(Element e, OutputStream stream, OutputStyle style, String base) throws IOException, FHIRException {
726    XMLWriter xml = new XMLWriter(stream, "UTF-8");
727    xml.setSortAttributes(false);
728    xml.setPretty(style == OutputStyle.PRETTY);
729    xml.start();
730    if (e.getPath() == null) {
731      e.populatePaths(null);
732    }
733    String ns = e.getProperty().getXmlNamespace();
734    if (ns!=null && !"noNamespace".equals(ns)) {
735      xml.setDefaultNamespace(ns);
736    }
737    if (hasTypeAttr(e))
738      xml.namespace("http://www.w3.org/2001/XMLSchema-instance", "xsi");
739    addNamespaces(xml, e);
740    composeElement(xml, e, e.getType(), true);
741    xml.end();
742  }
743
744  private void addNamespaces(IXMLWriter xml, Element e) throws IOException {
745    String ns = e.getProperty().getXmlNamespace();
746    if (ns!=null && xml.getDefaultNamespace()!=null && !xml.getDefaultNamespace().equals(ns)){
747      if (!xml.namespaceDefined(ns)) {
748        String prefix = pathPrefix(ns);
749        if (prefix.endsWith(":")) {
750          prefix = prefix.substring(0, prefix.length()-1);
751        }
752        if ("?".equals(prefix)) {
753          xml.namespace(ns);
754        } else {
755          xml.namespace(ns, prefix);
756        }
757      }
758    }
759    for (Element c : e.getChildren()) {
760      addNamespaces(xml, c);
761    }
762  }
763
764  private boolean hasTypeAttr(Element e) {
765    if (isTypeAttr(e.getProperty()))
766      return true;
767    for (Element c : e.getChildren()) {
768      if (hasTypeAttr(c))
769        return true;
770    }
771    // xsi_type is always allowed on CDA elements. right now, I'm not sure where to indicate this in the model, 
772    // so it's just hardcoded here 
773    if (e.getType() != null && e.getType().startsWith(Constants.NS_CDA_ROOT)) {
774      return true;
775    }
776    return false;
777  }
778
779  private void setXsiTypeIfIsTypeAttr(IXMLWriter xml, Element element) throws IOException, FHIRException {
780    if (isTypeAttr(element.getProperty()) && !Utilities.noString(element.getType())) {
781      String type = element.getType();
782      if (Utilities.isAbsoluteUrl(type)) {
783        type = type.substring(type.lastIndexOf("/")+1);
784      }
785      xml.attribute("xsi:type",type);    
786    }
787  }
788
789  public void compose(Element e, IXMLWriter xml) throws Exception {
790    if (e.getPath() == null) {
791      e.populatePaths(null);
792    }
793    xml.start();
794    xml.setDefaultNamespace(e.getProperty().getXmlNamespace());
795    if (schemaPath != null) {
796      xml.setSchemaLocation(FormatUtilities.FHIR_NS, Utilities.pathURL(schemaPath, e.fhirType()+".xsd"));
797    }
798    composeElement(xml, e, e.getType(), true);
799    xml.end();
800  }
801
802  private void composeElement(IXMLWriter xml, Element element, String elementName, boolean root) throws IOException, FHIRException {
803    if (showDecorations) {
804      @SuppressWarnings("unchecked")
805      List<ElementDecoration> decorations = (List<ElementDecoration>) element.getUserData("fhir.decorations");
806      if (decorations != null)
807        for (ElementDecoration d : decorations)
808          xml.decorate(d);
809    }
810    for (String s : element.getComments()) {
811      xml.comment(s, true);
812    }
813    if (isText(element.getProperty())) {
814      if (linkResolver != null)
815        xml.link(linkResolver.resolveProperty(element.getProperty()));
816      xml.enter(element.getProperty().getXmlNamespace(),elementName);
817      if (linkResolver != null && element.getProperty().isReference()) {
818        String ref = linkResolver.resolveReference(getReferenceForElement(element));
819        if (ref != null) {
820          xml.externalLink(ref);
821        }
822      }
823      xml.text(element.getValue());
824      xml.exit(element.getProperty().getXmlNamespace(),elementName);   
825    } else if (!element.hasChildren() && !element.hasValue()) {
826      if (element.getExplicitType() != null)
827        xml.attribute("xsi:type", element.getExplicitType());
828      xml.element(elementName);
829    } else if (element.isPrimitive() || (element.hasType() && isPrimitive(element.getType()))) {
830      if (element.getType().equals("xhtml")) {
831        String rawXhtml = element.getValue();
832        if (isCdaText(element.getProperty())) {
833          new CDANarrativeFormat().convert(xml, new XhtmlParser().parseFragment(rawXhtml));
834        } else {
835          xml.escapedText(rawXhtml);
836          xml.anchor("end-xhtml");
837        }
838      } else if (isText(element.getProperty())) {
839        if (linkResolver != null)
840          xml.link(linkResolver.resolveProperty(element.getProperty()));
841        xml.text(element.getValue());
842      } else {
843        setXsiTypeIfIsTypeAttr(xml, element);
844        if (element.hasValue()) {
845          if (linkResolver != null)
846            xml.link(linkResolver.resolveType(element.getType()));
847          xml.attribute("value", element.getValue());
848        }
849        if (linkResolver != null)
850          xml.link(linkResolver.resolveProperty(element.getProperty()));
851        if (element.hasChildren()) {
852          xml.enter(element.getProperty().getXmlNamespace(), elementName);
853          if (linkResolver != null && element.getProperty().isReference()) {
854            String ref = linkResolver.resolveReference(getReferenceForElement(element));
855            if (ref != null) {
856              xml.externalLink(ref);
857            }
858          }
859          for (Element child : element.getChildren()) 
860            composeElement(xml, child, child.getName(), false);
861          xml.exit(element.getProperty().getXmlNamespace(),elementName);
862        } else
863          xml.element(elementName);
864      }
865    } else {
866      setXsiTypeIfIsTypeAttr(xml, element);
867      Set<String> handled = new HashSet<>();
868      for (Element child : element.getChildren()) {
869        if (!handled.contains(child.getName()) && isAttr(child.getProperty()) && wantCompose(element.getPath(), child)) {
870          handled.add(child.getName());
871          String av = child.getValue();
872          if (child.getProperty().isList()) {
873            for (Element c2 : element.getChildren()) {
874              if (c2 != child && c2.getName().equals(child.getName())) {
875                av = av + " "+c2.getValue();
876              }
877            }            
878          }
879          if (linkResolver != null)
880            xml.link(linkResolver.resolveType(child.getType()));
881          if (ToolingExtensions.hasExtension(child.getProperty().getDefinition(), ToolingExtensions.EXT_DATE_FORMAT))
882            av = convertForDateFormatToExternal(ToolingExtensions.readStringExtension(child.getProperty().getDefinition(), ToolingExtensions.EXT_DATE_FORMAT), av);
883          xml.attribute(child.getProperty().getXmlNamespace(),child.getProperty().getXmlName(), av);
884        }
885      }
886      if (linkResolver != null)
887        xml.link(linkResolver.resolveProperty(element.getProperty()));
888      if (!xml.namespaceDefined(element.getProperty().getXmlNamespace())) {
889        String abbrev = makeNamespaceAbbrev(element.getProperty(), xml);
890        xml.namespace(element.getProperty().getXmlNamespace(), abbrev);
891      }
892      xml.enter(element.getProperty().getXmlNamespace(), elementName);
893
894      if (!root && element.getSpecial() != null) {
895        if (linkResolver != null)
896          xml.link(linkResolver.resolveProperty(element.getProperty()));
897        xml.enter(element.getProperty().getXmlNamespace(),element.getType());
898      }
899      if (linkResolver != null && element.getProperty().isReference()) {
900        String ref = linkResolver.resolveReference(getReferenceForElement(element));
901        if (ref != null) {
902          xml.externalLink(ref);
903        }
904      }
905      for (Element child : element.getChildren()) {
906        if (wantCompose(element.getPath(), child)) {
907          if (isText(child.getProperty())) {
908            if (linkResolver != null)
909              xml.link(linkResolver.resolveProperty(element.getProperty()));
910            xml.text(child.getValue());
911          } else if (!isAttr(child.getProperty()))
912            composeElement(xml, child, child.getName(), false);
913        }
914      }
915      if (!root && element.getSpecial() != null)
916        xml.exit(element.getProperty().getXmlNamespace(),element.getType());
917      xml.exit(element.getProperty().getXmlNamespace(),elementName);
918    }
919  }
920
921  private String makeNamespaceAbbrev(Property property, IXMLWriter xml) {
922    // it's a cosmetic thing, but we're going to try to come up with a nice namespace
923
924    ElementDefinition ed = property.getDefinition();
925    String ns = property.getXmlNamespace();
926    String n = property.getXmlName();
927    
928    String diff = property.getName().toLowerCase().replace(n.toLowerCase(), "");
929    if (!Utilities.noString(diff) && diff.length() <= 5 && Utilities.isToken(diff) && !xml.abbreviationDefined(diff)) {
930      return diff;
931    }
932    
933    int i = ns.length()-1;
934    while (i > 0) {
935      if (Character.isAlphabetic(ns.charAt(i)) || Character.isDigit(ns.charAt(i))) {
936        i--;
937      } else {
938        break;
939      }
940    }
941    String tail = ns.substring(i+1);
942    if (!Utilities.noString(tail) && tail.length() <= 5 && Utilities.isToken(tail) && !xml.abbreviationDefined(tail)) {
943      return tail;
944    }
945    
946    i = 0;
947    while (xml.abbreviationDefined("ns"+i)) {
948      i++;
949    }
950    return "ns"+i;
951  }
952  private String checkHeader(List<ValidationMessage> errors, InputStream stream) throws IOException {
953    try {
954      // the stream will either start with the UTF-8 BOF or with <xml
955      int i0 = stream.read();
956      int i1 = stream.read();
957      int i2 = stream.read();
958
959      StringBuilder b = new StringBuilder();
960      if (i0 == 0xEF && i1 == 0xBB && i2 == 0xBF) {
961        // ok, it's UTF-8
962      } else if (i0 == 0x3C && i1 == 0x3F && i2 == 0x78) { // <xm
963        b.append((char) i0);
964        b.append((char) i1);
965        b.append((char) i2);
966      } else if (i0 == 60) { // just plain old XML with no header
967        return "1.0";        
968      } else {
969        throw new Exception(context.formatMessage(I18nConstants.XML_ENCODING_INVALID));
970      }
971      int i = stream.read();
972      do {
973        b.append((char) i);
974        i = stream.read();
975      } while (i != 0x3E);
976      String header = b.toString();
977      String e = null;
978      i = header.indexOf("encoding=\"");
979      if (i > -1) {
980        e = header.substring(i+10, i+15);
981      } else {
982        i = header.indexOf("encoding='");
983        if (i > -1) {
984          e = header.substring(i+10, i+15);
985        } 
986      }
987      if (e != null && !"UTF-8".equalsIgnoreCase(e)) {
988        logError(errors, ValidationMessage.NO_RULE_DATE, 0, 0, "XML", IssueType.INVALID, context.formatMessage(I18nConstants.XML_ENCODING_INVALID), IssueSeverity.ERROR);
989      }
990
991      i = header.indexOf("version=\"");
992      if (i > -1) {
993        return header.substring(i+9, i+12);
994      } else {
995        i = header.indexOf("version='");
996        if (i > -1) {
997          return header.substring(i+9, i+12);          
998        } 
999      }
1000      return "?xml-p1?";
1001    } catch (Exception e) {
1002      // suppress this error 
1003      logError(errors, ValidationMessage.NO_RULE_DATE, 0, 0, "XML", IssueType.INVALID, e.getMessage(), IssueSeverity.ERROR);
1004    }
1005    return "?xml-p2?";
1006  }
1007
1008  class NullErrorHandler implements ErrorHandler {
1009    @Override
1010    public void fatalError(SAXParseException e) {
1011        // do nothing
1012    }
1013
1014    @Override
1015    public void error(SAXParseException e) {
1016        // do nothing
1017    }
1018    
1019    @Override
1020    public void warning(SAXParseException e) {
1021        // do nothing
1022    }
1023}
1024}