001package org.hl7.fhir.r5.elementmodel; 002 003import java.io.ByteArrayInputStream; 004 005/* 006 Copyright (c) 2011+, HL7, Inc. 007 All rights reserved. 008 009 Redistribution and use in source and binary forms, with or without modification, 010 are permitted provided that the following conditions are met: 011 012 * Redistributions of source code must retain the above copyright notice, this 013 list of conditions and the following disclaimer. 014 * Redistributions in binary form must reproduce the above copyright notice, 015 this list of conditions and the following disclaimer in the documentation 016 and/or other materials provided with the distribution. 017 * Neither the name of HL7 nor the names of its contributors may be used to 018 endorse or promote products derived from this software without specific 019 prior written permission. 020 021 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 022 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 023 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 024 IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 025 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 026 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 027 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 028 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 029 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 030 POSSIBILITY OF SUCH DAMAGE. 031 032 */ 033 034 035import java.io.IOException; 036import java.io.InputStream; 037import java.io.OutputStream; 038import java.util.ArrayList; 039import java.util.Collections; 040import java.util.Comparator; 041import java.util.HashSet; 042import java.util.List; 043import java.util.Set; 044 045import javax.xml.parsers.DocumentBuilder; 046import javax.xml.parsers.DocumentBuilderFactory; 047import javax.xml.parsers.SAXParser; 048import javax.xml.parsers.SAXParserFactory; 049import javax.xml.transform.Transformer; 050import javax.xml.transform.TransformerFactory; 051import javax.xml.transform.dom.DOMResult; 052import javax.xml.transform.sax.SAXSource; 053 054import org.hl7.fhir.exceptions.DefinitionException; 055import org.hl7.fhir.exceptions.FHIRException; 056import org.hl7.fhir.exceptions.FHIRFormatError; 057import org.hl7.fhir.r5.conformance.profile.ProfileUtilities; 058import org.hl7.fhir.r5.context.IWorkerContext; 059import org.hl7.fhir.r5.elementmodel.Element.SpecialElement; 060import org.hl7.fhir.r5.elementmodel.Manager.FhirFormat; 061import org.hl7.fhir.r5.formats.FormatUtilities; 062import org.hl7.fhir.r5.formats.IParser.OutputStyle; 063import org.hl7.fhir.r5.model.Constants; 064import org.hl7.fhir.r5.model.DateTimeType; 065import org.hl7.fhir.r5.model.ElementDefinition; 066import org.hl7.fhir.r5.model.ElementDefinition.PropertyRepresentation; 067import org.hl7.fhir.r5.model.Enumeration; 068import org.hl7.fhir.r5.model.StructureDefinition; 069import org.hl7.fhir.r5.utils.ToolingExtensions; 070import org.hl7.fhir.r5.utils.formats.XmlLocationAnnotator; 071import org.hl7.fhir.r5.utils.formats.XmlLocationData; 072import org.hl7.fhir.utilities.ElementDecoration; 073import org.hl7.fhir.utilities.StringPair; 074import org.hl7.fhir.utilities.TextFile; 075import org.hl7.fhir.utilities.Utilities; 076import org.hl7.fhir.utilities.i18n.I18nConstants; 077import org.hl7.fhir.utilities.validation.ValidationMessage; 078import org.hl7.fhir.utilities.validation.ValidationMessage.IssueSeverity; 079import org.hl7.fhir.utilities.validation.ValidationMessage.IssueType; 080import org.hl7.fhir.utilities.xhtml.CDANarrativeFormat; 081import org.hl7.fhir.utilities.xhtml.XhtmlComposer; 082import org.hl7.fhir.utilities.xhtml.XhtmlNode; 083import org.hl7.fhir.utilities.xhtml.XhtmlParser; 084import org.hl7.fhir.utilities.xml.IXMLWriter; 085import org.hl7.fhir.utilities.xml.XMLUtil; 086import org.hl7.fhir.utilities.xml.XMLWriter; 087import org.w3c.dom.Document; 088import org.w3c.dom.Node; 089import org.xml.sax.ErrorHandler; 090import org.xml.sax.InputSource; 091import org.xml.sax.SAXParseException; 092import org.xml.sax.XMLReader; 093 094public class XmlParser extends ParserBase { 095 private boolean allowXsiLocation; 096 private String version; 097 098 public XmlParser(IWorkerContext context) { 099 super(context); 100 } 101 102 private String schemaPath; 103 104 public String getSchemaPath() { 105 return schemaPath; 106 } 107 public void setSchemaPath(String schemaPath) { 108 this.schemaPath = schemaPath; 109 } 110 111 public boolean isAllowXsiLocation() { 112 return allowXsiLocation; 113 } 114 115 public void setAllowXsiLocation(boolean allowXsiLocation) { 116 this.allowXsiLocation = allowXsiLocation; 117 } 118 119 public List<ValidatedFragment> parse(InputStream inStream) throws FHIRFormatError, DefinitionException, FHIRException, IOException { 120 121 byte[] content = TextFile.streamToBytes(inStream); 122 ValidatedFragment focusFragment = new ValidatedFragment(ValidatedFragment.FOCUS_NAME, "xml", content, false); 123 124 ByteArrayInputStream stream = new ByteArrayInputStream(content); 125 Document doc = null; 126 try { 127 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); 128 // xxe protection 129 factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); 130 factory.setFeature("http://xml.org/sax/features/external-general-entities", false); 131 factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false); 132 factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); 133 factory.setXIncludeAware(false); 134 factory.setExpandEntityReferences(false); 135 136 factory.setNamespaceAware(true); 137 if (policy == ValidationPolicy.EVERYTHING) { 138 // The SAX interface appears to not work when reporting the correct version/encoding. 139 // if we can, we'll inspect the header/encoding ourselves 140 141 stream.mark(1024); 142 version = checkHeader(focusFragment.getErrors(), stream); 143 stream.reset(); 144 145 // use a slower parser that keeps location data 146 TransformerFactory transformerFactory = TransformerFactory.newInstance(); 147 Transformer nullTransformer = transformerFactory.newTransformer(); 148 DocumentBuilder docBuilder = factory.newDocumentBuilder(); 149 doc = docBuilder.newDocument(); 150 DOMResult domResult = new DOMResult(doc); 151 SAXParserFactory spf = SAXParserFactory.newInstance(); 152 spf.setNamespaceAware(true); 153 spf.setValidating(false); 154 // xxe protection 155 spf.setFeature("http://xml.org/sax/features/external-general-entities", false); 156 spf.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); 157 SAXParser saxParser = spf.newSAXParser(); 158 XMLReader xmlReader = saxParser.getXMLReader(); 159 // xxe protection 160 xmlReader.setFeature("http://xml.org/sax/features/external-general-entities", false); 161 xmlReader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); 162 163 XmlLocationAnnotator locationAnnotator = new XmlLocationAnnotator(xmlReader, doc); 164 InputSource inputSource = new InputSource(stream); 165 SAXSource saxSource = new SAXSource(locationAnnotator, inputSource); 166 nullTransformer.transform(saxSource, domResult); 167 } else { 168 DocumentBuilder builder = factory.newDocumentBuilder(); 169 builder.setErrorHandler(new NullErrorHandler()); 170 doc = builder.parse(stream); 171 } 172 } catch (Exception e) { 173 if (e.getMessage().contains("lineNumber:") && e.getMessage().contains("columnNumber:")) { 174 int line = Utilities.parseInt(extractVal(e.getMessage(), "lineNumber"), 0); 175 int col = Utilities.parseInt(extractVal(e.getMessage(), "columnNumber"), 0); 176 logError(focusFragment.getErrors(), ValidationMessage.NO_RULE_DATE, line, col, "(xml)", IssueType.INVALID, e.getMessage().substring(e.getMessage().lastIndexOf(";")+1).trim(), IssueSeverity.FATAL); 177 } else { 178 logError(focusFragment.getErrors(), ValidationMessage.NO_RULE_DATE, 0, 0, "(xml)", IssueType.INVALID, e.getMessage(), IssueSeverity.FATAL); 179 } 180 doc = null; 181 } 182 if (doc != null) { 183 focusFragment.setElement(parse(focusFragment.getErrors(), doc)); 184 } 185 List<ValidatedFragment> res = new ArrayList<>(); 186 res.add(focusFragment); 187 return res; 188 } 189 190 191 private String extractVal(String src, String name) { 192 src = src.substring(src.indexOf(name)+name.length()+1); 193 src = src.substring(0, src.indexOf(";")).trim(); 194 return src; 195 } 196 private void checkForProcessingInstruction(List<ValidationMessage> errors, Document document) throws FHIRFormatError { 197 if (policy == ValidationPolicy.EVERYTHING && FormatUtilities.FHIR_NS.equals(document.getDocumentElement().getNamespaceURI())) { 198 Node node = document.getFirstChild(); 199 while (node != null) { 200 if (node.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE) 201 logError(errors, ValidationMessage.NO_RULE_DATE, line(document, false), col(document, false), "(document)", IssueType.INVALID, context.formatMessage( 202 I18nConstants.NO_PROCESSING_INSTRUCTIONS_ALLOWED_IN_RESOURCES), IssueSeverity.ERROR); 203 node = node.getNextSibling(); 204 } 205 } 206 } 207 208 209 private int line(Node node, boolean end) { 210 XmlLocationData loc = node == null ? null : (XmlLocationData) node.getUserData(XmlLocationData.LOCATION_DATA_KEY); 211 return loc == null ? 0 : end ? loc.getEndLine() : loc.getStartLine(); 212 } 213 214 private int col(Node node, boolean end) { 215 XmlLocationData loc = node == null ? null : (XmlLocationData) node.getUserData(XmlLocationData.LOCATION_DATA_KEY); 216 return loc == null ? 0 : end ? loc.getEndColumn() : loc.getStartColumn(); 217 } 218 219 public Element parse(List<ValidationMessage> errors, Document doc) throws FHIRFormatError, DefinitionException, FHIRException, IOException { 220 checkForProcessingInstruction(errors, doc); 221 org.w3c.dom.Element element = doc.getDocumentElement(); 222 return parse(errors, element); 223 } 224 225 public Element parse(List<ValidationMessage> errors, org.w3c.dom.Element element) throws FHIRFormatError, DefinitionException, FHIRException, IOException { 226 String ns = element.getNamespaceURI(); 227 String name = element.getLocalName(); 228 String path = "/"+pathPrefix(ns)+name; 229 230 StructureDefinition sd = getDefinition(errors, line(element, false), col(element, false), (ns == null ? "noNamespace" : ns), name); 231 if (sd == null) 232 return null; 233 234 Element result = new Element(element.getLocalName(), new Property(context, sd.getSnapshot().getElement().get(0), sd, getProfileUtilities(), getContextUtilities())).setFormat(FhirFormat.XML); 235 result.setPath(element.getLocalName()); 236 checkElement(errors, element, result, path, result.getProperty(), false); 237 result.markLocation(line(element, false), col(element, false)); 238 result.setType(element.getLocalName()); 239 parseChildren(errors, path, element, result); 240 result.numberChildren(); 241 return result; 242 } 243 244 private String pathPrefix(String ns) { 245 if (Utilities.noString(ns)) 246 return ""; 247 if (ns.equals(FormatUtilities.FHIR_NS)) 248 return "f:"; 249 if (ns.equals(FormatUtilities.XHTML_NS)) 250 return "h:"; 251 if (ns.equals("urn:hl7-org:v3")) 252 return "v3:"; 253 if (ns.equals("urn:hl7-org:sdtc")) 254 return "sdtc:"; 255 if (ns.equals("urn:ihe:pharm")) 256 return "pharm:"; 257 return "?:"; 258 } 259 260 private boolean empty(org.w3c.dom.Element element) { 261 for (int i = 0; i < element.getAttributes().getLength(); i++) { 262 String n = element.getAttributes().item(i).getNodeName(); 263 if (!n.equals("xmlns") && !n.startsWith("xmlns:")) 264 return false; 265 } 266 if (!Utilities.noString(element.getTextContent().trim())) 267 return false; 268 269 Node n = element.getFirstChild(); 270 while (n != null) { 271 if (n.getNodeType() == Node.ELEMENT_NODE) 272 return false; 273 n = n.getNextSibling(); 274 } 275 return true; 276 } 277 278 private void checkElement(List<ValidationMessage> errors, org.w3c.dom.Element element, Element e, String path, Property prop, boolean xsiTypeChecked) throws FHIRFormatError { 279 if (policy == ValidationPolicy.EVERYTHING) { 280 if (empty(element) && FormatUtilities.FHIR_NS.equals(element.getNamespaceURI())) // this rule only applies to FHIR Content 281 logError(errors, ValidationMessage.NO_RULE_DATE, line(element, false), col(element, false), path, IssueType.INVALID, context.formatMessage(I18nConstants.ELEMENT_MUST_HAVE_SOME_CONTENT), IssueSeverity.ERROR); 282 String ns = prop.getXmlNamespace(); 283 String elementNs = element.getNamespaceURI(); 284 if (elementNs == null) { 285 elementNs = "noNamespace"; 286 } 287 if (!elementNs.equals(ns)) { 288 logError(errors, ValidationMessage.NO_RULE_DATE, line(element, false), col(element, false), path, IssueType.INVALID, context.formatMessage(I18nConstants.WRONG_NAMESPACE__EXPECTED_, ns), IssueSeverity.ERROR); 289 } 290 if (!xsiTypeChecked) { 291 String xsiType = element.getAttributeNS(FormatUtilities.NS_XSI, "type"); 292 if (!Utilities.noString(xsiType)) { 293 String actualType = prop.getXmlTypeName(); 294 if (xsiType.equals(actualType)) { 295 logError(errors, "2023-10-12", line(element, false), col(element, false), path, IssueType.INVALID, context.formatMessage(I18nConstants.XSI_TYPE_UNNECESSARY), IssueSeverity.INFORMATION); 296 } else { 297 StructureDefinition sd = findLegalConstraint(xsiType, actualType); 298 if (sd != null) { 299 e.setType(sd.getType()); 300 e.setExplicitType(xsiType); 301 } else { 302 logError(errors, "2023-10-12", line(element, false), col(element, false), path, IssueType.INVALID, context.formatMessage(I18nConstants.XSI_TYPE_WRONG, xsiType, actualType), IssueSeverity.ERROR); 303 } 304 } 305 } 306 } 307 } 308 } 309 310 private StructureDefinition findLegalConstraint(String xsiType, String actualType) { 311 StructureDefinition sdA = context.fetchTypeDefinition(actualType); 312 StructureDefinition sd = context.fetchTypeDefinition(xsiType); 313 while (sd != null) { 314 if (sd == sdA) { 315 return sd; 316 } 317 sd = context.fetchResource(StructureDefinition.class, sd.getBaseDefinition()); 318 } 319 return null; 320 } 321 322 public Element parse(List<ValidationMessage> errors, org.w3c.dom.Element base, String type) throws Exception { 323 StructureDefinition sd = getDefinition(errors, 0, 0, FormatUtilities.FHIR_NS, type); 324 Element result = new Element(base.getLocalName(), new Property(context, sd.getSnapshot().getElement().get(0), sd, getProfileUtilities(), getContextUtilities())).setFormat(FhirFormat.XML).setNativeObject(base); 325 result.setPath(base.getLocalName()); 326 String path = "/"+pathPrefix(base.getNamespaceURI())+base.getLocalName(); 327 checkElement(errors, base, result, path, result.getProperty(), false); 328 result.setType(base.getLocalName()); 329 parseChildren(errors, path, base, result); 330 result.numberChildren(); 331 return result; 332 } 333 334 private void parseChildren(List<ValidationMessage> errors, String path, org.w3c.dom.Element node, Element element) throws FHIRFormatError, FHIRException, IOException, DefinitionException { 335 // this parsing routine retains the original order in a the XML file, to support validation 336 reapComments(node, element); 337 List<Property> properties = element.getProperty().getChildProperties(element.getName(), XMLUtil.getXsiType(node)); 338 Property cgProp = getChoiceGroupProp(properties); 339 Property mtProp = cgProp == null ? null : getTextProp(cgProp.getChildProperties(null, null)); 340 341 String text = mtProp == null ? XMLUtil.getDirectText(node).trim() : null; 342 int line = line(node, false); 343 int col = col(node, false); 344 if (!Utilities.noString(text)) { 345 Property property = getTextProp(properties); 346 if (property != null) { 347 if ("ED.data[x]".equals(property.getDefinition().getId()) || (property.getDefinition()!=null && property.getDefinition().getBase()!=null && "ED.data[x]".equals(property.getDefinition().getBase().getPath()))) { 348 if ("B64".equals(node.getAttribute("representation"))) { 349 Element n = new Element("dataBase64Binary", property, "base64Binary", text).markLocation(line, col).setFormat(FhirFormat.XML); 350 n.setPath(element.getPath()+"."+property.getName()); 351 element.getChildren().add(n); 352 } else { 353 Element n = new Element("dataString", property, "string", text).markLocation(line, col).setFormat(FhirFormat.XML); 354 n.setPath(element.getPath()+"."+property.getName()); 355 element.getChildren().add(n); 356 } 357 } else { 358 Element n = new Element(property.getName(), property, property.getType(), text).markLocation(line, col).setFormat(FhirFormat.XML); 359 n.setPath(element.getPath()+"."+property.getName()); 360 element.getChildren().add(n); 361 } 362 } else { 363 Node n = node.getFirstChild(); 364 while (n != null) { 365 if (n.getNodeType() == Node.TEXT_NODE && !Utilities.noString(n.getTextContent().trim())) { 366 Node nt = n; // try to find the nearest element for a line/col location 367 boolean end = false; 368 while (nt.getPreviousSibling() != null && nt.getNodeType() != Node.ELEMENT_NODE) { 369 nt = nt.getPreviousSibling(); 370 end = true; 371 } 372 while (nt.getNextSibling() != null && nt.getNodeType() != Node.ELEMENT_NODE) { 373 nt = nt.getNextSibling(); 374 end = false; 375 } 376 line = line(nt, end); 377 col = col(nt, end); 378 logError(errors, ValidationMessage.NO_RULE_DATE, line, col, path, IssueType.STRUCTURE, context.formatMessage(I18nConstants.TEXT_SHOULD_NOT_BE_PRESENT, Utilities.makeSingleLine(n.getTextContent().trim())), IssueSeverity.ERROR); 379 } 380 n = n.getNextSibling(); 381 } 382 } 383 } 384 385 for (int i = 0; i < node.getAttributes().getLength(); i++) { 386 Node attr = node.getAttributes().item(i); 387 String value = attr.getNodeValue(); 388 if (!validAttrValue(value)) { 389 logError(errors, ValidationMessage.NO_RULE_DATE, line, col, path, IssueType.STRUCTURE, context.formatMessage(I18nConstants.XML_ATTR_VALUE_INVALID, attr.getNodeName()), IssueSeverity.ERROR); 390 } 391 if (!(attr.getNodeName().equals("xmlns") || attr.getNodeName().startsWith("xmlns:"))) { 392 Property property = getAttrProp(properties, attr.getLocalName(), attr.getNamespaceURI()); 393 if (property != null) { 394 String av = attr.getNodeValue(); 395 if (ToolingExtensions.hasExtension(property.getDefinition(), ToolingExtensions.EXT_DATE_FORMAT)) 396 av = convertForDateFormatFromExternal(ToolingExtensions.readStringExtension(property.getDefinition(), ToolingExtensions.EXT_DATE_FORMAT), av); 397 if (property.getName().equals("value") && element.isPrimitive()) 398 element.setValue(av); 399 else { 400 String[] vl = {av}; 401 if (property.isList() && av.contains(" ")) { 402 vl = av.split(" "); 403 } 404 for (String v : vl) { 405 Element n = new Element(property.getName(), property, property.getType(), v).markLocation(line, col).setFormat(FhirFormat.XML); 406 n.setPath(element.getPath()+"."+property.getName()); 407 element.getChildren().add(n); 408 } 409 } 410 } else { 411 boolean ok = false; 412 if (FormatUtilities.FHIR_NS.equals(node.getNamespaceURI())) { 413 if (attr.getLocalName().equals("schemaLocation") && FormatUtilities.NS_XSI.equals(attr.getNamespaceURI())) { 414 ok = ok || allowXsiLocation; 415 } 416 } else 417 ok = ok || (attr.getLocalName().equals("schemaLocation")); // xsi:schemalocation allowed for non FHIR content 418 ok = ok || (hasTypeAttr(element) && attr.getLocalName().equals("type") && FormatUtilities.NS_XSI.equals(attr.getNamespaceURI())); // xsi:type allowed if element says so 419 if (!ok) { 420 logError(errors, ValidationMessage.NO_RULE_DATE, line(node, false), col(node, false), path, IssueType.STRUCTURE, context.formatMessage(I18nConstants.UNDEFINED_ATTRIBUTE__ON__FOR_TYPE__PROPERTIES__, attr.getNodeName(), node.getNodeName(), element.fhirType(), properties), IssueSeverity.ERROR); 421 } 422 } 423 } 424 } 425 426 String lastName = null; 427 int repeatCount = 0; 428 Node child = node.getFirstChild(); 429 while (child != null) { 430 if (child.getNodeType() == Node.ELEMENT_NODE) { 431 Property property = getElementProp(properties, child.getLocalName(), child.getNamespaceURI()); 432 433 if (property != null) { 434 if (property.getName().equals(lastName)) { 435 repeatCount++; 436 } else { 437 lastName = property.getName(); 438 repeatCount = 0; 439 } 440 if (!property.isChoice() && "xhtml".equals(property.getType())) { 441 XhtmlNode xhtml; 442 if (property.getDefinition().hasRepresentation(PropertyRepresentation.CDATEXT)) 443 xhtml = new CDANarrativeFormat().convert((org.w3c.dom.Element) child); 444 else { 445 XhtmlParser xp = new XhtmlParser(); 446 xhtml = xp.parseHtmlNode((org.w3c.dom.Element) child); 447 if (policy == ValidationPolicy.EVERYTHING) { 448 for (StringPair s : xp.getValidationIssues()) { 449 logError(errors, "2022-11-17", line(child, false), col(child, false), path, IssueType.INVALID, context.formatMessage(s.getName(), s.getValue()), IssueSeverity.ERROR); 450 } 451 } 452 } 453 Element n = new Element(property.getName(), property, "xhtml", new XhtmlComposer(XhtmlComposer.XML, false).compose(xhtml)).setXhtml(xhtml).markLocation(line(child, false), col(child, false)).setFormat(FhirFormat.XML).setNativeObject(child); 454 n.setPath(element.getPath()+"."+property.getName()); 455 element.getChildren().add(n); 456 } else { 457 String npath = path+"/"+pathPrefix(child.getNamespaceURI())+child.getLocalName(); 458 String name = child.getLocalName(); 459 if (!property.isChoice() && !name.equals(property.getName())) { 460 name = property.getName(); 461 } 462 Element n = new Element(name, property).markLocation(line(child, false), col(child, false)).setFormat(FhirFormat.XML).setNativeObject(child); 463 if (property.isList()) { 464 n.setPath(element.getPath()+"."+property.getName()+"["+repeatCount+"]"); 465 } else { 466 n.setPath(element.getPath()+"."+property.getName()); 467 } 468 boolean xsiTypeChecked = false; 469 boolean ok = true; 470 if (property.isChoice()) { 471 if (property.getDefinition().hasRepresentation(PropertyRepresentation.TYPEATTR)) { 472 String xsiType = ((org.w3c.dom.Element) child).getAttributeNS(FormatUtilities.NS_XSI, "type"); 473 if (Utilities.noString(xsiType)) { 474 if (ToolingExtensions.hasExtension(property.getDefinition(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-defaulttype")) { 475 xsiType = ToolingExtensions.readStringExtension(property.getDefinition(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-defaulttype"); 476 n.setType(xsiType); 477 } else { 478 logError(errors, ValidationMessage.NO_RULE_DATE, line(child, false), col(child, false), path, IssueType.STRUCTURE, context.formatMessage(I18nConstants.NO_TYPE_FOUND_ON_, child.getLocalName()), IssueSeverity.ERROR); 479 ok = false; 480 } 481 } else { 482 if (xsiType.contains(":")) 483 xsiType = xsiType.substring(xsiType.indexOf(":")+1); 484 n.setType(xsiType); 485 n.setExplicitType(xsiType); 486 } 487 xsiTypeChecked = true; 488 } else 489 n.setType(n.getType()); 490 } 491 checkElement(errors, (org.w3c.dom.Element) child, n, npath, n.getProperty(), xsiTypeChecked); 492 element.getChildren().add(n); 493 if (ok) { 494 if (property.isResource()) 495 parseResource(errors, npath, (org.w3c.dom.Element) child, n, property); 496 else 497 parseChildren(errors, npath, (org.w3c.dom.Element) child, n); 498 } 499 } 500 } else { 501 if (cgProp != null) { 502 property = getElementProp(cgProp.getChildProperties(null, null), child.getLocalName(), child.getNamespaceURI()); 503 if (property != null) { 504 if (cgProp.getName().equals(lastName)) { 505 repeatCount++; 506 } else { 507 lastName = cgProp.getName(); 508 repeatCount = 0; 509 } 510 511 String npath = path+"/"+pathPrefix(cgProp.getXmlNamespace())+cgProp.getName(); 512 String name = cgProp.getName(); 513 Element cgn = new Element(cgProp.getName(), cgProp).setFormat(FhirFormat.XML); 514 cgn.setPath(element.getPath()+"."+cgProp.getName()+"["+repeatCount+"]"); 515 element.getChildren().add(cgn); 516 517 npath = npath+"/"+pathPrefix(child.getNamespaceURI())+child.getLocalName(); 518 name = child.getLocalName(); 519 Element n = new Element(name, property).markLocation(line(child, false), col(child, false)).setFormat(FhirFormat.XML).setNativeObject(child); 520 cgn.getChildren().add(n); 521 n.setPath(element.getPath()+"."+property.getName()); 522 checkElement(errors, (org.w3c.dom.Element) child, n, npath, n.getProperty(), false); 523 parseChildren(errors, npath, (org.w3c.dom.Element) child, n); 524 } 525 } 526 if (property == null) { 527 logError(errors, ValidationMessage.NO_RULE_DATE, line(child, false), col(child, false), path, IssueType.STRUCTURE, context.formatMessage(I18nConstants.UNDEFINED_ELEMENT_, child.getLocalName(), path), IssueSeverity.ERROR); 528 } 529 } 530 } else if (child.getNodeType() == Node.TEXT_NODE && !Utilities.noString(child.getTextContent().trim()) && mtProp != null) { 531 if (cgProp.getName().equals(lastName)) { 532 repeatCount++; 533 } else { 534 lastName = cgProp.getName(); 535 repeatCount = 0; 536 } 537 538 String npath = path+"/"+pathPrefix(cgProp.getXmlNamespace())+cgProp.getName(); 539 String name = cgProp.getName(); 540 Element cgn = new Element(cgProp.getName(), cgProp).setFormat(FhirFormat.XML); 541 cgn.setPath(element.getPath()+"."+cgProp.getName()+"["+repeatCount+"]"); 542 element.getChildren().add(cgn); 543 544 npath = npath+"/text()"; 545 name = mtProp.getName(); 546 Element n = new Element(name, mtProp, mtProp.getType(), child.getTextContent().trim()).markLocation(line(child, false), col(child, false)).setFormat(FhirFormat.XML).setNativeObject(child); 547 cgn.getChildren().add(n); 548 n.setPath(element.getPath()+"."+mtProp.getName()); 549 550 551 } else if (child.getNodeType() == Node.CDATA_SECTION_NODE) { 552 logError(errors, ValidationMessage.NO_RULE_DATE, line(child, false), col(child, false), path, IssueType.STRUCTURE, context.formatMessage(I18nConstants.CDATA_IS_NOT_ALLOWED), IssueSeverity.ERROR); 553 } else if (!Utilities.existsInList(child.getNodeType(), 3, 8)) { 554 logError(errors, ValidationMessage.NO_RULE_DATE, line(child, false), col(child, false), path, IssueType.STRUCTURE, context.formatMessage(I18nConstants.NODE_TYPE__IS_NOT_ALLOWED, Integer.toString(child.getNodeType())), IssueSeverity.ERROR); 555 } 556 child = child.getNextSibling(); 557 } 558 } 559 560 private Property getChoiceGroupProp(List<Property> properties) { 561 for (Property p : properties) { 562 if (p.getDefinition().hasExtension(ToolingExtensions.EXT_ID_CHOICE_GROUP)) { 563 return p; 564 } 565 } 566 return null; 567 } 568 569 private boolean validAttrValue(String value) { 570 if (version == null) { 571 return true; 572 } 573 if (version.equals("1.0")) { 574 boolean ok = true; 575 for (char ch : value.toCharArray()) { 576 if (ch <= 0x1F && !Utilities.existsInList(ch, '\r', '\n', '\t')) { 577 ok = false; 578 } 579 } 580 return ok; 581 } else 582 return true; 583 } 584 585 586 private Property getElementProp(List<Property> properties, String nodeName, String namespace) { 587 List<Property> propsSortedByLongestFirst = new ArrayList<Property>(properties); 588 // sort properties according to their name longest first, so .requestOrganizationReference comes first before .request[x] 589 // and therefore the longer property names get evaluated first 590 Collections.sort(propsSortedByLongestFirst, new Comparator<Property>() { 591 @Override 592 public int compare(Property o1, Property o2) { 593 return o2.getName().length() - o1.getName().length(); 594 } 595 }); 596 // first scan, by namespace 597 for (Property p : propsSortedByLongestFirst) { 598 if (!p.getDefinition().hasRepresentation(PropertyRepresentation.XMLATTR) && !p.getDefinition().hasRepresentation(PropertyRepresentation.XMLTEXT)) { 599 if (p.getXmlName().equals(nodeName) && p.getXmlNamespace().equals(namespace)) 600 return p; 601 } 602 } 603 for (Property p : propsSortedByLongestFirst) { 604 if (!p.getDefinition().hasRepresentation(PropertyRepresentation.XMLATTR) && !p.getDefinition().hasRepresentation(PropertyRepresentation.XMLTEXT)) { 605 if (p.getXmlName().equals(nodeName)) 606 return p; 607 if (p.getName().endsWith("[x]") && nodeName.length() > p.getName().length()-3 && p.getName().substring(0, p.getName().length()-3).equals(nodeName.substring(0, p.getName().length()-3))) 608 return p; 609 } 610 } 611 612 613 return null; 614 } 615 616 private Property getAttrProp(List<Property> properties, String nodeName, String namespace) { 617 for (Property p : properties) { 618 if (p.getXmlName().equals(nodeName) && p.getDefinition().hasRepresentation(PropertyRepresentation.XMLATTR) && p.getXmlNamespace().equals(namespace)) { 619 return p; 620 } 621 } 622 if (namespace == null) { 623 for (Property p : properties) { 624 if (p.getXmlName().equals(nodeName) && p.getDefinition().hasRepresentation(PropertyRepresentation.XMLATTR)) { 625 return p; 626 } 627 } 628 } 629 return null; 630 } 631 632 private Property getTextProp(List<Property> properties) { 633 for (Property p : properties) 634 if (p.getDefinition().hasRepresentation(PropertyRepresentation.XMLTEXT)) 635 return p; 636 return null; 637 } 638 639 private String convertForDateFormatFromExternal(String fmt, String av) throws FHIRException { 640 if ("v3".equals(fmt) || "YYYYMMDDHHMMSS.UUUU[+|-ZZzz]".equals(fmt)) { 641 try { 642 DateTimeType d = DateTimeType.parseV3(av); 643 return d.asStringValue(); 644 } catch (Exception e) { 645 return av; // not at all clear what to do in this case. 646 } 647 } 648 throw new FHIRException(context.formatMessage(I18nConstants.UNKNOWN_DATA_FORMAT_, fmt)); 649 } 650 651 private String convertForDateFormatToExternal(String fmt, String av) throws FHIRException { 652 if ("v3".equals(fmt) || "YYYYMMDDHHMMSS.UUUU[+|-ZZzz]".equals(fmt)) { 653 DateTimeType d = new DateTimeType(av); 654 return d.getAsV3(); 655 } else 656 throw new FHIRException(context.formatMessage(I18nConstants.UNKNOWN_DATE_FORMAT_, fmt)); 657 } 658 659 private void parseResource(List<ValidationMessage> errors, String string, org.w3c.dom.Element container, Element parent, Property elementProperty) throws FHIRFormatError, DefinitionException, FHIRException, IOException { 660 org.w3c.dom.Element res = XMLUtil.getFirstChild(container); 661 String name = res.getLocalName(); 662 StructureDefinition sd = context.fetchResource(StructureDefinition.class, ProfileUtilities.sdNs(name, null)); 663 if (sd == null) 664 throw new FHIRFormatError(context.formatMessage(I18nConstants.CONTAINED_RESOURCE_DOES_NOT_APPEAR_TO_BE_A_FHIR_RESOURCE_UNKNOWN_NAME_, res.getLocalName())); 665 parent.updateProperty(new Property(context, sd.getSnapshot().getElement().get(0), sd, getProfileUtilities(), getContextUtilities()), SpecialElement.fromProperty(parent.getProperty()), elementProperty); 666 parent.setType(name); 667 parseChildren(errors, res.getLocalName(), res, parent); 668 } 669 670 private void reapComments(org.w3c.dom.Element element, Element context) { 671 Node node = element.getPreviousSibling(); 672 while (node != null && node.getNodeType() != Node.ELEMENT_NODE) { 673 if (node.getNodeType() == Node.COMMENT_NODE) 674 context.getComments().add(0, node.getTextContent()); 675 node = node.getPreviousSibling(); 676 } 677 node = element.getLastChild(); 678 while (node != null && node.getNodeType() != Node.ELEMENT_NODE) { 679 node = node.getPreviousSibling(); 680 } 681 while (node != null) { 682 if (node.getNodeType() == Node.COMMENT_NODE) 683 context.getComments().add(node.getTextContent()); 684 node = node.getNextSibling(); 685 } 686 } 687 688 private boolean isAttr(Property property) { 689 for (Enumeration<PropertyRepresentation> r : property.getDefinition().getRepresentation()) { 690 if (r.getValue() == PropertyRepresentation.XMLATTR) { 691 return true; 692 } 693 } 694 return false; 695 } 696 697 private boolean isCdaText(Property property) { 698 for (Enumeration<PropertyRepresentation> r : property.getDefinition().getRepresentation()) { 699 if (r.getValue() == PropertyRepresentation.CDATEXT) { 700 return true; 701 } 702 } 703 return false; 704 } 705 706 private boolean isTypeAttr(Property property) { 707 for (Enumeration<PropertyRepresentation> r : property.getDefinition().getRepresentation()) { 708 if (r.getValue() == PropertyRepresentation.TYPEATTR) { 709 return true; 710 } 711 } 712 return false; 713 } 714 715 private boolean isText(Property property) { 716 for (Enumeration<PropertyRepresentation> r : property.getDefinition().getRepresentation()) { 717 if (r.getValue() == PropertyRepresentation.XMLTEXT) { 718 return true; 719 } 720 } 721 return false; 722 } 723 724 @Override 725 public void compose(Element e, OutputStream stream, OutputStyle style, String base) throws IOException, FHIRException { 726 XMLWriter xml = new XMLWriter(stream, "UTF-8"); 727 xml.setSortAttributes(false); 728 xml.setPretty(style == OutputStyle.PRETTY); 729 xml.start(); 730 if (e.getPath() == null) { 731 e.populatePaths(null); 732 } 733 String ns = e.getProperty().getXmlNamespace(); 734 if (ns!=null && !"noNamespace".equals(ns)) { 735 xml.setDefaultNamespace(ns); 736 } 737 if (hasTypeAttr(e)) 738 xml.namespace("http://www.w3.org/2001/XMLSchema-instance", "xsi"); 739 addNamespaces(xml, e); 740 composeElement(xml, e, e.getType(), true); 741 xml.end(); 742 } 743 744 private void addNamespaces(IXMLWriter xml, Element e) throws IOException { 745 String ns = e.getProperty().getXmlNamespace(); 746 if (ns!=null && xml.getDefaultNamespace()!=null && !xml.getDefaultNamespace().equals(ns)){ 747 if (!xml.namespaceDefined(ns)) { 748 String prefix = pathPrefix(ns); 749 if (prefix.endsWith(":")) { 750 prefix = prefix.substring(0, prefix.length()-1); 751 } 752 if ("?".equals(prefix)) { 753 xml.namespace(ns); 754 } else { 755 xml.namespace(ns, prefix); 756 } 757 } 758 } 759 for (Element c : e.getChildren()) { 760 addNamespaces(xml, c); 761 } 762 } 763 764 private boolean hasTypeAttr(Element e) { 765 if (isTypeAttr(e.getProperty())) 766 return true; 767 for (Element c : e.getChildren()) { 768 if (hasTypeAttr(c)) 769 return true; 770 } 771 // xsi_type is always allowed on CDA elements. right now, I'm not sure where to indicate this in the model, 772 // so it's just hardcoded here 773 if (e.getType() != null && e.getType().startsWith(Constants.NS_CDA_ROOT)) { 774 return true; 775 } 776 return false; 777 } 778 779 private void setXsiTypeIfIsTypeAttr(IXMLWriter xml, Element element) throws IOException, FHIRException { 780 if (isTypeAttr(element.getProperty()) && !Utilities.noString(element.getType())) { 781 String type = element.getType(); 782 if (Utilities.isAbsoluteUrl(type)) { 783 type = type.substring(type.lastIndexOf("/")+1); 784 } 785 xml.attribute("xsi:type",type); 786 } 787 } 788 789 public void compose(Element e, IXMLWriter xml) throws Exception { 790 if (e.getPath() == null) { 791 e.populatePaths(null); 792 } 793 xml.start(); 794 xml.setDefaultNamespace(e.getProperty().getXmlNamespace()); 795 if (schemaPath != null) { 796 xml.setSchemaLocation(FormatUtilities.FHIR_NS, Utilities.pathURL(schemaPath, e.fhirType()+".xsd")); 797 } 798 composeElement(xml, e, e.getType(), true); 799 xml.end(); 800 } 801 802 private void composeElement(IXMLWriter xml, Element element, String elementName, boolean root) throws IOException, FHIRException { 803 if (showDecorations) { 804 @SuppressWarnings("unchecked") 805 List<ElementDecoration> decorations = (List<ElementDecoration>) element.getUserData("fhir.decorations"); 806 if (decorations != null) 807 for (ElementDecoration d : decorations) 808 xml.decorate(d); 809 } 810 for (String s : element.getComments()) { 811 xml.comment(s, true); 812 } 813 if (isText(element.getProperty())) { 814 if (linkResolver != null) 815 xml.link(linkResolver.resolveProperty(element.getProperty())); 816 xml.enter(element.getProperty().getXmlNamespace(),elementName); 817 if (linkResolver != null && element.getProperty().isReference()) { 818 String ref = linkResolver.resolveReference(getReferenceForElement(element)); 819 if (ref != null) { 820 xml.externalLink(ref); 821 } 822 } 823 xml.text(element.getValue()); 824 xml.exit(element.getProperty().getXmlNamespace(),elementName); 825 } else if (!element.hasChildren() && !element.hasValue()) { 826 if (element.getExplicitType() != null) 827 xml.attribute("xsi:type", element.getExplicitType()); 828 xml.element(elementName); 829 } else if (element.isPrimitive() || (element.hasType() && isPrimitive(element.getType()))) { 830 if (element.getType().equals("xhtml")) { 831 String rawXhtml = element.getValue(); 832 if (isCdaText(element.getProperty())) { 833 new CDANarrativeFormat().convert(xml, new XhtmlParser().parseFragment(rawXhtml)); 834 } else { 835 xml.escapedText(rawXhtml); 836 xml.anchor("end-xhtml"); 837 } 838 } else if (isText(element.getProperty())) { 839 if (linkResolver != null) 840 xml.link(linkResolver.resolveProperty(element.getProperty())); 841 xml.text(element.getValue()); 842 } else { 843 setXsiTypeIfIsTypeAttr(xml, element); 844 if (element.hasValue()) { 845 if (linkResolver != null) 846 xml.link(linkResolver.resolveType(element.getType())); 847 xml.attribute("value", element.getValue()); 848 } 849 if (linkResolver != null) 850 xml.link(linkResolver.resolveProperty(element.getProperty())); 851 if (element.hasChildren()) { 852 xml.enter(element.getProperty().getXmlNamespace(), elementName); 853 if (linkResolver != null && element.getProperty().isReference()) { 854 String ref = linkResolver.resolveReference(getReferenceForElement(element)); 855 if (ref != null) { 856 xml.externalLink(ref); 857 } 858 } 859 for (Element child : element.getChildren()) 860 composeElement(xml, child, child.getName(), false); 861 xml.exit(element.getProperty().getXmlNamespace(),elementName); 862 } else 863 xml.element(elementName); 864 } 865 } else { 866 setXsiTypeIfIsTypeAttr(xml, element); 867 Set<String> handled = new HashSet<>(); 868 for (Element child : element.getChildren()) { 869 if (!handled.contains(child.getName()) && isAttr(child.getProperty()) && wantCompose(element.getPath(), child)) { 870 handled.add(child.getName()); 871 String av = child.getValue(); 872 if (child.getProperty().isList()) { 873 for (Element c2 : element.getChildren()) { 874 if (c2 != child && c2.getName().equals(child.getName())) { 875 av = av + " "+c2.getValue(); 876 } 877 } 878 } 879 if (linkResolver != null) 880 xml.link(linkResolver.resolveType(child.getType())); 881 if (ToolingExtensions.hasExtension(child.getProperty().getDefinition(), ToolingExtensions.EXT_DATE_FORMAT)) 882 av = convertForDateFormatToExternal(ToolingExtensions.readStringExtension(child.getProperty().getDefinition(), ToolingExtensions.EXT_DATE_FORMAT), av); 883 xml.attribute(child.getProperty().getXmlNamespace(),child.getProperty().getXmlName(), av); 884 } 885 } 886 if (linkResolver != null) 887 xml.link(linkResolver.resolveProperty(element.getProperty())); 888 if (!xml.namespaceDefined(element.getProperty().getXmlNamespace())) { 889 String abbrev = makeNamespaceAbbrev(element.getProperty(), xml); 890 xml.namespace(element.getProperty().getXmlNamespace(), abbrev); 891 } 892 xml.enter(element.getProperty().getXmlNamespace(), elementName); 893 894 if (!root && element.getSpecial() != null) { 895 if (linkResolver != null) 896 xml.link(linkResolver.resolveProperty(element.getProperty())); 897 xml.enter(element.getProperty().getXmlNamespace(),element.getType()); 898 } 899 if (linkResolver != null && element.getProperty().isReference()) { 900 String ref = linkResolver.resolveReference(getReferenceForElement(element)); 901 if (ref != null) { 902 xml.externalLink(ref); 903 } 904 } 905 for (Element child : element.getChildren()) { 906 if (wantCompose(element.getPath(), child)) { 907 if (isText(child.getProperty())) { 908 if (linkResolver != null) 909 xml.link(linkResolver.resolveProperty(element.getProperty())); 910 xml.text(child.getValue()); 911 } else if (!isAttr(child.getProperty())) 912 composeElement(xml, child, child.getName(), false); 913 } 914 } 915 if (!root && element.getSpecial() != null) 916 xml.exit(element.getProperty().getXmlNamespace(),element.getType()); 917 xml.exit(element.getProperty().getXmlNamespace(),elementName); 918 } 919 } 920 921 private String makeNamespaceAbbrev(Property property, IXMLWriter xml) { 922 // it's a cosmetic thing, but we're going to try to come up with a nice namespace 923 924 ElementDefinition ed = property.getDefinition(); 925 String ns = property.getXmlNamespace(); 926 String n = property.getXmlName(); 927 928 String diff = property.getName().toLowerCase().replace(n.toLowerCase(), ""); 929 if (!Utilities.noString(diff) && diff.length() <= 5 && Utilities.isToken(diff) && !xml.abbreviationDefined(diff)) { 930 return diff; 931 } 932 933 int i = ns.length()-1; 934 while (i > 0) { 935 if (Character.isAlphabetic(ns.charAt(i)) || Character.isDigit(ns.charAt(i))) { 936 i--; 937 } else { 938 break; 939 } 940 } 941 String tail = ns.substring(i+1); 942 if (!Utilities.noString(tail) && tail.length() <= 5 && Utilities.isToken(tail) && !xml.abbreviationDefined(tail)) { 943 return tail; 944 } 945 946 i = 0; 947 while (xml.abbreviationDefined("ns"+i)) { 948 i++; 949 } 950 return "ns"+i; 951 } 952 private String checkHeader(List<ValidationMessage> errors, InputStream stream) throws IOException { 953 try { 954 // the stream will either start with the UTF-8 BOF or with <xml 955 int i0 = stream.read(); 956 int i1 = stream.read(); 957 int i2 = stream.read(); 958 959 StringBuilder b = new StringBuilder(); 960 if (i0 == 0xEF && i1 == 0xBB && i2 == 0xBF) { 961 // ok, it's UTF-8 962 } else if (i0 == 0x3C && i1 == 0x3F && i2 == 0x78) { // <xm 963 b.append((char) i0); 964 b.append((char) i1); 965 b.append((char) i2); 966 } else if (i0 == 60) { // just plain old XML with no header 967 return "1.0"; 968 } else { 969 throw new Exception(context.formatMessage(I18nConstants.XML_ENCODING_INVALID)); 970 } 971 int i = stream.read(); 972 do { 973 b.append((char) i); 974 i = stream.read(); 975 } while (i != 0x3E); 976 String header = b.toString(); 977 String e = null; 978 i = header.indexOf("encoding=\""); 979 if (i > -1) { 980 e = header.substring(i+10, i+15); 981 } else { 982 i = header.indexOf("encoding='"); 983 if (i > -1) { 984 e = header.substring(i+10, i+15); 985 } 986 } 987 if (e != null && !"UTF-8".equalsIgnoreCase(e)) { 988 logError(errors, ValidationMessage.NO_RULE_DATE, 0, 0, "XML", IssueType.INVALID, context.formatMessage(I18nConstants.XML_ENCODING_INVALID), IssueSeverity.ERROR); 989 } 990 991 i = header.indexOf("version=\""); 992 if (i > -1) { 993 return header.substring(i+9, i+12); 994 } else { 995 i = header.indexOf("version='"); 996 if (i > -1) { 997 return header.substring(i+9, i+12); 998 } 999 } 1000 return "?xml-p1?"; 1001 } catch (Exception e) { 1002 // suppress this error 1003 logError(errors, ValidationMessage.NO_RULE_DATE, 0, 0, "XML", IssueType.INVALID, e.getMessage(), IssueSeverity.ERROR); 1004 } 1005 return "?xml-p2?"; 1006 } 1007 1008 class NullErrorHandler implements ErrorHandler { 1009 @Override 1010 public void fatalError(SAXParseException e) { 1011 // do nothing 1012 } 1013 1014 @Override 1015 public void error(SAXParseException e) { 1016 // do nothing 1017 } 1018 1019 @Override 1020 public void warning(SAXParseException e) { 1021 // do nothing 1022 } 1023} 1024}