001package org.hl7.fhir.dstu2.utils; 002 003/*- 004 * #%L 005 * org.hl7.fhir.dstu2 006 * %% 007 * Copyright (C) 2014 - 2019 Health Level 7 008 * %% 009 * Licensed under the Apache License, Version 2.0 (the "License"); 010 * you may not use this file except in compliance with the License. 011 * You may obtain a copy of the License at 012 * 013 * http://www.apache.org/licenses/LICENSE-2.0 014 * 015 * Unless required by applicable law or agreed to in writing, software 016 * distributed under the License is distributed on an "AS IS" BASIS, 017 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 018 * See the License for the specific language governing permissions and 019 * limitations under the License. 020 * #L% 021 */ 022 023 024import org.hl7.fhir.dstu2.model.ExpressionNode; 025import org.hl7.fhir.dstu2.model.ExpressionNode.SourceLocation; 026import org.hl7.fhir.exceptions.FHIRException; 027import org.hl7.fhir.utilities.Utilities; 028 029// shared lexer for concrete syntaxes 030// - FluentPath 031// - Mapping language 032 033public class FHIRLexer { 034 public class FHIRLexerException extends FHIRException { 035 036 public FHIRLexerException() { 037 super(); 038 } 039 040 public FHIRLexerException(String message, Throwable cause) { 041 super(message, cause); 042 } 043 044 public FHIRLexerException(String message) { 045 super(message); 046 } 047 048 public FHIRLexerException(Throwable cause) { 049 super(cause); 050 } 051 052 } 053 private String path; 054 private int cursor; 055 private int currentStart; 056 private String current; 057 private SourceLocation currentLocation; 058 private SourceLocation currentStartLocation; 059 private int id; 060 061 public FHIRLexer(String source) throws FHIRLexerException { 062 this.path = source; 063 currentLocation = new SourceLocation(1, 1); 064 next(); 065 } 066 public String getCurrent() { 067 return current; 068 } 069 public SourceLocation getCurrentLocation() { 070 return currentLocation; 071 } 072 073 public boolean isConstant(boolean incDoubleQuotes) { 074 return current.charAt(0) == '\'' || (incDoubleQuotes && current.charAt(0) == '"') || current.charAt(0) == '@' || current.charAt(0) == '%' || 075 current.charAt(0) == '-' || current.charAt(0) == '+' || (current.charAt(0) >= '0' && current.charAt(0) <= '9') || 076 current.equals("true") || current.equals("false") || current.equals("{}"); 077 } 078 079 public boolean isStringConstant() { 080 return current.charAt(0) == '\'' || current.charAt(0) == '"'; 081 } 082 083 public String take() throws FHIRLexerException { 084 String s = current; 085 next(); 086 return s; 087 } 088 089 public boolean isToken() { 090 if (Utilities.noString(current)) 091 return false; 092 093 if (current.startsWith("$")) 094 return true; 095 096 if (current.equals("*") || current.equals("**")) 097 return true; 098 099 if ((current.charAt(0) >= 'A' && current.charAt(0) <= 'Z') || (current.charAt(0) >= 'a' && current.charAt(0) <= 'z')) { 100 for (int i = 1; i < current.length(); i++) 101 if (!( (current.charAt(1) >= 'A' && current.charAt(1) <= 'Z') || (current.charAt(1) >= 'a' && current.charAt(1) <= 'z') || 102 (current.charAt(1) >= '0' && current.charAt(1) <= '9'))) 103 return false; 104 return true; 105 } 106 return false; 107 } 108 109 public FHIRLexerException error(String msg) { 110 return error(msg, currentLocation.toString()); 111 } 112 113 public FHIRLexerException error(String msg, String location) { 114 return new FHIRLexerException("Error in "+path+" at "+location+": "+msg); 115 } 116 117 public void next() throws FHIRLexerException { 118 current = null; 119 boolean last13 = false; 120 while (cursor < path.length() && Character.isWhitespace(path.charAt(cursor))) { 121 if (path.charAt(cursor) == '\r') { 122 currentLocation.setLine(currentLocation.getLine() + 1); 123 currentLocation.setColumn(1); 124 last13 = true; 125 } else if (!last13 && (path.charAt(cursor) == '\n')) { 126 currentLocation.setLine(currentLocation.getLine() + 1); 127 currentLocation.setColumn(1); 128 last13 = false; 129 } else { 130 last13 = false; 131 currentLocation.setColumn(currentLocation.getColumn() + 1); 132 } 133 cursor++; 134 } 135 currentStart = cursor; 136 currentStartLocation = currentLocation; 137 if (cursor < path.length()) { 138 char ch = path.charAt(cursor); 139 if (ch == '!' || ch == '>' || ch == '<' || ch == ':' || ch == '-' || ch == '=') { 140 cursor++; 141 if (cursor < path.length() && (path.charAt(cursor) == '=' || path.charAt(cursor) == '~' || path.charAt(cursor) == '-')) 142 cursor++; 143 current = path.substring(currentStart, cursor); 144 } else if (ch == '.' ) { 145 cursor++; 146 if (cursor < path.length() && (path.charAt(cursor) == '.')) 147 cursor++; 148 current = path.substring(currentStart, cursor); 149 } else if (ch >= '0' && ch <= '9') { 150 cursor++; 151 boolean dotted = false; 152 while (cursor < path.length() && ((path.charAt(cursor) >= '0' && path.charAt(cursor) <= '9') || (path.charAt(cursor) == '.') && !dotted)) { 153 if (path.charAt(cursor) == '.') 154 dotted = true; 155 cursor++; 156 } 157 if (path.charAt(cursor-1) == '.') 158 cursor--; 159 current = path.substring(currentStart, cursor); 160 } else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) { 161 while (cursor < path.length() && ((path.charAt(cursor) >= 'A' && path.charAt(cursor) <= 'Z') || (path.charAt(cursor) >= 'a' && path.charAt(cursor) <= 'z') || 162 (path.charAt(cursor) >= '0' && path.charAt(cursor) <= '9') || path.charAt(cursor) == '_')) 163 cursor++; 164 current = path.substring(currentStart, cursor); 165 } else if (ch == '%') { 166 cursor++; 167 if (cursor < path.length() && (path.charAt(cursor) == '"')) { 168 cursor++; 169 while (cursor < path.length() && (path.charAt(cursor) != '"')) 170 cursor++; 171 cursor++; 172 } else 173 while (cursor < path.length() && ((path.charAt(cursor) >= 'A' && path.charAt(cursor) <= 'Z') || (path.charAt(cursor) >= 'a' && path.charAt(cursor) <= 'z') || 174 (path.charAt(cursor) >= '0' && path.charAt(cursor) <= '9') || path.charAt(cursor) == ':' || path.charAt(cursor) == '-')) 175 cursor++; 176 current = path.substring(currentStart, cursor); 177 } else if (ch == '/') { 178 cursor++; 179 if (cursor < path.length() && (path.charAt(cursor) == '/')) { 180 cursor++; 181 while (cursor < path.length() && !((path.charAt(cursor) == '\r') || path.charAt(cursor) == '\n')) 182 cursor++; 183 } 184 current = path.substring(currentStart, cursor); 185 } else if (ch == '$') { 186 cursor++; 187 while (cursor < path.length() && (path.charAt(cursor) >= 'a' && path.charAt(cursor) <= 'z')) 188 cursor++; 189 current = path.substring(currentStart, cursor); 190 } else if (ch == '{') { 191 cursor++; 192 ch = path.charAt(cursor); 193 if (ch == '}') 194 cursor++; 195 current = path.substring(currentStart, cursor); 196 } else if (ch == '"'){ 197 cursor++; 198 boolean escape = false; 199 while (cursor < path.length() && (escape || path.charAt(cursor) != '"')) { 200 if (escape) 201 escape = false; 202 else 203 escape = (path.charAt(cursor) == '\\'); 204 cursor++; 205 } 206 if (cursor == path.length()) 207 throw error("Unterminated string"); 208 cursor++; 209 current = "\""+path.substring(currentStart+1, cursor-1)+"\""; 210 } else if (ch == '\''){ 211 cursor++; 212 char ech = ch; 213 boolean escape = false; 214 while (cursor < path.length() && (escape || path.charAt(cursor) != ech)) { 215 if (escape) 216 escape = false; 217 else 218 escape = (path.charAt(cursor) == '\\'); 219 cursor++; 220 } 221 if (cursor == path.length()) 222 throw error("Unterminated string"); 223 cursor++; 224 current = path.substring(currentStart, cursor); 225 if (ech == '\'') 226 current = "\'"+current.substring(1, current.length() - 1)+"\'"; 227 } else if (ch == '@'){ 228 cursor++; 229 while (cursor < path.length() && isDateChar(path.charAt(cursor))) 230 cursor++; 231 current = path.substring(currentStart, cursor); 232 } else { // if CharInSet(ch, ['.', ',', '(', ')', '=', '$']) then 233 cursor++; 234 current = path.substring(currentStart, cursor); 235 } 236 } 237 } 238 239 240 private boolean isDateChar(char ch) { 241 return ch == '-' || ch == ':' || ch == 'T' || ch == '+' || ch == 'Z' || Character.isDigit(ch); 242 } 243 public boolean isOp() { 244 return ExpressionNode.Operation.fromCode(current) != null; 245 } 246 public boolean done() { 247 return currentStart >= path.length(); 248 } 249 public int nextId() { 250 id++; 251 return id; 252 } 253 public SourceLocation getCurrentStartLocation() { 254 return currentStartLocation; 255 } 256 257 // special case use 258 public void setCurrent(String current) { 259 this.current = current; 260 } 261 262 public boolean hasComment() { 263 return !done() && current.startsWith("//"); 264 } 265 public boolean hasToken(String kw) { 266 return !done() && kw.equals(current); 267 } 268 public void token(String kw) throws FHIRLexerException { 269 if (!kw.equals(current)) 270 throw error("Found \""+current+"\" expecting \""+kw+"\""); 271 next(); 272 } 273 public String readConstant(String desc) throws FHIRLexerException { 274 if (!isStringConstant()) 275 throw error("Found "+current+" expecting \"["+desc+"]\""); 276 277 return processConstant(take()); 278 } 279 280 public String processConstant(String s) throws FHIRLexerException { 281 StringBuilder b = new StringBuilder(); 282 int i = 1; 283 while (i < s.length()-1) { 284 char ch = s.charAt(i); 285 if (ch == '\\') { 286 i++; 287 switch (s.charAt(i)) { 288 case 't': 289 b.append('\t'); 290 break; 291 case 'r': 292 b.append('\r'); 293 break; 294 case 'n': 295 b.append('\n'); 296 break; 297 case 'f': 298 b.append('\f'); 299 break; 300 case '\'': 301 b.append('\''); 302 break; 303 case '\\': 304 b.append('\\'); 305 break; 306 case '/': 307 b.append('\\'); 308 break; 309 case 'u': 310 i++; 311 int uc = Integer.parseInt(s.substring(i, i+4), 16); 312 b.append((char) uc); 313 i = i + 4; 314 break; 315 default: 316 throw new FHIRLexerException("Unknown character escape \\"+s.charAt(i)); 317 } 318 } else { 319 b.append(ch); 320 i++; 321 } 322 } 323 return b.toString(); 324 325 } 326 public void skipToken(String token) throws FHIRLexerException { 327 if (getCurrent().equals(token)) 328 next(); 329 330 } 331 public String takeDottedToken() throws FHIRLexerException { 332 StringBuilder b = new StringBuilder(); 333 b.append(take()); 334 while (!done() && getCurrent().equals(".")) { 335 b.append(take()); 336 b.append(take()); 337 } 338 return b.toString(); 339 } 340 341 void skipComments() throws FHIRLexerException { 342 while (!done() && hasComment()) 343 next(); 344 } 345 346}