001package org.hl7.fhir.dstu2.utils;
002
003/*-
004 * #%L
005 * org.hl7.fhir.dstu2
006 * %%
007 * Copyright (C) 2014 - 2019 Health Level 7
008 * %%
009 * Licensed under the Apache License, Version 2.0 (the "License");
010 * you may not use this file except in compliance with the License.
011 * You may obtain a copy of the License at
012 * 
013 *      http://www.apache.org/licenses/LICENSE-2.0
014 * 
015 * Unless required by applicable law or agreed to in writing, software
016 * distributed under the License is distributed on an "AS IS" BASIS,
017 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
018 * See the License for the specific language governing permissions and
019 * limitations under the License.
020 * #L%
021 */
022
023
024import org.hl7.fhir.dstu2.model.ExpressionNode;
025import org.hl7.fhir.dstu2.model.ExpressionNode.SourceLocation;
026import org.hl7.fhir.exceptions.FHIRException;
027import org.hl7.fhir.utilities.Utilities;
028
029// shared lexer for concrete syntaxes 
030// - FluentPath
031// - Mapping language
032
033public class FHIRLexer {
034  public class FHIRLexerException extends FHIRException {
035
036    public FHIRLexerException() {
037      super();
038    }
039
040    public FHIRLexerException(String message, Throwable cause) {
041      super(message, cause);
042    }
043
044    public FHIRLexerException(String message) {
045      super(message);
046    }
047
048    public FHIRLexerException(Throwable cause) {
049      super(cause);
050    }
051
052  }
053  private String path;
054  private int cursor;
055  private int currentStart;
056  private String current;
057  private SourceLocation currentLocation;
058  private SourceLocation currentStartLocation;
059  private int id;
060
061  public FHIRLexer(String source) throws FHIRLexerException {
062    this.path = source;
063    currentLocation = new SourceLocation(1, 1);
064    next();
065  }
066  public String getCurrent() {
067    return current;
068  }
069  public SourceLocation getCurrentLocation() {
070    return currentLocation;
071  }
072
073  public boolean isConstant(boolean incDoubleQuotes) {
074    return current.charAt(0) == '\'' || (incDoubleQuotes && current.charAt(0) == '"') || current.charAt(0) == '@' || current.charAt(0) == '%' || 
075        current.charAt(0) == '-' || current.charAt(0) == '+' || (current.charAt(0) >= '0' && current.charAt(0) <= '9') || 
076        current.equals("true") || current.equals("false") || current.equals("{}");
077  }
078
079  public boolean isStringConstant() {
080    return current.charAt(0) == '\'' || current.charAt(0) == '"';
081  }
082
083  public String take() throws FHIRLexerException {
084    String s = current;
085    next();
086    return s;
087  }
088
089  public boolean isToken() {
090    if (Utilities.noString(current))
091      return false;
092
093    if (current.startsWith("$"))
094      return true;
095
096    if (current.equals("*") || current.equals("**"))
097      return true;
098
099    if ((current.charAt(0) >= 'A' && current.charAt(0) <= 'Z') || (current.charAt(0) >= 'a' && current.charAt(0) <= 'z')) {
100      for (int i = 1; i < current.length(); i++) 
101        if (!( (current.charAt(1) >= 'A' && current.charAt(1) <= 'Z') || (current.charAt(1) >= 'a' && current.charAt(1) <= 'z') ||
102            (current.charAt(1) >= '0' && current.charAt(1) <= '9')))
103          return false;
104      return true;
105    }
106    return false;
107  }
108
109  public FHIRLexerException error(String msg) {
110    return error(msg, currentLocation.toString());
111  }
112
113  public FHIRLexerException error(String msg, String location) {
114    return new FHIRLexerException("Error in "+path+" at "+location+": "+msg);
115  }
116
117  public void next() throws FHIRLexerException {
118    current = null;
119    boolean last13 = false;
120    while (cursor < path.length() && Character.isWhitespace(path.charAt(cursor))) {
121      if (path.charAt(cursor) == '\r') {
122        currentLocation.setLine(currentLocation.getLine() + 1);
123        currentLocation.setColumn(1);
124        last13 = true;
125      } else if (!last13 && (path.charAt(cursor) == '\n')) {
126        currentLocation.setLine(currentLocation.getLine() + 1);
127        currentLocation.setColumn(1);
128        last13 = false;
129      } else {
130        last13 = false;
131        currentLocation.setColumn(currentLocation.getColumn() + 1);
132      }
133      cursor++;
134    }
135    currentStart = cursor;
136    currentStartLocation = currentLocation;
137    if (cursor < path.length()) {
138      char ch = path.charAt(cursor);
139      if (ch == '!' || ch == '>' || ch == '<' || ch == ':' || ch == '-' || ch == '=')  {
140        cursor++;
141        if (cursor < path.length() && (path.charAt(cursor) == '=' || path.charAt(cursor) == '~' || path.charAt(cursor) == '-')) 
142          cursor++;
143        current = path.substring(currentStart, cursor);
144      } else if (ch == '.' ) {
145        cursor++;
146        if (cursor < path.length() && (path.charAt(cursor) == '.')) 
147          cursor++;
148        current = path.substring(currentStart, cursor);
149      } else if (ch >= '0' && ch <= '9') {
150          cursor++;
151        boolean dotted = false;
152        while (cursor < path.length() && ((path.charAt(cursor) >= '0' && path.charAt(cursor) <= '9') || (path.charAt(cursor) == '.') && !dotted)) {
153          if (path.charAt(cursor) == '.')
154            dotted = true;
155          cursor++;
156        }
157        if (path.charAt(cursor-1) == '.')
158          cursor--;
159        current = path.substring(currentStart, cursor);
160      }  else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
161        while (cursor < path.length() && ((path.charAt(cursor) >= 'A' && path.charAt(cursor) <= 'Z') || (path.charAt(cursor) >= 'a' && path.charAt(cursor) <= 'z') || 
162            (path.charAt(cursor) >= '0' && path.charAt(cursor) <= '9') || path.charAt(cursor) == '_')) 
163          cursor++;
164        current = path.substring(currentStart, cursor);
165      } else if (ch == '%') {
166        cursor++;
167        if (cursor < path.length() && (path.charAt(cursor) == '"')) {
168          cursor++;
169          while (cursor < path.length() && (path.charAt(cursor) != '"'))
170            cursor++;
171          cursor++;
172        } else
173        while (cursor < path.length() && ((path.charAt(cursor) >= 'A' && path.charAt(cursor) <= 'Z') || (path.charAt(cursor) >= 'a' && path.charAt(cursor) <= 'z') || 
174            (path.charAt(cursor) >= '0' && path.charAt(cursor) <= '9') || path.charAt(cursor) == ':' || path.charAt(cursor) == '-'))
175          cursor++;
176        current = path.substring(currentStart, cursor);
177      } else if (ch == '/') {
178        cursor++;
179        if (cursor < path.length() && (path.charAt(cursor) == '/')) {
180          cursor++;
181          while (cursor < path.length() && !((path.charAt(cursor) == '\r') || path.charAt(cursor) == '\n')) 
182            cursor++;
183        }
184        current = path.substring(currentStart, cursor);
185      } else if (ch == '$') {
186        cursor++;
187        while (cursor < path.length() && (path.charAt(cursor) >= 'a' && path.charAt(cursor) <= 'z'))
188          cursor++;
189        current = path.substring(currentStart, cursor);
190      } else if (ch == '{') {
191        cursor++;
192        ch = path.charAt(cursor);
193        if (ch == '}')
194          cursor++;
195        current = path.substring(currentStart, cursor);
196      } else if (ch == '"'){
197        cursor++;
198        boolean escape = false;
199        while (cursor < path.length() && (escape || path.charAt(cursor) != '"')) {
200          if (escape)
201            escape = false;
202          else 
203            escape = (path.charAt(cursor) == '\\');
204          cursor++;
205        }
206        if (cursor == path.length())
207          throw error("Unterminated string");
208        cursor++;
209        current = "\""+path.substring(currentStart+1, cursor-1)+"\"";
210      } else if (ch == '\''){
211        cursor++;
212        char ech = ch;
213        boolean escape = false;
214        while (cursor < path.length() && (escape || path.charAt(cursor) != ech)) {
215          if (escape)
216            escape = false;
217          else 
218            escape = (path.charAt(cursor) == '\\');
219          cursor++;
220        }
221        if (cursor == path.length())
222          throw error("Unterminated string");
223        cursor++;
224        current = path.substring(currentStart, cursor);
225        if (ech == '\'')
226          current = "\'"+current.substring(1, current.length() - 1)+"\'";
227      } else if (ch == '@'){
228        cursor++;
229        while (cursor < path.length() && isDateChar(path.charAt(cursor)))
230          cursor++;          
231        current = path.substring(currentStart, cursor);
232      } else { // if CharInSet(ch, ['.', ',', '(', ')', '=', '$']) then
233        cursor++;
234        current = path.substring(currentStart, cursor);
235      }
236    }
237  }
238
239
240  private boolean isDateChar(char ch) {
241    return ch == '-' || ch == ':' || ch == 'T' || ch == '+' || ch == 'Z' || Character.isDigit(ch);
242  }
243  public boolean isOp() {
244    return ExpressionNode.Operation.fromCode(current) != null;
245  }
246  public boolean done() {
247    return currentStart >= path.length();
248  }
249  public int nextId() {
250    id++;
251    return id;
252  }
253  public SourceLocation getCurrentStartLocation() {
254    return currentStartLocation;
255  }
256  
257  // special case use
258  public void setCurrent(String current) {
259    this.current = current;
260  }
261
262  public boolean hasComment() {
263    return !done() && current.startsWith("//");
264  }
265  public boolean hasToken(String kw) {
266    return !done() && kw.equals(current);
267  }
268  public void token(String kw) throws FHIRLexerException {
269    if (!kw.equals(current)) 
270      throw error("Found \""+current+"\" expecting \""+kw+"\"");
271    next();
272  }
273  public String readConstant(String desc) throws FHIRLexerException {
274    if (!isStringConstant())
275      throw error("Found "+current+" expecting \"["+desc+"]\"");
276
277    return processConstant(take());
278  }
279
280  public String processConstant(String s) throws FHIRLexerException {
281    StringBuilder b = new StringBuilder();
282    int i = 1;
283    while (i < s.length()-1) {
284      char ch = s.charAt(i);
285      if (ch == '\\') {
286        i++;
287        switch (s.charAt(i)) {
288        case 't': 
289          b.append('\t');
290          break;
291        case 'r':
292          b.append('\r');
293          break;
294        case 'n': 
295          b.append('\n');
296          break;
297        case 'f': 
298          b.append('\f');
299          break;
300        case '\'':
301          b.append('\'');
302          break;
303        case '\\': 
304          b.append('\\');
305          break;
306        case '/': 
307          b.append('\\');
308          break;
309        case 'u':
310          i++;
311          int uc = Integer.parseInt(s.substring(i, i+4), 16);
312          b.append((char) uc);
313          i = i + 4;
314          break;
315        default:
316          throw new FHIRLexerException("Unknown character escape \\"+s.charAt(i));
317        }
318      } else {
319        b.append(ch);
320        i++;
321      }
322    }
323    return b.toString();
324
325  }
326  public void skipToken(String token) throws FHIRLexerException {
327    if (getCurrent().equals(token))
328      next();
329    
330  }
331  public String takeDottedToken() throws FHIRLexerException {
332    StringBuilder b = new StringBuilder();
333    b.append(take());
334    while (!done() && getCurrent().equals(".")) {
335      b.append(take());
336      b.append(take());
337    }
338    return b.toString();
339  }
340  
341  void skipComments() throws FHIRLexerException {
342    while (!done() && hasComment())
343      next();
344  }
345
346}