001 /*
002 * Java Genetic Algorithm Library (jenetics-7.1.1).
003 * Copyright (c) 2007-2022 Franz Wilhelmstötter
004 *
005 * Licensed under the Apache License, Version 2.0 (the "License");
006 * you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 *
017 * Author:
018 * Franz Wilhelmstötter (franz.wilhelmstoetter@gmail.com)
019 */
020 package io.jenetics.ext.grammar;
021
022 import static java.lang.Character.isWhitespace;
023 import static java.lang.String.format;
024 import static io.jenetics.ext.grammar.Bnf.isIdChar;
025 import static io.jenetics.ext.grammar.Bnf.isStringChar;
026 import static io.jenetics.ext.grammar.BnfTokenizer.BnfTokenType.ASSIGN;
027 import static io.jenetics.ext.grammar.BnfTokenizer.BnfTokenType.BAR;
028 import static io.jenetics.ext.grammar.BnfTokenizer.BnfTokenType.GT;
029 import static io.jenetics.ext.grammar.BnfTokenizer.BnfTokenType.ID;
030 import static io.jenetics.ext.grammar.BnfTokenizer.BnfTokenType.LT;
031 import static io.jenetics.ext.grammar.BnfTokenizer.BnfTokenType.QUOTED_STRING;
032 import static io.jenetics.ext.grammar.BnfTokenizer.BnfTokenType.STRING;
033
034 import io.jenetics.ext.internal.parser.CharSequenceTokenizer;
035 import io.jenetics.ext.internal.parser.ParsingException;
036 import io.jenetics.ext.internal.parser.Token;
037
038 /**
039 * Tokenizer for BNF grammars.
040 *
041 * <pre>{@code
042 * ASSIGN: '::=';
043 * BAR: '|';
044 * GT: '>';
045 * LT: '<';
046 * ID: ('a'..'z'|'A'..'Z') ('a'..'z'|'A'..'Z'|'0'..'9'|'-')+;
047 * STRING: ( '%s' | '%i' )? '"' ( ~ '"' )* '"';
048 * WS: [ \r\n\t] -> skip;
049 * }</pre>
050 *
051 * @author <a href="mailto:franz.wilhelmstoetter@gmail.com">Franz Wilhelmstötter</a>
052 * @since 7.1
053 * @version 7.1
054 */
055 final class BnfTokenizer extends CharSequenceTokenizer {
056
057 enum BnfTokenType implements Token.Type {
058 ASSIGN(1),
059 BAR(2),
060 GT(3),
061 LT(4),
062 ID(5),
063 STRING(6),
064 QUOTED_STRING(7);
065
066 private final int _code;
067
068 BnfTokenType(final int code) {
069 _code = code;
070 }
071
072 @Override
073 public int code() {
074 return _code;
075 }
076 }
077
078 BnfTokenizer(final CharSequence input) {
079 super(input);
080 }
081
082 @Override
083 public Token<String> next() {
084 while (isNonEof(c)) {
085 final char value = c;
086 switch (value) {
087 case ' ', '\r', '\n', '\t':
088 WS();
089 continue;
090 case ':':
091 return ASSIGN();
092 case '|':
093 consume();
094 return BAR.token(value);
095 case '>':
096 consume();
097 return GT.token(value);
098 case '<':
099 consume();
100 return LT.token(value);
101 case '\'':
102 return QUOTED_STRING();
103 default:
104 if (isAlphabetic(c)) {
105 return ID();
106 } else if (!isWhitespace(c)) {
107 return STRING();
108 } else {
109 throw new ParsingException(format(
110 "Got invalid character '%s' at position '%d'.",
111 c, pos
112 ));
113 }
114 }
115 }
116
117 return null;
118 }
119
120 private Token<String> ASSIGN() {
121 match(':');
122 match(':');
123 match('=');
124 return ASSIGN.token("::=");
125 }
126
127 private Token<String> QUOTED_STRING() {
128 final var value = new StringBuilder();
129
130 match('\'');
131 while (isNonEof(c) && c != '\'') {
132 if (c == '\\') {
133 consume();
134 }
135
136 value.append(c);
137 consume();
138 }
139 match('\'');
140
141 return QUOTED_STRING.token(value.toString());
142 }
143
144 private Token<String> ID() {
145 final var value = new StringBuilder();
146
147 while (isIdChar(c)) {
148 value.append(c);
149 consume();
150 }
151
152 return ID.token(value.toString());
153 }
154
155 private Token<String> STRING() {
156 final var value = new StringBuilder();
157
158 while (isNonEof(c) && isStringChar(c)) {
159 value.append(c);
160 consume();
161 }
162
163 return STRING.token(value.toString());
164 }
165
166 }
|