001 /*
002 * Java Genetic Algorithm Library (jenetics-7.1.0).
003 * Copyright (c) 2007-2022 Franz Wilhelmstötter
004 *
005 * Licensed under the Apache License, Version 2.0 (the "License");
006 * you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 *
017 * Author:
018 * Franz Wilhelmstötter (franz.wilhelmstoetter@gmail.com)
019 */
020 package io.jenetics.ext.grammar;
021
022 import static java.lang.String.format;
023 import static io.jenetics.ext.grammar.BnfTokenizer.BnfTokenType.ASSIGN;
024 import static io.jenetics.ext.grammar.BnfTokenizer.BnfTokenType.BAR;
025 import static io.jenetics.ext.grammar.BnfTokenizer.BnfTokenType.GT;
026 import static io.jenetics.ext.grammar.BnfTokenizer.BnfTokenType.ID;
027 import static io.jenetics.ext.grammar.BnfTokenizer.BnfTokenType.LT;
028 import static io.jenetics.ext.grammar.BnfTokenizer.BnfTokenType.QUOTED_STRING;
029 import static io.jenetics.ext.grammar.BnfTokenizer.BnfTokenType.STRING;
030 import static io.jenetics.ext.internal.parser.Token.Type.EOF;
031
032 import java.util.ArrayList;
033 import java.util.List;
034
035 import io.jenetics.ext.grammar.Cfg.Expression;
036 import io.jenetics.ext.grammar.Cfg.NonTerminal;
037 import io.jenetics.ext.grammar.Cfg.Rule;
038 import io.jenetics.ext.grammar.Cfg.Symbol;
039 import io.jenetics.ext.grammar.Cfg.Terminal;
040 import io.jenetics.ext.internal.parser.ParsingException;
041 import io.jenetics.ext.internal.parser.TokenParser;
042
043 /**
044 * Parser for BNF grammars.
045 *
046 * <pre>{@code
047 * rulelist: rule_* EOF;
048 * rule: lhs ASSIGN rhs;
049 * lhs: id;
050 * rhs: alternatives;
051 * alternatives: alternative (BAR alternative)*;
052 * alternative: element*;
053 * element: text | id;
054 * text: STRING | QUOTED_STRING;
055 * id: LT ruleid GT;
056 * ruleid: ID;
057 * }</pre>
058 *
059 * The BNF object is build from the following classes.
060 * <ul>
061 * <li>{@link Symbol}: A symbol is either a {@link Terminal} or
062 * {@link NonTerminal} symbol.</li>
063 * <li>{@link NonTerminal}: Non-terminal symbols are parenthesised in angle
064 * brackets; {@code <expr>}, {@code num} or {@code var}. The name must start
065 * with a letter and contain only letters and digits:
066 * {@code ('a'..'z'|'A'..'Z') ('a'..'z'|'A'..'Z'|'0'..'9'|'-')+}</li>
067 * <li>{@link Terminal}: Terminal symbols are simple string values, which
068 * can also be quoted; {@code x}, {@code 1}, {@code terminal} or
069 * {@code 'some $special value'}</li>
070 * <li>{@link Expression}: Consists of a list of symbols; {@code [num]},
071 * {@code [var]} or {@code [(, expr, op, expr, )]}</li>
072 * <li>{@link Rule}: A rule has a name, a non-terminal start symbol, and a
073 * list of <em>alternative</em> expressions;
074 * {@code <expr> ::= [[num], [var], [(, expr, op, expr, )]]}</li>
075 * <li>{@link Cfg}: A whole BNF grammar consists of one or more {@link Rule}s.</li>
076 * </ul>
077 *
078 * @author <a href="mailto:franz.wilhelmstoetter@gmail.com">Franz Wilhelmstötter</a>
079 * @since 7.1
080 * @version 7.1
081 */
082 final class BnfParser extends TokenParser<String> {
083
084 NonTerminal<String> start = null;
085 final List<Rule<String>> rules = new ArrayList<>();
086 final List<Symbol<String>> symbols = new ArrayList<>();
087 final List<Expression<String>> alternatives = new ArrayList<>();
088
089 BnfParser(final BnfTokenizer tokenizer) {
090 super(tokenizer, 4);
091 }
092
093 public Cfg<String> parse() {
094 rulelist();
095
096 return Cfg.of(rules);
097 }
098
099 private void rulelist() {
100 do {
101 rule();
102 } while (LA(1) != EOF.code());
103 }
104
105 private void rule() {
106 start = lhs();
107 match(ASSIGN);
108 rhs();
109
110 rules.add(new Rule<>(start, alternatives));
111 start = null;
112 alternatives.clear();
113 }
114
115 private NonTerminal<String> lhs() {
116 return id();
117 }
118
119 private void rhs() {
120 alternatives();
121 }
122
123 private void alternatives() {
124 alternative();
125 if (!symbols.isEmpty()) {
126 alternatives.add(new Expression<>(symbols));
127 symbols.clear();
128 }
129
130 while (LA(1) == BAR.code()) {
131 match(BAR);
132 alternative();
133
134 if (!symbols.isEmpty()) {
135 alternatives.add(new Expression<>(symbols));
136 symbols.clear();
137 }
138 }
139 }
140
141 private void alternative() {
142 do {
143 element();
144 } while (
145 LA(4) != ASSIGN.code() &&
146 (
147 LA(1) == STRING.code() ||
148 LA(1) == QUOTED_STRING.code() ||
149 LA(1) == ID.code() ||
150 LA(1) == LT.code()
151 )
152 );
153 }
154
155 private void element() {
156 if (LA(1) == STRING.code()) {
157 symbols.add(text());
158 } else if (LA(1) == QUOTED_STRING.code()) {
159 symbols.add(text());
160 } else if (LA(1) == ID.code()) {
161 symbols.add(text());
162 } else if (LA(1) == LT.code()) {
163 symbols.add(id());
164 } else {
165 throw new ParsingException(format(
166 "Expecting %s but found %s.",
167 List.of(STRING, QUOTED_STRING, ID, LT), LT(1)
168 ));
169 }
170 }
171
172 private Terminal<String> text() {
173 if (LA(1) == STRING.code()) {
174 return terminal(match(STRING).value());
175 } else if (LA(1) == QUOTED_STRING.code()) {
176 return terminal(match(QUOTED_STRING).value());
177 } else if (LA(1) == ID.code()) {
178 return terminal(match(ID).value());
179 } else {
180 throw new ParsingException(format(
181 "Expecting %s but found %s.",
182 List.of(STRING, QUOTED_STRING, ID), LT(1)
183 ));
184 }
185 }
186
187 private static Terminal<String> terminal(final String value) {
188 if (value.isEmpty()) {
189 throw new ParsingException("Terminal value must not be empty.");
190 }
191 return new Terminal<>(value, value);
192 }
193
194 private NonTerminal<String> id() {
195 match(LT);
196 final var result = ruleid();
197 match(GT);
198 return result;
199 }
200
201 private NonTerminal<String> ruleid() {
202 final var name = match(ID).value();
203 if (name.isEmpty()) {
204 throw new ParsingException("Rule id must not be empty.");
205 }
206 return new NonTerminal<>(name);
207 }
208
209 }
|