001 /*
002 * Java Genetic Algorithm Library (jenetics-7.1.1).
003 * Copyright (c) 2007-2022 Franz Wilhelmstötter
004 *
005 * Licensed under the Apache License, Version 2.0 (the "License");
006 * you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 *
017 * Author:
018 * Franz Wilhelmstötter (franz.wilhelmstoetter@gmail.com)
019 */
020 package io.jenetics.ext.grammar;
021
022 import static java.lang.Character.isDigit;
023 import static java.lang.Character.isWhitespace;
024 import static io.jenetics.ext.internal.parser.CharSequenceTokenizer.isAlphabetic;
025
026 import java.util.stream.Collectors;
027
028 import io.jenetics.ext.internal.parser.ParsingException;
029
030 /**
031 * This class contains methods for parsing and formatting <em>context-free</em>
032 * grammars in
033 * <a href="https://en.wikipedia.org/wiki/Backus%E2%80%93Naur_form">BNF</a>
034 * format.
035 * <pre>{@code
036 * final Cfg<String> grammar = Bnf.parse("""
037 * <expr> ::= <num> | <var> | '(' <expr> <op> <expr> ')'
038 * <op> ::= + | - | * | /
039 * <var> ::= x | y
040 * <num> ::= 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9
041 * """
042 * );
043 * }</pre>
044 *
045 * @author <a href="mailto:franz.wilhelmstoetter@gmail.com">Franz Wilhelmstötter</a>
046 * @since 7.1
047 * @version 7.1
048 */
049 public final class Bnf {
050 private Bnf() {}
051
052 static boolean isSymbolChar(final int ch) {
053 return switch (ch) {
054 case '<', '>', '|', ':', '=' -> true;
055 default -> false;
056 };
057 }
058
059 static boolean isStringChar(final char c) {
060 return !isWhitespace(c) && !isSymbolChar(c);
061 }
062
063 static boolean isIdChar(final char c) {
064 return isAlphabetic(c) || isDigit(c) || (c == '-');
065 }
066
067 /**
068 * Parses the given BNF {@code grammar} string to a {@link Cfg} object. The
069 * following example show the grammar of a simple arithmetic expression.
070 *
071 * <pre>{@code
072 * <expr> ::= <num> | <var> | '(' <expr> <op> <expr> ')'
073 * <op> ::= + | - | * | /
074 * <var> ::= x | y
075 * <num> ::= 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9
076 * }</pre>
077 *
078 * @param grammar the BNF {@code grammar} string
079 * @return the parsed {@code BNF} object
080 * @throws ParsingException if the given <em>grammar</em> is invalid
081 * @throws NullPointerException it the given {@code grammar} string is
082 * {@code null}
083 */
084 public static Cfg<String> parse(final String grammar) {
085 final var tokenizer = new BnfTokenizer(grammar);
086 final var parser = new BnfParser(tokenizer);
087
088 return parser.parse();
089 }
090
091 /**
092 * Formats the given <em>CFG</em> as BNF grammar string.
093 *
094 * @param grammar the CFG to format as BNF
095 * @return the BNF formatted grammar string
096 * @throws NullPointerException if the give {@code grammar} is {@code null}
097 */
098 public static String format(final Cfg<?> grammar) {
099 return grammar.rules().stream()
100 .map(Bnf::format)
101 .collect(Collectors.joining("\n"));
102 }
103
104 private static String format(final Cfg.Rule<?> rule) {
105 return String.format(
106 "%s ::= %s",
107 format(rule.start()),
108 rule.alternatives().stream()
109 .map(Bnf::format)
110 .collect(Collectors.joining("\n | "))
111 );
112 }
113
114 private static String format(final Cfg.Expression<?> expr) {
115 return expr.symbols().stream()
116 .map(Bnf::format)
117 .collect(Collectors.joining(" "));
118 }
119
120 private static String format(final Cfg.Symbol<?> symbol) {
121 if (symbol instanceof Cfg.NonTerminal<?> nt) {
122 return String.format("<%s>", nt.name());
123 } else if (symbol instanceof Cfg.Terminal<?> t) {
124 return "'" + t.name()
125 .replace("\\", "\\\\")
126 .replace("'", "\\'") + "'";
127 }
128 throw new AssertionError();
129 }
130
131 }
|