001 /*
002 * Java Genetic Algorithm Library (jenetics-7.1.2).
003 * Copyright (c) 2007-2023 Franz Wilhelmstötter
004 *
005 * Licensed under the Apache License, Version 2.0 (the "License");
006 * you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 *
017 * Author:
018 * Franz Wilhelmstötter (franz.wilhelmstoetter@gmail.com)
019 */
020 package io.jenetics.ext.grammar;
021
022 import java.util.function.Function;
023
024 import io.jenetics.BitChromosome;
025 import io.jenetics.BitGene;
026 import io.jenetics.Genotype;
027 import io.jenetics.IntegerChromosome;
028 import io.jenetics.IntegerGene;
029 import io.jenetics.engine.Codec;
030 import io.jenetics.util.IntRange;
031
032 import io.jenetics.ext.grammar.Cfg.Rule;
033
034 /**
035 * This class defines factories for different CFG ↔ Chromosome mappings
036 * (encodings). The classical mapping codec, with a bit-chromosome can be created
037 * in the following way.
038 * <pre>{@code
039 * final Cfg<String> cfg = ...;
040 * final Codec<List<Terminal<String>>, BitGene> codec = singleBitChromosomeMapper(
041 * cfg,
042 * 1000,
043 * index -> new SentenceGenerator<>(index, 1000)
044 * );
045 * }</pre>
046 * This codec creates a mapping for the given grammar {@code cfg} and uses
047 * bit-chromosomes with length {@code 1000}. The result of the mapping will be a
048 * list of terminal symbols which has been created by the given
049 * {@link SentenceGenerator}. The sentence generator creates sentences with a
050 * maximal length of {@code 1000}. If no sentence could be created within this
051 * limit, an empty list of terminal symbols is returned.
052 *
053 * @see <a href="https://www.brinckerhoff.org/tmp/grammatica_evolution_ieee_tec_2001.pdf">
054 * Grammatical Evolution</a>
055 *
056 * @author <a href="mailto:franz.wilhelmstoetter@gmail.com">Franz Wilhelmstötter</a>
057 * @since 7.1
058 * @version 7.1
059 */
060 public final class Mappers {
061 private Mappers() {
062 }
063
064 /**
065 * Return a classic mapping codec. It uses a bit-chromosome for creating the
066 * grammar results. The codons are created by dividing the chromosome in
067 * 8-bit junks, as described in <a href="https://www.brinckerhoff.org/tmp/grammatica_evolution_ieee_tec_2001.pdf">
068 * Grammatical Evolution</a> by Michael O’Neill and Conor Ryan.
069 *
070 * <pre>{@code
071 * final Cfg<String> cfg = ...;
072 * final Codec<List<Terminal<String>>, BitGene> codec = singleBitChromosomeMapper(
073 * cfg,
074 * 1000,
075 * index -> new SentenceGenerator<>(index, 1000)
076 * );
077 * }</pre>
078 *
079 * @see #singleIntegerChromosomeMapper(Cfg, IntRange, IntRange, Function)
080 *
081 * @param cfg the encoding grammar
082 * @param length the length of the bit-chromosome
083 * @param generator sentence generator function from a given
084 * {@link SymbolIndex}
085 * @param <T> the terminal token type of the grammar
086 * @param <R> the result type of the mapper
087 * @return a new mapping codec for the given {@code cfg}
088 */
089 public static <T, R> Codec<R, BitGene>
090 singleBitChromosomeMapper(
091 final Cfg<? extends T> cfg,
092 final int length,
093 final Function<? super SymbolIndex, ? extends Generator<T, R>> generator
094 ) {
095 return Codec.of(
096 Genotype.of(BitChromosome.of(length)),
097 gt -> generator
098 .apply(Codons.ofBitGenes(gt.chromosome()))
099 .generate(cfg)
100 );
101 }
102
103 /**
104 * Create a mapping codec, similar as in {@link #singleBitChromosomeMapper(Cfg, int, Function)}.
105 * The only difference is that the codons are encoded directly, via an
106 * integer-chromosome, so that no gene split is necessary.
107 *
108 * <pre>{@code
109 * final Cfg<String> cfg = ...;
110 * final Codec<List<Terminal<String>>, IntegerGene> codec = singleIntegerChromosomeMapper(
111 * cfg,
112 * IntRange.of(0, 256), // Value range of chromosomes.
113 * IntRange.of(100), // Length (range) ot the chromosome.
114 * index -> new SentenceGenerator<>(index, 1000)
115 * );
116 * }</pre>
117 *
118 * @param cfg the encoding grammar
119 * @param range the value range of the integer genes
120 * @param length the length range of the integer-chromosome
121 * @param generator sentence generator function from a given
122 * {@link SymbolIndex}
123 * @param <T> the terminal token type of the grammar
124 * @param <R> the result type of the mapper
125 * @return a new mapping codec for the given {@code cfg}
126 */
127 public static <T, R> Codec<R, IntegerGene>
128 singleIntegerChromosomeMapper(
129 final Cfg<? extends T> cfg,
130 final IntRange range,
131 final IntRange length,
132 final Function<? super SymbolIndex, ? extends Generator<T, R>> generator
133 ) {
134 return Codec.of(
135 Genotype.of(IntegerChromosome.of(range, length)),
136 gt -> generator
137 .apply(Codons.ofIntegerGenes(gt.chromosome()))
138 .generate(cfg)
139 );
140 }
141
142 /**
143 * Create a mapping codec, similar as in {@link #singleBitChromosomeMapper(Cfg, int, Function)}.
144 * The only difference is that the codons are encoded directly, via an
145 * integer-chromosome, so that no gene split is necessary.
146 *
147 * <pre>{@code
148 * final Cfg<String> cfg = ...;
149 * final Codec<List<Terminal<String>>, IntegerGene> codec = singleIntegerChromosomeMapper(
150 * cfg,
151 * IntRange.of(0, 256), // Value range of chromosomes.
152 * 100, // Length (range) ot the chromosome.
153 * index -> new SentenceGenerator<>(index, 1000)
154 * );
155 * }</pre>
156 *
157 * @param cfg the encoding grammar
158 * @param range the value range of the integer genes
159 * @param length the length range of the integer-chromosome
160 * @param generator sentence generator function from a given
161 * {@link SymbolIndex}
162 * @param <T> the terminal token type of the grammar
163 * @param <R> the result type of the mapper
164 * @return a new mapping codec for the given {@code cfg}
165 */
166 public static <T, R> Codec<R, IntegerGene>
167 singleIntegerChromosomeMapper(
168 final Cfg<? extends T> cfg,
169 final IntRange range,
170 final int length,
171 final Function<? super SymbolIndex, ? extends Generator<T, R>> generator
172 ) {
173 return singleIntegerChromosomeMapper(
174 cfg, range, IntRange.of(length), generator
175 );
176 }
177
178 /**
179 * Codec for creating <em>results</em> from a given grammar. The creation of
180 * the grammar result is controlled by a given genotype. This encoding uses
181 * separate <em>codons</em>, backed up by a {@link IntegerChromosome}, for
182 * every rule. The length of the chromosome is defined as a function of the
183 * encoded rules. This means that the following CFG,
184 *
185 * <pre>{@code
186 * (0) (1)
187 * (0) <expr> ::= (<expr><op><expr>) | <var>
188 * (0) (1) (2) (3)
189 * (1) <op> ::= + | - | * | /
190 * (0) (1) (2) (3) (4)
191 * (2) <var> ::= x | 1 | 2 | 3 | 4
192 * }</pre>
193 *
194 * will be represented by the following {@link Genotype}
195 * <pre>{@code
196 * Genotype.of(
197 * IntegerChromosome.of(IntRange.of(0, 2), length.apply(cfg.rules().get(0))),
198 * IntegerChromosome.of(IntRange.of(0, 4), length.apply(cfg.rules().get(1))),
199 * IntegerChromosome.of(IntRange.of(0, 5), length.apply(cfg.rules().get(2)))
200 * )
201 * }</pre>
202 *
203 * The {@code length} function lets you defining the number of codons as
204 * function of the rule the chromosome is encoding.
205 *
206 * <pre>{@code
207 * final Cfg<String> cfg = Bnf.parse(...);
208 * final Codec<List<Terminal<String>>, IntegerGene> codec = multiIntegerChromosomeMapper(
209 * cfg,
210 * // The chromosome length is 25 times the
211 * // number of rule alternatives.
212 * rule -> IntRange.of(rule.alternatives().size()*25),
213 * // Using the standard sentence generator
214 * // with a maximal sentence length of 500.
215 * index -> new SentenceGenerator<>(index, 500)
216 * );
217 * }</pre>
218 *
219 * @param cfg the encoding grammar
220 * @param length the length of the chromosome which is used for selecting
221 * rules and symbols. The input parameter for this function is the
222 * actual rule. This way it is possible to define the chromosome
223 * length dependent on the selectable alternatives.
224 * @param generator sentence generator function from a given
225 * {@link SymbolIndex}
226 * @param <T> the terminal token type of the grammar
227 * @param <R> the result type of the mapper
228 * @return a new mapping codec for the given {@code cfg}
229 */
230 public static <T, R> Codec<R, IntegerGene>
231 multiIntegerChromosomeMapper(
232 final Cfg<? extends T> cfg,
233 final Function<? super Rule<?>, IntRange> length,
234 final Function<? super SymbolIndex, ? extends Generator<T, R>> generator
235 ) {
236 return new MultiIntegerChromosomeMapper<>(cfg, length, generator);
237 }
238
239
240 }
|