001 /*
002 * Java Genetic Algorithm Library (jenetics-7.1.0).
003 * Copyright (c) 2007-2022 Franz Wilhelmstötter
004 *
005 * Licensed under the Apache License, Version 2.0 (the "License");
006 * you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 *
017 * Author:
018 * Franz Wilhelmstötter (franz.wilhelmstoetter@gmail.com)
019 */
020 package io.jenetics.ext.grammar;
021
022 import static java.util.Objects.requireNonNull;
023
024 import java.util.Map;
025 import java.util.function.Function;
026 import java.util.stream.Collectors;
027 import java.util.stream.IntStream;
028
029 import io.jenetics.Genotype;
030 import io.jenetics.IntegerChromosome;
031 import io.jenetics.IntegerGene;
032 import io.jenetics.engine.Codec;
033 import io.jenetics.util.Factory;
034 import io.jenetics.util.ISeq;
035 import io.jenetics.util.IntRange;
036
037 import io.jenetics.ext.grammar.Cfg.Rule;
038
039 /**
040 * Codec for creating <em>results</em> from a given grammar. The creation of
041 * the grammar result is controlled by a given genotype. This encoding uses
042 * separate <em>codons</em>, backed up by a {@link IntegerChromosome}, for
043 * every rule. The length of the chromosome is defined as a function of the
044 * encoded rules. This means that the following CFG,
045 *
046 * <pre>{@code
047 * (0) (1)
048 * (0) <expr> ::= (<expr><op><expr>) | <var>
049 * (0) (1) (2) (3)
050 * (1) <op> ::= + | - | * | /
051 * (0) (1) (2) (3) (4)
052 * (2) <var> ::= x | 1 | 2 | 3 | 4
053 * }</pre>
054 *
055 * will be represented by the following {@link Genotype}
056 * <pre>{@code
057 * Genotype.of(
058 * IntegerChromosome.of(IntRange.of(0, 2), length.apply(cfg.rules().get(0))),
059 * IntegerChromosome.of(IntRange.of(0, 4), length.apply(cfg.rules().get(1))),
060 * IntegerChromosome.of(IntRange.of(0, 5), length.apply(cfg.rules().get(2)))
061 * )
062 * }</pre>
063 *
064 * The {@code length} function lets you defining the number of codons as
065 * function of the rule the chromosome is encoding.
066 *
067 * <pre>{@code
068 * final Cfg<String> cfg = Bnf.parse(...);
069 * final Codec<List<Terminal<String>>, IntegerGene> codec = new Mapper<>(
070 * cfg,
071 * // The chromosome length is 10 times the
072 * // number of rule alternatives.
073 * rule -> IntRange.of(rule.alternatives().size()*10),
074 * // Using the standard sentence generator
075 * // with a maximal sentence length of 5,000.
076 * index -> new SentenceGenerator<>(index, 5_000)
077 * );
078 * }</pre>
079 *
080 * @param <T> the terminal token type of the grammar
081 * @param <R> the result type of the mapper
082 *
083 * @author <a href="mailto:franz.wilhelmstoetter@gmail.com">Franz Wilhelmstötter</a>
084 * @since 7.1
085 * @version 7.1
086 */
087 final class MultiIntegerChromosomeMapper<T, R> implements Codec<R, IntegerGene> {
088
089 private final Factory<Genotype<IntegerGene>> _encoding;
090 private final Function<Genotype<IntegerGene>, R> _decoder;
091
092 /**
093 * Create a new sentence (list of terminal symbols) codec.
094 *
095 * @param cfg the encoding grammar
096 * @param length the length of the chromosome which is used for selecting
097 * rules and symbols. The input parameter for this function is the
098 * actual rule. This way it is possible to define the chromosome
099 * length dependent on the selectable alternatives.
100 * @param generator sentence generator function from a given
101 * {@link SymbolIndex}
102 */
103 public MultiIntegerChromosomeMapper(
104 final Cfg<? extends T> cfg,
105 final Function<? super Rule<?>, IntRange> length,
106 final Function<? super SymbolIndex, ? extends Generator<T, R>> generator
107 ) {
108 // Every rule gets its own codons. The ranges of the chromosomes
109 // will fit exactly the number of rule alternatives.
110 _encoding = Genotype.of(
111 cfg.rules().stream()
112 .map(rule ->
113 IntegerChromosome.of(
114 IntRange.of(0, rule.alternatives().size()),
115 length.apply(rule)
116 ))
117 .collect(ISeq.toISeq())
118 );
119
120 final var codons = new CodonsFactory(cfg);
121 _decoder = gt -> generator.apply(codons.get(gt)).generate(cfg);
122 }
123
124 @Override
125 public Factory<Genotype<IntegerGene>> encoding() {
126 return _encoding;
127 }
128
129 @Override
130 public Function<Genotype<IntegerGene>, R> decoder() {
131 return _decoder;
132 }
133
134 /**
135 * Helper class for generating codons for a given genotype.
136 */
137 private static final class CodonsFactory {
138 private final Map<String, Integer> _rulesIndex;
139
140 CodonsFactory(final Cfg<?> cfg) {
141 _rulesIndex = IntStream
142 .range(0, cfg.rules().size())
143 .mapToObj(i -> Map.entry(cfg.rules().get(i).start().name(), i))
144 .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
145 }
146
147 SymbolIndex get(final Genotype<IntegerGene> gt) {
148 final var codons = new CodonsCache(gt);
149
150 return (rule, bound) -> codons
151 .get(_rulesIndex.get(rule.start().name()))
152 .next(rule, bound);
153 }
154
155 /**
156 * Caching and lazy creation of codons.
157 */
158 private static final class CodonsCache {
159 private final Genotype<IntegerGene> _genotype;
160 private final Codons[] _codons;
161
162 CodonsCache(final Genotype<IntegerGene> genotype) {
163 _genotype = requireNonNull(genotype);
164 _codons = new Codons[genotype.length()];
165 }
166
167 Codons get(final int index) {
168 Codons result = _codons[index];
169 if (result == null) {
170 result = Codons.ofIntegerGenes(_genotype.get(index));
171 _codons[index] = result;
172 }
173
174 return result;
175 }
176 }
177
178 }
179
180 }
|