001/*
002 * Units of Measurement Reference Implementation
003 * Copyright (c) 2005-2024, Jean-Marie Dautelle, Werner Keil, Otavio Santana.
004 *
005 * All rights reserved.
006 *
007 * Redistribution and use in source and binary forms, with or without modification,
008 * are permitted provided that the following conditions are met:
009 *
010 * 1. Redistributions of source code must retain the above copyright notice,
011 *    this list of conditions and the following disclaimer.
012 *
013 * 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions
014 *    and the following disclaimer in the documentation and/or other materials provided with the distribution.
015 *
016 * 3. Neither the name of JSR-385, Indriya nor the names of their contributors may be used to endorse or promote products
017 *    derived from this software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
020 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
021 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
022 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
023 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
026 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
028 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package tech.units.indriya.format;
031
032import javax.measure.Quantity;
033import javax.measure.Unit;
034import javax.measure.format.MeasurementParseException;
035
036import tech.units.indriya.AbstractUnit;
037import tech.units.indriya.internal.format.UnitFormatParser;
038import tech.units.indriya.unit.AnnotatedUnit;
039
040import java.io.IOException;
041import java.io.StringReader;
042import java.text.ParsePosition;
043import java.util.Locale;
044import java.util.ResourceBundle;
045
046/**
047 * <p>
048 * This class represents the locale-neutral format.
049 * </p>
050 * 
051 * <h3>Here is the grammar for Units in Extended Backus-Naur Form (EBNF)</h3>
052 * <p>
053 * Note that the grammar has been left-factored to be suitable for use by a top-down parser generator such as <a
054 * href="https://javacc.github.io/javacc/">JavaCC</a>.
055 * </p>
056 * <table>
057 * <tr>
058 * <th colspan="3">Lexical Entities:</th>
059 * </tr>
060 * <tr valign="top">
061 * <td>&lt;sign&gt;</td>
062 * <td>:=</td>
063 * <td>"+" | "-"</td>
064 * </tr>
065 * <tr valign="top">
066 * <td>&lt;digit&gt;</td>
067 * <td>:=</td>
068 * <td>"0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"</td>
069 * </tr>
070 * <tr valign="top">
071 * <td>&lt;superscript_digit&gt;</td>
072 * <td>:=</td>
073 * <td>"⁰" | "¹" | "²" | "³" | "⁴" | "⁵" | "⁶" | "⁷" | "⁸" | "⁹"</td>
074 * </tr>
075 * <tr valign="top">
076 * <td>&lt;integer&gt;</td>
077 * <td>:=</td>
078 * <td>(&lt;digit&gt;)+</td>
079 * </tr>
080 * <tr valign="top">
081 * <td>&lt;number&gt;</td>
082 * <td>:=</td>
083 * <td>(&lt;sign&gt;)? (&lt;digit&gt;)* (".")? (&lt;digit&gt;)+ (("e" | "E") (&lt;sign&gt;)? (&lt;digit&gt;)+)?</td>
084 * </tr>
085 * <tr valign="top">
086 * <td>&lt;exponent&gt;</td>
087 * <td>:=</td>
088 * <td>( "^" ( &lt;sign&gt; )? &lt;integer&gt; ) <br>
089 * | ( "^(" (&lt;sign&gt;)? &lt;integer&gt; ( "/" (&lt;sign&gt;)? &lt;integer&gt; )? ")" ) <br>
090 * | ( &lt;superscript_digit&gt; )+</td>
091 * </tr>
092 * <tr valign="top">
093 * <td>&lt;initial_char&gt;</td>
094 * <td>:=</td>
095 * <td>? Any Unicode character excluding the following: ASCII control &amp; whitespace (&#92;u0000 - &#92;u0020), decimal digits '0'-'9', '('
096 * (&#92;u0028), ')' (&#92;u0029), '*' (&#92;u002A), '+' (&#92;u002B), '-' (&#92;u002D), '.' (&#92;u002E), '/' (&#92;u002F), ':' (&#92;u003A), '^'
097 * (&#92;u005E), '²' (&#92;u00B2), '³' (&#92;u00B3), '·' (&#92;u00B7), '¹' (&#92;u00B9), '⁰' (&#92;u2070), '⁴' (&#92;u2074), '⁵' (&#92;u2075), '⁶'
098 * (&#92;u2076), '⁷' (&#92;u2077), '⁸' (&#92;u2078), '⁹' (&#92;u2079) ?</td>
099 * </tr>
100 * <tr valign="top">
101 * <td>&lt;unit_identifier&gt;</td>
102 * <td>:=</td>
103 * <td>&lt;initial_char&gt; ( &lt;initial_char&gt; | &lt;digit&gt; )*</td>
104 * </tr>
105 * <tr>
106 * <th colspan="3" align="left">Non-Terminals:</th>
107 * </tr>
108 * <tr valign="top">
109 * <td>&lt;unit_expr&gt;</td>
110 * <td>:=</td>
111 * <td>&lt;mix_expr&gt;</td>
112 * </tr>
113 * <tr valign="top">
114 * <td>&lt;mix_expr&gt;</td>
115 * <td>:=</td>
116 * <td>&lt;add_expr&gt; ( ":" &lt;add_expr&gt; )*</td>
117 * </tr>
118 * <tr valign="top">
119 * <td>&lt;add_expr&gt;</td>
120 * <td>:=</td>
121 * <td>( &lt;number&gt; &lt;sign&gt; )? &lt;mul_expr&gt; ( &lt;sign&gt; &lt;number&gt; )?</td>
122 * </tr>
123 * <tr valign="top">
124 * <td>&lt;mul_expr&gt;</td>
125 * <td>:=</td>
126 * <td>&lt;exponent_expr&gt; ( ( ( "*" | "·" ) &lt;exponent_expr&gt; ) | ( "/" &lt;exponent_expr&gt; ) )*</td>
127 * </tr>
128 * <tr valign="top">
129 * <td>&lt;exponent_expr&gt;</td>
130 * <td>:=</td>
131 * <td>( &lt;atomic_expr&gt; ( &lt;exponent&gt; )? ) <br>
132 * | (&lt;integer&gt; "^" &lt;atomic_expr&gt;) <br>
133 * | ( ( ( "log" ( &lt;integer&gt; )? ) | "ln" ) "(" &lt;add_expr&gt; ")" )</td>
134 * </tr>
135 * <tr valign="top">
136 * <td>&lt;atomic_expr&gt;</td>
137 * <td>:=</td>
138 * <td>&lt;number&gt; <br>
139 * | &lt;unit_identifier&gt; <br>
140 * | ( "(" &lt;add_expr&gt; ")" )</td>
141 * </tr>
142 * </table>
143 * 
144 * @author <a href="mailto:eric-r@northwestern.edu">Eric Russell</a>
145 * @author <a href="mailto:werner@uom.tech">Werner Keil</a>
146 * @version 2.2, October 12, 2024
147 * @since 1.0
148 */
149public class EBNFUnitFormat extends AbstractUnitFormat {
150
151  //////////////////////////////////////////////////////
152  // Class variables //
153  //////////////////////////////////////////////////////
154
155  /**
156    * 
157    */
158  // private static final long serialVersionUID = 8968559300292910840L;
159
160  /**
161   * Name of the resource bundle
162   */
163  private static final String BUNDLE_NAME = "tech.units.indriya.format.messages"; //$NON-NLS-1$
164
165  /**
166   * Default locale instance. If the default locale is changed after the class is initialized, this instance will no longer be used.
167   */
168  private static final EBNFUnitFormat DEFAULT_INSTANCE = new EBNFUnitFormat();
169
170  /**
171   * Returns the instance for the current default locale (non-ascii characters are allowed)
172   */
173  public static EBNFUnitFormat getInstance() {
174    return DEFAULT_INSTANCE;
175  }
176
177  /** Returns a new instance for the given symbol map. */
178  public static EBNFUnitFormat getInstance(SymbolMap symbols) {
179    return new EBNFUnitFormat(symbols);
180  }
181  
182  /**
183   * Similar to {@link #getInstance()}, but returns a new, non-shared unit format instance,
184   * instead of a shared singleton instance.
185   *
186   * @return a new instance of the default unit format.
187   * @see #getInstance()
188   * @since 2.0
189   */
190  public static EBNFUnitFormat getNewInstance() {
191      return new EBNFUnitFormat();
192  }
193
194  // //////////////////////
195  // Instance variables //
196  // //////////////////////
197  /**
198   * The symbol map used by this instance to map between {@link org.unitsofmeasure.Unit Unit}s and <code>String</code>s, etc...
199   */
200  private final transient SymbolMap symbolMap;
201
202  // ////////////////
203  // Constructors //
204  // ////////////////
205  /**
206   * Base constructor.
207   * 
208   */
209  EBNFUnitFormat() {
210    // TODO try to use MultiPropertyResourceBundle with at least one extension point in a package other than tech.units.indriya.format for additional resources.
211    this(SymbolMap.of(ResourceBundle.getBundle(BUNDLE_NAME, Locale.ROOT)));
212  }
213
214  /**
215   * Private constructor.
216   * 
217   * @param symbols
218   *          the symbol mapping.
219   */
220  private EBNFUnitFormat(SymbolMap symbols) {
221    symbolMap = symbols;
222  }
223
224  // //////////////////////
225  // Instance methods //
226  // //////////////////////
227  /**
228   * Get the symbol map used by this instance to map between {@link javax.measure.Unit Unit}s and <code>String</code>s, etc...
229   * 
230   * @return SymbolMap the current symbol map
231   */
232  protected SymbolMap getSymbols() {
233    return symbolMap;
234  }
235  
236  @Override
237  public String toString() {
238    return getClass().getSimpleName();
239  }
240  
241  /**
242   * Attaches a system-wide alias to this unit. Multiple aliases may be attached to the same unit. Aliases are used during parsing to recognize
243   * different variants of the same unit. For example: <code>EBNFUnitFormat.getInstance().alias(METRE.multiply(0.3048), "foot");
244   * EBNFUnitFormat.getInstance().alias(METRE.multiply(0.3048), "feet");
245   * EBNFUnitFormat.getInstance().alias(METRE, "meter"); </code> If the specified alias is already associated to a unit or applied as a label, the association is
246   * replaced by the new one.
247   *
248   * @param unit
249   *          the unit being aliased.
250   * @param alias
251   *          the alias attached to this unit.
252   */
253  public void alias(Unit<?> unit, String alias) {       
254        symbolMap.alias(unit, alias);
255  }
256  
257  @Override
258  public void label(Unit<?> unit, String label) {       
259        symbolMap.label(unit, label);
260  }
261
262  ////////////////
263  // Formatting //
264  ////////////////
265  public Appendable format(Unit<?> unit, Appendable appendable) throws IOException {
266
267    EBNFHelper.formatInternal(unit, appendable, symbolMap);
268    if (unit instanceof AnnotatedUnit<?>) {
269      AnnotatedUnit<?> annotatedUnit = (AnnotatedUnit<?>) unit;
270      if (annotatedUnit.getAnnotation() != null) {
271        appendable.append('{');
272        appendable.append(annotatedUnit.getAnnotation());
273        appendable.append('}');
274      }
275    }
276    // TODO add support for MixedUnit similar to AnnotatedUnit
277    return appendable;
278  }
279
280  @Override
281  public Unit<? extends Quantity<?>> parse(CharSequence csq, ParsePosition cursor) throws MeasurementParseException {
282    // Parsing reads the whole character sequence from the parse position.
283    int start = cursor != null ? cursor.getIndex() : 0;
284    int end = csq.length();
285    if (end <= start) {
286      return AbstractUnit.ONE;
287    }
288    String source = csq.subSequence(start, end).toString().trim();
289    if (source.length() == 0) {
290      return AbstractUnit.ONE;
291    }
292    try {
293      UnitFormatParser parser = new UnitFormatParser(symbolMap, new StringReader(source));
294      Unit<?> result = parser.parseUnit();
295      if (cursor != null)
296        cursor.setIndex(end);
297      return result;
298    } catch (TokenException e) {
299      if (e.currentToken != null) {
300        cursor.setErrorIndex(start + e.currentToken.endColumn);
301      } else {
302        cursor.setErrorIndex(start);
303      }
304      throw new MeasurementParseException(e);
305    } catch (TokenMgrError e) {
306      cursor.setErrorIndex(start);
307      throw new IllegalArgumentException(e.getMessage());
308    }
309  }
310
311  @Override
312  protected Unit<?> parse(CharSequence csq, int index) throws IllegalArgumentException {
313    return parse(csq, new ParsePosition(index));
314  }
315
316  @Override
317  public Unit<?> parse(CharSequence csq) throws MeasurementParseException {
318    return parse(csq, 0);
319  }
320}