/*
 * Decompiled with CFR 0.152.
 */
package org.languagetool.tokenizers.es;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.languagetool.tagging.es.SpanishTagger;
import org.languagetool.tokenizers.WordTokenizer;

public class SpanishWordTokenizer
extends WordTokenizer {
    private static final String wordCharacters = "\u00a7\u00a9@\u20ac\u00a3\\$_\\p{L}\\d\u00b7\\-\u0300-\u036f\u00a8\u2070-\u209f\u00b0%\u2030\u2031&\ufffd\u00ad\u00ac";
    private static final Pattern tokenizerPattern = Pattern.compile("[\u00a7\u00a9@\u20ac\u00a3\\$_\\p{L}\\d\u00b7\\-\u0300-\u036f\u00a8\u2070-\u209f\u00b0%\u2030\u2031&\ufffd\u00ad\u00ac]+|[^\u00a7\u00a9@\u20ac\u00a3\\$_\\p{L}\\d\u00b7\\-\u0300-\u036f\u00a8\u2070-\u209f\u00b0%\u2030\u2031&\ufffd\u00ad\u00ac]");
    private static final Pattern DECIMAL_POINT = Pattern.compile("([\\d])\\.([\\d])", 66);
    private static final Pattern DECIMAL_COMMA = Pattern.compile("([\\d]),([\\d])", 66);
    private static final Pattern ORDINAL_POINT = Pattern.compile("\\b([\\d]+)\\.(\u00ba|\u00aa|o|a|er|os|as)\\b", 66);
    private static final Pattern PATTERN_1 = Pattern.compile("xxES_DECIMAL_POINTxx", 16);
    private static final Pattern PATTERN_2 = Pattern.compile("xxES_DECIMAL_COMMAxx", 16);
    private static final Pattern PATTERN_3 = Pattern.compile("xxES_ORDINAL_POINTxx", 16);
    private static final Pattern SOFT_HYPHEN = Pattern.compile("\u00ad", 16);

    public List<String> tokenize(String text) {
        ArrayList<String> l = new ArrayList<String>();
        String auxText = text.replace('\u2010', '-');
        auxText = auxText.replace('\u2011', '-');
        Matcher matcher = DECIMAL_POINT.matcher(auxText);
        auxText = matcher.replaceAll("$1xxES_DECIMAL_POINTxx$2");
        matcher = DECIMAL_COMMA.matcher(auxText);
        auxText = matcher.replaceAll("$1xxES_DECIMAL_COMMAxx$2");
        matcher = ORDINAL_POINT.matcher(auxText);
        auxText = matcher.replaceAll("$1xxES_ORDINAL_POINTxx$2");
        Matcher tokenizerMatcher = tokenizerPattern.matcher(auxText);
        while (tokenizerMatcher.find()) {
            String s = tokenizerMatcher.group();
            if (l.size() > 0 && s.length() == 1 && s.codePointAt(0) >= 65024 && s.codePointAt(0) <= 65039) {
                l.set(l.size() - 1, (String)l.get(l.size() - 1) + s);
                continue;
            }
            s = PATTERN_1.matcher(s).replaceAll(".");
            s = PATTERN_2.matcher(s).replaceAll(",");
            s = PATTERN_3.matcher(s).replaceAll(".");
            l.addAll(this.wordsToAdd(s));
        }
        return this.joinEMailsAndUrls(l);
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    private List<String> wordsToAdd(String s) {
        ArrayList<String> l = new ArrayList<String>();
        SpanishWordTokenizer spanishWordTokenizer = this;
        synchronized (spanishWordTokenizer) {
            if (!s.isEmpty()) {
                if (!s.contains("-")) {
                    l.add(s);
                } else if (SpanishTagger.INSTANCE.tag(Arrays.asList(SOFT_HYPHEN.matcher(s).replaceAll("").replace('\u2019', '\''))).get(0).isTagged()) {
                    l.add(s);
                } else if (s.equalsIgnoreCase("mers-cov") || s.equalsIgnoreCase("mcgraw-hill") || s.equalsIgnoreCase("sars-cov-2") || s.equalsIgnoreCase("sars-cov") || s.equalsIgnoreCase("ph-metre") || s.equalsIgnoreCase("ph-metres")) {
                    l.add(s);
                } else {
                    StringTokenizer st2 = new StringTokenizer(s, "-", true);
                    while (st2.hasMoreElements()) {
                        l.add(st2.nextToken());
                    }
                }
            }
            return l;
        }
    }
}

