/*
 * Decompiled with CFR 0.152.
 */
package datafu.opennlp.tools.tokenize;

import datafu.opennlp.tools.tokenize.TokenContextGenerator;
import datafu.opennlp.tools.util.StringUtil;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Set;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Produces the string predicates ("context") that describe a candidate split
 * position inside a piece of text.
 *
 * <p>Each predicate is a short string such as {@code p1=a}, {@code f1_num} or
 * {@code p2=bok}; the exact spellings are part of the runtime contract and are
 * emitted in a fixed order.
 */
public class DefaultTokenContextGenerator implements TokenContextGenerator {
    /** Strings for which the {@code pabb} predicate may be emitted; stored as passed in, not copied. */
    protected final Set<String> inducedAbbreviations;

    /** Creates a generator that uses an empty abbreviation set. */
    public DefaultTokenContextGenerator() {
        this(Collections.<String>emptySet());
    }

    /**
     * Creates a generator backed by the supplied abbreviation set.
     *
     * @param inducedAbbreviations set consulted when deciding whether to emit
     *        {@code pabb}; the reference is kept as-is
     */
    public DefaultTokenContextGenerator(Set<String> inducedAbbreviations) {
        this.inducedAbbreviations = inducedAbbreviations;
    }

    /**
     * Returns the predicates of {@link #createContext(String, int)} as an array.
     *
     * @param sentence the text being examined
     * @param index position within {@code sentence} being described
     * @return the predicates, in the order they were generated
     */
    @Override
    public String[] getContext(String sentence, int index) {
        List<String> features = this.createContext(sentence, index);
        // An exactly-sized target array is filled and handed back by toArray.
        return features.toArray(new String[features.size()]);
    }

    /**
     * Builds the ordered predicate list for position {@code index} of
     * {@code sentence}.
     *
     * <p>Emitted, in this order:
     * <ol>
     *   <li>{@code p=} + text before {@code index}, {@code s=} + text from {@code index} on;</li>
     *   <li>character predicates for the previous character ({@code p1}) and the one
     *       before it ({@code p2}) plus the pairs {@code p21=} and {@code p1f1=}, with the
     *       literal {@code p1=bok} / {@code p2=bok} standing in where no such character exists;</li>
     *   <li>character predicates for the character at {@code index} ({@code f1});</li>
     *   <li>character predicates for the following character ({@code f2}) plus the pair
     *       {@code f12=}, or the literal {@code f2=bok} when {@code index} is the last position;</li>
     *   <li>{@code cc} when the text starts with {@code '&'} and ends with {@code ';'};</li>
     *   <li>{@code pabb} when {@code index} is the last position and the whole text is
     *       contained in {@link #inducedAbbreviations}.</li>
     * </ol>
     *
     * @param sentence the text being examined
     * @param index position within {@code sentence}; must address an existing character
     * @return a new list holding the predicates
     * @throws IndexOutOfBoundsException if {@code index} does not address a character of
     *         {@code sentence} (this includes an empty {@code sentence})
     */
    protected List<String> createContext(String sentence, int index) {
        List<String> features = new ArrayList<String>();

        features.add("p=" + sentence.substring(0, index));
        features.add("s=" + sentence.substring(index));

        // Look-behind: up to two characters to the left of the position.
        if (index <= 0) {
            features.add("p1=bok");
        } else {
            char previous = sentence.charAt(index - 1);
            this.addCharPreds("p1", previous, features);
            if (index <= 1) {
                features.add("p2=bok");
            } else {
                char beforePrevious = sentence.charAt(index - 2);
                this.addCharPreds("p2", beforePrevious, features);
                features.add("p21=" + beforePrevious + previous);
            }
            features.add("p1f1=" + previous + sentence.charAt(index));
        }

        // The character at the position itself.
        char current = sentence.charAt(index);
        this.addCharPreds("f1", current, features);

        // charAt(index) succeeded above, so index is at most lastIndex here.
        int lastIndex = sentence.length() - 1;
        boolean atLastChar = index == lastIndex;

        // Look-ahead: one character to the right of the position.
        if (atLastChar) {
            features.add("f2=bok");
        } else {
            char next = sentence.charAt(index + 1);
            this.addCharPreds("f2", next, features);
            features.add("f12=" + current + next);
        }

        // Text wrapped as "&...;" (NOTE(review): presumably a character entity - confirm).
        if (sentence.charAt(0) == '&' && sentence.charAt(lastIndex) == ';') {
            features.add("cc");
        }

        if (atLastChar && this.inducedAbbreviations.contains(sentence)) {
            features.add("pabb");
        }
        return features;
    }

    /**
     * Appends the predicates describing a single character.
     *
     * <p>Always adds {@code key=c}. Then adds at most one class marker, chosen by the
     * first matching test: letter ({@code _alpha}, plus {@code _caps} if upper case),
     * digit ({@code _num}), whitespace as judged by {@code StringUtil} ({@code _ws}),
     * one of {@code . ? !} ({@code _eos}), one of {@code ` " '} ({@code _quote}),
     * one of {@code [ { (} ({@code _lp}), one of {@code ] } )} ({@code _rp}).
     *
     * @param key prefix for every predicate added
     * @param c the character to describe
     * @param preds list the predicates are appended to
     */
    protected void addCharPreds(String key, char c, List<String> preds) {
        preds.add(key + "=" + c);

        if (Character.isLetter(c)) {
            preds.add(key + "_alpha");
            if (Character.isUpperCase(c)) {
                preds.add(key + "_caps");
            }
            return;
        }
        if (Character.isDigit(c)) {
            preds.add(key + "_num");
            return;
        }
        if (StringUtil.isWhitespace(c)) {
            preds.add(key + "_ws");
            return;
        }

        switch (c) {
            case '.':
            case '?':
            case '!':
                preds.add(key + "_eos");
                break;
            case '`':
            case '"':
            case '\'':
                preds.add(key + "_quote");
                break;
            case '[':
            case '{':
            case '(':
                preds.add(key + "_lp");
                break;
            case ']':
            case '}':
            case ')':
                preds.add(key + "_rp");
                break;
            default:
                // No class marker for any other character.
                break;
        }
    }
}

