/*
 * Decompiled with CFR 0.152.
 */
package opennlp.tools.formats.muc;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.Stack;
import opennlp.tools.formats.muc.MucElementNames;
import opennlp.tools.formats.muc.SgmlParser;
import opennlp.tools.namefind.NameSample;
import opennlp.tools.tokenize.Tokenizer;
import opennlp.tools.util.InvalidFormatException;
import opennlp.tools.util.Span;

/**
 * SGML content handler that turns MUC-style markup into {@link NameSample}s.
 *
 * <p>Text received while inside one of {@code MucElementNames.CONTENT_ELEMENTS} is
 * tokenized and buffered. {@code ENAMEX}, {@code TIMEX} and {@code NUMEX} elements
 * are recorded as {@link Span}s over the buffered tokens. When a content element
 * closes, the buffered tokens and spans are emitted as one {@link NameSample}
 * into the list supplied at construction time, and the buffers are reset.
 */
public class MucNameContentHandler
extends SgmlParser.ContentHandler {
    private static final String ENTITY_ELEMENT_NAME = "ENAMEX";
    private static final String TIME_ELEMENT_NAME = "TIMEX";
    private static final String NUM_ELEMENT_NAME = "NUMEX";
    /** Element whose start marks the next emitted sample as "clear adaptive data". */
    private static final String DOC_ELEMENT_NAME = "DOC";
    /** Attribute on a name element that carries the entity type. */
    private static final String TYPE_ATTRIBUTE = "TYPE";

    /** Element names that delimit a name span. */
    private static final Set<String> NAME_ELEMENT_NAMES = Collections.unmodifiableSet(
            new HashSet<String>(Arrays.asList(
                    ENTITY_ELEMENT_NAME, TIME_ELEMENT_NAME, NUM_ELEMENT_NAME)));

    /** Accepted values of the {@code TYPE} attribute (matched case-sensitively). */
    private static final Set<String> EXPECTED_TYPES = Collections.unmodifiableSet(
            new HashSet<String>(Arrays.asList(
                    "PERSON", "ORGANIZATION", "LOCATION", "DATE", "TIME", "MONEY", "PERCENT")));

    private final Tokenizer tokenizer;
    /** Destination list; one sample is appended per closed content element. */
    private final List<NameSample> storedSamples;
    private boolean isInsideContentElement = false;
    /** Tokens collected for the content element currently being read. */
    private final List<String> text = new ArrayList<String>();
    private boolean isClearAdaptiveData = false;
    /** Name elements that have been opened but not yet closed (innermost on top). */
    private final Stack<Span> incompleteNames = new Stack<Span>();
    /** Completed name spans for the content element currently being read. */
    private final List<Span> names = new ArrayList<Span>();

    /**
     * Creates a handler.
     *
     * @param tokenizer used to split character data into tokens
     * @param storedSamples list that receives the produced samples
     */
    public MucNameContentHandler(Tokenizer tokenizer, List<NameSample> storedSamples) {
        this.tokenizer = tokenizer;
        this.storedSamples = storedSamples;
    }

    /**
     * Handles an opening tag.
     *
     * <p>A {@code DOC} element flags the next emitted sample as clearing adaptive
     * data; a content element enables token collection; a name element opens a
     * span starting at the current token position.
     *
     * @param name the element name
     * @param attributes the element attributes; {@code TYPE} is read for name elements
     * @throws InvalidFormatException if a name element has a missing or unexpected
     *         {@code TYPE} attribute
     */
    @Override
    public void startElement(String name, Map<String, String> attributes) throws InvalidFormatException {
        if (DOC_ELEMENT_NAME.equals(name)) {
            this.isClearAdaptiveData = true;
        }
        if (MucElementNames.CONTENT_ELEMENTS.contains(name)) {
            this.isInsideContentElement = true;
        }
        if (NAME_ELEMENT_NAMES.contains(name)) {
            String nameType = attributes.get(TYPE_ATTRIBUTE);
            // A missing attribute yields null, which is not in the set and is
            // therefore rejected here as well.
            if (!EXPECTED_TYPES.contains(nameType)) {
                throw new InvalidFormatException("Unknown timex, numex or namex type: " + nameType + ", expected one of " + EXPECTED_TYPES);
            }
            // The end index is a placeholder; it is replaced when the element closes.
            int start = this.text.size();
            this.incompleteNames.push(new Span(start, start, nameType.toLowerCase(Locale.ENGLISH)));
        }
    }

    /**
     * Tokenizes character data and appends the tokens to the current buffer.
     * Character data outside a content element is ignored.
     *
     * @param chars the character data
     */
    @Override
    public void characters(CharSequence chars) {
        if (this.isInsideContentElement) {
            String[] tokens = this.tokenizer.tokenize(chars.toString());
            this.text.addAll(Arrays.asList(tokens));
        }
    }

    /**
     * Handles a closing tag.
     *
     * <p>Closing a name element completes the most recently opened span so that it
     * ends at the current token position. Closing a content element emits the
     * buffered tokens and spans as a {@link NameSample} and resets the buffers.
     *
     * @param name the element name
     * @throws java.util.EmptyStackException if a name element is closed without a
     *         matching opening tag having been seen
     */
    @Override
    public void endElement(String name) {
        if (NAME_ELEMENT_NAMES.contains(name)) {
            Span opened = this.incompleteNames.pop();
            this.names.add(new Span(opened.getStart(), this.text.size(), opened.getType()));
        }
        if (MucElementNames.CONTENT_ELEMENTS.contains(name)) {
            String[] sentence = this.text.toArray(new String[0]);
            Span[] nameSpans = this.names.toArray(new Span[0]);
            this.storedSamples.add(new NameSample(sentence, nameSpans, this.isClearAdaptiveData));
            // Only the first sample after a DOC start carries the flag.
            this.isClearAdaptiveData = false;
            this.text.clear();
            this.names.clear();
            this.isInsideContentElement = false;
        }
    }
}

