/*
 * Decompiled with CFR 0.152.
 */
package de.tblsoft.solr.pipeline.filter;

import de.tblsoft.solr.pipeline.AbstractFilter;
import de.tblsoft.solr.pipeline.bean.Document;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;

/**
 * Pipeline filter that collects a de-duplicated, sorted dictionary of tokens.
 *
 * <p>For every incoming document the value of the {@code "token"} field is
 * normalized (punctuation stripped, lower-cased, trimmed), split on whitespace,
 * and each acceptable token is remembered. Incoming documents are <em>not</em>
 * passed on individually; instead, {@link #end()} emits one new document per
 * distinct token, in natural {@link String} order, via {@code super.document}.
 */
public class DictionaryNormalizationFilter
extends AbstractFilter {
    /** Name of the field that is read from input documents and written to output documents. */
    private static final String TOKEN_FIELD = "token";

    /** Punctuation and symbol characters that are removed from a phrase before tokenizing. */
    private static final Pattern STRIPPED_CHARACTERS = Pattern.compile("[()\",'*+%\u00b0/\\[\\]?!:]");

    /** Matches a single digit; used to reject tokens that contain any number. */
    private static final Pattern DIGIT = Pattern.compile("\\d");

    /** Tokens shorter than this are discarded. */
    private static final int MIN_TOKEN_LENGTH = 2;

    /** Distinct tokens accepted so far; emitted (sorted) in {@link #end()}. */
    private final Set<String> tokens = new HashSet<String>();

    @Override
    public void init() {
        super.init();
    }

    /**
     * Extracts the acceptable tokens from the document's {@code "token"} field
     * and stores them for later emission. The document itself is not forwarded.
     *
     * @param document the incoming document; a document without a
     *                 {@code "token"} value contributes nothing
     */
    @Override
    public void document(Document document) {
        String phrase = document.getFieldValue(TOKEN_FIELD);
        if (phrase == null) {
            // Nothing to tokenize; previously this case failed with a NullPointerException.
            return;
        }
        // StringTokenizer with default delimiters splits on whitespace and never yields empty tokens.
        StringTokenizer tokenizer = new StringTokenizer(this.normalizePhrase(phrase));
        while (tokenizer.hasMoreTokens()) {
            String token = tokenizer.nextToken();
            if (isAcceptable(token)) {
                this.tokens.add(token);
            }
        }
    }

    /**
     * Removes the characters matched by {@link #STRIPPED_CHARACTERS}, lower-cases
     * the result independently of the JVM default locale, and trims it.
     *
     * @param token the raw phrase; may be {@code null}
     * @return the normalized phrase, or an empty string when {@code token} is {@code null}
     */
    String normalizePhrase(String token) {
        if (token == null) {
            return "";
        }
        String stripped = STRIPPED_CHARACTERS.matcher(token).replaceAll("");
        // Locale.ROOT keeps the mapping stable (e.g. "I" -> "i") regardless of the platform locale.
        return stripped.toLowerCase(Locale.ROOT).trim();
    }

    /**
     * Emits one document per collected token, sorted in natural string order,
     * through {@code super.document}, then delegates to {@code super.end()}.
     */
    @Override
    public void end() {
        List<String> sortedTokens = new ArrayList<String>(this.tokens);
        Collections.sort(sortedTokens);
        for (String token : sortedTokens) {
            Document document = new Document();
            document.setField(TOKEN_FIELD, token);
            super.document(document);
        }
        super.end();
    }

    /**
     * Decides whether a single whitespace-delimited token belongs in the dictionary.
     * A token is rejected when it is shorter than {@link #MIN_TOKEN_LENGTH}, contains
     * a dot or a digit, or starts or ends with a hyphen.
     */
    private static boolean isAcceptable(String token) {
        if (token.length() < MIN_TOKEN_LENGTH) {
            return false;
        }
        if (token.contains(".") || DIGIT.matcher(token).find()) {
            return false;
        }
        return !token.startsWith("-") && !token.endsWith("-");
    }
}

