/*
 * Decompiled with CFR 0.152.
 */
package de.tblsoft.solr.pipeline.filter;

import com.google.common.base.Joiner;
import com.google.common.base.Strings;
import de.tblsoft.solr.logic.LinguisticHelper;
import de.tblsoft.solr.pipeline.AbstractFilter;
import de.tblsoft.solr.pipeline.bean.Document;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;

/**
 * Pipeline filter that collects nouns from incoming documents and, once the
 * input is exhausted, emits one document per noun that contains other
 * collected nouns as parts.
 *
 * <p>Incoming documents are only inspected in {@link #document(Document)};
 * they are not passed on. The documents handed to {@code super.document} are
 * the ones created in {@link #end()}, carrying the fields {@code noun},
 * {@code compound} (one value per contained noun) and {@code tokenized}.
 */
public class CompoundWordFilter
extends AbstractFilter {
    /** A noun is only collected when it has more characters than this. */
    private static final int MIN_NOUN_LENGTH = 3;

    /** A contained noun only counts as a part when it is shorter than the enclosing noun by more than this. */
    private static final int MIN_LENGTH_DIFFERENCE = 3;

    /** Lower-case nouns that are never collected. */
    private static final Set<String> blacklist = new HashSet<String>();

    /** Lower-case words that are always added to the collected nouns before compounds are searched. */
    private static final Set<String> whitelist = new HashSet<String>();

    // The spelling "nounn" matches the property key read in init() and is kept for that reason.
    private String nounnFieldName;

    /** Lower-cased nouns collected from all documents seen so far. */
    private Set<String> nounList = new HashSet<String>();

    /**
     * Reads the {@code nounnFieldName} property (default {@code "noun"}) and
     * then delegates to the superclass initialisation.
     */
    @Override
    public void init() {
        this.nounnFieldName = this.getProperty("nounnFieldName", "noun");
        super.init();
    }

    /**
     * Collects the noun of the given document, if it qualifies.
     *
     * <p>A noun qualifies when it is non-empty, is not blacklisted, is accepted by
     * {@link LinguisticHelper#containsOnlyGermanCharacters} and is longer than
     * {@value #MIN_NOUN_LENGTH} characters. The document itself is not forwarded.
     *
     * @param document the document whose noun field is inspected
     */
    @Override
    public void document(Document document) {
        String noun = document.getFieldValue(this.nounnFieldName);
        if (Strings.isNullOrEmpty(noun)) {
            return;
        }
        // Normalise before the blacklist lookup: the blacklist holds lower-case
        // entries and the stored value is lower-case, so a capitalised noun must
        // not slip past the check. Locale.ROOT keeps the result independent of
        // the JVM's default locale.
        String normalized = noun.toLowerCase(Locale.ROOT);
        if (blacklist.contains(normalized)) {
            return;
        }
        if (LinguisticHelper.containsOnlyGermanCharacters(noun) && noun.length() > MIN_NOUN_LENGTH) {
            this.nounList.add(normalized);
        }
    }

    /**
     * Adds the whitelist to the collected nouns, emits one document for every
     * noun that contains at least one other collected noun as a part, and then
     * delegates to the superclass.
     */
    @Override
    public void end() {
        this.nounList.addAll(whitelist);
        for (String noun : this.nounList) {
            List<String> compoundList = this.findParts(noun);
            if (compoundList.isEmpty()) {
                continue;
            }
            Document document = new Document();
            document.addField("noun", noun);
            for (String compound : compoundList) {
                document.addField("compound", compound);
            }
            String joinedTokens = Joiner.on(" ").join(this.tokenize(noun, compoundList));
            document.addField("tokenized", joinedTokens);
            super.document(document);
        }
        super.end();
    }

    /**
     * Returns every collected noun that occurs inside {@code noun} and is
     * shorter than it by more than {@value #MIN_LENGTH_DIFFERENCE} characters.
     */
    private List<String> findParts(String noun) {
        List<String> parts = new ArrayList<String>();
        for (String other : this.nounList) {
            if (!noun.contains(other) || noun.equals(other)) {
                continue;
            }
            if (Math.abs(other.length() - noun.length()) > MIN_LENGTH_DIFFERENCE) {
                parts.add(other);
            }
        }
        return parts;
    }

    /**
     * Splits {@code noun} into the compounds it is made of, scanning left to right.
     *
     * <p>At each position the longest entry of {@code compoundList} that matches
     * there is emitted and scanning resumes behind it; positions where nothing
     * matches are skipped one character at a time. A shorter match is therefore
     * still emitted when a longer compound merely shares its prefix but does not
     * fit at that position, e.g. {@code tokenize("torbautore", [tor, tore, bau])}
     * yields {@code [tor, bau, tore]}.
     *
     * @param noun         the word to split
     * @param compoundList candidate parts; empty strings are ignored
     * @return the matched parts in order of occurrence, possibly empty; the list is mutable
     */
    List<String> tokenize(String noun, List<String> compoundList) {
        List<String> tokens = new ArrayList<String>();
        int start = 0;
        while (start < noun.length()) {
            int matchLength = longestCompoundLengthAt(noun, start, compoundList);
            if (matchLength == 0) {
                // Nothing starts here; try the next character.
                ++start;
                continue;
            }
            tokens.add(noun.substring(start, start + matchLength));
            start += matchLength;
        }
        return tokens;
    }

    /**
     * Returns the length of the longest compound that matches {@code noun}
     * at offset {@code start}, or 0 when none does.
     */
    private static int longestCompoundLengthAt(String noun, int start, List<String> compoundList) {
        int longest = 0;
        for (String compound : compoundList) {
            // The length comparison also rules out empty compounds, which would
            // otherwise match everywhere without consuming any input.
            if (compound.length() > longest && noun.startsWith(compound, start)) {
                longest = compound.length();
            }
        }
        return longest;
    }

    /**
     * Tells whether {@code compoundToCompare} is contained in a different
     * entry of {@code compoundList}.
     *
     * @param compoundToCompare the compound to look for
     * @param compoundList      the compounds to search in
     * @return {@code true} if some entry other than {@code compoundToCompare} itself contains it
     */
    boolean isOverlap(String compoundToCompare, List<String> compoundList) {
        for (String compound : compoundList) {
            if (!compoundToCompare.equals(compound) && compound.contains(compoundToCompare)) {
                return true;
            }
        }
        return false;
    }

    static {
        blacklist.add("euch");
        blacklist.add("herb");
        whitelist.add("leucht");
        whitelist.add("bohrung");
        whitelist.add("alu");
        whitelist.add("dampf");
        whitelist.add("kasten");
        whitelist.add("natrium");
        whitelist.add("bau");
        whitelist.add("leiste");
        whitelist.add("schutz");
        whitelist.add("tor");
    }
}

