/*
 * Decompiled with CFR 0.152.
 */
package de.tblsoft.solr.pipeline.filter;

import com.google.gson.Gson;
import de.tblsoft.solr.pipeline.AbstractFilter;
import de.tblsoft.solr.pipeline.bean.Document;
import de.tblsoft.solr.util.IOUtils;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.regex.Pattern;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.io.Charsets;

public class EntityExtractionFilter
extends AbstractFilter {
    private List<String> fieldList;
    private String entityFieldName;
    List<Map<String, Entity>> dictionaryMap = new ArrayList<Map<String, Entity>>();

    @Override
    public void init() {
        this.fieldList = this.getPropertyAsList("fieldList", new ArrayList<String>());
        this.entityFieldName = this.getProperty("entityFieldName", "entities");
        this.readEntityDictionary("teams.csv", Charsets.UTF_8.name(), "team");
        this.readEntityDictionary("persons.csv", Charsets.UTF_8.name(), "person");
        super.init();
    }

    void readEntityDictionary(String filename, String charset, String type) {
        String absoluteFilename = IOUtils.getAbsoluteFile(this.getBaseDir(), filename);
        CSVFormat format = CSVFormat.RFC4180;
        format = format.withHeader(new String[0]);
        try {
            InputStream in = IOUtils.getInputStream(absoluteFilename);
            InputStreamReader reader = new InputStreamReader(in, charset);
            CSVParser parser = format.parse((Reader)reader);
            for (CSVRecord record : parser.getRecords()) {
                String entityName = record.get("name");
                String entityUrl = record.get("url");
                StringTokenizer tokenizer = new StringTokenizer(entityName);
                int tokenCount = 0;
                StringBuilder tokenPhrase = new StringBuilder();
                while (tokenizer.hasMoreTokens()) {
                    String token = tokenizer.nextToken();
                    if (this.dictionaryMap.size() < tokenCount + 1) {
                        this.dictionaryMap.add(new HashMap());
                    }
                    if (tokenCount > 0) {
                        tokenPhrase.append(" ");
                    }
                    tokenPhrase.append(token);
                    Map<String, Entity> entityMap = this.dictionaryMap.get(tokenCount);
                    Entity newEntity = new Entity(entityName, !tokenizer.hasMoreTokens());
                    newEntity.setUrl(entityUrl);
                    newEntity.setType(type);
                    String key = this.normalize(tokenPhrase.toString());
                    Entity entity = entityMap.get(key);
                    if (entity == null) {
                        entityMap.put(key, newEntity);
                    } else if (newEntity.getName().length() < entity.getName().length()) {
                        entityMap.put(key, newEntity);
                    }
                    ++tokenCount;
                }
            }
            in.close();
        }
        catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    String normalize(String token) {
        token = token.toLowerCase();
        token = token.replaceAll("[^a-zA-Z0-9\u00e4\u00f6\u00fc\u00c4\u00d6\u00dc\u00df]+", "");
        return token.trim();
    }

    @Override
    public void document(Document document) {
        ArrayList<Entity> entities = new ArrayList<Entity>();
        HashSet restWords = new HashSet();
        for (String fieldName : this.fieldList) {
            List<String> fieldValues = document.getFieldValues(fieldName);
            if (fieldValues == null) continue;
            for (String text : fieldValues) {
                entities.addAll(this.extractEntites(text));
            }
        }
        Set<Entity> fullMatchEntites = this.removeDuplicatesAndNotFullMatchEntities(entities);
        if (!fullMatchEntites.isEmpty()) {
            document.setField(this.entityFieldName + "Object", this.toJson(fullMatchEntites));
        }
        for (Entity entity : fullMatchEntites) {
            document.addField(this.entityFieldName, entity.getName());
        }
        super.document(document);
    }

    Set<Entity> removeDuplicatesAndNotFullMatchEntities(List<Entity> entities) {
        HashSet<Entity> fullMatchEntites = new HashSet<Entity>();
        for (Entity entity : entities) {
            if (!entity.fullMatch) continue;
            fullMatchEntites.add(entity);
        }
        return fullMatchEntites;
    }

    private String toJson(Set<Entity> entites) {
        Gson gson = new Gson();
        return gson.toJson(entites);
    }

    List<Entity> extractEntites(String text) {
        ArrayList<Entity> entities = new ArrayList<Entity>();
        StringTokenizer tokenizer = new StringTokenizer(text);
        int tokenCount = 0;
        StringBuilder tokenPhrase = new StringBuilder();
        Entity candidate = null;
        while (tokenizer.hasMoreTokens()) {
            String token = tokenizer.nextToken();
            if (this.dictionaryMap.size() < tokenCount + 1) {
                if (candidate != null) {
                    entities.add(candidate);
                }
                tokenPhrase = new StringBuilder();
                candidate = null;
                tokenCount = 0;
                continue;
            }
            if (tokenCount > 0) {
                tokenPhrase.append(" ");
            }
            tokenPhrase.append(token);
            Map<String, Entity> entityMap = this.dictionaryMap.get(tokenCount);
            Entity newCandidate = entityMap.get(this.normalize(tokenPhrase.toString()));
            if (newCandidate == null) {
                if (candidate != null) {
                    entities.add(candidate);
                }
                tokenPhrase = new StringBuilder();
                candidate = null;
                tokenCount = 0;
                continue;
            }
            candidate = newCandidate;
            ++tokenCount;
        }
        return entities;
    }

    class Entity {
        private String name;
        private boolean fullMatch;
        private String url;
        private String type;

        Entity(String name, boolean fullMatch) {
            this.name = name;
            this.fullMatch = fullMatch;
        }

        public String getName() {
            return this.name;
        }

        public void setName(String name) {
            this.name = name;
        }

        public String getUrl() {
            return this.url;
        }

        public void setUrl(String url) {
            this.url = url;
        }

        public String getType() {
            return this.type;
        }

        public void setType(String type) {
            this.type = type;
        }

        public int hashCode() {
            return this.url.hashCode();
        }
    }
}

