/*
 * Decompiled with CFR 0.152.
 */
package com.kennycason.kumo.nlp;

import ch.lambdaj.Lambda;
import com.kennycason.kumo.WordFrequency;
import com.kennycason.kumo.nlp.filter.CompositeFilter;
import com.kennycason.kumo.nlp.filter.Filter;
import com.kennycason.kumo.nlp.filter.StopWordFilter;
import com.kennycason.kumo.nlp.filter.WordSizeFilter;
import com.kennycason.kumo.nlp.normalize.CharacterStrippingNormalizer;
import com.kennycason.kumo.nlp.normalize.LowerCaseNormalizer;
import com.kennycason.kumo.nlp.normalize.Normalizer;
import com.kennycason.kumo.nlp.normalize.TrimToEmptyNormalizer;
import com.kennycason.kumo.nlp.tokenizer.WhiteSpaceWordTokenizer;
import com.kennycason.kumo.nlp.tokenizer.WordTokenizer;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.io.IOUtils;
import org.hamcrest.Matcher;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

/**
 * Computes word frequencies from text and returns the most frequent words.
 *
 * <p>Processing pipeline for text input: each text is split by the configured
 * {@link WordTokenizer}; the raw tokens are filtered (stop words, word size and any
 * user-supplied {@link Filter}s); each surviving token is then passed through the
 * configured {@link Normalizer}s in registration order; finally, occurrences of each
 * normalized word are counted. Note that filtering runs on the raw tokens, i.e.
 * <em>before</em> normalization.
 *
 * <p>Instances hold mutable configuration and are not synchronized.
 */
public class FrequencyAnalyzer {
    public static final String DEFAULT_ENCODING = "UTF-8";
    public static final int DEFAULT_WORD_MAX_LENGTH = 32;
    public static final int DEFAULT_WORD_MIN_LENGTH = 3;
    public static final int DEFAULT_WORD_FREQUENCIES_TO_RETURN = 50;
    public static final long DEFAULT_URL_LOAD_TIMEOUT = 3000L;

    private final Set<String> stopWords = new HashSet<String>();
    private WordTokenizer wordTokenizer = new WhiteSpaceWordTokenizer();
    private final List<Filter> filters = new ArrayList<Filter>();
    private final List<Normalizer> normalizers = new ArrayList<Normalizer>();
    private int wordFrequenciesToReturn = DEFAULT_WORD_FREQUENCIES_TO_RETURN;
    private int maxWordLength = DEFAULT_WORD_MAX_LENGTH;
    private int minWordLength = DEFAULT_WORD_MIN_LENGTH;
    private String characterEncoding = DEFAULT_ENCODING;
    private long urlLoadTimeout = DEFAULT_URL_LOAD_TIMEOUT;

    /**
     * Creates an analyzer with the default settings and three default normalizers,
     * registered in this order: {@link TrimToEmptyNormalizer},
     * {@link CharacterStrippingNormalizer}, {@link LowerCaseNormalizer}.
     */
    public FrequencyAnalyzer() {
        this.normalizers.add(new TrimToEmptyNormalizer());
        this.normalizers.add(new CharacterStrippingNormalizer());
        this.normalizers.add(new LowerCaseNormalizer());
    }

    /**
     * Reads all lines from the stream using the configured character encoding and
     * analyzes them. The stream is not closed by this method; the caller owns it.
     *
     * @param fileInputStream stream to read text lines from
     * @return the most frequent words, highest frequency first
     * @throws IOException if reading the stream fails
     */
    public List<WordFrequency> load(InputStream fileInputStream) throws IOException {
        return this.load(IOUtils.readLines(fileInputStream, this.characterEncoding));
    }

    /**
     * Reads and analyzes the given file. The stream opened on the file is always
     * closed before this method returns.
     *
     * @param file file to read text lines from
     * @return the most frequent words, highest frequency first
     * @throws IOException if the file cannot be opened or read
     */
    public List<WordFrequency> load(File file) throws IOException {
        InputStream in = new FileInputStream(file);
        try {
            return this.load(in);
        } finally {
            // This method opened the stream, so it is responsible for releasing it.
            IOUtils.closeQuietly(in);
        }
    }

    /**
     * Reads and analyzes the file at the given path.
     *
     * @param filePath path of the file to read
     * @return the most frequent words, highest frequency first
     * @throws IOException if the file cannot be opened or read
     */
    public List<WordFrequency> load(String filePath) throws IOException {
        return this.load(new File(filePath));
    }

    /**
     * Fetches the document at the given URL with jsoup and analyzes the text of its body.
     *
     * @param url location of the document to fetch
     * @return the most frequent words, highest frequency first
     * @throws IOException if fetching or parsing the document fails
     */
    public List<WordFrequency> load(URL url) throws IOException {
        // jsoup takes an int timeout; clamp instead of letting a large long wrap around.
        int timeoutMillis = (int) Math.min(this.urlLoadTimeout, (long) Integer.MAX_VALUE);
        Document doc = Jsoup.parse(url, timeoutMillis);
        return this.load(Collections.singletonList(doc.body().text()));
    }

    /**
     * Analyzes the given texts and returns the most frequent words.
     *
     * @param texts texts to tokenize, filter, normalize and count
     * @return at most {@code wordFrequenciesToReturn} entries, highest frequency first
     */
    public List<WordFrequency> load(List<String> texts) {
        List<WordFrequency> wordFrequencies = new ArrayList<WordFrequency>();
        Map<String, Integer> cloud = this.buildWordFrequencies(texts, this.wordTokenizer);
        for (Map.Entry<String, Integer> wordCount : cloud.entrySet()) {
            wordFrequencies.add(new WordFrequency(wordCount.getKey(), wordCount.getValue()));
        }
        return this.takeTopFrequencies(wordFrequencies);
    }

    /**
     * Sorts pre-computed word frequencies and returns the most frequent ones.
     * No tokenizing, filtering or normalizing is applied.
     *
     * @param wflist already-counted word frequencies
     * @return at most {@code wordFrequenciesToReturn} entries, highest frequency first
     */
    public List<WordFrequency> loadWordFrequencies(List<WordFrequency> wflist) {
        return this.takeTopFrequencies(wflist);
    }

    /**
     * Counts how often each normalized word occurs across all texts.
     *
     * @param texts     texts to process
     * @param tokenizer tokenizer used to split each text into words
     * @return map from normalized word to its number of occurrences
     */
    private Map<String, Integer> buildWordFrequencies(List<String> texts, WordTokenizer tokenizer) {
        Map<String, Integer> wordFrequencies = new HashMap<String, Integer>();
        for (String text : texts) {
            List<String> words = this.filter(tokenizer.tokenize(text));
            for (String word : words) {
                String normalized = this.normalize(word);
                // A first occurrence counts as 1; every later occurrence adds exactly 1.
                Integer count = wordFrequencies.get(normalized);
                wordFrequencies.put(normalized, count == null ? 1 : count + 1);
            }
        }
        return wordFrequencies;
    }

    /**
     * Applies the stop-word filter, the word-size filter and all user-registered
     * filters (in that order of registration) to the given words.
     *
     * @param words raw tokens to filter
     * @return the words accepted by the composite filter
     */
    private List<String> filter(List<String> words) {
        List<Filter> allFilters = new ArrayList<Filter>();
        allFilters.add(new StopWordFilter(this.stopWords));
        allFilters.add(new WordSizeFilter(this.minWordLength, this.maxWordLength));
        allFilters.addAll(this.filters);
        CompositeFilter compositeFilter = new CompositeFilter(allFilters);
        return Lambda.filter((Matcher<?>) compositeFilter, words);
    }

    /**
     * Runs the word through every registered normalizer, in registration order.
     *
     * @param word word to normalize
     * @return the result of the last normalizer, or {@code word} itself if none are registered
     */
    private String normalize(String word) {
        String normalized = word;
        for (Normalizer normalizer : this.normalizers) {
            normalized = normalizer.normalize(normalized);
        }
        return normalized;
    }

    /**
     * Sorts the entries by frequency, highest first, and keeps the leading
     * {@code wordFrequenciesToReturn} entries.
     *
     * @param wordCloudEntities entries to rank
     * @return an empty list if the input is empty, otherwise the top entries
     */
    private List<WordFrequency> takeTopFrequencies(Collection<WordFrequency> wordCloudEntities) {
        if (wordCloudEntities.isEmpty()) {
            return Collections.emptyList();
        }
        // Sort ascending by frequency, then reverse to get descending order.
        List<WordFrequency> sorted =
                Lambda.sort(wordCloudEntities, Lambda.on(WordFrequency.class).getFrequency());
        Collections.reverse(sorted);
        return sorted.subList(0, Math.min(sorted.size(), this.wordFrequenciesToReturn));
    }

    /**
     * Replaces the current stop words with the given collection.
     *
     * @param stopWords words handed to the stop-word filter
     */
    public void setStopWords(Collection<String> stopWords) {
        this.stopWords.clear();
        this.stopWords.addAll(stopWords);
    }

    /**
     * Sets the maximum number of entries returned by the load methods.
     *
     * @param wordFrequenciesToReturn maximum number of word frequencies to return
     */
    public void setWordFrequenciesToReturn(int wordFrequenciesToReturn) {
        this.wordFrequenciesToReturn = wordFrequenciesToReturn;
    }

    /**
     * Sets the minimum word length passed to the word-size filter.
     *
     * @param minWordLength minimum word length
     */
    public void setMinWordLength(int minWordLength) {
        this.minWordLength = minWordLength;
    }

    /**
     * Sets the maximum word length passed to the word-size filter.
     *
     * @param maxWordLength maximum word length
     */
    public void setMaxWordLength(int maxWordLength) {
        this.maxWordLength = maxWordLength;
    }

    /**
     * Sets the tokenizer used to split texts into words.
     *
     * @param wordTokenizer tokenizer to use
     */
    public void setWordTokenizer(WordTokenizer wordTokenizer) {
        this.wordTokenizer = wordTokenizer;
    }

    /** Removes all user-registered filters; the stop-word and word-size filters still apply. */
    public void clearFilters() {
        this.filters.clear();
    }

    /**
     * Registers an additional filter.
     *
     * @param filter filter to append
     */
    public void addFilter(Filter filter) {
        this.filters.add(filter);
    }

    /**
     * Replaces all user-registered filters with the given one.
     *
     * @param filter the only user-registered filter to keep
     */
    public void setFilter(Filter filter) {
        this.filters.clear();
        this.filters.add(filter);
    }

    /** Removes all normalizers, including the defaults registered by the constructor. */
    public void clearNormalizers() {
        this.normalizers.clear();
    }

    /**
     * Registers an additional normalizer, applied after those already registered.
     *
     * @param normalizer normalizer to append
     */
    public void addNormalizer(Normalizer normalizer) {
        this.normalizers.add(normalizer);
    }

    /**
     * Replaces all normalizers, including the defaults, with the given one.
     *
     * @param normalizer the only normalizer to keep
     */
    public void setNormalizer(Normalizer normalizer) {
        this.normalizers.clear();
        this.normalizers.add(normalizer);
    }

    /**
     * Sets the character encoding used when reading from streams and files.
     *
     * @param characterEncoding charset name, e.g. {@code "UTF-8"}
     */
    public void setCharacterEncoding(String characterEncoding) {
        this.characterEncoding = characterEncoding;
    }

    /**
     * Sets the timeout passed to jsoup when loading from a URL. Values larger than
     * {@link Integer#MAX_VALUE} are clamped to {@link Integer#MAX_VALUE}.
     *
     * @param urlLoadTimeout timeout value handed to jsoup
     */
    public void setUrlLoadTimeout(long urlLoadTimeout) {
        this.urlLoadTimeout = urlLoadTimeout;
    }
}

