/*
 * Decompiled with CFR 0.152.
 */
package com.kennycason.kumo.nlp;

import com.kennycason.kumo.WordFrequency;
import com.kennycason.kumo.nlp.filter.CompositeFilter;
import com.kennycason.kumo.nlp.filter.Filter;
import com.kennycason.kumo.nlp.filter.StopWordFilter;
import com.kennycason.kumo.nlp.filter.WordSizeFilter;
import com.kennycason.kumo.nlp.normalize.CharacterStrippingNormalizer;
import com.kennycason.kumo.nlp.normalize.LowerCaseNormalizer;
import com.kennycason.kumo.nlp.normalize.Normalizer;
import com.kennycason.kumo.nlp.normalize.TrimToEmptyNormalizer;
import com.kennycason.kumo.nlp.tokenizer.api.WordTokenizer;
import com.kennycason.kumo.nlp.tokenizer.core.WhiteSpaceWordTokenizer;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

/**
 * Turns raw text into a list of {@link WordFrequency} entries.
 *
 * <p>Each input text is tokenized, every token is passed through the configured
 * normalizers in insertion order, the result is filtered (stop words, word size,
 * then any user supplied filters) and the surviving words are counted. The
 * counted words are sorted with {@link WordFrequency#compareTo} and truncated to
 * {@code wordFrequenciesToReturn} entries.
 *
 * <p>Instances hold mutable configuration and do no synchronization of their own.
 */
public class FrequencyAnalyzer {
    public static final String DEFAULT_ENCODING = "UTF-8";
    public static final int DEFAULT_WORD_MAX_LENGTH = 32;
    public static final int DEFAULT_WORD_MIN_LENGTH = 3;
    public static final int DEFAULT_WORD_FREQUENCIES_TO_RETURN = 50;
    public static final long DEFAULT_URL_LOAD_TIMEOUT = 3000L;

    /** Word used by the auto-fill overloads that do not take an explicit fill word. */
    private static final String DEFAULT_AUTO_FILL_WORD = "nothing";

    private final Set<String> stopWords = new HashSet<>();
    private WordTokenizer wordTokenizer = new WhiteSpaceWordTokenizer();
    private final List<Filter> filters = new ArrayList<>();
    private final List<Normalizer> normalizers = new ArrayList<>();
    private int wordFrequenciesToReturn = DEFAULT_WORD_FREQUENCIES_TO_RETURN;
    private int maxWordLength = DEFAULT_WORD_MAX_LENGTH;
    private int minWordLength = DEFAULT_WORD_MIN_LENGTH;
    private String characterEncoding = DEFAULT_ENCODING;
    private long urlLoadTimeout = DEFAULT_URL_LOAD_TIMEOUT;

    /**
     * Creates an analyzer with the default settings and registers, in this order,
     * a {@link TrimToEmptyNormalizer}, a {@link CharacterStrippingNormalizer} and a
     * {@link LowerCaseNormalizer}.
     */
    public FrequencyAnalyzer() {
        this.normalizers.add(new TrimToEmptyNormalizer());
        this.normalizers.add(new CharacterStrippingNormalizer());
        this.normalizers.add(new LowerCaseNormalizer());
    }

    /**
     * Reads all lines from the stream using the configured character encoding and
     * analyzes them. The stream is not closed here; it remains owned by the caller.
     *
     * @param inputStream source of the text
     * @return the top word frequencies
     * @throws IOException if reading the stream fails
     */
    public List<WordFrequency> load(InputStream inputStream) throws IOException {
        return this.load(IOUtils.readLines(inputStream, this.characterEncoding));
    }

    /**
     * Reads and analyzes the given file.
     *
     * @param file file to read
     * @return the top word frequencies
     * @throws IOException if the file cannot be opened or read
     */
    public List<WordFrequency> load(File file) throws IOException {
        // The stream is created here, so it must also be closed here.
        try (InputStream inputStream = new FileInputStream(file)) {
            return this.load(inputStream);
        }
    }

    /**
     * Reads and analyzes the file at the given path.
     *
     * @param filePath path of the file to read
     * @return the top word frequencies
     * @throws IOException if the file cannot be opened or read
     */
    public List<WordFrequency> load(String filePath) throws IOException {
        return this.load(new File(filePath));
    }

    /**
     * Fetches the document at the URL with Jsoup and analyzes the text of its body.
     *
     * @param url location of the document
     * @return the top word frequencies
     * @throws IOException if fetching or parsing the document fails
     */
    public List<WordFrequency> load(URL url) throws IOException {
        // Jsoup takes an int timeout; clamp instead of letting a plain cast wrap around.
        long boundedTimeout = Math.max((long) Integer.MIN_VALUE,
                Math.min((long) Integer.MAX_VALUE, this.urlLoadTimeout));
        Document doc = Jsoup.parse(url, (int) boundedTimeout);
        return this.load(Collections.singletonList(doc.body().text()));
    }

    /**
     * Analyzes the given texts.
     *
     * @param texts texts to tokenize and count
     * @return the top word frequencies
     */
    public List<WordFrequency> load(List<String> texts) {
        List<WordFrequency> wordFrequencies = new ArrayList<>();
        appendFrequencies(this.buildWordFrequencies(texts, this.wordTokenizer), wordFrequencies);
        return this.takeTopFrequencies(wordFrequencies);
    }

    /**
     * Stream variant of {@link #load(List, boolean, String)}. The stream is read
     * with the configured character encoding and is not closed here.
     *
     * @param inputStream  source of the text
     * @param autoFill     whether to repeat entries to fill the result
     * @param autoFillWord word used when no word survives filtering
     * @return the top word frequencies
     * @throws IOException if reading the stream fails
     */
    public List<WordFrequency> load(InputStream inputStream, boolean autoFill, String autoFillWord) throws IOException {
        return this.load(IOUtils.readLines(inputStream, this.characterEncoding), autoFill, autoFillWord);
    }

    /**
     * Same as {@link #load(InputStream, boolean, String)} with the fill word {@code "nothing"}.
     *
     * @param inputStream source of the text
     * @param autoFill    whether to repeat entries to fill the result
     * @return the top word frequencies
     * @throws IOException if reading the stream fails
     */
    public List<WordFrequency> load(InputStream inputStream, boolean autoFill) throws IOException {
        return this.load(inputStream, autoFill, DEFAULT_AUTO_FILL_WORD);
    }

    /**
     * Same as {@link #load(List, boolean, String)} with the fill word {@code "nothing"}.
     *
     * @param texts    texts to tokenize and count
     * @param autoFill whether to repeat entries to fill the result
     * @return the top word frequencies
     */
    public List<WordFrequency> load(List<String> texts, boolean autoFill) {
        return this.load(texts, autoFill, DEFAULT_AUTO_FILL_WORD);
    }

    /**
     * Analyzes the given texts and, when {@code autoFill} is set, repeats the
     * counted entries so that a short input still yields several results.
     *
     * <p>The repeat count is {@code wordFrequenciesToReturn} divided by the summed
     * length of all distinct words, but at least one. If no word survives
     * filtering, {@code autoFillWord} is inserted with a count of one and used
     * instead.
     *
     * @param texts        texts to tokenize and count
     * @param autoFill     when {@code false} this behaves exactly like {@link #load(List)}
     * @param autoFillWord word used when no word survives filtering
     * @return the top word frequencies
     */
    public List<WordFrequency> load(List<String> texts, boolean autoFill, String autoFillWord) {
        if (!autoFill) {
            return this.load(texts);
        }
        List<WordFrequency> wordFrequencies = new ArrayList<>();
        Map<String, Integer> cloud = this.buildWordFrequencies(texts, this.wordTokenizer);
        int totalLength = 0;
        for (String word : cloud.keySet()) {
            totalLength += word.length();
        }
        if (totalLength == 0) {
            cloud.put(autoFillWord, 1);
            totalLength = autoFillWord.length();
            // Existing behavior: the fallback is added once here in addition to the repeat loop below.
            appendFrequencies(cloud, wordFrequencies);
        }
        // An empty fill word leaves totalLength at zero; fall back to one pass rather than dividing by zero.
        int timesToAdd = totalLength == 0 ? 1 : Math.max(this.wordFrequenciesToReturn / totalLength, 1);
        for (int i = 0; i < timesToAdd; ++i) {
            appendFrequencies(cloud, wordFrequencies);
        }
        return this.takeTopFrequencies(wordFrequencies);
    }

    /**
     * Sorts and truncates already counted word frequencies.
     *
     * @param wordFrequencies precomputed frequencies
     * @return the top word frequencies
     */
    public List<WordFrequency> loadWordFrequencies(List<WordFrequency> wordFrequencies) {
        return this.takeTopFrequencies(wordFrequencies);
    }

    /** Appends one {@link WordFrequency} per map entry to {@code target}. */
    private static void appendFrequencies(Map<String, Integer> cloud, List<WordFrequency> target) {
        cloud.forEach((word, count) -> target.add(new WordFrequency(word, count)));
    }

    /** Tokenizes, normalizes and filters the texts and counts the occurrences of each remaining word. */
    private Map<String, Integer> buildWordFrequencies(List<String> texts, WordTokenizer tokenizer) {
        return texts.stream()
                .map(tokenizer::tokenize)
                .flatMap(Collection::stream)
                .map(this::normalize)
                .filter(this.buildFilter())
                .collect(Collectors.groupingBy(e -> e, Collectors.reducing(0, e -> 1, Integer::sum)));
    }

    /** Combines the stop word filter, the word size filter and all user supplied filters, in that order. */
    private Filter buildFilter() {
        List<Filter> allFilters = new ArrayList<>();
        allFilters.add(new StopWordFilter(this.stopWords));
        allFilters.add(new WordSizeFilter(this.minWordLength, this.maxWordLength));
        allFilters.addAll(this.filters);
        return new CompositeFilter(allFilters);
    }

    /** Applies every configured normalizer to the word, in insertion order. */
    private String normalize(String word) {
        String normalized = word;
        for (Normalizer normalizer : this.normalizers) {
            normalized = normalizer.apply(normalized);
        }
        return normalized;
    }

    /** Sorts the entries with {@link WordFrequency#compareTo} and keeps the first {@code wordFrequenciesToReturn}. */
    private List<WordFrequency> takeTopFrequencies(Collection<WordFrequency> wordCloudEntities) {
        return wordCloudEntities.stream()
                .sorted(WordFrequency::compareTo)
                .limit(this.wordFrequenciesToReturn)
                .collect(Collectors.toList());
    }

    /**
     * Replaces the current stop words with a copy of the given collection.
     *
     * @param stopWords the new stop words
     */
    public void setStopWords(Collection<String> stopWords) {
        this.stopWords.clear();
        this.stopWords.addAll(stopWords);
    }

    /**
     * Sets the maximum number of entries returned by the load methods.
     *
     * @param wordFrequenciesToReturn maximum number of entries
     */
    public void setWordFrequenciesToReturn(int wordFrequenciesToReturn) {
        this.wordFrequenciesToReturn = wordFrequenciesToReturn;
    }

    /**
     * Sets the minimum word length handed to the {@link WordSizeFilter}.
     *
     * @param minWordLength minimum word length
     */
    public void setMinWordLength(int minWordLength) {
        this.minWordLength = minWordLength;
    }

    /**
     * Sets the maximum word length handed to the {@link WordSizeFilter}.
     *
     * @param maxWordLength maximum word length
     */
    public void setMaxWordLength(int maxWordLength) {
        this.maxWordLength = maxWordLength;
    }

    /**
     * Replaces the tokenizer used to split texts into words.
     *
     * @param wordTokenizer the tokenizer to use
     */
    public void setWordTokenizer(WordTokenizer wordTokenizer) {
        this.wordTokenizer = wordTokenizer;
    }

    /** Removes all user supplied filters; the stop word and word size filters are still applied. */
    public void clearFilters() {
        this.filters.clear();
    }

    /**
     * Adds a filter that is applied after the stop word and word size filters.
     *
     * @param filter the filter to add
     */
    public void addFilter(Filter filter) {
        this.filters.add(filter);
    }

    /**
     * Replaces all user supplied filters with the given one.
     *
     * @param filter the only user supplied filter to keep
     */
    public void setFilter(Filter filter) {
        this.filters.clear();
        this.filters.add(filter);
    }

    /** Removes all normalizers, including the ones registered by the constructor. */
    public void clearNormalizers() {
        this.normalizers.clear();
    }

    /**
     * Appends a normalizer; it runs after the ones already registered.
     *
     * @param normalizer the normalizer to add
     */
    public void addNormalizer(Normalizer normalizer) {
        this.normalizers.add(normalizer);
    }

    /**
     * Replaces all normalizers with the given one.
     *
     * @param normalizer the only normalizer to keep
     */
    public void setNormalizer(Normalizer normalizer) {
        this.normalizers.clear();
        this.normalizers.add(normalizer);
    }

    /**
     * Sets the character encoding used when reading from input streams.
     *
     * @param characterEncoding name of the encoding
     */
    public void setCharacterEncoding(String characterEncoding) {
        this.characterEncoding = characterEncoding;
    }

    /**
     * Sets the timeout passed to Jsoup when loading from a URL. Values outside the
     * {@code int} range are clamped when used.
     *
     * @param urlLoadTimeout the timeout value
     */
    public void setUrlLoadTimeout(long urlLoadTimeout) {
        this.urlLoadTimeout = urlLoadTimeout;
    }
}

