/*
 * Decompiled with CFR 0.152.
 */
package smile.nlp;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import smile.nlp.Bigram;
import smile.nlp.Corpus;
import smile.nlp.SimpleText;
import smile.nlp.Text;
import smile.nlp.TextTerms;
import smile.nlp.dictionary.EnglishPunctuations;
import smile.nlp.dictionary.EnglishStopWords;
import smile.nlp.dictionary.Punctuations;
import smile.nlp.dictionary.StopWords;
import smile.nlp.relevance.Relevance;
import smile.nlp.relevance.RelevanceRanker;
import smile.nlp.tokenizer.SentenceSplitter;
import smile.nlp.tokenizer.SimpleSentenceSplitter;
import smile.nlp.tokenizer.SimpleTokenizer;
import smile.nlp.tokenizer.Tokenizer;
import smile.util.MutableInt;

/**
 * An in-memory {@link Corpus} that keeps every added text together with
 * term counts, bigram counts and an inverted index from term to the
 * documents containing it.
 *
 * <p>Texts are split into sentences and tokens with the configured
 * {@link SentenceSplitter} and {@link Tokenizer}; tokens are lower-cased and
 * those recognised by the configured {@link Punctuations} or
 * {@link StopWords} dictionaries are discarded.
 *
 * <p>The class performs no synchronization of its own.
 */
public class SimpleCorpus implements Corpus {
    /** Total number of kept (non-filtered) tokens over all added texts. */
    private long size;
    /** All documents in insertion order. */
    private final List<SimpleText> docs = new ArrayList<>();
    /** Occurrence count of every kept term. */
    private final Map<String, MutableInt> freq = new HashMap<>();
    /** Occurrence count of every bigram of adjacent known terms. */
    private final Map<Bigram, MutableInt> freq2 = new HashMap<>();
    /** Inverted index: term to the documents in which it occurs. */
    private final Map<String, List<SimpleText>> invertedFile = new HashMap<>();
    private final SentenceSplitter splitter;
    private final Tokenizer tokenizer;
    private final StopWords stopWords;
    private final Punctuations punctuations;

    /**
     * Creates a corpus using the simple sentence splitter and tokenizer and
     * the default English stop-word and punctuation dictionaries.
     */
    public SimpleCorpus() {
        this(SimpleSentenceSplitter.getInstance(), new SimpleTokenizer(), EnglishStopWords.DEFAULT, EnglishPunctuations.getInstance());
    }

    /**
     * Creates a corpus with explicit text-processing components.
     *
     * @param splitter     splits a text body into sentences; dereferenced
     *                     unconditionally by {@link #add(Text)}
     * @param tokenizer    splits a sentence into tokens; dereferenced
     *                     unconditionally by {@link #add(Text)}
     * @param stopWords    stop-word dictionary, or {@code null} to skip
     *                     stop-word filtering
     * @param punctuations punctuation dictionary, or {@code null} to skip
     *                     punctuation filtering
     */
    public SimpleCorpus(SentenceSplitter splitter, Tokenizer tokenizer, StopWords stopWords, Punctuations punctuations) {
        this.splitter = splitter;
        this.tokenizer = tokenizer;
        this.stopWords = stopWords;
        this.punctuations = punctuations;
    }

    /**
     * Adds a text to the corpus, updating the term counts, the bigram counts
     * and the inverted index.
     *
     * @param text the text to add
     * @return the stored document, built from the id, title and body of
     *         {@code text} plus the kept tokens
     */
    public Text add(Text text) {
        List<String> bag = new ArrayList<>();

        for (String sentence : splitter.split(text.body)) {
            String[] tokens = tokenizer.split(sentence);
            for (int i = 0; i < tokens.length; i++) {
                // Locale.ROOT keeps lower-casing independent of the JVM's
                // default locale (e.g. Turkish would map "I" to dotless "ı").
                tokens[i] = tokens[i].toLowerCase(Locale.ROOT);
            }

            for (String word : tokens) {
                if (isFiltered(word)) {
                    continue;
                }
                size++;
                bag.add(word);
                increment(freq, word);
            }

            // Count a bigram only when both neighbours are known terms, i.e.
            // neither was discarded as punctuation or stop word.
            for (int i = 0; i < tokens.length - 1; i++) {
                String w1 = tokens[i];
                String w2 = tokens[i + 1];
                if (freq.containsKey(w1) && freq.containsKey(w2)) {
                    increment(freq2, new Bigram(w1, w2));
                }
            }
        }

        SimpleText doc = new SimpleText(text.id, text.title, text.body, bag.toArray(new String[0]));
        docs.add(doc);

        for (String term : doc.unique()) {
            invertedFile.computeIfAbsent(term, k -> new ArrayList<>()).add(doc);
        }
        return doc;
    }

    /** Returns true if the token is a punctuation mark or stop word per the configured dictionaries. */
    private boolean isFiltered(String word) {
        if (punctuations != null && punctuations.contains(word)) {
            return true;
        }
        return stopWords != null && stopWords.contains(word);
    }

    /** Increments the counter stored under {@code key}, creating it with value 1 if absent. */
    private static <K> void increment(Map<K, MutableInt> counts, K key) {
        MutableInt count = counts.get(key);
        if (count == null) {
            counts.put(key, new MutableInt(1));
        } else {
            count.increment();
        }
    }

    /** Returns the total number of kept tokens over all added texts. */
    @Override
    public long size() {
        return size;
    }

    /** Returns the number of documents added so far. */
    @Override
    public int ndoc() {
        return docs.size();
    }

    /** Returns the number of distinct terms. */
    @Override
    public int nterm() {
        return freq.size();
    }

    /** Returns the number of distinct bigrams. */
    @Override
    public long nbigram() {
        return freq2.size();
    }

    /**
     * Returns the average number of kept tokens per document, truncated to an
     * integer, or 0 when no document has been added yet.
     */
    @Override
    public int avgDocSize() {
        // Guard the empty corpus: integer division by zero would throw.
        if (docs.isEmpty()) {
            return 0;
        }
        return (int) (size / docs.size());
    }

    /**
     * Returns how many times the term occurs in the corpus.
     *
     * @param term the term to look up
     * @return the occurrence count, or 0 if the term is unknown
     */
    @Override
    public int count(String term) {
        MutableInt count = freq.get(term);
        return count == null ? 0 : count.value;
    }

    /**
     * Returns how many times the bigram occurs in the corpus.
     *
     * @param bigram the bigram to look up
     * @return the occurrence count, or 0 if the bigram is unknown
     */
    @Override
    public int count(Bigram bigram) {
        MutableInt count = freq2.get(bigram);
        return count == null ? 0 : count.value;
    }

    /** Returns an iterator over the distinct terms, backed by the internal term map. */
    @Override
    public Iterator<String> terms() {
        return freq.keySet().iterator();
    }

    /** Returns an iterator over the distinct bigrams, backed by the internal bigram map. */
    @Override
    public Iterator<Bigram> bigrams() {
        return freq2.keySet().iterator();
    }

    /**
     * Finds the documents containing the given term.
     *
     * @param term the term to search for
     * @return an iterator over a snapshot copy of the matching documents;
     *         empty if the term is not indexed
     */
    @Override
    public Iterator<Text> search(String term) {
        List<SimpleText> hits = invertedFile.get(term);
        if (hits == null) {
            return Collections.emptyIterator();
        }
        // Copy so the caller's iterator is detached from the index.
        return new ArrayList<Text>(hits).iterator();
    }

    /**
     * Finds the documents containing the given term and orders them by
     * descending relevance.
     *
     * @param ranker computes the relevance score of each hit
     * @param term   the term to search for
     * @return an iterator over the scored hits, highest first; empty if the
     *         term is not indexed
     */
    @Override
    public Iterator<Relevance> search(RelevanceRanker ranker, String term) {
        List<SimpleText> hits = invertedFile.get(term);
        if (hits == null) {
            return Collections.emptyIterator();
        }

        int n = hits.size();
        List<Relevance> rank = new ArrayList<>(n);
        for (SimpleText doc : hits) {
            int tf = doc.tf(term);
            rank.add(new Relevance(doc, ranker.rank(this, doc, term, tf, n)));
        }
        rank.sort(Collections.reverseOrder());
        return rank.iterator();
    }

    /**
     * Finds the documents containing at least one of the given terms and
     * orders them by descending relevance, where a document's relevance is
     * the sum of the ranker's per-term scores.
     *
     * @param ranker computes the per-term relevance score of each hit
     * @param terms  the terms to search for
     * @return an iterator over the scored hits, highest first; empty if none
     *         of the terms is indexed
     */
    @Override
    public Iterator<Relevance> search(RelevanceRanker ranker, String[] terms) {
        Set<SimpleText> hits = new HashSet<>();
        for (String term : terms) {
            List<SimpleText> matched = invertedFile.get(term);
            if (matched != null) {
                hits.addAll(matched);
            }
        }

        int n = hits.size();
        if (n == 0) {
            return Collections.emptyIterator();
        }

        // NOTE(review): n is the size of the union of hits over all terms and
        // is handed to the ranker for every term; the single-term search
        // passes the per-term hit count instead. Confirm this is intended.
        List<Relevance> rank = new ArrayList<>(n);
        for (SimpleText doc : hits) {
            double score = 0.0;
            for (String term : terms) {
                int tf = doc.tf(term);
                score += ranker.rank(this, doc, term, tf, n);
            }
            rank.add(new Relevance(doc, score));
        }
        rank.sort(Collections.reverseOrder());
        return rank.iterator();
    }
}

