/*
 * Decompiled with CFR 0.152.
 */
package smile.nlp.keyword;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import smile.nlp.Trie;
import smile.nlp.collocation.NGram;
import smile.nlp.stemmer.PorterStemmer;
import smile.nlp.tokenizer.SimpleParagraphSplitter;
import smile.nlp.tokenizer.SimpleSentenceSplitter;
import smile.nlp.tokenizer.SimpleTokenizer;
import smile.sort.QuickSort;

/**
 * Keyword extraction from a single document based on the co-occurrence
 * statistics of its frequent terms.
 *
 * <p>The document is split into sentences, frequent n-grams are collected,
 * a sentence-level co-occurrence table is built between them, strongly
 * co-occurring terms are merged into clusters, and every term is scored by
 * a chi-square statistic measuring how far its co-occurrence with each
 * cluster deviates from what the cluster's overall share would predict.
 */
public interface CooccurrenceKeywords {
    /**
     * Extracts keywords from {@code text}, stopping once 10 have been collected.
     *
     * @param text the document to analyse.
     * @return the selected keywords, highest score first.
     */
    public static NGram[] of(String text) {
        return CooccurrenceKeywords.of(text, 10);
    }

    /**
     * Extracts keywords from {@code text}.
     *
     * @param text the document to analyse.
     * @param maxNumKeywords selection stops as soon as this many keywords
     *        have been collected. The size check happens after each
     *        insertion, so at least one keyword is returned whenever any
     *        candidate term exists.
     * @return the selected keywords in the order they were picked
     *         (descending score); empty if no candidate term was found.
     */
    public static NGram[] of(String text, int maxNumKeywords) {
        ArrayList<String[]> sentences = new ArrayList<>();
        SimpleTokenizer tokenizer = new SimpleTokenizer();
        PorterStemmer stemmer = new PorterStemmer();

        // Split the text into tokenized sentences, normalizing every token
        // (plural/participle stripping, then lower-casing). ntotal is the
        // total number of tokens in the document.
        int ntotal = 0;
        for (String paragraph : SimpleParagraphSplitter.getInstance().split(text)) {
            for (String s : SimpleSentenceSplitter.getInstance().split(paragraph)) {
                String[] sentence = tokenizer.split(s);
                for (int i = 0; i < sentence.length; i++) {
                    sentence[i] = stemmer.stripPluralParticiple(sentence[i]).toLowerCase();
                }
                sentences.add(sentence);
                ntotal += sentence.length;
            }
        }

        // Collect candidate phrases of up to maxNGramSize words.
        // NOTE(review): the third argument (4) is presumably the minimum
        // frequency an n-gram needs to be reported - confirm against NGram.of.
        int maxNGramSize = 4;
        ArrayList<NGram> terms = new ArrayList<>();
        for (NGram[] ngrams : NGram.of(sentences, maxNGramSize, 4)) {
            Collections.addAll(terms, ngrams);
        }
        Collections.sort(terms);

        // Keep the last 30% of the sorted list as the frequent terms.
        // NOTE(review): assumes NGram's natural order is ascending by count,
        // so the tail holds the most frequent terms - confirm.
        int n = 3 * terms.size() / 10;
        NGram[] freqTerms = new NGram[n];
        int start = terms.size() - n;
        for (int i = 0; i < n; i++) {
            freqTerms[i] = terms.get(start + i);
        }

        // Trie mapping a frequent term's word sequence to its index in freqTerms.
        Trie<String, Integer> trie = new Trie<>();
        for (int i = 0; i < n; i++) {
            trie.put(freqTerms[i].words, i);
        }

        // Build the co-occurrence table. For each sentence, find the distinct
        // frequent terms it contains; nw[i] accumulates the number of terms in
        // the sentences where term i appears, and table[i][j] counts the
        // sentences in which terms i and j both appear.
        int[] nw = new int[n];
        int[][] table = new int[n][n];
        for (String[] sentence : sentences) {
            HashSet<Integer> phrases = new HashSet<>();
            for (int len = 1; len <= maxNGramSize; len++) {
                for (int from = 0; from <= sentence.length - len; from++) {
                    String[] phrase = Arrays.copyOfRange(sentence, from, from + len);
                    Integer index = trie.get(phrase);
                    if (index != null) {
                        phrases.add(index);
                    }
                }
            }

            for (int a : phrases) {
                nw[a] += phrases.size();
                for (int b : phrases) {
                    if (a != b) {
                        table[a][b]++;
                    }
                }
            }
        }

        // Cluster the frequent terms; initially every term is its own cluster.
        int[] cluster = new int[n];
        for (int i = 0; i < n; i++) {
            cluster[i] = i;
        }

        for (int i = 0; i < n; i++) {
            for (int j = i + 1; j < n; j++) {
                if (table[i][j] <= 0) {
                    continue;
                }

                // Squared co-occurrence count over the product of the two term
                // counts. The product is formed in double arithmetic so that
                // large counts cannot overflow int before the division.
                double cooccurrence = table[i][j];
                double mutual = cooccurrence * cooccurrence
                        / ((double) freqTerms[i].count * (double) freqTerms[j].count);
                if (mutual >= 0.25) {
                    cluster[j] = cluster[i];
                }
            }
        }

        // pc[c]: co-occurrences involving any member of cluster c, relative to
        // the total number of tokens. Only entries at cluster representatives
        // (cluster[c] == c) are ever non-zero.
        double[] pc = new double[n];
        for (int i = 0; i < n; i++) {
            for (int j = 0; j < n; j++) {
                pc[cluster[j]] += table[i][j];
            }
        }
        for (int i = 0; i < n; i++) {
            pc[i] /= ntotal;
        }

        // Chi-square score of each term against every cluster.
        double[] score = new double[n];
        for (int i = 0; i < n; i++) {
            for (int j = 0; j < n; j++) {
                if (cluster[j] != j) {
                    continue; // j is not a cluster representative
                }

                double expected = (double) nw[i] * pc[j];
                if (expected <= 0.0) {
                    // expected is 0 only when term i never matched (its table
                    // row is all zero) or nothing co-occurs with cluster j
                    // (its table columns are all zero). The observed count is
                    // then 0 as well, so the deviation is zero; dividing would
                    // produce 0/0 = NaN and poison the ranking below.
                    continue;
                }

                // Observed co-occurrence of term i with the members of cluster j.
                double fwc = 0.0;
                for (int k = 0; k < n; k++) {
                    if (cluster[k] == j) {
                        fwc += table[i][k];
                    }
                }

                double d = fwc - expected;
                score[i] += d * d / expected;
            }
        }

        // index[r] is used as the freqTerms position of the term ranked r in
        // ascending score order, so the ranking is walked from the end.
        int[] index = QuickSort.sort(score);
        ArrayList<NGram> keywords = new ArrayList<>();
        for (int i = n; i-- > 0; ) {
            NGram candidate = freqTerms[index[i]];
            boolean add = true;

            // Compare with every better-ranked term of the same cluster: the
            // candidate is dropped if one of them is at least as long,
            // otherwise the shorter better-ranked term is withdrawn in favour
            // of the candidate.
            for (int j = i + 1; j < n; j++) {
                if (cluster[index[j]] != cluster[index[i]]) {
                    continue;
                }

                NGram better = freqTerms[index[j]];
                if (better.words.length >= candidate.words.length) {
                    add = false;
                    break;
                }
                keywords.remove(better);
                add = true;
            }

            if (add) {
                keywords.add(candidate);
                if (keywords.size() >= maxNumKeywords) {
                    break;
                }
            }
        }

        return keywords.toArray(new NGram[0]);
    }
}

