/*
 * Decompiled with CFR 0.152.
 */
package smile.nlp.collocation;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import smile.nlp.Corpus;
import smile.sort.HeapSelect;
import smile.stat.distribution.ChiSquareDistribution;

/**
 * A bigram annotated with its corpus frequency and a collocation score.
 *
 * <p>The score is the likelihood ratio statistic {@code -2 log(lambda)} that
 * compares the hypothesis "the second word is independent of the first" with
 * the hypothesis "the second word depends on the first". Larger scores mean
 * stronger evidence that the two words form a collocation.
 *
 * <p>Instances are ordered by {@link #score} only (see {@link #compareTo}).
 */
public class Bigram
extends smile.nlp.Bigram
implements Comparable<Bigram> {
    /** Number of occurrences of this bigram, as reported by the corpus. */
    public final int count;
    /** Collocation score of this bigram (likelihood ratio statistic). */
    public final double score;
    /** Chi-square distribution (constructed with parameter 1) used to derive the score cutoff from a significance level. */
    private static final ChiSquareDistribution chisq = new ChiSquareDistribution(1);

    /**
     * Creates a scored bigram.
     *
     * @param w1 the first word of the bigram.
     * @param w2 the second word of the bigram.
     * @param count the number of occurrences of the bigram.
     * @param score the collocation score of the bigram.
     */
    public Bigram(String w1, String w2, int count, double score) {
        super(w1, w2);
        this.count = count;
        this.score = score;
    }

    /**
     * Formats the bigram as {@code (w1 w2, count, score)} with the score
     * rendered to two decimal places.
     */
    @Override
    public String toString() {
        return String.format("(%s %s, %d, %.2f)", this.w1, this.w2, this.count, this.score);
    }

    /**
     * Orders bigrams by ascending {@link #score}; the words and the count
     * do not take part in the comparison.
     */
    @Override
    public int compareTo(Bigram o) {
        return Double.compare(this.score, o.score);
    }

    /**
     * Finds the highest-scoring collocations in a corpus.
     *
     * @param corpus the corpus to scan.
     * @param k the maximum number of collocations to return.
     * @param minFrequency only bigrams whose count is strictly greater than
     *                     this value are considered.
     * @return at most {@code k} collocations (fewer if not enough bigrams
     *         qualify), arranged by reversing the sorted selection heap so
     *         that the strongest collocation is intended to come first.
     *         An empty array when {@code k} is zero.
     * @throws IllegalArgumentException if {@code k} is negative.
     */
    public static Bigram[] of(Corpus corpus, int k, int minFrequency) {
        if (k < 0) {
            throw new IllegalArgumentException("Invalid k = " + k);
        }
        if (k == 0) {
            // Nothing can be selected; avoid building a zero-capacity heap.
            return new Bigram[0];
        }

        HeapSelect<Bigram> heap = new HeapSelect<>(Bigram.class, k);
        long size = corpus.size();
        Iterator<smile.nlp.Bigram> iterator = corpus.bigrams();
        while (iterator.hasNext()) {
            smile.nlp.Bigram bigram = iterator.next();
            int c12 = corpus.count(bigram);
            if (c12 <= minFrequency) {
                continue;
            }
            int c1 = corpus.count(bigram.w1);
            int c2 = corpus.count(bigram.w2);
            double score = Bigram.likelihoodRatio(c1, c2, c12, size);
            // Scores are stored negated so that the selection heap, which is
            // ordered by compareTo, retains the entries with the largest
            // real scores.
            heap.add(new Bigram(bigram.w1, bigram.w2, c12, -score));
        }

        heap.sort();
        Bigram[] selected = heap.toArray();
        int n = selected.length;
        Bigram[] collocations = new Bigram[n];
        for (int i = 0; i < n; ++i) {
            // Reverse the heap order and undo the negation of the score.
            Bigram bigram = selected[n - i - 1];
            collocations[i] = new Bigram(bigram.w1, bigram.w2, bigram.count, -bigram.score);
        }
        return collocations;
    }

    /**
     * Finds all collocations in a corpus whose score exceeds the chi-square
     * quantile at the given probability.
     *
     * @param corpus the corpus to scan.
     * @param p the probability, strictly between 0 and 1, whose chi-square
     *          quantile is used as the score cutoff.
     * @param minFrequency only bigrams whose count is strictly greater than
     *                     this value are considered.
     * @return the qualifying collocations in descending order of score.
     * @throws IllegalArgumentException if {@code p} is not in (0, 1).
     */
    public static Bigram[] of(Corpus corpus, double p, int minFrequency) {
        if (p <= 0.0 || p >= 1.0) {
            throw new IllegalArgumentException("Invalid p = " + p);
        }
        double cutoff = chisq.quantile(p);
        long size = corpus.size();
        ArrayList<Bigram> bigrams = new ArrayList<>();
        Iterator<smile.nlp.Bigram> iterator = corpus.bigrams();
        while (iterator.hasNext()) {
            smile.nlp.Bigram bigram = iterator.next();
            int c12 = corpus.count(bigram);
            if (c12 <= minFrequency) {
                continue;
            }
            int c1 = corpus.count(bigram.w1);
            int c2 = corpus.count(bigram.w2);
            double score = Bigram.likelihoodRatio(c1, c2, c12, size);
            // Written as !(score > cutoff) so that a NaN score is rejected too.
            if (!(score > cutoff)) {
                continue;
            }
            bigrams.add(new Bigram(bigram.w1, bigram.w2, c12, score));
        }
        Bigram[] collocations = bigrams.toArray(new Bigram[0]);
        Arrays.sort(collocations, Collections.reverseOrder());
        return collocations;
    }

    /**
     * Computes the likelihood ratio statistic {@code -2 log(lambda)} for a bigram.
     *
     * @param c1 the number of occurrences of the first word.
     * @param c2 the number of occurrences of the second word.
     * @param c12 the number of occurrences of the bigram.
     * @param N the corpus size passed by the caller ({@code corpus.size()}).
     * @return the likelihood ratio statistic.
     */
    private static double likelihoodRatio(int c1, int c2, int c12, long N) {
        // Probability of w2 under independence, and conditional on w1 / not w1.
        double p = (double)c2 / (double)N;
        double p1 = (double)c12 / (double)c1;
        double p2 = (double)(c2 - c12) / (double)(N - (long)c1);
        double logLambda = Bigram.logL(c12, c1, p) + Bigram.logL(c2 - c12, N - (long)c1, p) - Bigram.logL(c12, c1, p1) - Bigram.logL(c2 - c12, N - (long)c1, p2);
        return -2.0 * logLambda;
    }

    /**
     * Computes the binomial log-likelihood kernel
     * {@code k * log(x) + (n - k) * log(1 - x)}.
     *
     * <p>A term whose count is zero contributes exactly zero, so the
     * boundary probabilities {@code x == 0} and {@code x == 1} are evaluated
     * exactly. Substituting a nearby probability such as 0.01 or 0.99 at
     * those boundaries would add an error of about {@code 0.01 * n}, which
     * grows with the corpus size and can make the final statistic negative.
     *
     * @param k the number of successes.
     * @param n the number of trials.
     * @param x the success probability.
     * @return the log-likelihood kernel.
     */
    private static double logL(int k, long n, double x) {
        return Bigram.xlogy(k, x) + Bigram.xlogy(n - (long)k, 1.0 - x);
    }

    /**
     * Returns {@code count * log(prob)}, using the convention
     * {@code 0 * log(0) = 0} so that a zero count never yields NaN.
     */
    private static double xlogy(long count, double prob) {
        return count == 0L ? 0.0 : (double)count * Math.log(prob);
    }
}

