/*
 * Decompiled with CFR 0.152.
 */
package smile.nlp;

import java.io.Serializable;
import java.util.Collection;
import scala.Function0;
import scala.Function1;
import scala.MatchError;
import scala.Predef$;
import scala.Tuple2;
import scala.collection.ArrayOps$;
import scala.collection.Seq;
import scala.collection.immutable.Map;
import scala.collection.immutable.Set;
import scala.jdk.CollectionConverters$;
import scala.math.Ordering;
import scala.reflect.ClassTag;
import scala.reflect.ClassTag$;
import scala.runtime.BoxesRunTime;
import scala.runtime.ScalaRunTime$;
import scala.runtime.java8.JFunction0;
import scala.runtime.java8.JFunction1;
import smile.math.MathEx;
import smile.nlp.Corpus;
import smile.nlp.PimpedString;
import smile.nlp.SimpleCorpus;
import smile.nlp.Text;
import smile.nlp.collocation.Bigram;
import smile.nlp.collocation.NGram;
import smile.nlp.pos.HMMPOSTagger;
import smile.nlp.pos.PennTreebankPOS;
import smile.nlp.stemmer.LancasterStemmer;
import smile.nlp.stemmer.PorterStemmer;
import smile.util.package$time$;

/**
 * Singleton holder of NLP convenience helpers for the {@code smile.nlp} package.
 *
 * <p>This is decompiler output; the {@code package$} / {@code MODULE$} naming suggests it is the
 * compiled form of a Scala package object (NOTE(review): confirm against the Scala source).
 * All instance methods are reached through {@link #MODULE$}; the constructor is private.
 *
 * <p>NOTE(review): several lambdas below reference decompiler-synthesized names
 * ({@code df$1}, {@code features$1}) that are not declared in this file, so this listing is a
 * reading aid rather than compilable Java.
 */
public final class package$ {
    /** The single instance of this class. */
    public static final package$ MODULE$ = new package$();
    // Anonymous PorterStemmer subclass whose added apply(String) simply delegates to stem(String).
    // NOTE(review): apply is private inside an anonymous class here, so it is not callable from
    // outside in this decompiled form — presumably an artifact of a Scala `apply`; confirm.
    private static final PorterStemmer porter = new PorterStemmer(){

        private String apply(String word) {
            return this.stem(word);
        }
    };
    // Anonymous LancasterStemmer subclass; same delegation of apply(String) to stem(String) as above.
    private static final LancasterStemmer lancaster = new LancasterStemmer(){

        private String apply(String word) {
            return this.stem(word);
        }
    };

    /**
     * Wraps a string in a {@link PimpedString}.
     *
     * @param string the string to wrap
     * @return a new {@code PimpedString} constructed from {@code string}
     */
    public PimpedString pimpString(String string) {
        return new PimpedString(string);
    }

    /**
     * Returns the shared Porter stemmer instance.
     *
     * @return the static {@code porter} field
     */
    public PorterStemmer porter() {
        return porter;
    }

    /**
     * Returns the shared Lancaster stemmer instance.
     *
     * @return the static {@code lancaster} field
     */
    public LancasterStemmer lancaster() {
        return lancaster;
    }

    /**
     * Builds a {@link SimpleCorpus} from a sequence of strings.
     *
     * @param text2 the strings to add; each one is wrapped in a new {@link Text}
     * @return a new corpus to which one {@code Text} per input string has been added
     */
    public SimpleCorpus corpus(Seq<String> text2) {
        SimpleCorpus corpus = new SimpleCorpus();
        text2.foreach((Function1 & Serializable)text -> corpus.add(new Text(text)));
        return corpus;
    }

    /**
     * Finds bigram collocations by calling {@code Bigram.of(corpus, k, minFreq)} on a corpus
     * built from {@code text}.
     *
     * <p>The call is passed as a thunk to {@code package$time$.apply} with the label
     * "Bi-gram collocation" (presumably a timing/logging wrapper — its behaviour is not
     * visible in this file).
     *
     * @param k forwarded to {@code Bigram.of} (presumably the number of bigrams to return — confirm)
     * @param minFreq forwarded to {@code Bigram.of} (presumably a minimum frequency — confirm)
     * @param text the documents used to build the corpus via {@link #corpus(Seq)}
     * @return whatever the wrapper returns, cast to {@code Bigram[]}
     */
    public Bigram[] bigram(int k, int minFreq, scala.collection.immutable.Seq<String> text) {
        return (Bigram[])package$time$.MODULE$.apply("Bi-gram collocation", (Function0 & Serializable)() -> Bigram.of((Corpus)MODULE$.corpus((Seq<String>)text), (int)k, (int)minFreq));
    }

    /**
     * Finds bigram collocations by calling {@code Bigram.of(corpus, p, minFreq)} on a corpus
     * built from {@code text}; overload taking a {@code double} first argument.
     *
     * <p>Wrapped in {@code package$time$.apply} with the label "Bi-gram collocation", like the
     * {@code int} overload.
     *
     * @param p forwarded to {@code Bigram.of} (presumably a p-value threshold — confirm)
     * @param minFreq forwarded to {@code Bigram.of} (presumably a minimum frequency — confirm)
     * @param text the documents used to build the corpus via {@link #corpus(Seq)}
     * @return whatever the wrapper returns, cast to {@code Bigram[]}
     */
    public Bigram[] bigram(double p, int minFreq, scala.collection.immutable.Seq<String> text) {
        return (Bigram[])package$time$.MODULE$.apply("Bi-gram collocation", (Function0 & Serializable)() -> Bigram.of((Corpus)MODULE$.corpus((Seq<String>)text), (double)p, (int)minFreq));
    }

    /**
     * Extracts n-grams by tokenizing {@code text} and passing the result to {@code NGram.of}.
     *
     * <p>Wrapped in {@code package$time$.apply} with the label "N-gram collocation".
     *
     * @param maxNGramSize forwarded to {@code NGram.of}
     * @param minFreq forwarded to {@code NGram.of}
     * @param text the documents to process
     * @return whatever the wrapper returns, cast to {@code NGram[][]}
     */
    public NGram[][] ngram(int maxNGramSize, int minFreq, scala.collection.immutable.Seq<String> text) {
        return (NGram[][])package$time$.MODULE$.apply("N-gram collocation", (Function0 & Serializable)() -> {
            // Pipeline per document: PimpedString.sentences() -> per sentence PimpedString.words("none")
            // -> per word porter().stripPluralParticiple(word).toLowerCase(). The flatMap yields one
            // String[] of processed words per sentence across all documents.
            // NOTE(review): the meaning of the "none" argument to words(...) is not visible here
            // (presumably a stop-word filter selector) — confirm.
            scala.collection.immutable.Seq sentences = (scala.collection.immutable.Seq)text.flatMap((Function1 & Serializable)text -> Predef$.MODULE$.wrapRefArray((Object[])ArrayOps$.MODULE$.map$extension(Predef$.MODULE$.refArrayOps((Object[])MODULE$.pimpString((String)text).sentences()), (Function1 & Serializable)sentence -> (String[])ArrayOps$.MODULE$.map$extension(Predef$.MODULE$.refArrayOps((Object[])MODULE$.pimpString((String)sentence).words("none")), (Function1 & Serializable)word -> MODULE$.porter().stripPluralParticiple(word).toLowerCase(), ClassTag$.MODULE$.apply(String.class)), ClassTag$.MODULE$.apply(ScalaRunTime$.MODULE$.arrayClass(String.class)))));
            // Convert the Scala sequence to a java.util.Collection for the Java NGram API.
            return NGram.of((Collection)CollectionConverters$.MODULE$.SeqHasAsJava((Seq)sentences).asJava(), (int)maxNGramSize, (int)minFreq);
        });
    }

    /**
     * Tags a tokenized sentence using {@code HMMPOSTagger.getDefault().tag(sentence)}.
     *
     * <p>Wrapped in {@code package$time$.apply} with the label
     * "PoS tagging with Hidden Markov Model".
     *
     * @param sentence the tokens passed to the tagger
     * @return whatever the wrapper returns, cast to {@code PennTreebankPOS[]}
     */
    public PennTreebankPOS[] postag(String[] sentence) {
        return (PennTreebankPOS[])package$time$.MODULE$.apply("PoS tagging with Hidden Markov Model", (Function0 & Serializable)() -> HMMPOSTagger.getDefault().tag(sentence));
    }

    /**
     * Maps each term to its value in {@code bag}, producing one {@code double} per term.
     *
     * <p>Terms absent from {@code bag} map to 0; see {@link #$anonfun$vectorize$1}.
     *
     * @param terms the terms to look up, in output order
     * @param bag term-to-value map; values are unboxed as {@code int} by the helper
     * @return an array with one entry per element of {@code terms}
     */
    public double[] vectorize(String[] terms, Map<String, Object> bag) {
        return (double[])ArrayOps$.MODULE$.map$extension(Predef$.MODULE$.refArrayOps((Object[])terms), (Function1 & Serializable)x$1 -> BoxesRunTime.boxToDouble((double)package$.$anonfun$vectorize$1(bag, x$1)), (ClassTag)ClassTag$.MODULE$.Double());
    }

    /**
     * Returns the positions in {@code terms} of the terms that are contained in {@code bag}.
     *
     * <p>Implemented as zip-with-index, filter on set membership of the term, then project the
     * index component.
     *
     * @param terms the terms to test
     * @param bag the set of terms considered present
     * @return the indices (into {@code terms}) of the terms found in {@code bag}
     */
    public int[] vectorize(String[] terms, Set<String> bag) {
        return (int[])ArrayOps$.MODULE$.map$extension(Predef$.MODULE$.refArrayOps((Object[])ArrayOps$.MODULE$.filter$extension(Predef$.MODULE$.refArrayOps((Object[])ArrayOps$.MODULE$.zipWithIndex$extension(Predef$.MODULE$.refArrayOps((Object[])terms))), (Function1 & Serializable)x0$1 -> BoxesRunTime.boxToBoolean((boolean)package$.$anonfun$vectorize$3(bag, x0$1)))), (Function1 & Serializable)x$2 -> BoxesRunTime.boxToInteger((int)x$2._2$mcI$sp()), (ClassTag)ClassTag$.MODULE$.Int());
    }

    /**
     * Counts, for each term, how many maps in {@code corpus} contain that term as a key.
     *
     * @param terms the terms to count
     * @param corpus one map per document; only key membership is consulted
     * @return an array with one count per element of {@code terms}
     */
    public int[] df(String[] terms, Map<String, Object>[] corpus) {
        return (int[])ArrayOps$.MODULE$.map$extension(Predef$.MODULE$.refArrayOps((Object[])terms), (Function1 & Serializable)term -> BoxesRunTime.boxToInteger((int)ArrayOps$.MODULE$.count$extension(Predef$.MODULE$.refArrayOps((Object[])corpus), (Function1 & Serializable)x$3 -> BoxesRunTime.boxToBoolean((boolean)x$3.contains((Object)term)))), (ClassTag)ClassTag$.MODULE$.Int());
    }

    /**
     * Scalar weighting formula: {@code (tf / maxtf) * ln((1 + n) / (1 + df))}.
     *
     * @param tf the term's value in the document
     * @param maxtf the divisor applied to {@code tf}; no guard against zero is present
     * @param n numerator count inside the logarithm
     * @param df denominator count inside the logarithm
     * @return the weighted value
     */
    private double tfidf(double tf, double maxtf, int n, int df) {
        return tf / maxtf * Math.log((1.0 + (double)n) / (1.0 + (double)df));
    }

    /**
     * Applies {@link #tfidf(double[], int, int[])} to every row of {@code corpus}.
     *
     * <p>First counts, per column, the number of rows whose value in that column is greater
     * than 0, then maps each row using {@code n = corpus.length} and those counts.
     *
     * <p>{@code corpus[0]} is read unconditionally to size the count array, so an empty
     * {@code corpus} throws {@code ArrayIndexOutOfBoundsException}.
     *
     * @param corpus one row per document
     * @return one transformed row per input row
     */
    public double[][] tfidf(double[][] corpus) {
        int n = corpus.length;
        int[] df = new int[corpus[0].length];
        ArrayOps$.MODULE$.foreach$extension(Predef$.MODULE$.refArrayOps((Object[])corpus), (Function1 & Serializable)bag -> {
            ArrayOps$.MODULE$.indices$extension(Predef$.MODULE$.intArrayOps(df)).foreach$mVc$sp((Function1)(JFunction1.mcVI.sp & Serializable)i -> {
                if (bag[i] > 0.0) {
                    // NOTE(review): df$1 is not declared in this file; presumably the decompiler's
                    // name for the captured `df` array, i.e. this increments df[i] — confirm.
                    df$1[i] = df[i] + 1;
                    return;
                }
            });
            return df;
        });
        return (double[][])ArrayOps$.MODULE$.map$extension(Predef$.MODULE$.refArrayOps((Object[])corpus), (Function1 & Serializable)bag -> MODULE$.tfidf((double[])bag, n, df), ClassTag$.MODULE$.apply(ScalaRunTime$.MODULE$.arrayClass(Double.TYPE)));
    }

    /**
     * Transforms a single row: each entry becomes
     * {@code tfidf(bag[i], max(bag), n, df[i])}, after which the array is passed to
     * {@code MathEx.unitize}.
     *
     * @param bag the input row; its maximum is used as {@code maxtf}
     * @param n forwarded to the scalar formula
     * @param df per-index counts; indexed with the same {@code i} as {@code bag}
     * @return a new array of the same length as {@code bag}
     */
    public double[] tfidf(double[] bag, int n, int[] df) {
        double maxtf = BoxesRunTime.unboxToDouble((Object)Predef$.MODULE$.wrapDoubleArray(bag).max((Ordering)Ordering.Double$.TotalOrdering$.MODULE$));
        double[] features = new double[bag.length];
        ArrayOps$.MODULE$.indices$extension(Predef$.MODULE$.doubleArrayOps(features)).foreach$mVc$sp((Function1)(JFunction1.mcVI.sp & Serializable)i -> {
            // NOTE(review): features$1 is not declared in this file; presumably the decompiler's
            // name for the captured `features` array — confirm.
            features$1[i] = MODULE$.tfidf(bag[i], maxtf, n, df[i]);
        });
        // Presumably normalizes `features` in place to unit norm; MathEx.unitize is not visible here.
        MathEx.unitize((double[])features);
        return features;
    }

    /**
     * Synthetic lambda body for {@link #vectorize(String[], Map)}: looks up {@code x$1} in
     * {@code bag$1} with a default of 0, unboxes the result as an {@code int}, and returns it
     * widened to {@code double}.
     */
    public static final /* synthetic */ double $anonfun$vectorize$1(Map bag$1, String x$1) {
        return BoxesRunTime.unboxToInt((Object)bag$1.getOrElse((Object)x$1, (Function0)(JFunction0.mcI.sp & Serializable)() -> 0));
    }

    /**
     * Synthetic lambda body for {@link #vectorize(String[], Set)}: returns whether the first
     * element of the tuple is contained in {@code bag$2}.
     *
     * @throws MatchError if the tuple is {@code null}
     */
    public static final /* synthetic */ boolean $anonfun$vectorize$3(Set bag$2, Tuple2 x0$1) {
        Tuple2 tuple2 = x0$1;
        if (tuple2 != null) {
            String w = (String)tuple2._1();
            return bag$2.contains((Object)w);
        }
        throw new MatchError((Object)tuple2);
    }

    /** Private so that {@link #MODULE$} is the only instance created in this file. */
    private package$() {
    }
}

