/*
 * Decompiled with CFR 0.152.
 */
package org.apache.spark.ml.feature;

import java.io.IOException;
import java.io.Serializable;
import org.apache.spark.ml.Estimator;
import org.apache.spark.ml.feature.CountVectorizer$;
import org.apache.spark.ml.feature.CountVectorizerModel;
import org.apache.spark.ml.feature.CountVectorizerParams;
import org.apache.spark.ml.param.BooleanParam;
import org.apache.spark.ml.param.DoubleParam;
import org.apache.spark.ml.param.IntParam;
import org.apache.spark.ml.param.Param;
import org.apache.spark.ml.param.ParamMap;
import org.apache.spark.ml.param.shared.HasInputCol;
import org.apache.spark.ml.param.shared.HasOutputCol;
import org.apache.spark.ml.util.DefaultParamsWritable;
import org.apache.spark.ml.util.Identifiable$;
import org.apache.spark.ml.util.MLReader;
import org.apache.spark.ml.util.MLWritable;
import org.apache.spark.ml.util.MLWriter;
import org.apache.spark.rdd.RDD;
import org.apache.spark.rdd.RDD$;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.types.StructType;
import org.apache.spark.util.collection.OpenHashMap;
import scala.Array$;
import scala.Function0;
import scala.Function1;
import scala.Function2;
import scala.MatchError;
import scala.None$;
import scala.Predef$;
import scala.Some;
import scala.Tuple2;
import scala.collection.Iterable;
import scala.collection.Iterable$;
import scala.collection.Seq;
import scala.collection.mutable.ArrayOps;
import scala.math.Ordering;
import scala.package$;
import scala.reflect.ClassTag$;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.runtime.java8.JFunction0;
import scala.runtime.java8.JFunction1;

@ScalaSignature(bytes="\u0006\u0001\u0005ue\u0001B\u0001\u0003\u00015\u0011qbQ8v]R4Vm\u0019;pe&TXM\u001d\u0006\u0003\u0007\u0011\tqAZ3biV\u0014XM\u0003\u0002\u0006\r\u0005\u0011Q\u000e\u001c\u0006\u0003\u000f!\tQa\u001d9be.T!!\u0003\u0006\u0002\r\u0005\u0004\u0018m\u00195f\u0015\u0005Y\u0011aA8sO\u000e\u00011\u0003\u0002\u0001\u000f-e\u00012a\u0004\t\u0013\u001b\u0005!\u0011BA\t\u0005\u0005%)5\u000f^5nCR|'\u000f\u0005\u0002\u0014)5\t!!\u0003\u0002\u0016\u0005\t!2i\\;oiZ+7\r^8sSj,'/T8eK2\u0004\"aE\f\n\u0005a\u0011!!F\"pk:$h+Z2u_JL'0\u001a:QCJ\fWn\u001d\t\u00035ui\u0011a\u0007\u0006\u00039\u0011\tA!\u001e;jY&\u0011ad\u0007\u0002\u0016\t\u00164\u0017-\u001e7u!\u0006\u0014\u0018-\\:Xe&$\u0018M\u00197f\u0011!\u0001\u0003A!b\u0001\n\u0003\n\u0013aA;jIV\t!\u0005\u0005\u0002$Y9\u0011AE\u000b\t\u0003K!j\u0011A\n\u0006\u0003O1\ta\u0001\u0010:p_Rt$\"A\u0015\u0002\u000bM\u001c\u0017\r\\1\n\u0005-B\u0013A\u0002)sK\u0012,g-\u0003\u0002.]\t11\u000b\u001e:j]\u001eT!a\u000b\u0015)\u0007}\u0001d\u0007\u0005\u00022i5\t!G\u0003\u00024\r\u0005Q\u0011M\u001c8pi\u0006$\u0018n\u001c8\n\u0005U\u0012$!B*j]\u000e,\u0017%A\u001c\u0002\u000bErSG\f\u0019\t\u0011e\u0002!\u0011!Q\u0001\n\t\nA!^5eA!\u001a\u0001\b\r\u001c\t\u000bq\u0002A\u0011A\u001f\u0002\rqJg.\u001b;?)\tqt\b\u0005\u0002\u0014\u0001!)\u0001e\u000fa\u0001E!\u001aq\b\r\u001c)\u0007m\u0002d\u0007C\u0003=\u0001\u0011\u00051\tF\u0001?Q\r\u0011\u0005G\u000e\u0005\u0006\r\u0002!\taR\u0001\fg\u0016$\u0018J\u001c9vi\u000e{G\u000e\u0006\u0002I\u00136\t\u0001\u0001C\u0003K\u000b\u0002\u0007!%A\u0003wC2,X\rK\u0002FaYBQ!\u0014\u0001\u0005\u00029\u000bAb]3u\u001fV$\b/\u001e;D_2$\"\u0001S(\t\u000b)c\u0005\u0019\u0001\u0012)\u00071\u0003d\u0007C\u0003S\u0001\u0011\u00051+\u0001\u0007tKR4vnY1c'&TX\r\u0006\u0002I)\")!*\u0015a\u0001+B\u0011akV\u0007\u0002Q%\u0011\u0001\f\u000b\u0002\u0004\u0013:$\bfA)1m!)1\f\u0001C\u00019\u0006A1/\u001a;NS:$e\t\u0006\u0002I;\")!J\u0017a\u0001=B\u0011akX\u0005\u0003A\"\u0012a\u0001R8vE2,\u0007f\u0001.1m!)1\r\u0001C\u0
001I\u0006A1/\u001a;NCb$e\t\u0006\u0002IK\")!J\u0019a\u0001=\"\u001a!\rM4\"\u0003!\fQA\r\u00185]ABQA\u001b\u0001\u0005\u0002-\f\u0001b]3u\u001b&tGK\u0012\u000b\u0003\u00112DQAS5A\u0002yC3!\u001b\u00197\u0011\u0015y\u0007\u0001\"\u0001q\u0003%\u0019X\r\u001e\"j]\u0006\u0014\u0018\u0010\u0006\u0002Ic\")!J\u001ca\u0001eB\u0011ak]\u0005\u0003i\"\u0012qAQ8pY\u0016\fg\u000eK\u0002oaY\f\u0013a^\u0001\u0006e9\u0002d\u0006\r\u0005\u0006s\u0002!\tE_\u0001\u0004M&$HC\u0001\n|\u0011\u0015a\b\u00101\u0001~\u0003\u001d!\u0017\r^1tKR\u00044A`A\u0007!\u0015y\u0018QAA\u0005\u001b\t\t\tAC\u0002\u0002\u0004\u0019\t1a]9m\u0013\u0011\t9!!\u0001\u0003\u000f\u0011\u000bG/Y:fiB!\u00111BA\u0007\u0019\u0001!1\"a\u0004|\u0003\u0003\u0005\tQ!\u0001\u0002\u0012\t\u0019q\fJ\u0019\u0012\t\u0005M\u0011\u0011\u0004\t\u0004-\u0006U\u0011bAA\fQ\t9aj\u001c;iS:<\u0007c\u0001,\u0002\u001c%\u0019\u0011Q\u0004\u0015\u0003\u0007\u0005s\u0017\u0010K\u0002yaYDq!a\t\u0001\t\u0003\n)#A\bue\u0006t7OZ8s[N\u001b\u0007.Z7b)\u0011\t9#a\r\u0011\t\u0005%\u0012qF\u0007\u0003\u0003WQA!!\f\u0002\u0002\u0005)A/\u001f9fg&!\u0011\u0011GA\u0016\u0005)\u0019FO];diRK\b/\u001a\u0005\t\u0003k\t\t\u00031\u0001\u0002(\u000511o\u00195f[\u0006DC!!\t1m!9\u00111\b\u0001\u0005B\u0005u\u0012\u0001B2paf$2APA \u0011!\t\t%!\u000fA\u0002\u0005\r\u0013!B3yiJ\f\u0007\u0003BA#\u0003\u0017j!!a\u0012\u000b\u0007\u0005%C!A\u0003qCJ\fW.\u0003\u0003\u0002N\u0005\u001d#\u0001\u0003)be\u0006lW*\u00199)\t\u0005e\u0002G\u000e\u0015\u0004\u0001A2taBA+\u0005!\u0005\u0011qK\u0001\u0010\u0007>,h\u000e\u001e,fGR|'/\u001b>feB\u00191#!\u0017\u0007\r\u0005\u0011\u0001\u0012AA.'!\tI&!\u0018\u0002d\u0005%\u0004c\u0001,\u0002`%\u0019\u0011\u0011\r\u0015\u0003\r\u0005s\u0017PU3g!\u0011Q\u0012Q\r \n\u0007\u0005\u001d4DA\u000bEK\u001a\fW\u000f\u001c;QCJ\fWn\u001d*fC\u0012\f'\r\\3\u0011\u0007Y\u000bY'C\u0002\u0002n!\u0012AbU3sS\u0006d\u0017N_1cY\u0016Dq\u0001PA-\t\u0003\t\t\b\u0006\u0002\u0002X!A\u0011QOA-\t\u0003\n9(\u0001\u0003m_\u0006$Gc\u0001 
\u0002z!9\u00111PA:\u0001\u0004\u0011\u0013\u0001\u00029bi\"DS!a\u001d1\u0003\u007f\n#!!!\u0002\u000bErcG\f\u0019\t\u0015\u0005\u0015\u0015\u0011LA\u0001\n\u0013\t9)A\u0006sK\u0006$'+Z:pYZ,GCAAE!\u0011\tY)!&\u000e\u0005\u00055%\u0002BAH\u0003#\u000bA\u0001\\1oO*\u0011\u00111S\u0001\u0005U\u00064\u0018-\u0003\u0003\u0002\u0018\u00065%AB(cU\u0016\u001cG\u000fK\u0003\u0002ZA\ny\bK\u0003\u0002TA\ny\b")
public class CountVectorizer
extends Estimator<CountVectorizerModel>
implements CountVectorizerParams,
DefaultParamsWritable {
    /** Identifier of this estimator instance; assigned exactly once, in the constructor. */
    private final String uid;

    // The param fields below are intentionally NOT final: each one is assigned by the
    // matching `..._setter_$<name>_$eq` method of this class rather than directly in a
    // constructor, and Java rejects assignment to a final field from an ordinary method.
    // NOTE(review): those setters are presumably invoked by the trait `$init$` calls made
    // from the constructor — confirm against the trait implementations.
    private IntParam vocabSize;
    private DoubleParam minDF;
    private DoubleParam maxDF;
    private DoubleParam minTF;
    private BooleanParam binary;
    private Param<String> outputCol;
    private Param<String> inputCol;

    /**
     * Static forwarder to the companion object's {@code read()}.
     *
     * @return whatever {@code CountVectorizer$.MODULE$.read()} returns
     */
    public static MLReader<CountVectorizer> read() {
        final MLReader<CountVectorizer> reader = CountVectorizer$.MODULE$.read();
        return reader;
    }

    /**
     * Static forwarder to the companion object's {@code load(String)}.
     *
     * <p>The decompiler also emitted the compiler-generated bridge variant returning
     * {@code Object}. Java does not allow two methods that differ only in return type, so
     * only this covariant overload is kept; its result is still assignable to {@code Object}.
     *
     * @param string argument passed unchanged to {@code CountVectorizer$.MODULE$.load}
     *               (presumably the path of a previously saved instance — confirm)
     * @return the {@code CountVectorizer} produced by the companion object's {@code load}
     */
    public static CountVectorizer load(String string) {
        return CountVectorizer$.MODULE$.load(string);
    }

    /**
     * Forwards to the default implementation {@code DefaultParamsWritable.write$}.
     *
     * @return the writer produced by that default implementation for this instance
     */
    @Override
    public MLWriter write() {
        final MLWriter writer = DefaultParamsWritable.write$(this);
        return writer;
    }

    /**
     * Forwards to the default implementation {@code MLWritable.save$}.
     *
     * @param path argument handed unchanged to {@code MLWritable.save$}
     * @throws IOException declared by the overridden method
     */
    @Override
    public void save(String path) throws IOException {
        final CountVectorizer self = this;
        MLWritable.save$(self, path);
    }

    /**
     * Forwards to {@code CountVectorizerParams.getVocabSize$}.
     *
     * @return the value computed by that trait implementation
     *         (presumably the current {@code vocabSize} param value)
     */
    @Override
    public int getVocabSize() {
        final int size = CountVectorizerParams.getVocabSize$(this);
        return size;
    }

    /**
     * Forwards to {@code CountVectorizerParams.getMinDF$}.
     *
     * @return the value computed by that trait implementation
     *         (presumably the current {@code minDF} param value)
     */
    @Override
    public double getMinDF() {
        final double value = CountVectorizerParams.getMinDF$(this);
        return value;
    }

    /**
     * Forwards to {@code CountVectorizerParams.getMaxDF$}.
     *
     * @return the value computed by that trait implementation
     *         (presumably the current {@code maxDF} param value)
     */
    @Override
    public double getMaxDF() {
        final double value = CountVectorizerParams.getMaxDF$(this);
        return value;
    }

    /**
     * Forwards to {@code CountVectorizerParams.validateAndTransformSchema$}.
     *
     * @param schema schema handed unchanged to the trait implementation
     * @return the schema returned by the trait implementation
     */
    @Override
    public StructType validateAndTransformSchema(StructType schema) {
        final StructType result = CountVectorizerParams.validateAndTransformSchema$(this, schema);
        return result;
    }

    /**
     * Forwards to {@code CountVectorizerParams.getMinTF$}.
     *
     * @return the value computed by that trait implementation
     *         (presumably the current {@code minTF} param value)
     */
    @Override
    public double getMinTF() {
        final double value = CountVectorizerParams.getMinTF$(this);
        return value;
    }

    /**
     * Forwards to {@code CountVectorizerParams.getBinary$}.
     *
     * @return the value computed by that trait implementation
     *         (presumably the current {@code binary} param value)
     */
    @Override
    public boolean getBinary() {
        final boolean flag = CountVectorizerParams.getBinary$(this);
        return flag;
    }

    /**
     * Forwards to {@code HasOutputCol.getOutputCol$}.
     *
     * @return the string returned by that trait implementation
     *         (presumably the configured output column name)
     */
    @Override
    public final String getOutputCol() {
        final String column = HasOutputCol.getOutputCol$(this);
        return column;
    }

    /**
     * Forwards to {@code HasInputCol.getInputCol$}.
     *
     * @return the string returned by that trait implementation
     *         (presumably the configured input column name)
     */
    @Override
    public final String getInputCol() {
        final String column = HasInputCol.getInputCol$(this);
        return column;
    }

    /**
     * Accessor for the {@code vocabSize} param object (not its value).
     *
     * @return the {@link IntParam} stored in the {@code vocabSize} field
     */
    @Override
    public IntParam vocabSize() {
        return vocabSize;
    }

    /**
     * Accessor for the {@code minDF} param object (not its value).
     *
     * @return the {@link DoubleParam} stored in the {@code minDF} field
     */
    @Override
    public DoubleParam minDF() {
        return minDF;
    }

    /**
     * Accessor for the {@code maxDF} param object (not its value).
     *
     * @return the {@link DoubleParam} stored in the {@code maxDF} field
     */
    @Override
    public DoubleParam maxDF() {
        return maxDF;
    }

    /**
     * Accessor for the {@code minTF} param object (not its value).
     *
     * @return the {@link DoubleParam} stored in the {@code minTF} field
     */
    @Override
    public DoubleParam minTF() {
        return minTF;
    }

    /**
     * Accessor for the {@code binary} param object (not its value).
     *
     * @return the {@link BooleanParam} stored in the {@code binary} field
     */
    @Override
    public BooleanParam binary() {
        return binary;
    }

    /**
     * Stores the given param object in the {@code vocabSize} field.
     * NOTE(review): the mangled name suggests a compiler-generated trait setter — confirm.
     *
     * @param x$1 param object to store
     */
    @Override
    public void org$apache$spark$ml$feature$CountVectorizerParams$_setter_$vocabSize_$eq(IntParam x$1) {
        vocabSize = x$1;
    }

    /**
     * Stores the given param object in the {@code minDF} field.
     * NOTE(review): the mangled name suggests a compiler-generated trait setter — confirm.
     *
     * @param x$1 param object to store
     */
    @Override
    public void org$apache$spark$ml$feature$CountVectorizerParams$_setter_$minDF_$eq(DoubleParam x$1) {
        minDF = x$1;
    }

    /**
     * Stores the given param object in the {@code maxDF} field.
     * NOTE(review): the mangled name suggests a compiler-generated trait setter — confirm.
     *
     * @param x$1 param object to store
     */
    @Override
    public void org$apache$spark$ml$feature$CountVectorizerParams$_setter_$maxDF_$eq(DoubleParam x$1) {
        maxDF = x$1;
    }

    /**
     * Stores the given param object in the {@code minTF} field.
     * NOTE(review): the mangled name suggests a compiler-generated trait setter — confirm.
     *
     * @param x$1 param object to store
     */
    @Override
    public void org$apache$spark$ml$feature$CountVectorizerParams$_setter_$minTF_$eq(DoubleParam x$1) {
        minTF = x$1;
    }

    /**
     * Stores the given param object in the {@code binary} field.
     * NOTE(review): the mangled name suggests a compiler-generated trait setter — confirm.
     *
     * @param x$1 param object to store
     */
    @Override
    public void org$apache$spark$ml$feature$CountVectorizerParams$_setter_$binary_$eq(BooleanParam x$1) {
        binary = x$1;
    }

    /**
     * Accessor for the {@code outputCol} param object (not its value).
     *
     * @return the {@code Param<String>} stored in the {@code outputCol} field
     */
    @Override
    public final Param<String> outputCol() {
        return outputCol;
    }

    /**
     * Stores the given param object in the {@code outputCol} field.
     * NOTE(review): the mangled name suggests a compiler-generated trait setter — confirm.
     *
     * @param x$1 param object to store
     */
    @Override
    public final void org$apache$spark$ml$param$shared$HasOutputCol$_setter_$outputCol_$eq(Param<String> x$1) {
        outputCol = x$1;
    }

    /**
     * Accessor for the {@code inputCol} param object (not its value).
     *
     * @return the {@code Param<String>} stored in the {@code inputCol} field
     */
    @Override
    public final Param<String> inputCol() {
        return inputCol;
    }

    /**
     * Stores the given param object in the {@code inputCol} field.
     * NOTE(review): the mangled name suggests a compiler-generated trait setter — confirm.
     *
     * @param x$1 param object to store
     */
    @Override
    public final void org$apache$spark$ml$param$shared$HasInputCol$_setter_$inputCol_$eq(Param<String> x$1) {
        inputCol = x$1;
    }

    /**
     * Accessor for the identifier supplied at construction time.
     *
     * @return the value of the {@code uid} field
     */
    @Override
    public String uid() {
        return uid;
    }

    /**
     * Sets the {@code inputCol} param through {@code set}.
     *
     * @param value value to associate with the {@code inputCol} param
     * @return the result of {@code set}, cast to {@code CountVectorizer}
     */
    public CountVectorizer setInputCol(String value) {
        final Param<String> column = this.inputCol();
        return (CountVectorizer)this.set(column, value);
    }

    /**
     * Sets the {@code outputCol} param through {@code set}.
     *
     * @param value value to associate with the {@code outputCol} param
     * @return the result of {@code set}, cast to {@code CountVectorizer}
     */
    public CountVectorizer setOutputCol(String value) {
        final Param<String> column = this.outputCol();
        return (CountVectorizer)this.set(column, value);
    }

    /**
     * Sets the {@code vocabSize} param through {@code set}; the primitive is boxed first.
     *
     * @param value value to associate with the {@code vocabSize} param
     * @return the result of {@code set}, cast to {@code CountVectorizer}
     */
    public CountVectorizer setVocabSize(int value) {
        final IntParam param = this.vocabSize();
        final Integer boxed = BoxesRunTime.boxToInteger((int)value);
        return (CountVectorizer)this.set(param, boxed);
    }

    /**
     * Sets the {@code minDF} param through {@code set}; the primitive is boxed first.
     *
     * @param value value to associate with the {@code minDF} param
     * @return the result of {@code set}, cast to {@code CountVectorizer}
     */
    public CountVectorizer setMinDF(double value) {
        final DoubleParam param = this.minDF();
        final Double boxed = BoxesRunTime.boxToDouble((double)value);
        return (CountVectorizer)this.set(param, boxed);
    }

    /**
     * Sets the {@code maxDF} param through {@code set}; the primitive is boxed first.
     *
     * @param value value to associate with the {@code maxDF} param
     * @return the result of {@code set}, cast to {@code CountVectorizer}
     */
    public CountVectorizer setMaxDF(double value) {
        final DoubleParam param = this.maxDF();
        final Double boxed = BoxesRunTime.boxToDouble((double)value);
        return (CountVectorizer)this.set(param, boxed);
    }

    /**
     * Sets the {@code minTF} param through {@code set}; the primitive is boxed first.
     *
     * @param value value to associate with the {@code minTF} param
     * @return the result of {@code set}, cast to {@code CountVectorizer}
     */
    public CountVectorizer setMinTF(double value) {
        final DoubleParam param = this.minTF();
        final Double boxed = BoxesRunTime.boxToDouble((double)value);
        return (CountVectorizer)this.set(param, boxed);
    }

    /**
     * Sets the {@code binary} param through {@code set}; the primitive is boxed first.
     *
     * @param value value to associate with the {@code binary} param
     * @return the result of {@code set}, cast to {@code CountVectorizer}
     */
    public CountVectorizer setBinary(boolean value) {
        final BooleanParam param = this.binary();
        final Boolean boxed = BoxesRunTime.boxToBoolean((boolean)value);
        return (CountVectorizer)this.set(param, boxed);
    }

    /**
     * Builds a vocabulary from the input column of {@code dataset} and wraps it in a
     * {@link CountVectorizerModel}.
     *
     * <p>Steps performed:
     * <ol>
     *   <li>Validate the schema and read column 0 of the selected input column as an RDD of
     *       token sequences.</li>
     *   <li>If {@code minDF} or {@code maxDF} is below 1.0 it is used as a fraction of the row
     *       count, so the input is cached and counted to turn it into an absolute bound.</li>
     *   <li>Aggregate, per term, the total occurrence count and the number of rows that
     *       contain the term.</li>
     *   <li>If {@code isSet} reports {@code minDF} or {@code maxDF} as set, keep only the terms
     *       whose row count lies within {@code [minDf, maxDf]}.</li>
     *   <li>Select at most {@code vocabSize} terms, ranked by total occurrence count.</li>
     * </ol>
     *
     * <p>Both cached RDDs (the input, when it had to be counted, and the per-term counts) are
     * released in a {@code finally} block. The input is therefore still cached while the
     * per-term counts are actually computed, and nothing stays cached if a step throws.
     *
     * @param dataset dataset containing the configured input column
     * @return a model holding the selected vocabulary, with this estimator as its parent and
     *         this estimator's param values copied onto it
     * @throws IllegalArgumentException (raised by {@code Predef.require}) if the effective
     *         {@code maxDF} is smaller than the effective {@code minDF}, or if the resulting
     *         vocabulary is empty
     */
    @Override
    public CountVectorizerModel fit(Dataset<?> dataset) {
        this.transformSchema(dataset.schema(), true);
        int vocSize = BoxesRunTime.unboxToInt((Object)this.$(this.vocabSize()));
        double minDfParam = BoxesRunTime.unboxToDouble((Object)this.$(this.minDF()));
        double maxDfParam = BoxesRunTime.unboxToDouble((Object)this.$(this.maxDF()));
        RDD input = dataset.select(this.$(this.inputCol()), (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[0])).rdd().map((Function1 & Serializable & scala.Serializable)x$1 -> (Seq)x$1.getAs(0), ClassTag$.MODULE$.apply(Seq.class));
        // A bound below 1.0 is a fraction of the number of rows, so the rows must be counted.
        boolean countingRequired = minDfParam < 1.0 || maxDfParam < 1.0;
        RDD wordCounts = null;
        String[] vocab;
        try {
            // inputSize is only read when one of the bounds is fractional, i.e. exactly when
            // countingRequired is true; the 0L placeholder is never used in arithmetic.
            long inputSize = countingRequired ? input.cache().count() : 0L;
            double minDf = minDfParam >= 1.0 ? minDfParam : minDfParam * (double)inputSize;
            double maxDf = maxDfParam >= 1.0 ? maxDfParam : maxDfParam * (double)inputSize;
            Predef$.MODULE$.require(maxDf >= minDf, (Function0 & Serializable & scala.Serializable)() -> "maxDF must be >= minDF.");
            // Per row: count each distinct term, emitting (term, (countInRow, 1)); then sum
            // both components per term to get (totalCount, numberOfRowsContainingTerm).
            RDD allWordCounts = RDD$.MODULE$.rddToPairRDDFunctions(input.flatMap((Function1 & Serializable & scala.Serializable)x0$1 -> {
                Seq seq = x0$1;
                OpenHashMap.mcJ.sp wc = new OpenHashMap.mcJ.sp(ClassTag$.MODULE$.apply(String.class), ClassTag$.MODULE$.Long());
                seq.foreach(arg_0 -> CountVectorizer.$anonfun$fit$4$adapted((OpenHashMap)wc, arg_0));
                Iterable iterable = (Iterable)wc.map((Function1 & Serializable & scala.Serializable)x0$2 -> {
                    Tuple2 tuple2 = x0$2;
                    if (tuple2 == null) {
                        throw new MatchError((Object)tuple2);
                    }
                    String word = (String)tuple2._1();
                    long count = tuple2._2$mcJ$sp();
                    Tuple2 tuple22 = new Tuple2((Object)word, (Object)new Tuple2.mcJI.sp(count, 1));
                    return tuple22;
                }, Iterable$.MODULE$.canBuildFrom());
                return iterable;
            }, ClassTag$.MODULE$.apply(Tuple2.class)), ClassTag$.MODULE$.apply(String.class), ClassTag$.MODULE$.apply(Tuple2.class), (Ordering)Ordering.String$.MODULE$).reduceByKey((Function2 & Serializable & scala.Serializable)(x0$3, x1$1) -> {
                int df1;
                long wc1;
                Tuple2 tuple2;
                block3: {
                    Tuple2 tuple22;
                    block2: {
                        tuple22 = new Tuple2(x0$3, x1$1);
                        if (tuple22 == null) break block2;
                        Tuple2 tuple23 = (Tuple2)tuple22._1();
                        tuple2 = (Tuple2)tuple22._2();
                        if (tuple23 == null) break block2;
                        wc1 = tuple23._1$mcJ$sp();
                        df1 = tuple23._2$mcI$sp();
                        if (tuple2 != null) break block3;
                    }
                    throw new MatchError((Object)tuple22);
                }
                long wc2 = tuple2._1$mcJ$sp();
                int df2 = tuple2._2$mcI$sp();
                Tuple2.mcJI.sp sp2 = new Tuple2.mcJI.sp(wc1 + wc2, df1 + df2);
                return sp2;
            });
            boolean filteringRequired = this.isSet(this.minDF()) || this.isSet(this.maxDF());
            RDD maybeFilteredWordCounts = filteringRequired ? allWordCounts.filter((Function1 & Serializable & scala.Serializable)x0$4 -> BoxesRunTime.boxToBoolean((boolean)CountVectorizer.$anonfun$fit$9(minDf, maxDf, x0$4))) : allWordCounts;
            // Drop the row-count component; only (term, totalCount) is needed for ranking.
            wordCounts = maybeFilteredWordCounts.map((Function1 & Serializable & scala.Serializable)x0$5 -> {
                Tuple2 tuple2;
                String word;
                block3: {
                    Tuple2 tuple22;
                    block2: {
                        tuple22 = x0$5;
                        if (tuple22 == null) break block2;
                        word = (String)tuple22._1();
                        tuple2 = (Tuple2)tuple22._2();
                        if (tuple2 != null) break block3;
                    }
                    throw new MatchError((Object)tuple22);
                }
                long count = tuple2._1$mcJ$sp();
                Tuple2 tuple23 = new Tuple2((Object)word, (Object)BoxesRunTime.boxToLong((long)count));
                return tuple23;
            }, ClassTag$.MODULE$.apply(Tuple2.class)).cache();
            long fullVocabSize = wordCounts.count();
            vocab = (String[])new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])wordCounts.top((int)scala.math.package$.MODULE$.min(fullVocabSize, (long)vocSize), package$.MODULE$.Ordering().by((Function1 & Serializable & scala.Serializable)x$3 -> BoxesRunTime.boxToLong((long)x$3._2$mcJ$sp()), (Ordering)Ordering.Long$.MODULE$)))).map((Function1 & Serializable & scala.Serializable)x$4 -> (String)x$4._1(), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(String.class)));
        } finally {
            // Release cached data only after every action has run (or failed). Unpersisting
            // the input earlier would discard the cache before the lazily evaluated
            // per-term counts are computed from it.
            if (wordCounts != null) {
                wordCounts.unpersist(wordCounts.unpersist$default$1());
            }
            if (countingRequired) {
                input.unpersist(input.unpersist$default$1());
            }
        }
        Predef$.MODULE$.require(vocab.length > 0, (Function0 & Serializable & scala.Serializable)() -> "The vocabulary size should be > 0. Lower minDF as necessary.");
        return this.copyValues(new CountVectorizerModel(this.uid(), vocab).setParent(this), this.copyValues$default$2());
    }

    /**
     * Derives the output schema by forwarding to {@link #validateAndTransformSchema}.
     *
     * @param schema input schema
     * @return the schema returned by {@code validateAndTransformSchema}
     */
    @Override
    public StructType transformSchema(StructType schema) {
        final StructType transformed = this.validateAndTransformSchema(schema);
        return transformed;
    }

    /**
     * Creates a copy through {@code defaultCopy}.
     *
     * @param extra param map handed unchanged to {@code defaultCopy}
     * @return the result of {@code defaultCopy}, cast to {@code CountVectorizer}
     */
    @Override
    public CountVectorizer copy(ParamMap extra) {
        final Object duplicate = this.defaultCopy(extra);
        return (CountVectorizer)duplicate;
    }

    /**
     * Filter predicate used by {@code fit}: reads the int in the second slot of the pair's
     * value (named {@code df} in the decompiled source) and tests it against both bounds.
     *
     * @param minDf$1 inclusive lower bound
     * @param maxDf$1 inclusive upper bound
     * @param x0$4 pair whose second component is itself a pair carrying the int to test
     * @return {@code true} only when {@code minDf$1 <= df <= maxDf$1}
     * @throws MatchError if {@code x0$4} or its second component is {@code null}
     */
    public static final /* synthetic */ boolean $anonfun$fit$9(double minDf$1, double maxDf$1, Tuple2 x0$4) {
        if (x0$4 == null) {
            throw new MatchError((Object)x0$4);
        }
        Tuple2 counts = (Tuple2)x0$4._2();
        if (counts == null) {
            throw new MatchError((Object)x0$4);
        }
        double docFreq = (double)counts._2$mcI$sp();
        // Kept as >= / <= (not negated < / >) so a NaN bound still yields false.
        if (!(docFreq >= minDf$1)) {
            return false;
        }
        return docFreq <= maxDf$1;
    }

    /**
     * Creates an estimator with the given identifier.
     *
     * <p>After storing {@code uid}, runs the static {@code $init$} hook of each mixed-in
     * interface in the order listed below.
     * NOTE(review): these hooks presumably populate the param fields through the
     * {@code ..._setter_$..._$eq} methods, and the call order presumably mirrors the trait
     * initialisation order of the original source — confirm before reordering.
     *
     * @param uid identifier stored in the {@code uid} field
     */
    public CountVectorizer(String uid) {
        this.uid = uid;
        HasInputCol.$init$(this);
        HasOutputCol.$init$(this);
        CountVectorizerParams.$init$(this);
        MLWritable.$init$(this);
        DefaultParamsWritable.$init$(this);
    }

    /**
     * Creates an estimator whose identifier is obtained from
     * {@code Identifiable$.MODULE$.randomUID("cntVec")} and passed to the
     * {@code CountVectorizer(String)} constructor.
     */
    public CountVectorizer() {
        this(Identifiable$.MODULE$.randomUID("cntVec"));
    }

    /**
     * Per-token callback used by {@code fit}: updates the entry for {@code w} in the map via
     * {@code changeValue}, supplying 1 as the initial value and "previous + 1" as the update.
     *
     * @param wc$1 map from token to running count
     * @param w token whose count is updated
     * @return the boxed long returned by {@code changeValue}
     */
    public static final /* synthetic */ Object $anonfun$fit$4$adapted(OpenHashMap wc$1, String w) {
        final Function0 initialCount = (Function0)(JFunction0.mcJ.sp & Serializable & scala.Serializable)() -> 1L;
        final Function1 increment = (Function1)(JFunction1.mcJJ.sp & Serializable & scala.Serializable)x$2 -> x$2 + 1L;
        final long updated = wc$1.changeValue$mcJ$sp((Object)w, initialCount, increment);
        return BoxesRunTime.boxToLong((long)updated);
    }
}

