/*
 * Decompiled with CFR 0.152.
 */
package org.apache.spark.ml.feature;

import java.io.IOException;
import java.io.Serializable;
import org.apache.spark.ml.UnaryTransformer;
import org.apache.spark.ml.feature.RegexTokenizer$;
import org.apache.spark.ml.param.BooleanParam;
import org.apache.spark.ml.param.IntParam;
import org.apache.spark.ml.param.Param;
import org.apache.spark.ml.param.ParamMap;
import org.apache.spark.ml.param.ParamPair;
import org.apache.spark.ml.param.ParamValidators$;
import org.apache.spark.ml.util.DefaultParamsWritable;
import org.apache.spark.ml.util.Identifiable;
import org.apache.spark.ml.util.Identifiable$;
import org.apache.spark.ml.util.MLReader;
import org.apache.spark.ml.util.MLWritable;
import org.apache.spark.ml.util.MLWriter;
import org.apache.spark.sql.types.ArrayType;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.StringType$;
import scala.Function0;
import scala.Function1;
import scala.Predef$;
import scala.collection.Seq;
import scala.collection.immutable.StringOps;
import scala.collection.mutable.ArrayOps;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxesRunTime;
import scala.util.matching.Regex;

@ScalaSignature(bytes="\u0006\u0001\u0005%g\u0001B\u0001\u0003\u00015\u0011aBU3hKb$vn[3oSj,'O\u0003\u0002\u0004\t\u00059a-Z1ukJ,'BA\u0003\u0007\u0003\tiGN\u0003\u0002\b\u0011\u0005)1\u000f]1sW*\u0011\u0011BC\u0001\u0007CB\f7\r[3\u000b\u0003-\t1a\u001c:h\u0007\u0001\u00192\u0001\u0001\b+!\u0015y\u0001CE\u0010)\u001b\u0005!\u0011BA\t\u0005\u0005A)f.\u0019:z)J\fgn\u001d4pe6,'\u000f\u0005\u0002\u001499\u0011AC\u0007\t\u0003+ai\u0011A\u0006\u0006\u0003/1\ta\u0001\u0010:p_Rt$\"A\r\u0002\u000bM\u001c\u0017\r\\1\n\u0005mA\u0012A\u0002)sK\u0012,g-\u0003\u0002\u001e=\t11\u000b\u001e:j]\u001eT!a\u0007\r\u0011\u0007\u0001*#C\u0004\u0002\"G9\u0011QCI\u0005\u00023%\u0011A\u0005G\u0001\ba\u0006\u001c7.Y4f\u0013\t1sEA\u0002TKFT!\u0001\n\r\u0011\u0005%\u0002Q\"\u0001\u0002\u0011\u0005-rS\"\u0001\u0017\u000b\u00055\"\u0011\u0001B;uS2L!a\f\u0017\u0003+\u0011+g-Y;miB\u000b'/Y7t/JLG/\u00192mK\"A\u0011\u0007\u0001BC\u0002\u0013\u0005#'A\u0002vS\u0012,\u0012A\u0005\u0015\u0004aQR\u0004CA\u001b9\u001b\u00051$BA\u001c\u0007\u0003)\tgN\\8uCRLwN\\\u0005\u0003sY\u0012QaU5oG\u0016\f\u0013aO\u0001\u0006c9\"d\u0006\r\u0005\t{\u0001\u0011\t\u0011)A\u0005%\u0005!Q/\u001b3!Q\raDG\u000f\u0005\u0006\u0001\u0002!\t!Q\u0001\u0007y%t\u0017\u000e\u001e \u0015\u0005!\u0012\u0005\"B\u0019@\u0001\u0004\u0011\u0002f\u0001\"5u!\u001aq\b\u000e\u001e\t\u000b\u0001\u0003A\u0011\u0001$\u0015\u0003!B3!\u0012\u001b;\u0011\u001dI\u0005A1A\u0005\u0002)\u000ba\"\\5o)>\\WM\u001c'f]\u001e$\b.F\u0001L!\tau*D\u0001N\u0015\tqE!A\u0003qCJ\fW.\u0003\u0002Q\u001b\nA\u0011J\u001c;QCJ\fW\u000eK\u0002IiiBaa\u0015\u0001!\u0002\u0013Y\u0015aD7j]R{7.\u001a8MK:<G\u000f\u001b\u0011)\u0007I#$\bC\u0003W\u0001\u0011\u0005q+A\ttKRl\u0015N\u001c+pW\u0016tG*\u001a8hi\"$\"\u0001W-\u000e\u0003\u0001AQAW+A\u0002m\u000bQA^1mk\u0016\u0004\"\u0001X/\u000e\u0003aI!A\u0018\r\u0003\u0007%sG\u000fK\u0002ViiBQ!\u0019\u0001\u0005\u0002\t\f\u0011cZ3u\u001b&tGk\\6f]2+gn\u001a;i+\u0005Y\u0006f\u000115u!9Q\r\u0001b\u0001\n\u00031\u0017\u0001B4baN,\u0012a\u001a\t\u0003\u0019\"L!!['\u0003\u0019\t{w\u000e\\3b]B\u000b'/Y7)\u0007\u0011$$\b\u0003\u0004m\u0001\u0001\u0006IaZ\u0001\u0006O\u0006\u00048\u000f\t\u0015\u0004WRR\u0004\"B8\u0001\t\u0003\u0001\u0018aB:fi\u001e\u000b\u0007o\u001d\u000b\u00031FDQA\u00178A\u0002I\u0004\"\u0001X:\n\u0005QD\"a\u0002\"p_2,\u0017M\u001c\u0015\u0004]RR\u0004\"B<\u0001\t\u0003A\u0018aB4fi\u001e\u000b\u0007o]\u000b\u0002e\"\u001aa\u000f\u000e\u001e\t\u000fm\u0004!\u0019!C\u0001y\u00069\u0001/\u0019;uKJtW#A?\u0011\u00071s(#\u0003\u0002\u0000\u001b\n)\u0001+\u0019:b[\"\u001a!\u0010\u000e\u001e\t\u000f\u0005\u0015\u0001\u0001)A\u0005{\u0006A\u0001/\u0019;uKJt\u0007\u0005\u000b\u0003\u0002\u0004QR\u0004bBA\u0006\u0001\u0011\u0005\u0011QB\u0001\u000bg\u0016$\b+\u0019;uKJtGc\u0001-\u0002\u0010!1!,!\u0003A\u0002IAC!!\u00035u!1\u0011Q\u0003\u0001\u0005\u0002I\n!bZ3u!\u0006$H/\u001a:oQ\u0011\t\u0019\u0002\u000e\u001e\t\u0011\u0005m\u0001A1A\u0005\u0006\u0019\f1\u0002^8M_^,'oY1tK\"*\u0011\u0011\u0004\u001b\u0002 \u0005\u0012\u0011\u0011E\u0001\u0006c92d\u0006\r\u0005\b\u0003K\u0001\u0001\u0015!\u0004h\u00031!x\u000eT8xKJ\u001c\u0017m]3!Q\u0015\t\u0019\u0003NA\u0010\u0011\u001d\tY\u0003\u0001C\u0001\u0003[\tab]3u)>dun^3sG\u0006\u001cX\rF\u0002Y\u0003_AaAWA\u0015\u0001\u0004\u0011\b&BA\u0015i\u0005}\u0001BBA\u001b\u0001\u0011\u0005\u00010\u0001\bhKR$v\u000eT8xKJ\u001c\u0017m]3)\u000b\u0005MB'a\b\t\u000f\u0005m\u0002\u0001\"\u0015\u0002>\u0005\u00192M]3bi\u0016$&/\u00198tM>\u0014XNR;oGV\u0011\u0011q\b\t\u00069\u0006\u0005#cH\u0005\u0004\u0003\u0007B\"!\u0003$v]\u000e$\u0018n\u001c82\u0011\u001d\t9\u0005\u0001C)\u0003\u0013\n\u0011C^1mS\u0012\fG/Z%oaV$H+\u001f9f)\u0011\tY%!\u0015\u0011\u0007q\u000bi%C\u0002\u0002Pa\u0011A!\u00168ji\"A\u00111KA#\u0001\u0004\t)&A\u0005j]B,H\u000fV=qKB!\u0011qKA1\u001b\t\tIF\u0003\u0003\u0002\\\u0005u\u0013!\u0002;za\u0016\u001c(bAA0\r\u0005\u00191/\u001d7\n\t\u0005\r\u0014\u0011\f\u0002\t\t\u0006$\u0018\rV=qK\"9\u0011q\r\u0001\u0005R\u0005%\u0014AD8viB,H\u000fR1uCRK\b/Z\u000b\u0003\u0003+Bq!!\u001c\u0001\t\u0003\ny'\u0001\u0003d_BLHc\u0001\u0015\u0002r!A\u00111OA6\u0001\u0004\t)(A\u0003fqR\u0014\u0018\rE\u0002M\u0003oJ1!!\u001fN\u0005!\u0001\u0016M]1n\u001b\u0006\u0004\b&BA6i\u0005u\u0014EAA@\u0003\u0015\td\u0006\u000e\u00182Q\r\u0001AGO\u0004\b\u0003\u000b\u0013\u0001\u0012AAD\u00039\u0011VmZ3y)>\\WM\\5{KJ\u00042!KAE\r\u0019\t!\u0001#\u0001\u0002\fNA\u0011\u0011RAG\u0003'\u000bI\nE\u0002]\u0003\u001fK1!!%\u0019\u0005\u0019\te.\u001f*fMB!1&!&)\u0013\r\t9\n\f\u0002\u0016\t\u00164\u0017-\u001e7u!\u0006\u0014\u0018-\\:SK\u0006$\u0017M\u00197f!\ra\u00161T\u0005\u0004\u0003;C\"\u0001D*fe&\fG.\u001b>bE2,\u0007b\u0002!\u0002\n\u0012\u0005\u0011\u0011\u0015\u000b\u0003\u0003\u000fC\u0001\"!*\u0002\n\u0012\u0005\u0013qU\u0001\u0005Y>\fG\rF\u0002)\u0003SCq!a+\u0002$\u0002\u0007!#\u0001\u0003qCRD\u0007&BARi\u0005}\u0001BCAY\u0003\u0013\u000b\t\u0011\"\u0003\u00024\u0006Y!/Z1e%\u0016\u001cx\u000e\u001c<f)\t\t)\f\u0005\u0003\u00028\u0006\u0005WBAA]\u0015\u0011\tY,!0\u0002\t1\fgn\u001a\u0006\u0003\u0003\u007f\u000bAA[1wC&!\u00111YA]\u0005\u0019y%M[3di\"*\u0011\u0011\u0012\u001b\u0002 !*\u00111\u0011\u001b\u0002 \u0001")
public class RegexTokenizer
extends UnaryTransformer<String, Seq<String>, RegexTokenizer>
implements DefaultParamsWritable {
    private final String uid;
    private final IntParam minTokenLength;
    private final BooleanParam gaps;
    private final Param<String> pattern;
    private final BooleanParam toLowercase;

    public static MLReader<RegexTokenizer> read() {
        return RegexTokenizer$.MODULE$.read();
    }

    public static /* bridge */ Object load(String string) {
        return RegexTokenizer$.MODULE$.load(string);
    }

    public static RegexTokenizer load(String string) {
        return RegexTokenizer$.MODULE$.load(string);
    }

    @Override
    public MLWriter write() {
        return DefaultParamsWritable.write$(this);
    }

    @Override
    public void save(String path) throws IOException {
        MLWritable.save$(this, path);
    }

    @Override
    public String uid() {
        return this.uid;
    }

    public IntParam minTokenLength() {
        return this.minTokenLength;
    }

    public RegexTokenizer setMinTokenLength(int value) {
        return (RegexTokenizer)this.set(this.minTokenLength(), BoxesRunTime.boxToInteger((int)value));
    }

    public int getMinTokenLength() {
        return BoxesRunTime.unboxToInt((Object)this.$(this.minTokenLength()));
    }

    public BooleanParam gaps() {
        return this.gaps;
    }

    public RegexTokenizer setGaps(boolean value) {
        return (RegexTokenizer)this.set(this.gaps(), BoxesRunTime.boxToBoolean((boolean)value));
    }

    public boolean getGaps() {
        return BoxesRunTime.unboxToBoolean((Object)this.$(this.gaps()));
    }

    public Param<String> pattern() {
        return this.pattern;
    }

    public RegexTokenizer setPattern(String value) {
        return (RegexTokenizer)this.set(this.pattern(), value);
    }

    public String getPattern() {
        return this.$(this.pattern());
    }

    public final BooleanParam toLowercase() {
        return this.toLowercase;
    }

    public RegexTokenizer setToLowercase(boolean value) {
        return (RegexTokenizer)this.set(this.toLowercase(), BoxesRunTime.boxToBoolean((boolean)value));
    }

    public boolean getToLowercase() {
        return BoxesRunTime.unboxToBoolean((Object)this.$(this.toLowercase()));
    }

    @Override
    public Function1<String, Seq<String>> createTransformFunc() {
        return (Function1 & Serializable & scala.Serializable)originStr -> {
            Regex re = new StringOps(Predef$.MODULE$.augmentString(this.$(this.pattern()))).r();
            String str = BoxesRunTime.unboxToBoolean((Object)this.$(this.toLowercase())) ? originStr.toLowerCase() : originStr;
            Seq tokens = BoxesRunTime.unboxToBoolean((Object)this.$(this.gaps())) ? new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])re.split((CharSequence)str))).toSeq() : re.findAllIn((CharSequence)str).toSeq();
            int minLength = BoxesRunTime.unboxToInt((Object)this.$(this.minTokenLength()));
            return (Seq)tokens.filter((Function1 & Serializable & scala.Serializable)x$2 -> BoxesRunTime.boxToBoolean((boolean)RegexTokenizer.$anonfun$createTransformFunc$3(minLength, x$2)));
        };
    }

    @Override
    public void validateInputType(DataType inputType) {
        DataType dataType = inputType;
        StringType$ stringType$ = StringType$.MODULE$;
        Predef$.MODULE$.require(!(dataType != null ? !dataType.equals(stringType$) : stringType$ != null), (Function0 & Serializable & scala.Serializable)() -> new StringBuilder(40).append("Input type must be string type but got ").append(inputType).append(".").toString());
    }

    @Override
    public DataType outputDataType() {
        return new ArrayType((DataType)StringType$.MODULE$, true);
    }

    @Override
    public RegexTokenizer copy(ParamMap extra) {
        return (RegexTokenizer)this.defaultCopy(extra);
    }

    public static final /* synthetic */ boolean $anonfun$createTransformFunc$3(int minLength$1, String x$2) {
        return x$2.length() >= minLength$1;
    }

    public RegexTokenizer(String uid) {
        this.uid = uid;
        MLWritable.$init$(this);
        DefaultParamsWritable.$init$(this);
        this.minTokenLength = new IntParam((Identifiable)this, "minTokenLength", "minimum token length (>= 0)", (Function1<Object, Object>)ParamValidators$.MODULE$.gtEq(0.0));
        this.gaps = new BooleanParam(this, "gaps", "Set regex to match gaps or tokens");
        this.pattern = new Param(this, "pattern", "regex pattern used for tokenizing");
        this.toLowercase = new BooleanParam(this, "toLowercase", "whether to convert all characters to lowercase before tokenizing.");
        this.setDefault((Seq<ParamPair<?>>)Predef$.MODULE$.wrapRefArray((Object[])new ParamPair[]{this.minTokenLength().$minus$greater(BoxesRunTime.boxToInteger((int)1)), this.gaps().$minus$greater(BoxesRunTime.boxToBoolean((boolean)true)), this.pattern().$minus$greater("\\s+"), this.toLowercase().$minus$greater(BoxesRunTime.boxToBoolean((boolean)true))}));
    }

    public RegexTokenizer() {
        this(Identifiable$.MODULE$.randomUID("regexTok"));
    }
}

