/*
 * Decompiled with CFR 0.152.
 */
package com.johnsnowlabs.ml.crf;

import com.johnsnowlabs.ml.crf.CrfDataset;
import com.johnsnowlabs.ml.crf.DatasetEncoder;
import com.johnsnowlabs.ml.crf.DatasetEncoder$;
import com.johnsnowlabs.ml.crf.DatasetMetadata;
import com.johnsnowlabs.ml.crf.Instance;
import com.johnsnowlabs.ml.crf.InstanceLabels;
import com.johnsnowlabs.ml.crf.SparseArray;
import com.johnsnowlabs.ml.crf.TextSentenceAttrs;
import com.johnsnowlabs.ml.crf.TextSentenceLabels;
import com.johnsnowlabs.ml.crf.WordAttrs;
import com.johnsnowlabs.ml.crf.WordAttrs$;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.Serializable;
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
import scala.Array$;
import scala.Function0;
import scala.Function1;
import scala.MatchError;
import scala.None$;
import scala.Option;
import scala.Option$;
import scala.Predef$;
import scala.Some;
import scala.Tuple2;
import scala.collection.GenTraversableOnce;
import scala.collection.Iterable;
import scala.collection.Iterator;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.SeqLike;
import scala.collection.TraversableLike;
import scala.collection.TraversableOnce;
import scala.collection.TraversableOnce$;
import scala.collection.generic.GenericTraversableTemplate;
import scala.collection.mutable.ArrayBuffer;
import scala.collection.mutable.ArrayOps;
import scala.io.BufferedSource;
import scala.io.Codec$;
import scala.io.Source;
import scala.io.Source$;
import scala.math.Ordering;
import scala.reflect.ClassTag$;
import scala.runtime.BoxesRunTime;
import scala.runtime.ObjectRef;
import scala.runtime.java8.JFunction0;

public final class DatasetReader$ {
    /** The single shared instance of this reader; assigned by the private constructor. */
    public static DatasetReader$ MODULE$;

    static {
        // The constructor stores `this` into MODULE$, so the result of `new` is
        // intentionally discarded here.
        new DatasetReader$();
    }

    /**
     * Opens {@code file} as a character {@link Source}.
     *
     * <p>Paths ending in {@code .gz} are read through a {@link GzipCompressorInputStream};
     * every other path is opened directly with {@code Source.fromFile}. Both branches
     * decode with {@code Codec.fallbackSystemCodec()}.
     *
     * <p>The returned source is not closed by this method; the caller owns it.
     *
     * @param file path of the (optionally gzip-compressed) file to open
     * @return a source over the decoded characters of the file
     */
    private Source getSource(String file) {
        if (!file.endsWith(".gz")) {
            return Source$.MODULE$.fromFile(file, Codec$.MODULE$.fallbackSystemCodec());
        }
        FileInputStream fis = new FileInputStream(file);
        try {
            GzipCompressorInputStream zis = new GzipCompressorInputStream((InputStream)fis);
            return Source$.MODULE$.fromInputStream((InputStream)zis, Codec$.MODULE$.fallbackSystemCodec());
        } catch (Throwable failure) {
            // Wrapping failed (e.g. the file is not valid gzip), so nothing else owns
            // the raw stream yet: close it here instead of leaking the file handle.
            try {
                fis.close();
            } catch (Throwable closeFailure) {
                failure.addSuppressed(closeFailure);
            }
            throw failure;
        }
    }

    /**
     * Lazily reads a tab-separated, labelled dataset and groups its lines into sentences.
     *
     * <p>Each token line has the form {@code label<TAB>attr=value<TAB>attr=value...}.
     * A line with at most one tab-separated field ends the current sentence; it emits
     * nothing when no tokens have been collected since the previous separator.
     *
     * <p>A feature is split on its first {@code '='} only, so a value may itself contain
     * {@code '='}. A feature without {@code '='} gets the empty string as its value.
     *
     * <p>The result is produced by a lazy {@code flatMap} over the line iterator, and the
     * lambda mutates the shared {@code tokens}/{@code labels} buffers, so it is meant to be
     * traversed once. The underlying source is not closed by this method.
     *
     * @param file      path handed to {@code getSource}
     * @param skipLines number of leading lines to drop before parsing
     * @return one (labels, attributes) pair per sentence
     */
    private TraversableOnce<Tuple2<TextSentenceLabels, TextSentenceAttrs>> readWithLabels(String file, int skipLines) {
        Iterator fileLines = this.getSource(file).getLines().drop(skipLines);
        // Append one sentinel blank line so that a final sentence which is not followed by
        // a separator line in the file is still flushed instead of being silently dropped.
        // If the file already ends with a separator, the sentinel finds no pending tokens
        // and yields nothing.
        Iterator lines = fileLines.$plus$plus((Function0 & Serializable & scala.Serializable)() -> Option$.MODULE$.option2Iterable((Option)new Some((Object)"")));
        ObjectRef labels = ObjectRef.create((Object)new ArrayBuffer());
        ObjectRef tokens = ObjectRef.create((Object)new ArrayBuffer());
        return lines.flatMap((Function1 & Serializable & scala.Serializable)line -> {
            String[] words = ((String)line).split("\t");
            if (words.length <= 1) {
                // Separator line: emit the buffered sentence (if any) and reset the buffers.
                return Option$.MODULE$.option2Iterable(DatasetReader$.addToResultIfExists$1(tokens, labels));
            }
            // words[0] is the label; every remaining field is an "attr=value" feature.
            Tuple2[] attrValues = (Tuple2[])new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])words)).drop(1))).map((Function1 & Serializable & scala.Serializable)feature -> {
                // Limit 2 keeps '=' characters inside the value and always yields at
                // least one element, so attrValue[0] cannot go out of bounds.
                String[] attrValue = ((String)feature).split("=", 2);
                String attr = attrValue[0];
                String value = attrValue.length == 1 ? "" : attrValue[1];
                return new Tuple2((Object)attr, (Object)value);
            }, Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Tuple2.class)));
            ((ArrayBuffer)tokens.elem).append((Seq)Predef$.MODULE$.wrapRefArray((Object[])new WordAttrs[]{new WordAttrs((Seq<Tuple2<String, String>>)Predef$.MODULE$.wrapRefArray((Object[])attrValues), WordAttrs$.MODULE$.apply$default$2())}));
            ((ArrayBuffer)labels.elem).append((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{words[0]}));
            // A token line only accumulates state; nothing is emitted for it.
            return Option$.MODULE$.option2Iterable((Option)None$.MODULE$);
        });
    }

    /**
     * Default value for the second parameter ({@code skipLines}) of {@code readWithLabels}.
     *
     * @return {@code 0}
     */
    private int readWithLabels$default$2() {
        return 0;
    }

    /**
     * Encodes text sentences into a {@link CrfDataset}, using a freshly created
     * {@link DatasetEncoder} for the whole traversal.
     *
     * <p>For every (labels, attributes) pair the labels are zipped with the words, and each
     * token is passed to {@code metadata.getFeatures(prevLabel, label, attrs, numAttrs)}.
     * Here {@code attrs} are the word's string attributes rendered as {@code "name=value"},
     * and {@code prevLabel} starts at {@code metadata.startLabel()} for each sentence and is
     * then set to the label of the token just processed.
     *
     * @param source sentences to encode; consumed via {@code map(...).toArray(...)}
     * @return a dataset holding the encoded instances together with {@code metadata.getMetadata()}
     * @throws MatchError if an element of {@code source}, a zipped pair, or a result of
     *         {@code getFeatures}/{@code unzip} is {@code null}
     */
    public CrfDataset encodeDataset(TraversableOnce<Tuple2<TextSentenceLabels, TextSentenceAttrs>> source) {
        DatasetEncoder metadata = new DatasetEncoder(DatasetEncoder$.MODULE$.$lessinit$greater$default$1());
        Tuple2[] instances = (Tuple2[])TraversableOnce$.MODULE$.MonadOps(source).map((Function1 & Serializable & scala.Serializable)x0$1 -> {
            Tuple2 tuple2;
            Tuple2 tuple22 = x0$1;
            if (tuple22 != null) {
                TextSentenceLabels textLabels = (TextSentenceLabels)tuple22._1();
                TextSentenceAttrs textSentence = (TextSentenceAttrs)tuple22._2();
                // Mutable holder: the inner lambda both reads and overwrites the previous label.
                // NOTE(review): `prevLabel$1` below appears to be the decompiler's name for this
                // captured `prevLabel` reference.
                ObjectRef prevLabel = ObjectRef.create((Object)metadata.startLabel());
                Tuple2 tuple23 = ((GenericTraversableTemplate)((TraversableLike)textLabels.labels().zip(textSentence.words(), Seq$.MODULE$.canBuildFrom())).map((Function1 & Serializable & scala.Serializable)x0$2 -> {
                    Tuple2 tuple2;
                    String label;
                    Tuple2 tuple22 = x0$2;
                    if (tuple22 != null) {
                        WordAttrs word;
                        Seq attrs;
                        label = (String)tuple22._1();
                        // attrs = the word's string attributes as "name=value"; getFeatures returns
                        // an (int, SparseArray) pair, destructured below as labelId/features.
                        Tuple2<Object, SparseArray> tuple23 = metadata.getFeatures((String)prevLabel$1.elem, label, (Seq<String>)(attrs = (Seq)(word = (WordAttrs)tuple22._2()).strAttrs().map((Function1 & Serializable & scala.Serializable)a -> new StringBuilder(1).append((String)a._1()).append("=").append(a._2()).toString(), Seq$.MODULE$.canBuildFrom())), (Seq<Object>)Predef$.MODULE$.wrapFloatArray(word.numAttrs()));
                        if (tuple23 == null) {
                            throw new MatchError(tuple23);
                        }
                        int labelId = tuple23._1$mcI$sp();
                        SparseArray features = (SparseArray)tuple23._2();
                        tuple2 = new Tuple2((Object)BoxesRunTime.boxToInteger((int)labelId), (Object)features);
                    } else {
                        throw new MatchError((Object)tuple22);
                    }
                    Tuple2 tuple24 = tuple2;
                    int labelId = tuple24._1$mcI$sp();
                    SparseArray features = (SparseArray)tuple24._2();
                    // The current label becomes the "previous" label for the next token.
                    prevLabel$1.elem = label;
                    Tuple2 tuple25 = new Tuple2((Object)BoxesRunTime.boxToInteger((int)labelId), (Object)features);
                    return tuple25;
                }, Seq$.MODULE$.canBuildFrom())).unzip((Function1)Predef$.MODULE$.$conforms());
                if (tuple23 == null) {
                    throw new MatchError((Object)tuple23);
                }
                // unzip split the per-token pairs into a sequence of ids and a sequence of features.
                Seq labels = (Seq)tuple23._1();
                Seq features = (Seq)tuple23._2();
                tuple2 = new Tuple2((Object)labels, (Object)features);
            } else {
                throw new MatchError((Object)tuple22);
            }
            Tuple2 tuple24 = tuple2;
            Seq labels = (Seq)tuple24._1();
            Seq features = (Seq)tuple24._2();
            Tuple2 tuple25 = new Tuple2((Object)new InstanceLabels((Seq<Object>)labels), (Object)new Instance((Seq<SparseArray>)features));
            return tuple25;
        }).toArray(ClassTag$.MODULE$.apply(Tuple2.class));
        return new CrfDataset((Seq<Tuple2<InstanceLabels, Instance>>)Predef$.MODULE$.wrapRefArray((Object[])instances), metadata.getMetadata());
    }

    /**
     * Maps every label text of a sentence to its integer id.
     *
     * <p>The lookup itself is delegated to {@code $anonfun$encodeLabels$1}, which resolves
     * the text through {@code metadata.label2Id()}.
     *
     * @param labels   the sentence's label texts
     * @param metadata metadata supplying the label-to-id mapping
     * @return the label ids, in the same order as the input labels
     */
    private InstanceLabels encodeLabels(TextSentenceLabels labels, DatasetMetadata metadata) {
        Function1 toLabelId = (Function1 & Serializable & scala.Serializable)labelText -> {
            int id = DatasetReader$.$anonfun$encodeLabels$1(metadata, (String)labelText);
            return BoxesRunTime.boxToInteger((int)id);
        };
        Seq encodedIds = (Seq)labels.labels().map(toLabelId, Seq$.MODULE$.canBuildFrom());
        return new InstanceLabels((Seq<Object>)encodedIds);
    }

    /**
     * Encodes the words of a sentence into sparse feature vectors using existing metadata.
     *
     * <p>For each word:
     * <ul>
     *   <li>every string attribute is rendered as {@code "name=value"} and looked up in
     *       {@code metadata.attr2Id()}; keys without an entry are dropped, found ids are
     *       paired with {@code 1.0f};</li>
     *   <li>every numeric attribute is looked up under the key {@code "num" + index}; when
     *       an id exists it is paired with the attribute's float value;</li>
     *   <li>both lists are concatenated, sorted by id, de-duplicated with {@code distinct}
     *       and wrapped in a {@link SparseArray}.</li>
     * </ul>
     *
     * @param sentence the sentence whose words are encoded
     * @param metadata metadata supplying the attribute-to-id mapping
     * @return an instance with one sparse array per word
     * @throws MatchError if an attribute pair or zipped (value, index) pair is {@code null}
     */
    public Instance encodeSentence(TextSentenceAttrs sentence, DatasetMetadata metadata) {
        Seq items = (Seq)sentence.words().map((Function1 & Serializable & scala.Serializable)word -> {
            // String attributes: keep only those whose "name=value" key has an id in the metadata.
            Seq strAttrs = (Seq)((TraversableLike)word.strAttrs().flatMap((Function1 & Serializable & scala.Serializable)x0$1 -> {
                Tuple2 tuple2 = x0$1;
                if (tuple2 == null) {
                    throw new MatchError((Object)tuple2);
                }
                String name = (String)tuple2._1();
                String value = (String)tuple2._2();
                String key = new StringBuilder(1).append(name).append("=").append(value).toString();
                // An Option viewed as an Iterable: flatMap drops keys with no entry.
                Iterable iterable = Option$.MODULE$.option2Iterable(metadata.attr2Id().get((Object)key));
                return iterable;
            }, Seq$.MODULE$.canBuildFrom())).map((Function1 & Serializable & scala.Serializable)x$3 -> DatasetReader$.$anonfun$encodeSentence$3(BoxesRunTime.unboxToInt((Object)x$3)), Seq$.MODULE$.canBuildFrom());
            // Numeric attributes: the position in the array determines the lookup key "num<idx>".
            Tuple2[] numAttrs = (Tuple2[])new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])new ArrayOps.ofFloat(Predef$.MODULE$.floatArrayOps(word.numAttrs())).zipWithIndex(Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Tuple2.class))))).flatMap((Function1 & Serializable & scala.Serializable)x0$2 -> {
                Tuple2 tuple2 = x0$2;
                if (tuple2 == null) {
                    throw new MatchError((Object)tuple2);
                }
                float value = BoxesRunTime.unboxToFloat((Object)tuple2._1());
                int idx = tuple2._2$mcI$sp();
                String key = new StringBuilder(3).append("num").append(idx).toString();
                Option attr = metadata.attr2Id().get((Object)key);
                // Despite its name, `attrName` is the integer id returned by the lookup.
                Iterable iterable = Option$.MODULE$.option2Iterable(attr.map((Function1 & Serializable & scala.Serializable)attrName -> DatasetReader$.$anonfun$encodeSentence$5(value, BoxesRunTime.unboxToInt((Object)attrName))));
                return iterable;
            }, Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Tuple2.class)));
            Seq id2value = (Seq)strAttrs.$plus$plus((GenTraversableOnce)new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])numAttrs)), Seq$.MODULE$.canBuildFrom());
            // Sort by attribute id, then remove duplicate (id, value) pairs.
            Tuple2[] attrValues = (Tuple2[])((TraversableOnce)((SeqLike)id2value.sortBy((Function1 & Serializable & scala.Serializable)id -> BoxesRunTime.boxToInteger((int)id._1$mcI$sp()), (Ordering)Ordering.Int$.MODULE$)).distinct()).toArray(ClassTag$.MODULE$.apply(Tuple2.class));
            return new SparseArray(attrValues);
        }, Seq$.MODULE$.canBuildFrom());
        return new Instance((Seq<SparseArray>)items);
    }

    /**
     * Reads a labelled dataset file and encodes it into a {@link CrfDataset}.
     *
     * <p>This is {@code readWithLabels} followed by {@code encodeDataset}.
     *
     * @param file      path of the dataset file
     * @param skipLines number of leading lines to drop before parsing
     * @return the encoded dataset
     */
    public CrfDataset readAndEncode(String file, int skipLines) {
        return this.encodeDataset(this.readWithLabels(file, skipLines));
    }

    /**
     * Reads a labelled dataset file and encodes each sentence against existing metadata.
     *
     * <p>Every sentence from {@code readWithLabels} is turned into an
     * ({@link InstanceLabels}, {@link Instance}) pair via {@code encodeLabels} and
     * {@code encodeSentence}.
     *
     * @param file      path of the dataset file
     * @param skipLines number of leading lines to drop before parsing
     * @param metadata  metadata supplying the label and attribute id mappings
     * @return the encoded (labels, instance) pairs
     * @throws MatchError if a sentence pair is {@code null}
     */
    public TraversableOnce<Tuple2<InstanceLabels, Instance>> readAndEncode(String file, int skipLines, DatasetMetadata metadata) {
        TraversableOnce<Tuple2<TextSentenceLabels, TextSentenceAttrs>> sentences = this.readWithLabels(file, skipLines);
        Function1 encodePair = (Function1 & Serializable & scala.Serializable)element -> {
            Tuple2 sentence = (Tuple2)element;
            if (sentence == null) {
                throw new MatchError((Object)sentence);
            }
            TextSentenceLabels rawLabels = (TextSentenceLabels)sentence._1();
            TextSentenceAttrs rawAttrs = (TextSentenceAttrs)sentence._2();
            InstanceLabels encodedLabels = MODULE$.encodeLabels(rawLabels, metadata);
            Instance encodedInstance = MODULE$.encodeSentence(rawAttrs, metadata);
            return new Tuple2((Object)encodedLabels, (Object)encodedInstance);
        };
        return TraversableOnce$.MODULE$.MonadOps(sentences).map(encodePair);
    }

    /**
     * Emits the sentence buffered so far, if there is one, and resets both buffers.
     *
     * @param tokens$1 holder of the buffered word attributes; replaced with a new empty
     *                 buffer when a sentence is emitted
     * @param labels$1 holder of the buffered labels; replaced with a new empty buffer
     *                 when a sentence is emitted
     * @return {@code Some((labels, attrs))} when tokens were buffered, otherwise {@code None}
     */
    private static final Option addToResultIfExists$1(ObjectRef tokens$1, ObjectRef labels$1) {
        ArrayBuffer pendingTokens = (ArrayBuffer)tokens$1.elem;
        if (!pendingTokens.nonEmpty()) {
            return None$.MODULE$;
        }
        ArrayBuffer pendingLabels = (ArrayBuffer)labels$1.elem;
        TextSentenceLabels sentenceLabels = new TextSentenceLabels((Seq<String>)pendingLabels);
        TextSentenceAttrs sentenceAttrs = new TextSentenceAttrs((Seq<WordAttrs>)pendingTokens);
        Tuple2 sentence = new Tuple2((Object)sentenceLabels, (Object)sentenceAttrs);
        // Install new buffers so the emitted sentence keeps the old ones to itself.
        labels$1.elem = new ArrayBuffer();
        tokens$1.elem = new ArrayBuffer();
        return new Some((Object)sentence);
    }

    /**
     * Looks up the id of a label text in {@code metadata$2.label2Id()}.
     *
     * @param metadata$2 metadata supplying the label-to-id mapping
     * @param text       label text to resolve
     * @return the mapped id, or {@code -1} when the text has no entry
     */
    public static final /* synthetic */ int $anonfun$encodeLabels$1(DatasetMetadata metadata$2, String text) {
        Function0 unknownLabelId = (Function0)(JFunction0.mcI.sp & Serializable & scala.Serializable)() -> -1;
        Object boxedId = metadata$2.label2Id().getOrElse((Object)text, unknownLabelId);
        return BoxesRunTime.unboxToInt((Object)boxedId);
    }

    /**
     * Pairs an attribute id with the constant value {@code 1.0f}.
     *
     * @param x$3 attribute id
     * @return the pair {@code (x$3, 1.0f)} with both components boxed
     */
    public static final /* synthetic */ Tuple2 $anonfun$encodeSentence$3(int x$3) {
        Object boxedId = BoxesRunTime.boxToInteger((int)x$3);
        Object boxedOne = BoxesRunTime.boxToFloat((float)1.0f);
        return new Tuple2(boxedId, boxedOne);
    }

    /**
     * Pairs an attribute id with a numeric attribute value.
     *
     * @param value$1  the float value of the attribute
     * @param attrName the attribute's integer id (an id, despite the parameter name)
     * @return the pair {@code (attrName, value$1)} with both components boxed
     */
    public static final /* synthetic */ Tuple2 $anonfun$encodeSentence$5(float value$1, int attrName) {
        Object boxedId = BoxesRunTime.boxToInteger((int)attrName);
        Object boxedValue = BoxesRunTime.boxToFloat((float)value$1);
        return new Tuple2(boxedId, boxedValue);
    }

    /**
     * Private constructor: publishes the newly created instance through {@code MODULE$}.
     * It is invoked from the static initializer of this class.
     */
    private DatasetReader$() {
        MODULE$ = this;
    }
}

