/*
 * Decompiled with CFR 0.152.
 */
package org.bitbucket.eunjeon.seunjeon;

import java.io.Serializable;
import org.bitbucket.eunjeon.seunjeon.CharSet;
import org.bitbucket.eunjeon.seunjeon.CharSetDef$;
import org.bitbucket.eunjeon.seunjeon.CompressedMorpheme;
import org.bitbucket.eunjeon.seunjeon.ConnectionCostDict;
import org.bitbucket.eunjeon.seunjeon.LNode;
import org.bitbucket.eunjeon.seunjeon.LNode$;
import org.bitbucket.eunjeon.seunjeon.Lattice;
import org.bitbucket.eunjeon.seunjeon.Lattice$;
import org.bitbucket.eunjeon.seunjeon.LexiconDict;
import org.bitbucket.eunjeon.seunjeon.Morpheme;
import org.bitbucket.eunjeon.seunjeon.Paragraph;
import org.bitbucket.eunjeon.seunjeon.UnkMorpheme;
import org.bitbucket.eunjeon.seunjeon.UnkMorpheme$;
import scala.Function1;
import scala.Predef$;
import scala.collection.Iterable;
import scala.collection.Iterable$;
import scala.collection.JavaConverters$;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.immutable.Stream;
import scala.collection.immutable.Stream$;
import scala.collection.mutable.ArrayBuffer;
import scala.collection.mutable.ArrayOps;
import scala.math.package$;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.runtime.IntRef;
import scala.runtime.RichInt$;

@ScalaSignature(bytes="\u0006\u0001\u0005ub\u0001B\u0001\u0003\u0001-\u0011\u0011\u0002V8lK:L'0\u001a:\u000b\u0005\r!\u0011\u0001C:fk:TWm\u001c8\u000b\u0005\u00151\u0011aB3v]*,wN\u001c\u0006\u0003\u000f!\t\u0011BY5uEV\u001c7.\u001a;\u000b\u0003%\t1a\u001c:h\u0007\u0001\u0019\"\u0001\u0001\u0007\u0011\u00055\u0001R\"\u0001\b\u000b\u0003=\tQa]2bY\u0006L!!\u0005\b\u0003\r\u0005s\u0017PU3g\u0011!\u0019\u0002A!A!\u0002\u0013!\u0012a\u00037fq&\u001cwN\u001c#jGR\u0004\"!\u0006\f\u000e\u0003\tI!a\u0006\u0002\u0003\u00171+\u00070[2p]\u0012K7\r\u001e\u0005\t3\u0001\u0011\t\u0011)A\u00055\u0005\u00112m\u001c8oK\u000e$\u0018n\u001c8D_N$H)[2u!\t)2$\u0003\u0002\u001d\u0005\t\u00112i\u001c8oK\u000e$\u0018n\u001c8D_N$H)[2u\u0011!q\u0002A!A!\u0002\u0013y\u0012\u0001C2p[B\u0014Xm]:\u0011\u00055\u0001\u0013BA\u0011\u000f\u0005\u001d\u0011un\u001c7fC:DQa\t\u0001\u0005\u0002\u0011\na\u0001P5oSRtD\u0003B\u0013'O!\u0002\"!\u0006\u0001\t\u000bM\u0011\u0003\u0019\u0001\u000b\t\u000be\u0011\u0003\u0019\u0001\u000e\t\u000by\u0011\u0003\u0019A\u0010\t\r)\u0002\u0001\u0015)\u0003\u0015\u0003!)8/\u001a:ES\u000e$\bB\u0002\u0017\u0001A\u0003&Q&\u0001\u0007nCb,fn\u001b'f]\u001e$\b\u000e\u0005\u0002\u000e]%\u0011qF\u0004\u0002\u0004\u0013:$\b\"B\u0019\u0001\t\u0003\u0011\u0014aC:fiV\u001bXM\u001d#jGR$\"a\r\u001c\u0011\u00055!\u0014BA\u001b\u000f\u0005\u0011)f.\u001b;\t\u000b]\u0002\u0004\u0019\u0001\u000b\u0002\t\u0011L7\r\u001e\u0005\u0006s\u0001!\tAO\u0001\u0010g\u0016$X*\u0019=V].dUM\\4uQR\u00111g\u000f\u0005\u0006ya\u0002\r!L\u0001\u0007Y\u0016tw\r\u001e5\t\u000by\u0002A\u0011A 
\u0002\u0013A\f'o]3UKb$Hc\u0001!P3B\u0019\u0011)\u0013'\u000f\u0005\t;eBA\"G\u001b\u0005!%BA#\u000b\u0003\u0019a$o\\8u}%\tq\"\u0003\u0002I\u001d\u00059\u0001/Y2lC\u001e,\u0017B\u0001&L\u0005!IE/\u001a:bE2,'B\u0001%\u000f!\t)R*\u0003\u0002O\u0005\tI\u0001+\u0019:bOJ\f\u0007\u000f\u001b\u0005\u0006!v\u0002\r!U\u0001\u0006S:\u0004X\u000f\u001e\t\u0003%Zs!a\u0015+\u0011\u0005\rs\u0011BA+\u000f\u0003\u0019\u0001&/\u001a3fM&\u0011q\u000b\u0017\u0002\u0007'R\u0014\u0018N\\4\u000b\u0005Us\u0001\"\u0002.>\u0001\u0004y\u0012!\u00043f!J,\u0017I\\1msNL7\u000fC\u0003]\u0001\u0011\u0005Q,A\u0007qCJ\u001cX\rV3yi*\u000bg/\u0019\u000b\u0004=\u00164\u0007cA0e\u00196\t\u0001M\u0003\u0002bE\u0006!A.\u00198h\u0015\u0005\u0019\u0017\u0001\u00026bm\u0006L!A\u00131\t\u000bA[\u0006\u0019A)\t\u000bi[\u0006\u0019A\u0010\t\u000b!\u0004A\u0011B5\u0002\u001dI,Wn\u001c<f\u0011\u0016\fG\rT1tiR\u0011!\u000e\u001d\t\u0004\u0003.l\u0017B\u00017L\u0005\r\u0019V-\u001d\t\u0003+9L!a\u001c\u0002\u0003\u000b1su\u000eZ3\t\u000bE<\u0007\u0019\u00016\u0002\u000b9|G-Z:\t\u000bM\u0004A\u0011\u0002;\u0002\u0019\t,\u0018\u000e\u001c3MCR$\u0018nY3\u0015\u0005UD\bCA\u000bw\u0013\t9(AA\u0004MCR$\u0018nY3\t\u000be\u0014\b\u0019A)\u0002\tQ,\u0007\u0010\u001e\u0005\u0006w\u0002!I\u0001`\u0001\u0010O\u0016$XK\\6o_^tG+\u001a:ngR\u0011!. 
\u0005\u0006sj\u0004\r!\u0015\u0005\u0007\u007f\u0002!I!!\u0001\u0002\u001b\u001d,Go\u00138po:$VM]7t)\rQ\u00171\u0001\u0005\u0006sz\u0004\r!\u0015\u0005\u0007w\u0002!I!a\u0002\u0015\u000b)\fI!!\u0004\t\u000f\u0005-\u0011Q\u0001a\u0001[\u0005i1\r[1sg\u0016$xJ\u001a4tKRD\u0001\"a\u0004\u0002\u0006\u0001\u0007\u0011\u0011C\u0001\bG\"\f'o]3u!\r)\u00121C\u0005\u0004\u0003+\u0011!aB\"iCJ\u001cV\r\u001e\u0005\u0007\u007f\u0002!I!!\u0007\u0015\u000f)\fY\"!\b\u0002\"!9\u00111BA\f\u0001\u0004i\u0003bBA\u0010\u0003/\u0001\r!L\u0001\u000bi\u0016\u0014Xn\u00144gg\u0016$\bbBA\u0012\u0003/\u0001\r!U\u0001\u000egV4g-\u001b=TkJ4\u0017mY3\t\u0011\u0005\u001d\u0002\u0001)C\u0005\u0003S\t\u0001cZ3uc9cUM\\4uQR+'/\\:\u0015\u0013)\fY#!\f\u00020\u0005E\u0002bBA\u0006\u0003K\u0001\r!\f\u0005\b\u0003?\t)\u00031\u0001.\u0011\u001d\t\u0019#!\nA\u0002EC\u0001\"a\u0004\u0002&\u0001\u0007\u0011\u0011\u0003\u0005\b\u0003k\u0001A\u0011BA\u001c\u0003A9W\r^$s_V\u0004H+\u001a:n\u001d>$W\rF\u0003n\u0003s\tY\u0004C\u0004\u0002\f\u0005M\u0002\u0019A\u0017\t\u0011\u0005=\u00111\u0007a\u0001\u0003#\u0001")
public class Tokenizer {
    // Dictionary queried with commonPrefixSearch for every suffix of the input.
    private final LexiconDict lexiconDict;
    // Handed to Lattice$.apply when a lattice is created for a line of text.
    private final ConnectionCostDict connectionCostDict;
    // When true, unknown-word morphemes are wrapped in CompressedMorpheme.
    private final boolean compress;
    // Optional second dictionary; null (the constructor default) means "not set".
    private LexiconDict userDict;
    // Upper bound applied to unknown-term lengths; the constructor sets it to 8.
    private int maxUnkLength;

    /**
     * Installs the user dictionary that is searched in addition to the main lexicon.
     *
     * @param dict dictionary to consult; {@code null} disables the extra lookup
     */
    public void setUserDict(LexiconDict dict) {
        userDict = dict;
    }

    /**
     * Sets the length limit applied when unknown-word candidates are generated.
     *
     * @param length new value for the unknown-term length limit
     */
    public void setMaxUnkLength(int length) {
        maxUnkLength = length;
    }

    /**
     * Tokenizes {@code input} line by line and returns one {@link Paragraph} per line.
     *
     * <p>The input is split on {@link System#lineSeparator()} (interpreted as a regular
     * expression by {@link String#split(String)}). For each line a lattice is built and its
     * best path requested at the current running offset; the offset then advances by the
     * line length plus the separator length. The first and last node of every best path are
     * dropped before the path is wrapped in a {@code Paragraph}.
     *
     * <p>NOTE(review): the lines are mapped through a Scala {@code Stream}, which is
     * presumably evaluated lazily while the running offset is mutated inside the mapping
     * function; confirm that consumers traverse the result in order.
     *
     * @param input text to analyse; must not be {@code null}
     * @param dePreAnalysis when {@code true}, every node of every paragraph is expanded
     *     through {@code LNode$.dePreAnalysis}
     * @return the paragraphs, in line order
     */
    public Iterable<Paragraph> parseText(String input, boolean dePreAnalysis) {
        String text = input.intern();
        // Boxed counter so the mapping lambda can advance it between lines.
        IntRef offset = IntRef.create(0);
        String lineSeparator = System.lineSeparator();
        String[] lines = text.split(lineSeparator);
        Stream lineStream = new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])lines)).toStream();
        // The lambdas target the raw Function1 type, so their parameters arrive as Object
        // and have to be cast before use.
        Stream bestPaths = (Stream)lineStream.map((Function1 & Serializable & scala.Serializable)str -> {
            String line = (String)str;
            Seq<LNode> path = this.buildLattice(line).getBestPath(offset.elem);
            offset.elem += line.length() + lineSeparator.length();
            return path;
        }, Stream$.MODULE$.canBuildFrom());
        Iterable bestPath = (Iterable)bestPaths.map((Function1 & Serializable & scala.Serializable)x -> new Paragraph(this.removeHeadLast((Seq<LNode>)x)), Stream$.MODULE$.canBuildFrom());
        if (!dePreAnalysis) {
            return bestPath;
        }
        return (Iterable)bestPath.map((Function1 & Serializable & scala.Serializable)x -> new Paragraph((Seq<LNode>)((Seq)((Paragraph)x).nodes().flatMap((Function1 & Serializable & scala.Serializable)node -> LNode$.MODULE$.dePreAnalysis((LNode)node), Seq$.MODULE$.canBuildFrom()))), Iterable$.MODULE$.canBuildFrom());
    }

    /**
     * Java-friendly variant of {@link #parseText(String, boolean)}: runs the same analysis
     * and exposes the result as a {@link java.lang.Iterable}.
     *
     * @param input text to analyse
     * @param dePreAnalysis forwarded unchanged to {@code parseText}
     * @return the paragraphs produced by {@code parseText}, viewed through the Scala-to-Java
     *     iterable converter
     */
    public java.lang.Iterable<Paragraph> parseTextJava(String input, boolean dePreAnalysis) {
        Iterable<Paragraph> paragraphs = this.parseText(input, dePreAnalysis);
        return (java.lang.Iterable<Paragraph>)JavaConverters$.MODULE$.asJavaIterableConverter(paragraphs).asJava();
    }

    /**
     * Returns {@code nodes} without its first and last element.
     *
     * @param nodes node sequence to trim
     * @return the slice from index 1 (inclusive) to {@code nodes.length() - 1} (exclusive)
     */
    private Seq<LNode> removeHeadLast(Seq<LNode> nodes) {
        int from = 1;
        int until = nodes.length() - 1;
        return (Seq)nodes.slice(from, until);
    }

    /**
     * Creates a lattice for {@code text}, adds the dictionary terms followed by the
     * unknown-word terms, and builds it.
     *
     * @param text one line of input
     * @return the built lattice
     */
    private Lattice buildLattice(String text) {
        return Lattice$.MODULE$
                .apply(text, this.connectionCostDict)
                .addAll(this.getKnownTerms(text))
                .addAll(this.getUnknownTerms(text))
                .build();
    }

    /*
     * WARNING - void declaration
     */
    private Seq<LNode> getUnknownTerms(String text) {
        void var3_3;
        Seq<CharSet> charsets = CharSetDef$.MODULE$.splitCharSet(text);
        ArrayBuffer unknownTerms = new ArrayBuffer();
        IntRef charsetOffset = IntRef.create((int)0);
        charsets.foreach((Function1 & Serializable & scala.Serializable)charset -> {
            Tokenizer.$anonfun$getUnknownTerms$1(this, unknownTerms, charsetOffset, charset);
            return BoxedUnit.UNIT;
        });
        return var3_3;
    }

    /*
     * WARNING - void declaration
     */
    private Seq<LNode> getKnownTerms(String text) {
        void var2_2;
        ArrayBuffer knownTerms = new ArrayBuffer();
        RichInt$.MODULE$.until$extension0(Predef$.MODULE$.intWrapper(0), text.length()).foreach((Function1 & Serializable & scala.Serializable)idx -> knownTerms.$plus$plus$eq(this.getKnownTerms(0, BoxesRunTime.unboxToInt((Object)idx), text.substring(BoxesRunTime.unboxToInt((Object)idx)))));
        return var2_2;
    }

    /*
     * WARNING - void declaration
     */
    private Seq<LNode> getUnknownTerms(int charsetOffset, CharSet charset) {
        void var3_3;
        ArrayBuffer unknownTerms = new ArrayBuffer();
        RichInt$.MODULE$.until$extension0(Predef$.MODULE$.intWrapper(0), charset.str().length()).foreach((Function1 & Serializable & scala.Serializable)idx -> Tokenizer.$anonfun$getUnknownTerms$2(this, charsetOffset, charset, unknownTerms, BoxesRunTime.unboxToInt((Object)idx)));
        Object object = charset.category().length() == 0 ? unknownTerms.$plus$eq((Object)this.getGroupTermNode(charsetOffset, charset)) : (charset.category().group() && charset.str().length() <= this.maxUnkLength ? unknownTerms.$plus$eq((Object)this.getGroupTermNode(charsetOffset, charset)) : BoxedUnit.UNIT);
        return var3_3;
    }

    /**
     * Looks up every dictionary entry that is a prefix of {@code suffixSurface} and turns
     * each hit into a lattice node.
     *
     * <p>The main lexicon is always searched; hits from the user dictionary are appended
     * when one has been set. Each node starts at {@code charsetOffset + termOffset} and
     * ends that many characters plus the length of the entry's surface later.
     *
     * @param charsetOffset offset of the enclosing run within the line
     * @param termOffset offset of the suffix within the run
     * @param suffixSurface text to search prefixes of
     * @return one node per dictionary hit
     */
    private Seq<LNode> getKnownTerms(int charsetOffset, int termOffset, String suffixSurface) {
        Seq searchedTerms = this.lexiconDict.commonPrefixSearch(suffixSurface);
        if (this.userDict != null) {
            searchedTerms = (Seq)searchedTerms.$plus$plus(this.userDict.commonPrefixSearch(suffixSurface), Seq$.MODULE$.canBuildFrom());
        }
        int begin = charsetOffset + termOffset;
        // The lambda targets the raw Function1 type, so its parameter arrives as Object
        // and must be cast to Morpheme before getSurface() can be called.
        return (Seq)searchedTerms.map((Function1 & Serializable & scala.Serializable)term -> {
            Morpheme morpheme = (Morpheme)term;
            return new LNode(morpheme, begin, begin + morpheme.getSurface().length(), LNode$.MODULE$.apply$default$4());
        }, Seq$.MODULE$.canBuildFrom());
    }

    /**
     * Builds unknown-word candidates of length 1..N that start at {@code termOffset} inside
     * the given character-set run.
     *
     * <p>N is the smallest of the remaining suffix length, the category length and
     * {@code maxUnkLength}; a value of 0 is raised to 1. Nothing is produced for a group
     * category whose length is 0. Each candidate is wrapped in {@code CompressedMorpheme}
     * when compression is enabled.
     *
     * @param charsetOffset offset of the run within the line
     * @param termOffset start index of the candidates within the run
     * @param suffixSurface the run's text from {@code termOffset} onwards
     * @param charset the character-set run
     * @return the candidate nodes, shortest first
     */
    private Seq<LNode> get1NLengthTerms(int charsetOffset, int termOffset, String suffixSurface, CharSet charset) {
        if (charset.category().group() && charset.category().length() == 0) {
            return (Seq)Seq$.MODULE$.empty();
        }
        int categoryLength = Math.min(Math.min(suffixSurface.length(), charset.category().length()), this.maxUnkLength);
        if (categoryLength == 0) {
            categoryLength = 1;
        }
        int begin = charsetOffset + termOffset;
        LNode[] nodes = new LNode[categoryLength];
        for (int unknownIdx = 1; unknownIdx <= categoryLength; ++unknownIdx) {
            UnkMorpheme tmp = UnkMorpheme$.MODULE$.apply(charset.str().substring(termOffset, termOffset + unknownIdx), charset.morpheme());
            Morpheme unknownTerm = this.compress ? new CompressedMorpheme(tmp) : tmp;
            nodes[unknownIdx - 1] = new LNode(unknownTerm, begin, begin + unknownTerm.getSurface().length(), LNode$.MODULE$.apply$default$4());
        }
        // Pass the array with its real element type so the wrapper is a Seq of LNode;
        // widening it to Object[] first would erase that type.
        return Predef$.MODULE$.wrapRefArray(nodes);
    }

    /**
     * Builds a single unknown-word node that covers the entire character-set run.
     *
     * @param charsetOffset offset of the run within the line; used as the node's start
     * @param charset the character-set run
     * @return a node from {@code charsetOffset} to that offset plus the morpheme's surface
     *     length
     */
    private LNode getGroupTermNode(int charsetOffset, CharSet charset) {
        UnkMorpheme wholeRun = UnkMorpheme$.MODULE$.apply(charset.str(), charset.morpheme());
        int endOffset = charsetOffset + wholeRun.getSurface().length();
        return new LNode(wholeRun, charsetOffset, endOffset, LNode$.MODULE$.apply$default$4());
    }

    /**
     * Synthetic lambda body: appends the unknown-word candidates of one character-set run
     * to the buffer and then advances the running offset by the run's length.
     */
    public static final /* synthetic */ void $anonfun$getUnknownTerms$1(Tokenizer $this, ArrayBuffer unknownTerms$1, IntRef charsetOffset$3, CharSet charset) {
        Seq<LNode> runTerms = $this.getUnknownTerms(charsetOffset$3.elem, charset);
        unknownTerms$1.$plus$plus$eq(runTerms);
        int runLength = charset.str().length();
        charsetOffset$3.elem += runLength;
    }

    /**
     * Synthetic lambda body: appends the 1..N length candidates that start at {@code idx}
     * inside the run to the buffer and returns that buffer.
     */
    public static final /* synthetic */ ArrayBuffer $anonfun$getUnknownTerms$2(Tokenizer $this, int charsetOffset$2, CharSet charset$1, ArrayBuffer unknownTerms$2, int idx) {
        String remaining = charset$1.str().substring(idx);
        Seq<LNode> candidates = $this.get1NLengthTerms(charsetOffset$2, idx, remaining, charset$1);
        return unknownTerms$2.$plus$plus$eq(candidates);
    }

    /**
     * Creates a tokenizer with no user dictionary and an unknown-term length limit of 8.
     *
     * @param lexiconDict main dictionary used for prefix searches
     * @param connectionCostDict cost table handed to every lattice that is built
     * @param compress whether unknown-word morphemes are wrapped in
     *     {@code CompressedMorpheme}
     */
    public Tokenizer(LexiconDict lexiconDict, ConnectionCostDict connectionCostDict, boolean compress) {
        // Mutable settings start at their defaults; see the setters to change them.
        this.userDict = null;
        this.maxUnkLength = 8;
        this.lexiconDict = lexiconDict;
        this.connectionCostDict = connectionCostDict;
        this.compress = compress;
    }
}

