/*
 * Decompiled with CFR 0.152.
 */
package ai.tock.nlp.stanford;

import ai.tock.nlp.model.TokenizerContext;
import ai.tock.nlp.model.service.engine.NlpTokenizer;
import ai.tock.nlp.model.service.engine.TokenizerModelHolder;
import ai.tock.nlp.stanford.StanfordTokenizer;
import ai.tock.shared.LoggersKt;
import edu.stanford.nlp.international.french.process.FrenchTokenizer;
import edu.stanford.nlp.international.spanish.process.SpanishTokenizer;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.process.TokenizerFactory;
import java.io.Reader;
import java.io.StringReader;
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Locale;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import kotlin.Metadata;
import kotlin.TypeCastException;
import kotlin.collections.CollectionsKt;
import kotlin.jvm.functions.Function0;
import kotlin.jvm.functions.Function1;
import kotlin.jvm.internal.DefaultConstructorMarker;
import kotlin.jvm.internal.Intrinsics;
import kotlin.text.Regex;
import kotlin.text.StringsKt;
import mu.KLogger;
import mu.KotlinLogging;
import org.jetbrains.annotations.NotNull;

@Metadata(mv={1, 1, 16}, bv={1, 0, 3}, k=1, d1={"\u0000>\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0000\n\u0002\u0010\u000e\n\u0000\n\u0002\u0010 \n\u0002\b\u0002\n\u0002\u0010\u0011\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0004\b\u0000\u0018\u0000 \u00152\u00020\u0001:\u0001\u0015B\r\u0012\u0006\u0010\u0002\u001a\u00020\u0003\u00a2\u0006\u0002\u0010\u0004J\u0010\u0010\b\u001a\u00020\t2\u0006\u0010\n\u001a\u00020\u000bH\u0002J\u001e\u0010\f\u001a\b\u0012\u0004\u0012\u00020\u000b0\r2\u0006\u0010\u000e\u001a\u00020\u000b2\u0006\u0010\n\u001a\u00020\u000bH\u0002J#\u0010\u000f\u001a\b\u0012\u0004\u0012\u00020\u000b0\u00102\u0006\u0010\u0011\u001a\u00020\u00122\u0006\u0010\u0013\u001a\u00020\u000bH\u0016\u00a2\u0006\u0002\u0010\u0014R\u0014\u0010\u0005\u001a\b\u0012\u0004\u0012\u00020\u00070\u0006X\u0082\u0004\u00a2\u0006\u0002\n\u0000\u00a8\u0006\u0016"}, d2={"Lai/tock/nlp/stanford/StanfordTokenizer;", "Lai/tock/nlp/model/service/engine/NlpTokenizer;", "model", "Lai/tock/nlp/model/service/engine/TokenizerModelHolder;", "(Lai/tock/nlp/model/service/engine/TokenizerModelHolder;)V", "tokenizerFactory", "Ledu/stanford/nlp/process/TokenizerFactory;", "Ledu/stanford/nlp/ling/CoreLabel;", "separatorRegex", "Lkotlin/text/Regex;", "separators", "", "splitSeparators", "", "word", "tokenize", "", "context", "Lai/tock/nlp/model/TokenizerContext;", "text", "(Lai/tock/nlp/model/TokenizerContext;Ljava/lang/String;)[Ljava/lang/String;", "Companion", "tock-nlp-model-stanford"})
public final class StanfordTokenizer
extends NlpTokenizer {
    // Stanford tokenizer factory used by tokenize(); assigned once in the constructor from
    // Companion.getTokenizerFactory(model.getLanguage()).
    private final TokenizerFactory<CoreLabel> tokenizerFactory;
    // Logger shared by the class and its Companion; created in the static initializer.
    private static final KLogger logger;
    // Cache of compiled separator regexes keyed by the raw separators string; filled lazily
    // by separatorRegex().
    private static final ConcurrentHashMap<String, Regex> separatorRegexpMap;
    // Single Companion instance (the Kotlin companion object of this class); created in the
    // static initializer.
    public static final Companion Companion;

    /*
     * WARNING - void declaration
     */
    @NotNull
    public String[] tokenize(@NotNull TokenizerContext context, @NotNull String text) {
        Iterable iterable;
        void $this$flatMapTo$iv$iv2;
        Intrinsics.checkParameterIsNotNull((Object)context, (String)"context");
        Intrinsics.checkParameterIsNotNull((Object)text, (String)"text");
        List list = this.tokenizerFactory.getTokenizer((Reader)new StringReader(text)).tokenize();
        Intrinsics.checkExpressionValueIsNotNull((Object)list, (String)"tokenizerFactory.getToke\u2026gReader(text)).tokenize()");
        Iterable $this$flatMap$iv = list;
        boolean $i$f$flatMap = false;
        Iterable iterable2 = $this$flatMap$iv;
        Collection destination$iv$iv = new ArrayList();
        boolean $i$f$flatMapTo = false;
        for (Object element$iv$iv : $this$flatMapTo$iv$iv2) {
            String word;
            CoreLabel coreLabel = (CoreLabel)element$iv$iv;
            boolean bl = false;
            String string = word = coreLabel.word();
            Intrinsics.checkExpressionValueIsNotNull((Object)string, (String)"word");
            String string2 = this.getModel().getConfiguration().getTokenizerConfiguration().getProperties().getProperty("tock_stanford_tokens_separators");
            Intrinsics.checkExpressionValueIsNotNull((Object)string2, (String)"model.configuration.toke\u2026nford_tokens_separators\")");
            Iterable list$iv$iv = this.splitSeparators(string, string2);
            CollectionsKt.addAll((Collection)destination$iv$iv, (Iterable)list$iv$iv);
        }
        $this$flatMap$iv = (List)destination$iv$iv;
        $i$f$flatMap = false;
        boolean $this$flatMapTo$iv$iv2 = false;
        Iterable it = $this$flatMap$iv;
        boolean bl = false;
        if (it.isEmpty()) {
            Object object = text;
            boolean bl2 = false;
            object = ((Object)StringsKt.trim((CharSequence)((CharSequence)object))).toString();
            bl2 = false;
            if (object.length() == 0) {
                iterable = CollectionsKt.emptyList();
            } else {
                logger.warn((Function0)new Function0<String>(text){
                    final /* synthetic */ String $text$inlined;
                    {
                        this.$text$inlined = string;
                        super(0);
                    }

                    @NotNull
                    public final String invoke() {
                        return "empty token list for " + this.$text$inlined + ", do not split";
                    }
                });
                object = text;
                bl2 = false;
                iterable = CollectionsKt.listOf((Object)((Object)StringsKt.trim((CharSequence)((CharSequence)object))).toString());
            }
        } else {
            iterable = it;
        }
        Iterable rawTokens = iterable;
        logger.debug((Function0)new Function0<List<? extends String>>((List)rawTokens){
            final /* synthetic */ List $rawTokens;

            @NotNull
            public final List<String> invoke() {
                return this.$rawTokens;
            }
            {
                this.$rawTokens = list;
                super(0);
            }
        });
        Collection $this$toTypedArray$iv = (Collection)rawTokens;
        boolean $i$f$toTypedArray = false;
        Collection thisCollection$iv = $this$toTypedArray$iv;
        String[] stringArray = thisCollection$iv.toArray(new String[0]);
        if (stringArray == null) {
            throw new TypeCastException("null cannot be cast to non-null type kotlin.Array<T>");
        }
        return stringArray;
    }

    /*
     * WARNING - void declaration
     */
    private final Regex separatorRegex(String separators) {
        ConcurrentMap $this$getOrPut$iv = separatorRegexpMap;
        boolean $i$f$getOrPut = false;
        Object object = $this$getOrPut$iv.get(separators);
        if (object == null) {
            void $this$mapTo$iv$iv;
            boolean bl = false;
            logger.info((Function0)new Function0<String>(separators){
                final /* synthetic */ String $separators$inlined;
                {
                    this.$separators$inlined = string;
                    super(0);
                }

                @NotNull
                public final String invoke() {
                    return "using token separators: " + this.$separators$inlined;
                }
            });
            Iterable $this$map$iv = StringsKt.split$default((CharSequence)StringsKt.replace$default((String)separators, (String)"\\,", (String)"_comma_", (boolean)false, (int)4, null), (String[])new String[]{","}, (boolean)false, (int)0, (int)6, null);
            boolean $i$f$map = false;
            Iterable iterable = $this$map$iv;
            Collection destination$iv$iv = new ArrayList(CollectionsKt.collectionSizeOrDefault((Iterable)$this$map$iv, (int)10));
            boolean $i$f$mapTo = false;
            for (Object item$iv$iv : $this$mapTo$iv$iv) {
                void it;
                String string = (String)item$iv$iv;
                Collection collection = destination$iv$iv;
                boolean bl2 = false;
                String string2 = StringsKt.replace$default((String)it, (String)"_comma_", (String)",", (boolean)false, (int)4, null);
                collection.add(string2);
            }
            List s = (List)destination$iv$iv;
            String string = CollectionsKt.joinToString$default((Iterable)s, (CharSequence)"|", null, null, (int)0, null, null, (int)62, null);
            boolean bl3 = false;
            Regex regex = new Regex(string);
            boolean bl4 = false;
            boolean bl5 = false;
            Regex default$iv = regex;
            boolean bl6 = false;
            object = $this$getOrPut$iv.putIfAbsent(separators, default$iv);
            if (object == null) {
                object = default$iv;
            }
        }
        Intrinsics.checkExpressionValueIsNotNull(object, (String)"separatorRegexpMap.getOr\u2026(\"|\").toRegex()\n        }");
        return (Regex)object;
    }

    /**
     * Splits a single word further according to the configured separators.
     *
     * <p>Every match of {@code separatorRegex(separators)} in {@code word} is rewritten by a
     * replacement function; the result is trimmed and split on the single-space string.
     * If any {@link Exception} is thrown on the way, it is logged and {@code word} is
     * returned unchanged as a one-element list.
     *
     * <p>NOTE(review): the replacement function ({@code splitSeparators.1.INSTANCE}) is a
     * compiler-generated lambda class that is not part of this file. Presumably it pads each
     * match with spaces so that the split isolates the separators; confirm against the
     * original Kotlin source.
     *
     * @param word the word to split
     * @param separators raw separator configuration, passed to {@code separatorRegex}
     * @return the pieces of {@code word}, or a list holding only {@code word} on failure
     */
    private final List<String> splitSeparators(String word, String separators) {
        // Decompiler artifact: a single Object local carries first the replaced string and
        // then the resulting list.
        Object object;
        try {
            object = this.separatorRegex(separators).replace((CharSequence)word, (Function1)splitSeparators.1.INSTANCE);
            boolean bl = false;
            Object object2 = object;
            // Null guard emitted by the Kotlin compiler for the cast to CharSequence.
            if (object2 == null) {
                throw new TypeCastException("null cannot be cast to non-null type kotlin.CharSequence");
            }
            object = CollectionsKt.toList((Iterable)StringsKt.split$default((CharSequence)((Object)StringsKt.trim((CharSequence)((CharSequence)object2))).toString(), (String[])new String[]{" "}, (boolean)false, (int)0, (int)6, null));
        }
        catch (Exception e) {
            // Best effort: log the failure and fall back to the unsplit word.
            LoggersKt.error((KLogger)logger, (Throwable)e);
            object = CollectionsKt.listOf((Object)word);
        }
        return object;
    }

    /**
     * Creates a tokenizer for {@code model} and selects the Stanford tokenizer factory that
     * matches the model language.
     *
     * @param model holder passed to the superclass; its language picks the tokenizer factory
     */
    public StanfordTokenizer(@NotNull TokenizerModelHolder model) {
        // The superclass constructor call has to be the first statement of a Java
        // constructor, so the parameter null check runs right after it.
        super(model);
        Intrinsics.checkParameterIsNotNull((Object)model, (String)"model");
        this.tokenizerFactory = StanfordTokenizer.Companion.getTokenizerFactory(model.getLanguage());
    }

    // Initializes the class-level singletons: the companion instance, the logger and the
    // separator regex cache.
    static {
        // The argument is the unused DefaultConstructorMarker of the synthetic constructor.
        Companion = new Companion(null);
        // NOTE(review): Companion.logger.1.INSTANCE refers to a compiler-generated lambda
        // class that is not part of this file; presumably kotlin-logging only uses it to
        // derive the logger name - confirm.
        logger = KotlinLogging.INSTANCE.logger((Function0)Companion.logger.1.INSTANCE);
        separatorRegexpMap = new ConcurrentHashMap();
    }

    @Metadata(mv={1, 1, 16}, bv={1, 0, 3}, k=1, d1={"\u00000\n\u0002\u0018\u0002\n\u0002\u0010\u0000\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\u0010\u000e\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0000\b\u0086\u0003\u0018\u00002\u00020\u0001B\u0007\b\u0002\u00a2\u0006\u0002\u0010\u0002J\u0016\u0010\t\u001a\b\u0012\u0004\u0012\u00020\u000b0\n2\u0006\u0010\f\u001a\u00020\rH\u0002R\u000e\u0010\u0003\u001a\u00020\u0004X\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u001a\u0010\u0005\u001a\u000e\u0012\u0004\u0012\u00020\u0007\u0012\u0004\u0012\u00020\b0\u0006X\u0082\u0004\u00a2\u0006\u0002\n\u0000\u00a8\u0006\u000e"}, d2={"Lai/tock/nlp/stanford/StanfordTokenizer$Companion;", "", "()V", "logger", "Lmu/KLogger;", "separatorRegexpMap", "Ljava/util/concurrent/ConcurrentHashMap;", "", "Lkotlin/text/Regex;", "getTokenizerFactory", "Ledu/stanford/nlp/process/TokenizerFactory;", "Ledu/stanford/nlp/ling/CoreLabel;", "language", "Ljava/util/Locale;", "tock-nlp-model-stanford"})
    public static final class Companion {
        /*
         * Enabled aggressive block sorting
         */
        private final TokenizerFactory<CoreLabel> getTokenizerFactory(Locale language) {
            TokenizerFactory tokenizerFactory;
            logger.trace((Function0)new Function0<String>(language){
                final /* synthetic */ Locale $language;

                @NotNull
                public final String invoke() {
                    return "getting tokenizer for : " + this.$language;
                }
                {
                    this.$language = locale;
                    super(0);
                }
            });
            String string = language.getLanguage();
            if (string != null) {
                switch (string) {
                    case "fr": {
                        TokenizerFactory tokenizerFactory2 = FrenchTokenizer.FrenchTokenizerFactory.newTokenizerFactory();
                        boolean bl = false;
                        boolean bl2 = false;
                        TokenizerFactory it = tokenizerFactory2;
                        boolean bl3 = false;
                        it.setOptions("untokenizable=noneDelete");
                        Field field = FrenchTokenizer.FrenchTokenizerFactory.class.getDeclaredField("splitContractionOption");
                        boolean bl4 = false;
                        boolean bl5 = false;
                        Field $this$apply = field;
                        boolean bl6 = false;
                        $this$apply.setAccessible(true);
                        $this$apply.set(it, false);
                        TokenizerFactory tokenizerFactory3 = tokenizerFactory2;
                        tokenizerFactory = tokenizerFactory3;
                        Intrinsics.checkExpressionValueIsNotNull((Object)tokenizerFactory3, (String)"FrenchTokenizer.FrenchTo\u2026                        }");
                        return tokenizerFactory;
                    }
                    case "en": {
                        PTBTokenizer.PTBTokenizerFactory pTBTokenizerFactory = PTBTokenizer.PTBTokenizerFactory.newCoreLabelTokenizerFactory((String)"");
                        Intrinsics.checkExpressionValueIsNotNull((Object)pTBTokenizerFactory, (String)"PTBTokenizer.PTBTokenize\u2026LabelTokenizerFactory(\"\")");
                        tokenizerFactory = (TokenizerFactory)pTBTokenizerFactory;
                        return tokenizerFactory;
                    }
                    case "es": {
                        TokenizerFactory tokenizerFactory4 = SpanishTokenizer.SpanishTokenizerFactory.newCoreLabelTokenizerFactory();
                        tokenizerFactory = tokenizerFactory4;
                        Intrinsics.checkExpressionValueIsNotNull((Object)tokenizerFactory4, (String)"SpanishTokenizer.Spanish\u2026reLabelTokenizerFactory()");
                        return tokenizerFactory;
                    }
                }
            }
            PTBTokenizer.PTBTokenizerFactory pTBTokenizerFactory = PTBTokenizer.PTBTokenizerFactory.newCoreLabelTokenizerFactory((String)"");
            Intrinsics.checkExpressionValueIsNotNull((Object)pTBTokenizerFactory, (String)"PTBTokenizer.PTBTokenize\u2026LabelTokenizerFactory(\"\")");
            tokenizerFactory = (TokenizerFactory)pTBTokenizerFactory;
            return tokenizerFactory;
        }

        private Companion() {
        }

        public /* synthetic */ Companion(DefaultConstructorMarker $constructor_marker) {
            this();
        }
    }
}

