/*
 * Decompiled with CFR 0.152.
 */
package ai.tock.nlp.stanford;

import ai.tock.nlp.model.TokenizerContext;
import ai.tock.nlp.model.service.engine.NlpTokenizer;
import ai.tock.nlp.model.service.engine.TokenizerModelHolder;
import ai.tock.nlp.stanford.StanfordTokenizer;
import ai.tock.shared.LoggersKt;
import edu.stanford.nlp.international.french.process.FrenchTokenizer;
import edu.stanford.nlp.international.spanish.process.SpanishTokenizer;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.process.TokenizerFactory;
import java.io.Reader;
import java.io.StringReader;
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Locale;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import kotlin.Metadata;
import kotlin.collections.CollectionsKt;
import kotlin.jvm.functions.Function0;
import kotlin.jvm.functions.Function1;
import kotlin.jvm.internal.DefaultConstructorMarker;
import kotlin.jvm.internal.Intrinsics;
import kotlin.text.Regex;
import kotlin.text.StringsKt;
import mu.KLogger;
import mu.KotlinLogging;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;

@Metadata(mv={1, 5, 1}, k=1, xi=48, d1={"\u0000>\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0000\n\u0002\u0010\u000e\n\u0000\n\u0002\u0010 \n\u0002\b\u0002\n\u0002\u0010\u0011\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0004\b\u0000\u0018\u0000 \u00152\u00020\u0001:\u0001\u0015B\r\u0012\u0006\u0010\u0002\u001a\u00020\u0003\u00a2\u0006\u0002\u0010\u0004J\u0010\u0010\b\u001a\u00020\t2\u0006\u0010\n\u001a\u00020\u000bH\u0002J\u001e\u0010\f\u001a\b\u0012\u0004\u0012\u00020\u000b0\r2\u0006\u0010\u000e\u001a\u00020\u000b2\u0006\u0010\n\u001a\u00020\u000bH\u0002J#\u0010\u000f\u001a\b\u0012\u0004\u0012\u00020\u000b0\u00102\u0006\u0010\u0011\u001a\u00020\u00122\u0006\u0010\u0013\u001a\u00020\u000bH\u0016\u00a2\u0006\u0002\u0010\u0014R\u0014\u0010\u0005\u001a\b\u0012\u0004\u0012\u00020\u00070\u0006X\u0082\u0004\u00a2\u0006\u0002\n\u0000\u00a8\u0006\u0016"}, d2={"Lai/tock/nlp/stanford/StanfordTokenizer;", "Lai/tock/nlp/model/service/engine/NlpTokenizer;", "model", "Lai/tock/nlp/model/service/engine/TokenizerModelHolder;", "(Lai/tock/nlp/model/service/engine/TokenizerModelHolder;)V", "tokenizerFactory", "Ledu/stanford/nlp/process/TokenizerFactory;", "Ledu/stanford/nlp/ling/CoreLabel;", "separatorRegex", "Lkotlin/text/Regex;", "separators", "", "splitSeparators", "", "word", "tokenize", "", "context", "Lai/tock/nlp/model/TokenizerContext;", "text", "(Lai/tock/nlp/model/TokenizerContext;Ljava/lang/String;)[Ljava/lang/String;", "Companion", "tock-nlp-model-stanford"})
public final class StanfordTokenizer
extends NlpTokenizer {
    /**
     * Singleton instance of the nested {@link Companion} class. The constructor of this class goes
     * through it to call {@code getTokenizerFactory}.
     */
    @NotNull
    public static final Companion Companion = new Companion(null);
    /** Tokenizer factory selected once, in the constructor, from the model's language. */
    @NotNull
    private final TokenizerFactory<CoreLabel> tokenizerFactory;
    // NOTE(review): `Companion.logger.1.INSTANCE` is decompiler output that names a generated class
    // not declared in this file; it is not valid Java as written - confirm against the original source.
    /** Logger shared by every instance and by the nested {@link Companion} class. */
    @NotNull
    private static final KLogger logger = KotlinLogging.INSTANCE.logger((Function0)Companion.logger.1.INSTANCE);
    /**
     * Cache of compiled separator expressions, keyed by the raw separators string passed to
     * {@code separatorRegex}, which is the only method in this file that reads or fills it.
     */
    @NotNull
    private static final ConcurrentHashMap<String, Regex> separatorRegexpMap = new ConcurrentHashMap();

    /**
     * Creates a tokenizer bound to the given model and selects the Stanford tokenizer factory
     * matching the model's language.
     *
     * @param model holder whose language picks the tokenizer factory; must not be null
     */
    public StanfordTokenizer(@NotNull TokenizerModelHolder model) {
        // The superclass constructor call has to be the first statement of a Java constructor,
        // so the explicit null check runs immediately after it rather than before it.
        super(model);
        Intrinsics.checkNotNullParameter((Object)model, (String)"model");
        this.tokenizerFactory = StanfordTokenizer.Companion.getTokenizerFactory(model.getLanguage());
    }

    /*
     * WARNING - void declaration
     */
    @NotNull
    public String[] tokenize(@NotNull TokenizerContext context, @NotNull String text) {
        Iterable iterable;
        void $this$flatMapTo$iv$iv2;
        Intrinsics.checkNotNullParameter((Object)context, (String)"context");
        Intrinsics.checkNotNullParameter((Object)text, (String)"text");
        List list = this.tokenizerFactory.getTokenizer((Reader)new StringReader(text)).tokenize();
        Intrinsics.checkNotNullExpressionValue((Object)list, (String)"tokenizerFactory.getToke\u2026gReader(text)).tokenize()");
        Iterable $this$flatMap$iv = list;
        boolean $i$f$flatMap = false;
        Iterable iterable2 = $this$flatMap$iv;
        Collection destination$iv$iv = new ArrayList();
        boolean $i$f$flatMapTo = false;
        for (Object element$iv$iv : $this$flatMapTo$iv$iv2) {
            CoreLabel coreLabel = (CoreLabel)element$iv$iv;
            boolean bl = false;
            String word = coreLabel.originalText();
            Intrinsics.checkNotNullExpressionValue((Object)word, (String)"word");
            String string = this.getModel().getConfiguration().getTokenizerConfiguration().getProperties().getProperty("tock_stanford_tokens_separators");
            Intrinsics.checkNotNullExpressionValue((Object)string, (String)"model.configuration.toke\u2026nford_tokens_separators\")");
            Iterable list$iv$iv = this.splitSeparators(word, string);
            CollectionsKt.addAll((Collection)destination$iv$iv, (Iterable)list$iv$iv);
        }
        $this$flatMap$iv = (List)destination$iv$iv;
        $i$f$flatMap = false;
        boolean $this$flatMapTo$iv$iv2 = false;
        Iterable it = $this$flatMap$iv;
        boolean bl = false;
        if (it.isEmpty()) {
            Object object = text;
            boolean bl2 = false;
            object = ((Object)StringsKt.trim((CharSequence)((CharSequence)object))).toString();
            bl2 = false;
            if (object.length() == 0) {
                iterable = CollectionsKt.emptyList();
            } else {
                logger.warn((Function0)new Function0<Object>(text){
                    final /* synthetic */ String $text;
                    {
                        this.$text = $text;
                        super(0);
                    }

                    @Nullable
                    public final Object invoke() {
                        return "empty token list for " + this.$text + ", do not split";
                    }
                });
                object = text;
                bl2 = false;
                iterable = CollectionsKt.listOf((Object)((Object)StringsKt.trim((CharSequence)((CharSequence)object))).toString());
            }
        } else {
            iterable = it;
        }
        Iterable rawTokens2 = iterable;
        logger.debug((Function0)new Function0<Object>((List<String>)rawTokens2){
            final /* synthetic */ List<String> $rawTokens;
            {
                this.$rawTokens = $rawTokens;
                super(0);
            }

            @Nullable
            public final Object invoke() {
                return this.$rawTokens;
            }
        });
        Collection $this$toTypedArray$iv = (Collection)rawTokens2;
        boolean $i$f$toTypedArray = false;
        Collection thisCollection$iv = $this$toTypedArray$iv;
        String[] stringArray = thisCollection$iv.toArray(new String[0]);
        if (stringArray == null) {
            throw new NullPointerException("null cannot be cast to non-null type kotlin.Array<T>");
        }
        return stringArray;
    }

    /*
     * WARNING - void declaration
     */
    private final Regex separatorRegex(String separators) {
        ConcurrentMap $this$getOrPut$iv = separatorRegexpMap;
        boolean $i$f$getOrPut = false;
        Object object = $this$getOrPut$iv.get(separators);
        if (object == null) {
            void $this$mapTo$iv$iv;
            void $this$map$iv;
            boolean bl = false;
            logger.info((Function0)new Function0<Object>(separators){
                final /* synthetic */ String $separators;
                {
                    this.$separators = $separators;
                    super(0);
                }

                @Nullable
                public final Object invoke() {
                    return Intrinsics.stringPlus((String)"using token separators: ", (Object)this.$separators);
                }
            });
            Object object2 = new String[]{","};
            object2 = StringsKt.split$default((CharSequence)StringsKt.replace$default((String)separators, (String)"\\,", (String)"_comma_", (boolean)false, (int)4, null), (String[])object2, (boolean)false, (int)0, (int)6, null);
            boolean $i$f$map = false;
            void var8_7 = $this$map$iv;
            Collection destination$iv$iv = new ArrayList(CollectionsKt.collectionSizeOrDefault((Iterable)$this$map$iv, (int)10));
            boolean $i$f$mapTo = false;
            for (Object item$iv$iv : $this$mapTo$iv$iv) {
                void it;
                String string = (String)item$iv$iv;
                Collection collection = destination$iv$iv;
                boolean bl2 = false;
                String string2 = StringsKt.replace$default((String)it, (String)"_comma_", (String)",", (boolean)false, (int)4, null);
                collection.add(string2);
            }
            List s = (List)destination$iv$iv;
            object2 = CollectionsKt.joinToString$default((Iterable)s, (CharSequence)"|", null, null, (int)0, null, null, (int)62, null);
            boolean bl3 = false;
            Regex regex = new Regex((String)object2);
            boolean bl4 = false;
            boolean bl5 = false;
            Regex default$iv = regex;
            boolean bl6 = false;
            object = $this$getOrPut$iv.putIfAbsent(separators, default$iv);
            if (object == null) {
                object = default$iv;
            }
        }
        Object v = object;
        Intrinsics.checkNotNullExpressionValue(v, (String)"separatorRegexpMap.getOr\u2026(\"|\").toRegex()\n        }");
        return (Regex)v;
    }

    /**
     * Rewrites every separator match in {@code word} through a replacement function, trims the
     * result and splits it on single spaces.
     *
     * <p>Any exception raised along the way is logged and the word is returned unsplit.
     *
     * @param word the token text to split further
     * @param separators raw separators configuration string, forwarded to {@code separatorRegex}
     * @return the parts of the word, or a one-element list holding {@code word} after a failure
     */
    private final List<String> splitSeparators(String word, String separators) {
        try {
            // NOTE(review): `splitSeparators.1.INSTANCE` is decompiler output naming a generated
            // replacement function whose body is not part of this file; presumably it pads each
            // match with spaces - confirm against the original source.
            String rewritten = this.separatorRegex(separators).replace((CharSequence)word, (Function1)splitSeparators.1.INSTANCE);
            String trimmed = StringsKt.trim((CharSequence)rewritten).toString();
            // Explicit overload (ignoreCase = false, limit = 0): the "$default" variant is
            // synthetic and cannot be called from Java source.
            return CollectionsKt.toList((Iterable)StringsKt.split((CharSequence)trimmed, new String[]{" "}, false, 0));
        }
        catch (Exception e) {
            // Deliberate best effort: a broken separator configuration must not stop tokenization.
            LoggersKt.error((KLogger)logger, (Throwable)e);
            return CollectionsKt.listOf(word);
        }
    }

    @Metadata(mv={1, 5, 1}, k=1, xi=48, d1={"\u00000\n\u0002\u0018\u0002\n\u0002\u0010\u0000\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\u0010\u000e\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0000\b\u0086\u0003\u0018\u00002\u00020\u0001B\u0007\b\u0002\u00a2\u0006\u0002\u0010\u0002J\u0016\u0010\t\u001a\b\u0012\u0004\u0012\u00020\u000b0\n2\u0006\u0010\f\u001a\u00020\rH\u0002R\u000e\u0010\u0003\u001a\u00020\u0004X\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u001a\u0010\u0005\u001a\u000e\u0012\u0004\u0012\u00020\u0007\u0012\u0004\u0012\u00020\b0\u0006X\u0082\u0004\u00a2\u0006\u0002\n\u0000\u00a8\u0006\u000e"}, d2={"Lai/tock/nlp/stanford/StanfordTokenizer$Companion;", "", "()V", "logger", "Lmu/KLogger;", "separatorRegexpMap", "Ljava/util/concurrent/ConcurrentHashMap;", "", "Lkotlin/text/Regex;", "getTokenizerFactory", "Ledu/stanford/nlp/process/TokenizerFactory;", "Ledu/stanford/nlp/ling/CoreLabel;", "language", "Ljava/util/Locale;", "tock-nlp-model-stanford"})
    public static final class Companion {
        /**
         * Private no-argument constructor with an empty body. Within this file it is reached only
         * through the synthetic {@code Companion(DefaultConstructorMarker)} constructor.
         */
        private Companion() {
        }

        /**
         * Selects the Stanford tokenizer factory for the language code of {@code language}.
         *
         * <p>{@code "fr"} gets a configured French factory, {@code "es"} the Spanish one; {@code "en"}
         * and every other code (including a null code) get the PTB factory created with empty options.
         *
         * @param language locale whose language code drives the choice
         * @return the tokenizer factory, never null
         * @throws IllegalStateException if the French factory cannot be configured through reflection
         */
        private final TokenizerFactory<CoreLabel> getTokenizerFactory(Locale language) {
            logger.trace((Function0<Object>)() -> "getting tokenizer for : " + language);
            String code = language.getLanguage();
            if ("fr".equals(code)) {
                return newFrenchTokenizerFactory();
            }
            if ("es".equals(code)) {
                TokenizerFactory<CoreLabel> spanish = SpanishTokenizer.SpanishTokenizerFactory.newCoreLabelTokenizerFactory();
                Intrinsics.checkNotNullExpressionValue((Object)spanish, (String)"{\n                    Sp\u2026ctory()\n                }");
                return spanish;
            }
            // "en" and the fallback share the same factory.
            TokenizerFactory<CoreLabel> ptb = PTBTokenizer.PTBTokenizerFactory.newCoreLabelTokenizerFactory((String)"");
            Intrinsics.checkNotNullExpressionValue((Object)ptb, (String)"{\n                    PT\u2026ory(\"\")\n                }");
            return ptb;
        }

        /**
         * Builds the French tokenizer factory with the {@code untokenizable=noneDelete} option and
         * with its non-public {@code splitContractionOption} field forced to {@code false}.
         */
        private static TokenizerFactory<CoreLabel> newFrenchTokenizerFactory() {
            TokenizerFactory<CoreLabel> french = FrenchTokenizer.FrenchTokenizerFactory.newTokenizerFactory();
            french.setOptions("untokenizable=noneDelete");
            try {
                // The field is only reachable through reflection.
                Field splitContraction = FrenchTokenizer.FrenchTokenizerFactory.class.getDeclaredField("splitContractionOption");
                splitContraction.setAccessible(true);
                splitContraction.set(french, false);
            }
            catch (ReflectiveOperationException e) {
                // Checked reflective failures must be handled in Java; the cause is preserved.
                throw new IllegalStateException("unable to set splitContractionOption on the French tokenizer factory", e);
            }
            return french;
        }

        /**
         * Compiler-generated public constructor that delegates to the private no-argument one.
         * The marker argument is never read; the enclosing class passes {@code null} when it
         * creates its static {@code Companion} instance.
         */
        public /* synthetic */ Companion(DefaultConstructorMarker $constructor_marker) {
            this();
        }
    }
}

