/*
 * Decompiled with CFR 0.152.
 */
package ai.tock.nlp.stanford;

import ai.tock.nlp.model.TokenizerContext;
import ai.tock.nlp.model.service.engine.NlpTokenizer;
import ai.tock.nlp.model.service.engine.TokenizerModelHolder;
import ai.tock.nlp.stanford.StanfordTokenizer;
import ai.tock.shared.LoggersKt;
import edu.stanford.nlp.international.french.process.FrenchTokenizer;
import edu.stanford.nlp.international.spanish.process.SpanishTokenizer;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.process.TokenizerFactory;
import java.io.Reader;
import java.io.StringReader;
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Locale;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import kotlin.Metadata;
import kotlin.collections.CollectionsKt;
import kotlin.jvm.functions.Function0;
import kotlin.jvm.functions.Function1;
import kotlin.jvm.internal.DefaultConstructorMarker;
import kotlin.jvm.internal.Intrinsics;
import kotlin.jvm.internal.SourceDebugExtension;
import kotlin.text.Regex;
import kotlin.text.StringsKt;
import mu.KLogger;
import mu.KotlinLogging;
import org.jetbrains.annotations.NotNull;

@Metadata(mv={1, 9, 0}, k=1, xi=48, d1={"\u0000>\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0000\n\u0002\u0010\u000e\n\u0000\n\u0002\u0010 \n\u0002\b\u0002\n\u0002\u0010\u0011\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0004\b\u0000\u0018\u0000 \u00152\u00020\u0001:\u0001\u0015B\r\u0012\u0006\u0010\u0002\u001a\u00020\u0003\u00a2\u0006\u0002\u0010\u0004J\u0010\u0010\b\u001a\u00020\t2\u0006\u0010\n\u001a\u00020\u000bH\u0002J\u001e\u0010\f\u001a\b\u0012\u0004\u0012\u00020\u000b0\r2\u0006\u0010\u000e\u001a\u00020\u000b2\u0006\u0010\n\u001a\u00020\u000bH\u0002J#\u0010\u000f\u001a\b\u0012\u0004\u0012\u00020\u000b0\u00102\u0006\u0010\u0011\u001a\u00020\u00122\u0006\u0010\u0013\u001a\u00020\u000bH\u0016\u00a2\u0006\u0002\u0010\u0014R\u0014\u0010\u0005\u001a\b\u0012\u0004\u0012\u00020\u00070\u0006X\u0082\u0004\u00a2\u0006\u0002\n\u0000\u00a8\u0006\u0016"}, d2={"Lai/tock/nlp/stanford/StanfordTokenizer;", "Lai/tock/nlp/model/service/engine/NlpTokenizer;", "model", "Lai/tock/nlp/model/service/engine/TokenizerModelHolder;", "(Lai/tock/nlp/model/service/engine/TokenizerModelHolder;)V", "tokenizerFactory", "Ledu/stanford/nlp/process/TokenizerFactory;", "Ledu/stanford/nlp/ling/CoreLabel;", "separatorRegex", "Lkotlin/text/Regex;", "separators", "", "splitSeparators", "", "word", "tokenize", "", "context", "Lai/tock/nlp/model/TokenizerContext;", "text", "(Lai/tock/nlp/model/TokenizerContext;Ljava/lang/String;)[Ljava/lang/String;", "Companion", "tock-nlp-model-stanford"})
@SourceDebugExtension(value={"SMAP\nStanfordTokenizer.kt\nKotlin\n*S Kotlin\n*F\n+ 1 StanfordTokenizer.kt\nai/tock/nlp/stanford/StanfordTokenizer\n+ 2 _Collections.kt\nkotlin/collections/CollectionsKt___CollectionsKt\n+ 3 ArraysJVM.kt\nkotlin/collections/ArraysKt__ArraysJVMKt\n+ 4 MapsJVM.kt\nkotlin/collections/MapsKt__MapsJVMKt\n+ 5 fake.kt\nkotlin/jvm/internal/FakeKt\n*L\n1#1,124:1\n1368#2:125\n1454#2,5:126\n1557#2:135\n1628#2,3:136\n37#3,2:131\n72#4,2:133\n1#5:139\n*S KotlinDebug\n*F\n+ 1 StanfordTokenizer.kt\nai/tock/nlp/stanford/StanfordTokenizer\n*L\n74#1:125\n74#1:126,5\n104#1:135\n104#1:136,3\n95#1:131,2\n99#1:133,2\n99#1:139\n*E\n"})
public final class StanfordTokenizer
extends NlpTokenizer {
    /** Singleton instance of the nested {@code Companion} class; the constructor calls {@code getTokenizerFactory} through it. */
    @NotNull
    public static final Companion Companion = new Companion(null);
    /** Factory used by {@code tokenize} to build a tokenizer per input text; assigned once in the constructor from the model's language. */
    @NotNull
    private final TokenizerFactory<CoreLabel> tokenizerFactory;
    /** Logger shared by this class and its companion. The initializer refers to a compiled lambda class that is not visible in this file. */
    @NotNull
    private static final KLogger logger = KotlinLogging.INSTANCE.logger((Function0)Companion.logger.1.INSTANCE);
    /** Cache of compiled separator regexes, keyed by the raw separators string; read and filled by {@code separatorRegex}. */
    @NotNull
    private static final ConcurrentHashMap<String, Regex> separatorRegexpMap = new ConcurrentHashMap();

    public StanfordTokenizer(@NotNull TokenizerModelHolder model) {
        Intrinsics.checkNotNullParameter((Object)model, (String)"model");
        super(model);
        this.tokenizerFactory = StanfordTokenizer.Companion.getTokenizerFactory(model.getLanguage());
    }

    /*
     * WARNING - void declaration
     */
    @NotNull
    public String[] tokenize(@NotNull TokenizerContext context, @NotNull String text) {
        List list;
        void $this$flatMapTo$iv$iv;
        Intrinsics.checkNotNullParameter((Object)context, (String)"context");
        Intrinsics.checkNotNullParameter((Object)text, (String)"text");
        List list2 = this.tokenizerFactory.getTokenizer((Reader)new StringReader(text)).tokenize();
        Intrinsics.checkNotNullExpressionValue((Object)list2, (String)"tokenize(...)");
        Iterable $this$flatMap$iv = list2;
        boolean $i$f$flatMap = false;
        Iterable iterable = $this$flatMap$iv;
        Collection destination$iv$iv = new ArrayList();
        boolean $i$f$flatMapTo = false;
        for (Object element$iv$iv : $this$flatMapTo$iv$iv) {
            CoreLabel coreLabel = (CoreLabel)element$iv$iv;
            boolean bl = false;
            String word = coreLabel.originalText();
            Intrinsics.checkNotNull((Object)word);
            String string = this.getModel().getConfiguration().getTokenizerConfiguration().getProperties().getProperty("tock_stanford_tokens_separators");
            Intrinsics.checkNotNullExpressionValue((Object)string, (String)"getProperty(...)");
            Iterable list$iv$iv = this.splitSeparators(word, string);
            CollectionsKt.addAll((Collection)destination$iv$iv, (Iterable)list$iv$iv);
        }
        List it = (List)destination$iv$iv;
        boolean bl = false;
        if (it.isEmpty()) {
            if (((CharSequence)((Object)StringsKt.trim((CharSequence)text)).toString()).length() == 0) {
                list = CollectionsKt.emptyList();
            } else {
                logger.warn((Function0)new Function0<Object>(text){
                    final /* synthetic */ String $text;
                    {
                        this.$text = $text;
                        super(0);
                    }

                    public final Object invoke() {
                        return "empty token list for " + this.$text + ", do not split";
                    }
                });
                list = CollectionsKt.listOf((Object)((Object)StringsKt.trim((CharSequence)text)).toString());
            }
        } else {
            list = it;
        }
        List rawTokens2 = list;
        logger.debug((Function0)new Function0<Object>((List<String>)rawTokens2){
            final /* synthetic */ List<String> $rawTokens;
            {
                this.$rawTokens = $rawTokens;
                super(0);
            }

            public final Object invoke() {
                return this.$rawTokens;
            }
        });
        Collection $this$toTypedArray$iv = rawTokens2;
        boolean $i$f$toTypedArray = false;
        Collection thisCollection$iv = $this$toTypedArray$iv;
        return thisCollection$iv.toArray(new String[0]);
    }

    /*
     * WARNING - void declaration
     */
    private final Regex separatorRegex(String separators) {
        ConcurrentMap $this$getOrPut$iv = separatorRegexpMap;
        boolean $i$f$getOrPut = false;
        Object object = $this$getOrPut$iv.get(separators);
        if (object == null) {
            void $this$mapTo$iv$iv;
            void $this$map$iv;
            boolean bl = false;
            logger.info((Function0)new Function0<Object>(separators){
                final /* synthetic */ String $separators;
                {
                    this.$separators = $separators;
                    super(0);
                }

                public final Object invoke() {
                    return "using token separators: " + this.$separators;
                }
            });
            Object object2 = new String[]{","};
            object2 = StringsKt.split$default((CharSequence)StringsKt.replace$default((String)separators, (String)"\\,", (String)"_comma_", (boolean)false, (int)4, null), (String[])object2, (boolean)false, (int)0, (int)6, null);
            boolean $i$f$map = false;
            void var7_7 = $this$map$iv;
            Collection destination$iv$iv = new ArrayList(CollectionsKt.collectionSizeOrDefault((Iterable)$this$map$iv, (int)10));
            boolean $i$f$mapTo = false;
            for (Object item$iv$iv : $this$mapTo$iv$iv) {
                void it;
                String string = (String)item$iv$iv;
                Collection collection = destination$iv$iv;
                boolean bl2 = false;
                collection.add(StringsKt.replace$default((String)it, (String)"_comma_", (String)",", (boolean)false, (int)4, null));
            }
            List s = (List)destination$iv$iv;
            Regex default$iv = new Regex(CollectionsKt.joinToString$default((Iterable)s, (CharSequence)"|", null, null, (int)0, null, null, (int)62, null));
            boolean bl3 = false;
            object = $this$getOrPut$iv.putIfAbsent(separators, default$iv);
            if (object == null) {
                object = default$iv;
            }
        }
        Intrinsics.checkNotNullExpressionValue(object, (String)"getOrPut(...)");
        return (Regex)object;
    }

    /**
     * Splits a single token on the configured separators.
     *
     * <p>Every match of the separator regex in {@code word} is rewritten by a replacement
     * function, then the result is trimmed and split on single spaces. Any exception is
     * logged and {@code word} is returned unsplit as a one-element list.
     *
     * @param word the token to split
     * @param separators the raw separators specification handed to {@code separatorRegex}
     * @return the pieces of {@code word}, or just {@code word} if splitting failed
     */
    private final List<String> splitSeparators(String word, String separators) {
        try {
            Regex separatorPattern = this.separatorRegex(separators);
            // NOTE(review): the replacement lambda is a compiled class not visible in this file;
            // presumably it pads each match with spaces so the split below isolates it — confirm.
            String rewritten = separatorPattern.replace((CharSequence)word, (Function1)splitSeparators.1.INSTANCE);
            String trimmed = StringsKt.trim((CharSequence)rewritten).toString();
            List<String> pieces = StringsKt.split((CharSequence)trimmed, new String[]{" "}, false, 0);
            return CollectionsKt.toList(pieces);
        }
        catch (Exception e) {
            LoggersKt.error((KLogger)logger, (Throwable)e);
            return CollectionsKt.listOf(word);
        }
    }

    @Metadata(mv={1, 9, 0}, k=1, xi=48, d1={"\u00000\n\u0002\u0018\u0002\n\u0002\u0010\u0000\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\u0010\u000e\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0000\b\u0086\u0003\u0018\u00002\u00020\u0001B\u0007\b\u0002\u00a2\u0006\u0002\u0010\u0002J\u0016\u0010\t\u001a\b\u0012\u0004\u0012\u00020\u000b0\n2\u0006\u0010\f\u001a\u00020\rH\u0002R\u000e\u0010\u0003\u001a\u00020\u0004X\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u001a\u0010\u0005\u001a\u000e\u0012\u0004\u0012\u00020\u0007\u0012\u0004\u0012\u00020\b0\u0006X\u0082\u0004\u00a2\u0006\u0002\n\u0000\u00a8\u0006\u000e"}, d2={"Lai/tock/nlp/stanford/StanfordTokenizer$Companion;", "", "()V", "logger", "Lmu/KLogger;", "separatorRegexpMap", "Ljava/util/concurrent/ConcurrentHashMap;", "", "Lkotlin/text/Regex;", "getTokenizerFactory", "Ledu/stanford/nlp/process/TokenizerFactory;", "Ledu/stanford/nlp/ling/CoreLabel;", "language", "Ljava/util/Locale;", "tock-nlp-model-stanford"})
    public static final class Companion {
        private Companion() {
        }

        /**
         * Selects the Stanford tokenizer factory for a language.
         *
         * <p>{@code "fr"} gets the French factory, with the option string
         * {@code untokenizable=noneDelete} and its {@code splitContractionOption} field set
         * to {@code false} by reflection. {@code "es"} gets the Spanish factory. {@code "en"}
         * and every other language code get the PTB factory created with an empty option string.
         *
         * @param language the locale whose language code picks the factory
         * @return a non-null tokenizer factory producing {@code CoreLabel} tokens
         */
        private final TokenizerFactory<CoreLabel> getTokenizerFactory(Locale language) {
            logger.trace((Function0<Object>)() -> "getting tokenizer for : " + language);
            String code = language.getLanguage();

            if ("fr".equals(code)) {
                TokenizerFactory<CoreLabel> french = FrenchTokenizer.FrenchTokenizerFactory.newTokenizerFactory();
                french.setOptions("untokenizable=noneDelete");
                // The field is set reflectively rather than through a setter.
                // NOTE(review): these reflective calls can throw checked exceptions; they are left
                // unwrapped so that whatever they throw still propagates unchanged.
                Field splitContraction = FrenchTokenizer.FrenchTokenizerFactory.class.getDeclaredField("splitContractionOption");
                splitContraction.setAccessible(true);
                splitContraction.set(french, false);
                Intrinsics.checkNotNull((Object)french);
                return french;
            }

            if ("es".equals(code)) {
                TokenizerFactory<CoreLabel> spanish = SpanishTokenizer.SpanishTokenizerFactory.newCoreLabelTokenizerFactory();
                Intrinsics.checkNotNull((Object)spanish);
                return spanish;
            }

            // "en", a null code and any other language share the PTB tokenizer.
            TokenizerFactory<CoreLabel> ptb = PTBTokenizer.PTBTokenizerFactory.newCoreLabelTokenizerFactory((String)"");
            Intrinsics.checkNotNull((Object)ptb);
            return ptb;
        }

        public /* synthetic */ Companion(DefaultConstructorMarker $constructor_marker) {
            this();
        }
    }
}

