/*
 * Decompiled with CFR 0.152.
 */
package ai.tock.nlp.stanford;

import ai.tock.nlp.model.TokenizerContext;
import ai.tock.nlp.model.service.engine.NlpTokenizer;
import ai.tock.nlp.model.service.engine.TokenizerModelHolder;
import ai.tock.shared.LoggersKt;
import edu.stanford.nlp.international.french.process.FrenchTokenizer;
import edu.stanford.nlp.international.spanish.process.SpanishTokenizer;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.process.TokenizerFactory;
import java.io.Reader;
import java.io.StringReader;
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Locale;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import kotlin.Metadata;
import kotlin.Unit;
import kotlin.collections.ArraysKt;
import kotlin.collections.CollectionsKt;
import kotlin.jvm.internal.DefaultConstructorMarker;
import kotlin.jvm.internal.Intrinsics;
import kotlin.jvm.internal.SourceDebugExtension;
import kotlin.text.MatchResult;
import kotlin.text.Regex;
import kotlin.text.StringsKt;
import mu.KLogger;
import mu.KotlinLogging;
import org.jetbrains.annotations.NotNull;

@Metadata(mv={2, 0, 0}, k=1, xi=48, d1={"\u0000>\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0000\n\u0002\u0010\u0011\n\u0002\u0010\u000e\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0010 \n\u0002\b\u0003\b\u0000\u0018\u0000 \u00162\u00020\u0001:\u0001\u0016B\u000f\u0012\u0006\u0010\u0002\u001a\u00020\u0003\u00a2\u0006\u0004\b\u0004\u0010\u0005J#\u0010\t\u001a\b\u0012\u0004\u0012\u00020\u000b0\n2\u0006\u0010\f\u001a\u00020\r2\u0006\u0010\u000e\u001a\u00020\u000bH\u0016\u00a2\u0006\u0002\u0010\u000fJ\u0010\u0010\u0010\u001a\u00020\u00112\u0006\u0010\u0012\u001a\u00020\u000bH\u0002J\u001e\u0010\u0013\u001a\b\u0012\u0004\u0012\u00020\u000b0\u00142\u0006\u0010\u0015\u001a\u00020\u000b2\u0006\u0010\u0012\u001a\u00020\u000bH\u0002R\u0014\u0010\u0006\u001a\b\u0012\u0004\u0012\u00020\b0\u0007X\u0082\u0004\u00a2\u0006\u0002\n\u0000\u00a8\u0006\u0017"}, d2={"Lai/tock/nlp/stanford/StanfordTokenizer;", "Lai/tock/nlp/model/service/engine/NlpTokenizer;", "model", "Lai/tock/nlp/model/service/engine/TokenizerModelHolder;", "<init>", "(Lai/tock/nlp/model/service/engine/TokenizerModelHolder;)V", "tokenizerFactory", "Ledu/stanford/nlp/process/TokenizerFactory;", "Ledu/stanford/nlp/ling/CoreLabel;", "tokenize", "", "", "context", "Lai/tock/nlp/model/TokenizerContext;", "text", "(Lai/tock/nlp/model/TokenizerContext;Ljava/lang/String;)[Ljava/lang/String;", "separatorRegex", "Lkotlin/text/Regex;", "separators", "splitSeparators", "", "word", "Companion", "tock-nlp-model-stanford"})
@SourceDebugExtension(value={"SMAP\nStanfordTokenizer.kt\nKotlin\n*S Kotlin\n*F\n+ 1 StanfordTokenizer.kt\nai/tock/nlp/stanford/StanfordTokenizer\n+ 2 _Collections.kt\nkotlin/collections/CollectionsKt___CollectionsKt\n+ 3 ArraysJVM.kt\nkotlin/collections/ArraysKt__ArraysJVMKt\n+ 4 MapsJVM.kt\nkotlin/collections/MapsKt__MapsJVMKt\n+ 5 fake.kt\nkotlin/jvm/internal/FakeKt\n*L\n1#1,124:1\n1368#2:125\n1454#2,5:126\n1557#2:135\n1628#2,3:136\n37#3,2:131\n72#4,2:133\n1#5:139\n1#5:140\n*S KotlinDebug\n*F\n+ 1 StanfordTokenizer.kt\nai/tock/nlp/stanford/StanfordTokenizer\n*L\n74#1:125\n74#1:126,5\n104#1:135\n104#1:136,3\n95#1:131,2\n99#1:133,2\n99#1:139\n*E\n"})
public final class StanfordTokenizer
extends NlpTokenizer {
    /** Holder instance for the nested {@link Companion} helper class. */
    @NotNull
    public static final Companion Companion = new Companion(null);
    /** Tokenizer factory obtained once, in the constructor, from the model's language. */
    @NotNull
    private final TokenizerFactory<CoreLabel> tokenizerFactory;
    /** Logger shared by every instance of this class. */
    @NotNull
    private static final KLogger logger = KotlinLogging.INSTANCE.logger(StanfordTokenizer::logger$lambda$9);
    /** Cache of compiled separator regexes, keyed by the raw separators configuration string. */
    @NotNull
    private static final ConcurrentHashMap<String, Regex> separatorRegexpMap = new ConcurrentHashMap<>();

    /**
     * Creates a tokenizer bound to {@code model} and selects the tokenizer factory matching the
     * model's language.
     *
     * @param model the tokenizer model holder; must not be null
     */
    public StanfordTokenizer(@NotNull TokenizerModelHolder model) {
        // The null check is performed inside the argument expression so that it still runs before
        // the superclass constructor, without placing a statement ahead of super(...).
        super(StanfordTokenizer.requireModel(model));
        this.tokenizerFactory = StanfordTokenizer.Companion.getTokenizerFactory(model.getLanguage());
    }

    /** Returns {@code model} unchanged after checking that it is not null. */
    private static TokenizerModelHolder requireModel(TokenizerModelHolder model) {
        Intrinsics.checkNotNullParameter((Object)model, (String)"model");
        return model;
    }

    /*
     * WARNING - void declaration
     */
    @NotNull
    public String[] tokenize(@NotNull TokenizerContext context, @NotNull String text) {
        List list;
        void $this$flatMapTo$iv$iv;
        Intrinsics.checkNotNullParameter((Object)context, (String)"context");
        Intrinsics.checkNotNullParameter((Object)text, (String)"text");
        List list2 = this.tokenizerFactory.getTokenizer((Reader)new StringReader(text)).tokenize();
        Intrinsics.checkNotNullExpressionValue((Object)list2, (String)"tokenize(...)");
        Iterable $this$flatMap$iv = list2;
        boolean $i$f$flatMap = false;
        Iterable iterable = $this$flatMap$iv;
        Collection destination$iv$iv = new ArrayList();
        boolean $i$f$flatMapTo = false;
        for (Object element$iv$iv : $this$flatMapTo$iv$iv) {
            CoreLabel coreLabel = (CoreLabel)element$iv$iv;
            boolean bl = false;
            String word = coreLabel.originalText();
            Intrinsics.checkNotNull((Object)word);
            String string = this.getModel().getConfiguration().getTokenizerConfiguration().getProperties().getProperty("tock_stanford_tokens_separators");
            Intrinsics.checkNotNullExpressionValue((Object)string, (String)"getProperty(...)");
            Iterable list$iv$iv = this.splitSeparators(word, string);
            CollectionsKt.addAll((Collection)destination$iv$iv, (Iterable)list$iv$iv);
        }
        List it = (List)destination$iv$iv;
        boolean bl = false;
        if (it.isEmpty()) {
            if (((CharSequence)((Object)StringsKt.trim((CharSequence)text)).toString()).length() == 0) {
                list = CollectionsKt.emptyList();
            } else {
                logger.warn(() -> StanfordTokenizer.tokenize$lambda$2$lambda$1(text));
                list = CollectionsKt.listOf((Object)((Object)StringsKt.trim((CharSequence)text)).toString());
            }
        } else {
            list = it;
        }
        List rawTokens = list;
        logger.debug(() -> StanfordTokenizer.tokenize$lambda$3(rawTokens));
        Collection $this$toTypedArray$iv = rawTokens;
        boolean $i$f$toTypedArray = false;
        Collection thisCollection$iv = $this$toTypedArray$iv;
        return thisCollection$iv.toArray(new String[0]);
    }

    /*
     * WARNING - void declaration
     */
    private final Regex separatorRegex(String separators) {
        ConcurrentMap $this$getOrPut$iv = separatorRegexpMap;
        boolean $i$f$getOrPut = false;
        Object object = $this$getOrPut$iv.get(separators);
        if (object == null) {
            void $this$mapTo$iv$iv;
            void $this$map$iv;
            boolean bl = false;
            logger.info(() -> StanfordTokenizer.separatorRegex$lambda$6$lambda$4(separators));
            Object object2 = new String[]{","};
            object2 = StringsKt.split$default((CharSequence)StringsKt.replace$default((String)separators, (String)"\\,", (String)"_comma_", (boolean)false, (int)4, null), (String[])object2, (boolean)false, (int)0, (int)6, null);
            boolean $i$f$map = false;
            void var7_7 = $this$map$iv;
            Collection destination$iv$iv = new ArrayList(CollectionsKt.collectionSizeOrDefault((Iterable)$this$map$iv, (int)10));
            boolean $i$f$mapTo = false;
            for (Object item$iv$iv : $this$mapTo$iv$iv) {
                void it;
                String string = (String)item$iv$iv;
                Collection collection = destination$iv$iv;
                boolean bl2 = false;
                collection.add(StringsKt.replace$default((String)it, (String)"_comma_", (String)",", (boolean)false, (int)4, null));
            }
            List s = (List)destination$iv$iv;
            Regex default$iv = new Regex(CollectionsKt.joinToString$default((Iterable)s, (CharSequence)"|", null, null, (int)0, null, null, (int)62, null));
            boolean bl3 = false;
            object = $this$getOrPut$iv.putIfAbsent(separators, default$iv);
            if (object == null) {
                object = default$iv;
            }
        }
        Intrinsics.checkNotNullExpressionValue(object, (String)"getOrPut(...)");
        return (Regex)object;
    }

    /**
     * Splits {@code word} around the configured separators, keeping the separators as tokens.
     *
     * <p>Every separator match is rewritten by {@code splitSeparators$lambda$8} so that it is
     * delimited by spaces; the result is trimmed and split on single spaces. No filtering of
     * empty strings is applied to the split result.
     *
     * @param word the token text to split
     * @param separators the raw separators configuration string
     * @return the pieces of {@code word}; if anything fails, the error is logged and {@code word}
     *         is returned as the only element
     */
    private final List<String> splitSeparators(String word, String separators) {
        try {
            String spaced = this.separatorRegex(separators).replace((CharSequence)word, StanfordTokenizer::splitSeparators$lambda$8);
            String trimmed = StringsKt.trim((CharSequence)spaced).toString();
            List<String> pieces = StringsKt.split$default((CharSequence)trimmed, (String[])new String[]{" "}, (boolean)false, (int)0, (int)6, null);
            return CollectionsKt.toList(pieces);
        }
        catch (Exception e) {
            // Deliberate best effort: a failing separator configuration must not break tokenization.
            LoggersKt.error((KLogger)logger, (Throwable)e);
            return CollectionsKt.listOf(word);
        }
    }

    /** Builds the warning message logged when a non-blank text produced no token. */
    private static final Object tokenize$lambda$2$lambda$1(String $text) {
        Intrinsics.checkNotNullParameter($text, "$text");
        String message = "empty token list for " + $text + ", do not split";
        return message;
    }

    /** Supplies the raw token list itself as the debug log payload. */
    private static final Object tokenize$lambda$3(List $rawTokens) {
        Intrinsics.checkNotNullParameter($rawTokens, "$rawTokens");
        Object payload = $rawTokens;
        return payload;
    }

    /** Builds the info message logged when a separators string is compiled for the first time. */
    private static final Object separatorRegex$lambda$6$lambda$4(String $separators) {
        Intrinsics.checkNotNullParameter($separators, "$separators");
        String message = "using token separators: " + $separators;
        return message;
    }

    /**
     * Computes the replacement text for one separator match.
     *
     * <p>A one-character match is surrounded by a space on each side; any other match has its
     * characters joined with single spaces.
     *
     * @param it the separator match
     * @return the text to substitute for the match
     */
    private static final CharSequence splitSeparators$lambda$8(MatchResult it) {
        Intrinsics.checkNotNullParameter(it, "it");
        String matched = it.getValue();
        if (matched.length() != 1) {
            char[] chars = matched.toCharArray();
            Intrinsics.checkNotNullExpressionValue(chars, "toCharArray(...)");
            return ArraysKt.joinToString$default(chars, (CharSequence)" ", null, null, 0, null, null, 62, null);
        }
        return " " + matched + " ";
    }

    /** Empty function handed to {@code KotlinLogging.INSTANCE.logger(...)} when the class logger is created. */
    private static final Unit logger$lambda$9() {
        return Unit.INSTANCE;
    }

    @Metadata(mv={2, 0, 0}, k=1, xi=48, d1={"\u00000\n\u0002\u0018\u0002\n\u0002\u0010\u0000\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\u0010\u000e\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0000\b\u0086\u0003\u0018\u00002\u00020\u0001B\t\b\u0002\u00a2\u0006\u0004\b\u0002\u0010\u0003J\u0016\u0010\n\u001a\b\u0012\u0004\u0012\u00020\f0\u000b2\u0006\u0010\r\u001a\u00020\u000eH\u0002R\u000e\u0010\u0004\u001a\u00020\u0005X\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u001a\u0010\u0006\u001a\u000e\u0012\u0004\u0012\u00020\b\u0012\u0004\u0012\u00020\t0\u0007X\u0082\u0004\u00a2\u0006\u0002\n\u0000\u00a8\u0006\u000f"}, d2={"Lai/tock/nlp/stanford/StanfordTokenizer$Companion;", "", "<init>", "()V", "logger", "Lmu/KLogger;", "separatorRegexpMap", "Ljava/util/concurrent/ConcurrentHashMap;", "", "Lkotlin/text/Regex;", "getTokenizerFactory", "Ledu/stanford/nlp/process/TokenizerFactory;", "Ledu/stanford/nlp/ling/CoreLabel;", "language", "Ljava/util/Locale;", "tock-nlp-model-stanford"})
    public static final class Companion {
        /** Private no-op constructor; the synthetic constructor of this class delegates to it. */
        private Companion() {
        }

        /**
         * Returns the Stanford tokenizer factory to use for {@code language}.
         *
         * <p>{@code "fr"} gets the French tokenizer factory and {@code "es"} the Spanish one;
         * {@code "en"} and every other language code get the PTB tokenizer factory created with
         * empty options.
         *
         * @param language the locale whose language code selects the factory
         * @return a tokenizer factory producing {@link CoreLabel} tokens
         * @throws IllegalStateException if the French factory cannot be configured reflectively
         */
        private final TokenizerFactory<CoreLabel> getTokenizerFactory(Locale language) {
            logger.trace(() -> Companion.getTokenizerFactory$lambda$0(language));
            String code = language.getLanguage();
            // Guarded because switching on a null string would throw, whereas the default applies here.
            if (code != null) {
                switch (code) {
                    case "fr":
                        return Companion.newFrenchTokenizerFactory();
                    case "es": {
                        TokenizerFactory<CoreLabel> spanish = SpanishTokenizer.SpanishTokenizerFactory.newCoreLabelTokenizerFactory();
                        Intrinsics.checkNotNull((Object)spanish);
                        return spanish;
                    }
                    default:
                        // "en" and unknown codes share the PTB factory below.
                        break;
                }
            }
            TokenizerFactory<CoreLabel> ptb = PTBTokenizer.PTBTokenizerFactory.newCoreLabelTokenizerFactory((String)"");
            Intrinsics.checkNotNull((Object)ptb);
            return ptb;
        }

        /**
         * Creates the French tokenizer factory with the {@code untokenizable=noneDelete} option
         * and its {@code splitContractionOption} field forced to {@code false} through reflection.
         */
        private static TokenizerFactory<CoreLabel> newFrenchTokenizerFactory() {
            TokenizerFactory<CoreLabel> french = FrenchTokenizer.FrenchTokenizerFactory.newTokenizerFactory();
            french.setOptions("untokenizable=noneDelete");
            try {
                Field splitContraction = FrenchTokenizer.FrenchTokenizerFactory.class.getDeclaredField("splitContractionOption");
                splitContraction.setAccessible(true);
                splitContraction.set(french, false);
            }
            catch (ReflectiveOperationException e) {
                // Reflection raises checked exceptions that this method cannot declare; keep the cause.
                throw new IllegalStateException("unable to set splitContractionOption on the French tokenizer factory", e);
            }
            return french;
        }

        /** Builds the trace message logged each time a tokenizer factory is requested. */
        private static final Object getTokenizerFactory$lambda$0(Locale $language) {
            Intrinsics.checkNotNullParameter($language, "$language");
            String message = "getting tokenizer for : " + $language;
            return message;
        }

        /**
         * Synthetic constructor that delegates to the private no-arg constructor.
         *
         * @param $constructor_marker unused by this constructor
         */
        public /* synthetic */ Companion(DefaultConstructorMarker $constructor_marker) {
            this();
        }
    }
}

