/*
 * Decompiled with CFR 0.152.
 */
package org.apache.solr.handler.clustering.carrot2;

import java.io.IOException;
import java.io.Reader;
import java.lang.invoke.MethodHandles;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.carrot2.core.LanguageCode;
import org.carrot2.text.analysis.ExtendedWhitespaceTokenizer;
import org.carrot2.text.analysis.ITokenizer;
import org.carrot2.text.linguistic.ITokenizerFactory;
import org.carrot2.text.util.MutableCharArray;
import org.carrot2.util.ExceptionUtils;
import org.carrot2.util.ReflectionUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Tokenizer factory that hands out a reflection-backed Chinese tokenizer for
 * {@link LanguageCode#CHINESE_SIMPLIFIED} and an {@link ExtendedWhitespaceTokenizer}
 * for every other language.
 *
 * <p>The Chinese tokenizer is built from the classes
 * {@code org.apache.lucene.analysis.cn.smart.SentenceTokenizer} and
 * {@code org.apache.lucene.analysis.cn.smart.WordTokenFilter}, which are looked up by
 * name at runtime. If they cannot be loaded or instantiated, the whitespace tokenizer
 * is returned instead.
 */
public class LuceneCarrot2TokenizerFactory
implements ITokenizerFactory {
    private static final Logger logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

    /**
     * Returns a tokenizer for the given language.
     *
     * @param language the language to tokenize; must not be {@code null}
     *                 (the {@code switch} below throws {@link NullPointerException} on {@code null})
     * @return the Chinese tokenizer (or its whitespace fallback) for
     *         {@code CHINESE_SIMPLIFIED}, otherwise a new {@link ExtendedWhitespaceTokenizer}
     */
    public ITokenizer getTokenizer(LanguageCode language) {
        switch (language) {
            case CHINESE_SIMPLIFIED:
                return ChineseTokenizerFactory.createTokenizer();
            default:
                return new ExtendedWhitespaceTokenizer();
        }
    }

    /**
     * Holder for the Chinese tokenizer machinery. Its static initializer probes for the
     * Smart Chinese classes the first time this holder is initialized.
     */
    private static final class ChineseTokenizerFactory {
        private static final String WORD_TOKEN_FILTER_CLASS = "org.apache.lucene.analysis.cn.smart.WordTokenFilter";
        private static final String SENTENCE_TOKENIZER_CLASS = "org.apache.lucene.analysis.cn.smart.SentenceTokenizer";

        static {
            try {
                ReflectionUtils.classForName(WORD_TOKEN_FILTER_CLASS, false);
                ReflectionUtils.classForName(SENTENCE_TOKENIZER_CLASS, false);
            } catch (Throwable e) {
                // Pass the throwable along so the log shows why the lookup failed.
                logger.warn("Could not instantiate Smart Chinese Analyzer, clustering quality of Chinese content may be degraded. For best quality clusters, make sure Lucene's Smart Chinese Analyzer JAR is in the classpath", e);
                // Errors are propagated; any other throwable is tolerated so the
                // whitespace fallback in createTokenizer() can still be used.
                if (e instanceof Error) {
                    throw (Error) e;
                }
            }
        }

        private ChineseTokenizerFactory() {
        }

        /**
         * Creates the Chinese tokenizer, falling back to an
         * {@link ExtendedWhitespaceTokenizer} if construction fails for any reason
         * other than {@link OutOfMemoryError}, which is rethrown.
         *
         * @return a new tokenizer instance, never {@code null}
         */
        static ITokenizer createTokenizer() {
            try {
                return new ChineseTokenizer();
            } catch (Throwable e) {
                if (e instanceof OutOfMemoryError) {
                    throw (OutOfMemoryError) e;
                }
                // Deliberate best-effort fallback; keep the cause visible at debug level.
                logger.debug("Falling back to whitespace tokenizer for Chinese content", e);
                return new ExtendedWhitespaceTokenizer();
            }
        }

        /**
         * {@link ITokenizer} that reads tokens from a reflectively constructed
         * sentence tokenizer / word filter chain.
         *
         * <p>{@link #reset(Reader)} must be called before {@link #nextToken()} or
         * {@link #setTermBuffer(MutableCharArray)}: the token stream and term attribute
         * are only assigned there.
         */
        private static final class ChineseTokenizer
        implements ITokenizer {
            // Values returned by nextToken().
            // NOTE(review): these presumably mirror the ITokenizer.TT_* constants
            // (inlined by the compiler) - confirm against the Carrot2 API.
            private static final short END_OF_STREAM = -1;
            private static final short PLAIN_TOKEN = 1;
            private static final short NUMERIC_TOKEN = 2;
            private static final short SINGLE_COMMA_TOKEN = 3;

            private static final Pattern numeric = Pattern.compile("[\\-+'$]?\\d+([:\\-/,.]?\\d+)*[%$]?");
            private Tokenizer sentenceTokenizer;
            private TokenStream wordTokenFilter;
            private CharTermAttribute term = null;
            // Reusable view over the current term so the regex can run without copying.
            private final MutableCharArray tempCharSequence = new MutableCharArray(new char[0]);
            private final Class<?> tokenFilterClass;

            /**
             * Loads both Smart Chinese classes by name and instantiates the sentence
             * tokenizer through its {@code (Reader)} constructor with a {@code null} reader.
             *
             * @throws Exception if a class or constructor cannot be found or invoked
             */
            private ChineseTokenizer() throws Exception {
                Class<?> tokenizerClass = ReflectionUtils.classForName(SENTENCE_TOKENIZER_CLASS, false);
                this.sentenceTokenizer = (Tokenizer) tokenizerClass.getConstructor(Reader.class).newInstance(new Object[]{null});
                this.tokenFilterClass = ReflectionUtils.classForName(WORD_TOKEN_FILTER_CLASS, false);
            }

            /**
             * Advances to the next token and classifies it.
             *
             * @return {@code -1} when the stream has no more tokens; {@code 3} when the
             *         token is a single {@code ','} character; {@code 2} when it matches
             *         the numeric pattern; {@code 1} otherwise
             * @throws IOException if the underlying token stream fails
             */
            public short nextToken() throws IOException {
                if (!this.wordTokenFilter.incrementToken()) {
                    return END_OF_STREAM;
                }
                char[] image = this.term.buffer();
                int length = this.term.length();
                this.tempCharSequence.reset(image, 0, length);
                if (length == 1 && image[0] == ',') {
                    return SINGLE_COMMA_TOKEN;
                }
                if (numeric.matcher(this.tempCharSequence).matches()) {
                    return NUMERIC_TOKEN;
                }
                return PLAIN_TOKEN;
            }

            /**
             * Points {@code array} at the current term's buffer and length.
             *
             * @param array the array to reset onto the current term
             */
            public void setTermBuffer(MutableCharArray array) {
                array.reset(this.term.buffer(), 0, this.term.length());
            }

            /**
             * Re-targets the tokenizer at a new input: sets the reader on the sentence
             * tokenizer, builds a fresh word filter around it and fetches its term attribute.
             *
             * @param input the reader to tokenize
             * @throws RuntimeException wrapping any exception raised while rebuilding the chain
             */
            public void reset(Reader input) {
                try {
                    this.sentenceTokenizer.setReader(input);
                    // NOTE(review): the stream is never reset() or close()d here; Lucene's
                    // TokenStream workflow may require both - confirm for the Lucene version in use.
                    this.wordTokenFilter = (TokenStream) this.tokenFilterClass.getConstructor(TokenStream.class).newInstance(this.sentenceTokenizer);
                    this.term = this.wordTokenFilter.addAttribute(CharTermAttribute.class);
                } catch (Exception e) {
                    throw ExceptionUtils.wrapAsRuntimeException(e);
                }
            }
        }
    }
}

