/*
 * Decompiled with CFR 0.152.
 */
package org.graylog.shaded.opensearch2.org.opensearch.index.analysis;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.graylog.shaded.opensearch2.org.apache.lucene.analysis.CharArraySet;
import org.graylog.shaded.opensearch2.org.apache.lucene.analysis.ar.ArabicAnalyzer;
import org.graylog.shaded.opensearch2.org.apache.lucene.analysis.bg.BulgarianAnalyzer;
import org.graylog.shaded.opensearch2.org.apache.lucene.analysis.bn.BengaliAnalyzer;
import org.graylog.shaded.opensearch2.org.apache.lucene.analysis.br.BrazilianAnalyzer;
import org.graylog.shaded.opensearch2.org.apache.lucene.analysis.ca.CatalanAnalyzer;
import org.graylog.shaded.opensearch2.org.apache.lucene.analysis.ckb.SoraniAnalyzer;
import org.graylog.shaded.opensearch2.org.apache.lucene.analysis.cz.CzechAnalyzer;
import org.graylog.shaded.opensearch2.org.apache.lucene.analysis.da.DanishAnalyzer;
import org.graylog.shaded.opensearch2.org.apache.lucene.analysis.de.GermanAnalyzer;
import org.graylog.shaded.opensearch2.org.apache.lucene.analysis.el.GreekAnalyzer;
import org.graylog.shaded.opensearch2.org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.graylog.shaded.opensearch2.org.apache.lucene.analysis.es.SpanishAnalyzer;
import org.graylog.shaded.opensearch2.org.apache.lucene.analysis.et.EstonianAnalyzer;
import org.graylog.shaded.opensearch2.org.apache.lucene.analysis.eu.BasqueAnalyzer;
import org.graylog.shaded.opensearch2.org.apache.lucene.analysis.fa.PersianAnalyzer;
import org.graylog.shaded.opensearch2.org.apache.lucene.analysis.fi.FinnishAnalyzer;
import org.graylog.shaded.opensearch2.org.apache.lucene.analysis.fr.FrenchAnalyzer;
import org.graylog.shaded.opensearch2.org.apache.lucene.analysis.ga.IrishAnalyzer;
import org.graylog.shaded.opensearch2.org.apache.lucene.analysis.gl.GalicianAnalyzer;
import org.graylog.shaded.opensearch2.org.apache.lucene.analysis.hi.HindiAnalyzer;
import org.graylog.shaded.opensearch2.org.apache.lucene.analysis.hu.HungarianAnalyzer;
import org.graylog.shaded.opensearch2.org.apache.lucene.analysis.hy.ArmenianAnalyzer;
import org.graylog.shaded.opensearch2.org.apache.lucene.analysis.id.IndonesianAnalyzer;
import org.graylog.shaded.opensearch2.org.apache.lucene.analysis.it.ItalianAnalyzer;
import org.graylog.shaded.opensearch2.org.apache.lucene.analysis.lt.LithuanianAnalyzer;
import org.graylog.shaded.opensearch2.org.apache.lucene.analysis.lv.LatvianAnalyzer;
import org.graylog.shaded.opensearch2.org.apache.lucene.analysis.nl.DutchAnalyzer;
import org.graylog.shaded.opensearch2.org.apache.lucene.analysis.no.NorwegianAnalyzer;
import org.graylog.shaded.opensearch2.org.apache.lucene.analysis.pt.PortugueseAnalyzer;
import org.graylog.shaded.opensearch2.org.apache.lucene.analysis.ro.RomanianAnalyzer;
import org.graylog.shaded.opensearch2.org.apache.lucene.analysis.ru.RussianAnalyzer;
import org.graylog.shaded.opensearch2.org.apache.lucene.analysis.sv.SwedishAnalyzer;
import org.graylog.shaded.opensearch2.org.apache.lucene.analysis.th.ThaiAnalyzer;
import org.graylog.shaded.opensearch2.org.apache.lucene.analysis.tr.TurkishAnalyzer;
import org.graylog.shaded.opensearch2.org.opensearch.common.settings.Settings;
import org.graylog.shaded.opensearch2.org.opensearch.core.common.Strings;
import org.graylog.shaded.opensearch2.org.opensearch.env.Environment;
import org.graylog.shaded.opensearch2.org.opensearch.index.analysis.CustomMappingRuleParser;

/**
 * Static helpers for reading analysis-related word lists (stop words, articles,
 * common words, stem exclusions, custom mapping rules) from index {@link Settings},
 * either inline or from a file resolved against the node's config directory.
 */
public class Analysis {
    private static final Logger LOGGER = LogManager.getLogger(Analysis.class);

    /** Matches a line of the form {@code # => ...}, i.e. a rule whose left-hand side is a literal '#'. */
    private static final Pattern HASH_TAG_RULE_PATTERN = Pattern.compile("^\\s*#\\s*=>");

    /** Unmodifiable map from a named stop-word key (e.g. {@code _english_}) to that analyzer's default stop set. */
    public static final Map<String, Set<?>> NAMED_STOP_WORDS;

    /**
     * Reads the {@code stem_exclusion} setting.
     *
     * @param settings             settings to read from
     * @param defaultStemExclusion value returned when the setting is absent
     * @return {@link CharArraySet#EMPTY_SET} when the value is {@code _none_}; a case-sensitive
     *         set built from the configured list when one is present; otherwise
     *         {@code defaultStemExclusion}
     */
    public static CharArraySet parseStemExclusion(Settings settings, CharArraySet defaultStemExclusion) {
        String value = settings.get("stem_exclusion");
        if ("_none_".equals(value)) {
            return CharArraySet.EMPTY_SET;
        }
        List<String> stemExclusion = settings.getAsList("stem_exclusion", null);
        if (stemExclusion != null) {
            return new CharArraySet(stemExclusion, false);
        }
        return defaultStemExclusion;
    }

    /**
     * Resolves a word set for the setting {@code name}.
     *
     * <p>Resolution order: an inline value under {@code name} ({@code _none_} yields the empty
     * set); then whatever {@link #parseWordList(Environment, Settings, String, CustomMappingRuleParser)}
     * returns for the same prefix (which consults {@code name + "_path"}); then {@code defaultWords}.
     *
     * @param env          environment used to resolve file paths
     * @param settings     settings to read from
     * @param name         setting name / prefix
     * @param defaultWords value returned when nothing is configured (may be {@code null})
     * @param namedWords   optional map of named word sets that entries may reference; {@code null}
     *                     disables name expansion
     * @param ignoreCase   passed through to the resulting {@link CharArraySet}
     * @return the resolved word set, or {@code defaultWords}
     */
    public static CharArraySet parseWords(Environment env, Settings settings, String name, CharArraySet defaultWords, Map<String, Set<?>> namedWords, boolean ignoreCase) {
        String value = settings.get(name);
        if (value != null) {
            if ("_none_".equals(value)) {
                return CharArraySet.EMPTY_SET;
            }
            return resolveNamedWords(settings.getAsList(name), namedWords, ignoreCase);
        }
        List<String> pathLoadedWords = parseWordList(env, settings, name, s -> s);
        if (pathLoadedWords != null) {
            return resolveNamedWords(pathLoadedWords, namedWords, ignoreCase);
        }
        return defaultWords;
    }

    /**
     * Resolves the {@code common_words} setting, expanding entries found in {@link #NAMED_STOP_WORDS}.
     */
    public static CharArraySet parseCommonWords(Environment env, Settings settings, CharArraySet defaultCommonWords, boolean ignoreCase) {
        return parseWords(env, settings, "common_words", defaultCommonWords, NAMED_STOP_WORDS, ignoreCase);
    }

    /**
     * Resolves the {@code articles} setting. Case handling is taken from {@code articles_case}
     * (default {@code false}); there is no default set and no named-set expansion, so the
     * result is {@code null} when nothing is configured.
     */
    public static CharArraySet parseArticles(Environment env, Settings settings) {
        boolean articlesCase = settings.getAsBoolean("articles_case", false);
        return parseWords(env, settings, "articles", null, null, articlesCase);
    }

    /**
     * Resolves the {@code stopwords} setting, taking case handling from {@code stopwords_case}
     * (default {@code false}).
     */
    public static CharArraySet parseStopWords(Environment env, Settings settings, CharArraySet defaultStopWords) {
        boolean stopwordsCase = settings.getAsBoolean("stopwords_case", false);
        return parseStopWords(env, settings, defaultStopWords, stopwordsCase);
    }

    /**
     * Resolves the {@code stopwords} setting with explicit case handling, expanding entries
     * found in {@link #NAMED_STOP_WORDS}.
     */
    public static CharArraySet parseStopWords(Environment env, Settings settings, CharArraySet defaultStopWords, boolean ignoreCase) {
        return parseWords(env, settings, "stopwords", defaultStopWords, NAMED_STOP_WORDS, ignoreCase);
    }

    /**
     * Builds a {@link CharArraySet} from {@code words}; any entry that is a key of
     * {@code namedWords} is replaced by the contents of the set it names.
     */
    private static CharArraySet resolveNamedWords(Collection<String> words, Map<String, Set<?>> namedWords, boolean ignoreCase) {
        if (namedWords == null) {
            return new CharArraySet(words, ignoreCase);
        }
        CharArraySet resolved = new CharArraySet(words.size(), ignoreCase);
        for (String word : words) {
            if (namedWords.containsKey(word)) {
                resolved.addAll(namedWords.get(word));
            } else {
                resolved.add(word);
            }
        }
        return resolved;
    }

    /**
     * Loads the word list configured under {@code settingsPrefix} (inline or via
     * {@code settingsPrefix + "_path"}) into a {@link CharArraySet}.
     *
     * @return the word set, or {@code null} when no word list is configured; case handling
     *         comes from {@code settingsPrefix + "_case"} (default {@code false})
     */
    public static CharArraySet getWordSet(Environment env, Settings settings, String settingsPrefix) {
        List<String> wordList = parseWordList(env, settings, settingsPrefix, s -> s);
        if (wordList == null) {
            return null;
        }
        boolean ignoreCase = settings.getAsBoolean(settingsPrefix + "_case", false);
        return new CharArraySet(wordList, ignoreCase);
    }

    /**
     * Convenience overload that uses {@code settingPrefix + "_path"} as the file setting and
     * {@code settingPrefix} as the inline-list setting.
     */
    public static <T> List<T> parseWordList(Environment env, Settings settings, String settingPrefix, CustomMappingRuleParser<T> parser) {
        return parseWordList(env, settings, settingPrefix + "_path", settingPrefix, parser);
    }

    /**
     * Loads a word/rule list and runs every entry through {@code parser}.
     *
     * <p>Entries beginning with {@code #} are skipped as comments unless they match
     * {@link #HASH_TAG_RULE_PATTERN}. The line number reported in errors is the 1-based index
     * into the loaded list; when the list came from a file, blank lines have already been
     * dropped, so it may not equal the physical line number in that file.
     *
     * @param env         environment used to resolve {@code settingPath}
     * @param settings    settings to read from
     * @param settingPath name of the setting holding a file path
     * @param settingList name of the setting holding an inline list
     * @param parser      converts one raw entry into a rule
     * @return parsed rules, or {@code null} when neither setting is configured
     * @throws RuntimeException if {@code parser} rejects an entry
     */
    public static <T> List<T> parseWordList(Environment env, Settings settings, String settingPath, String settingList, CustomMappingRuleParser<T> parser) {
        List<String> words = getWordList(env, settings, settingPath, settingList);
        if (words == null) {
            return null;
        }
        List<T> rules = new ArrayList<>();
        int lineNum = 0;
        for (String word : words) {
            ++lineNum;
            if (word.startsWith("#") && !HASH_TAG_RULE_PATTERN.matcher(word).find()) {
                continue;
            }
            try {
                rules.add(parser.apply(word));
            } catch (RuntimeException ex) {
                String wordListPath = settings.get(settingPath, null);
                if (wordListPath == null || isUnderConfig(env, wordListPath)) {
                    throw new RuntimeException("Line [" + lineNum + "]: " + ex.getMessage());
                }
                // NOTE(review): for files outside the config directory the parser's message is
                // only logged and a generic error is thrown - presumably so file contents are
                // not echoed back to the caller. The cause is intentionally not attached.
                LOGGER.error("Line [{}]: {}", lineNum, ex);
                throw new RuntimeException("Line [" + lineNum + "]: Invalid rule");
            }
        }
        return rules;
    }

    /**
     * Returns the raw entries from the file named by {@code settingPath} when that setting is
     * present, otherwise the inline list under {@code settingList} (or {@code null} if absent).
     *
     * @throws IllegalArgumentException if the file cannot be read or is not valid UTF-8
     */
    private static List<String> getWordList(Environment env, Settings settings, String settingPath, String settingList) {
        String wordListPath = settings.get(settingPath, null);
        if (wordListPath == null) {
            return settings.getAsList(settingList, null);
        }
        Path path = resolveAnalyzerPath(env, wordListPath);
        try {
            return loadWordList(path);
        } catch (CharacterCodingException ex) {
            String message = String.format(Locale.ROOT, "Unsupported character encoding detected while reading %s: files must be UTF-8 encoded", settingPath);
            LOGGER.error("{}: from file: {}, exception is: {}", message, path.toString(), ex);
            throw new IllegalArgumentException(message);
        } catch (IOException ioe) {
            String message = String.format(Locale.ROOT, "IOException while reading %s: file not readable", settingPath);
            LOGGER.error("{}, from file: {}, exception is: {}", message, path.toString(), ioe);
            throw new IllegalArgumentException(message);
        }
    }

    /**
     * Reads {@code path} as UTF-8 and returns its lines, trimmed, with lines containing no
     * text omitted.
     */
    private static List<String> loadWordList(Path path) throws IOException {
        List<String> result = new ArrayList<>();
        try (BufferedReader br = Files.newBufferedReader(path, StandardCharsets.UTF_8)) {
            String word;
            while ((word = br.readLine()) != null) {
                if (Strings.hasText(word)) {
                    result.add(word.trim());
                }
            }
        }
        return result;
    }

    /**
     * Opens a UTF-8 reader on the file named by the setting {@code settingPrefix}.
     *
     * @return an open reader the caller must close, or {@code null} when the setting is absent
     * @throws IllegalArgumentException if the file cannot be opened
     */
    public static Reader getReaderFromFile(Environment env, Settings settings, String settingPrefix) {
        String filePath = settings.get(settingPrefix, null);
        if (filePath == null) {
            return null;
        }
        Path path = resolveAnalyzerPath(env, filePath);
        try {
            return Files.newBufferedReader(path, StandardCharsets.UTF_8);
        } catch (CharacterCodingException ex) {
            String message = String.format(Locale.ROOT, "Unsupported character encoding detected while reading %s_path: files must be UTF-8 encoded", settingPrefix);
            LOGGER.error("{}: from file: {}, exception is: {}", message, path.toString(), ex);
            throw new IllegalArgumentException(message);
        } catch (IOException ioe) {
            String message = String.format(Locale.ROOT, "IOException while reading %s_path: file not readable", settingPrefix);
            LOGGER.error("{}, from file: {}, exception is: {}", message, path.toString(), ioe);
            throw new IllegalArgumentException(message);
        }
    }

    /**
     * Resolves {@code wordListPath} against the environment's config directory and normalizes
     * the result. An absolute {@code wordListPath} is returned (normalized) as-is.
     */
    public static Path resolveAnalyzerPath(Environment env, String wordListPath) {
        return env.configFile().resolve(wordListPath).normalize();
    }

    /**
     * Reports whether {@code wordListPath}, once resolved, lies inside the config directory.
     * Any failure while resolving is treated as "not under config".
     */
    private static boolean isUnderConfig(Environment env, String wordListPath) {
        try {
            // Use one absolute, normalized base for both resolving and the prefix test;
            // Path.startsWith never matches a relative path against an absolute one.
            Path configDir = env.configFile().toAbsolutePath().normalize();
            Path candidate = configDir.resolve(wordListPath).normalize();
            return candidate.startsWith(configDir);
        } catch (Exception ignored) {
            // Deliberate fail-closed: an unresolvable path is reported as outside config.
            return false;
        }
    }

    static {
        Map<String, Set<?>> namedStopWords = new HashMap<>();
        namedStopWords.put("_arabic_", ArabicAnalyzer.getDefaultStopSet());
        namedStopWords.put("_armenian_", ArmenianAnalyzer.getDefaultStopSet());
        namedStopWords.put("_basque_", BasqueAnalyzer.getDefaultStopSet());
        namedStopWords.put("_bengali_", BengaliAnalyzer.getDefaultStopSet());
        namedStopWords.put("_brazilian_", BrazilianAnalyzer.getDefaultStopSet());
        namedStopWords.put("_bulgarian_", BulgarianAnalyzer.getDefaultStopSet());
        namedStopWords.put("_catalan_", CatalanAnalyzer.getDefaultStopSet());
        namedStopWords.put("_czech_", CzechAnalyzer.getDefaultStopSet());
        namedStopWords.put("_danish_", DanishAnalyzer.getDefaultStopSet());
        namedStopWords.put("_dutch_", DutchAnalyzer.getDefaultStopSet());
        namedStopWords.put("_english_", EnglishAnalyzer.getDefaultStopSet());
        namedStopWords.put("_estonian_", EstonianAnalyzer.getDefaultStopSet());
        namedStopWords.put("_finnish_", FinnishAnalyzer.getDefaultStopSet());
        namedStopWords.put("_french_", FrenchAnalyzer.getDefaultStopSet());
        namedStopWords.put("_galician_", GalicianAnalyzer.getDefaultStopSet());
        namedStopWords.put("_german_", GermanAnalyzer.getDefaultStopSet());
        namedStopWords.put("_greek_", GreekAnalyzer.getDefaultStopSet());
        namedStopWords.put("_hindi_", HindiAnalyzer.getDefaultStopSet());
        namedStopWords.put("_hungarian_", HungarianAnalyzer.getDefaultStopSet());
        namedStopWords.put("_indonesian_", IndonesianAnalyzer.getDefaultStopSet());
        namedStopWords.put("_irish_", IrishAnalyzer.getDefaultStopSet());
        namedStopWords.put("_italian_", ItalianAnalyzer.getDefaultStopSet());
        namedStopWords.put("_latvian_", LatvianAnalyzer.getDefaultStopSet());
        namedStopWords.put("_lithuanian_", LithuanianAnalyzer.getDefaultStopSet());
        namedStopWords.put("_norwegian_", NorwegianAnalyzer.getDefaultStopSet());
        namedStopWords.put("_persian_", PersianAnalyzer.getDefaultStopSet());
        namedStopWords.put("_portuguese_", PortugueseAnalyzer.getDefaultStopSet());
        namedStopWords.put("_romanian_", RomanianAnalyzer.getDefaultStopSet());
        namedStopWords.put("_russian_", RussianAnalyzer.getDefaultStopSet());
        namedStopWords.put("_sorani_", SoraniAnalyzer.getDefaultStopSet());
        namedStopWords.put("_spanish_", SpanishAnalyzer.getDefaultStopSet());
        namedStopWords.put("_swedish_", SwedishAnalyzer.getDefaultStopSet());
        namedStopWords.put("_thai_", ThaiAnalyzer.getDefaultStopSet());
        namedStopWords.put("_turkish_", TurkishAnalyzer.getDefaultStopSet());
        NAMED_STOP_WORDS = Collections.unmodifiableMap(namedStopWords);
    }
}

