/*
 * Decompiled with CFR 0.152.
 */
package smile.nlp.tokenizer;

import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import smile.nlp.dictionary.EnglishDictionary;
import smile.nlp.tokenizer.EnglishAbbreviations;
import smile.nlp.tokenizer.SentenceSplitter;

/**
 * A rule-based sentence splitter for English text.
 *
 * <p>Candidate boundaries are found with a regular expression that looks for
 * {@code .}, {@code !}, {@code ?} or {@code :} followed by whitespace. A period
 * is not treated as a boundary when the word in front of it looks like an
 * abbreviation, unless the following word is found in
 * {@link EnglishDictionary#CONCISE} and the sentence collected so far is longer
 * than six words. A colon only ends a sentence that is longer than six words.
 *
 * <p>The class has no instance fields; obtain the shared instance through
 * {@link #getInstance()}.
 */
public class SimpleSentenceSplitter
implements SentenceSplitter {
    /**
     * Control character used internally to mark a position where a space seems
     * to have been forgotten after sentence punctuation. Occurrences in the
     * input are replaced by a blank first, and the marker is stripped from
     * every returned sentence.
     */
    private static final char FORGOTTEN_SPACE_MARKER = '\u0019';

    /** One or more line-break characters. */
    private static final Pattern REGEX_CARRIAGE_RETURN = Pattern.compile("[\\n\\r]+");
    /**
     * Any character, then one of {@code . ! ?}, directly followed by a character
     * that is neither a digit, nor whitespace, nor one of {@code . " ' ` ) } ]}.
     */
    private static final Pattern REGEX_FORGOTTEN_SPACE = Pattern.compile("(.)([\\.!?])([\\D&&\\S&&[^\\.\"'`\\)\\}\\]]])");
    /**
     * Candidate sentence. Group 1 is the text, group 2 the punctuation mark.
     * Exactly one of the two look-aheads matches: the first captures trailing
     * closers/blanks (group 3) and the following capitalised or numeric word
     * (group 4); the second captures trailing closers/blanks (group 5) that are
     * followed by whitespace.
     */
    private static final Pattern REGEX_SENTENCE = Pattern.compile("(['\"`]*[\\(\\{\\[]?[a-zA-Z0-9]+.*?)([\\.!?:])(?:(?=([\\(\\[\\{\"'`\\)\\}\\]<]*[ \u0019]+)[\\(\\[\\{\"'`\\)\\}\\] ]*([A-Z0-9][a-z]*))|(?=([\\(\\)\"'`\\}<\\] \u0019]+)\\s))");
    private static final Pattern REGEX_WHITESPACE = Pattern.compile("\\s+");
    /** The last run of word characters, periods and apostrophes of a string. */
    private static final Pattern REGEX_LAST_WORD = Pattern.compile("\\b([\\w0-9\\.']+)$");

    // Abbreviation heuristics. These are compiled once here instead of being
    // recompiled by String.matches for every candidate boundary.
    private static final Pattern REGEX_HAS_VOWEL = Pattern.compile(".*[AEIOUaeiou]+.*");
    private static final Pattern REGEX_HAS_LOWERCASE = Pattern.compile(".*[a-z]+.*");
    private static final Pattern REGEX_HAS_Y = Pattern.compile(".*[y]+.*");
    private static final Pattern REGEX_DOTTED_LETTERS = Pattern.compile("([a-zA-Z][\\.])+");
    private static final Pattern REGEX_SINGLE_LETTER = Pattern.compile("^[A-Za-z]$");
    private static final Pattern REGEX_CAPITAL_I = Pattern.compile("^[I]$");

    private static final SimpleSentenceSplitter singleton = new SimpleSentenceSplitter();

    /** Not instantiable from outside; use {@link #getInstance()}. */
    private SimpleSentenceSplitter() {
    }

    /**
     * Returns the shared splitter instance.
     *
     * @return the singleton instance
     */
    public static SimpleSentenceSplitter getInstance() {
        return singleton;
    }

    /**
     * Splits a text into sentences.
     *
     * <p>Line breaks are treated as blanks. Text that remains after the last
     * detected boundary is returned as a final sentence unless it is blank, so
     * empty or whitespace-only input yields an empty array and trailing
     * whitespace never produces an empty last element.
     *
     * @param text the text to split; must not be {@code null}
     * @return the sentences in order of appearance
     */
    @Override
    public String[] split(String text) {
        List<String> sentences = new ArrayList<>();

        // Normalise: line breaks become blanks, pre-existing marker characters
        // are neutralised, and a marker is inserted where a space is missing
        // after sentence punctuation.
        String normalized = REGEX_CARRIAGE_RETURN.matcher(text).replaceAll(" ");
        normalized = normalized.replace(FORGOTTEN_SPACE_MARKER, ' ');
        normalized = REGEX_FORGOTTEN_SPACE.matcher(normalized).replaceAll("$1$2\u0019$3");
        // A trailing newline gives the look-aheads something to match against
        // at the very end of the text.
        normalized = normalized + "\n";

        Matcher matcher = REGEX_SENTENCE.matcher(normalized);
        StringBuilder currentSentence = new StringBuilder();
        // Number of words collected in currentSentence so far.
        int len = 0;
        // Offset in normalized up to which text has been consumed.
        int end = 0;

        while (matcher.find()) {
            String sentence = matcher.group(1).trim();
            String punctuation = matcher.group(2);

            // The look-aheads are zero-width, so advance 'end' past whatever
            // trailing closers/blanks the matching alternative captured.
            String stuffAfterPeriod = matcher.group(3);
            if (stuffAfterPeriod != null) {
                end = matcher.end(3);
            } else {
                stuffAfterPeriod = matcher.group(5);
                if (stuffAfterPeriod != null) {
                    end = matcher.end(5);
                } else {
                    stuffAfterPeriod = "";
                    end = matcher.end();
                }
            }

            len += REGEX_WHITESPACE.split(sentence).length;

            String nextWord = matcher.group(4);
            if (nextWord == null) {
                nextWord = "";
            }

            boolean endsSentence;
            if (".".equals(punctuation)) {
                // A period after an abbreviation only ends the sentence when
                // the next word is a dictionary word and the sentence is
                // already longer than six words.
                endsSentence = !isLikelyAbbreviation(lastWordOf(sentence))
                        || (EnglishDictionary.CONCISE.contains(nextWord) && len > 6);
            } else if ("!".equals(punctuation) || "?".equals(punctuation)) {
                endsSentence = true;
            } else {
                // A colon ends the sentence only if it is longer than six words.
                endsSentence = ":".equals(punctuation) && len > 6;
            }

            currentSentence.append(sentence).append(punctuation);
            if (endsSentence) {
                currentSentence.append(stuffAfterPeriod.trim());
                sentences.add(currentSentence.toString());
                currentSentence.setLength(0);
                len = 0;
            } else if (stuffAfterPeriod.indexOf(FORGOTTEN_SPACE_MARKER) == -1) {
                // Re-insert the blank that separated the pieces, except where
                // the original text had no space (marker present).
                currentSentence.append(' ');
            }
        }

        // Whatever follows the last boundary belongs to the final sentence.
        if (end < normalized.length()) {
            currentSentence.append(normalized, end, normalized.length());
        }
        // Trim before testing for emptiness: the appended newline alone must
        // not produce an empty trailing sentence.
        String lastSentence = currentSentence.toString().trim();
        if (!lastSentence.isEmpty()) {
            sentences.add(lastSentence);
        }

        String[] result = new String[sentences.size()];
        for (int i = 0; i < result.length; ++i) {
            result[i] = sentences.get(i).replace("\u0019", "");
        }
        return result;
    }

    /** Returns the last word of {@code sentence}, or an empty string if none is found. */
    private static String lastWordOf(String sentence) {
        Matcher lastWordMatcher = REGEX_LAST_WORD.matcher(sentence);
        return lastWordMatcher.find() ? lastWordMatcher.group() : "";
    }

    /**
     * Tells whether the word preceding a period looks like an abbreviation:
     * it has a lower-case letter but no vowel and no {@code y}, or it is a
     * sequence of letter-period pairs, or it is a single letter other than
     * {@code I}, or it is listed in {@link EnglishAbbreviations}.
     */
    private static boolean isLikelyAbbreviation(String lastWord) {
        if (!REGEX_HAS_VOWEL.matcher(lastWord).matches()
                && REGEX_HAS_LOWERCASE.matcher(lastWord).matches()
                && !REGEX_HAS_Y.matcher(lastWord).matches()) {
            return true;
        }
        if (REGEX_DOTTED_LETTERS.matcher(lastWord).matches()) {
            return true;
        }
        if (REGEX_SINGLE_LETTER.matcher(lastWord).matches()
                && !REGEX_CAPITAL_I.matcher(lastWord).matches()) {
            return true;
        }
        // Locale.ROOT keeps the lookup independent of the default locale
        // (e.g. Turkish lower-cases 'I' to a dotless i).
        return EnglishAbbreviations.contains(lastWord.toLowerCase(Locale.ROOT));
    }
}

