/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.pipeline;

import edu.stanford.nlp.ling.CoreAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.tokensregex.TokenSequencePattern;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.Annotator;
import edu.stanford.nlp.pipeline.ChunkAnnotationUtils;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.process.WordToSentenceProcessor;
import edu.stanford.nlp.util.ArraySet;
import edu.stanford.nlp.util.ArrayUtils;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.logging.Redwood;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Properties;
import java.util.Set;

/**
 * Annotator that groups the tokens of an {@link Annotation} into sentences.
 *
 * <p>The actual boundary detection is delegated to a {@link WordToSentenceProcessor};
 * this class configures that processor (from {@link Properties} or explicit arguments),
 * builds one sentence {@link Annotation} per token group, copies document/section
 * metadata onto the sentences, and finally removes newline tokens from the document's
 * token list while renumbering the remaining tokens.
 *
 * <p>Properties read by {@link #WordsToSentencesAnnotator(Properties)}:
 * {@code ssplit.eolonly}, {@code tokenize.whitespace}, {@code ssplit.isOneSentence},
 * {@code ssplit.boundaryMultiTokenRegex}, {@code ssplit.tokenPatternsToDiscard},
 * {@code ssplit.boundaryTokenRegex}, {@code ssplit.boundaryFollowersRegex},
 * {@code ssplit.boundariesToDiscard}, {@code ssplit.htmlBoundariesToDiscard},
 * {@code ssplit.newlineIsSentenceBreak} and {@code ssplit.verbose}.
 */
public class WordsToSentencesAnnotator implements Annotator {
    private static final Redwood.RedwoodChannels log = Redwood.channels(WordsToSentencesAnnotator.class);

    /** Processor that partitions the token list into per-sentence token lists. */
    private final WordToSentenceProcessor<CoreLabel> wts;
    /** When true, progress and the token list are logged at info level. */
    private final boolean VERBOSE;
    /** When true, each produced sentence is tagged with a 1-based running group number. */
    private final boolean countLineNumbers;
    /** Ensures the "already sentence-split" error is logged only once per instance. */
    private boolean loggedExtraSplit = false;

    /** Creates a non-verbose annotator backed by a default-constructed processor. */
    public WordsToSentencesAnnotator() {
        this(false);
    }

    /**
     * Creates an annotator configured from {@code ssplit.*} properties.
     *
     * <ul>
     *   <li>{@code ssplit.eolonly=true}: the processor is built from a set of newline
     *       tokens only and line numbers are counted.</li>
     *   <li>otherwise {@code ssplit.isOneSentence=true}: the processor is built with the
     *       single-boolean constructor (the same one {@link #nonSplitter()} uses).</li>
     *   <li>otherwise: the processor is built from the regex / discard-set properties.</li>
     * </ul>
     *
     * @param properties configuration source; must not be null
     */
    public WordsToSentencesAnnotator(Properties properties) {
        boolean nlSplitting = Boolean.parseBoolean(properties.getProperty("ssplit.eolonly", "false"));
        if (nlSplitting) {
            this.countLineNumbers = true;
            this.wts = new WordToSentenceProcessor<CoreLabel>(ArrayUtils.asImmutableSet(newlineTokens(properties)));
        } else if (Boolean.parseBoolean(properties.getProperty("ssplit.isOneSentence"))) {
            this.countLineNumbers = false;
            this.wts = new WordToSentenceProcessor<CoreLabel>(true);
        } else {
            String boundaryMultiTokenRegex = properties.getProperty("ssplit.boundaryMultiTokenRegex");
            Set<String> tokenRegexesToDiscard = commaSeparatedSet(properties.getProperty("ssplit.tokenPatternsToDiscard"));
            String boundaryTokenRegex = properties.getProperty("ssplit.boundaryTokenRegex");
            String boundaryFollowersRegex = properties.getProperty("ssplit.boundaryFollowersRegex");
            Set<String> boundariesToDiscard = commaSeparatedSet(properties.getProperty("ssplit.boundariesToDiscard"));
            Set<String> htmlElementsToDiscard = commaSeparatedSet(properties.getProperty("ssplit.htmlBoundariesToDiscard"));
            String nlsb = properties.getProperty("ssplit.newlineIsSentenceBreak", "never");
            this.countLineNumbers = false;
            // The processor must be parameterized on CoreLabel to match the field type;
            // a WordToSentenceProcessor<CoreMap> is not assignable to it.
            this.wts = new WordToSentenceProcessor<CoreLabel>(
                    boundaryTokenRegex,
                    boundaryFollowersRegex,
                    boundariesToDiscard,
                    htmlElementsToDiscard,
                    WordToSentenceProcessor.stringToNewlineIsSentenceBreak(nlsb),
                    boundaryMultiTokenRegex != null ? TokenSequencePattern.compile(boundaryMultiTokenRegex) : null,
                    tokenRegexesToDiscard);
        }
        this.VERBOSE = Boolean.parseBoolean(properties.getProperty("ssplit.verbose", "false"));
    }

    /**
     * Creates an annotator backed by a default-constructed processor.
     *
     * @param verbose whether to log progress at info level
     */
    public WordsToSentencesAnnotator(boolean verbose) {
        this(verbose, false, new WordToSentenceProcessor<CoreLabel>());
    }

    /**
     * Creates an annotator from explicit boundary settings. No boundary-followers regex
     * is supplied (null is passed to the processor) and line numbers are not counted.
     *
     * @param verbose whether to log progress at info level
     * @param boundaryTokenRegex passed through to the processor
     * @param boundaryToDiscard passed through to the processor
     * @param htmlElementsToDiscard passed through to the processor
     * @param newlineIsSentenceBreak converted with
     *        {@link WordToSentenceProcessor#stringToNewlineIsSentenceBreak(String)}
     * @param boundaryMultiTokenRegex compiled with {@link TokenSequencePattern#compile} when
     *        non-null; otherwise no multi-token pattern is used
     * @param tokenRegexesToDiscard passed through to the processor
     */
    public WordsToSentencesAnnotator(boolean verbose, String boundaryTokenRegex, Set<String> boundaryToDiscard, Set<String> htmlElementsToDiscard, String newlineIsSentenceBreak, String boundaryMultiTokenRegex, Set<String> tokenRegexesToDiscard) {
        this(verbose, false, new WordToSentenceProcessor<CoreLabel>(
                boundaryTokenRegex,
                null,
                boundaryToDiscard,
                htmlElementsToDiscard,
                WordToSentenceProcessor.stringToNewlineIsSentenceBreak(newlineIsSentenceBreak),
                boundaryMultiTokenRegex != null ? TokenSequencePattern.compile(boundaryMultiTokenRegex) : null,
                tokenRegexesToDiscard));
    }

    private WordsToSentencesAnnotator(boolean verbose, boolean countLineNumbers, WordToSentenceProcessor<CoreLabel> wts) {
        this.VERBOSE = verbose;
        this.countLineNumbers = countLineNumbers;
        this.wts = wts;
    }

    /**
     * Returns a non-verbose annotator whose processor is built from the given newline
     * tokens and which counts line numbers.
     *
     * @param nlToken the token strings handed to the processor as its newline set
     */
    public static WordsToSentencesAnnotator newlineSplitter(String ... nlToken) {
        WordToSentenceProcessor<CoreLabel> wts = new WordToSentenceProcessor<CoreLabel>(ArrayUtils.asImmutableSet(nlToken));
        return new WordsToSentencesAnnotator(false, true, wts);
    }

    /**
     * Returns a non-verbose annotator whose processor is built with the single-boolean
     * constructor (presumably "treat the whole input as one sentence" - confirm against
     * {@link WordToSentenceProcessor}).
     */
    public static WordsToSentencesAnnotator nonSplitter() {
        WordToSentenceProcessor<CoreLabel> wts = new WordToSentenceProcessor<CoreLabel>(true);
        return new WordsToSentencesAnnotator(false, false, wts);
    }

    /**
     * Splits the annotation's tokens into sentences and stores them under
     * {@link CoreAnnotations.SentencesAnnotation}.
     *
     * <p>Side effects on {@code annotation}: its token list is replaced by a copy without
     * newline tokens; every kept token receives token begin/end offsets; tokens that
     * belong to a sentence receive an index, a sentence index and (if present) the doc id.
     * If the annotation already has sentences, nothing is changed and an error is logged
     * once per annotator instance.
     *
     * @param annotation document to split; must contain tokens
     * @throws IllegalArgumentException if the annotation has no tokens annotation
     * @throws IllegalStateException if the processor yields an empty sentence while line
     *         counting is disabled
     */
    @Override
    public void annotate(Annotation annotation) {
        if (this.VERBOSE) {
            log.info("Sentence splitting ... " + annotation);
        }
        if (!annotation.containsKey(CoreAnnotations.TokensAnnotation.class)) {
            throw new IllegalArgumentException("WordsToSentencesAnnotator: unable to find words/tokens in: " + annotation);
        }
        if (annotation.containsKey(CoreAnnotations.SentencesAnnotation.class)) {
            if (!this.loggedExtraSplit) {
                log.error("Multiple WordsToSentencesAnnotator or other sentence splitters are operating on this document!");
                this.loggedExtraSplit = true;
            }
            return;
        }

        String text = annotation.get(CoreAnnotations.TextAnnotation.class);
        List<CoreLabel> tokens = annotation.get(CoreAnnotations.TokensAnnotation.class);
        if (this.VERBOSE) {
            log.info("Tokens are: " + tokens);
        }
        String docID = annotation.get(CoreAnnotations.DocIDAnnotation.class);
        List<CoreMap> sections = annotation.get(CoreAnnotations.SectionsAnnotation.class);

        int lineNumber = 0;
        // Annotations of the section currently open, copied onto each sentence inside it.
        CoreMap sectionAnnotations = null;
        int currSectionIndex = 0;
        List<CoreMap> sentences = new ArrayList<CoreMap>();

        for (List<CoreLabel> sentenceTokens : this.wts.process(tokens)) {
            if (this.countLineNumbers) {
                ++lineNumber;
            }
            if (sentenceTokens.isEmpty()) {
                // Empty groups are skipped when counting lines (they still advance the
                // counter above); in any other mode they are unexpected.
                if (this.countLineNumbers) {
                    continue;
                }
                throw new IllegalStateException("unexpected empty sentence: " + sentenceTokens);
            }

            CoreLabel sentenceStartToken = sentenceTokens.get(0);
            CoreLabel sentenceEndToken = sentenceTokens.get(sentenceTokens.size() - 1);
            int begin = sentenceStartToken.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
            int end = sentenceEndToken.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);

            Annotation sentence = new Annotation(text.substring(begin, end));
            sentence.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, begin);
            sentence.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, end);
            sentence.set(CoreAnnotations.TokensAnnotation.class, sentenceTokens);
            sentence.set(CoreAnnotations.SentenceIndexAnnotation.class, sentences.size());
            if (this.countLineNumbers) {
                sentence.set(CoreAnnotations.LineNumberAnnotation.class, lineNumber);
            }

            // A section-start marker on the first token opens a section; its annotations
            // apply to this and following sentences until a section-end marker is seen.
            CoreMap sectionStart = sentenceStartToken.get(CoreAnnotations.SectionStartAnnotation.class);
            if (sectionStart != null) {
                sectionAnnotations = sectionStart;
            }
            if (sectionAnnotations != null) {
                ChunkAnnotationUtils.copyUnsetAnnotations(sectionAnnotations, sentence);
            }
            String sectionEnd = sentenceEndToken.get(CoreAnnotations.SectionEndAnnotation.class);
            if (sectionEnd != null) {
                sectionAnnotations = null;
            }

            if (sections != null) {
                currSectionIndex = attachToSection(sections, currSectionIndex, sentence, begin, end);
            }
            if (docID != null) {
                sentence.set(CoreAnnotations.DocIDAnnotation.class, docID);
            }

            // Token indices are 1-based within the sentence.
            int index = 1;
            for (CoreLabel token : sentenceTokens) {
                token.setIndex(index++);
                token.setSentIndex(sentences.size());
                if (docID != null) {
                    token.setDocID(docID);
                }
            }
            sentences.add(sentence);
        }

        annotation.set(CoreAnnotations.TokensAnnotation.class, removeNewlineTokens(tokens));
        setSentenceTokenOffsets(sentences);
        annotation.set(CoreAnnotations.SentencesAnnotation.class, sentences);
    }

    @Override
    public Set<Class<? extends CoreAnnotation>> requires() {
        return Collections.unmodifiableSet(new ArraySet<>(Arrays.asList(
                CoreAnnotations.TextAnnotation.class,
                CoreAnnotations.TokensAnnotation.class,
                CoreAnnotations.ValueAnnotation.class,
                CoreAnnotations.CharacterOffsetBeginAnnotation.class,
                CoreAnnotations.CharacterOffsetEndAnnotation.class,
                CoreAnnotations.IsNewlineAnnotation.class,
                CoreAnnotations.TokenBeginAnnotation.class,
                CoreAnnotations.TokenEndAnnotation.class,
                CoreAnnotations.OriginalTextAnnotation.class)));
    }

    @Override
    public Set<Class<? extends CoreAnnotation>> requirementsSatisfied() {
        return new HashSet<Class<? extends CoreAnnotation>>(Arrays.asList(
                CoreAnnotations.SentencesAnnotation.class,
                CoreAnnotations.SentenceIndexAnnotation.class));
    }

    /** Chooses the newline token strings used for end-of-line-only splitting. */
    private static String[] newlineTokens(Properties properties) {
        boolean whitespaceTokenization = Boolean.parseBoolean(properties.getProperty("tokenize.whitespace", "false"));
        if (!whitespaceTokenization) {
            return new String[]{PTBTokenizer.getNewlineToken()};
        }
        if (System.lineSeparator().equals("\n")) {
            return new String[]{"\n", "*NL*"};
        }
        return new String[]{System.lineSeparator(), "\n", "*NL*"};
    }

    /** Splits a comma-separated property value into a set; returns null when the value is null. */
    private static Set<String> commaSeparatedSet(String value) {
        if (value == null) {
            return null;
        }
        return Generics.newHashSet(Arrays.asList(value.split(",")));
    }

    /**
     * Advances past sections ending before the sentence ends and, if the section reached
     * starts at or before the sentence, registers the sentence with it and copies quote,
     * date and index information. Returns the section index to resume from.
     */
    private static int attachToSection(List<CoreMap> sections, int startIndex, CoreMap sentence, int begin, int end) {
        int sectionIndex = startIndex;
        while (sectionIndex < sections.size()) {
            CoreMap section = sections.get(sectionIndex);
            int sectionBegin = section.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
            int sectionEnd = section.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
            if (sectionEnd < end) {
                ++sectionIndex;
                continue;
            }
            if (sectionBegin <= begin) {
                List<CoreMap> quotes = section.get(CoreAnnotations.QuotesAnnotation.class);
                // A section without a quotes list simply contributes no quote information.
                if (quotes != null) {
                    for (CoreMap quote : quotes) {
                        boolean insideQuote = quote.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class) <= begin
                                && end <= quote.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
                        if (insideQuote) {
                            sentence.set(CoreAnnotations.QuotedAnnotation.class, true);
                            sentence.set(CoreAnnotations.AuthorAnnotation.class, quote.get(CoreAnnotations.AuthorAnnotation.class));
                        }
                    }
                }
                section.get(CoreAnnotations.SentencesAnnotation.class).add(sentence);
                sentence.set(CoreAnnotations.SectionDateAnnotation.class, section.get(CoreAnnotations.SectionDateAnnotation.class));
                sentence.set(CoreAnnotations.SectionIndexAnnotation.class, sectionIndex);
            }
            break;
        }
        return sectionIndex;
    }

    /**
     * Returns the tokens with newline tokens dropped, numbering the kept tokens with
     * consecutive token begin/end offsets and moving each newline's original text into the
     * neighbouring tokens' before/after annotations when those are already set.
     */
    private static List<CoreLabel> removeNewlineTokens(List<CoreLabel> tokens) {
        List<CoreLabel> finalTokens = new ArrayList<CoreLabel>();
        int tokenIndex = 0;
        CoreLabel prevToken = null;
        for (CoreLabel token : tokens) {
            if (!token.isNewline()) {
                finalTokens.add(token);
                token.set(CoreAnnotations.TokenBeginAnnotation.class, tokenIndex);
                token.set(CoreAnnotations.TokenEndAnnotation.class, tokenIndex + 1);
                ++tokenIndex;
                if (prevToken != null && prevToken.isNewline() && token.get(CoreAnnotations.BeforeAnnotation.class) != null) {
                    String prevNewlineTokenText = prevToken.get(CoreAnnotations.OriginalTextAnnotation.class);
                    token.set(CoreAnnotations.BeforeAnnotation.class, prevNewlineTokenText);
                }
            } else {
                String newlineText = token.get(CoreAnnotations.OriginalTextAnnotation.class);
                if (prevToken != null && prevToken.get(CoreAnnotations.AfterAnnotation.class) != null) {
                    prevToken.set(CoreAnnotations.AfterAnnotation.class, newlineText);
                }
            }
            prevToken = token;
        }
        return finalTokens;
    }

    /** Sets each sentence's token begin/end offsets from its first and last token. */
    private static void setSentenceTokenOffsets(List<CoreMap> sentences) {
        for (CoreMap sentence : sentences) {
            List<CoreLabel> sentenceTokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
            int sentenceTokenBegin = sentenceTokens.get(0).get(CoreAnnotations.TokenBeginAnnotation.class);
            int sentenceTokenEnd = sentenceTokens.get(sentenceTokens.size() - 1).get(CoreAnnotations.TokenEndAnnotation.class);
            sentence.set(CoreAnnotations.TokenBeginAnnotation.class, sentenceTokenBegin);
            sentence.set(CoreAnnotations.TokenEndAnnotation.class, sentenceTokenEnd);
        }
    }
}

