/*
 * Decompiled with CFR 0.152.
 */
package edu.umn.biomedicus.concepts;

import edu.umn.biomedicus.acronyms.Acronym;
import edu.umn.biomedicus.common.dictionary.StringsBag;
import edu.umn.biomedicus.common.types.syntax.PartOfSpeech;
import edu.umn.biomedicus.common.types.syntax.PartsOfSpeech;
import edu.umn.biomedicus.concepts.ConceptDictionary;
import edu.umn.biomedicus.concepts.ConceptRow;
import edu.umn.biomedicus.concepts.DictionaryTerm;
import edu.umn.biomedicus.concepts.UmlsConcept;
import edu.umn.biomedicus.normalization.NormForm;
import edu.umn.biomedicus.sentences.Sentence;
import edu.umn.biomedicus.tagging.PosTag;
import edu.umn.biomedicus.tokenization.TermToken;
import edu.umn.nlpengine.Document;
import edu.umn.nlpengine.DocumentTask;
import edu.umn.nlpengine.Label;
import edu.umn.nlpengine.LabelIndex;
import edu.umn.nlpengine.Labeler;
import edu.umn.nlpengine.Span;
import edu.umn.nlpengine.TextRange;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import javax.annotation.Nonnull;
import javax.inject.Inject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

class DetectDictionaryConcepts
implements DocumentTask {
    private static final Logger LOGGER = LoggerFactory.getLogger(DetectDictionaryConcepts.class);
    private static final Set<PartOfSpeech> TRIVIAL_POS = DetectDictionaryConcepts.buildTrivialPos();
    private static final int SPAN_SIZE = 5;
    private final ConceptDictionary conceptDictionary;
    private Labeler<DictionaryTerm> termLabeler;
    private LabelIndex<PosTag> posTags;
    private LabelIndex<NormForm> normIndexes;
    private Labeler<UmlsConcept> conceptLabeler;

    @Inject
    DetectDictionaryConcepts(ConceptDictionary conceptDictionary) {
        this.conceptDictionary = conceptDictionary;
    }

    private static Set<PartOfSpeech> buildTrivialPos() {
        HashSet<PartOfSpeech> builder = new HashSet<PartOfSpeech>();
        Collections.addAll(builder, PartOfSpeech.DT, PartOfSpeech.CD, PartOfSpeech.WDT, PartOfSpeech.TO, PartOfSpeech.CC, PartOfSpeech.PRP, PartOfSpeech.PRP$, PartOfSpeech.MD, PartOfSpeech.EX, PartOfSpeech.IN, PartOfSpeech.XX);
        Set<PartOfSpeech> punctuationClass = PartsOfSpeech.getPunctuationClass();
        builder.addAll(punctuationClass);
        return Collections.unmodifiableSet(builder);
    }

    private boolean checkPhrase(Span span, String phrase, boolean oneToken, double confMod) {
        List<ConceptRow> phraseSUI = this.conceptDictionary.forPhrase(phrase);
        if (phraseSUI != null) {
            this.makeTerm((TextRange)span, phraseSUI, 1.0 - confMod);
            return true;
        }
        if (oneToken) {
            return false;
        }
        phraseSUI = this.conceptDictionary.forLowercasePhrase(phrase.toLowerCase(Locale.ENGLISH));
        if (phraseSUI != null) {
            this.makeTerm((TextRange)span, phraseSUI, 0.6 - confMod);
            return true;
        }
        return false;
    }

    private void checkTokenSet(List<TermToken> tokenSet) {
        if (tokenSet.size() <= 1) {
            return;
        }
        Span phraseAsSpan = new Span(tokenSet.get(0).getStartIndex(), tokenSet.get(tokenSet.size() - 1).getEndIndex());
        StringsBag.Builder builder = StringsBag.builder();
        for (NormForm normForm : this.normIndexes.inside((TextRange)phraseAsSpan)) {
            PosTag posTag = (PosTag)this.posTags.firstAtLocation((TextRange)normForm);
            if (posTag != null && TRIVIAL_POS.contains((Object)posTag.getPartOfSpeech())) continue;
            builder.addTerm(normForm.normIdentifier());
        }
        StringsBag normBag = builder.build();
        List<ConceptRow> normsCUI = this.conceptDictionary.forNorms(normBag);
        if (normsCUI != null) {
            this.makeTerm((TextRange)phraseAsSpan, normsCUI, 0.3);
        }
    }

    private void makeTerm(TextRange label, List<ConceptRow> cuis, double confidence) {
        for (ConceptRow row : cuis) {
            String source2 = this.conceptDictionary.source(row.getSource());
            if (source2 == null) {
                source2 = "unknown";
                LOGGER.warn("Unknown source");
            }
            this.conceptLabeler.add((Label)new UmlsConcept(label, row.getSui().toString(), row.getCui().toString(), row.getTui().toString(), source2, confidence));
        }
        this.termLabeler.add((Label)new DictionaryTerm(label));
    }

    public void run(@Nonnull Document document) {
        LOGGER.debug("Finding concepts in document.");
        LabelIndex sentences = document.labelIndex(Sentence.class);
        this.normIndexes = document.labelIndex(NormForm.class);
        this.termLabeler = document.labeler(DictionaryTerm.class);
        this.conceptLabeler = document.labeler(UmlsConcept.class);
        this.posTags = document.labelIndex(PosTag.class);
        LabelIndex termTokenLabelIndex = document.labelIndex(TermToken.class);
        LabelIndex acronymLabelIndex = document.labelIndex(Acronym.class);
        String documentText = document.getText();
        for (Sentence sentence : sentences) {
            LOGGER.trace("Identifying concepts in a sentence");
            StringBuilder editedString = new StringBuilder();
            ArrayList<Span> editedStringSpans = new ArrayList<Span>();
            List sentenceTermTokens = termTokenLabelIndex.inside((TextRange)sentence).asList();
            for (TermToken sentenceTermToken : sentenceTermTokens) {
                Acronym acronymForToken = (Acronym)acronymLabelIndex.firstAtLocation((TextRange)sentenceTermToken);
                Label token = acronymForToken != null ? acronymForToken : sentenceTermToken;
                String tokenText = token.getText();
                Span span = new Span(editedString.length(), editedString.length() + tokenText.length());
                editedString.append(tokenText);
                if (token.getHasSpaceAfter()) {
                    editedString.append(' ');
                }
                editedStringSpans.add(span);
            }
            for (int from = 0; from < sentenceTermTokens.size(); ++from) {
                int to = Math.min(from + 5, sentenceTermTokens.size());
                List window = sentenceTermTokens.subList(from, to);
                TermToken first = (TermToken)window.get(0);
                for (int subsetSize = 1; subsetSize <= window.size(); ++subsetSize) {
                    int editedEnd;
                    int editedBegin;
                    String editedSubstring;
                    List<TermToken> windowSubset = window.subList(0, subsetSize);
                    TermToken last = (TermToken)windowSubset.get(subsetSize - 1);
                    Span entire = new Span(first.getStartIndex(), last.getEndIndex());
                    if (this.posTags.inside((TextRange)entire).stream().map(PosTag::getPartOfSpeech).allMatch(TRIVIAL_POS::contains) || this.checkPhrase(entire, entire.coveredString(documentText), subsetSize == 1, 0.0) || this.checkPhrase(entire, editedSubstring = editedString.substring(editedBegin = ((Span)editedStringSpans.get(from)).getStartIndex(), editedEnd = ((Span)editedStringSpans.get(from + subsetSize - 1)).getEndIndex()), subsetSize == 1, 0.1)) continue;
                    this.checkTokenSet(windowSubset);
                }
            }
        }
    }
}

