/*
 * Decompiled with CFR 0.152.
 */
package edu.umn.biomedicus.tnt;

import com.google.inject.Inject;
import edu.umn.biomedicus.annotations.ComponentSetting;
import edu.umn.biomedicus.sentences.Sentence;
import edu.umn.biomedicus.tagging.PosTag;
import edu.umn.biomedicus.tnt.DataStoreFactory;
import edu.umn.biomedicus.tnt.TntModel;
import edu.umn.biomedicus.tnt.TntModelTrainer;
import edu.umn.biomedicus.tokenization.ParseToken;
import edu.umn.nlpengine.Artifact;
import edu.umn.nlpengine.ArtifactsProcessor;
import edu.umn.nlpengine.Document;
import edu.umn.nlpengine.LabelIndex;
import edu.umn.nlpengine.TextRange;
import java.io.IOException;
import java.nio.file.Path;
import java.util.List;
import javax.annotation.Nonnull;

public class TnTTrainerProcessor
implements ArtifactsProcessor {
    private final String viewName;
    private final TntModelTrainer tntModelTrainer;
    private final Path outputDir;

    @Inject
    TnTTrainerProcessor(@ComponentSetting(value="tnt.train.viewName") String viewName, @ComponentSetting(value="tnt.train.outputDir.asPath") Path outputDir, DataStoreFactory dataStoreFactory) {
        this.viewName = viewName;
        dataStoreFactory.setDbPath(outputDir.resolve("words/"));
        this.tntModelTrainer = TntModelTrainer.builder().useMslSuffixModel(false).maxSuffixLength(5).maxWordFrequency(20).restrictToOpenClass(false).useCapitalization(true).dataStoreFactory(dataStoreFactory).build();
        this.outputDir = outputDir;
    }

    public void done() {
        TntModel model = this.tntModelTrainer.createModel();
        try {
            model.write(this.outputDir);
        }
        catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    public void process(@Nonnull Artifact artifact) {
        Document view = (Document)artifact.getDocuments().get(this.viewName);
        if (view == null) {
            throw new RuntimeException("View was null: " + this.viewName);
        }
        LabelIndex sentences = view.labelIndex(Sentence.class);
        LabelIndex tokens = view.labelIndex(ParseToken.class);
        LabelIndex partsOfSpeech = view.labelIndex(PosTag.class);
        for (Sentence sentence : sentences) {
            List sentenceTokens = tokens.inside((TextRange)sentence).asList();
            List sentencesPos = partsOfSpeech.inside((TextRange)sentence).asList();
            this.tntModelTrainer.addSentence(sentenceTokens, sentencesPos);
        }
    }
}

