/*
 * Decompiled with CFR 0.152.
 */
package edu.umn.biomedicus.vocabulary;

import com.google.inject.Guice;
import com.google.inject.Inject;
import com.google.inject.Module;
import com.google.inject.Stage;
import edu.umn.biomedicus.exc.BiomedicusException;
import edu.umn.biomedicus.framework.Bootstrapper;
import edu.umn.biomedicus.tokenization.ParseToken;
import edu.umn.biomedicus.tokenization.TermToken;
import edu.umn.biomedicus.tokenization.TermTokenMerger;
import edu.umn.biomedicus.tokenization.TokenResult;
import edu.umn.biomedicus.tokenization.Tokenizer;
import edu.umn.biomedicus.vocabulary.TermIndexBuilder;
import edu.umn.biomedicus.vocabulary.VocabularyBuilder;
import edu.umn.nlpengine.Span;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;
import java.util.stream.Stream;
import javax.annotation.Nullable;
import org.kohsuke.args4j.Argument;
import org.kohsuke.args4j.CmdLineException;
import org.kohsuke.args4j.CmdLineParser;
import org.kohsuke.args4j.Option;
import org.kohsuke.args4j.spi.PathOptionHandler;

/**
 * Command-line tool that populates the words, terms and norms indexes of a
 * {@link VocabularyBuilder} from two pipe-delimited source files: {@code LRAGR}
 * (resolved against the {@code -s} path) and {@code MRCONSO.RRF} (resolved
 * against the {@code -u} path).
 *
 * <p>Usage: {@code VocabularyInitializer -s <specialist dir> -u <umls dir> <output path>}.
 */
public class VocabularyInitializer {
    /** Splits one line of a pipe-delimited file into its columns. */
    private static final Pattern PIPE_SPLITTER = Pattern.compile("\\|");

    /** Number of lines read between two progress messages. */
    private static final int PROGRESS_INTERVAL = 10000;

    private final VocabularyBuilder builder;

    // The three index builders are created in setOutputPath, i.e. only once the
    // required positional argument has been parsed.
    private TermIndexBuilder normsIndexBuilder;
    private TermIndexBuilder termsIndexBuilder;
    private TermIndexBuilder wordsIndexBuilder;

    @Nullable
    @Option(name="-s", required=true, handler=PathOptionHandler.class, usage="path to SPECIALIST Lexicon installation.")
    private Path specialistPath;

    @Nullable
    @Option(name="-u", required=true, handler=PathOptionHandler.class, usage="path to UMLS installation.")
    private Path umlsPath;

    @Inject
    private VocabularyInitializer(VocabularyBuilder builder) {
        this.builder = builder;
    }

    /**
     * Sets the output location on the vocabulary builder and creates the norms,
     * terms and words index builders from it. Invoked by args4j for the required
     * positional argument.
     *
     * @param outputPath path handed to {@link VocabularyBuilder#setOutputPath}
     */
    @Argument(required=true, handler=PathOptionHandler.class)
    public void setOutputPath(Path outputPath) {
        builder.setOutputPath(outputPath);
        normsIndexBuilder = builder.createNormsIndexBuilder();
        termsIndexBuilder = builder.createTermsIndexBuilder();
        wordsIndexBuilder = builder.createWordsIndexBuilder();
    }

    /**
     * Program entry point: obtains a {@code VocabularyInitializer} through the
     * bootstrapper and runs it. A {@link BiomedicusException} is reported by
     * printing its stack trace.
     *
     * @param args command-line arguments, see the class description
     */
    public static void main(String[] args) {
        try {
            Bootstrapper.create(Guice.createInjector(Stage.DEVELOPMENT))
                    .getInstance(VocabularyInitializer.class)
                    .doMain(args);
        }
        catch (BiomedicusException e) {
            e.printStackTrace();
        }
    }

    /**
     * Tokenizes a phrase, adds every token's text to the words index, then runs
     * the tokens through a {@link TermTokenMerger} and adds the text of every
     * resulting {@link TermToken} to the terms index.
     *
     * @param phrase the phrase to index
     * @throws BiomedicusException if an index builder rejects a term
     */
    void addPhrase(String phrase) throws BiomedicusException {
        Iterator<? extends TokenResult> tokens = Tokenizer.tokenize((CharSequence)phrase).iterator();
        List<ParseToken> parseTokens = new ArrayList<>();

        // One-token look-ahead: the "space after" flag of a token depends on
        // where the following token begins.
        TokenResult current = tokens.hasNext() ? tokens.next() : null;
        while (current != null) {
            TokenResult next = tokens.hasNext() ? tokens.next() : null;

            String term = phrase.substring(current.getStartIndex(), current.getEndIndex());
            wordsIndexBuilder.addTerm(term);

            // There is a gap after this token only if the next token does not
            // begin exactly where this one ends; the last token never has one.
            boolean hasSpaceAfter = next != null && current.getEndIndex() != next.getStartIndex();
            parseTokens.add(new ParseToken(current.getStartIndex(), current.getEndIndex(), term, hasSpaceAfter));

            current = next;
        }

        TermTokenMerger merger = new TermTokenMerger(parseTokens.iterator());
        while (merger.hasNext()) {
            TermToken termToken = merger.next();
            termsIndexBuilder.addTerm(termToken.getText());
        }
    }

    /**
     * Tokenizes a normalized phrase and adds the text covered by each token to
     * the norms index.
     *
     * @param normPhrase the normalized phrase to index
     * @throws BiomedicusException if the norms index builder rejects a term
     */
    void addNormPhrase(String normPhrase) throws BiomedicusException {
        for (TokenResult token : Tokenizer.tokenize((CharSequence)normPhrase)) {
            String norm = new Span(token.getStartIndex(), token.getEndIndex()).coveredString(normPhrase);
            normsIndexBuilder.addTerm(norm);
        }
    }

    /**
     * Parses the command line, indexes {@code LRAGR} and {@code MRCONSO.RRF},
     * writes the three indexes and shuts the vocabulary builder down.
     *
     * <p>On a command-line error the usage is printed and the method returns. If
     * one of the input files cannot be opened, the stack trace is printed and the
     * method returns without writing anything.
     *
     * @param args command-line arguments
     * @throws BiomedicusException if {@code MRCONSO.RRF} does not exist, or if
     *         indexing or writing fails
     */
    private void doMain(String[] args) throws BiomedicusException {
        CmdLineParser parser = new CmdLineParser(this);
        try {
            parser.parseArgument(args);
        }
        catch (CmdLineException e) {
            System.err.println(e.getLocalizedMessage());
            System.err.println("java edu.umn.biomedicus.vocabulary.VocabularyInitializer [options...]");
            parser.printUsage(System.err);
            return;
        }

        // Fail before the LRAGR pass if the second input is missing.
        Path mrConso = umlsPath.resolve("MRCONSO.RRF");
        if (Files.notExists(mrConso)) {
            throw new BiomedicusException("Could not find MRCONSO at " + umlsPath.toString());
        }
        Path lragr = specialistPath.resolve("LRAGR");

        try {
            indexLragr(lragr);
            indexMrConso(mrConso);
        }
        catch (IOException e) {
            e.printStackTrace();
            return;
        }

        System.out.println("Writing words");
        wordsIndexBuilder.doWrite();
        System.out.println("Writing norms");
        normsIndexBuilder.doWrite();
        System.out.println("Writing terms");
        termsIndexBuilder.doWrite();
        System.out.println("Done writing");

        try {
            builder.doShutdown();
        }
        catch (BiomedicusException e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads every line of {@code LRAGR}, indexing column 1 as a phrase and
     * column 4 as a normalized phrase.
     */
    private void indexLragr(Path lragr) throws IOException, BiomedicusException {
        long totalLines = countLines(lragr);
        try (Stream<String> lines = Files.lines(lragr)) {
            Iterator<String[]> rows = lines.map(PIPE_SPLITTER::split).iterator();
            int count = 0;
            while (rows.hasNext()) {
                String[] row = rows.next();
                String inflectionalVariant = row[1];
                addPhrase(inflectionalVariant);
                String uninflected = row[4];
                addNormPhrase(uninflected);
                reportProgress(++count, totalLines, "LRAGR");
            }
        }
    }

    /**
     * Reads every line of {@code MRCONSO.RRF}, indexing column 14 as a phrase.
     */
    private void indexMrConso(Path mrConso) throws IOException, BiomedicusException {
        long totalLines = countLines(mrConso);
        try (Stream<String> lines = Files.lines(mrConso)) {
            Iterator<String[]> rows = lines.map(PIPE_SPLITTER::split).iterator();
            int count = 0;
            while (rows.hasNext()) {
                String[] row = rows.next();
                // NOTE(review): column 14 is presumably the STR field of MRCONSO - confirm.
                addPhrase(row[14]);
                reportProgress(++count, totalLines, "MRCONSO.RRF");
            }
        }
    }

    /** Counts the lines of a file, closing the underlying stream afterwards. */
    private static long countLines(Path file) throws IOException {
        try (Stream<String> lines = Files.lines(file)) {
            return lines.count();
        }
    }

    /** Prints a progress message every {@link #PROGRESS_INTERVAL} lines. */
    private static void reportProgress(int count, long total, String fileName) {
        if (count % PROGRESS_INTERVAL == 0) {
            System.out.println("Read " + count + " / " + total + " lines from " + fileName + ".");
        }
    }
}

