/*
 * Decompiled with CFR 0.152.
 */
package edu.umn.biomedicus.normalization;

import com.google.inject.Inject;
import com.google.inject.Module;
import edu.umn.biomedicus.common.dictionary.BidirectionalDictionary;
import edu.umn.biomedicus.common.dictionary.StringIdentifier;
import edu.umn.biomedicus.common.types.syntax.PartOfSpeech;
import edu.umn.biomedicus.exc.BiomedicusException;
import edu.umn.biomedicus.framework.Bootstrapper;
import edu.umn.biomedicus.normalization.TermPos;
import edu.umn.biomedicus.normalization.TermString;
import edu.umn.biomedicus.vocabulary.Vocabulary;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicLong;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.annotation.Nullable;
import org.kohsuke.args4j.Argument;
import org.kohsuke.args4j.CmdLineException;
import org.kohsuke.args4j.CmdLineParser;
import org.kohsuke.args4j.Option;
import org.kohsuke.args4j.spi.PathOptionHandler;
import org.rocksdb.Options;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Command-line tool that reads a pipe-delimited SPECIALIST Lexicon LRAGR file and writes a
 * RocksDB database mapping (word identifier, Penn part of speech) keys to the base form of
 * the word.
 *
 * <p>Usage: {@code NormalizerModelBuilder -l [path-to-lragr] [path-to-output-db]}.
 */
public final class NormalizerModelBuilder {
    /** Index of the inflectional variant in an LRAGR row split on {@code '|'}. */
    public static final int LRAGR_INFLECTIONAL_VARIANT = 1;
    /** Index of the syntactic category in an LRAGR row split on {@code '|'}. */
    public static final int LRAGR_SYNTACTIC_CATEGORY = 2;
    /** Index of the agreement/inflection code in an LRAGR row split on {@code '|'}. */
    public static final int LRAGR_AGREEMENT_INFLECTION_CODE = 3;
    /** Index of the base form in an LRAGR row split on {@code '|'}. */
    public static final int LRAGR_BASE_FORM = 4;

    private static final Logger LOGGER = LoggerFactory.getLogger(NormalizerModelBuilder.class);
    /** Inflectional variants longer than this many characters are skipped. */
    private static final int IGNORE_WHEN_LONGER = 100;
    /** A progress message is printed every time this many rows have been read. */
    private static final long PROGRESS_INTERVAL = 10000L;
    /** Inflectional variants containing any of these punctuation characters or a digit are skipped. */
    private static final Pattern EXCLUSION_PATTERN =
            Pattern.compile(".*[|$#,@;:<>?\\[\\]{}\\d.].*");
    /** Primary mapping from LRAGR category/inflection pairs to Penn part-of-speech tags. */
    private static final Map<LragrPos, PartOfSpeech> LRAGR_TO_PENN;
    /**
     * Secondary mapping that additionally registers noun entries under the opposite number
     * tag (plural codes under NN, singular codes under NNS).
     */
    private static final Map<LragrPos, PartOfSpeech> LRAGR_TO_PENN_FALLBACK;

    static {
        Map<LragrPos, PartOfSpeech> primary = new HashMap<>();
        primary.put(new LragrPos("noun", "uncount(thr_plur)"), PartOfSpeech.NNS);
        primary.put(new LragrPos("noun", "count(thr_plur)"), PartOfSpeech.NNS);
        primary.put(new LragrPos("noun", "uncount(thr_sing)"), PartOfSpeech.NN);
        primary.put(new LragrPos("noun", "count(thr_sing)"), PartOfSpeech.NN);
        primary.put(new LragrPos("verb", "infinitive"), PartOfSpeech.VB);
        primary.put(new LragrPos("verb", "pres(thr_sing)"), PartOfSpeech.VBZ);
        primary.put(new LragrPos("verb", "past"), PartOfSpeech.VBD);
        primary.put(new LragrPos("verb", "past_part"), PartOfSpeech.VBN);
        primary.put(new LragrPos("verb", "pres_part"), PartOfSpeech.VBG);
        primary.put(new LragrPos("adj", "comparative"), PartOfSpeech.JJR);
        primary.put(new LragrPos("adj", "superlative"), PartOfSpeech.JJS);
        primary.put(new LragrPos("adj", "positive"), PartOfSpeech.JJ);
        primary.put(new LragrPos("adv", "comparative"), PartOfSpeech.RBR);
        primary.put(new LragrPos("adv", "superlative"), PartOfSpeech.RBS);
        primary.put(new LragrPos("adv", "positive"), PartOfSpeech.RB);
        LRAGR_TO_PENN = Collections.unmodifiableMap(primary);

        Map<LragrPos, PartOfSpeech> fallback = new HashMap<>();
        fallback.put(new LragrPos("noun", "uncount(thr_plur)"), PartOfSpeech.NN);
        fallback.put(new LragrPos("noun", "count(thr_plur)"), PartOfSpeech.NN);
        fallback.put(new LragrPos("noun", "uncount(thr_sing)"), PartOfSpeech.NNS);
        fallback.put(new LragrPos("noun", "count(thr_sing)"), PartOfSpeech.NNS);
        LRAGR_TO_PENN_FALLBACK = Collections.unmodifiableMap(fallback);
    }

    private final BidirectionalDictionary normsIndex;
    private final BidirectionalDictionary wordsIndex;
    @Nullable
    @Option(name="-l", required=true, handler=PathOptionHandler.class, usage="path to SPECIALIST Lexicon LRAGR file.")
    private Path lragrPath;
    @Nullable
    @Argument(required=true, handler=PathOptionHandler.class, usage="output path of normalization model")
    private Path dbPath;

    /**
     * Creates a builder that resolves identifiers through the given vocabulary.
     *
     * @param vocabulary source of the norms index and the words index
     */
    @Inject
    public NormalizerModelBuilder(Vocabulary vocabulary) {
        this.normsIndex = vocabulary.getNormsIndex();
        this.wordsIndex = vocabulary.getWordsIndex();
    }

    /**
     * Program entry point: bootstraps an instance and runs {@link #process(String[])}.
     *
     * @param args command-line arguments, see the class description
     */
    public static void main(String[] args) {
        try {
            Bootstrapper.create(new Module[0]).getInstance(NormalizerModelBuilder.class).process(args);
        } catch (BiomedicusException | IOException e) {
            // Report on stderr as well so the failure is visible even without a logging backend.
            System.err.println("Failed to build normalizer model: " + e.getMessage());
            LOGGER.error("Failed to build normalizer model", e);
        }
    }

    /**
     * Parses the command line, reads the LRAGR file and writes the normalizer database.
     *
     * <p>If the arguments cannot be parsed, usage information is printed to stderr and the
     * method returns. If an existing file at the output path cannot be deleted, the failure is
     * reported and the method returns without building anything.
     *
     * @param args command-line arguments
     * @throws IOException if the LRAGR file cannot be read or the database cannot be written
     */
    public void process(String[] args) throws IOException {
        CmdLineParser parser = new CmdLineParser(this);
        try {
            parser.parseArgument(args);
        } catch (CmdLineException e) {
            System.err.println(e.getLocalizedMessage());
            System.err.println("java edu.umn.biomedicus.normalization.NormalizerModelBuilder -l [path-to-lragr] [path-to-output-db]");
            parser.printUsage(System.err);
            return;
        }
        // Both are declared required = true, so the parser should have populated them.
        Path lragr = Objects.requireNonNull(this.lragrPath, "lragrPath should be set by the argument parser");
        Path db = Objects.requireNonNull(this.dbPath, "dbPath should be set by the argument parser");

        System.out.println("Starting building normalizer model from: " + lragr.toString());
        try {
            Files.deleteIfExists(db);
        } catch (IOException e) {
            System.out.println("Failed to delete an existing db at location: " + db.toString());
            LOGGER.error("Failed to delete an existing db at location: {}", db, e);
            return;
        }

        TreeMap<TermPos, TermString> normalizations = readNormalizations(lragr);
        writeDatabase(db, normalizations);
    }

    /** Counts the lines of {@code file}; used only for progress reporting. */
    private static long countLines(Path file) throws IOException {
        long count = 0L;
        try (BufferedReader reader = Files.newBufferedReader(file)) {
            while (reader.readLine() != null) {
                count++;
            }
        }
        return count;
    }

    /** Reads every row of the LRAGR file and collects the resulting normalization entries. */
    private TreeMap<TermPos, TermString> readNormalizations(Path lragr) throws IOException {
        long totalLines = countLines(lragr);
        TreeMap<TermPos, TermString> normalizations = new TreeMap<>();
        long lineNumber = 0L;
        try (BufferedReader reader = Files.newBufferedReader(lragr)) {
            String line;
            while ((line = reader.readLine()) != null) {
                // Count every row, including ones that are skipped, so the progress
                // figure is comparable with the total line count.
                lineNumber++;
                if (lineNumber % PROGRESS_INTERVAL == 0L) {
                    System.out.println("Read " + lineNumber + " of " + totalLines + " from LRAGR.");
                }
                addNormalization(line.split("\\|"), lineNumber, normalizations);
            }
        }
        return normalizations;
    }

    /** Adds the entries (if any) produced by a single split LRAGR row to {@code normalizations}. */
    private void addNormalization(String[] fields, long lineNumber,
            Map<TermPos, TermString> normalizations) {
        if (fields.length <= LRAGR_BASE_FORM) {
            LOGGER.warn("Skipping malformed LRAGR line {}: expected at least {} fields but found {}",
                    lineNumber, LRAGR_BASE_FORM + 1, fields.length);
            return;
        }

        String inflectionalVariant = fields[LRAGR_INFLECTIONAL_VARIANT];
        if (inflectionalVariant.length() > IGNORE_WHEN_LONGER
                || EXCLUSION_PATTERN.matcher(inflectionalVariant).matches()) {
            return;
        }

        String syntacticCategory = fields[LRAGR_SYNTACTIC_CATEGORY].trim();
        String agreementInflectionCode = fields[LRAGR_AGREEMENT_INFLECTION_CODE].trim();
        String baseForm = fields[LRAGR_BASE_FORM].trim();

        // Rows whose inflectional variant ends with the base form produce no entry.
        if (inflectionalVariant.endsWith(baseForm)) {
            return;
        }

        StringIdentifier termIdentifier = this.wordsIndex.getTermIdentifier(inflectionalVariant);
        if (termIdentifier.isUnknown()) {
            return;
        }

        LragrPos lragrPos = new LragrPos(syntacticCategory, agreementInflectionCode);
        PartOfSpeech pennPos = LRAGR_TO_PENN.get(lragrPos);
        if (pennPos != null) {
            normalizations.put(new TermPos(termIdentifier, pennPos),
                    new TermString(this.normsIndex.getTermIdentifier(baseForm), baseForm));
        }
        PartOfSpeech fallbackPos = LRAGR_TO_PENN_FALLBACK.get(lragrPos);
        if (fallbackPos != null) {
            normalizations.put(new TermPos(termIdentifier, fallbackPos),
                    new TermString(this.normsIndex.getTermIdentifier(baseForm), baseForm));
        }
    }

    /** Writes all collected entries into a RocksDB database at {@code db}. */
    private static void writeDatabase(Path db, Map<TermPos, TermString> normalizations)
            throws IOException {
        RocksDB.loadLibrary();
        System.out.println("Creating normalizer db from " + normalizations.size() + " terms");
        try (Options options = new Options().setCreateIfMissing(true).prepareForBulkLoad();
                RocksDB rocksDB = RocksDB.open(options, db.toString())) {
            for (Map.Entry<TermPos, TermString> entry : normalizations.entrySet()) {
                rocksDB.put(entry.getKey().getBytes(), entry.getValue().getBytes());
            }
        } catch (RocksDBException e) {
            // Surface database failures to the caller instead of reporting success on a
            // missing or partially written model.
            throw new IOException("Failed to write normalizer db at " + db, e);
        }
    }

    /** Value-type key pairing an LRAGR syntactic category with an agreement/inflection code. */
    private static class LragrPos
    implements Comparable<LragrPos> {
        private final String syntacticCategory;
        private final String agreementInflectionCode;

        /**
         * @param syntacticCategory LRAGR syntactic category, must not be null
         * @param agreementInflectionCode LRAGR agreement/inflection code, must not be null
         */
        public LragrPos(String syntacticCategory, String agreementInflectionCode) {
            this.syntacticCategory = Objects.requireNonNull(syntacticCategory);
            this.agreementInflectionCode = Objects.requireNonNull(agreementInflectionCode);
        }

        @Override
        public boolean equals(Object o) {
            if (this == o) {
                return true;
            }
            if (o == null || this.getClass() != o.getClass()) {
                return false;
            }
            LragrPos other = (LragrPos) o;
            return this.agreementInflectionCode.equals(other.agreementInflectionCode)
                    && this.syntacticCategory.equals(other.syntacticCategory);
        }

        @Override
        public int hashCode() {
            int result = this.syntacticCategory.hashCode();
            result = 31 * result + this.agreementInflectionCode.hashCode();
            return result;
        }

        /** Orders by syntactic category first, then by agreement/inflection code. */
        @Override
        public int compareTo(LragrPos o) {
            int result = this.syntacticCategory.compareTo(o.syntacticCategory);
            if (result == 0) {
                result = this.agreementInflectionCode.compareTo(o.agreementInflectionCode);
            }
            return result;
        }
    }
}

