/*
 * Decompiled with CFR 0.152.
 */
package org.apache.mahout.utils.vectors.lucene;

import com.google.common.base.Charsets;
import com.google.common.base.Preconditions;
import com.google.common.io.Closeables;
import com.google.common.io.Files;
import java.io.BufferedWriter;
import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
import org.apache.commons.cli2.OptionException;
import org.apache.commons.cli2.builder.ArgumentBuilder;
import org.apache.commons.cli2.builder.DefaultOptionBuilder;
import org.apache.commons.cli2.builder.GroupBuilder;
import org.apache.commons.cli2.commandline.Parser;
import org.apache.commons.cli2.option.DefaultOption;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.mahout.common.CommandLineUtil;
import org.apache.mahout.math.VectorWritable;
import org.apache.mahout.utils.vectors.io.DelimitedTermInfoWriter;
import org.apache.mahout.utils.vectors.io.SequenceFileVectorWriter;
import org.apache.mahout.utils.vectors.io.VectorWriter;
import org.apache.mahout.utils.vectors.lucene.CachedTermInfo;
import org.apache.mahout.utils.vectors.lucene.LuceneIterable;
import org.apache.mahout.utils.vectors.lucene.TFDFMapper;
import org.apache.mahout.vectorizer.TF;
import org.apache.mahout.vectorizer.TFIDF;
import org.apache.mahout.vectorizer.Weight;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Command-line driver that reads one field of a Lucene index and writes
 * <ul>
 *   <li>one weighted vector per document to a Hadoop {@code SequenceFile}
 *       ({@code LongWritable} keys, {@code VectorWritable} values), and</li>
 *   <li>the term dictionary of that field to a delimited UTF-8 text file.</li>
 * </ul>
 *
 * <p>Configure an instance through the setters and then call {@link #dumpVectors()},
 * or run {@link #main(String[])} with the corresponding command-line options.
 */
public final class Driver {
    private static final Logger log = LoggerFactory.getLogger(Driver.class);

    private String luceneDir;
    private String outFile;
    private String field;
    private String idField;
    private String dictOut;
    private String weightType = "tfidf";
    private String delimiter = "\t";
    /** Norm handed to {@link LuceneIterable}; the default of -1.0 is the "do not normalize" value per the --norm help text. */
    private double norm = -1.0;
    private long maxDocs = Long.MAX_VALUE;
    private int minDf = 1;
    private int maxDFPercent = 99;
    private double maxPercentErrorDocs = 0.0;

    /**
     * Writes the document vectors and then the term dictionary for the configured index field.
     *
     * <p>All argument validation, including the weight type, happens before the index is
     * opened. The Lucene directory and reader are always released before this method
     * returns, whether it completes normally or throws.
     *
     * @throws IllegalArgumentException if the Lucene directory does not exist or is not a
     *         directory, if {@code maxDocs < 0}, {@code minDf < 1} or {@code maxDFPercent > 99},
     *         or if the weight type is neither "tf" nor "tfidf" (case-insensitive)
     * @throws IOException if the index cannot be opened or an output cannot be written
     */
    public void dumpVectors() throws IOException {
        File file = new File(luceneDir);
        Preconditions.checkArgument(file.isDirectory(),
            "Lucene directory: " + file.getAbsolutePath() + " does not exist or is not a directory");
        Preconditions.checkArgument(maxDocs >= 0L, "maxDocs must be >= 0");
        Preconditions.checkArgument(minDf >= 1, "minDf must be >= 1");
        Preconditions.checkArgument(maxDFPercent <= 99, "maxDFPercent must be <= 99");
        // Resolve the weight before touching the index so a bad value cannot leave it open.
        Weight weight = createWeight();

        Directory dir = FSDirectory.open(file);
        try {
            IndexReader reader = IndexReader.open(dir, true);
            try {
                CachedTermInfo termInfo = new CachedTermInfo(reader, field, minDf, maxDFPercent);
                TFDFMapper mapper = new TFDFMapper(reader, weight, termInfo);
                // The default norm of -1.0 is passed through unchanged.
                LuceneIterable iterable =
                    new LuceneIterable(reader, idField, field, mapper, norm, maxPercentErrorDocs);
                writeVectors(iterable);
                // The reader stays open until the dictionary is written, in case termInfo still reads from it.
                writeDictionary(termInfo);
            } finally {
                Closeables.closeQuietly(reader);
            }
        } finally {
            Closeables.closeQuietly(dir);
        }
    }

    /** Maps the configured weight type name ("tf" or "tfidf", case-insensitive) to a {@link Weight}. */
    private Weight createWeight() {
        if ("tf".equalsIgnoreCase(weightType)) {
            return new TF();
        }
        if ("tfidf".equalsIgnoreCase(weightType)) {
            return new TFIDF();
        }
        throw new IllegalArgumentException("Weight type " + weightType + " is not supported");
    }

    /** Writes at most {@code maxDocs} vectors from {@code iterable} to the sequence file at {@code outFile}. */
    private void writeVectors(LuceneIterable iterable) throws IOException {
        log.info("Output File: {}", outFile);
        VectorWriter vectorWriter = getSeqFileWriter(outFile);
        try {
            long numDocs = vectorWriter.write(iterable, maxDocs);
            log.info("Wrote: {} vectors", numDocs);
        } finally {
            Closeables.closeQuietly(vectorWriter);
        }
    }

    /** Writes the term dictionary to {@code dictOut} as UTF-8 text using the configured delimiter. */
    private void writeDictionary(CachedTermInfo termInfo) throws IOException {
        File dictOutFile = new File(dictOut);
        log.info("Dictionary Output file: {}", dictOutFile);
        BufferedWriter writer = Files.newWriter(dictOutFile, Charsets.UTF_8);
        DelimitedTermInfoWriter tiWriter = new DelimitedTermInfoWriter(writer, delimiter, field);
        try {
            tiWriter.write(termInfo);
        } finally {
            // NOTE(review): relies on DelimitedTermInfoWriter.close() closing the wrapped writer - confirm.
            Closeables.closeQuietly(tiWriter);
        }
    }

    /**
     * Command-line entry point: parses the options, configures a {@link Driver} and runs
     * {@link #dumpVectors()}. Prints the usage help when {@code --help} is given or when the
     * arguments cannot be parsed.
     *
     * @param args command-line arguments
     * @throws IOException if reading the index or writing an output fails
     */
    public static void main(String[] args) throws IOException {
        DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
        ArgumentBuilder abuilder = new ArgumentBuilder();
        GroupBuilder gbuilder = new GroupBuilder();

        Option inputOpt = obuilder.withLongName("dir").withRequired(true)
            .withArgument(abuilder.withName("dir").withMinimum(1).withMaximum(1).create())
            .withDescription("The Lucene directory")
            .withShortName("d").create();
        Option outputOpt = obuilder.withLongName("output").withRequired(true)
            .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create())
            .withDescription("The output file")
            .withShortName("o").create();
        Option fieldOpt = obuilder.withLongName("field").withRequired(true)
            .withArgument(abuilder.withName("field").withMinimum(1).withMaximum(1).create())
            .withDescription("The field in the index")
            .withShortName("f").create();
        Option idFieldOpt = obuilder.withLongName("idField").withRequired(false)
            .withArgument(abuilder.withName("idField").withMinimum(1).withMaximum(1).create())
            .withDescription("The field in the index containing the index.  If null, then the Lucene internal doc id is used which is prone to error if the underlying index changes")
            .create();
        Option dictOutOpt = obuilder.withLongName("dictOut").withRequired(true)
            .withArgument(abuilder.withName("dictOut").withMinimum(1).withMaximum(1).create())
            .withDescription("The output of the dictionary")
            .withShortName("t").create();
        Option weightOpt = obuilder.withLongName("weight").withRequired(false)
            .withArgument(abuilder.withName("weight").withMinimum(1).withMaximum(1).create())
            .withDescription("The kind of weight to use. Currently TF or TFIDF")
            .withShortName("w").create();
        Option delimiterOpt = obuilder.withLongName("delimiter").withRequired(false)
            .withArgument(abuilder.withName("delimiter").withMinimum(1).withMaximum(1).create())
            .withDescription("The delimiter for outputting the dictionary")
            .withShortName("l").create();
        Option powerOpt = obuilder.withLongName("norm").withRequired(false)
            .withArgument(abuilder.withName("norm").withMinimum(1).withMaximum(1).create())
            .withDescription("The norm to use, expressed as either a double or \"INF\" if you want to use the Infinite norm.  Must be greater or equal to 0.  The default is not to normalize")
            .withShortName("n").create();
        Option maxOpt = obuilder.withLongName("max").withRequired(false)
            .withArgument(abuilder.withName("max").withMinimum(1).withMaximum(1).create())
            .withDescription("The maximum number of vectors to output.  If not specified, then it will loop over all docs")
            .withShortName("m").create();
        Option minDFOpt = obuilder.withLongName("minDF").withRequired(false)
            .withArgument(abuilder.withName("minDF").withMinimum(1).withMaximum(1).create())
            .withDescription("The minimum document frequency.  Default is 1")
            .withShortName("md").create();
        Option maxDFPercentOpt = obuilder.withLongName("maxDFPercent").withRequired(false)
            .withArgument(abuilder.withName("maxDFPercent").withMinimum(1).withMaximum(1).create())
            .withDescription("The max percentage of docs for the DF.  Can be used to remove really high frequency terms.  Expressed as an integer between 0 and 100. Default is 99.")
            .withShortName("x").create();
        Option maxPercentErrorDocsOpt = obuilder.withLongName("maxPercentErrorDocs").withRequired(false)
            .withArgument(abuilder.withName("maxPercentErrorDocs").withMinimum(1).withMaximum(1).create())
            .withDescription("The max percentage of docs that can have a null term vector. These are noise document and can occur if the analyzer used strips out all terms in the target field. This percentage is expressed as a value between 0 and 1. The default is 0.")
            .withShortName("err").create();
        Option helpOpt = obuilder.withLongName("help")
            .withDescription("Print out help")
            .withShortName("h").create();

        Group group = gbuilder.withName("Options")
            .withOption(inputOpt)
            .withOption(idFieldOpt)
            .withOption(outputOpt)
            .withOption(delimiterOpt)
            .withOption(helpOpt)
            .withOption(fieldOpt)
            .withOption(maxOpt)
            .withOption(dictOutOpt)
            .withOption(powerOpt)
            .withOption(maxDFPercentOpt)
            .withOption(weightOpt)
            .withOption(minDFOpt)
            .withOption(maxPercentErrorDocsOpt)
            .create();

        try {
            Parser parser = new Parser();
            parser.setGroup(group);
            CommandLine cmdLine = parser.parse(args);

            if (cmdLine.hasOption(helpOpt)) {
                CommandLineUtil.printHelp(group);
                return;
            }

            if (cmdLine.hasOption(inputOpt)) {
                Driver luceneDriver = new Driver();
                luceneDriver.setLuceneDir(cmdLine.getValue(inputOpt).toString());

                if (cmdLine.hasOption(maxOpt)) {
                    luceneDriver.setMaxDocs(Long.parseLong(cmdLine.getValue(maxOpt).toString()));
                }
                if (cmdLine.hasOption(weightOpt)) {
                    luceneDriver.setWeightType(cmdLine.getValue(weightOpt).toString());
                }
                luceneDriver.setField(cmdLine.getValue(fieldOpt).toString());
                if (cmdLine.hasOption(minDFOpt)) {
                    luceneDriver.setMinDf(Integer.parseInt(cmdLine.getValue(minDFOpt).toString()));
                }
                if (cmdLine.hasOption(maxDFPercentOpt)) {
                    luceneDriver.setMaxDFPercent(Integer.parseInt(cmdLine.getValue(maxDFPercentOpt).toString()));
                }
                if (cmdLine.hasOption(powerOpt)) {
                    String power = cmdLine.getValue(powerOpt).toString();
                    // "INF" selects the infinite norm; anything else must parse as a double.
                    if ("INF".equals(power)) {
                        luceneDriver.setNorm(Double.POSITIVE_INFINITY);
                    } else {
                        luceneDriver.setNorm(Double.parseDouble(power));
                    }
                }
                if (cmdLine.hasOption(idFieldOpt)) {
                    luceneDriver.setIdField(cmdLine.getValue(idFieldOpt).toString());
                }
                if (cmdLine.hasOption(maxPercentErrorDocsOpt)) {
                    luceneDriver.setMaxPercentErrorDocs(
                        Double.parseDouble(cmdLine.getValue(maxPercentErrorDocsOpt).toString()));
                }

                luceneDriver.setOutFile(cmdLine.getValue(outputOpt).toString());
                luceneDriver.setDelimiter(
                    cmdLine.hasOption(delimiterOpt) ? cmdLine.getValue(delimiterOpt).toString() : "\t");
                luceneDriver.setDictOut(cmdLine.getValue(dictOutOpt).toString());

                luceneDriver.dumpVectors();
            }
        } catch (OptionException e) {
            log.error("Exception", e);
            CommandLineUtil.printHelp(group);
        }
    }

    /**
     * Creates a vector writer backed by a new {@code SequenceFile} at {@code outFile}.
     *
     * <p>The file system is resolved from the path itself, so an output path that carries an
     * explicit scheme is written to that file system; a path without a scheme resolves to the
     * configured default file system.
     *
     * @param outFile path of the sequence file to create
     * @return a writer that stores vectors in the new sequence file
     * @throws IOException if the file system cannot be obtained or the file cannot be created
     */
    private static VectorWriter getSeqFileWriter(String outFile) throws IOException {
        Path path = new Path(outFile);
        Configuration conf = new Configuration();
        FileSystem fs = path.getFileSystem(conf);
        SequenceFile.Writer seqWriter =
            SequenceFile.createWriter(fs, conf, path, LongWritable.class, VectorWritable.class);
        return new SequenceFileVectorWriter(seqWriter);
    }

    /** Sets the path of the Lucene index directory to read. */
    public void setLuceneDir(String luceneDir) {
        this.luceneDir = luceneDir;
    }

    /** Sets the maximum number of vectors to write; must be {@code >= 0} when {@link #dumpVectors()} runs. */
    public void setMaxDocs(long maxDocs) {
        this.maxDocs = maxDocs;
    }

    /** Sets the weight type name; "tf" and "tfidf" (case-insensitive) are supported. */
    public void setWeightType(String weightType) {
        this.weightType = weightType;
    }

    /** Sets the name of the index field to vectorize. */
    public void setField(String field) {
        this.field = field;
    }

    /** Sets the minimum document frequency; must be {@code >= 1} when {@link #dumpVectors()} runs. */
    public void setMinDf(int minDf) {
        this.minDf = minDf;
    }

    /** Sets the maximum document-frequency percentage; must be {@code <= 99} when {@link #dumpVectors()} runs. */
    public void setMaxDFPercent(int maxDFPercent) {
        this.maxDFPercent = maxDFPercent;
    }

    /** Sets the norm passed to the vector iterable; the default is -1.0. */
    public void setNorm(double norm) {
        this.norm = norm;
    }

    /** Sets the name of the index field passed to the vector iterable as the document id field; may be left unset. */
    public void setIdField(String idField) {
        this.idField = idField;
    }

    /** Sets the path of the sequence file that receives the vectors. */
    public void setOutFile(String outFile) {
        this.outFile = outFile;
    }

    /** Sets the delimiter used when writing the dictionary file. */
    public void setDelimiter(String delimiter) {
        this.delimiter = delimiter;
    }

    /** Sets the path of the dictionary output file. */
    public void setDictOut(String dictOut) {
        this.dictOut = dictOut;
    }

    /** Sets the tolerated share of error documents passed to the vector iterable (0 to 1 per the command-line help). */
    public void setMaxPercentErrorDocs(double maxPercentErrorDocs) {
        this.maxPercentErrorDocs = maxPercentErrorDocs;
    }
}

