/*
 * Decompiled with CFR 0.152.
 */
package edu.umd.cloud9.webgraph.driver;

import edu.umd.cloud9.webgraph.BuildReverseWebGraph;
import edu.umd.cloud9.webgraph.BuildWebGraph;
import edu.umd.cloud9.webgraph.CollectHostnames;
import edu.umd.cloud9.webgraph.CollectionConfigurationManager;
import edu.umd.cloud9.webgraph.ComputeWeight;
import edu.umd.cloud9.webgraph.DriverUtil;
import edu.umd.cloud9.webgraph.TrecExtractLinks;
import edu.umd.cloud9.webgraph.normalizer.AnchorTextNormalizer;
import java.io.File;
import java.io.PrintStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class TrecDriver
extends Configured
implements Tool {
    private String inputBase;
    private String outputBase;
    private boolean includeInternalLinks = false;
    private boolean computeAnchorWeights = false;
    private String normalizer = "edu.umd.cloud9.webgraph.normalizer.AnchorTextBasicNormalizer";
    private String filtername = null;
    private Configuration conf;
    private CollectionConfigurationManager configer;

    public int run(String[] args) throws Exception {
        this.conf = this.getConf();
        this.configer = new CollectionConfigurationManager();
        if (!this.readInput(args)) {
            TrecDriver.printUsage();
            return -1;
        }
        this.configer.applyConfig(this.conf);
        this.conf.setInt("Cloud9.Mappers", 2000);
        this.conf.setInt("Cloud9.Reducers", 200);
        this.conf.setBoolean("Cloud9.IncludeInternalLinks", this.includeInternalLinks);
        this.conf.set("Cloud9.AnchorTextNormalizer", this.normalizer);
        String inputPath = this.inputBase;
        String outputPath = this.outputBase + "/" + "extracted.links";
        this.conf.set("Cloud9.InputPath", inputPath);
        this.conf.set("Cloud9.OutputPath", outputPath);
        int r = new TrecExtractLinks(this.conf, this.configer).run();
        if (r != 0) {
            return -1;
        }
        inputPath = this.outputBase + "/" + "extracted.links";
        outputPath = this.outputBase + "/" + "reverseWebGraph" + "/";
        this.conf.set("Cloud9.InputPath", inputPath);
        this.conf.set("Cloud9.OutputPath", outputPath);
        this.conf.setInt("Cloud9.Reducers", 200);
        r = new BuildReverseWebGraph(this.conf).run();
        if (r != 0) {
            return -1;
        }
        inputPath = this.outputBase + "/" + "reverseWebGraph" + "/";
        outputPath = this.outputBase + "/" + "webGraph" + "/";
        this.conf.set("Cloud9.InputPath", inputPath);
        this.conf.set("Cloud9.OutputPath", outputPath);
        this.conf.setInt("Cloud9.Mappers", 1);
        this.conf.setInt("Cloud9.Reducers", 200);
        r = new BuildWebGraph(this.conf).run();
        if (r != 0) {
            return -1;
        }
        if (this.computeAnchorWeights) {
            inputPath = this.outputBase + "/" + "webGraph" + "/";
            outputPath = this.outputBase + "/" + "hostnames" + "/";
            this.conf.set("Cloud9.InputPath", inputPath);
            this.conf.set("Cloud9.OutputPath", outputPath);
            this.conf.setInt("Cloud9.Mappers", 1);
            this.conf.setInt("Cloud9.Reducers", 200);
            r = new CollectHostnames(this.conf).run();
            if (r != 0) {
                return -1;
            }
            inputPath = this.outputBase + "/" + "reverseWebGraph" + "/," + this.outputBase + "/" + "hostnames" + "/";
            outputPath = this.outputBase + "/" + "weightedReverseWebGraph" + "/";
            this.conf.set("Cloud9.InputPath", inputPath);
            this.conf.set("Cloud9.OutputPath", outputPath);
            this.conf.setInt("Cloud9.Mappers", 1);
            this.conf.setInt("Cloud9.Reducers", 200);
            r = new ComputeWeight(this.conf).run();
            if (r != 0) {
                return -1;
            }
        }
        return 0;
    }

    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run((Configuration)new Configuration(), (Tool)new TrecDriver(), (String[])args);
    }

    private static int printUsage() {
        System.out.println("\nusage:[-input collection-path][-output output-base[-collection {trecweb|gov2|wt10g}] [-inputFormat userSpecifiedInputFormatClass] [-docnoClass userSpecifiedDocnoMappingClass] -docno userSpecifiedDocnoMappingFile [-il] [-caw] [-normalizer normalizerClass] ");
        System.out.println("Help:");
        System.out.println("[-input collection-path]\n\tinput directory");
        System.out.println("[-output output-base]\n\toutput directory");
        System.out.println("-collection {trecweb|gov2|wt10g}\n\tname the collection name, if it is supported, automatic configuration will be applied");
        System.out.println("-inputFormat userSpecifiedInputFormatClass\n\tspecify the class work as FileInputFormat; Required when -collection is not specified");
        System.out.println("-docnoClass userSpecifiedDocnoMappingClass\n\tspecify the class work as DocnoMapping;Required when -collection is not specified. It should implement GenericDocnoMapping interface.");
        System.out.println("-docno userSpecifiedDocnoMappingFile\n\tspecify the File work as input to specified DocnoMapping class.");
        System.out.println("-il\n\tinclude internal links, without this option we will not include internal links");
        System.out.println("-caw\n\tcompute default anchor weights, without this option we will not compute default anchor weights");
        System.out.println("-normalizer normalizerClass\n\ta normalizer class used to normalize the lines of anchor text, must extend edu.umd.cloud9.webgraph.normalize.AnchorTextNormalizer.");
        System.out.println();
        ToolRunner.printGenericCommandUsage((PrintStream)System.out);
        return -1;
    }

    private boolean readInput(String[] args) {
        if (args.length < 6) {
            System.out.println("More arguments needed.");
            return false;
        }
        this.inputBase = new File(DriverUtil.argValue(args, "-input")).getAbsolutePath();
        this.outputBase = new File(DriverUtil.argValue(args, "-output")).getAbsolutePath();
        boolean knownCollection = DriverUtil.argExists(args, "-collection");
        if (knownCollection) {
            String collectionName = DriverUtil.argValue(args, "-collection");
            if (!this.configer.setConfByCollection(collectionName)) {
                System.out.println("Collection \"" + collectionName + "\" not supported, please specify inputformat and docnomapping class, or contact developer.");
                return false;
            }
        } else {
            String ciName = DriverUtil.argValue(args, "-inputFormat");
            if (!this.configer.setUserSpecifiedInputFormat(ciName)) {
                System.out.println("class \"" + ciName + "\" doesn't exist or not sub-class of FileInputFormat");
                return false;
            }
            String cmName = DriverUtil.argValue(args, "-docnoClass");
            if (!this.configer.setUserSpecifiedDocnoMappingClass(cmName)) {
                System.out.println("class \"" + cmName + "\" doesn't exist or not implemented DocnoMappingt");
                return false;
            }
        }
        this.conf.set("Cloud9.DocnoMappingFile", DriverUtil.argValue(args, "-docno"));
        this.includeInternalLinks = DriverUtil.argExists(args, "-il");
        this.computeAnchorWeights = DriverUtil.argExists(args, "-caw");
        String nm = DriverUtil.argValue(args, "-normalizer");
        try {
            if (!AnchorTextNormalizer.class.isAssignableFrom(Class.forName(nm))) {
                System.out.println("Invalid arguments; Normalizer class must implement AnchorTextNormalizer interface.");
                return false;
            }
        }
        catch (ClassNotFoundException e) {
            System.out.println("Invalid arguments; Specified Normalizer class doesn't exist");
            return false;
        }
        this.normalizer = nm;
        return true;
    }
}

