/*
 * Decompiled with CFR 0.152.
 */
package edu.umd.cloud9.webgraph.driver;

import edu.umd.cloud9.webgraph.BuildReverseWebGraph;
import edu.umd.cloud9.webgraph.BuildWebGraph;
import edu.umd.cloud9.webgraph.ClueExtractLinks;
import edu.umd.cloud9.webgraph.CollectHostnames;
import edu.umd.cloud9.webgraph.ComputeWeight;
import edu.umd.cloud9.webgraph.DriverUtil;
import java.io.PrintStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class ClueWebDriver
extends Configured
implements Tool {
    private static int printUsage() {
        System.out.println("\nusage:-input collection-path-output output-base-docno userSpecifiedDocnoMappingFile -begin begin_segment-end end_segment[-il] [-caw] -normalizer normalizerClass");
        System.out.println("Help:");
        System.out.println("[-input collection-path]\n\tinput directory");
        System.out.println("[-output output-base]\n\toutput directory");
        System.out.println("-begin begin_segment: First segment to process.");
        System.out.println("-end end_segment: Last segment to process.");
        System.out.println("-docno docno mapping file.");
        System.out.println("-il\n\tinclude internal links, without thisoption we will not include internal links");
        System.out.println("-caw\n\tcompute default anchor weights, without this option we will not compute default anchor weights");
        System.out.println("-normalizer normalizerClass\n\ta normalizer class used to normalize the lines of anchor text, must extend edu.umd.cloud9.webgraph.normalize.AnchorTextNormalizer.");
        System.out.println();
        ToolRunner.printGenericCommandUsage((PrintStream)System.out);
        return -1;
    }

    public int run(String[] args) throws Exception {
        if (args.length < 6) {
            ClueWebDriver.printUsage();
            return -1;
        }
        Configuration conf = this.getConf();
        String inputArg = DriverUtil.argValue(args, "-input");
        String inputBase = inputArg.endsWith("/") ? inputArg : inputArg + "/";
        String outputArg = DriverUtil.argValue(args, "-output");
        String outputBase = outputArg.endsWith("/") ? outputArg : outputArg + "/";
        String docnoMapping = DriverUtil.argValue(args, "-docno");
        int fromSegment = Integer.parseInt(DriverUtil.argValue(args, "-begin"));
        int toSegment = Integer.parseInt(DriverUtil.argValue(args, "-end"));
        boolean includeInternalLinks = DriverUtil.argExists(args, "-il");
        boolean computeAnchorWeights = DriverUtil.argExists(args, "-caw");
        String normalizer = DriverUtil.argValue(args, "-normalizer");
        conf.setInt("Cloud9.Mappers", 2000);
        conf.setInt("Cloud9.Reducers", 200);
        conf.set("Cloud9.DocnoMappingFile", docnoMapping);
        conf.setBoolean("Cloud9.IncludeInternalLinks", includeInternalLinks);
        conf.set("Cloud9.AnchorTextNormalizer", normalizer);
        for (int i = fromSegment; i <= toSegment; ++i) {
            String inputPath = inputBase + "en." + (i == 10 ? "10" : "0" + i);
            String outputPath = outputBase + "extracted.links" + "/en." + (i == 10 ? "10" : "0" + i);
            conf.set("Cloud9.InputPath", inputPath);
            conf.set("Cloud9.OutputPath", outputPath);
            int r = new ClueExtractLinks(conf).run();
            if (r == 0) continue;
            return -1;
        }
        String inputPath = "";
        for (int i = fromSegment; i < toSegment; ++i) {
            inputPath = inputPath + outputBase + "extracted.links" + "/en.0" + i + "/,";
        }
        inputPath = toSegment == 10 ? inputPath + outputBase + "extracted.links" + "/en.10/" : inputPath + outputBase + "extracted.links" + "/en.0" + toSegment + "/";
        String outputPath = outputBase + "reverseWebGraph" + "/";
        conf.set("Cloud9.InputPath", inputPath);
        conf.set("Cloud9.OutputPath", outputPath);
        conf.setInt("Cloud9.Mappers", 1);
        conf.setInt("Cloud9.Reducers", 200 * (toSegment - fromSegment + 1));
        int r = new BuildReverseWebGraph(conf).run();
        if (r != 0) {
            return -1;
        }
        inputPath = outputBase + "reverseWebGraph" + "/";
        outputPath = outputBase + "webGraph" + "/";
        conf.set("Cloud9.InputPath", inputPath);
        conf.set("Cloud9.OutputPath", outputPath);
        conf.setInt("Cloud9.Mappers", 1);
        conf.setInt("Cloud9.Reducers", 200 * (toSegment - fromSegment + 1));
        r = new BuildWebGraph(conf).run();
        if (r != 0) {
            return -1;
        }
        if (computeAnchorWeights) {
            inputPath = outputBase + "webGraph" + "/";
            outputPath = outputBase + "hostnames" + "/";
            conf.set("Cloud9.InputPath", inputPath);
            conf.set("Cloud9.OutputPath", outputPath);
            conf.setInt("Cloud9.Mappers", 1);
            conf.setInt("Cloud9.Reducers", 200 * (toSegment - fromSegment + 1));
            r = new CollectHostnames(conf).run();
            if (r != 0) {
                return -1;
            }
            inputPath = outputBase + "reverseWebGraph" + "/," + outputBase + "hostnames" + "/";
            outputPath = outputBase + "weightedReverseWebGraph" + "/";
            conf.set("Cloud9.InputPath", inputPath);
            conf.set("Cloud9.OutputPath", outputPath);
            conf.setInt("Cloud9.Mappers", 1);
            conf.setInt("Cloud9.Reducers", 200 * (toSegment - fromSegment + 1));
            r = new ComputeWeight(conf).run();
            if (r != 0) {
                return -1;
            }
        }
        return 0;
    }

    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run((Configuration)new Configuration(), (Tool)new ClueWebDriver(), (String[])args);
    }
}

