/*
 * Decompiled with CFR 0.152.
 */
package edu.umd.cloud9.webgraph.driver;

import edu.umd.cloud9.io.array.ArrayListWritable;
import edu.umd.cloud9.webgraph.data.AnchorText;
import java.io.PrintStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Partitioner;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Logger;

/**
 * Hadoop driver that re-partitions a web graph stored as SequenceFiles of
 * {@code IntWritable -> ArrayListWritable<AnchorText>} records.
 *
 * <p>The job uses an identity mapper and identity reducer; the only custom
 * piece is {@link Partition}, which assigns contiguous key ranges to
 * reducers instead of hashing the key.
 *
 * <p>Command line:
 * {@code [input-path] [output-path] [number-of-documents] [number-of-reducers]}
 */
public class SortWebGraph
extends Configured
implements Tool {
    private static final Logger LOG = Logger.getLogger(SortWebGraph.class);

    /** Document count used when the caller passes {@code 0} on the command line. */
    private static final int DEFAULT_NUMBER_OF_DOCUMENTS = 503903810;

    /** Configuration key through which the document count reaches {@link Partition}. */
    private static final String NUMBER_OF_DOCUMENTS_KEY = "Cloud9.NumberOfDocuments";

    /**
     * Prints the expected arguments followed by Hadoop's generic options.
     *
     * @return always {@code -1}, so callers can use it directly as an exit code
     */
    private static int printUsage() {
        System.out.println("usage: [input-path] [output-path] [number-of-documents] [number-of-reducers]");
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    /**
     * Configures and runs the job.
     *
     * @param args {@code [input-path, output-path, number-of-documents, number-of-reducers]};
     *             a document count of {@code 0} selects {@link #DEFAULT_NUMBER_OF_DOCUMENTS}
     * @return {@code 0} once the job has been run, {@code -1} if the argument count is wrong
     * @throws Exception if a numeric argument cannot be parsed, or if file system
     *                   access or the job submission throws
     */
    public int run(String[] args) throws Exception {
        if (args.length != 4) {
            return SortWebGraph.printUsage();
        }

        String inputPath = args[0];
        String outputPath = args[1];
        int numberOfDocuments = Integer.parseInt(args[2]);
        int numReducers = Integer.parseInt(args[3]);
        if (numberOfDocuments == 0) {
            numberOfDocuments = DEFAULT_NUMBER_OF_DOCUMENTS;
        }

        JobConf conf = new JobConf(this.getConf(), SortWebGraph.class);
        FileSystem fs = FileSystem.get(conf);
        Path outputDir = new Path(outputPath);

        conf.setJobName("SortWebGraph");

        // Memory and timeout settings are written under both the "mapred.*"
        // and the "mapreduce.*" property names.
        conf.set("mapred.child.java.opts", "-Xmx2048m");
        conf.setInt("mapred.task.timeout", 60000000);
        conf.set("mapreduce.map.memory.mb", "2048");
        conf.set("mapreduce.map.java.opts", "-Xmx2048m");
        conf.set("mapreduce.reduce.memory.mb", "2048");
        conf.set("mapreduce.reduce.java.opts", "-Xmx2048m");
        conf.set("mapreduce.task.timeout", "60000000");

        conf.setInt(NUMBER_OF_DOCUMENTS_KEY, numberOfDocuments);

        conf.setNumMapTasks(1);
        conf.setNumReduceTasks(numReducers);

        conf.setMapperClass(IdentityMapper.class);
        conf.setPartitionerClass(Partition.class);
        conf.setReducerClass(IdentityReducer.class);

        conf.setOutputKeyClass(IntWritable.class);
        conf.setOutputValueClass(ArrayListWritable.class);
        conf.setMapOutputKeyClass(IntWritable.class);
        conf.setMapOutputValueClass(ArrayListWritable.class);

        conf.setInputFormat(SequenceFileInputFormat.class);
        conf.setOutputFormat(SequenceFileOutputFormat.class);
        SequenceFileOutputFormat.setCompressOutput(conf, true);
        SequenceFileOutputFormat.setOutputCompressionType(conf, SequenceFile.CompressionType.BLOCK);
        SequenceFileInputFormat.setInputPaths(conf, inputPath);
        FileOutputFormat.setOutputPath(conf, outputDir);

        // NOTE(review): the header below says "SortAnchorText" although the job
        // is named "SortWebGraph"; left unchanged in case log consumers match on it.
        LOG.info("SortAnchorText");
        LOG.info(" - input path: " + inputPath);
        LOG.info(" - output path: " + outputPath);
        LOG.info(" - number of documents: " + conf.getInt(NUMBER_OF_DOCUMENTS_KEY, DEFAULT_NUMBER_OF_DOCUMENTS));

        // Remove any previous output before running. The explicit recursive
        // overload replaces the deprecated single-argument delete(Path).
        // The result is ignored on purpose: the path may simply not exist.
        fs.delete(outputDir, true);

        JobClient.runJob(conf);
        return 0;
    }

    /**
     * Command-line entry point; exits the JVM with the value returned by {@link #run}.
     *
     * @param args command-line arguments, passed through {@link ToolRunner}
     * @throws Exception if {@link ToolRunner#run} throws
     */
    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new Configuration(), new SortWebGraph(), args);
        System.exit(res);
    }

    /**
     * Range partitioner: splits the key space {@code [0, totalDocuments)} into
     * {@code numReduceTasks} contiguous, equally sized ranges and sends each key
     * to the reducer owning its range.
     */
    protected static class Partition
    implements Partitioner<IntWritable, ArrayListWritable<AnchorText>> {
        /** Document count read from the job configuration in {@link #configure}. */
        int totalDocuments;

        protected Partition() {
        }

        /**
         * Reads the document count from the job configuration, falling back to
         * the driver's default when the property is not set.
         *
         * @param job configuration of the running job
         */
        public void configure(JobConf job) {
            this.totalDocuments = job.getInt(NUMBER_OF_DOCUMENTS_KEY, DEFAULT_NUMBER_OF_DOCUMENTS);
        }

        /**
         * Maps a key to a partition index in {@code [0, numReduceTasks - 1]}.
         *
         * @param key            record key
         * @param value          record value (not inspected)
         * @param numReduceTasks number of partitions
         * @return {@code key / (totalDocuments / numReduceTasks)}, clamped to the
         *         valid partition range
         */
        public int getPartition(IntWritable key, ArrayListWritable<AnchorText> value, int numReduceTasks) {
            // With at most one partition there is nothing to choose between.
            if (numReduceTasks <= 1) {
                return 0;
            }

            // Integer division yields 0 when there are fewer documents than
            // reducers; force a range width of at least 1 so the division below
            // cannot throw ArithmeticException.
            int docsPerPartition = Math.max(1, this.totalDocuments / numReduceTasks);

            int index = key.get() / docsPerPartition;

            // Keys at or beyond totalDocuments (and the remainder left over by the
            // integer division) go to the last partition; negative keys go to the
            // first one instead of producing a negative partition index.
            if (index >= numReduceTasks) {
                return numReduceTasks - 1;
            }
            if (index < 0) {
                return 0;
            }
            return index;
        }
    }
}

