/*
 * Decompiled with CFR 0.152.
 */
package edu.umd.cloud9.collection.aquaint2;

import edu.umd.cloud9.collection.DocnoMapping;
import edu.umd.cloud9.collection.aquaint2.Aquaint2DocnoMapping;
import edu.umd.cloud9.collection.aquaint2.Aquaint2Document;
import edu.umd.cloud9.collection.aquaint2.Aquaint2DocumentInputFormatOld;
import edu.umd.cloud9.collection.aquaint2.DemoCountAquaint2Documents;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.Counters;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.lib.IdentityReducer;
import org.apache.hadoop.util.LineReader;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Logger;

public class Aquaint2ForwardIndexBuilder
extends Configured
implements Tool {
    private static final Logger sLogger = Logger.getLogger(Aquaint2ForwardIndexBuilder.class);

    private static int printUsage() {
        System.out.println("usage: [collection-path] [output-path] [index-file] [docno-mapping-file]");
        ToolRunner.printGenericCommandUsage((PrintStream)System.out);
        return -1;
    }

    public int run(String[] args) throws Exception {
        if (args.length != 4) {
            Aquaint2ForwardIndexBuilder.printUsage();
            return -1;
        }
        String collectionPath = args[0];
        String outputPath = args[1];
        String indexFile = args[2];
        String mappingFile = args[3];
        return this.runTool(this.getConf(), collectionPath, outputPath, indexFile, mappingFile);
    }

    public int runTool(Configuration config, String collectionPath, String outputPath, String indexFile, String mappingFile) throws Exception {
        JobConf conf = new JobConf(config, DemoCountAquaint2Documents.class);
        FileSystem fs = FileSystem.get((Configuration)config);
        sLogger.info((Object)"Tool name: BuildAquaint2ForwardIndex");
        sLogger.info((Object)(" - collection path: " + collectionPath));
        sLogger.info((Object)(" - output path: " + outputPath));
        sLogger.info((Object)(" - index file: " + indexFile));
        sLogger.info((Object)(" - mapping file: " + mappingFile));
        conf.setJobName("BuildAquaint2ForwardIndex");
        conf.set("mapred.child.java.opts", "-Xmx1024m");
        conf.setNumReduceTasks(1);
        if (conf.get("mapred.job.tracker").equals("local")) {
            conf.set("DocnoMappingFile", mappingFile);
        } else {
            DistributedCache.addCacheFile((URI)new URI(mappingFile), (Configuration)conf);
        }
        FileInputFormat.setInputPaths((JobConf)conf, (Path[])new Path[]{new Path(collectionPath)});
        FileOutputFormat.setOutputPath((JobConf)conf, (Path)new Path(outputPath));
        FileOutputFormat.setCompressOutput((JobConf)conf, (boolean)false);
        conf.setInputFormat(Aquaint2DocumentInputFormatOld.class);
        conf.setOutputKeyClass(IntWritable.class);
        conf.setOutputValueClass(Text.class);
        conf.setMapperClass(MyMapper.class);
        conf.setReducerClass(IdentityReducer.class);
        FileSystem.get((Configuration)conf).delete(new Path(outputPath), true);
        RunningJob job = JobClient.runJob((JobConf)conf);
        Counters counters = job.getCounters();
        int numDocs = (int)((Counters.Counter)counters.findCounter((Enum)Count.DOCS)).getCounter();
        String inputFile = outputPath + "/" + "part-00000";
        sLogger.info((Object)("Writing " + numDocs + " doc offseta to " + indexFile));
        LineReader reader = new LineReader((InputStream)fs.open(new Path(inputFile)));
        FSDataOutputStream writer = fs.create(new Path(indexFile), true);
        writer.writeUTF("edu.umd.cloud9.collection.aquaint2.Aquaint2ForwardIndex");
        writer.writeUTF(collectionPath);
        writer.writeInt(numDocs);
        int cnt = 0;
        Text line = new Text();
        while (reader.readLine(line) > 0) {
            String[] arr = line.toString().split("\\t");
            long offset = Long.parseLong(arr[1]);
            int len = Integer.parseInt(arr[2]);
            writer.writeLong(offset);
            writer.writeInt(len);
            if (++cnt % 100000 != 0) continue;
            sLogger.info((Object)(cnt + " docs"));
        }
        reader.close();
        writer.close();
        sLogger.info((Object)(cnt + " docs total. Done!"));
        if (numDocs != cnt) {
            throw new RuntimeException("Unexpected number of documents in building forward index!");
        }
        return 0;
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        int res = ToolRunner.run((Configuration)conf, (Tool)new Aquaint2ForwardIndexBuilder(), (String[])args);
        System.exit(res);
    }

    /*
     * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
     */
    private static class MyMapper
    extends MapReduceBase
    implements Mapper<LongWritable, Aquaint2Document, IntWritable, Text> {
        private static final IntWritable sInt = new IntWritable(1);
        private static final Text sText = new Text();
        private DocnoMapping mDocMapping;

        private MyMapper() {
        }

        public void configure(JobConf job) {
            try {
                this.mDocMapping = new Aquaint2DocnoMapping();
                if (job.get("mapred.job.tracker").equals("local")) {
                    FileSystem fs = FileSystem.get((Configuration)job);
                    String mappingFile = job.get("DocnoMappingFile");
                    this.mDocMapping.loadMapping(new Path(mappingFile), fs);
                } else {
                    Path[] localFiles = DistributedCache.getLocalCacheFiles((Configuration)job);
                    this.mDocMapping.loadMapping(localFiles[0], (FileSystem)FileSystem.getLocal((Configuration)job));
                }
            }
            catch (Exception e) {
                e.printStackTrace();
                throw new RuntimeException("Error initializing DocnoMapping!");
            }
        }

        public void map(LongWritable key, Aquaint2Document doc, OutputCollector<IntWritable, Text> output, Reporter reporter) throws IOException {
            reporter.incrCounter((Enum)Count.DOCS, 1L);
            int len = doc.getContent().getBytes().length;
            sInt.set(this.mDocMapping.getDocno(doc.getDocid()));
            sText.set(key + "\t" + len);
            output.collect((Object)sInt, (Object)sText);
        }
    }

    /*
     * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
     */
    private static enum Count {
        DOCS;

    }
}

