/*
 * Decompiled with CFR 0.152.
 */
package edu.umd.cloud9.collection.trecweb;

import edu.umd.cloud9.collection.DocnoMapping;
import edu.umd.cloud9.collection.trecweb.TrecWebDocument;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.Random;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.LineReader;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Logger;

/**
 * Hadoop tool that builds the docno mapping file for a TREC web collection.
 *
 * <p>A single-reducer MapReduce job emits every document id found in the
 * collection, one per line, into a temporary directory. The reducer output is
 * then read back and rewritten as a binary file consisting of an {@code int}
 * record count followed by each document id encoded with {@code writeUTF}.
 */
public class TrecWebDocnoMappingBuilder extends Configured implements Tool, DocnoMapping.Builder {
    private static final Logger LOG = Logger.getLogger(TrecWebDocnoMappingBuilder.class);
    private static final Random random = new Random();

    /**
     * Builds the mapping by delegating to {@link #run(String[])} with the
     * equivalent command-line arguments.
     *
     * @param src  path of the collection to scan
     * @param dest path of the mapping file to write
     * @param conf configuration installed on this tool before running
     * @return the value returned by {@link #run(String[])}
     * @throws IOException if {@link #run(String[])} throws it
     */
    public int build(Path src, Path dest, Configuration conf) throws IOException {
        setConf(conf);
        return run(new String[]{"-collection=" + src.toString(), "-docnoMapping=" + dest.toString()});
    }

    /**
     * Runs the docid-extraction job and writes the mapping file.
     *
     * @param args command-line arguments, parsed by
     *             {@code DocnoMapping.BuilderUtils.parseDefaultOptions}
     * @return {@code -1} if the arguments could not be parsed, {@code 0} on success
     * @throws IOException      if the job reports failure or a file system operation fails
     * @throws RuntimeException wrapping any exception thrown while waiting for the job
     */
    @Override
    public int run(String[] args) throws IOException {
        DocnoMapping.DefaultBuilderOptions options = DocnoMapping.BuilderUtils.parseDefaultOptions(args);
        if (options == null) {
            return -1;
        }

        String toolName = TrecWebDocnoMappingBuilder.class.getSimpleName();
        String tmpDir = "tmp-" + toolName + "-" + random.nextInt(10000);
        Path tmpPath = new Path(tmpDir);

        LOG.info("Tool name: " + TrecWebDocnoMappingBuilder.class.getCanonicalName());
        LOG.info(" - input path: " + options.collection);
        LOG.info(" - output file: " + options.docnoMapping);

        Job job = new Job(getConf(), toolName + ":" + options.collection);
        FileSystem fs = FileSystem.get(job.getConfiguration());
        job.setJarByClass(TrecWebDocnoMappingBuilder.class);
        // A single reducer yields exactly one output file (part-r-00000) to read back.
        job.setNumReduceTasks(1);

        // Skip entries whose names start with an underscore.
        PathFilter filter = new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return !path.getName().startsWith("_");
            }
        };

        // Add top-level files directly; for directories, add their immediate children.
        for (FileStatus status : fs.listStatus(new Path(options.collection), filter)) {
            if (!status.isDirectory()) {
                FileInputFormat.addInputPath(job, status.getPath());
                continue;
            }
            for (FileStatus child : fs.listStatus(status.getPath(), filter)) {
                FileInputFormat.addInputPath(job, child.getPath());
            }
        }

        FileOutputFormat.setOutputPath(job, tmpPath);
        // The reducer output is parsed as plain text below, so it must not be compressed.
        FileOutputFormat.setCompressOutput(job, false);
        job.setInputFormatClass(options.inputFormat);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);

        // Remove any stale directory of the same name before the job writes to it.
        fs.delete(tmpPath, true);
        try {
            boolean succeeded;
            try {
                succeeded = job.waitForCompletion(true);
            }
            catch (InterruptedException e) {
                // Restore the interrupt flag so callers further up can observe it.
                Thread.currentThread().interrupt();
                throw new RuntimeException(e);
            }
            catch (Exception e) {
                throw new RuntimeException(e);
            }
            if (!succeeded) {
                // Fail here rather than with a missing-file error when reading the job output.
                throw new IOException("Docid extraction job failed for collection " + options.collection
                        + "; docno mapping was not written to " + options.docnoMapping);
            }
            writeMappingData(new Path(tmpDir + "/part-r-00000"), new Path(options.docnoMapping), fs);
        }
        finally {
            // Always clean up the temporary job output, including on failure.
            fs.delete(tmpPath, true);
        }
        return 0;
    }

    /**
     * Converts the reducer's text output into the binary mapping file.
     *
     * <p>The input is read twice: once to count its lines, because the count is
     * written at the head of the output, and once to copy the docid (the text
     * before the first tab) of every line.
     *
     * @param input  text file with one tab-separated record per line
     * @param output mapping file to create, overwriting any existing file
     * @param fs     file system used for both paths
     * @throws IOException if reading or writing fails
     */
    private static void writeMappingData(Path input, Path output, FileSystem fs) throws IOException {
        LOG.info("Writing docids to " + output);
        LOG.info("Reading " + input);
        int total = countLines(input, fs);
        LOG.info("Done!");

        LOG.info("Writing " + output);
        int written = 0;
        FSDataOutputStream out = fs.create(output, true);
        try {
            LineReader reader = new LineReader(fs.open(input));
            try {
                out.writeInt(total);
                Text line = new Text();
                while (reader.readLine(line) > 0) {
                    String record = line.toString();
                    int tab = record.indexOf('\t');
                    // Keep only the key; a line without a tab is taken whole.
                    out.writeUTF(tab < 0 ? record : record.substring(0, tab));
                    if (++written % 100000 == 0) {
                        LOG.info(written + " documents");
                    }
                }
            }
            finally {
                reader.close();
            }
        }
        finally {
            out.close();
        }
        LOG.info("Done! " + written + " documents total.");
    }

    /**
     * Counts the lines of a text file.
     *
     * @param input file to read
     * @param fs    file system holding {@code input}
     * @return number of lines read before {@code readLine} reports no more data
     * @throws IOException if reading fails
     */
    private static int countLines(Path input, FileSystem fs) throws IOException {
        LineReader reader = new LineReader(fs.open(input));
        try {
            int lines = 0;
            Text scratch = new Text();
            while (reader.readLine(scratch) > 0) {
                ++lines;
            }
            return lines;
        }
        finally {
            reader.close();
        }
    }

    /**
     * Command-line entry point; runs this tool through {@link ToolRunner}.
     * The tool's return value is not propagated as a process exit code.
     *
     * @param args command-line arguments forwarded to the tool
     * @throws Exception if the tool throws
     */
    public static void main(String[] args) throws Exception {
        LOG.info("Running " + TrecWebDocnoMappingBuilder.class.getCanonicalName() + " with args " + Arrays.toString(args));
        ToolRunner.run(new Configuration(), new TrecWebDocnoMappingBuilder(), args);
    }

    /**
     * Writes each distinct docid once, paired with a running sequence number
     * that starts at 1 and increases by one per key.
     */
    private static class MyReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        // Per-instance so reducer instances sharing a JVM do not share the counter.
        private final IntWritable cnt = new IntWritable(1);

        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            context.write(key, cnt);
            cnt.set(cnt.get() + 1);
        }
    }

    /**
     * Emits {@code (docid, 1)} for every document and increments the
     * {@link Documents#Total} counter.
     */
    private static class MyMapper extends Mapper<LongWritable, TrecWebDocument, Text, IntWritable> {
        // Reused output objects; per-instance so mapper instances sharing a JVM do not share them.
        private final Text docid = new Text();
        private final IntWritable one = new IntWritable(1);

        @Override
        public void map(LongWritable key, TrecWebDocument doc, Context context) throws IOException, InterruptedException {
            context.getCounter(Documents.Total).increment(1L);
            docid.set(doc.getDocid());
            context.write(docid, one);
        }
    }

    /** Job counters maintained by the mapper. */
    protected enum Documents {
        /** Incremented once per document passed to the mapper. */
        Total
    }
}

