/*
 * Decompiled with CFR 0.152.
 */
package edu.umd.cloud9.collection.clue;

import edu.umd.cloud9.collection.DocumentForwardIndex;
import edu.umd.cloud9.collection.clue.ClueWarcDocnoMapping;
import edu.umd.cloud9.collection.clue.ClueWarcRecord;
import java.io.IOException;
import java.text.DecimalFormat;
import java.util.Arrays;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
import org.apache.log4j.Logger;

/**
 * Forward index over a collection of {@code SequenceFile} parts holding
 * {@link ClueWarcRecord} values keyed by integer docno.
 *
 * <p>The index file read by {@link #loadIndex} contains, in order: a UTF string
 * (read and discarded), the collection path as a UTF string, an {@code int}
 * block count, and then one {@code (int docno, int offset, short fileno)}
 * triple per block. A lookup binary-searches the docno column, opens the part
 * file named by the matching {@code fileno}, seeks to the stored offset and
 * scans forward until the requested docno is found.
 *
 * <p>NOTE(review): the docno column is presumably the first docno of each
 * block, sorted ascending (binary search requires sorted input) - confirm
 * against the code that writes the index.
 */
public class ClueWarcForwardIndex
implements DocumentForwardIndex<ClueWarcRecord> {
    private static final Logger LOG = Logger.getLogger(ClueWarcForwardIndex.class);

    /** Zero-padded, five digit pattern used for the numeric suffix of part files. */
    private static final String PART_NUMBER_PATTERN = "00000";

    /** Sentinel meaning "last docno not computed yet" (also the value reported on failure). */
    private static final int UNKNOWN_DOCNO = -1;

    private Configuration conf;
    private int[] docnos;
    private int[] offsets;
    private short[] fileno;
    private String collectionPath;
    private int lastDocno = UNKNOWN_DOCNO;
    private ClueWarcDocnoMapping docnoMapping = new ClueWarcDocnoMapping();

    /**
     * Loads the docno mapping and the block table of the forward index.
     *
     * @param index   path of the forward index file
     * @param mapping path of the docno mapping data
     * @param fs      file system both paths are resolved against; its
     *                configuration is kept for opening part files later
     * @throws IOException if the files cannot be read or the block count is negative
     */
    @Override
    public void loadIndex(Path index, Path mapping, FileSystem fs) throws IOException {
        this.conf = fs.getConf();
        LOG.info("Loading forward index: " + index);
        this.docnoMapping.loadMapping(mapping, fs);

        // try-with-resources so the stream is closed even when a read fails.
        try (FSDataInputStream in = fs.open(index)) {
            // The first UTF string of the file is not used by this class.
            in.readUTF();
            String path = in.readUTF();
            int blocks = in.readInt();
            if (blocks < 0) {
                throw new IOException("Corrupt forward index " + index + ": negative block count " + blocks);
            }
            LOG.info(blocks + " blocks expected");

            int[] loadedDocnos = new int[blocks];
            int[] loadedOffsets = new int[blocks];
            short[] loadedFileno = new short[blocks];
            for (int i = 0; i < blocks; ++i) {
                loadedDocnos[i] = in.readInt();
                loadedOffsets[i] = in.readInt();
                loadedFileno[i] = in.readShort();
                if (i > 0 && i % 100000 == 0) {
                    LOG.info(i + " blocks read");
                }
            }

            // Publish the tables only after the whole file was read successfully.
            this.collectionPath = path;
            this.docnos = loadedDocnos;
            this.offsets = loadedOffsets;
            this.fileno = loadedFileno;
            // A previously cached value belongs to the previously loaded index.
            this.lastDocno = UNKNOWN_DOCNO;
        }
    }

    /** Returns the collection path stored in the index file. */
    @Override
    public String getCollectionPath() {
        return this.collectionPath;
    }

    /**
     * Fetches the record with the given docno.
     *
     * @param docno document number to fetch
     * @return the record, or {@code null} if the index is empty, the docno is
     *         outside {@code [getFirstDocno(), getLastDocno()]}, no record with
     *         that docno exists after the block offset, or an I/O error occurs
     */
    @Override
    public ClueWarcRecord getDocument(int docno) {
        long start = System.currentTimeMillis();
        if (this.docnos.length == 0) {
            return null;
        }
        if (docno < this.getFirstDocno() || docno > this.getLastDocno()) {
            return null;
        }

        int idx = Arrays.binarySearch(this.docnos, docno);
        if (idx < 0) {
            // binarySearch returns -(insertionPoint) - 1; the block that may
            // contain docno is the one just before the insertion point. This is
            // never negative here because docno >= docnos[0].
            idx = -idx - 2;
        }

        String file = this.partFile(idx);
        LOG.info("fetching docno " + docno + ": seeking to " + this.offsets[idx] + " at " + file);
        try (SequenceFile.Reader reader = this.openReader(file)) {
            IntWritable key = new IntWritable();
            reader.seek(this.offsets[idx]);

            boolean found = false;
            while (reader.next(key)) {
                if (key.get() == docno) {
                    found = true;
                    break;
                }
            }
            if (!found) {
                // Reached the end of the part file without seeing the docno;
                // there is no current value that belongs to the request.
                LOG.warn("docno " + docno + " not found in " + file);
                return null;
            }

            ClueWarcRecord value = new ClueWarcRecord();
            reader.getCurrentValue(value);
            long duration = System.currentTimeMillis() - start;
            LOG.info(" docno " + docno + " fetched in " + duration + "ms");
            return value;
        }
        catch (IOException e) {
            LOG.error("Error fetching docno " + docno + " from " + file, e);
            return null;
        }
    }

    /**
     * Fetches a record by docid by translating it through the docno mapping
     * and delegating to {@link #getDocument(int)}.
     */
    @Override
    public ClueWarcRecord getDocument(String docid) {
        return this.getDocument(this.docnoMapping.getDocno(docid));
    }

    /** Returns the docno the mapping associates with {@code docid}. */
    @Override
    public int getDocno(String docid) {
        return this.docnoMapping.getDocno(docid);
    }

    /** Returns the docid the mapping associates with {@code docno}. */
    @Override
    public String getDocid(int docno) {
        return this.docnoMapping.getDocid(docno);
    }

    /**
     * Returns the docno of the first block table entry.
     *
     * @throws ArrayIndexOutOfBoundsException if the loaded index has no blocks
     */
    @Override
    public int getFirstDocno() {
        return this.docnos[0];
    }

    /**
     * Returns the last docno of the collection, computed on first use by
     * scanning the last block to the end of its part file and then cached.
     *
     * @return the last docno, or {@code -1} if it could not be determined
     *         because of an I/O error (in which case the next call retries)
     * @throws ArrayIndexOutOfBoundsException if the loaded index has no blocks
     */
    @Override
    public int getLastDocno() {
        if (this.lastDocno != UNKNOWN_DOCNO) {
            return this.lastDocno;
        }
        int idx = this.docnos.length - 1;
        String file = this.partFile(idx);
        try (SequenceFile.Reader reader = this.openReader(file)) {
            IntWritable key = new IntWritable();
            reader.seek(this.offsets[idx]);
            // Drain the reader; the key keeps the value of the last record read.
            while (reader.next(key)) {
            }
            this.lastDocno = key.get();
        }
        catch (IOException e) {
            LOG.error("Error determining last docno from " + file, e);
        }
        return this.lastDocno;
    }

    /**
     * Builds the path of the part file that holds the given block.
     * A fresh {@link DecimalFormat} is created per call because the class is
     * not safe for concurrent use.
     */
    private String partFile(int blockIndex) {
        DecimalFormat partNumber = new DecimalFormat(PART_NUMBER_PATTERN);
        return this.collectionPath + "/part-" + partNumber.format(this.fileno[blockIndex]);
    }

    /** Opens a sequence file reader on {@code file} using the stored configuration. */
    private SequenceFile.Reader openReader(String file) throws IOException {
        return new SequenceFile.Reader(this.conf, SequenceFile.Reader.file(new Path(file)));
    }

    /**
     * Command line entry point: loads an index and prints the display content
     * of one document, looked up either by docno or by docid.
     *
     * @param args {@code [findex] [mapping-file] [getDocno|getDocid] [docid/docno]}
     * @throws IOException if the index or mapping cannot be loaded
     */
    public static void main(String[] args) throws IOException {
        if (args.length < 4) {
            System.out.println("usage: [findex] [mapping-file] [getDocno|getDocid] [docid/docno]");
            System.exit(-1);
        }
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        System.out.println("forward index: " + args[0]);
        System.out.println("mapping file: " + args[1]);
        ClueWarcForwardIndex findex = new ClueWarcForwardIndex();
        findex.loadIndex(new Path(args[0]), new Path(args[1]), fs);

        ClueWarcRecord doc;
        if (args[2].equals("getDocno")) {
            System.out.println("looking up docno " + args[3]);
            doc = findex.getDocument(Integer.parseInt(args[3]));
        } else if (args[2].equals("getDocid")) {
            System.out.println("looking up docid " + args[3]);
            doc = findex.getDocument(args[3]);
        } else {
            System.out.println("Invalid command!");
            System.out.println("usage: [findex] [mapping-file] [getDocno|getDocid] [docid/docno]");
            return;
        }

        // getDocument reports "no such document" and I/O failures as null.
        if (doc == null) {
            System.out.println("document not found: " + args[3]);
        } else {
            System.out.println(doc.getDisplayContent());
        }
    }
}

