/*
 * Decompiled with CFR 0.152.
 */
package edu.umd.cloud9.collection.wikipedia;

import com.google.common.base.Preconditions;
import edu.umd.cloud9.collection.DocumentForwardIndex;
import edu.umd.cloud9.collection.wikipedia.WikipediaDocnoMapping;
import edu.umd.cloud9.collection.wikipedia.WikipediaPage;
import edu.umd.cloud9.collection.wikipedia.language.WikipediaPageFactory;
import java.io.IOException;
import java.text.DecimalFormat;
import java.util.Arrays;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
import org.apache.log4j.Logger;

/**
 * Forward index that fetches {@link WikipediaPage} objects by docno or docid
 * from a collection stored as SequenceFiles under a single collection path.
 *
 * <p>The index consists of three parallel arrays, one entry per block:
 * the first docno recorded for the block, the position to seek to inside the
 * part file, and the number of the part file. A lookup binary-searches the
 * docno array, seeks to the block start and scans forward until the requested
 * docno is found.
 */
public class WikipediaForwardIndex
implements DocumentForwardIndex<WikipediaPage> {
    // Log under this class's own name (previously logged as WikipediaPage).
    private static final Logger LOG = Logger.getLogger(WikipediaForwardIndex.class);

    /** Sentinel meaning "last docno has not been computed yet". */
    private static final int UNKNOWN_DOCNO = -1;

    private final Configuration conf;
    private int[] docnos;
    private int[] offsets;
    private short[] fileno;
    private String collectionPath;
    private int lastDocno = UNKNOWN_DOCNO;
    private final WikipediaDocnoMapping docnoMapping = new WikipediaDocnoMapping();

    /** Creates a forward index backed by a default Hadoop {@link Configuration}. */
    public WikipediaForwardIndex() {
        this.conf = new Configuration();
    }

    /**
     * Creates a forward index backed by the supplied configuration.
     *
     * @param conf configuration used to obtain the file system and to read
     *             the {@code wiki.language} setting; must not be {@code null}
     * @throws NullPointerException if {@code conf} is {@code null}
     */
    public WikipediaForwardIndex(Configuration conf) {
        this.conf = Preconditions.checkNotNull(conf);
    }

    /**
     * Loads the docno mapping and the block index.
     *
     * <p>The index file is read as: one UTF string that is discarded
     * (presumably an identifier written by the index builder -- confirm
     * against the writer), one UTF string holding the collection path, an
     * int block count, then per block an int docno, an int offset and a
     * short file number.
     *
     * @param index   path of the forward index file
     * @param mapping path of the docno mapping data
     * @param fs      file system used to open both files
     * @throws IOException if either file cannot be read; in that case the
     *                     previously loaded block index (if any) is left intact
     */
    @Override
    public void loadIndex(Path index, Path mapping, FileSystem fs) throws IOException {
        LOG.info("Loading forward index: " + index);
        this.docnoMapping.loadMapping(mapping, fs);

        String loadedCollectionPath;
        int[] loadedDocnos;
        int[] loadedOffsets;
        short[] loadedFileno;

        FSDataInputStream in = fs.open(index);
        try {
            in.readUTF(); // leading string is not used by this class
            loadedCollectionPath = in.readUTF();
            int blocks = in.readInt();
            LOG.info(blocks + " blocks expected");
            loadedDocnos = new int[blocks];
            loadedOffsets = new int[blocks];
            loadedFileno = new short[blocks];
            for (int i = 0; i < blocks; ++i) {
                loadedDocnos[i] = in.readInt();
                loadedOffsets[i] = in.readInt();
                loadedFileno[i] = in.readShort();
                if (i > 0 && i % 100000 == 0) {
                    LOG.info(i + " blocks read");
                }
            }
        } finally {
            // Always release the stream, even when the index is truncated or corrupt.
            in.close();
        }

        // Publish only after a complete read so a failed load never leaves
        // the arrays half-populated.
        this.collectionPath = loadedCollectionPath;
        this.docnos = loadedDocnos;
        this.offsets = loadedOffsets;
        this.fileno = loadedFileno;
        // The cached last docno belongs to the previously loaded index.
        this.lastDocno = UNKNOWN_DOCNO;
    }

    /** Returns the collection path read from the index file. */
    @Override
    public String getCollectionPath() {
        return this.collectionPath;
    }

    /**
     * Fetches the page with the given docno.
     *
     * @param docno document number to fetch
     * @return the page, or {@code null} if {@code docno} lies outside
     *         [{@link #getFirstDocno()}, {@link #getLastDocno()}], if no record
     *         with that docno is found after the block start, or if an
     *         {@link IOException} occurs (the error is logged)
     */
    @Override
    public WikipediaPage getDocument(int docno) {
        long start = System.currentTimeMillis();
        if (docno < this.getFirstDocno() || docno > this.getLastDocno()) {
            return null;
        }
        int idx = Arrays.binarySearch(this.docnos, docno);
        if (idx < 0) {
            // binarySearch returns -(insertionPoint) - 1 on a miss; the block
            // that may contain docno is the one just before the insertion point.
            idx = -idx - 2;
        }
        try {
            Path file = this.resolvePartFile(idx);
            LOG.info("fetching docno " + docno + ": seeking to " + this.offsets[idx] + " at " + file);
            SequenceFile.Reader reader = new SequenceFile.Reader(this.conf, SequenceFile.Reader.file(file));
            try {
                IntWritable key = new IntWritable();
                WikipediaPage value = WikipediaPageFactory.createWikipediaPage(this.conf.get("wiki.language"));
                reader.seek((long)this.offsets[idx]);
                boolean found = false;
                // The (Writable) casts pin the same overloads the original code used.
                while (reader.next((Writable)key)) {
                    if (key.get() == docno) {
                        found = true;
                        break;
                    }
                }
                if (!found) {
                    // Reached end of file: there is no current record to read a value from.
                    LOG.warn("docno " + docno + " not found in " + file);
                    return null;
                }
                reader.getCurrentValue((Writable)value);
                long duration = System.currentTimeMillis() - start;
                LOG.info(" docno " + docno + " fetched in " + duration + "ms");
                return value;
            } finally {
                reader.close();
            }
        }
        catch (IOException e) {
            LOG.error("Error fetching docno " + docno, e);
            return null;
        }
    }

    /**
     * Fetches the page with the given docid by translating it to a docno
     * through the loaded docno mapping.
     *
     * @param docid document identifier
     * @return the page, or {@code null} under the same conditions as
     *         {@link #getDocument(int)}
     */
    @Override
    public WikipediaPage getDocument(String docid) {
        return this.getDocument(this.docnoMapping.getDocno(docid));
    }

    /** Returns the docno the loaded mapping associates with {@code docid}. */
    @Override
    public int getDocno(String docid) {
        return this.docnoMapping.getDocno(docid);
    }

    /** Returns the docid the loaded mapping associates with {@code docno}. */
    @Override
    public String getDocid(int docno) {
        return this.docnoMapping.getDocid(docno);
    }

    /** Returns the docno stored for the first block of the loaded index. */
    @Override
    public int getFirstDocno() {
        return this.docnos[0];
    }

    /**
     * Returns the last docno of the collection, computing it on first use by
     * scanning from the start of the final block to the end of its part file.
     * The result is cached until the next {@link #loadIndex} call.
     *
     * @return the last docno, or {@code -1} if it could not be determined
     *         because of an {@link IOException} (the error is logged)
     */
    @Override
    public int getLastDocno() {
        if (this.lastDocno != UNKNOWN_DOCNO) {
            return this.lastDocno;
        }
        int idx = this.docnos.length - 1;
        try {
            Path file = this.resolvePartFile(idx);
            SequenceFile.Reader reader = new SequenceFile.Reader(this.conf, SequenceFile.Reader.file(file));
            try {
                IntWritable key = new IntWritable();
                reader.seek((long)this.offsets[idx]);
                // Drain the file; key is left holding the last key that was read.
                while (reader.next((Writable)key)) {
                }
                this.lastDocno = key.get();
            } finally {
                reader.close();
            }
        }
        catch (IOException e) {
            LOG.error("Error determining last docno", e);
        }
        return this.lastDocno;
    }

    /**
     * Resolves the part file holding block {@code idx}: tries
     * {@code part-m-NNNNN} first and falls back to {@code part-NNNNN} when the
     * former does not exist.
     */
    private Path resolvePartFile(int idx) throws IOException {
        FileSystem fs = FileSystem.get(this.conf);
        // DecimalFormat is not thread-safe, so a fresh instance is used per call.
        String partNumber = new DecimalFormat("00000").format(this.fileno[idx]);
        Path file = new Path(this.collectionPath + "/part-m-" + partNumber);
        if (!fs.exists(file)) {
            file = new Path(this.collectionPath + "/part-" + partNumber);
        }
        return file;
    }
}

