/*
 * Decompiled with CFR 0.152.
 */
package it.unimi.dsi.mg4j.document;

import com.martiansoftware.jsap.FlaggedOption;
import com.martiansoftware.jsap.JSAP;
import com.martiansoftware.jsap.JSAPException;
import com.martiansoftware.jsap.JSAPResult;
import com.martiansoftware.jsap.Parameter;
import com.martiansoftware.jsap.SimpleJSAP;
import com.martiansoftware.jsap.StringParser;
import com.martiansoftware.jsap.Switch;
import com.martiansoftware.jsap.UnflaggedOption;
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.fastutil.io.FastBufferedInputStream;
import it.unimi.dsi.fastutil.objects.ObjectArrayList;
import it.unimi.dsi.fastutil.objects.ObjectArrays;
import it.unimi.dsi.fastutil.objects.ObjectIterator;
import it.unimi.dsi.fastutil.objects.ObjectListIterator;
import it.unimi.dsi.fastutil.objects.Reference2ObjectArrayMap;
import it.unimi.dsi.fastutil.objects.Reference2ObjectMap;
import it.unimi.dsi.io.SegmentedInputStream;
import it.unimi.dsi.logging.ProgressLogger;
import it.unimi.dsi.mg4j.document.AbstractDocumentCollection;
import it.unimi.dsi.mg4j.document.AbstractDocumentIterator;
import it.unimi.dsi.mg4j.document.CompositeDocumentFactory;
import it.unimi.dsi.mg4j.document.Document;
import it.unimi.dsi.mg4j.document.DocumentFactory;
import it.unimi.dsi.mg4j.document.DocumentIterator;
import it.unimi.dsi.mg4j.document.IdentityDocumentFactory;
import it.unimi.dsi.mg4j.document.PropertyBasedDocumentFactory;
import it.unimi.dsi.mg4j.document.TRECHeaderDocumentFactory;
import it.unimi.dsi.mg4j.util.MG4JClassParser;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.io.UnsupportedEncodingException;
import java.lang.reflect.InvocationTargetException;
import java.util.Arrays;
import java.util.zip.GZIPInputStream;
import org.apache.commons.io.IOUtils;
import org.apache.log4j.Logger;

public class TRECDocumentCollection
extends AbstractDocumentCollection
implements Serializable {
    /** Logger used for debug messages and progress reporting. */
    private static final Logger LOGGER = Logger.getLogger(TRECDocumentCollection.class);
    private static final long serialVersionUID = -4251461013312968454L;
    /** Debug switch; not referenced by the code visible in this file. */
    private static final boolean DEBUG = false;
    /** Default I/O buffer size, in the textual form handed to the command-line parser in {@code main}. */
    public static final String DEFAULT_BUFFER_SIZE = "64Ki";
    /** Names of the files backing this collection; descriptors refer to them by index. */
    private String[] file;
    /** Whether the files must be read through a {@link GZIPInputStream}. */
    private final boolean useGzip;
    /** Factory used to turn document streams into {@link Document} instances. */
    protected DocumentFactory factory;
    /** One descriptor per document; written and restored explicitly by {@code writeObject}/{@code readObject}. */
    protected transient ObjectArrayList<TRECDocumentDescriptor> descriptors;
    /** Buffer size passed to the buffered stream used while scanning files. */
    private final int bufferSize;
    /**
     * Last stream returned by {@code stream(int)}.
     *
     * Transient: it is an open input stream, not collection state. Leaving it in the
     * default serialised form would make serialisation fail once a stream has been handed out.
     */
    private transient SegmentedInputStream lastStream;
    /** ASCII bytes of the {@code <DOC>} marker line. */
    protected static final byte[] DOC_OPEN;
    /** ASCII bytes of the {@code </DOC>} marker line. */
    protected static final byte[] DOC_CLOSE;
    /** ASCII bytes of the {@code <DOCNO>} marker. */
    protected static final byte[] DOCNO_OPEN;
    /** ASCII bytes of the {@code </DOCNO>} marker. */
    protected static final byte[] DOCNO_CLOSE;
    /** ASCII bytes of the {@code <DOCHDR>} marker line. */
    protected static final byte[] DOCHDR_OPEN;
    /** ASCII bytes of the {@code </DOCHDR>} marker line. */
    protected static final byte[] DOCHDR_CLOSE;
    /** Scratch line buffer used while scanning files; a line that fills it completely is treated as over-long. */
    byte[] buffer = new byte[8192];

    /**
     * Tells whether the first {@code len} bytes of {@code a} are exactly the bytes of {@code b}.
     *
     * @param a the array holding the candidate prefix.
     * @param len the number of valid bytes at the start of {@code a}.
     * @param b the array to compare against, in its entirety.
     * @return true if {@code len} equals {@code b.length} and the two byte ranges coincide.
     */
    protected static boolean equals(byte[] a, int len, byte[] b) {
        if (len != b.length) {
            return false;
        }
        // Walk backwards from the last valid byte down to index 0.
        for (int i = len - 1; i >= 0; i--) {
            if (a[i] != b[i]) {
                return false;
            }
        }
        return true;
    }

    /**
     * Scans one file line by line and appends to {@link #descriptors} one descriptor
     * for every block delimited by a {@code <DOC>} line and a {@code </DOC>} line.
     *
     * <p>For each block three byte offsets are recorded: the start of the {@code <DOC>} line,
     * the position right after the {@code </DOCHDR>} line, and the start of the {@code </DOC>} line.
     * The stream is always closed before returning, including when reading fails.
     *
     * @param fileIndex index into {@link #file} of the file being scanned.
     * @param is the (already decompressed, if needed) content of that file.
     * @throws IOException if reading or closing the stream fails.
     */
    private void parseContent(int fileIndex, InputStream is) throws IOException {
        LOGGER.debug((Object)("Processing file " + fileIndex + " (" + this.file[fileIndex] + ")"));
        FastBufferedInputStream fbis = new FastBufferedInputStream(is, this.bufferSize);
        try {
            boolean pastHeader = false;
            boolean startedBlock = false;
            long currStart = 0L;
            long currInter = 0L;
            // Position at which the line currently being examined starts.
            long oldPos = 0L;
            int l;
            while ((l = fbis.readLine(this.buffer)) != -1) {
                if (l == this.buffer.length) {
                    // The buffer was filled: the line is longer than any marker, so consume the rest of it.
                    while ((l = fbis.readLine(this.buffer)) == this.buffer.length) {
                    }
                } else if (!startedBlock && TRECDocumentCollection.equals(this.buffer, l, DOC_OPEN)) {
                    currStart = oldPos;
                    // Do not inherit the header end of a previous document when this one has no </DOCHDR> line.
                    currInter = oldPos;
                    startedBlock = true;
                } else if (startedBlock && TRECDocumentCollection.equals(this.buffer, l, DOC_CLOSE)) {
                    long currStop = oldPos;
                    this.descriptors.add((Object)new TRECDocumentDescriptor(fileIndex, currStart, currInter, currStop));
                    pastHeader = false;
                    startedBlock = false;
                } else if (startedBlock && !pastHeader && TRECDocumentCollection.equals(this.buffer, l, DOCHDR_CLOSE)) {
                    currInter = fbis.position();
                    pastHeader = true;
                }
                // Must also run after a skipped over-long line, otherwise the next marker
                // would be recorded at the offset preceding that line.
                oldPos = fbis.position();
            }
        } finally {
            fbis.close();
        }
    }

    /**
     * Builds a collection around already-computed descriptors, without scanning any file.
     *
     * @param file the names of the files backing the collection.
     * @param factory the factory used to build documents.
     * @param descriptors the document descriptors; the list is stored as is, not copied.
     * @param bufferSize the I/O buffer size.
     * @param useGzip whether the files are gzipped.
     */
    protected TRECDocumentCollection(String[] file, DocumentFactory factory, ObjectArrayList<TRECDocumentDescriptor> descriptors, int bufferSize, boolean useGzip) {
        this.file = file;
        this.factory = factory;
        this.descriptors = descriptors;
        this.bufferSize = bufferSize;
        this.useGzip = useGzip;
    }

    /**
     * Returns a new collection with a copy of the factory.
     *
     * The file-name array and the descriptor list are shared with this instance;
     * the stream last returned by this instance is not carried over.
     *
     * @return the new collection.
     */
    @Override
    public TRECDocumentCollection copy() {
        final DocumentFactory factoryCopy = this.factory.copy();
        return new TRECDocumentCollection(this.file, factoryCopy, this.descriptors, this.bufferSize, this.useGzip);
    }

    /**
     * Opens a file of the collection, wrapping it in a {@link GZIPInputStream} when
     * the collection was declared as gzipped.
     *
     * @param fileName the name of the file to open.
     * @return a stream over the (decompressed) file content; the caller must close it.
     * @throws IOException if the file cannot be opened or the gzip wrapper cannot be created.
     */
    private final InputStream openFileStream(String fileName) throws IOException {
        final FileInputStream raw = new FileInputStream(fileName);
        if (!this.useGzip) {
            return raw;
        }
        try {
            return new GZIPInputStream(raw);
        }
        catch (IOException e) {
            // The wrapper could not be created, so nobody else will ever close the file: do it here.
            try {
                raw.close();
            }
            catch (IOException ignored) {
                // Deliberately ignored: the failure creating the wrapper is the one worth reporting.
            }
            throw e;
        }
    }

    /**
     * Builds a collection by scanning every given file for document blocks.
     *
     * Progress is reported through a {@link ProgressLogger}, one update per file.
     *
     * @param file the names of the files to scan, in the order in which they will be indexed.
     * @param factory the factory used to build documents.
     * @param bufferSize the I/O buffer size used while scanning.
     * @param useGzip whether the files are gzipped.
     * @throws IOException if a file cannot be opened or read.
     */
    public TRECDocumentCollection(String[] file, DocumentFactory factory, int bufferSize, boolean useGzip) throws IOException {
        this.file = file;
        this.factory = factory;
        this.bufferSize = bufferSize;
        this.descriptors = new ObjectArrayList<TRECDocumentDescriptor>();
        this.useGzip = useGzip;

        final ProgressLogger pl = new ProgressLogger(LOGGER);
        pl.expectedUpdates = file.length;
        pl.itemsName = "files";
        final String flavour = useGzip ? "GZip" : "plain";
        pl.start((CharSequence)("Parsing " + flavour + " files"));

        int fileIndex = 0;
        for (String fileName : file) {
            this.parseContent(fileIndex, this.openFileStream(fileName));
            fileIndex++;
            pl.update();
        }
        pl.done();
    }

    /**
     * Returns the number of documents, that is, the number of stored descriptors.
     *
     * @return the number of documents in this collection.
     */
    @Override
    public int size() {
        final ObjectArrayList<TRECDocumentDescriptor> all = this.descriptors;
        return all.size();
    }

    /**
     * Builds the document of given index by passing its stream and metadata to the factory.
     *
     * @param n a document index.
     * @return the document produced by the factory.
     * @throws IOException if the underlying stream cannot be opened or the factory fails.
     */
    @Override
    public Document document(int n) throws IOException {
        final Reference2ObjectMap<Enum<?>, Object> documentMetadata = this.metadata(n);
        final InputStream content = this.stream(n);
        return this.factory.getDocument(content, documentMetadata);
    }

    /**
     * Returns a segmented stream over the document of given index.
     *
     * The stream previously returned by this method, if any, is closed quietly first;
     * the new stream is remembered so that it can be closed later.
     *
     * @param n a document index.
     * @return a stream positioned on the segments of the requested document.
     * @throws IOException if the file holding the document cannot be opened.
     */
    @Override
    public InputStream stream(int n) throws IOException {
        this.ensureDocumentIndex(n);
        IOUtils.closeQuietly((InputStream)this.lastStream);
        final TRECDocumentDescriptor descriptor = (TRECDocumentDescriptor)this.descriptors.get(n);
        final InputStream fileStream = this.openFileStream(this.file[descriptor.fileIndex]);
        final SegmentedInputStream segmented = new SegmentedInputStream(fileStream, descriptor.toSegments());
        this.lastStream = segmented;
        return segmented;
    }

    /**
     * Returns the metadata of the document of given index.
     *
     * The only entry is a URI of the form {@code Document #<index>}.
     *
     * @param index a document index.
     * @return a freshly created metadata map.
     */
    @Override
    public Reference2ObjectMap<Enum<?>, Object> metadata(int index) {
        this.ensureDocumentIndex(index);
        final Reference2ObjectArrayMap<Enum<?>, Object> result = new Reference2ObjectArrayMap<Enum<?>, Object>(4);
        final String uri = "Document #" + index;
        result.put(PropertyBasedDocumentFactory.MetadataKeys.URI, uri);
        return result;
    }

    /**
     * Returns the factory used by this collection to build documents.
     *
     * @return the document factory.
     */
    @Override
    public DocumentFactory factory() {
        return factory;
    }

    /**
     * Closes this collection: closes the stream last handed out by {@code stream(int)},
     * if any, and drops the reference to the descriptor list.
     *
     * @throws IOException if closing the superclass or the last stream fails.
     */
    @Override
    public void close() throws IOException {
        super.close();
        final SegmentedInputStream pending = this.lastStream;
        if (pending != null) {
            pending.close();
        }
        this.descriptors = null;
    }

    /**
     * Appends the files and the documents of another collection to this one.
     *
     * The file names of {@code other} are added after those of this collection, and
     * a copy of each descriptor of {@code other} is appended with its file index
     * shifted accordingly. {@code other} itself is not modified, unless it is this
     * very collection, in which case the collection is concatenated with itself.
     *
     * NOTE(review): the gzip setting of {@code other} is not checked against
     * this collection's; confirm callers only merge collections of the same kind.
     *
     * @param other the collection to append.
     */
    public void merge(TRECDocumentCollection other) {
        final int oldLength = this.file.length;
        // Snapshot everything read from other before mutating this: when other == this the
        // file array and the descriptor list are the very objects being enlarged below.
        final String[] otherFile = other.file;
        final int otherLength = otherFile.length;
        final int otherDocuments = other.descriptors.size();

        this.file = (String[])ObjectArrays.ensureCapacity((Object[])this.file, (int)(oldLength + otherLength));
        System.arraycopy(otherFile, 0, this.file, oldLength, otherLength);

        for (int i = 0; i < otherDocuments; i++) {
            TRECDocumentDescriptor tdd = (TRECDocumentDescriptor)((TRECDocumentDescriptor)other.descriptors.get(i)).clone();
            tdd.fileIndex += oldLength;
            this.descriptors.add((Object)tdd);
        }
    }

    /**
     * Returns an iterator that goes through the documents in descriptor order, opening
     * each file once and reading all of its documents as blocks of one segmented stream.
     *
     * Descriptors belonging to the same file are assumed to be contiguous in the descriptor list.
     *
     * @return a document iterator over this collection.
     * @throws IOException declared by the overridden method.
     */
    @Override
    public DocumentIterator iterator() throws IOException {
        return new AbstractDocumentIterator(){
            /** Iterator over the descriptors of the enclosing collection. */
            private final ObjectIterator<TRECDocumentDescriptor> descriptorIterator;
            /** Segmented stream over the file currently being read. */
            private SegmentedInputStream siStream;
            /** Index of the next document to be returned. */
            private int currentDocument;
            /** Last document returned, closed before moving on. */
            private Document last;
            /** Descriptor already fetched from the iterator that belongs to the next file, or null if none is pending. */
            private TRECDocumentDescriptor firstNextDescriptor;
            {
                this.descriptorIterator = TRECDocumentCollection.this.descriptors.iterator();
                this.currentDocument = 0;
            }

            /**
             * Opens the next file and registers one block per document it contains.
             *
             * @return false if there are no more documents, true if a new stream is ready.
             */
            private boolean nextFile() throws FileNotFoundException, IOException {
                if (TRECDocumentCollection.this.size() == 0) {
                    return false;
                }
                IOUtils.closeQuietly((InputStream)this.siStream);
                TRECDocumentDescriptor currentDescriptor = this.firstNextDescriptor;
                if (currentDescriptor == null) {
                    // A pending look-ahead descriptor must be served even when the underlying iterator
                    // is exhausted, so exhaustion is only checked when nothing is pending.
                    if (!this.descriptorIterator.hasNext()) {
                        return false;
                    }
                    currentDescriptor = this.descriptorIterator.next();
                }
                final int currentFileIndex = currentDescriptor.fileIndex;
                this.siStream = new SegmentedInputStream(TRECDocumentCollection.this.openFileStream(TRECDocumentCollection.this.file[currentFileIndex]));
                // The pending descriptor is consumed only once its file has been opened successfully.
                this.firstNextDescriptor = null;
                while (true) {
                    this.siStream.addBlock(currentDescriptor.toSegments());
                    if (!this.descriptorIterator.hasNext()) {
                        break;
                    }
                    currentDescriptor = this.descriptorIterator.next();
                    if (currentDescriptor.fileIndex != currentFileIndex) {
                        // First descriptor of another file: keep it for the next call.
                        this.firstNextDescriptor = currentDescriptor;
                        break;
                    }
                }
                return true;
            }

            /**
             * Closes the previously returned document, if any, and returns the next one.
             *
             * @return the next document, or null if there are no more documents.
             */
            @Override
            public Document nextDocument() throws IOException {
                if (this.last != null) {
                    this.last.close();
                    if (!this.siStream.hasMoreBlocks()) {
                        if (!this.nextFile()) {
                            this.last = null;
                            return null;
                        }
                    } else {
                        this.siStream.nextBlock();
                    }
                } else if (!this.nextFile()) {
                    return null;
                }
                this.last = TRECDocumentCollection.this.factory.getDocument((InputStream)this.siStream, TRECDocumentCollection.this.metadata(this.currentDocument++));
                return this.last;
            }

            /** Closes the last returned document and the current stream, if a stream was ever opened. */
            @Override
            public void close() throws IOException {
                if (this.siStream != null) {
                    if (this.last != null) {
                        this.last.close();
                    }
                    super.close();
                    this.siStream.close();
                    this.siStream = null;
                }
            }
        };
    }

    /**
     * Restores the default serialised fields and then the descriptor list, which is
     * stored as a count followed by four values per descriptor (file index, start marker,
     * intermediate marker difference, stop marker difference).
     *
     * @param s the stream to read from.
     * @throws IOException if reading fails.
     * @throws ClassNotFoundException if a serialised class cannot be resolved.
     */
    private void readObject(ObjectInputStream s) throws IOException, ClassNotFoundException {
        s.defaultReadObject();
        final int count = s.readInt();
        final ObjectArrayList<TRECDocumentDescriptor> restored = new ObjectArrayList<TRECDocumentDescriptor>();
        restored.ensureCapacity(count);
        int remaining = count;
        while (remaining-- > 0) {
            // Values are read in exactly the order in which writeObject emits them.
            final int fileIndex = s.readInt();
            final long startMarker = s.readLong();
            final int intermediateMarkerDiff = s.readInt();
            final int stopMarkerDiff = s.readInt();
            restored.add(new TRECDocumentDescriptor(fileIndex, startMarker, intermediateMarkerDiff, stopMarkerDiff));
        }
        this.descriptors = restored;
    }

    /**
     * Writes the default serialised fields followed by the descriptor list: the number
     * of descriptors, then for each one its file index, start marker, intermediate
     * marker difference and stop marker difference.
     *
     * @param s the stream to write to.
     * @throws IOException if writing fails.
     */
    private void writeObject(ObjectOutputStream s) throws IOException {
        s.defaultWriteObject();
        final ObjectArrayList<TRECDocumentDescriptor> all = this.descriptors;
        s.writeInt(all.size());
        final ObjectIterator<TRECDocumentDescriptor> it = all.iterator();
        while (it.hasNext()) {
            final TRECDocumentDescriptor d = it.next();
            s.writeInt(d.fileIndex);
            s.writeLong(d.startMarker);
            s.writeInt(d.intermediateMarkerDiff);
            s.writeInt(d.stopMarkerDiff);
        }
    }

    /**
     * Command-line entry point: builds a collection from a list of files and stores it in serialised form.
     *
     * The file list comes from the command line or, when none is given there, from standard
     * input (one name per line). Unless the "unsorted" switch is set, the list is sorted before use.
     * The user-chosen factory is composed after a {@link TRECHeaderDocumentFactory}.
     *
     * @param arg the command-line arguments.
     */
    public static void main(String[] arg) throws IOException, JSAPException, InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException {
        final Parameter[] parameters = new Parameter[]{
            new FlaggedOption("factory", (StringParser)MG4JClassParser.getParser(), IdentityDocumentFactory.class.getName(), false, 'f', "factory", "A document factory with a standard constructor."),
            new FlaggedOption("property", (StringParser)JSAP.STRING_PARSER, JSAP.NO_DEFAULT, false, 'p', "property", "A 'key=value' specification, or the name of a property file").setAllowMultipleDeclarations(true),
            new Switch("gzipped", 'z', "gzipped", "The files are gzipped."),
            new Switch("unsorted", 'u', "unsorted", "Keep the file list unsorted."),
            new FlaggedOption("bufferSize", (StringParser)JSAP.INTSIZE_PARSER, DEFAULT_BUFFER_SIZE, false, 'b', "buffer-size", "The size of an I/O buffer."),
            new UnflaggedOption("collection", (StringParser)JSAP.STRING_PARSER, true, "The filename for the serialised collection."),
            new UnflaggedOption("file", (StringParser)JSAP.STRING_PARSER, JSAP.NO_DEFAULT, false, true, "A list of files that will be indexed. If missing, a list of files will be read from standard input.")
        };
        SimpleJSAP jsap = new SimpleJSAP(TRECDocumentCollection.class.getName(), "Saves a serialised TREC document collection based on a set of file names (which will be sorted lexicographically).", parameters);
        JSAPResult jsapResult = jsap.parse(arg);
        if (jsap.messagePrinted()) {
            return;
        }

        PropertyBasedDocumentFactory userFactory = PropertyBasedDocumentFactory.getInstance(jsapResult.getClass("factory"), jsapResult.getStringArray("property"));

        String[] file = jsapResult.getStringArray("file");
        if (file.length == 0) {
            // No file on the command line: take the names from standard input, one per line.
            final ObjectArrayList<String> names = new ObjectArrayList<String>();
            final BufferedReader stdin = new BufferedReader(new InputStreamReader(System.in));
            for (String line = stdin.readLine(); line != null; line = stdin.readLine()) {
                names.add(line);
            }
            file = names.toArray(new String[0]);
        }

        final boolean keepOrder = jsapResult.getBoolean("unsorted");
        if (!keepOrder) {
            Arrays.sort(file);
        }

        DocumentFactory composite = CompositeDocumentFactory.getFactory(new TRECHeaderDocumentFactory(), userFactory);
        if (file.length == 0) {
            System.err.println("WARNING: empty file set.");
        }

        final TRECDocumentCollection collection = new TRECDocumentCollection(file, composite, jsapResult.getInt("bufferSize"), jsapResult.getBoolean("gzipped"));
        BinIO.storeObject((Object)collection, (CharSequence)jsapResult.getString("collection"));
    }

    static {
        try {
            DOC_OPEN = "<DOC>".getBytes("ASCII");
            DOC_CLOSE = "</DOC>".getBytes("ASCII");
            DOCNO_OPEN = "<DOCNO>".getBytes("ASCII");
            DOCNO_CLOSE = "</DOCNO>".getBytes("ASCII");
            DOCHDR_OPEN = "<DOCHDR>".getBytes("ASCII");
            DOCHDR_CLOSE = "</DOCHDR>".getBytes("ASCII");
        }
        catch (UnsupportedEncodingException cantHappen) {
            throw new RuntimeException(cantHappen);
        }
    }

    /**
     * Locates one document inside a file: the index of the file, the offset at which
     * the document starts, and two further markers stored as {@code int} differences
     * from the start offset.
     */
    private static class TRECDocumentDescriptor
    implements Cloneable {
        /** Index of the file containing the document. */
        public int fileIndex;
        /** Offset of the start of the document. */
        public long startMarker;
        /** Intermediate marker, as a difference from {@link #startMarker}. */
        public int intermediateMarkerDiff;
        /** Stop marker, as a difference from {@link #startMarker}. */
        public int stopMarkerDiff;

        /**
         * Creates a descriptor from absolute marker positions.
         *
         * @param findex the file index.
         * @param start the start marker.
         * @param intermediateMarker the absolute intermediate marker.
         * @param stop the absolute stop marker.
         * @throws IllegalArgumentException if a marker is too far from {@code start}
         *         for the difference to be stored in an {@code int}.
         */
        public TRECDocumentDescriptor(int findex, long start, long intermediateMarker, long stop) {
            this.fileIndex = findex;
            this.startMarker = start;
            this.intermediateMarkerDiff = TRECDocumentDescriptor.toIntDiff(intermediateMarker, start, "intermediate");
            this.stopMarkerDiff = TRECDocumentDescriptor.toIntDiff(stop, start, "stop");
        }

        /**
         * Creates a descriptor from a start marker and marker differences.
         *
         * @param findex the file index.
         * @param start the start marker.
         * @param intermediateMarkerDiff the intermediate marker, relative to {@code start}.
         * @param stopMarkerDiff the stop marker, relative to {@code start}.
         */
        public TRECDocumentDescriptor(int findex, long start, int intermediateMarkerDiff, int stopMarkerDiff) {
            this.fileIndex = findex;
            this.startMarker = start;
            this.intermediateMarkerDiff = intermediateMarkerDiff;
            this.stopMarkerDiff = stopMarkerDiff;
        }

        /**
         * Narrows {@code marker - start} to an {@code int}, failing instead of silently
         * wrapping around when the difference does not fit.
         */
        private static int toIntDiff(long marker, long start, String what) {
            final long diff = marker - start;
            if ((long)(int)diff != diff) {
                throw new IllegalArgumentException("The " + what + " marker " + marker + " is too far from the start marker " + start);
            }
            return (int)diff;
        }

        /**
         * Returns the three absolute markers of this descriptor.
         *
         * @return a new array containing start, intermediate and stop marker, in this order.
         */
        public final long[] toSegments() {
            final long intermediate = this.startMarker + (long)this.intermediateMarkerDiff;
            final long stop = (long)this.stopMarkerDiff + this.startMarker;
            return new long[]{this.startMarker, intermediate, stop};
        }

        /**
         * Returns a new descriptor with the same file index and markers.
         *
         * @return an independent copy of this descriptor.
         */
        public Object clone() {
            return new TRECDocumentDescriptor(this.fileIndex, this.startMarker, this.intermediateMarkerDiff, this.stopMarkerDiff);
        }
    }
}

