/*
 * Decompiled with CFR 0.152.
 */
package it.unimi.dsi.mg4j.document;

import com.martiansoftware.jsap.FlaggedOption;
import com.martiansoftware.jsap.JSAP;
import com.martiansoftware.jsap.JSAPException;
import com.martiansoftware.jsap.JSAPResult;
import com.martiansoftware.jsap.Parameter;
import com.martiansoftware.jsap.SimpleJSAP;
import com.martiansoftware.jsap.StringParser;
import com.martiansoftware.jsap.Switch;
import com.martiansoftware.jsap.UnflaggedOption;
import it.unimi.dsi.Util;
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.fastutil.objects.ObjectList;
import it.unimi.dsi.io.WordReader;
import it.unimi.dsi.lang.MutableString;
import it.unimi.dsi.logging.ProgressLogger;
import it.unimi.dsi.mg4j.document.Document;
import it.unimi.dsi.mg4j.document.DocumentCollection;
import it.unimi.dsi.mg4j.document.DocumentCollectionBuilder;
import it.unimi.dsi.mg4j.document.DocumentFactory;
import it.unimi.dsi.mg4j.document.DocumentIterator;
import it.unimi.dsi.mg4j.document.DocumentSequence;
import it.unimi.dsi.mg4j.document.IdentityDocumentFactory;
import it.unimi.dsi.mg4j.document.ZipDocumentCollection;
import it.unimi.dsi.mg4j.tool.Scan;
import it.unimi.dsi.mg4j.util.MG4JClassParser;
import java.io.DataOutputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.io.OutputStream;
import java.io.Reader;
import java.lang.reflect.InvocationTargetException;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
import org.apache.log4j.Logger;

/**
 * A {@link DocumentCollectionBuilder} that stores documents in a zip file, one zip entry per document.
 *
 * <p>For a given basename and suffix the builder writes <code>basename + suffix + ".zip"</code>
 * while documents are being added, and, on {@link #close()}, serialises a
 * {@link ZipDocumentCollection} describing that file to <code>basename + suffix + ".collection"</code>.
 *
 * <p>Each entry is named after the zero-based index of the document, carries the document title
 * as the entry comment, and starts with the document URI written as a self-delimited UTF-8 string.
 * The fields of the document follow in order.
 *
 * <p>Instances are stateful (current stream, document counter, text-field flag); nothing in this
 * class synchronises access to that state.
 */
public class ZipDocumentCollectionBuilder
implements DocumentCollectionBuilder {
    private static final Logger LOGGER = Util.getLogger(ZipDocumentCollectionBuilder.class);
    private static final boolean DEBUG = false;
    /** The basename passed to the constructor. */
    private final String basename;
    /** The basename followed by the suffix passed to the last {@link #open(CharSequence)} call. */
    private String basenameSuffix;
    /** The zip stream documents are written to. */
    private ZipOutputStream zipOut;
    /** A data-output view over {@link #zipOut}; closing it closes {@link #zipOut} as well. */
    private DataOutputStream zipDataOutputStream;
    /** The number of documents completed since the last {@link #open(CharSequence)} call. */
    private int numberOfDocuments;
    /** Whether non-words are stored together with words. */
    private final boolean exact;
    /** The factory handed over to the resulting {@link ZipDocumentCollection}. */
    private final DocumentFactory factory;
    /** Whether we are between {@link #startTextField()} and {@link #endTextField()}. */
    private boolean inTextField;

    /**
     * Creates a new builder.
     *
     * @param basename the basename of the files that will be produced.
     * @param factory the factory passed to the {@link ZipDocumentCollection} created by {@link #close()};
     * {@link #build(DocumentSequence)} requires it to be the very same instance used by the input sequence.
     * @param exact if true, non-words are stored too; otherwise only nonempty words are stored.
     */
    public ZipDocumentCollectionBuilder(String basename, DocumentFactory factory, boolean exact) {
        this.basename = basename;
        this.factory = factory;
        this.exact = exact;
    }

    /**
     * Opens <code>basename + suffix + ".zip"</code> for writing and resets the document counter.
     *
     * <p>This method does not close a stream left open by a previous call: callers must pair every
     * call with {@link #close()}. Note that {@link #build(DocumentSequence)} opens the builder by itself.
     *
     * @param suffix the suffix appended to the basename.
     * @throws FileNotFoundException if the zip file cannot be opened for writing.
     */
    @Override
    public void open(CharSequence suffix) throws FileNotFoundException {
        this.basenameSuffix = this.basename + suffix;
        this.zipOut = new ZipOutputStream(new FileOutputStream(this.basenameSuffix + ".zip"));
        this.zipDataOutputStream = new DataOutputStream(this.zipOut);
        this.numberOfDocuments = 0;
    }

    /**
     * Returns the basename passed to the constructor (without any suffix).
     *
     * @return the basename.
     */
    @Override
    public String basename() {
        return this.basename;
    }

    /**
     * Starts a new zip entry for the next document.
     *
     * @param title the document title, stored as the comment of the zip entry; must not be {@code null}.
     * @param uri the document URI, or {@code null}, in which case the empty string is stored.
     * @throws IOException if the entry cannot be started or the URI cannot be written.
     */
    @Override
    public void startDocument(CharSequence title, CharSequence uri) throws IOException {
        ZipEntry entry = new ZipEntry(Integer.toString(this.numberOfDocuments));
        entry.setComment(title.toString());
        this.zipOut.putNextEntry(entry);
        new MutableString(uri != null ? uri : "").writeSelfDelimUTF8(this.zipOut);
    }

    /**
     * Closes the zip entry of the current document and increments the document counter.
     *
     * @throws IOException if the entry cannot be closed.
     */
    @Override
    public void endDocument() throws IOException {
        this.zipOut.closeEntry();
        ++this.numberOfDocuments;
    }

    /** Marks the beginning of a text field, enabling {@link #add(MutableString, MutableString)}. */
    @Override
    public void startTextField() {
        this.inTextField = true;
    }

    /**
     * Writes a non-text field using standard Java serialisation.
     *
     * @param o the content of the field.
     * @throws IOException if serialisation fails.
     */
    @Override
    public void nonTextField(Object o) throws IOException {
        // The object stream is only flushed, never closed: closing it would close the zip stream too.
        ObjectOutputStream oos = new ObjectOutputStream(this.zipOut);
        oos.writeObject(o);
        oos.flush();
    }

    /**
     * Writes a virtual field: the number of fragments as an int, followed, for each fragment, by its
     * document specifier and its text as self-delimited UTF-8 strings.
     *
     * @param fragments the fragments of the virtual field.
     * @throws IOException if writing fails.
     */
    @Override
    public void virtualField(ObjectList<Scan.VirtualDocumentFragment> fragments) throws IOException {
        this.zipDataOutputStream.writeInt(fragments.size());
        for (Scan.VirtualDocumentFragment fragment : fragments) {
            fragment.documentSpecifier().writeSelfDelimUTF8(this.zipOut);
            fragment.text().writeSelfDelimUTF8(this.zipOut);
        }
    }

    /**
     * Ends the current text field by writing a zero byte (two zero bytes in exact mode).
     *
     * <p>NOTE(review): the zero bytes presumably act as an end-of-field marker for the reader
     * (one per stored string of the word/non-word pair) &mdash; confirm against {@link ZipDocumentCollection}.
     *
     * @throws IllegalStateException if no text field has been started.
     * @throws IOException if writing fails.
     */
    @Override
    public void endTextField() throws IOException {
        if (!this.inTextField) {
            throw new IllegalStateException();
        }
        this.inTextField = false;
        this.zipOut.write(0);
        if (this.exact) {
            this.zipOut.write(0);
        }
    }

    /**
     * Adds a word/non-word pair to the current text field; does nothing outside of a text field.
     *
     * <p>In exact mode both strings are always written. Otherwise only the word is written, and
     * only if it is nonempty.
     *
     * @param word a word.
     * @param nonWord the non-word following it.
     * @throws IOException if writing fails.
     */
    @Override
    public void add(MutableString word, MutableString nonWord) throws IOException {
        if (!this.inTextField) {
            return;
        }
        if (this.exact || word.length() > 0) {
            word.writeSelfDelimUTF8(this.zipOut);
        }
        if (this.exact) {
            nonWord.writeSelfDelimUTF8(this.zipOut);
        }
    }

    /**
     * Closes the zip file and serialises the corresponding {@link ZipDocumentCollection} to
     * <code>basename + suffix + ".collection"</code>.
     *
     * @throws IOException if the zip file cannot be closed or the collection cannot be stored.
     */
    @Override
    public void close() throws IOException {
        if (this.numberOfDocuments == 0) {
            // NOTE(review): presumably needed because a zip stream without entries cannot be
            // closed on some Java versions -- confirm.
            this.zipOut.putNextEntry(new ZipEntry("dummy"));
        }
        this.zipDataOutputStream.close();
        ZipDocumentCollection zipDocumentCollection = new ZipDocumentCollection(this.basenameSuffix + ".zip", this.factory, this.numberOfDocuments, this.exact);
        try {
            BinIO.storeObject(zipDocumentCollection, this.basenameSuffix + ".collection");
        } finally {
            // Release the collection even if serialisation fails.
            zipDocumentCollection.close();
        }
    }

    /**
     * Builds a zip collection (with empty suffix) out of a document sequence.
     *
     * <p>This method calls {@link #open(CharSequence)} and {@link #close()} by itself, so callers
     * must not open the builder beforehand. Documents and the document iterator are closed even if
     * an error occurs; in that case the zip stream is released too and no <code>.collection</code>
     * file is written.
     *
     * @param inputSequence the sequence providing the documents.
     * @throws IllegalStateException if the factory of the sequence is not the one provided at construction time.
     * @throws IOException if reading the sequence or writing the collection fails.
     */
    public void build(DocumentSequence inputSequence) throws IOException {
        // Validate before acquiring any resource, so that a mismatch leaks nothing.
        if (this.factory != inputSequence.factory()) {
            throw new IllegalStateException("The factory provided by the constructor does not correspond to the factory of the input sequence");
        }
        final int numberOfFields = this.factory.numberOfFields();
        final MutableString word = new MutableString();
        final MutableString nonWord = new MutableString();

        this.open("");
        boolean completed = false;
        try {
            final DocumentIterator docIt = inputSequence.iterator();
            try {
                Document document;
                while ((document = docIt.nextDocument()) != null) {
                    try {
                        this.startDocument(document.title(), document.uri());
                        for (int field = 0; field < numberOfFields; ++field) {
                            this.copyField(document, field, word, nonWord);
                        }
                    } finally {
                        document.close();
                    }
                    this.endDocument();
                }
            } finally {
                docIt.close();
            }
            completed = true;
        } finally {
            if (!completed) {
                this.releaseZipAfterFailure();
            }
        }
        this.close();
    }

    /** Writes a single field of a document, dispatching on the field type declared by the factory. */
    private void copyField(Document document, int field, MutableString word, MutableString nonWord) throws IOException {
        final Object content = document.content(field);
        final DocumentFactory.FieldType type = this.factory.fieldType(field);
        if (type == DocumentFactory.FieldType.TEXT) {
            this.startTextField();
            final WordReader wordReader = document.wordReader(field);
            wordReader.setReader((Reader)content);
            while (wordReader.next(word, nonWord)) {
                this.add(word, nonWord);
            }
            this.endTextField();
        } else if (type == DocumentFactory.FieldType.VIRTUAL) {
            // The content of a virtual field is handled as a list of fragments, as in the original cast.
            @SuppressWarnings("unchecked")
            final ObjectList<Scan.VirtualDocumentFragment> fragments = (ObjectList<Scan.VirtualDocumentFragment>)content;
            this.virtualField(fragments);
        } else {
            this.nonTextField(content);
        }
    }

    /** Closes the zip stream after a failed build without masking the exception being propagated. */
    private void releaseZipAfterFailure() {
        if (this.zipDataOutputStream == null) {
            return;
        }
        try {
            this.zipDataOutputStream.close();
        } catch (IOException e) {
            // The build failure in flight is the error worth reporting; this one is only logged.
            LOGGER.warn("Could not close " + this.basenameSuffix + ".zip after a failed build", e);
        }
    }

    /**
     * Command-line entry point: reads a document sequence and stores it as a zip collection.
     *
     * @param arg the command-line arguments, parsed with JSAP.
     */
    public static void main(String[] arg) throws JSAPException, IOException, ClassNotFoundException, InvocationTargetException, NoSuchMethodException, IllegalAccessException, InstantiationException, IllegalArgumentException, SecurityException {
        SimpleJSAP jsap = new SimpleJSAP(
            ZipDocumentCollectionBuilder.class.getName(),
            "Produces a zip document collection from an existing document sequence.",
            new Parameter[]{
                new FlaggedOption("sequence", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, false, 'S', "sequence", "A serialised document sequence that will be used instead of stdin."),
                new FlaggedOption("factory", MG4JClassParser.getParser(), IdentityDocumentFactory.class.getName(), false, 'f', "factory", "A document factory with a standard constructor."),
                new FlaggedOption("property", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, false, 'p', "property", "A 'key=value' specification, or the name of a property file").setAllowMultipleDeclarations(true),
                new FlaggedOption("delimiter", JSAP.INTEGER_PARSER, Integer.toString(10), false, 'd', "delimiter", "The document delimiter."),
                new Switch("approximated", 'a', "approximated", "If specified, non-words will not be copied."),
                new FlaggedOption("logInterval", JSAP.LONG_PARSER, Long.toString(10000L), false, 'l', "log-interval", "The minimum time interval between activity logs in milliseconds."),
                new UnflaggedOption("basename", JSAP.STRING_PARSER, true, "The basename for the collection.")
            });
        JSAPResult jsapResult = jsap.parse(arg);
        if (jsap.messagePrinted()) {
            return;
        }
        DocumentSequence documentSequence = Scan.getSequence(jsapResult.getString("sequence"), jsapResult.getClass("factory"), jsapResult.getStringArray("property"), jsapResult.getInt("delimiter"), LOGGER);
        // NOTE(review): this logger is configured but never handed to build(), and the
        // "logInterval" option is never read, so no progress is actually reported.
        ProgressLogger progressLogger = new ProgressLogger(LOGGER, "documents");
        if (documentSequence instanceof DocumentCollection) {
            progressLogger.expectedUpdates = ((DocumentCollection)documentSequence).size();
        }
        ZipDocumentCollectionBuilder zipDocumentCollectionBuilder = new ZipDocumentCollectionBuilder(jsapResult.getString("basename"), documentSequence.factory(), !jsapResult.getBoolean("approximated"));
        // build() opens (and closes) the builder by itself: opening it here as well would leak
        // a second, never-closed stream on the same zip file.
        zipDocumentCollectionBuilder.build(documentSequence);
    }
}

