/*
 * Decompiled with CFR 0.152.
 */
package it.unimi.dsi.mg4j.tool;

import com.martiansoftware.jsap.FlaggedOption;
import com.martiansoftware.jsap.JSAP;
import com.martiansoftware.jsap.JSAPException;
import com.martiansoftware.jsap.JSAPResult;
import com.martiansoftware.jsap.Parameter;
import com.martiansoftware.jsap.SimpleJSAP;
import com.martiansoftware.jsap.StringParser;
import com.martiansoftware.jsap.Switch;
import com.martiansoftware.jsap.UnflaggedOption;
import it.unimi.dsi.Util;
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.fastutil.io.FastBufferedOutputStream;
import it.unimi.dsi.io.FastBufferedReader;
import it.unimi.dsi.io.InputBitStream;
import it.unimi.dsi.io.OutputBitStream;
import it.unimi.dsi.lang.MutableString;
import it.unimi.dsi.logging.ProgressLogger;
import it.unimi.dsi.mg4j.index.BitStreamHPIndex;
import it.unimi.dsi.mg4j.index.BitStreamIndex;
import it.unimi.dsi.mg4j.index.DiskBasedIndex;
import it.unimi.dsi.mg4j.index.Index;
import it.unimi.dsi.mg4j.index.cluster.IndexCluster;
import it.unimi.dsi.mg4j.index.cluster.LexicalCluster;
import it.unimi.dsi.mg4j.index.cluster.LexicalPartitioningStrategy;
import it.unimi.dsi.mg4j.index.cluster.LexicalStrategies;
import it.unimi.dsi.util.Properties;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.Reader;
import org.apache.commons.configuration.Configuration;
import org.apache.commons.configuration.ConfigurationException;
import org.apache.commons.configuration.ConfigurationMap;
import org.apache.commons.io.IOUtils;
import org.apache.log4j.Logger;

/**
 * Partitions an index lexically, that is, distributes the terms of a global index
 * (and the associated data) among a number of local indices.
 *
 * <p>For each term number the {@link LexicalPartitioningStrategy} supplies the local index
 * that will receive the term ({@code localIndex}) and the number the term is expected to
 * have inside that local index ({@code localNumber}). Local indices are named
 * <code><var>outputBasename</var>-<var>i</var></code>.
 *
 * <p>The global index is read from the files <code><var>inputBasename</var>.terms</code>,
 * <code>.index</code>, <code>.offsets</code>, <code>.frequencies</code>, <code>.globcounts</code>,
 * <code>.sizes</code>, <code>.properties</code> and, when present/applicable,
 * <code>.posnumbits</code> and <code>.positions</code>.
 */
public class PartitionLexically {
    private static final Logger LOGGER = Util.getLogger(PartitionLexically.class);

    /** The default size of an I/O buffer (0x100000, i.e., 1 MiB). */
    public static final int DEFAULT_BUFFER_SIZE = 0x100000;

    /** The number of local indices, as reported by the strategy. */
    private final int numIndices;
    /** The basename of the cluster; local indices are named after it. */
    private final String outputBasename;
    /** The basenames of the local indices ({@code outputBasename + "-" + i}). */
    private final String[] localBasename;
    /** The basename of the global index to be partitioned. */
    private final String inputBasename;
    /** The size, in bytes, of the copy buffer and of the main bit-stream buffers. */
    private final int bufferSize;
    /** The filename of the serialised strategy recorded in the cluster properties, or {@code null}. */
    private final String strategyFilename;
    /** The strategy deciding where each term goes. */
    private final LexicalPartitioningStrategy strategy;
    /** Additional per-local-index properties provided by the strategy. */
    private final Properties[] strategyProperties;
    /** The logging interval passed to the progress logger. */
    private final long logInterval;

    /**
     * Creates a new partitioner.
     *
     * @param inputBasename the basename of the global index.
     * @param outputBasename the basename of the resulting cluster; the <var>i</var>-th local index
     * will have basename <code><var>outputBasename</var>-<var>i</var></code>.
     * @param strategy the lexical partitioning strategy.
     * @param strategyFilename the filename of the serialised strategy, stored in the cluster
     * properties if not {@code null}.
     * @param bufferSize the size in bytes of the I/O buffers; must be positive for {@link #run()}.
     * @param logInterval the logging interval handed to the progress logger.
     */
    public PartitionLexically(String inputBasename, String outputBasename, LexicalPartitioningStrategy strategy, String strategyFilename, int bufferSize, long logInterval) {
        this.inputBasename = inputBasename;
        this.outputBasename = outputBasename;
        this.strategy = strategy;
        this.strategyFilename = strategyFilename;
        this.bufferSize = bufferSize;
        this.logInterval = logInterval;
        this.numIndices = strategy.numberOfLocalIndices();
        this.strategyProperties = strategy.properties();
        this.localBasename = new String[this.numIndices];
        for (int i = 0; i < this.numIndices; ++i) {
            this.localBasename[i] = outputBasename + "-" + i;
        }
    }

    /** Opens a UTF-8 writer on the <code>.terms</code> file of the index with the given basename. */
    private static PrintWriter openTermsWriter(final String basename) throws IOException {
        return new PrintWriter(new OutputStreamWriter(new FastBufferedOutputStream(new FileOutputStream(basename + ".terms")), "UTF-8"));
    }

    /** Opens a UTF-8 reader on the <code>.terms</code> file of the index with the given basename. */
    private static FastBufferedReader openTermsReader(final String basename) throws IOException {
        return new FastBufferedReader(new InputStreamReader(new FileInputStream(basename + ".terms"), "UTF-8"));
    }

    /**
     * Asks the strategy for the local index of a term, checks that the local number given by the
     * strategy equals the number of terms already sent to that index, and counts the term.
     *
     * @param termNumber the (global) number of the term.
     * @param numTerms the per-local-index term counters; the selected entry is incremented.
     * @return the local index the term belongs to.
     * @throws IllegalStateException if the strategy's local number does not match the counter.
     */
    private int assignTerm(final int termNumber, final int[] numTerms) {
        final int k = this.strategy.localIndex(termNumber);
        if (numTerms[k] != this.strategy.localNumber(termNumber)) {
            throw new IllegalStateException("Term " + termNumber + " is assigned to local index " + k
                    + ", which has received " + numTerms[k] + " term(s) so far, but the strategy provides a different local number");
        }
        numTerms[k]++;
        return k;
    }

    /**
     * Copies a given number of bits from a bit stream to another one, using a byte array as
     * intermediate storage.
     *
     * @param source the stream to read from.
     * @param destination the stream to write to.
     * @param buffer the intermediate buffer; must not be empty.
     * @param length the number of bits to copy (nothing happens if it is not positive).
     */
    private static void copyBits(final InputBitStream source, final OutputBitStream destination, final byte[] buffer, long length) throws IOException {
        // The product is computed in long arithmetic (an int product overflows for buffers of
        // 256 MiB or more) and capped, as the read method accepts an int number of bits.
        final long maxChunk = Math.min(8L * buffer.length, (long)Integer.MAX_VALUE);
        while (length > 0L) {
            final int chunk = (int)Math.min(maxChunk, length);
            source.read(buffer, chunk);
            destination.write(buffer, (long)chunk);
            length -= chunk;
        }
    }

    /** Copies a file, closing both streams even if the copy fails. */
    private static void copyFile(final String from, final String to) throws IOException {
        final FileInputStream input = new FileInputStream(from);
        try {
            final FileOutputStream output = new FileOutputStream(to);
            try {
                IOUtils.copy(input, output);
            } finally {
                output.close();
            }
        } finally {
            input.close();
        }
    }

    /**
     * Partitions just the term list: each line of <code><var>inputBasename</var>.terms</code> is
     * written to the <code>.terms</code> file of the local index chosen by the strategy.
     *
     * @throws IOException if reading or opening one of the files fails.
     * @throws IllegalStateException if the local numbers provided by the strategy are not
     * consistent with the order in which terms are assigned.
     */
    public void runTermsOnly() throws IOException {
        final ProgressLogger pl = new ProgressLogger(LOGGER, this.logInterval);
        final PrintWriter[] localTerms = new PrintWriter[this.numIndices];
        final int[] numTerms = new int[this.numIndices];
        final FastBufferedReader terms = openTermsReader(this.inputBasename);
        try {
            for (int i = 0; i < this.numIndices; ++i) {
                localTerms[i] = openTermsWriter(this.localBasename[i]);
            }
            final MutableString currTerm = new MutableString();
            pl.itemsName = "terms";
            pl.logInterval = this.logInterval;
            pl.start("Partitioning index terms...");
            int termNumber = 0;
            while (terms.readLine(currTerm) != null) {
                final int k = assignTerm(termNumber, numTerms);
                currTerm.println(localTerms[k]);
                pl.update();
                ++termNumber;
            }
        } finally {
            // Release every file, including those opened before a failure.
            try {
                terms.close();
            } finally {
                for (int i = 0; i < this.numIndices; ++i) {
                    if (localTerms[i] != null) {
                        localTerms[i].close();
                    }
                }
            }
        }
        pl.done();
    }

    /**
     * Partitions the whole index: terms, posting lists, offsets, frequencies, global counts and,
     * when available, position-size data and (for high-performance indices) positions. At the end
     * the property files of the cluster and of each local index are saved, and the
     * <code>.sizes</code> file is copied to each local index.
     *
     * @throws ConfigurationException if a property file cannot be loaded or saved.
     * @throws IOException if an I/O error occurs.
     * @throws ClassNotFoundException if the index class named in the input properties cannot be found.
     * @throws IllegalArgumentException if the buffer size is not positive.
     * @throws IllegalStateException if the local numbers provided by the strategy are not
     * consistent with the order in which terms are assigned.
     */
    public void run() throws ConfigurationException, IOException, ClassNotFoundException {
        if (this.bufferSize <= 0) {
            // An empty buffer would make the copy loops spin forever.
            throw new IllegalArgumentException("The buffer size must be positive: " + this.bufferSize);
        }
        final ProgressLogger pl = new ProgressLogger(LOGGER, this.logInterval);
        final byte[] buffer = new byte[this.bufferSize];
        final OutputBitStream[] localIndexStream = new OutputBitStream[this.numIndices];
        final OutputBitStream[] localPositionsStream = new OutputBitStream[this.numIndices];
        final OutputBitStream[] localOffsets = new OutputBitStream[this.numIndices];
        final OutputBitStream[] localPosNumBits = new OutputBitStream[this.numIndices];
        final OutputBitStream[] localFrequencies = new OutputBitStream[this.numIndices];
        final OutputBitStream[] localGlobCounts = new OutputBitStream[this.numIndices];
        final PrintWriter[] localTerms = new PrintWriter[this.numIndices];
        final int[] numTerms = new int[this.numIndices];
        final long[] numberOfOccurrences = new long[this.numIndices];
        final long[] numberOfPostings = new long[this.numIndices];

        // The properties of the global index are loaded once and reused when writing the new property files.
        final Properties properties = new Properties(this.inputBasename + ".properties");
        final boolean isHighPerformance = BitStreamHPIndex.class.isAssignableFrom(Class.forName(properties.getString(Index.PropertyKeys.INDEXCLASS)));

        final InputBitStream globalIndex = new InputBitStream(this.inputBasename + ".index", this.bufferSize);
        final long globalPositionsLength = new File(this.inputBasename + ".positions").length();
        final InputBitStream globalPositions = isHighPerformance ? new InputBitStream(this.inputBasename + ".positions", this.bufferSize) : null;
        final FastBufferedReader terms = openTermsReader(this.inputBasename);
        final InputBitStream offsets = new InputBitStream(this.inputBasename + ".offsets");
        final File posNumBitsFile = new File(this.inputBasename + ".posnumbits");
        final InputBitStream posNumBits = posNumBitsFile.exists() ? new InputBitStream(this.inputBasename + ".posnumbits") : null;
        final InputBitStream frequencies = new InputBitStream(this.inputBasename + ".frequencies");
        final InputBitStream globCounts = new InputBitStream(this.inputBasename + ".globcounts");
        // Skips the first value of the offset stream (each local offset stream starts with a zero, too).
        offsets.readGamma();

        for (int i = 0; i < this.numIndices; ++i) {
            localIndexStream[i] = new OutputBitStream(this.localBasename[i] + ".index", this.bufferSize);
            if (isHighPerformance) {
                localPositionsStream[i] = new OutputBitStream(this.localBasename[i] + ".positions", this.bufferSize);
            }
            localFrequencies[i] = new OutputBitStream(this.localBasename[i] + ".frequencies");
            localGlobCounts[i] = new OutputBitStream(this.localBasename[i] + ".globcounts");
            localTerms[i] = openTermsWriter(this.localBasename[i]);
            localOffsets[i] = new OutputBitStream(this.localBasename[i] + ".offsets");
            if (posNumBits != null) {
                localPosNumBits[i] = new OutputBitStream(this.localBasename[i] + ".posnumbits");
            }
            localOffsets[i].writeGamma(0);
        }

        final MutableString currTerm = new MutableString();
        pl.expectedUpdates = (new File(this.inputBasename + ".index").length() + (isHighPerformance ? globalPositionsLength : 0L)) * 8L;
        pl.itemsName = "bits";
        pl.logInterval = this.logInterval;
        pl.start("Partitioning index...");

        int termNumber = 0;
        int prevK = -1;
        int previousHeaderLength = 0;
        int newHeaderLength = 0;
        long positionsOffset = 0L;

        while (terms.readLine(currTerm) != null) {
            final int k = assignTerm(termNumber, numTerms);

            if (isHighPerformance) {
                // The posting list starts with the offset into the positions file; we measure how
                // many bits it takes so that it can be replaced with the local one.
                final long start = globalIndex.readBits();
                positionsOffset = globalIndex.readLongDelta();
                previousHeaderLength = (int)(globalIndex.readBits() - start);
                if (prevK != -1) {
                    // Everything up to the new offset belongs to the previous term.
                    final long positionsLength = positionsOffset - globalPositions.readBits();
                    pl.count += positionsLength;
                    copyBits(globalPositions, localPositionsStream[prevK], buffer, positionsLength);
                }
                newHeaderLength = localIndexStream[k].writeLongDelta(localPositionsStream[k].writtenBits());
            }

            final int frequency = frequencies.readGamma();
            localFrequencies[k].writeGamma(frequency);
            numberOfPostings[k] += frequency;

            if (posNumBits != null) {
                localPosNumBits[k].writeGamma(posNumBits.readGamma());
            }

            final long count = globCounts.readLongGamma();
            numberOfOccurrences[k] += count;
            localGlobCounts[k].writeLongGamma(count);

            currTerm.println(localTerms[k]);

            // The length must be read BEFORE it is used to write the local offset and to update
            // the progress count: it is the number of bits still to be copied for this term.
            final long length = offsets.readLongGamma() - previousHeaderLength;
            localOffsets[k].writeLongGamma(length + newHeaderLength);
            // One unit less, as pl.update() below adds one.
            pl.count += length + previousHeaderLength - 1L;
            copyBits(globalIndex, localIndexStream[k], buffer, length);

            pl.update();
            prevK = k;
            ++termNumber;
        }

        if (isHighPerformance && prevK != -1) {
            // Pours what is left of the positions file into the index of the last term.
            final long remaining = globalPositionsLength * 8L - globalPositions.readBits();
            LOGGER.debug("Bits of positions remaining for the last term: " + remaining);
            copyBits(globalPositions, localPositionsStream[prevK], buffer, remaining);
        }

        pl.done();

        terms.close();
        offsets.close();
        frequencies.close();
        globCounts.close();
        globalIndex.close();
        if (posNumBits != null) {
            posNumBits.close();
        }
        if (isHighPerformance) {
            globalPositions.close();
        }

        // Properties of the cluster as a whole.
        final Properties globalProperties = new Properties();
        if (this.strategyFilename != null) {
            globalProperties.setProperty(IndexCluster.PropertyKeys.STRATEGY, this.strategyFilename);
        }
        globalProperties.setProperty(IndexCluster.PropertyKeys.BLOOM, false);
        globalProperties.setProperty(Index.PropertyKeys.INDEXCLASS, LexicalCluster.class.getName());
        for (int i = 0; i < this.numIndices; ++i) {
            globalProperties.addProperty(IndexCluster.PropertyKeys.LOCALINDEX, this.localBasename[i]);
        }
        globalProperties.setProperty(Index.PropertyKeys.FIELD, properties.getProperty(Index.PropertyKeys.FIELD));
        globalProperties.setProperty(Index.PropertyKeys.POSTINGS, properties.getProperty(Index.PropertyKeys.POSTINGS));
        globalProperties.setProperty(Index.PropertyKeys.OCCURRENCES, properties.getProperty(Index.PropertyKeys.OCCURRENCES));
        globalProperties.setProperty(Index.PropertyKeys.DOCUMENTS, properties.getProperty(Index.PropertyKeys.DOCUMENTS));
        globalProperties.setProperty(Index.PropertyKeys.TERMS, properties.getProperty(Index.PropertyKeys.TERMS));
        globalProperties.setProperty(Index.PropertyKeys.TERMPROCESSOR, properties.getProperty(Index.PropertyKeys.TERMPROCESSOR));
        globalProperties.setProperty(Index.PropertyKeys.MAXCOUNT, properties.getProperty(Index.PropertyKeys.MAXCOUNT));
        globalProperties.setProperty(Index.PropertyKeys.MAXDOCSIZE, properties.getProperty(Index.PropertyKeys.MAXDOCSIZE));
        globalProperties.save(this.outputBasename + ".properties");
        LOGGER.debug("Properties for clustered index " + this.outputBasename + ": " + new ConfigurationMap((Configuration)globalProperties));

        // Closes the local files and writes the properties of each local index.
        for (int i = 0; i < this.numIndices; ++i) {
            localIndexStream[i].close();
            if (isHighPerformance) {
                localPositionsStream[i].close();
            }
            localOffsets[i].close();
            if (posNumBits != null) {
                localPosNumBits[i].close();
            }
            localFrequencies[i].close();
            localGlobCounts[i].close();
            localTerms[i].close();

            // Every local index gets a verbatim copy of the global sizes file.
            copyFile(this.inputBasename + ".sizes", this.localBasename[i] + ".sizes");

            final Properties localProperties = new Properties();
            localProperties.addAll((Configuration)globalProperties);
            localProperties.setProperty(Index.PropertyKeys.TERMS, numTerms[i]);
            localProperties.setProperty(Index.PropertyKeys.OCCURRENCES, numberOfOccurrences[i]);
            localProperties.setProperty(Index.PropertyKeys.POSTINGS, numberOfPostings[i]);
            localProperties.setProperty(Index.PropertyKeys.INDEXCLASS, properties.getProperty(Index.PropertyKeys.INDEXCLASS));
            localProperties.addProperties(Index.PropertyKeys.CODING, properties.getStringArray(Index.PropertyKeys.CODING));
            localProperties.setProperty(BitStreamIndex.PropertyKeys.SKIPQUANTUM, properties.getProperty(BitStreamIndex.PropertyKeys.SKIPQUANTUM));
            localProperties.setProperty(BitStreamIndex.PropertyKeys.SKIPHEIGHT, properties.getProperty(BitStreamIndex.PropertyKeys.SKIPHEIGHT));
            // NOTE(review): the null check on the array itself is defensive; confirm whether a
            // strategy is allowed to return no property array at all.
            if (this.strategyProperties != null && this.strategyProperties[i] != null) {
                localProperties.addAll((Configuration)this.strategyProperties[i]);
            }
            localProperties.save(this.localBasename[i] + ".properties");
            LOGGER.debug("Post-partitioning properties for index " + this.localBasename[i] + ": " + new ConfigurationMap((Configuration)localProperties));
        }
    }

    /**
     * Command-line entry point. The strategy is either a uniform one built on the fly (and then
     * serialised to <code><var>outputBasename</var>.strategy</code>) or a serialised strategy
     * loaded from the file given with the <code>strategy</code> option; the uniform option takes
     * precedence if both are given.
     *
     * @param arg the command-line arguments.
     * @throws IllegalArgumentException if no strategy is specified.
     */
    public static void main(String[] arg) throws JSAPException, ConfigurationException, IOException, ClassNotFoundException, SecurityException, InstantiationException, IllegalAccessException {
        final SimpleJSAP jsap = new SimpleJSAP(PartitionLexically.class.getName(), "Partitions an index lexically.", new Parameter[]{
            new FlaggedOption("bufferSize", JSAP.INTSIZE_PARSER, Util.formatBinarySize(DEFAULT_BUFFER_SIZE), false, 'b', "buffer-size", "The size of an I/O buffer."),
            new FlaggedOption("logInterval", JSAP.LONG_PARSER, Long.toString(10000L), false, 'l', "log-interval", "The minimum time interval between activity logs in milliseconds."),
            new FlaggedOption("strategy", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, false, 's', "strategy", "A serialised lexical partitioning strategy."),
            new FlaggedOption("uniformStrategy", JSAP.INTEGER_PARSER, JSAP.NO_DEFAULT, false, 'u', "uniform", "Requires a uniform partitioning in the given number of parts."),
            new Switch("termsOnly", 't', "terms-only", "Just partition the term list."),
            new UnflaggedOption("inputBasename", JSAP.STRING_PARSER, true, "The basename of the global index."),
            new UnflaggedOption("outputBasename", JSAP.STRING_PARSER, true, "The basename of the local indices.")
        });
        final JSAPResult jsapResult = jsap.parse(arg);
        if (jsap.messagePrinted()) {
            return;
        }
        final String inputBasename = jsapResult.getString("inputBasename");
        final String outputBasename = jsapResult.getString("outputBasename");
        String strategyFilename = jsapResult.getString("strategy");
        final LexicalPartitioningStrategy strategy;
        if (jsapResult.userSpecified("uniformStrategy")) {
            strategy = LexicalStrategies.uniform(jsapResult.getInt("uniformStrategy"), DiskBasedIndex.getInstance(inputBasename, false, false, true));
            // The freshly built strategy is stored next to the cluster so that it can be referenced by its properties.
            strategyFilename = outputBasename + ".strategy";
            BinIO.storeObject(strategy, strategyFilename);
        } else if (strategyFilename != null) {
            strategy = (LexicalPartitioningStrategy)BinIO.loadObject(strategyFilename);
        } else {
            throw new IllegalArgumentException("You must specify a splitting strategy");
        }
        final PartitionLexically partitionLexically = new PartitionLexically(inputBasename, outputBasename, strategy, strategyFilename, jsapResult.getInt("bufferSize"), jsapResult.getLong("logInterval"));
        if (jsapResult.getBoolean("termsOnly")) {
            partitionLexically.runTermsOnly();
        } else {
            partitionLexically.run();
        }
    }
}

