/*
 * Decompiled with CFR 0.152.
 */
package water.util;

import water.H2O;
import water.MRTask;
import water.fvec.CategoricalWrappedVec;
import water.fvec.Chunk;
import water.fvec.Vec;
import water.util.ArrayUtils;
import water.util.MathUtils;
import water.util.PrettyPrint;
import water.util.TwoDimTable;

/**
 * Map/reduce task that summarizes how a frame's chunks are compressed and how
 * they are spread across the nodes of the cloud.
 *
 * <p>{@link #map(Chunk[])} tallies, for every chunk it is handed, the chunk's
 * type (count and byte size) and attributes the bytes, rows and chunk-rows to
 * the local node. {@link #reduce(ChunkSummary)} adds the tallies of two tasks
 * together, and {@link #postGlobal()} derives the totals and the per-node
 * min/max/mean/stddev figures. The results can be rendered through
 * {@link #toTwoDimTableChunkTypes()}, {@link #toTwoDimTableDistribution()} and
 * {@link #toString()}.
 */
public class ChunkSummary extends MRTask<ChunkSummary> {
    /**
     * Short chunk type names, i.e. the chunk class' simple name with the trailing
     * "Chunk" removed. Parallel to {@link #chunkNames}: index {@code j} in both
     * arrays describes the same chunk type.
     */
    static final transient String[] chunkTypes = new String[]{"C0L", "C0D", "CBS", "CX0", "CXI", "C1", "C1N", "C1S", "C2", "C2S", "C4", "C4S", "C4F", "C8", "CNAXI", "C16", "CStr", "CXD", "CNAXD", "CUD", "C8D"};
    /** Human readable descriptions, parallel to {@link #chunkTypes}. */
    static final transient String[] chunkNames = new String[]{"Constant Integers", "Constant Reals", "Bits", "Zero Sparse Bits", "Zero Sparse Integers", "1-Byte Integers", "1-Byte Integers (w/o NAs)", "1-Byte Fractions", "2-Byte Integers", "2-Byte Fractions", "4-Byte Integers", "4-Byte Fractions", "32-bit Reals", "64-bit Integers", "NA Sparse Integers", "128-bit UUID", "String", "Zero Sparse Reals", "NA Sparse Reals", "Unique Reals", "64-bit Reals"};

    /** Number of trailing summary rows (mean, min, max, stddev, total) in the distribution table. */
    private static final int SUMMARY_ROWS = 5;

    // Per chunk type tallies, indexed like chunkTypes.
    private long[] chunk_counts;
    private long total_chunk_count;
    private long[] chunk_byte_sizes;
    private long total_chunk_byte_size;
    // Per node tallies, indexed by node index; the derived statistics are filled in by postGlobal().
    private long[] byte_size_per_node;
    private double byte_size_per_node_mean;
    private double byte_size_per_node_min;
    private double byte_size_per_node_max;
    private double byte_size_per_node_stddev;
    private long total_row_count;
    private long[] row_count_per_node;
    private double row_count_per_node_mean;
    private double row_count_per_node_min;
    private double row_count_per_node_max;
    private double row_count_per_node_stddev;
    private long total_chunk_count_per_col;
    private long[] chunk_count_per_col_per_node;
    private double chunk_count_per_col_per_node_mean;
    private double chunk_count_per_col_per_node_min;
    private double chunk_count_per_col_per_node_max;
    private double chunk_count_per_col_per_node_stddev;

    /**
     * Creates the task with a priority one above the current thread's priority when
     * invoked from an {@code H2O.FJWThr} thread, and with priority 117 otherwise.
     */
    ChunkSummary() {
        // NOTE(review): 117 is presumably derived from an H2O priority constant - confirm against H2O.
        super((byte)(Thread.currentThread() instanceof H2O.FJWThr ? ChunkSummary.currThrPriority() + 1 : 117));
    }

    /**
     * Tallies the given chunks into this task.
     *
     * <p>All tally arrays are freshly allocated on every call, so anything tallied
     * by an earlier call on the same instance is discarded.
     * NOTE(review): presumably each task instance receives a single map call - confirm against MRTask.
     *
     * @param cs the chunks to tally; the row count is taken from {@code cs[0]}, so
     *           the array must not be empty
     */
    @Override
    public void map(Chunk[] cs) {
        final int nodeCount = H2O.CLOUD.size();
        final int self = H2O.SELF.index();
        this.chunk_counts = new long[chunkTypes.length];
        this.chunk_byte_sizes = new long[chunkTypes.length];
        this.byte_size_per_node = new long[nodeCount];
        this.row_count_per_node = new long[nodeCount];
        this.chunk_count_per_col_per_node = new long[nodeCount];

        for (Chunk c : cs) {
            String sname = shortTypeName(c);
            if (sname.equals("CategoricalWrapped")) {
                // Classify a categorical wrapper by the type of the chunk it wraps.
                sname = shortTypeName(((CategoricalWrappedVec.CategoricalWrappedChunk)c)._c);
            }
            final int type = indexOfChunkType(sname);
            if (type < 0) {
                throw H2O.fail("Unknown Chunk Type: " + sname);
            }
            // The byte size is always taken from the chunk as handed in, even when it is a wrapper.
            final long bytes = c.byteSize();
            this.chunk_counts[type] += 1L;
            this.chunk_byte_sizes[type] += bytes;
            this.byte_size_per_node[self] += bytes;
        }

        // The first chunk's length is used as the row count of this whole set of chunks.
        final long rows = cs[0].len();
        this.row_count_per_node[self] += rows;
        this.total_row_count += rows;
        this.chunk_count_per_col_per_node[self] += 1L;
        ++this.total_chunk_count_per_col;
    }

    /**
     * Adds the tallies of another task into this one.
     *
     * @param mrt the task whose tallies are merged into this task
     */
    @Override
    public void reduce(ChunkSummary mrt) {
        ArrayUtils.add(this.chunk_counts, mrt.chunk_counts);
        ArrayUtils.add(this.chunk_byte_sizes, mrt.chunk_byte_sizes);
        ArrayUtils.add(this.byte_size_per_node, mrt.byte_size_per_node);
        ArrayUtils.add(this.row_count_per_node, mrt.row_count_per_node);
        ArrayUtils.add(this.chunk_count_per_col_per_node, mrt.chunk_count_per_col_per_node);
        this.total_row_count += mrt.total_row_count;
        this.total_chunk_count_per_col += mrt.total_chunk_count_per_col;
    }

    /**
     * Derives the grand totals and the per-node statistics from the merged tallies.
     * Does nothing when the tally arrays were never allocated.
     */
    @Override
    protected void postGlobal() {
        if (this.chunk_counts == null || this.chunk_byte_sizes == null || this.byte_size_per_node == null) {
            return;
        }
        assert (this.total_row_count == this._fr.numRows()) : "total_row_count[" + this.total_row_count + "] != _fr.numRows()[" + this._fr.numRows() + "]. ";

        long byteTotal = 0L;
        long chunkTotal = 0L;
        for (int j = 0; j < chunkTypes.length; ++j) {
            byteTotal += this.chunk_byte_sizes[j];
            chunkTotal += this.chunk_counts[j];
        }
        this.total_chunk_byte_size = byteTotal;
        this.total_chunk_count = chunkTotal;

        // Sanity check: every chunk of every vec of the frame must have been tallied exactly once.
        long check = 0L;
        for (Vec v : this._fr.vecs()) {
            check += (long)v.nChunks();
        }
        assert (this.total_chunk_count == check);

        // Each result array is read as {min, max, mean, stddev}.
        double[] res = MathUtils.min_max_mean_stddev(this.byte_size_per_node);
        this.byte_size_per_node_min = res[0];
        this.byte_size_per_node_max = res[1];
        this.byte_size_per_node_mean = res[2];
        this.byte_size_per_node_stddev = res[3];

        res = MathUtils.min_max_mean_stddev(this.row_count_per_node);
        this.row_count_per_node_min = res[0];
        this.row_count_per_node_max = res[1];
        this.row_count_per_node_mean = res[2];
        this.row_count_per_node_stddev = res[3];

        res = MathUtils.min_max_mean_stddev(this.chunk_count_per_col_per_node);
        this.chunk_count_per_col_per_node_min = res[0];
        this.chunk_count_per_col_per_node_max = res[1];
        this.chunk_count_per_col_per_node_mean = res[2];
        this.chunk_count_per_col_per_node_stddev = res[3];
    }

    /**
     * Formats a byte count right-aligned in a field of width 10.
     *
     * @param val the number of bytes
     * @return the formatted size; zero is rendered with a fixed literal instead of
     *         going through {@code PrettyPrint.bytes}
     */
    String display(long val) {
        return String.format("%10s", val == 0L ? "  0  B" : PrettyPrint.bytes(val));
    }

    /**
     * Builds the "Chunk compression summary" table with one row per chunk type
     * that occurs at least once.
     *
     * @return the table; it has no rows when nothing has been tallied
     */
    public TwoDimTable toTwoDimTableChunkTypes() {
        final String tableHeader = "Chunk compression summary";
        final boolean tallied = this.chunk_counts != null;

        int rows = 0;
        if (tallied) {
            for (int j = 0; j < chunkTypes.length; ++j) {
                if (this.chunk_counts[j] > 0L) {
                    ++rows;
                }
            }
        }

        final String[] rowHeaders = new String[rows];
        final String[] colHeaders = new String[]{"Chunk Type", "Chunk Name", "Count", "Count Percentage", "Size", "Size Percentage"};
        final String[] colTypes = new String[]{"string", "string", "int", "float", "string", "float"};
        final String[] colFormats = new String[]{"%8s", "%s", "%10d", "%10.3f %%", "%10s", "%10.3f %%"};
        final String colHeaderForRowHeaders = null;
        final TwoDimTable table = new TwoDimTable(tableHeader, null, rowHeaders, colHeaders, colTypes, colFormats, colHeaderForRowHeaders);
        if (!tallied) {
            return table;
        }

        int row = 0;
        for (int j = 0; j < chunkTypes.length; ++j) {
            if (this.chunk_counts[j] <= 0L) {
                continue;
            }
            table.set(row, 0, chunkTypes[j]);
            table.set(row, 1, chunkNames[j]);
            table.set(row, 2, this.chunk_counts[j]);
            table.set(row, 3, percent(this.chunk_counts[j], this.total_chunk_count));
            table.set(row, 4, this.display(this.chunk_byte_sizes[j]));
            table.set(row, 5, percent(this.chunk_byte_sizes[j], this.total_chunk_byte_size));
            ++row;
        }
        return table;
    }

    /**
     * Builds the "Frame distribution summary" table: one row per node of the
     * cloud followed by the mean, min, max, stddev and total rows.
     *
     * @return the table; the per-node rows are left unset when nothing has been tallied
     */
    public TwoDimTable toTwoDimTableDistribution() {
        final String tableHeader = "Frame distribution summary";
        final int nodeCount = H2O.CLOUD.size();

        final String[] rowHeaders = new String[nodeCount + SUMMARY_ROWS];
        for (int node = 0; node < nodeCount; ++node) {
            rowHeaders[node] = H2O.CLOUD._memary[node].getIpPortString();
        }
        int row = nodeCount;
        rowHeaders[row++] = "mean";
        rowHeaders[row++] = "min";
        rowHeaders[row++] = "max";
        rowHeaders[row++] = "stddev";
        rowHeaders[row] = "total";

        final String[] colHeaders = new String[]{"Size", "Number of Rows", "Number of Chunks per Column", "Number of Chunks"};
        final String[] colTypes = new String[]{"string", "float", "float", "float"};
        final String[] colFormats = new String[]{"%s", "%f", "%f", "%f"};
        final String colHeaderForRowHeaders = "";
        final TwoDimTable table = new TwoDimTable(tableHeader, null, rowHeaders, colHeaders, colTypes, colFormats, colHeaderForRowHeaders);

        if (this.byte_size_per_node != null) {
            for (int node = 0; node < nodeCount; ++node) {
                table.set(node, 0, this.display(this.byte_size_per_node[node]));
                table.set(node, 1, this.row_count_per_node[node]);
                table.set(node, 2, this.chunk_count_per_col_per_node[node]);
                table.set(node, 3, (long)this._fr.numCols() * this.chunk_count_per_col_per_node[node]);
            }
        }

        row = nodeCount;
        this.setStatRow(table, row++, this.byte_size_per_node_mean, this.row_count_per_node_mean, this.chunk_count_per_col_per_node_mean);
        this.setStatRow(table, row++, this.byte_size_per_node_min, this.row_count_per_node_min, this.chunk_count_per_col_per_node_min);
        this.setStatRow(table, row++, this.byte_size_per_node_max, this.row_count_per_node_max, this.chunk_count_per_col_per_node_max);
        this.setStatRow(table, row++, this.byte_size_per_node_stddev, this.row_count_per_node_stddev, this.chunk_count_per_col_per_node_stddev);

        table.set(row, 0, this.display(this.total_chunk_byte_size));
        table.set(row, 1, this.total_row_count);
        table.set(row, 2, this.total_chunk_count_per_col);
        table.set(row, 3, (long)this._fr.numCols() * this.total_chunk_count_per_col);
        return table;
    }

    /**
     * Renders both tables and appends a rebalancing hint when, on a cloud of more
     * than one node, the stddev of the per-node byte size exceeds 20% of its mean.
     *
     * @return the textual summary
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        sb.append(this.toTwoDimTableChunkTypes().toString());
        sb.append(this.toTwoDimTableDistribution().toString());
        if (H2O.CLOUD.size() > 1 && this.byte_size_per_node_stddev > 0.2 * this.byte_size_per_node_mean) {
            sb.append("** Note: Dataset is not well distributed, consider rebalancing **\n");
        }
        return sb.toString();
    }

    /** Returns the simple class name of the chunk with its last five characters (expected to be "Chunk") removed. */
    private static String shortTypeName(Chunk c) {
        final String cname = c.getClass().getSimpleName();
        final int nlen = cname.length();
        assert (nlen > 5 && cname.charAt(nlen - 5) == 'C' && cname.charAt(nlen - 1) == 'k');
        return cname.substring(0, nlen - 5);
    }

    /** Returns the index of the given short type name in {@link #chunkTypes}, or -1 when it is not listed. */
    private static int indexOfChunkType(String sname) {
        for (int j = 0; j < chunkTypes.length; ++j) {
            if (sname.equals(chunkTypes[j])) {
                return j;
            }
        }
        return -1;
    }

    /** Returns {@code part} as a percentage of {@code whole}, computed in double precision. */
    private static double percent(long part, long whole) {
        return (double)part / (double)whole * 100.0;
    }

    /** Writes one statistic row (mean, min, max or stddev) of the distribution table. */
    private void setStatRow(TwoDimTable table, int row, double byteSize, double rowCount, double chunksPerCol) {
        table.set(row, 0, this.display((long)byteSize));
        table.set(row, 1, rowCount);
        table.set(row, 2, chunksPerCol);
        table.set(row, 3, (double)this._fr.numCols() * chunksPerCol);
    }
}

