/*
 * Decompiled with CFR 0.152.
 */
package org.apache.kylin.engine.spark.builder;

import java.io.IOException;
import org.apache.kylin.engine.spark.metadata.ColumnDesc;
import org.apache.kylin.engine.spark.metadata.SegmentInfo;
import org.apache.spark.dict.NGlobalDictBuilderAssist;
import org.apache.spark.dict.NGlobalDictMetaInfo;
import org.apache.spark.dict.NGlobalDictionary;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Static helper that decides how many buckets a column's global dictionary
 * should be split into before it is (re)built.
 */
public class DictionaryBuilderHelper {
    protected static final Logger logger = LoggerFactory.getLogger(DictionaryBuilderHelper.class);

    /**
     * Computes the bucket count to use for the global dictionary of {@code col}.
     *
     * <p>When the dictionary reports {@code isFirst()}, the result is the larger of the
     * current/default bucket count and {@code distinctCount / (int) (threshold * loadFactor)}.
     * Otherwise the result is the maximum of four candidates: a count derived from the
     * incoming distinct values, a count derived from the total number of existing
     * dictionary entries, twice the current count when any single bucket exceeds
     * {@code threshold * overheadFactor}, and the current count itself. In that second
     * case, if the result differs from the current count,
     * {@link NGlobalDictBuilderAssist#resize} is invoked before returning.
     *
     * @param desc          segment whose project and Kylin configuration are used
     * @param col           column whose global dictionary is being sized
     * @param afterDistinct dataset that is counted to obtain the number of incoming values
     * @return the bucket count to build with; never smaller than the current/default count
     * @throws IOException         declared for the dictionary accesses performed here
     * @throws ArithmeticException if a computed bucket count does not fit in an {@code int},
     *                             or if the configured threshold (or the threshold scaled by
     *                             the load factor and truncated to {@code int}) is zero
     */
    public static int calculateBucketSize(SegmentInfo desc, ColumnDesc col, Dataset<Row> afterDistinct) throws IOException {
        NGlobalDictionary globalDict = new NGlobalDictionary(desc.project(), col.tableAliasName(), col.columnName(), desc.kylinconf().getHdfsWorkingDirectory());
        int bucketPartitionSize = globalDict.getBucketSizeOrDefault(desc.kylinconf().getGlobalDictV2MinHashPartitions());
        int bucketThreshold = desc.kylinconf().getGlobalDictV2ThresholdBucketSize();

        boolean firstBuild = globalDict.isFirst();
        // Both paths need the incoming row count and the load-factor-scaled capacity.
        long distinctCount = afterDistinct.count();
        double loadFactor = desc.kylinconf().getGlobalDictV2InitLoadFactor();
        // Truncated to int on purpose: this is the per-bucket capacity used as a divisor below.
        int loadedCapacity = (int) (bucketThreshold * loadFactor);

        if (firstBuild) {
            int initialBucketSize = Math.max(Math.toIntExact(distinctCount / loadedCapacity), bucketPartitionSize);
            // Report the size that is actually returned, not the pre-resize default.
            logger.info("Building a global dictionary column first for  {} , the size of the bucket is set to {}", col.columnName(), initialBucketSize);
            return initialBucketSize;
        }

        NGlobalDictMetaInfo metaInfo = globalDict.getMetaInfo();
        long[] bucketCntArray = metaInfo.getBucketCount();
        double bucketOverheadFactor = desc.kylinconf().getGlobalDictV2BucketOverheadFactor();

        // Candidate 1: buckets needed for the incoming data alone. The raw threshold is
        // tried first; the load-factor-scaled capacity is used only when that already
        // exceeds the existing bucket count.
        int newDataBucketSize = Math.toIntExact(distinctCount / bucketThreshold);
        if (newDataBucketSize > metaInfo.getBucketSize()) {
            newDataBucketSize = Math.toIntExact(distinctCount / loadedCapacity);
        }

        // Candidate 2: buckets needed for the entries already in the dictionary.
        // The product is computed in long arithmetic; an int multiplication here can
        // overflow and silently flip the comparison.
        int averageBucketSize = 0;
        long totalCapacity = (long) bucketThreshold * metaInfo.getBucketSize();
        if (metaInfo.getDictCount() >= totalCapacity) {
            averageBucketSize = Math.toIntExact(metaInfo.getDictCount() / loadedCapacity);
        }

        // Candidate 3: double the current count when at least one bucket is over the limit.
        int peakBucketSize = 0;
        if (hasOverloadedBucket(bucketCntArray, bucketThreshold * bucketOverheadFactor)) {
            peakBucketSize = bucketPartitionSize * 2;
        }

        int resizeBucketSize = Math.max(Math.max(newDataBucketSize, averageBucketSize), Math.max(peakBucketSize, bucketPartitionSize));
        if (resizeBucketSize != bucketPartitionSize) {
            logger.info("Start building a global dictionary column for {}, need resize from {} to {} ", col.columnName(), bucketPartitionSize, resizeBucketSize);
            NGlobalDictBuilderAssist.resize(col, desc, resizeBucketSize, afterDistinct.sparkSession());
            logger.info("End building a global dictionary column for {}, need resize from {} to {} ", col.columnName(), bucketPartitionSize, resizeBucketSize);
        }
        return resizeBucketSize;
    }

    /** Returns true when any entry of {@code bucketCounts} is strictly greater than {@code limit}. */
    private static boolean hasOverloadedBucket(long[] bucketCounts, double limit) {
        for (long bucketCount : bucketCounts) {
            if (bucketCount > limit) {
                return true;
            }
        }
        return false;
    }
}

