/*
 * Decompiled with CFR 0.152.
 */
package org.apache.kylin.engine.spark.builder;

import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.kylin.metadata.cube.model.IndexEntity;
import org.apache.kylin.metadata.cube.model.LayoutEntity;
import org.apache.kylin.metadata.cube.model.NDataLayout;
import org.apache.kylin.metadata.cube.model.NDataSegment;
import org.apache.kylin.metadata.model.MeasureDesc;
import org.apache.kylin.metadata.model.ParameterDesc;
import org.apache.kylin.metadata.model.TblColRef;
import org.apache.spark.dict.NGlobalDictBuilderAssist;
import org.apache.spark.dict.NGlobalDictMetaInfo;
import org.apache.spark.dict.NGlobalDictionaryV2;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Static helpers used while building global dictionaries: sizing the bucket
 * layout of a column's dictionary and working out which columns need a
 * global dictionary for a given set of indexes/layouts.
 */
public class DictionaryBuilderHelper {
    protected static final Logger logger = LoggerFactory.getLogger(DictionaryBuilderHelper.class);

    /**
     * Calculates how many buckets the global dictionary of {@code col} should use
     * for the incoming distinct values, and resizes an already existing dictionary
     * when the calculated size differs from its current one.
     *
     * <p>For a dictionary that is built for the first time the result is the larger
     * of the current (or default) bucket size and
     * {@code distinctCount / (bucketThreshold * loadFactor)}.
     *
     * <p>For an existing dictionary the result is the maximum of:
     * <ul>
     *   <li>the current bucket size,</li>
     *   <li>the buckets required by the new data,</li>
     *   <li>the buckets required by the total dictionary entry count, once that
     *       count reaches {@code bucketThreshold * bucketSize},</li>
     *   <li>twice the current bucket size if any single bucket holds more than
     *       {@code bucketThreshold * bucketOverheadFactor} entries.</li>
     * </ul>
     * If that maximum differs from the current size,
     * {@link NGlobalDictBuilderAssist#resize} is invoked before returning.
     *
     * @param seg           segment providing the project and the configuration values
     * @param col           column whose global dictionary is being sized
     * @param afterDistinct dataset whose row count is used as the number of new values
     * @return the bucket size to use for the build
     * @throws IOException         declared by the dictionary / resize calls
     * @throws ArithmeticException if a computed bucket count does not fit in an
     *                             {@code int}, or a configured capacity evaluates to zero
     */
    public static int calculateBucketSize(NDataSegment seg, TblColRef col, Dataset<Row> afterDistinct) throws IOException {
        NGlobalDictionaryV2 globalDict = new NGlobalDictionaryV2(seg.getProject(), col.getTable(), col.getName(),
                seg.getConfig().getHdfsWorkingDirectory());
        int bucketPartitionSize = globalDict.getBucketSizeOrDefault(seg.getConfig().getGlobalDictV2MinHashPartitions());
        int bucketThreshold = seg.getConfig().getGlobalDictV2ThresholdBucketSize();

        boolean firstBuild = globalDict.isFirst();
        long distinctCount = afterDistinct.count();
        double loadFactor = seg.getConfig().getGlobalDictV2InitLoadFactor();
        // Per-bucket capacity after applying the initial load factor (truncated to int).
        int loadedBucketCapacity = (int) (bucketThreshold * loadFactor);

        if (firstBuild) {
            int initialBucketSize = Math.max(bucketsFor(distinctCount, loadedBucketCapacity), bucketPartitionSize);
            // Log the size that is actually returned, not the previous/default one.
            logger.info("Building a global dictionary column first for  {} , the size of the bucket is set to {}",
                    col.getName(), initialBucketSize);
            return initialBucketSize;
        }

        NGlobalDictMetaInfo metaInfo = globalDict.getMetaInfo();
        double bucketOverheadFactor = seg.getConfig().getGlobalDictV2BucketOverheadFactor();

        // Buckets needed by the new data alone; the load factor is only applied
        // once the plain estimate already exceeds the current bucket count.
        int newDataBucketSize = bucketsFor(distinctCount, bucketThreshold);
        if (newDataBucketSize > metaInfo.getBucketSize()) {
            newDataBucketSize = bucketsFor(distinctCount, loadedBucketCapacity);
        }

        // Buckets needed by the whole dictionary once it is full on average.
        // Widen before multiplying so a large threshold * bucket count cannot overflow int.
        int averageBucketSize = 0;
        if (metaInfo.getDictCount() >= (long) bucketThreshold * metaInfo.getBucketSize()) {
            averageBucketSize = bucketsFor(metaInfo.getDictCount(), loadedBucketCapacity);
        }

        // A single over-full bucket doubles the bucket count.
        int peakBucketSize = 0;
        for (long bucketCnt : metaInfo.getBucketCount()) {
            if (bucketCnt > bucketThreshold * bucketOverheadFactor) {
                peakBucketSize = bucketPartitionSize * 2;
                break;
            }
        }

        int resizeBucketSize = Math.max(Math.max(newDataBucketSize, averageBucketSize),
                Math.max(peakBucketSize, bucketPartitionSize));
        if (resizeBucketSize != bucketPartitionSize) {
            logger.info("Start building a global dictionary column for {}, need resize from {} to {} ",
                    col.getName(), bucketPartitionSize, resizeBucketSize);
            NGlobalDictBuilderAssist.resize(col, seg, resizeBucketSize, afterDistinct.sparkSession());
            logger.info("End building a global dictionary column for {}, need resize from {} to {} ",
                    col.getName(), bucketPartitionSize, resizeBucketSize);
        }
        return resizeBucketSize;
    }

    /** Returns {@code entryCount / entriesPerBucket} as an int, failing loudly on int overflow. */
    private static int bucketsFor(long entryCount, int entriesPerBucket) {
        return Math.toIntExact(entryCount / entriesPerBucket);
    }

    /**
     * Collects the columns that need a global dictionary for the given layouts.
     *
     * <p>Every effective measure of every layout's index is checked with
     * {@link #needGlobalDict(MeasureDesc)}; for each measure that qualifies, the
     * column of the measure function's first parameter is added to the result.
     *
     * @param layouts layouts to inspect
     * @return a new mutable set of the collected columns
     */
    protected static Set<TblColRef> findNeedDictCols(List<LayoutEntity> layouts) {
        Set<TblColRef> dictColSet = Sets.newHashSet();
        for (LayoutEntity layout : layouts) {
            for (MeasureDesc measureDesc : layout.getIndex().getEffectiveMeasures().values()) {
                if (DictionaryBuilderHelper.needGlobalDict(measureDesc) == null) {
                    continue;
                }
                TblColRef col = ((ParameterDesc) measureDesc.getFunction().getParameters().get(0)).getColRef();
                dictColSet.add(col);
            }
        }
        return dictColSet;
    }

    /**
     * Returns the global-dictionary columns required by the layouts of
     * {@code toBuildIndexEntities} that are not already required by the layouts
     * recorded in the segment's details.
     *
     * @param seg                  segment whose existing layouts (if it has segment
     *                             details) are treated as already built
     * @param toBuildIndexEntities indexes that are about to be built
     * @return a new mutable set of columns needed only by the layouts to build
     */
    public static Set<TblColRef> extractTreeRelatedGlobalDictToBuild(NDataSegment seg, Collection<IndexEntity> toBuildIndexEntities) {
        List<LayoutEntity> toBuildCuboids = Lists.newArrayList();
        for (IndexEntity indexEntity : toBuildIndexEntities) {
            toBuildCuboids.addAll(indexEntity.getLayouts());
        }

        List<LayoutEntity> buildedLayouts = Lists.newArrayList();
        if (seg.getSegDetails() != null) {
            for (NDataLayout cuboid : seg.getSegDetails().getLayouts()) {
                buildedLayouts.add(cuboid.getLayout());
            }
        }

        Set<TblColRef> buildedColRefSet = DictionaryBuilderHelper.findNeedDictCols(buildedLayouts);
        Set<TblColRef> toBuildColRefSet = DictionaryBuilderHelper.findNeedDictCols(toBuildCuboids);
        toBuildColRefSet.removeIf(buildedColRefSet::contains);
        return toBuildColRefSet;
    }

    /**
     * Returns every global-dictionary column required by the layouts of
     * {@code toBuildIndexEntities}, regardless of what the segment already contains.
     *
     * @param seg                  not used by this method; kept for signature compatibility
     * @param toBuildIndexEntities indexes whose layouts are inspected
     * @return a new mutable set of the required columns
     */
    public static Set<TblColRef> extractTreeRelatedGlobalDicts(NDataSegment seg, Collection<IndexEntity> toBuildIndexEntities) {
        List<LayoutEntity> toBuildCuboids = toBuildIndexEntities.stream()
                .flatMap(entity -> entity.getLayouts().stream())
                .collect(Collectors.toList());
        return DictionaryBuilderHelper.findNeedDictCols(toBuildCuboids);
    }

    /**
     * Decides whether a measure needs a global dictionary.
     *
     * @param measure measure to inspect
     * @return the first column referenced by the measure's function when the
     *         function's return data type name is {@code "bitmap"} (case-insensitive);
     *         {@code null} otherwise, including when the type name itself is null
     * @throws IllegalArgumentException if the measure is a bitmap measure but its
     *                                  function references no column
     */
    public static TblColRef needGlobalDict(MeasureDesc measure) {
        String returnDataTypeName = measure.getFunction().getReturnDataType().getName();
        if (!"bitmap".equalsIgnoreCase(returnDataTypeName)) {
            return null;
        }
        List<TblColRef> cols = measure.getFunction().getColRefs();
        Preconditions.checkArgument(cols.size() >= 1);
        return cols.get(0);
    }
}

