/*
 * Decompiled with CFR 0.152.
 */
package ai.h2o.targetencoding;

import ai.h2o.targetencoding.BlendingParams;
import ai.h2o.targetencoding.TargetEncoderFrameHelper;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Map;
import java.util.Random;
import water.DKV;
import water.Iced;
import water.Key;
import water.Keyed;
import water.Lockable;
import water.MRTask;
import water.Scope;
import water.fvec.CategoricalWrappedVec;
import water.fvec.Chunk;
import water.fvec.Frame;
import water.fvec.Vec;
import water.fvec.task.FillNAWithDoubleValueTask;
import water.fvec.task.FillNAWithLongValueTask;
import water.rapids.Merge;
import water.rapids.Rapids;
import water.rapids.Val;
import water.rapids.ast.prims.mungers.AstGroup;
import water.util.IcedHashMapGeneric;
import water.util.Log;

public class TargetEncoder
extends Iced<TargetEncoder> {
    /** Suffix appended to a source column name to form the name of its encoded column. */
    public static final String ENCODED_COLUMN_POSTFIX = "_te";
    /** Blending parameters used by {@code applyTargetEncoding} when the caller passes {@code null}. */
    public static final BlendingParams DEFAULT_BLENDING_PARAMS = new BlendingParams(20.0, 10.0);
    // NOTE(review): the two column names below are public, static and not final, so any code can reassign them.
    /** Name of the numerator column in encoding-map frames. */
    public static String NUMERATOR_COL_NAME = "numerator";
    /** Name of the denominator column in encoding-map frames. */
    public static String DENOMINATOR_COL_NAME = "denominator";
    /** Names of the columns this encoder processes; assigned once in the constructor. */
    private final String[] _columnNamesToEncode;

    /**
     * Creates an encoder for the given columns.
     *
     * @param columnNamesToEncode names of the columns to encode; must be non-null and non-empty
     * @throws IllegalStateException if {@code columnNamesToEncode} is {@code null} or empty
     */
    public TargetEncoder(String[] columnNamesToEncode) {
        boolean hasColumns = columnNamesToEncode != null && columnNamesToEncode.length > 0;
        if (!hasColumns) {
            throw new IllegalStateException("Argument 'columnsToEncode' is not defined or empty");
        }
        this._columnNamesToEncode = columnNamesToEncode;
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    /**
     * Builds one encoding frame per column to encode. Rows with a missing target are filtered out,
     * NAs in each encoded column are replaced by a new {@code "<column>_NA"} level, and the data is
     * grouped by the column (and by the fold column, when given) to obtain the sum of the target
     * ({@link #NUMERATOR_COL_NAME}) and the row count ({@link #DENOMINATOR_COL_NAME}).
     *
     * @param data                     frame holding the columns to encode and the target column
     * @param targetColumnName         name of the target column; must not be one of the encoded columns
     * @param foldColumnName           optional fold column to group by in addition; may be {@code null}
     * @param imputeNAsWithNewCategory accepted for API compatibility; this method does not consult it
     *                                 and always imputes NAs with a new level
     * @return map from encoded column name to its encoding frame
     * @throws IllegalStateException    if {@code data} or {@code targetColumnName} is missing, or a
     *                                  column to encode is not categorical
     * @throws IllegalArgumentException if the target column is among the columns to encode
     */
    public IcedHashMapGeneric<String, Frame> prepareEncodingMap(Frame data, String targetColumnName, String foldColumnName, boolean imputeNAsWithNewCategory) {
        if (data == null) {
            throw new IllegalStateException("Argument 'data' is missing, with no default");
        }
        if (targetColumnName == null || targetColumnName.equals("")) {
            throw new IllegalStateException("Argument 'target' is missing, with no default");
        }
        if (!this.checkAllTEColumnsExistAndAreCategorical(data, this._columnNamesToEncode)) {
            throw new IllegalStateException("Argument 'columnsToEncode' should contain only names of categorical columns");
        }
        if (Arrays.asList(this._columnNamesToEncode).contains(targetColumnName)) {
            throw new IllegalArgumentException("Columns for target encoding contain target column.");
        }
        int targetIndex = data.find(targetColumnName);
        Frame dataWithoutNAsForTarget = null;
        Frame dataWithEncodedTarget = null;
        // Every encoding frame created so far, so that a failure half-way through does not leak them.
        ArrayList<Frame> createdEncodingFrames = new ArrayList<Frame>();
        boolean completed = false;
        try {
            dataWithoutNAsForTarget = this.filterOutNAsFromTargetColumn(data, targetIndex);
            dataWithEncodedTarget = this.ensureTargetColumnIsBinaryCategorical(dataWithoutNAsForTarget, targetColumnName);
            IcedHashMapGeneric<String, Frame> columnToEncodingMap = new IcedHashMapGeneric<String, Frame>();
            for (String teColumnName : this._columnNamesToEncode) {
                this.imputeNAsForColumn(dataWithEncodedTarget, teColumnName, teColumnName + "_NA");
                Frame teColumnFrame = this.groupThenAggregateForNumeratorAndDenominator(dataWithEncodedTarget, teColumnName, foldColumnName, targetIndex);
                createdEncodingFrames.add(teColumnFrame);
                TargetEncoderFrameHelper.renameColumn(teColumnFrame, "sum_" + targetColumnName, NUMERATOR_COL_NAME);
                TargetEncoderFrameHelper.renameColumn(teColumnFrame, "nrow", DENOMINATOR_COL_NAME);
                columnToEncodingMap.put(teColumnName, teColumnFrame);
            }
            completed = true;
            return columnToEncodingMap;
        } finally {
            // Temporary frames are released exactly once, on both the success and the failure path.
            if (dataWithoutNAsForTarget != null) {
                dataWithoutNAsForTarget.delete();
            }
            // ensureTargetColumnIsBinaryCategorical may hand back the very same frame; delete it only once.
            if (dataWithEncodedTarget != null && dataWithEncodedTarget != dataWithoutNAsForTarget) {
                dataWithEncodedTarget.delete();
            }
            if (!completed) {
                // The caller never receives the map on failure, so the partial results must be removed here.
                for (Frame orphan : createdEncodingFrames) {
                    orphan.delete();
                }
            }
        }
    }

    /**
     * Groups {@code fr} by the encoded column (and by the fold column when one is given) and
     * aggregates the target column with {@code sum} and {@code nrow}.
     *
     * @param fr             frame to group
     * @param teColumnName   name of the column being encoded
     * @param foldColumnName name of the fold column, or {@code null} to group by the encoded column only
     * @param targetIndex    index of the target column in {@code fr}
     * @return the registered frame with the grouping result
     */
    Frame groupThenAggregateForNumeratorAndDenominator(Frame fr, String teColumnName, String foldColumnName, int targetIndex) {
        int teIdx = fr.find(teColumnName);
        int[] groupingKeys = foldColumnName == null
                ? new int[]{teIdx}
                : new int[]{teIdx, fr.find(foldColumnName)};
        AstGroup.NAHandling naHandling = AstGroup.NAHandling.ALL;
        AstGroup.AGG[] aggregations = new AstGroup.AGG[]{
                new AstGroup.AGG(AstGroup.FCN.sum, targetIndex, naHandling, -1),
                new AstGroup.AGG(AstGroup.FCN.nrow, targetIndex, naHandling, -1)
        };
        Frame grouped = new AstGroup().performGroupingWithAggregations(fr, groupingKeys, aggregations).getFrame();
        return TargetEncoderFrameHelper.register(grouped);
    }

    /**
     * Verifies that the target column is categorical with exactly two levels.
     *
     * @param data             frame containing the target column
     * @param targetColumnName name of the target column
     * @return {@code data} itself, unchanged
     * @throws IllegalStateException if the target is not categorical or its cardinality is not 2
     */
    Frame ensureTargetColumnIsBinaryCategorical(Frame data, String targetColumnName) {
        Vec target = data.vec(targetColumnName);
        boolean categorical = target.isCategorical();
        if (!categorical) {
            throw new IllegalStateException("`target` must be a binary categorical vector. We do not support multi-class and continuos target case for now");
        }
        boolean binary = target.cardinality() == 2;
        if (!binary) {
            throw new IllegalStateException("`target` must be a binary vector. We do not support multi-class target case for now");
        }
        return data;
    }

    /**
     * Convenience overload that delegates to the four-argument variant with
     * {@code imputeNAsWithNewCategory = true}.
     *
     * @param data             frame holding the columns to encode and the target column
     * @param targetColumnName name of the target column
     * @param foldColumnName   optional fold column name; may be {@code null}
     * @return map from encoded column name to its encoding frame
     */
    public IcedHashMapGeneric<String, Frame> prepareEncodingMap(Frame data, String targetColumnName, String foldColumnName) {
        return this.prepareEncodingMap(data, targetColumnName, foldColumnName, true);
    }

    /**
     * Translates column indexes into column names.
     *
     * @param data          frame whose column names are looked up
     * @param columnIndexes indexes into the frame's name array
     * @return the names at the given indexes, in the same order
     */
    String[] getColumnNamesBy(Frame data, int[] columnIndexes) {
        String[] frameNames = data._names.clone();
        String[] selected = new String[columnIndexes.length];
        int next = 0;
        for (int columnIndex : columnIndexes) {
            selected[next++] = frameNames[columnIndex];
        }
        return selected;
    }

    /**
     * Executes a Rapids expression and registers the frame it produces.
     *
     * @param astTree Rapids expression to execute
     * @return the registered result frame
     */
    private Frame execRapidsAndGetFrame(String astTree) {
        Frame produced = Rapids.exec(astTree).getFrame();
        return TargetEncoderFrameHelper.register(produced);
    }

    /**
     * Delegates to {@code TargetEncoderFrameHelper.filterOutNAsInColumn} for the target column.
     *
     * @param data              frame to filter
     * @param targetColumnIndex index of the target column
     * @return the frame returned by the helper
     */
    Frame filterOutNAsFromTargetColumn(Frame data, int targetColumnIndex) {
        Frame withoutTargetNAs = TargetEncoderFrameHelper.filterOutNAsInColumn(data, targetColumnIndex);
        return withoutTargetNAs;
    }

    /**
     * Replaces missing values in a categorical column with a new level appended to its domain.
     * The domain is only extended when the fill task reports that something was imputed.
     *
     * @param data         frame that is modified in place
     * @param teColumnName name of the column to impute
     * @param strToImpute  label of the new level
     * @return {@code data}
     */
    Frame imputeNAsForColumn(Frame data, String teColumnName, String strToImpute) {
        int teIdx = data.find(teColumnName);
        Vec teVec = data.vec(teIdx);
        // The new level takes the index right after the existing levels.
        int naLevelIdx = teVec.cardinality();
        FillNAWithLongValueTask fillTask = new FillNAWithLongValueTask(teIdx, (long) naLevelIdx);
        fillTask.doAll(data);
        if (!fillTask._imputationHappened) {
            return data;
        }
        String[] currentDomain = teVec.domain();
        String[] extendedDomain = new String[naLevelIdx + 1];
        System.arraycopy(currentDomain, 0, extendedDomain, 0, currentDomain.length);
        extendedDomain[naLevelIdx] = strToImpute;
        this.updateDomainGlobally(data, teColumnName, extendedDomain);
        return data;
    }

    /**
     * Sets a new domain on a column and publishes the change through the DKV while holding the
     * frame's write lock.
     *
     * @param fr           frame owning the column
     * @param teColumnName name of the column whose domain is replaced
     * @param domain       the new domain
     */
    private void updateDomainGlobally(Frame fr, String teColumnName, String[] domain) {
        Lockable lock = fr.write_lock();
        try {
            Vec updatedVec = fr.vec(teColumnName);
            updatedVec.setDomain(domain);
            DKV.put((Keyed)updatedVec);
            fr.update();
        } finally {
            // Release the lock even when the update fails; otherwise the frame would stay write-locked.
            lock.unlock();
        }
    }

    /**
     * Delegates to {@code TargetEncoderFrameHelper.filterNotByValue} on the fold column of the
     * encoding map.
     *
     * @param encodingMap      encoding frame that contains the fold column
     * @param foldColumnName   name of the fold column
     * @param currentFoldValue fold value passed to the helper
     * @return the frame returned by the helper
     */
    Frame getOutOfFoldData(Frame encodingMap, String foldColumnName, long currentFoldValue) {
        return TargetEncoderFrameHelper.filterNotByValue(encodingMap, encodingMap.find(foldColumnName), currentFoldValue);
    }

    /**
     * Collects the values returned by {@code TargetEncoderFrameHelper.uniqueValuesBy} for a column
     * into a {@code long[]} and removes the intermediate vec.
     *
     * @param data        frame containing the column
     * @param columnIndex index of the column
     * @return the values of the first vec of the helper's result, in order
     */
    long[] getUniqueValuesOfTheFoldColumn(Frame data, int columnIndex) {
        Vec distinct = TargetEncoderFrameHelper.uniqueValuesBy(data, columnIndex).vec(0);
        long distinctCount = distinct.length();
        assert (distinctCount <= Integer.MAX_VALUE) : "Number of unique values exceeded Integer.MAX_VALUE";
        long[] values = new long[(int) distinctCount];
        for (int i = 0; (long) i < distinct.length(); ++i) {
            values[i] = distinct.at8((long) i);
        }
        distinct.remove();
        return values;
    }

    /**
     * Checks that every listed column is categorical. Column existence is only verified through
     * an {@code assert}, so it is checked only when assertions are enabled.
     *
     * @param data            frame to inspect
     * @param columnsToEncode names of the columns to check
     * @return {@code false} as soon as a non-categorical column is found, {@code true} otherwise
     */
    private boolean checkAllTEColumnsExistAndAreCategorical(Frame data, String[] columnsToEncode) {
        for (String columnName : columnsToEncode) {
            int idx = data.find(columnName);
            assert (idx != -1) : "Column name `" + columnName + "` was not found in the provided data frame";
            if (!data.vec(idx).isCategorical()) {
                return false;
            }
        }
        return true;
    }

    /**
     * Groups an encoding frame by the encoded column and sums its numerator and denominator
     * columns.
     *
     * @param data          encoding frame containing the numerator and denominator columns
     * @param teColumnIndex index of the column to group by
     * @return the registered frame with the grouping result
     */
    static Frame groupByTEColumnAndAggregate(Frame data, int teColumnIndex) {
        int numeratorIdx = data.find(NUMERATOR_COL_NAME);
        int denominatorIdx = data.find(DENOMINATOR_COL_NAME);
        AstGroup.NAHandling naHandling = AstGroup.NAHandling.ALL;
        AstGroup.AGG[] sums = new AstGroup.AGG[]{
                new AstGroup.AGG(AstGroup.FCN.sum, numeratorIdx, naHandling, -1),
                new AstGroup.AGG(AstGroup.FCN.sum, denominatorIdx, naHandling, -1)
        };
        int[] groupingKeys = new int[]{teColumnIndex};
        Frame grouped = new AstGroup().performGroupingWithAggregations(data, groupingKeys, sums).getFrame();
        return TargetEncoderFrameHelper.register(grouped);
    }

    /**
     * Row-binds two frames through a Rapids {@code rbind} expression.
     *
     * @param a first frame, or {@code null} to simply return {@code b}
     * @param b second frame
     * @return {@code b} when {@code a} is {@code null}, otherwise the registered result of the rbind
     */
    Frame rBind(Frame a, Frame b) {
        if (a != null) {
            String rapidsExpr = String.format("(rbind %s %s)", a._key, b._key);
            return this.execRapidsAndGetFrame(rapidsExpr);
        }
        assert (b != null);
        return b;
    }

    /**
     * Merges {@code a} with the holdout encoding map on the encoded column and the fold column.
     * On the encoding-map side the fold column is the one named {@code "foldValueForMerge"}.
     *
     * @param a                       left frame
     * @param holdoutEncodeMap        right frame
     * @param teColumnIndexOriginal   index of the encoded column in {@code a}
     * @param foldColumnIndexOriginal index of the fold column in {@code a}
     * @param teColumnIndex           index of the encoded column in {@code holdoutEncodeMap}
     * @return the merged frame
     */
    Frame mergeByTEAndFoldColumns(Frame a, Frame holdoutEncodeMap, int teColumnIndexOriginal, int foldColumnIndexOriginal, int teColumnIndex) {
        int foldIdxInMap = holdoutEncodeMap.find("foldValueForMerge");
        int[] leftKeys = new int[]{teColumnIndexOriginal, foldColumnIndexOriginal};
        int[] rightKeys = new int[]{teColumnIndex, foldIdxInMap};
        return this.merge(a, holdoutEncodeMap, leftKeys, rightKeys);
    }

    /**
     * Merges two frames on the given key columns with {@code allLeft = true}.
     * Both inputs are passed through {@code moveFirst}, after which the key columns are read from
     * positions {@code 0..ncols-1}; the input frames are therefore modified by this call.
     *
     * @param l      left frame
     * @param r      right frame
     * @param byLeft key column indexes in {@code l}
     * @param byRite key column indexes in {@code r}
     * @return the registered merge result
     */
    Frame merge(Frame l, Frame r, int[] byLeft, int[] byRite) {
        new GCForceTask().doAllNodes();
        int keyCount = byLeft.length;
        l.moveFirst(byLeft);
        r.moveFirst(byRite);
        int[][] levelMaps = new int[keyCount][];
        int[] keyPositions = new int[keyCount];
        for (int k = 0; k < keyCount; ++k) {
            keyPositions[k] = k;
            Vec leftKey = l.vec(k);
            Vec rightKey = r.vec(k);
            if (leftKey.isCategorical()) {
                assert (rightKey.isCategorical());
                // Categorical keys need a mapping between the two domains.
                levelMaps[k] = CategoricalWrappedVec.computeMap(leftKey.domain(), rightKey.domain());
            }
        }
        Frame merged = Merge.merge(l, r, keyPositions, keyPositions, true, levelMaps);
        return TargetEncoderFrameHelper.register(merged);
    }

    /**
     * Merges two frames on the encoded column only.
     *
     * @param a                     left frame
     * @param b                     right frame
     * @param teColumnIndexOriginal index of the encoded column in {@code a}
     * @param teColumnIndex         index of the encoded column in {@code b}
     * @return the merged frame
     */
    Frame mergeByTEColumn(Frame a, Frame b, int teColumnIndexOriginal, int teColumnIndex) {
        int[] leftKey = new int[]{teColumnIndexOriginal};
        int[] rightKey = new int[]{teColumnIndex};
        return this.merge(a, b, leftKey, rightKey);
    }

    /**
     * Fills missing values of a column with the supplied mean and logs how many rows were
     * affected. Nothing happens when the column has no NAs.
     *
     * @param fr          frame modified in place
     * @param columnIndex index of the column to impute
     * @param mean        value written into the missing cells
     * @return {@code fr}
     */
    Frame imputeWithMean(Frame fr, int columnIndex, double mean) {
        Vec encodings = fr.vec(columnIndex);
        assert (encodings.get_type() == 3) : "Imputation of mean value is supported only for numerical vectors.";
        long naCount = encodings.naCnt();
        if (naCount <= 0L) {
            return fr;
        }
        new FillNAWithDoubleValueTask(columnIndex, mean).doAll(fr);
        String message = String.format("Frame with id = %s was imputed with mean = %f ( %d rows were affected)", fr._key, mean, naCount);
        Log.info(new Object[]{message});
        return fr;
    }

    /**
     * Fills missing encodings either with the posterior of the NA level or with the prior mean.
     * The NA level is recognised when the last entry of the encoding map's first-column domain
     * equals {@code "<teColumnName>_NA"}; its posterior is numerator / denominator of the last row.
     *
     * @param teColumnName                  name of the encoded column
     * @param fr                            frame modified in place
     * @param columnIndex                   index of the column holding the encodings
     * @param encodingMapForCurrentTEColumn encoding map for the column
     * @param priorMean                     fallback value when no NA level is present
     * @return {@code fr}
     */
    Frame imputeWithPosteriorForNALevelOrWithPrior(String teColumnName, Frame fr, int columnIndex, Frame encodingMapForCurrentTEColumn, double priorMean) {
        int mapRowCount = (int) encodingMapForCurrentTEColumn.numRows();
        int lastRow = mapRowCount - 1;
        String lastLevel = encodingMapForCurrentTEColumn.domains()[0][lastRow];
        boolean hasNALevel = lastLevel.equals(teColumnName + "_NA");
        double naNumerator = encodingMapForCurrentTEColumn.vec(NUMERATOR_COL_NAME).at((long) lastRow);
        double naDenominator = encodingMapForCurrentTEColumn.vec(DENOMINATOR_COL_NAME).at((long) lastRow);
        double naPosterior = naNumerator / naDenominator;
        double fillValue;
        if (hasNALevel) {
            fillValue = naPosterior;
        } else {
            fillValue = priorMean;
        }
        Vec encodings = fr.vec(columnIndex);
        assert (encodings.get_type() == 3) : "Imputation of mean value is supported only for numerical vectors.";
        long naCount = encodings.naCnt();
        if (naCount <= 0L) {
            return fr;
        }
        new FillNAWithDoubleValueTask(columnIndex, fillValue).doAll(fr);
        String message = String.format("Frame with id = %s was imputed with posterior mean from NA level = %f ( %d rows were affected)", fr._key, fillValue, naCount);
        Log.info(new Object[]{message});
        return fr;
    }

    /**
     * Computes the prior mean as the mean of the numerator column divided by the mean of the
     * denominator column.
     *
     * @param fr encoding frame containing both columns
     * @return the ratio of the two column means
     */
    double calculatePriorMean(Frame fr) {
        Vec numerators = fr.vec(NUMERATOR_COL_NAME);
        Vec denominators = fr.vec(DENOMINATOR_COL_NAME);
        double numeratorMean = numerators.mean();
        double denominatorMean = denominators.mean();
        return numeratorMean / denominatorMean;
    }

    /**
     * Appends a new column to {@code fr} and fills it by running {@code CalcEncodingsWithBlending}
     * with the prior mean of {@code encodingMap}.
     *
     * @param fr                 frame with numerator and denominator columns; modified in place
     * @param encodingMap        encoding map used to compute the prior mean
     * @param appendedColumnName name of the column to append
     * @param blendingParams     parameters handed to the blending task
     * @return {@code fr}
     */
    Frame calculateAndAppendBlendedTEEncoding(Frame fr, Frame encodingMap, String appendedColumnName, BlendingParams blendingParams) {
        int numIdx = fr.find(NUMERATOR_COL_NAME);
        int denIdx = fr.find(DENOMINATOR_COL_NAME);
        double priorMean = this.calculatePriorMean(encodingMap);
        Log.info(new Object[]{"Global mean for blending = " + priorMean});
        // The new column starts out as a constant-zero vec and is overwritten by the task.
        Vec placeholder = fr.anyVec().makeCon(0.0);
        fr.add(appendedColumnName, placeholder);
        int encodedIdx = fr.find(appendedColumnName);
        CalcEncodingsWithBlending blendingTask = new CalcEncodingsWithBlending(numIdx, denIdx, priorMean, blendingParams, encodedIdx);
        blendingTask.doAll(fr);
        return fr;
    }

    /**
     * Appends a new column to {@code fr} and fills it by running {@code CalcEncodings} with the
     * prior mean of {@code encodingMap}.
     *
     * @param fr                 frame with numerator and denominator columns; modified in place
     * @param encodingMap        encoding map used to compute the prior mean
     * @param appendedColumnName name of the column to append
     * @return {@code fr}
     */
    Frame calculateAndAppendTEEncoding(Frame fr, Frame encodingMap, String appendedColumnName) {
        int numIdx = fr.find(NUMERATOR_COL_NAME);
        int denIdx = fr.find(DENOMINATOR_COL_NAME);
        double priorMean = this.calculatePriorMean(encodingMap);
        // The new column starts out as a constant-zero vec and is overwritten by the task.
        Vec placeholder = fr.anyVec().makeCon(0.0);
        fr.add(appendedColumnName, placeholder);
        int encodedIdx = fr.find(appendedColumnName);
        CalcEncodings encodingTask = new CalcEncodings(numIdx, denIdx, priorMean, encodedIdx);
        encodingTask.doAll(fr);
        return fr;
    }

    /**
     * Adds noise to a column using a temporary {@code "runif"} column of random values.
     * The temporary column and its backing vecs are removed before returning.
     *
     * @param fr                frame modified in place
     * @param applyToColumnName name of the column that receives the noise
     * @param noiseLevel        noise amplitude passed to {@code AddNoiseTask}
     * @param seed              seed for the random vec; {@code -1} selects a random seed
     * @return {@code fr}
     */
    Frame addNoise(Frame fr, String applyToColumnName, double noiseLevel, long seed) {
        int targetColumnIdx = fr.find(applyToColumnName);
        long effectiveSeed = seed == -1L ? new Random().nextLong() : seed;
        Vec zeros = fr.anyVec().makeCon(0.0);
        Vec uniform = zeros.makeRand(effectiveSeed);
        Vec added = fr.add("runif", uniform);
        int uniformIdx = fr.find("runif");
        AddNoiseTask noiseTask = new AddNoiseTask(targetColumnIdx, uniformIdx, noiseLevel);
        noiseTask.doAll(fr);
        // Detach the helper column from the frame first, then drop the vecs themselves.
        fr.remove("runif");
        uniform.remove();
        zeros.remove();
        added.remove();
        return fr;
    }

    /**
     * Runs {@code SubtractCurrentRowForLeaveOneOutTask} over the frame so that every row's own
     * target value is taken out of its numerator and denominator.
     *
     * @param data             frame with numerator, denominator and target columns; modified in place
     * @param targetColumnName name of the target column
     * @return {@code data}
     */
    Frame subtractTargetValueForLOO(Frame data, String targetColumnName) {
        int numIdx = data.find(NUMERATOR_COL_NAME);
        int denIdx = data.find(DENOMINATOR_COL_NAME);
        int targetIdx = data.find(targetColumnName);
        SubtractCurrentRowForLeaveOneOutTask looTask = new SubtractCurrentRowForLeaveOneOutTask(numIdx, denIdx, targetIdx);
        looTask.doAll(data);
        return data;
    }

    /**
     * Overload with an explicit noise level that lets the main variant generate the result key.
     * {@code imputeNAsWithNewCategory} is accepted but not forwarded.
     *
     * @return the frame with the encoded columns appended
     */
    public Frame applyTargetEncoding(Frame data, String targetColumnName, Map<String, Frame> columnToEncodingMap, DataLeakageHandlingStrategy dataLeakageHandlingStrategy, String foldColumnName, boolean withBlendedAvg, double noiseLevel, boolean imputeNAsWithNewCategory, BlendingParams blendingParams, long seed) {
        // A null key makes the main variant create a fresh one.
        Key<Frame> generatedKey = null;
        return this.applyTargetEncoding(data, targetColumnName, columnToEncodingMap, dataLeakageHandlingStrategy, foldColumnName, withBlendedAvg, noiseLevel, seed, generatedKey, blendingParams);
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    /**
     * Main entry point: applies the prepared encoding maps to a deep copy of {@code data}.
     *
     * For every column to encode, NAs are imputed with a {@code "<column>_NA"} level, the copy is
     * merged with the column's encoding map according to the data-leakage strategy, the encoded
     * column {@code <column> + ENCODED_COLUMN_POSTFIX} is calculated, noise is applied when
     * {@code noiseLevel > 0}, remaining NAs in the encoded column are imputed and the helper
     * numerator/denominator columns are removed.
     *
     * @param data                        frame to encode; it is deep-copied, the copy is what gets modified
     * @param targetColumnName            name of the target column
     * @param columnToEncodingMap         encoding frames keyed by the name of the encoded column
     * @param dataLeakageHandlingStrategy one of {@code KFold}, {@code LeaveOneOut} or {@code None}
     * @param foldColumnName              fold column name; required for {@code KFold}
     * @param useBlending                 whether the blended encoding calculation is used
     * @param noiseLevel                  non-negative noise level; {@code 0} disables noise
     * @param seed                        seed forwarded to the noise step
     * @param encodedFrameKey             key for the result; a new key is made when {@code null}
     * @param blendingParams              blending parameters; {@code DEFAULT_BLENDING_PARAMS} when {@code null}
     * @return the encoded frame, stored in the DKV under {@code encodedFrameKey}
     * @throws IllegalStateException if {@code noiseLevel} is negative, or if the strategy is
     *                               {@code KFold} and {@code foldColumnName} is {@code null}
     */
    public Frame applyTargetEncoding(Frame data, String targetColumnName, Map<String, Frame> columnToEncodingMap, DataLeakageHandlingStrategy dataLeakageHandlingStrategy, String foldColumnName, boolean useBlending, double noiseLevel, long seed, Key<Frame> encodedFrameKey, BlendingParams blendingParams) {
        if (blendingParams == null) {
            blendingParams = DEFAULT_BLENDING_PARAMS;
        }
        if (noiseLevel < 0.0) {
            throw new IllegalStateException("`_noiseLevel` must be non-negative");
        }
        Frame dataWithAllEncodings = null;
        try {
            if (encodedFrameKey == null) {
                encodedFrameKey = Key.make();
            }
            // Work on a copy so that the caller's frame is left untouched.
            dataWithAllEncodings = data.deepCopy(encodedFrameKey.toString());
            DKV.put((Keyed)dataWithAllEncodings);
            if (dataLeakageHandlingStrategy == DataLeakageHandlingStrategy.LeaveOneOut) {
                this.ensureTargetColumnIsBinaryCategorical(dataWithAllEncodings, targetColumnName);
            }
            block24: for (String teColumnName : this._columnNamesToEncode) {
                this.imputeNAsForColumn(dataWithAllEncodings, teColumnName, teColumnName + "_NA");
                String newEncodedColumnName = teColumnName + ENCODED_COLUMN_POSTFIX;
                Frame encodingMapForCurrentTEColumn = columnToEncodingMap.get(teColumnName);
                double priorMeanFromTrainingDataset = this.calculatePriorMean(encodingMapForCurrentTEColumn);
                int teColumnIndex = dataWithAllEncodings.find(teColumnName);
                switch (dataLeakageHandlingStrategy) {
                    case KFold: {
                        Frame holdoutEncodeMap = null;
                        Frame dataWithMergedAggregationsK = null;
                        try {
                            if (foldColumnName == null) {
                                throw new IllegalStateException("`foldColumn` must be provided for dataLeakageHandlingStrategy = KFold");
                            }
                            int teColumnIndexInEncodingMap = encodingMapForCurrentTEColumn.find(teColumnName);
                            int foldColumnIndex = dataWithAllEncodings.find(foldColumnName);
                            // NOTE(review): the fold column is read from hard-coded index 1 of the encoding map — confirm this always matches foldColumnName.
                            long[] foldValues = this.getUniqueValuesOfTheFoldColumn(encodingMapForCurrentTEColumn, 1);
                            Scope.enter();
                            try {
                                // For each fold value: aggregate the out-of-fold rows and tag the result with that fold value.
                                for (long foldValue : foldValues) {
                                    Frame outOfFoldData = this.getOutOfFoldData(encodingMapForCurrentTEColumn, foldColumnName, foldValue);
                                    Frame groupedByTEColumnAndAggregate = TargetEncoder.groupByTEColumnAndAggregate(outOfFoldData, teColumnIndexInEncodingMap);
                                    TargetEncoderFrameHelper.renameColumn(groupedByTEColumnAndAggregate, "sum_numerator", NUMERATOR_COL_NAME);
                                    TargetEncoderFrameHelper.renameColumn(groupedByTEColumnAndAggregate, "sum_denominator", DENOMINATOR_COL_NAME);
                                    Frame groupedWithAppendedFoldColumn = TargetEncoderFrameHelper.addCon(groupedByTEColumnAndAggregate, "foldValueForMerge", foldValue);
                                    if (holdoutEncodeMap == null) {
                                        holdoutEncodeMap = groupedWithAppendedFoldColumn;
                                    } else {
                                        Frame newHoldoutEncodeMap = this.rBind(holdoutEncodeMap, groupedWithAppendedFoldColumn);
                                        holdoutEncodeMap.delete();
                                        holdoutEncodeMap = newHoldoutEncodeMap;
                                    }
                                    outOfFoldData.delete();
                                    // NOTE(review): on the first iteration holdoutEncodeMap is the same object as the tracked frame — confirm Scope.exit does not remove it when there is only one fold value.
                                    Scope.track((Frame[])new Frame[]{groupedWithAppendedFoldColumn});
                                }
                            }
                            finally {
                                Scope.exit((Key[])new Key[0]);
                            }
                            dataWithMergedAggregationsK = this.mergeByTEAndFoldColumns(dataWithAllEncodings, holdoutEncodeMap, teColumnIndex, foldColumnIndex, teColumnIndexInEncodingMap);
                            Frame withEncodingsFrameK = this.calculateEncoding(dataWithMergedAggregationsK, encodingMapForCurrentTEColumn, newEncodedColumnName, useBlending, blendingParams);
                            Frame withAddedNoiseEncodingsFrameK = this.applyNoise(withEncodingsFrameK, newEncodedColumnName, noiseLevel, seed);
                            Frame imputedEncodingsFrameK = this.imputeWithMean(withAddedNoiseEncodingsFrameK, withAddedNoiseEncodingsFrameK.find(newEncodedColumnName), priorMeanFromTrainingDataset);
                            this.removeNumeratorAndDenominatorColumns(imputedEncodingsFrameK);
                            // The merged frame replaces the previous working frame for the next column.
                            dataWithAllEncodings.delete();
                            dataWithAllEncodings = imputedEncodingsFrameK;
                            continue block24;
                        }
                        catch (Exception ex) {
                            if (dataWithMergedAggregationsK != null) {
                                dataWithMergedAggregationsK.delete();
                            }
                            throw ex;
                        }
                        finally {
                            if (holdoutEncodeMap != null) {
                                holdoutEncodeMap.delete();
                            }
                        }
                    }
                    case LeaveOneOut: {
                        Frame groupedTargetEncodingMap = null;
                        Frame dataWithMergedAggregationsL = null;
                        try {
                            this.foldColumnIsInEncodingMapCheck(foldColumnName, encodingMapForCurrentTEColumn);
                            groupedTargetEncodingMap = TargetEncoder.groupingIgnoringFoldColumn(foldColumnName, encodingMapForCurrentTEColumn, teColumnName);
                            int teColumnIndexInGroupedEncodingMap = groupedTargetEncodingMap.find(teColumnName);
                            dataWithMergedAggregationsL = this.mergeByTEColumn(dataWithAllEncodings, groupedTargetEncodingMap, teColumnIndex, teColumnIndexInGroupedEncodingMap);
                            // Take each row's own target value out of its numerator/denominator before encoding.
                            Frame subtractedFrameL = this.subtractTargetValueForLOO(dataWithMergedAggregationsL, targetColumnName);
                            Frame withEncodingsFrameL = this.calculateEncoding(subtractedFrameL, groupedTargetEncodingMap, newEncodedColumnName, useBlending, blendingParams);
                            Frame withAddedNoiseEncodingsFrameL = this.applyNoise(withEncodingsFrameL, newEncodedColumnName, noiseLevel, seed);
                            Frame imputedEncodingsFrameL = this.imputeWithMean(withAddedNoiseEncodingsFrameL, withAddedNoiseEncodingsFrameL.find(newEncodedColumnName), priorMeanFromTrainingDataset);
                            this.removeNumeratorAndDenominatorColumns(imputedEncodingsFrameL);
                            dataWithAllEncodings.delete();
                            dataWithAllEncodings = imputedEncodingsFrameL;
                            continue block24;
                        }
                        catch (Exception ex) {
                            if (dataWithMergedAggregationsL != null) {
                                dataWithMergedAggregationsL.delete();
                            }
                            throw ex;
                        }
                        finally {
                            if (groupedTargetEncodingMap != null) {
                                groupedTargetEncodingMap.delete();
                            }
                        }
                    }
                    case None: {
                        Frame groupedTargetEncodingMapForNone = null;
                        Frame dataWithMergedAggregationsN = null;
                        try {
                            this.foldColumnIsInEncodingMapCheck(foldColumnName, encodingMapForCurrentTEColumn);
                            groupedTargetEncodingMapForNone = TargetEncoder.groupingIgnoringFoldColumn(foldColumnName, encodingMapForCurrentTEColumn, teColumnName);
                            int teColumnIndexInGroupedEncodingMapNone = groupedTargetEncodingMapForNone.find(teColumnName);
                            dataWithMergedAggregationsN = this.mergeByTEColumn(dataWithAllEncodings, groupedTargetEncodingMapForNone, teColumnIndex, teColumnIndexInGroupedEncodingMapNone);
                            Frame withEncodingsFrameN = this.calculateEncoding(dataWithMergedAggregationsN, groupedTargetEncodingMapForNone, newEncodedColumnName, useBlending, blendingParams);
                            Frame withAddedNoiseEncodingsFrameN = this.applyNoise(withEncodingsFrameN, newEncodedColumnName, noiseLevel, seed);
                            // Unlike the other strategies, NAs here are imputed via the NA level's posterior when that level exists.
                            Frame imputedEncodingsFrameN = this.imputeWithPosteriorForNALevelOrWithPrior(teColumnName, withAddedNoiseEncodingsFrameN, withAddedNoiseEncodingsFrameN.find(newEncodedColumnName), groupedTargetEncodingMapForNone, priorMeanFromTrainingDataset);
                            this.removeNumeratorAndDenominatorColumns(imputedEncodingsFrameN);
                            dataWithAllEncodings.delete();
                            dataWithAllEncodings = imputedEncodingsFrameN;
                            if (groupedTargetEncodingMapForNone == null) continue block24;
                        }
                        catch (Exception ex) {
                            // Decompiled form of a catch + finally: delete the merged frame, then the grouped map, then rethrow.
                            try {
                                if (dataWithMergedAggregationsN != null) {
                                    dataWithMergedAggregationsN.delete();
                                }
                                throw ex;
                            }
                            catch (Throwable throwable) {
                                if (groupedTargetEncodingMapForNone != null) {
                                    groupedTargetEncodingMapForNone.delete();
                                }
                                throw throwable;
                            }
                        }
                        groupedTargetEncodingMapForNone.delete();
                        continue block24;
                    }
                }
            }
            // Re-register the final working frame under the requested key.
            DKV.remove((Key)dataWithAllEncodings._key);
            DKV.put((Key)encodedFrameKey, (Iced)dataWithAllEncodings);
            dataWithAllEncodings._key = encodedFrameKey;
            return dataWithAllEncodings;
        }
        catch (Exception ex) {
            // On any failure the partially encoded working frame is removed before rethrowing.
            if (dataWithAllEncodings != null) {
                dataWithAllEncodings.delete();
            }
            throw ex;
        }
    }

    /**
     * Dispatches to the blended or the plain encoding calculation.
     *
     * @param preparedFrame        frame with numerator and denominator columns
     * @param encodingMap          encoding map used for the prior mean
     * @param newEncodedColumnName name of the column to append
     * @param withBlendedAvg       {@code true} to use the blended calculation
     * @param blendingParams       blending parameters; only used when blending
     * @return the frame returned by the chosen calculation
     */
    Frame calculateEncoding(Frame preparedFrame, Frame encodingMap, String newEncodedColumnName, boolean withBlendedAvg, BlendingParams blendingParams) {
        return withBlendedAvg
                ? this.calculateAndAppendBlendedTEEncoding(preparedFrame, encodingMap, newEncodedColumnName, blendingParams)
                : this.calculateAndAppendTEEncoding(preparedFrame, encodingMap, newEncodedColumnName);
    }

    /**
     * Adds noise to the encoded column only when the noise level is strictly positive.
     *
     * @param frameWithEncodings   frame containing the encoded column
     * @param newEncodedColumnName name of the encoded column
     * @param noiseLevel           noise level; values that are not {@code > 0} leave the frame as is
     * @param seed                 seed forwarded to {@code addNoise}
     * @return the frame, with or without noise applied
     */
    private Frame applyNoise(Frame frameWithEncodings, String newEncodedColumnName, double noiseLevel, long seed) {
        if (noiseLevel > 0.0) {
            return this.addNoise(frameWithEncodings, newEncodedColumnName, noiseLevel, seed);
        }
        return frameWithEncodings;
    }

    /**
     * Detaches the numerator and denominator columns from the frame and removes their vecs.
     *
     * @param fr frame modified in place
     */
    void removeNumeratorAndDenominatorColumns(Frame fr) {
        fr.remove(NUMERATOR_COL_NAME).remove();
        fr.remove(DENOMINATOR_COL_NAME).remove();
    }

    /**
     * Rejects an encoding map with more than three columns when no fold column name was given.
     *
     * @param foldColumnName    fold column name, possibly {@code null}
     * @param targetEncodingMap encoding map to inspect
     * @throws IllegalStateException if {@code foldColumnName} is {@code null} and the map has
     *                               more than three columns
     */
    void foldColumnIsInEncodingMapCheck(String foldColumnName, Frame targetEncodingMap) {
        if (foldColumnName != null) {
            return;
        }
        if (targetEncodingMap.names().length > 3) {
            throw new IllegalStateException("Passed along encoding map possibly contains fold column. Please provide fold column name so that it becomes possible to regroup (by ignoring folds).");
        }
    }

    /**
     * Produces an encoding map without the fold dimension. Without a fold column name the map is
     * deep-copied and put into the DKV; otherwise it is regrouped by the encoded column and the
     * summed columns are renamed back to the numerator/denominator names.
     *
     * @param foldColumnName    fold column name, or {@code null} when the map has no folds
     * @param targetEncodingMap encoding map to regroup or copy
     * @param teColumnName      name of the encoded column
     * @return a new frame; the input map is not returned
     */
    public static Frame groupingIgnoringFoldColumn(String foldColumnName, Frame targetEncodingMap, String teColumnName) {
        if (foldColumnName == null) {
            Frame copy = targetEncodingMap.deepCopy(Key.make().toString());
            DKV.put((Keyed) copy);
            return copy;
        }
        int teIdx = targetEncodingMap.find(teColumnName);
        Frame regrouped = TargetEncoder.groupByTEColumnAndAggregate(targetEncodingMap, teIdx);
        TargetEncoderFrameHelper.renameColumn(regrouped, "sum_" + NUMERATOR_COL_NAME, NUMERATOR_COL_NAME);
        TargetEncoderFrameHelper.renameColumn(regrouped, "sum_" + DENOMINATOR_COL_NAME, DENOMINATOR_COL_NAME);
        return regrouped;
    }

    /**
     * Overload without an explicit noise level or result key; delegates to the variant that
     * derives the noise level from the target column, passing a {@code null} key.
     *
     * @return the frame with the encoded columns appended
     */
    public Frame applyTargetEncoding(Frame data, String targetColumnName, Map<String, Frame> targetEncodingMap, DataLeakageHandlingStrategy dataLeakageHandlingStrategy, String foldColumn, boolean withBlendedAvg, boolean imputeNAsWithNewCategory, BlendingParams blendingParams, long seed) {
        Key<Frame> generatedKey = null;
        return this.applyTargetEncoding(data, targetColumnName, targetEncodingMap, dataLeakageHandlingStrategy, foldColumn, withBlendedAvg, seed, imputeNAsWithNewCategory, generatedKey, blendingParams);
    }

    /**
     * Overload that derives the noise level from the target column before delegating to the main
     * variant: {@code 0.01 * (max - min)} for a numeric target, {@code 0.01} for any other target
     * and {@code 0} when the target column is not present in {@code data}.
     * {@code imputeNAsWithNewCategory} is accepted but not forwarded.
     *
     * @return the frame with the encoded columns appended
     */
    public Frame applyTargetEncoding(Frame data, String targetColumnName, Map<String, Frame> targetEncodingMap, DataLeakageHandlingStrategy dataLeakageHandlingStrategy, String foldColumn, boolean withBlendedAvg, long seed, boolean imputeNAsWithNewCategory, Key<Frame> encodedColumnName, BlendingParams blendingParams) {
        final double baseNoise = 0.01;
        int targetIdx = data.find(targetColumnName);
        double noiseLevel;
        if (targetIdx == -1) {
            noiseLevel = 0.0;
        } else {
            Vec target = data.vec(targetIdx);
            if (target.isNumeric()) {
                noiseLevel = baseNoise * (target.max() - target.min());
            } else {
                noiseLevel = baseNoise;
            }
        }
        return this.applyTargetEncoding(data, targetColumnName, targetEncodingMap, dataLeakageHandlingStrategy, foldColumn, withBlendedAvg, noiseLevel, seed, encodedColumnName, blendingParams);
    }

    /**
     * Overload without a fold column and without an explicit noise level; delegates with a
     * {@code null} fold column.
     *
     * @return the frame with the encoded columns appended
     */
    public Frame applyTargetEncoding(Frame data, String targetColumnName, Map<String, Frame> targetEncodingMap, DataLeakageHandlingStrategy dataLeakageHandlingStrategy, boolean withBlendedAvg, boolean imputeNAsWithNewCategory, BlendingParams blendingParams, long seed) {
        String noFoldColumn = null;
        return this.applyTargetEncoding(data, targetColumnName, targetEncodingMap, dataLeakageHandlingStrategy, noFoldColumn, withBlendedAvg, imputeNAsWithNewCategory, blendingParams, seed);
    }

    /**
     * Overload without a fold column but with an explicit noise level; delegates with a
     * {@code null} fold column. The {@code KFold} strategy is rejected by an {@code assert},
     * so only when assertions are enabled.
     *
     * @return the frame with the encoded columns appended
     */
    public Frame applyTargetEncoding(Frame data, String targetColumnName, Map<String, Frame> targetEncodingMap, DataLeakageHandlingStrategy dataLeakageHandlingStrategy, boolean withBlendedAvg, double noiseLevel, boolean imputeNAsWithNewCategory, BlendingParams blendingParams, long seed) {
        assert (dataLeakageHandlingStrategy != DataLeakageHandlingStrategy.KFold) : "Use another overloaded method for KFold dataLeakageHandlingStrategy.";
        String noFoldColumn = null;
        return this.applyTargetEncoding(data, targetColumnName, targetEncodingMap, dataLeakageHandlingStrategy, noFoldColumn, withBlendedAvg, noiseLevel, imputeNAsWithNewCategory, blendingParams, seed);
    }

    /**
     * Task that removes each row's own contribution from its numerator and
     * denominator columns.
     *
     * <p>For every row whose target value is not NA, the target value is subtracted
     * from the numerator and {@code 1.0} is subtracted from the denominator. Rows
     * with an NA target are left untouched.
     */
    public static class SubtractCurrentRowForLeaveOneOutTask
    extends MRTask<SubtractCurrentRowForLeaveOneOutTask> {
        private int _numeratorIdx;
        private int _denominatorIdx;
        private int _targetIdx;

        /**
         * @param numeratorIdx   index of the numerator column within the chunk array
         * @param denominatorIdx index of the denominator column within the chunk array
         * @param targetIdx      index of the target column within the chunk array
         */
        public SubtractCurrentRowForLeaveOneOutTask(int numeratorIdx, int denominatorIdx, int targetIdx) {
            _numeratorIdx = numeratorIdx;
            _denominatorIdx = denominatorIdx;
            _targetIdx = targetIdx;
        }

        /** Updates the numerator and denominator chunks in place, row by row. */
        public void map(Chunk[] cs) {
            Chunk numerators = cs[_numeratorIdx];
            Chunk denominators = cs[_denominatorIdx];
            Chunk targets = cs[_targetIdx];
            for (int row = 0; row < numerators._len; row++) {
                if (!targets.isNA(row)) {
                    numerators.set(row, numerators.atd(row) - targets.atd(row));
                    denominators.set(row, denominators.atd(row) - 1.0);
                }
            }
        }
    }

    /**
     * Task that perturbs one column using values read from a second column.
     *
     * <p>For each non-NA row of the target column the value
     * {@code r * 2.0 * noiseLevel - noiseLevel} is added, where {@code r} is the
     * same row's value in the second column. Assuming that column holds values in
     * {@code [0, 1)} (its name suggests uniform random draws; confirm at the call
     * site), the added term lies in {@code [-noiseLevel, noiseLevel)}.
     */
    public static class AddNoiseTask
    extends MRTask<AddNoiseTask> {
        private int _applyToColumnIdx;
        private int _runifIdx;
        private double _noiseLevel;

        /**
         * @param applyToColumnIdx index of the column to perturb
         * @param runifIdx         index of the column supplying the per-row factor
         * @param noiseLevel       half-width used to scale and shift the factor
         */
        public AddNoiseTask(int applyToColumnIdx, int runifIdx, double noiseLevel) {
            _applyToColumnIdx = applyToColumnIdx;
            _runifIdx = runifIdx;
            _noiseLevel = noiseLevel;
        }

        /** Adds the computed noise term to every non-NA row of the target column. */
        public void map(Chunk[] cs) {
            Chunk values = cs[_applyToColumnIdx];
            Chunk randoms = cs[_runifIdx];
            for (int row = 0; row < values._len; row++) {
                if (!values.isNA(row)) {
                    double noise = randoms.atd(row) * 2.0 * _noiseLevel - _noiseLevel;
                    values.set(row, values.atd(row) + noise);
                }
            }
        }
    }

    /**
     * Task that fills an encodings column from numerator and denominator columns.
     *
     * <p>Per row:
     * <ul>
     *   <li>numerator or denominator is NA: the encoding is set to NA;</li>
     *   <li>denominator, read as a long, equals zero: the encoding is the prior mean;</li>
     *   <li>otherwise: the encoding is {@code numerator / denominator}.</li>
     * </ul>
     */
    static class CalcEncodings
    extends MRTask<CalcEncodings> {
        private double _priorMean;
        private int _numeratorIdx;
        private int _denominatorIdx;
        private int _encodingsIdx;

        /**
         * @param numeratorIdx   index of the numerator column
         * @param denominatorIdx index of the denominator column
         * @param priorMean      value written when the denominator is zero
         * @param encodingsIdx   index of the column receiving the result
         */
        CalcEncodings(int numeratorIdx, int denominatorIdx, double priorMean, int encodingsIdx) {
            _numeratorIdx = numeratorIdx;
            _denominatorIdx = denominatorIdx;
            _priorMean = priorMean;
            _encodingsIdx = encodingsIdx;
        }

        /** Writes one encoding per row into the encodings chunk. */
        public void map(Chunk[] cs) {
            Chunk numerators = cs[_numeratorIdx];
            Chunk denominators = cs[_denominatorIdx];
            Chunk encodings = cs[_encodingsIdx];
            for (int row = 0; row < numerators._len; row++) {
                if (numerators.isNA(row) || denominators.isNA(row)) {
                    encodings.setNA(row);
                } else if (denominators.at8(row) == 0L) {
                    // Nothing to average over: fall back to the prior.
                    encodings.set(row, _priorMean);
                } else {
                    encodings.set(row, numerators.atd(row) / denominators.atd(row));
                }
            }
        }
    }

    /**
     * Task that fills an encodings column with a blend of the per-row mean and the
     * prior mean.
     *
     * <p>Per row:
     * <ul>
     *   <li>numerator or denominator is NA: the encoding is set to NA;</li>
     *   <li>denominator, read as a long, equals zero: a message is logged and the
     *       encoding is the prior mean;</li>
     *   <li>otherwise: with {@code n} the denominator,
     *       {@code lambda = 1 / (1 + exp((k - n) / f))} and the encoding is
     *       {@code lambda * (numerator / n) + (1 - lambda) * priorMean}, where
     *       {@code k} and {@code f} come from the blending parameters.</li>
     * </ul>
     */
    static class CalcEncodingsWithBlending
    extends MRTask<CalcEncodingsWithBlending> {
        private double _priorMean;
        private int _numeratorIdx;
        private int _denominatorIdx;
        private int _encodingsIdx;
        private BlendingParams _blendingParams;

        /**
         * @param numeratorIdx   index of the numerator column
         * @param denominatorIdx index of the denominator column
         * @param priorMean      value blended in, and used alone when the denominator is zero
         * @param blendingParams source of the {@code k} and {@code f} blending constants
         * @param encodingsIdx   index of the column receiving the result
         */
        CalcEncodingsWithBlending(int numeratorIdx, int denominatorIdx, double priorMean, BlendingParams blendingParams, int encodingsIdx) {
            _numeratorIdx = numeratorIdx;
            _denominatorIdx = denominatorIdx;
            _priorMean = priorMean;
            _blendingParams = blendingParams;
            _encodingsIdx = encodingsIdx;
        }

        /** Writes one blended encoding per row into the encodings chunk. */
        public void map(Chunk[] cs) {
            Chunk numerators = cs[_numeratorIdx];
            Chunk denominators = cs[_denominatorIdx];
            Chunk encodings = cs[_encodingsIdx];
            for (int row = 0; row < numerators._len; row++) {
                if (numerators.isNA(row) || denominators.isNA(row)) {
                    encodings.setNA(row);
                } else if (denominators.at8(row) == 0L) {
                    Log.info(new Object[]{"Denominator is zero for column index = " + _encodingsIdx + ". Imputing with _priorMean = " + _priorMean});
                    encodings.set(row, _priorMean);
                } else {
                    double categorySize = denominators.atd(row);
                    double exponent = (_blendingParams.getK() - categorySize) / _blendingParams.getF();
                    double lambda = 1.0 / (1.0 + Math.exp(exponent));
                    double posteriorMean = numerators.atd(row) / categorySize;
                    encodings.set(row, lambda * posteriorMean + (1.0 - lambda) * _priorMean);
                }
            }
        }
    }

    /**
     * Task whose only effect is to request a JVM garbage collection from its
     * {@code setupLocal()} hook.
     *
     * <p>Relies on the implicit no-argument constructor, which has the same
     * package-private access as the class itself.
     */
    static class GCForceTask
    extends MRTask<GCForceTask> {
        /** Requests a garbage collection; {@link System#gc()} is only a hint to the JVM. */
        protected void setupLocal() {
            System.gc();
        }
    }

    /**
     * Identifies which data-leakage handling strategy to use. Each constant carries
     * a stable byte code that can be converted back with {@link #fromVal(byte)}.
     *
     * <p>The codes are passed with an explicit {@code (byte)} cast: an {@code int}
     * literal is not implicitly narrowed when used as a constructor argument, so
     * {@code LeaveOneOut(0)} would not compile against a {@code byte} parameter.
     * A nested enum is implicitly static, so the modifier is omitted.
     */
    public enum DataLeakageHandlingStrategy {
        LeaveOneOut((byte) 0),
        KFold((byte) 1),
        None((byte) 2);

        private final byte val;

        private DataLeakageHandlingStrategy(byte val) {
            this.val = val;
        }

        /**
         * Looks up the constant whose code equals {@code val}.
         *
         * @param val byte code previously obtained from {@link #getVal()}
         * @return the matching constant
         * @throws IllegalArgumentException if no constant carries that code
         */
        public static DataLeakageHandlingStrategy fromVal(byte val) throws IllegalArgumentException {
            // Search the constants themselves so the code-to-constant mapping is
            // declared in exactly one place (the constant list above).
            for (DataLeakageHandlingStrategy strategy : values()) {
                if (strategy.val == val) {
                    return strategy;
                }
            }
            throw new IllegalArgumentException(String.format("Unknown DataLeakageHandlingStrategy corresponding to value: '%s'", val));
        }

        /** @return the byte code associated with this constant */
        public byte getVal() {
            return this.val;
        }
    }
}

