/*
 * Decompiled with CFR 0.152.
 */
package water.parser;

import java.util.concurrent.atomic.AtomicInteger;
import water.Iced;
import water.parser.BufferedString;
import water.util.IcedHashMap;
import water.util.Log;
import water.util.PrettyPrint;

public final class Categorical
extends Iced {
    public static final int MAX_CATEGORICAL_COUNT = 10000000;
    transient AtomicInteger _id = new AtomicInteger();
    int _maxId = -1;
    volatile IcedHashMap<BufferedString, Integer> _map = new IcedHashMap();
    boolean maxDomainExceeded = false;
    public static final int MAX_EXAMPLES = 10;

    Categorical() {
    }

    int addKey(BufferedString str) {
        IcedHashMap<BufferedString, Integer> m = this._map;
        if (m == null) {
            return Integer.MAX_VALUE;
        }
        Integer res = (Integer)m.get(str);
        if (res != null) {
            return res;
        }
        assert (str.length() < 65535);
        int newVal = this._id.incrementAndGet();
        res = m.putIfAbsent(new BufferedString(str), newVal);
        if (res != null) {
            return res;
        }
        if (m.size() > 10000000) {
            this.maxDomainExceeded = true;
        }
        return newVal;
    }

    final boolean containsKey(BufferedString key) {
        return this._map.containsKey(key);
    }

    public String toString() {
        return "{" + this._map + " }";
    }

    int getTokenId(BufferedString str) {
        return (Integer)this._map.get(str);
    }

    int maxId() {
        return this._maxId == -1 ? this._id.get() : this._maxId;
    }

    int size() {
        return this._map.size();
    }

    boolean isMapFull() {
        return this.maxDomainExceeded;
    }

    BufferedString[] getColumnDomain() {
        return this._map.keySet().toArray(new BufferedString[this._map.size()]);
    }

    public void convertToUTF8(int col) {
        int hexConvCnt = 0;
        BufferedString[] bStrs = this._map.keySet().toArray(new BufferedString[this._map.size()]);
        StringBuilder hexSB = new StringBuilder();
        for (int i = 0; i < bStrs.length; ++i) {
            String s = bStrs[i].toString();
            if (bStrs[i].sameString(s)) continue;
            if (s.contains("\ufffd")) {
                s = bStrs[i].bytesToString();
                if (hexConvCnt++ < 10) {
                    hexSB.append(s + ", ");
                }
                if (hexConvCnt == 10) {
                    hexSB.append("...");
                }
            }
            int val = (Integer)this._map.get(bStrs[i]);
            this._map.remove(bStrs[i]);
            bStrs[i] = new BufferedString(s);
            this._map.put(bStrs[i], val);
        }
        if (hexConvCnt > 0) {
            Log.info("Found categoricals with non-UTF-8 characters in the " + PrettyPrint.withOrdinalIndicator(col) + " column. Converting unrecognized characters into hex:  " + hexSB.toString());
        }
    }
}

