/*
 * Decompiled with CFR 0.152.
 */
package hivemall.ftvec.hashing;

import hivemall.UDFWithOptions;
import hivemall.annotations.VisibleForTesting;
import hivemall.utils.hadoop.HiveUtils;
import hivemall.utils.hashing.MurmurHash3;
import hivemall.utils.lang.Primitives;
import hivemall.utils.lang.StringUtils;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.Options;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.UDFType;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

/**
 * Hive UDF that replaces feature names by a MurmurHash3-based integer index.
 *
 * <p>The first argument is either a list of feature strings or a single value that is converted
 * with {@code toString()}; the optional second argument is a constant option string supporting
 * {@code -libsvm} and {@code -features <n>} / {@code -num_features <n>}.
 *
 * <p>Accepted feature forms are {@code name}, {@code name:value} and {@code field:name:value};
 * only the {@code name} part is hashed, the rest is copied through unchanged.
 */
@Description(name="feature_hashing", value="_FUNC_(array<string> features [, const string options]) - returns a hashed feature vector in array<string>", extended="select feature_hashing(array('aaa:1.0','aaa','bbb:2.0'), '-libsvm');\n [\"4063537:1.0\",\"4063537:1\",\"8459207:2.0\"]\n\nselect feature_hashing(array('aaa:1.0','aaa','bbb:2.0'), '-features 10');\n [\"7:1.0\",\"7\",\"1:2.0\"]\n\nselect feature_hashing(array('aaa:1.0','aaa','bbb:2.0'), '-features 10 -libsvm');\n [\"1:2.0\",\"7:1.0\",\"7:1\"]\n")
@UDFType(deterministic=true, stateful=false)
public final class FeatureHashingUDF
extends UDFWithOptions {
    /** Stateless comparator ordering hashed features by their numeric index. */
    private static final IndexComparator indexCmp = new IndexComparator();
    /** Inspector of the first argument when it is a list; {@code null} selects the scalar path. */
    @Nullable
    private ListObjectInspector _listOI;
    /** Whether {@code -libsvm} was given: emit {@code index:value} and sort by index. */
    private boolean _libsvmFormat = false;
    /** Size of the hash space; defaults to 2^24 (16777216). */
    private int _numFeatures = 0x1000000;
    /** Result list reused across calls of {@link #evaluateList(Object)}. */
    @Nullable
    private transient List<String> _returnObj;

    /**
     * Declares the command line style options understood by this UDF.
     *
     * @return the option definitions for {@code -libsvm} and {@code -features/-num_features}
     */
    @Override
    protected Options getOptions() {
        Options opts = new Options();
        opts.addOption("libsvm", false, "Returns in libsvm format (<index>:<value>)* sorted by index ascending order");
        opts.addOption("features", "num_features", true, "The number of features [default: 16777216 (2^24)]");
        return opts;
    }

    /**
     * Parses the option string and stores the resulting settings on this instance.
     *
     * @param optionValue the raw option string passed as the second UDF argument
     * @return the parsed command line
     * @throws UDFArgumentException if the number of features is not a positive integer
     */
    @Override
    protected CommandLine processOptions(@Nonnull String optionValue) throws UDFArgumentException {
        CommandLine cl = this.parseOptions(optionValue);
        this._libsvmFormat = cl.hasOption("libsvm");
        int numFeatures = Primitives.parseInt(cl.getOptionValue("num_features"), this._numFeatures);
        // mhash() computes `hash % numFeatures`; zero would raise ArithmeticException on the first
        // row and a negative modulus would yield negative indices, so reject both up front.
        if (numFeatures <= 0) {
            throw new UDFArgumentException("The number of features must be greater than 0: " + numFeatures);
        }
        this._numFeatures = numFeatures;
        return cl;
    }

    /**
     * Validates the argument count, remembers whether the first argument is a list and applies
     * the options given as second argument, if any.
     *
     * @param argOIs object inspectors of the call arguments (one or two expected)
     * @return a Java string inspector for scalar input, or a list-of-string inspector for list
     *         input
     * @throws UDFArgumentException if option processing fails
     */
    public ObjectInspector initialize(@Nonnull ObjectInspector[] argOIs) throws UDFArgumentException {
        if (argOIs.length != 1 && argOIs.length != 2) {
            // NOTE(review): showHelp presumably throws UDFArgumentException -- confirm in UDFWithOptions
            this.showHelp("The feature_hashing function takes 1 or 2 arguments: " + argOIs.length);
        }
        ObjectInspector argOI0 = argOIs[0];
        this._listOI = HiveUtils.isListOI(argOI0) ? (ListObjectInspector)argOI0 : null;
        if (argOIs.length == 2) {
            String opts = HiveUtils.getConstString(argOIs[1]);
            this.processOptions(opts);
        }
        if (this._listOI == null) {
            return PrimitiveObjectInspectorFactory.javaStringObjectInspector;
        }
        return ObjectInspectorFactory.getStandardListObjectInspector((ObjectInspector)PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    }

    /**
     * Hashes the first argument, dispatching to the scalar or the list implementation.
     *
     * @param arguments deferred call arguments; only the first one is read here
     * @return {@code null} for a {@code null} input, otherwise a {@code String} (scalar input) or
     *         a {@code List<String>} (list input)
     * @throws HiveException if the argument cannot be obtained or sorting the result fails
     */
    public Object evaluate(@Nonnull GenericUDF.DeferredObject[] arguments) throws HiveException {
        Object arg0 = arguments[0].get();
        if (arg0 == null) {
            return null;
        }
        if (this._listOI == null) {
            return this.evaluateScalar(arg0);
        }
        return this.evaluateList(arg0);
    }

    /**
     * Hashes a single feature given as an arbitrary object via its {@code toString()} form.
     */
    @Nonnull
    private String evaluateScalar(@Nonnull Object arg0) {
        String fv = arg0.toString();
        return FeatureHashingUDF.featureHashing(fv, this._numFeatures, this._libsvmFormat);
    }

    /**
     * Hashes every non-null element of the list and, in libsvm mode, sorts the result by index.
     *
     * <p>The returned list is the instance cached in {@code _returnObj}; it is cleared and
     * refilled on every call.
     *
     * @throws HiveException if an index cannot be parsed as an integer while sorting
     */
    @Nonnull
    private List<String> evaluateList(@Nonnull Object arg0) throws HiveException {
        int len = this._listOI.getListLength(arg0);
        List<String> list = this._returnObj;
        if (list == null) {
            this._returnObj = list = new ArrayList<String>(len);
        } else {
            list.clear();
        }
        int numFeatures = this._numFeatures;
        for (int i = 0; i < len; ++i) {
            Object obj = this._listOI.getListElement(arg0, i);
            if (obj == null) continue;
            String fv = FeatureHashingUDF.featureHashing(obj.toString(), numFeatures, this._libsvmFormat);
            list.add(fv);
        }
        if (this._libsvmFormat) {
            try {
                Collections.sort(list, indexCmp);
            }
            catch (NumberFormatException e) {
                throw new HiveException((Throwable)e);
            }
        }
        return list;
    }

    /**
     * Hashes a feature without libsvm formatting.
     *
     * @see #featureHashing(String, int, boolean)
     */
    @Nonnull
    @VisibleForTesting
    static String featureHashing(@Nonnull String fv, int numFeatures) {
        return FeatureHashingUDF.featureHashing(fv, numFeatures, false);
    }

    /**
     * Replaces the name part of a feature string by its hashed index.
     *
     * <ul>
     * <li>{@code name} becomes {@code h}, or {@code h:1} when {@code libsvmFormat} is set; the
     * literal {@code "0"} is returned unchanged.</li>
     * <li>{@code name:value} becomes {@code h:value}; {@code "0:<v>"} is returned unchanged when
     * {@code <v>} parses to {@code 1.0}.</li>
     * <li>{@code field:name:value} (first and last colon differ) becomes {@code field:h:value}.</li>
     * </ul>
     *
     * @param fv the feature string
     * @param numFeatures size of the hash space, expected to be positive
     * @param libsvmFormat whether a value-less feature gets an explicit {@code :1} suffix
     * @return the feature string with its name replaced by the hashed index
     * @throws NumberFormatException if the value of a {@code "0:<v>"} feature is not a number
     */
    @Nonnull
    static String featureHashing(@Nonnull String fv, int numFeatures, boolean libsvmFormat) {
        int headPos = fv.indexOf(':');
        if (headPos == -1) {
            if (fv.equals("0")) {
                return fv;
            }
            int h = FeatureHashingUDF.mhash(fv, numFeatures);
            if (libsvmFormat) {
                return h + ":1";
            }
            return String.valueOf(h);
        }
        int tailPos = fv.lastIndexOf(':');
        if (headPos == tailPos) {
            String f = fv.substring(0, headPos);
            // keeps the leading ':' so it can be appended to the index as is
            String tail = fv.substring(headPos);
            if (f.equals("0") && Double.parseDouble(fv.substring(headPos + 1)) == 1.0) {
                return fv;
            }
            int h = FeatureHashingUDF.mhash(f, numFeatures);
            return h + tail;
        }
        // keeps the trailing ':' of the field part and the leading ':' of the value part
        String field = fv.substring(0, headPos + 1);
        String f = fv.substring(headPos + 1, tailPos);
        int h = FeatureHashingUDF.mhash(f, numFeatures);
        String v = fv.substring(tailPos);
        return field + h + v;
    }

    /**
     * Maps a word to an index in {@code [1, numFeatures]} using MurmurHash3 (x86, 32 bit) with a
     * fixed seed.
     *
     * @param word the string to hash
     * @param numFeatures size of the hash space, expected to be positive
     * @return the 1-based bucket index
     */
    static int mhash(@Nonnull String word, int numFeatures) {
        int r = MurmurHash3.murmurhash3_x86_32(word, 0, word.length(), -1756908916) % numFeatures;
        // Java's % keeps the sign of the dividend; shift negative remainders into [0, numFeatures)
        if (r < 0) {
            r += numFeatures;
        }
        return r + 1;
    }

    /**
     * Builds the textual form of this call from the given argument strings.
     */
    public String getDisplayString(String[] children) {
        return "feature_hashing(" + StringUtils.join(children, ',') + ')';
    }

    /**
     * Orders hashed feature strings by their integer index in ascending order.
     */
    private static final class IndexComparator
    implements Comparator<String>,
    Serializable {
        private static final long serialVersionUID = -260142385860586255L;

        private IndexComparator() {
        }

        @Override
        public int compare(@Nonnull String lhs, @Nonnull String rhs) {
            int l = IndexComparator.getIndex(lhs);
            int r = IndexComparator.getIndex(rhs);
            return Integer.compare(l, r);
        }

        /**
         * Extracts the index of a hashed feature: the whole string when there is no colon, the
         * part before the colon for {@code index:value}, and the part between the first and last
         * colon for {@code field:index:value}.
         *
         * @throws NumberFormatException if the extracted part is not an integer
         */
        private static int getIndex(@Nonnull String fv) {
            int headPos = fv.indexOf(':');
            if (headPos == -1) {
                // featureHashing passes a bare "0" through without appending ":1"; calling
                // substring(0, -1) on it would throw StringIndexOutOfBoundsException
                return Integer.parseInt(fv);
            }
            int tailPos = fv.lastIndexOf(':');
            String f = headPos == tailPos ? fv.substring(0, headPos) : fv.substring(headPos + 1, tailPos);
            return Integer.parseInt(f);
        }
    }
}

