/*
 * Decompiled with CFR 0.152.
 */
package org.apache.hadoop.hive.ql.udf.generic;

import com.facebook.presto.hive.$internal.org.apache.commons.logging.Log;
import com.facebook.presto.hive.$internal.org.apache.commons.logging.LogFactory;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFResolver;
import org.apache.hadoop.hive.ql.udf.generic.NGramEstimator;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.io.Text;

/**
 * Resolver for the {@code ngrams} aggregate function.
 *
 * <p>{@link #getEvaluator(TypeInfo[])} validates the argument types (a list of strings or a
 * list of lists of strings, followed by two or three integer-like arguments) and hands back a
 * {@link GenericUDAFnGramEvaluator} that performs the actual aggregation with the help of an
 * {@link NGramEstimator}.
 */
@Description(name="ngrams", value="_FUNC_(expr, n, k, pf) - Estimates the top-k n-grams in rows that consist of sequences of strings, represented as arrays of strings, or arrays of arrays of strings. 'pf' is an optional precision factor that controls memory usage.", extended="The parameter 'n' specifies what type of n-grams are being estimated. Unigrams are n = 1, and bigrams are n = 2. Generally, n will not be greater than about 5. The 'k' parameter specifies how many of the highest-frequency n-grams will be returned by the UDAF. The optional precision factor 'pf' specifies how much memory to use for estimation; more memory will give more accurate frequency counts, but could crash the JVM. The default value is 20, which internally maintains 20*k n-grams, but only returns the k highest frequency ones. The output is an array of structs with the top-k n-grams. It might be convenient to explode() the output of this UDAF.")
public class GenericUDAFnGrams
implements GenericUDAFResolver {
    static final Log LOG = LogFactory.getLog(GenericUDAFnGrams.class.getName());

    /**
     * Validates the argument types and returns the evaluator for this UDAF.
     *
     * @param parameters type information of the call arguments: {@code (expr, n, k[, pf])}
     * @return a new {@link GenericUDAFnGramEvaluator}
     * @throws SemanticException (as {@link UDFArgumentTypeException}) if the number of arguments
     *         is not three or four, if the first argument is not {@code array<string>} or
     *         {@code array<array<string>>}, or if any remaining argument is not one of the
     *         accepted primitive categories
     */
    @Override
    public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {
        if (parameters.length != 3 && parameters.length != 4) {
            throw new UDFArgumentTypeException(parameters.length - 1, "Please specify either three or four arguments.");
        }
        if (parameters[0].getCategory() != ObjectInspector.Category.LIST) {
            throw new UDFArgumentTypeException(0, "Only list type arguments are accepted but " + parameters[0].getTypeName() + " was passed as parameter 1.");
        }

        // Peel off at most one level of nesting: array<string> or array<array<string>>.
        TypeInfo elementType = ((ListTypeInfo)parameters[0]).getListElementTypeInfo();
        if (elementType.getCategory() == ObjectInspector.Category.LIST) {
            elementType = ((ListTypeInfo)elementType).getListElementTypeInfo();
        }
        // Check the category before casting so that deeper nesting or non-primitive elements
        // are reported as an argument type error rather than a ClassCastException.
        if (elementType.getCategory() != ObjectInspector.Category.PRIMITIVE) {
            throw new UDFArgumentTypeException(0, "Only arrays of strings or arrays of arrays of strings are accepted but " + parameters[0].getTypeName() + " was passed as parameter 1.");
        }
        if (((PrimitiveTypeInfo)elementType).getPrimitiveCategory() != PrimitiveObjectInspector.PrimitiveCategory.STRING) {
            throw new UDFArgumentTypeException(0, "Only array<string> or array<array<string>> is allowed, but " + parameters[0].getTypeName() + " was passed as parameter 1.");
        }

        // 'n', 'k' and the optional 'pf' all share the same validation.
        for (int index = 1; index < parameters.length; ++index) {
            GenericUDAFnGrams.checkIntegerArgument(parameters, index);
        }
        return new GenericUDAFnGramEvaluator();
    }

    /**
     * Verifies that the argument at {@code index} is a primitive of category BYTE, SHORT, INT,
     * LONG or TIMESTAMP.
     *
     * @param parameters type information of all call arguments
     * @param index zero-based position of the argument to check
     * @throws UDFArgumentTypeException if the argument has any other type
     */
    private static void checkIntegerArgument(TypeInfo[] parameters, int index) throws UDFArgumentTypeException {
        TypeInfo type = parameters[index];
        boolean accepted = false;
        if (type.getCategory() == ObjectInspector.Category.PRIMITIVE) {
            switch (((PrimitiveTypeInfo)type).getPrimitiveCategory()) {
                case BYTE:
                case SHORT:
                case INT:
                case LONG:
                case TIMESTAMP: {
                    accepted = true;
                    break;
                }
                default: {
                    break;
                }
            }
        }
        if (!accepted) {
            throw new UDFArgumentTypeException(index, "Only integers are accepted but " + type.getTypeName() + " was passed as parameter " + (index + 1) + ".");
        }
    }

    /**
     * Evaluator that feeds word sequences into an {@link NGramEstimator} and exchanges partial
     * results as a list of strings whose last entry is the n-gram length {@code n}.
     */
    public static class GenericUDAFnGramEvaluator
    extends GenericUDAFEvaluator {
        // Inspectors for the original arguments; only set in PARTIAL1 and COMPLETE mode.
        private transient ListObjectInspector outerInputOI;
        // Null when the input is a flat array<string>. Typed as the interface because only
        // ListObjectInspector methods are used on it.
        private transient ListObjectInspector innerInputOI;
        private transient PrimitiveObjectInspector inputOI;
        private transient PrimitiveObjectInspector nOI;
        private transient PrimitiveObjectInspector kOI;
        private transient PrimitiveObjectInspector pOI;
        // Inspector for partial aggregations; only set in PARTIAL2 and FINAL mode.
        private transient ListObjectInspector loi;

        /**
         * Captures the object inspectors that match the given mode and returns the inspector
         * describing this evaluator's output.
         *
         * @param m evaluation mode
         * @param parameters inspectors for the original arguments (PARTIAL1, COMPLETE) or for
         *        the partial aggregation (other modes)
         * @return a list-of-strings inspector for PARTIAL1 and PARTIAL2, otherwise an inspector
         *         for a list of structs with the fields {@code ngram} and {@code estfrequency}
         * @throws HiveException if the superclass initialization fails
         */
        @Override
        public ObjectInspector init(GenericUDAFEvaluator.Mode m, ObjectInspector[] parameters) throws HiveException {
            super.init(m, parameters);
            if (m == GenericUDAFEvaluator.Mode.PARTIAL1 || m == GenericUDAFEvaluator.Mode.COMPLETE) {
                this.outerInputOI = (ListObjectInspector)parameters[0];
                ObjectInspector elementOI = this.outerInputOI.getListElementObjectInspector();
                if (elementOI.getCategory() == ObjectInspector.Category.LIST) {
                    this.innerInputOI = (ListObjectInspector)elementOI;
                    this.inputOI = (PrimitiveObjectInspector)this.innerInputOI.getListElementObjectInspector();
                } else {
                    this.innerInputOI = null;
                    this.inputOI = (PrimitiveObjectInspector)elementOI;
                }
                this.nOI = (PrimitiveObjectInspector)parameters[1];
                this.kOI = (PrimitiveObjectInspector)parameters[2];
                this.pOI = parameters.length == 4 ? (PrimitiveObjectInspector)parameters[3] : null;
            } else {
                this.loi = (ListObjectInspector)parameters[0];
            }

            if (m == GenericUDAFEvaluator.Mode.PARTIAL1 || m == GenericUDAFEvaluator.Mode.PARTIAL2) {
                // Partial result: flat list of strings (see terminatePartial).
                return ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableStringObjectInspector);
            }

            // Final result: array<struct<ngram:array<string>, estfrequency:double>>.
            ArrayList<ObjectInspector> fieldInspectors = new ArrayList<ObjectInspector>();
            fieldInspectors.add(ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableStringObjectInspector));
            fieldInspectors.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
            ArrayList<String> fieldNames = new ArrayList<String>();
            fieldNames.add("ngram");
            fieldNames.add("estfrequency");
            return ObjectInspectorFactory.getStandardListObjectInspector(ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldInspectors));
        }

        /**
         * Folds a partial aggregation into {@code agg}.
         *
         * <p>The last element of the partial list is the n-gram length written by
         * {@link #terminatePartial}; a value of 0 means the buffer that produced it was never
         * given an n-gram length, and the partial is ignored.
         *
         * @param agg the target buffer, a {@link NGramAggBuf}
         * @param partial a partial aggregation readable through the inspector captured in
         *        {@link #init}; may be null, in which case nothing happens
         * @throws HiveException if the partial list is empty, or if its n-gram length differs
         *         from the one already stored in {@code agg}
         */
        @Override
        public void merge(GenericUDAFEvaluator.AggregationBuffer agg, Object partial) throws HiveException {
            if (partial == null) {
                return;
            }
            NGramAggBuf myagg = (NGramAggBuf)agg;
            List<?> partialNGrams = this.loi.getList(partial);
            if (partialNGrams == null) {
                return;
            }
            if (partialNGrams.isEmpty()) {
                throw new HiveException(this.getClass().getSimpleName() + ": received an empty partial aggregation, but at least the value of 'n' is expected.");
            }

            int last = partialNGrams.size() - 1;
            int n = Integer.parseInt(partialNGrams.get(last).toString());
            if (n == 0) {
                return;
            }
            if (myagg.n > 0 && myagg.n != n) {
                throw new HiveException(this.getClass().getSimpleName() + ": mismatch in value for 'n'" + ", which usually is caused by a non-constant expression. Found '" + n + "' and '" + myagg.n + "'.");
            }
            myagg.n = n;

            // Hand the estimator a copy without the trailing 'n' entry instead of removing the
            // entry in place: the list belongs to the caller and may be a fixed-size view.
            // The element type follows from the writable-string list inspector returned by
            // init() for partial results.
            @SuppressWarnings("unchecked")
            List<Text> serialized = new ArrayList<Text>((List<Text>)partialNGrams.subList(0, last));
            myagg.nge.merge(serialized);
        }

        /**
         * Serializes the buffer: the estimator's own serialization followed by the n-gram
         * length as the final element.
         *
         * @param agg the buffer to serialize, a {@link NGramAggBuf}
         * @return the list produced by {@link NGramEstimator#serialize()} with {@code n} appended
         * @throws HiveException if the estimator fails to serialize
         */
        @Override
        public Object terminatePartial(GenericUDAFEvaluator.AggregationBuffer agg) throws HiveException {
            NGramAggBuf myagg = (NGramAggBuf)agg;
            ArrayList<Text> result = myagg.nge.serialize();
            result.add(new Text(Integer.toString(myagg.n)));
            return result;
        }

        /**
         * Adds every contiguous window of {@code agg.n} words in {@code seq} to the estimator,
         * starting with the last window and moving towards the front. Sequences shorter than
         * {@code agg.n} contribute nothing.
         */
        private void processNgrams(NGramAggBuf agg, List<String> seq) throws HiveException {
            for (int start = seq.size() - agg.n; start >= 0; --start) {
                ArrayList<String> ngram = new ArrayList<String>(seq.subList(start, start + agg.n));
                agg.nge.add(ngram);
            }
        }

        /** Converts the raw elements of a list into strings using the element inspector. */
        private ArrayList<String> toWords(List<?> raw) throws HiveException {
            ArrayList<String> words = new ArrayList<String>(raw.size());
            for (int i = 0; i < raw.size(); ++i) {
                words.add(PrimitiveObjectInspectorUtils.getString(raw.get(i), this.inputOI));
            }
            return words;
        }

        /**
         * Processes one input row.
         *
         * <p>Rows in which the sequence, {@code n} or {@code k} is null are skipped. The first
         * usable row initializes the estimator with {@code k}, {@code pf} and {@code n}; later
         * rows do not re-read those arguments.
         *
         * @param agg the buffer to update, a {@link NGramAggBuf}
         * @param parameters the row's argument values: {@code (expr, n, k[, pf])}
         * @throws HiveException if {@code n}, {@code k} or a supplied {@code pf} is less than 1
         */
        @Override
        public void iterate(GenericUDAFEvaluator.AggregationBuffer agg, Object[] parameters) throws HiveException {
            assert (parameters.length == 3 || parameters.length == 4);
            if (parameters[0] == null || parameters[1] == null || parameters[2] == null) {
                return;
            }
            NGramAggBuf myagg = (NGramAggBuf)agg;
            if (!myagg.nge.isInitialized()) {
                int n = PrimitiveObjectInspectorUtils.getInt(parameters[1], this.nOI);
                int k = PrimitiveObjectInspectorUtils.getInt(parameters[2], this.kOI);
                if (n < 1) {
                    throw new HiveException(this.getClass().getSimpleName() + " needs 'n' to be at least 1, " + "but you supplied " + n);
                }
                if (k < 1) {
                    throw new HiveException(this.getClass().getSimpleName() + " needs 'k' to be at least 1, " + "but you supplied " + k);
                }
                int pf;
                if (parameters.length == 4) {
                    pf = PrimitiveObjectInspectorUtils.getInt(parameters[3], this.pOI);
                    if (pf < 1) {
                        throw new HiveException(this.getClass().getSimpleName() + " needs 'pf' to be at least 1, " + "but you supplied " + pf);
                    }
                } else {
                    // NOTE(review): 1 looks like a placeholder; presumably NGramEstimator
                    // applies its own default/minimum precision factor -- confirm there.
                    pf = 1;
                }
                myagg.n = n;
                myagg.nge.initialize(k, pf, n);
            }

            List<?> outer = this.outerInputOI.getList(parameters[0]);
            if (outer == null) {
                return;
            }
            if (this.innerInputOI == null) {
                // array<string>: the row is a single word sequence.
                this.processNgrams(myagg, this.toWords(outer));
                return;
            }
            // array<array<string>>: every inner array is an independent word sequence.
            for (int i = 0; i < outer.size(); ++i) {
                List<?> inner = this.innerInputOI.getList(outer.get(i));
                if (inner == null) {
                    // A null inner array carries no words; skip it instead of failing.
                    continue;
                }
                this.processNgrams(myagg, this.toWords(inner));
            }
        }

        /**
         * Produces the final result for the buffer.
         *
         * @param agg the buffer to finish, a {@link NGramAggBuf}
         * @return whatever {@link NGramEstimator#getNGrams()} returns for the buffer's estimator
         * @throws HiveException if the estimator fails
         */
        @Override
        public Object terminate(GenericUDAFEvaluator.AggregationBuffer agg) throws HiveException {
            NGramAggBuf myagg = (NGramAggBuf)agg;
            return myagg.nge.getNGrams();
        }

        /**
         * Creates a buffer with a fresh {@link NGramEstimator} and resets it.
         *
         * @return the new, reset {@link NGramAggBuf}
         * @throws HiveException if resetting fails
         */
        @Override
        public GenericUDAFEvaluator.AggregationBuffer getNewAggregationBuffer() throws HiveException {
            NGramAggBuf result = new NGramAggBuf();
            result.nge = new NGramEstimator();
            this.reset(result);
            return result;
        }

        /**
         * Resets the buffer's estimator and sets its n-gram length back to 0.
         *
         * @param agg the buffer to reset, a {@link NGramAggBuf}
         * @throws HiveException if the estimator fails to reset
         */
        @Override
        public void reset(GenericUDAFEvaluator.AggregationBuffer agg) throws HiveException {
            NGramAggBuf result = (NGramAggBuf)agg;
            result.nge.reset();
            result.n = 0;
        }

        /** Aggregation state: the estimator plus the n-gram length it was initialized with. */
        static class NGramAggBuf
        extends GenericUDAFEvaluator.AbstractAggregationBuffer {
            NGramEstimator nge;
            // 0 until iterate() or merge() has stored an n-gram length.
            int n;

            NGramAggBuf() {
            }
        }
    }
}

