/*
 * Decompiled with CFR 0.152.
 */
package hivemall.tools.text;

import hivemall.utils.lang.StringUtils;
import java.util.ArrayList;
import java.util.List;
import javax.annotation.Nonnegative;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.UDFType;
import org.apache.hadoop.io.Text;

/**
 * Hive UDF that expands a list of words into word n-grams.
 *
 * <p>For every start position in the input list, one n-gram is produced for each size
 * {@code n} with {@code minSize <= n <= maxSize} that still fits inside the list. The words
 * of an n-gram are joined with a single space.
 */
@Description(name="word_ngrams", value="_FUNC_(array<string> words, int minSize, int maxSize) - Returns list of n-grams for given words, where `minSize <= n <= maxSize`", extended="SELECT word_ngrams(tokenize('Machine learning is fun!', true), 1, 2);\n\n [\"machine\",\"machine learning\",\"learning\",\"learning is\",\"is\",\"is fun\",\"fun\"]")
@UDFType(deterministic=true, stateful=false)
public final class WordNgramsUDF
extends UDF {
    /**
     * Validates the size range and returns the n-grams of {@code words}.
     *
     * @param words the words to combine; {@code null} yields {@code null}
     * @param minSize smallest n-gram size, must be greater than zero
     * @param maxSize largest n-gram size, must be greater than or equal to {@code minSize}
     * @return the n-grams ordered by start position and then by size, or {@code null} when
     *         {@code words} is {@code null}
     * @throws HiveException (as {@link UDFArgumentException}) if the size range is invalid or
     *         a word that would be part of an n-gram is {@code null}
     */
    @Nullable
    public List<Text> evaluate(@Nullable List<Text> words, int minSize, int maxSize) throws HiveException {
        if (words == null) {
            return null;
        }
        if (minSize <= 0) {
            throw new UDFArgumentException("`minSize` must be greater than zero: " + minSize);
        }
        if (minSize > maxSize) {
            throw new UDFArgumentException("`maxSize` must be greater than or equal to `minSize`: " + maxSize);
        }
        return WordNgramsUDF.getNgrams(words, minSize, maxSize);
    }

    /**
     * Builds all n-grams of {@code words} whose size lies in {@code [minSize, maxSize]}.
     *
     * <p>The caller is expected to have checked {@code 0 < minSize <= maxSize}.
     *
     * @param words the words to combine
     * @param minSize smallest n-gram size to emit
     * @param maxSize largest n-gram size to emit
     * @return the n-grams ordered by start position and then by size; empty when the list
     *         holds fewer than {@code minSize} words
     * @throws HiveException (as {@link UDFArgumentException}) if a word that is part of an
     *         emitted n-gram is {@code null}
     */
    @Nonnull
    private static List<Text> getNgrams(@Nonnull List<Text> words, @Nonnegative int minSize, @Nonnegative int maxSize) throws HiveException {
        final List<Text> ngrams = new ArrayList<Text>();
        final StringBuilder ngram = new StringBuilder();
        final int numWords = words.size();
        for (int start = 0; start < numWords; ++start) {
            // Clamp to the words actually available from this position. Working with
            // `numWords - start` instead of `start + size` keeps every value within
            // [0, numWords], so a huge `maxSize` (e.g. Integer.MAX_VALUE) can neither
            // overflow the index arithmetic nor keep the size loop from terminating.
            final int longest = Math.min(maxSize, numWords - start);
            if (longest < minSize) {
                // Later start positions have even fewer words left, so nothing more fits.
                break;
            }
            StringUtils.clear(ngram);
            // Grow the buffer one word at a time; every prefix of at least `minSize`
            // words is itself an n-gram, so nothing has to be rebuilt from scratch.
            for (int size = 1; size <= longest; ++size) {
                final Text word = words.get(start + size - 1);
                if (word == null) {
                    throw new UDFArgumentException("`array<string> words` must not contain NULL element");
                }
                if (size > 1) {
                    ngram.append(" ");
                }
                ngram.append(word.toString());
                if (size >= minSize) {
                    ngrams.add(new Text(ngram.toString()));
                }
            }
        }
        return ngrams;
    }
}

