/*
 * Decompiled with CFR 0.152.
 */
package hivemall.nlp.tokenizer;

import hivemall.annotations.VisibleForTesting;
import hivemall.utils.hadoop.HiveUtils;
import hivemall.utils.lang.ArrayUtils;
import hivemall.utils.lang.StringUtils;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.UDFType;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

@Description(name="stoptags_exclude", value="_FUNC_(array<string> excludeTags, [, const string lang='ja']) - Returns stoptags excluding given tags", extended="SELECT stoptags_exclude(array('\u540d\u8a5e-\u56fa\u6709\u540d\u8a5e', '\u5f62\u5bb9\u8a5e'))")
@UDFType(deterministic=true, stateful=false)
public final class StoptagsExcludeUDF
extends GenericUDF {
    static final String[] STOPTAGS_JA = new String[]{"\u540d\u8a5e", "\u540d\u8a5e-\u4e00\u822c", "\u540d\u8a5e-\u56fa\u6709\u540d\u8a5e", "\u540d\u8a5e-\u56fa\u6709\u540d\u8a5e-\u4e00\u822c", "\u540d\u8a5e-\u56fa\u6709\u540d\u8a5e-\u4eba\u540d", "\u540d\u8a5e-\u56fa\u6709\u540d\u8a5e-\u4eba\u540d-\u4e00\u822c", "\u540d\u8a5e-\u56fa\u6709\u540d\u8a5e-\u4eba\u540d-\u59d3", "\u540d\u8a5e-\u56fa\u6709\u540d\u8a5e-\u4eba\u540d-\u540d", "\u540d\u8a5e-\u56fa\u6709\u540d\u8a5e-\u7d44\u7e54", "\u540d\u8a5e-\u56fa\u6709\u540d\u8a5e-\u5730\u57df", "\u540d\u8a5e-\u56fa\u6709\u540d\u8a5e-\u5730\u57df-\u4e00\u822c", "\u540d\u8a5e-\u56fa\u6709\u540d\u8a5e-\u5730\u57df-\u56fd", "\u540d\u8a5e-\u4ee3\u540d\u8a5e", "\u540d\u8a5e-\u4ee3\u540d\u8a5e-\u4e00\u822c", "\u540d\u8a5e-\u4ee3\u540d\u8a5e-\u7e2e\u7d04", "\u540d\u8a5e-\u526f\u8a5e\u53ef\u80fd", "\u540d\u8a5e-\u30b5\u5909\u63a5\u7d9a", "\u540d\u8a5e-\u5f62\u5bb9\u52d5\u8a5e\u8a9e\u5e79", "\u540d\u8a5e-\u6570", "\u540d\u8a5e-\u975e\u81ea\u7acb", "\u540d\u8a5e-\u975e\u81ea\u7acb-\u4e00\u822c", "\u540d\u8a5e-\u975e\u81ea\u7acb-\u526f\u8a5e\u53ef\u80fd", "\u540d\u8a5e-\u975e\u81ea\u7acb-\u52a9\u52d5\u8a5e\u8a9e\u5e79", "\u540d\u8a5e-\u975e\u81ea\u7acb-\u5f62\u5bb9\u52d5\u8a5e\u8a9e\u5e79", "\u540d\u8a5e-\u7279\u6b8a", "\u540d\u8a5e-\u7279\u6b8a-\u52a9\u52d5\u8a5e\u8a9e\u5e79", "\u540d\u8a5e-\u63a5\u5c3e", "\u540d\u8a5e-\u63a5\u5c3e-\u4e00\u822c", "\u540d\u8a5e-\u63a5\u5c3e-\u4eba\u540d", "\u540d\u8a5e-\u63a5\u5c3e-\u5730\u57df", "\u540d\u8a5e-\u63a5\u5c3e-\u30b5\u5909\u63a5\u7d9a", "\u540d\u8a5e-\u63a5\u5c3e-\u52a9\u52d5\u8a5e\u8a9e\u5e79", "\u540d\u8a5e-\u63a5\u5c3e-\u5f62\u5bb9\u52d5\u8a5e\u8a9e\u5e79", "\u540d\u8a5e-\u63a5\u5c3e-\u526f\u8a5e\u53ef\u80fd", "\u540d\u8a5e-\u63a5\u5c3e-\u52a9\u6570\u8a5e", "\u540d\u8a5e-\u63a5\u5c3e-\u7279\u6b8a", "\u540d\u8a5e-\u63a5\u7d9a\u8a5e\u7684", "\u540d\u8a5e-\u52d5\u8a5e\u975e\u81ea\u7acb\u7684", "\u540d\u8a5e-\u5f15\u7528\u6587\u5b57\u5217", 
"\u540d\u8a5e-\u30ca\u30a4\u5f62\u5bb9\u8a5e\u8a9e\u5e79", "\u63a5\u982d\u8a5e", "\u63a5\u982d\u8a5e-\u540d\u8a5e\u63a5\u7d9a", "\u63a5\u982d\u8a5e-\u52d5\u8a5e\u63a5\u7d9a", "\u63a5\u982d\u8a5e-\u5f62\u5bb9\u8a5e\u63a5\u7d9a", "\u63a5\u982d\u8a5e-\u6570\u63a5", "\u52d5\u8a5e", "\u52d5\u8a5e-\u81ea\u7acb", "\u52d5\u8a5e-\u975e\u81ea\u7acb", "\u52d5\u8a5e-\u63a5\u5c3e", "\u5f62\u5bb9\u8a5e", "\u5f62\u5bb9\u8a5e-\u81ea\u7acb", "\u5f62\u5bb9\u8a5e-\u975e\u81ea\u7acb", "\u5f62\u5bb9\u8a5e-\u63a5\u5c3e", "\u526f\u8a5e", "\u526f\u8a5e-\u4e00\u822c", "\u526f\u8a5e-\u52a9\u8a5e\u985e\u63a5\u7d9a", "\u9023\u4f53\u8a5e", "\u63a5\u7d9a\u8a5e", "\u52a9\u8a5e", "\u52a9\u8a5e-\u683c\u52a9\u8a5e", "\u52a9\u8a5e-\u683c\u52a9\u8a5e-\u4e00\u822c", "\u52a9\u8a5e-\u683c\u52a9\u8a5e-\u5f15\u7528", "\u52a9\u8a5e-\u683c\u52a9\u8a5e-\u9023\u8a9e", "\u52a9\u8a5e-\u63a5\u7d9a\u52a9\u8a5e", "\u52a9\u8a5e-\u4fc2\u52a9\u8a5e", "\u52a9\u8a5e-\u526f\u52a9\u8a5e", "\u52a9\u8a5e-\u9593\u6295\u52a9\u8a5e", "\u52a9\u8a5e-\u4e26\u7acb\u52a9\u8a5e", "\u52a9\u8a5e-\u7d42\u52a9\u8a5e", "\u52a9\u8a5e-\u526f\u52a9\u8a5e\uff0f\u4e26\u7acb\u52a9\u8a5e\uff0f\u7d42\u52a9\u8a5e", "\u52a9\u8a5e-\u9023\u4f53\u5316", "\u52a9\u8a5e-\u526f\u8a5e\u5316", "\u52a9\u8a5e-\u7279\u6b8a", "\u52a9\u52d5\u8a5e", "\u611f\u52d5\u8a5e", "\u8a18\u53f7", "\u8a18\u53f7-\u4e00\u822c", "\u8a18\u53f7-\u8aad\u70b9", "\u8a18\u53f7-\u53e5\u70b9", "\u8a18\u53f7-\u7a7a\u767d", "\u8a18\u53f7-\u62ec\u5f27\u958b", "\u8a18\u53f7-\u62ec\u5f27\u9589", "\u8a18\u53f7-\u30a2\u30eb\u30d5\u30a1\u30d9\u30c3\u30c8", "\u305d\u306e\u4ed6", "\u305d\u306e\u4ed6-\u9593\u6295", "\u30d5\u30a3\u30e9\u30fc", "\u975e\u8a00\u8a9e\u97f3", "\u8a9e\u65ad\u7247", "\u672a\u77e5\u8a9e"};
    // Inspector for the first argument (the tags to exclude); assigned in initialize().
    private ListObjectInspector tagsOI;
    // Stoptag set to filter; initialize() points this at STOPTAGS_JA.
    private String[] stopTags;
    // Precomputed answer; initialize() fills it only when the first argument is a constant.
    // While it is null, evaluate() computes the list per call.
    @Nullable
    private List<String> result;

    /**
     * Validates the call signature and prepares this UDF for evaluation.
     *
     * <p>Accepts one or two arguments: a string list of tags to exclude and, optionally, a
     * constant string language code, which must equal {@code "ja"} ignoring case. When the tag
     * list itself is a constant, the filtered stoptag list is computed once here and cached.
     *
     * @param argOIs object inspectors of the call arguments
     * @return a standard list object inspector whose elements are Java strings
     * @throws UDFArgumentException if the argument count or types are invalid, or the language
     *         is not supported
     */
    public ObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
        final int numArgs = argOIs.length;
        if (numArgs != 1 && numArgs != 2) {
            throw new UDFArgumentException("stoptags_exclude(array<string> tags, [, const string lang='ja']) takes one or two arguments: " + numArgs);
        }

        final ObjectInspector tagsArgOI = argOIs[0];
        if (!HiveUtils.isStringListOI(tagsArgOI)) {
            throw new UDFArgumentException("stoptags_exclude(array<string> tags, [, const string lang='ja']) expects array<string> for the first argument : " + tagsArgOI.getTypeName());
        }
        this.tagsOI = HiveUtils.asListOI(tagsArgOI);

        if (numArgs == 2) {
            final ObjectInspector langArgOI = argOIs[1];
            if (!HiveUtils.isConstString(langArgOI)) {
                throw new UDFArgumentException("stoptags_exclude(array<string> tags, [, const string lang='ja']) expects const string for the second argument: " + langArgOI.getTypeName());
            }
            final String lang = HiveUtils.getConstString(langArgOI);
            if (!"ja".equalsIgnoreCase(lang)) {
                throw new UDFArgumentException("Unsupported lang: " + lang);
            }
        }
        // Only the Japanese tag set exists, so it is used regardless of the optional argument.
        this.stopTags = STOPTAGS_JA;

        // A constant tag list yields the same answer for every row: compute it once up front.
        final ObjectInspector listOI = this.tagsOI;
        if (ObjectInspectorUtils.isConstantObjectInspector(listOI)) {
            final String[] constExcludeTags = HiveUtils.getConstStringArray(listOI);
            this.result = StoptagsExcludeUDF.getStoptags(this.stopTags, constExcludeTags);
        }

        final ObjectInspector elementOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
        return ObjectInspectorFactory.getStandardListObjectInspector(elementOI);
    }

    /**
     * Returns the stoptags left after removing the tags given in the first argument.
     *
     * <p>If {@code initialize} already cached a result (constant first argument), that cached
     * list is returned as-is. Otherwise the first argument is read as a string array; when it
     * is {@code null} the full stoptag set is returned.
     *
     * @param arguments deferred call arguments; only the first one is read here
     * @return the remaining stoptags
     * @throws HiveException declared by the UDF contract
     */
    public List<String> evaluate(GenericUDF.DeferredObject[] arguments) throws HiveException {
        final List<String> precomputed = this.result;
        if (precomputed != null) {
            return precomputed;
        }

        Objects.requireNonNull(this.stopTags);
        final String[] excludeTags = HiveUtils.asStringArray(arguments[0], this.tagsOI);
        if (excludeTags != null) {
            return StoptagsExcludeUDF.getStoptags(this.stopTags, excludeTags);
        }
        // Nothing to exclude: hand back the whole stoptag set.
        return ArrayUtils.asKryoSerializableList(this.stopTags);
    }

    /**
     * Returns the entries of {@code stopTags} that remain after removing every excluded tag
     * together with its sub-categories.
     *
     * <p>An excluded tag only takes effect when it matches an entry of {@code stopTags}
     * exactly. In that case the matching entry and each following entry of the form
     * {@code tag + "-" + ...} are removed. Entries that merely share the prefix without the
     * {@code '-'} separator are kept. {@code null} elements of {@code excludeTags} are ignored.
     *
     * @param stopTags candidate tags; must be sorted in natural {@link String} order because
     *        the lookup uses binary search. The array itself is not modified.
     * @param excludeTags tags to remove; may contain {@code null} elements
     * @return a new mutable list with the remaining tags in their original order
     */
    @Nonnull
    @VisibleForTesting
    static List<String> getStoptags(@Nonnull String[] stopTags, @Nonnull String[] excludeTags) {
        // Work on a copy so the shared stoptag array is never altered; removed slots become null.
        final String[] remaining = stopTags.clone();

        for (String tag : excludeTags) {
            if (tag == null) {
                // A null key would make Arrays.binarySearch throw NullPointerException.
                continue;
            }
            // Search the pristine array: the working copy has null holes and cannot be searched.
            final int index = Arrays.binarySearch(stopTags, tag);
            if (index < 0) {
                continue;
            }
            final int tagLen = tag.length();
            for (int i = index; i < remaining.length; i++) {
                final String stopTag = remaining[i];
                if (stopTag == null) {
                    continue; // already removed by an earlier excluded tag
                }
                if (!stopTag.startsWith(tag)) {
                    break; // the array is sorted, so the run sharing this prefix has ended
                }
                // Remove the tag itself and its '-'-separated sub-categories only.
                if (stopTag.length() == tagLen || stopTag.charAt(tagLen) == '-') {
                    remaining[i] = null;
                }
            }
        }

        final ArrayList<String> result = new ArrayList<String>(remaining.length);
        for (String tag : remaining) {
            if (tag != null) {
                result.add(tag);
            }
        }
        return result;
    }

    /**
     * Renders this call for display as {@code stoptags_exclude(<children joined by ','>)}.
     *
     * @param children display strings of the call arguments
     * @return the display string of this function call
     */
    public String getDisplayString(String[] children) {
        final String joinedArgs = StringUtils.join(children, ',');
        final StringBuilder display = new StringBuilder("stoptags_exclude(");
        display.append(joinedArgs);
        display.append(')');
        return display.toString();
    }

    // Sort the tag table once at class-load time: getStoptags() looks tags up with
    // Arrays.binarySearch, which requires its input array to be sorted.
    static {
        Arrays.sort(STOPTAGS_JA);
    }
}

