/*
 * Decompiled with CFR 0.152.
 */
package com.github.aaronshan.functions.string;

import com.github.aaronshan.functions.utils.Failures;
import io.airlift.slice.Slice;
import io.airlift.slice.SliceUtf8;
import io.airlift.slice.Slices;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;

/**
 * Hive UDF that computes the Levenshtein (edit) distance between two strings.
 *
 * <p>The distance is measured in Unicode code points rather than UTF-16 code
 * units or bytes: both inputs are converted to code point arrays before the
 * comparison starts.
 *
 * <p>The {@link LongWritable} returned by {@link #evaluate(Text, Text)} is a
 * single instance that is reused across calls, so callers must read its value
 * before invoking the function again.
 */
@Description(name="levenshtein_distance", value="_FUNC_(string, string) - computes Levenshtein distance between two strings.", extended="Example:\n > select _FUNC_(string, string) from src;")
public class UDFStringLevenshteinDistance
extends UDF {
    /**
     * Largest accepted value of {@code longerLength * (shorterLength - 1)},
     * i.e. the cap on the amount of work done by the quadratic inner loop.
     */
    private static final long MAX_COMBINED_SIZE = 1000000L;

    /** Reused output holder; overwritten on every non-null evaluation. */
    private LongWritable result = new LongWritable(0L);

    /**
     * Computes the Levenshtein distance between {@code leftText} and {@code rightText}.
     *
     * @param leftText  first string, may be {@code null}
     * @param rightText second string, may be {@code null}
     * @return {@code null} if either argument is {@code null}; otherwise the
     *         shared result holder set to the edit distance in code points
     * @throws HiveException if an input is reported as invalid UTF-8 while
     *         counting code points; the size limit is enforced through
     *         {@code Failures.checkCondition}, whose failure behaviour is
     *         defined in that helper (presumably it raises an error for
     *         oversized inputs)
     */
    public LongWritable evaluate(Text leftText, Text rightText) throws HiveException {
        if (leftText == null || rightText == null) {
            return null;
        }

        int[] leftCodePoints = castToCodePoints(Slices.utf8Slice(leftText.toString()));
        int[] rightCodePoints = castToCodePoints(Slices.utf8Slice(rightText.toString()));

        // Keep the shorter input on the right so the working row is as small as possible.
        if (leftCodePoints.length < rightCodePoints.length) {
            int[] tempCodePoints = leftCodePoints;
            leftCodePoints = rightCodePoints;
            rightCodePoints = tempCodePoints;
        }

        // Distance to an empty string is simply the length of the other string.
        if (rightCodePoints.length == 0) {
            this.result.set(leftCodePoints.length);
            return this.result;
        }

        // Multiply in long arithmetic: an int product wraps around for large inputs
        // (e.g. 65537 * 65535 == -1 as an int) and would slip past the size limit.
        long combinedSize = (long) leftCodePoints.length * (rightCodePoints.length - 1);
        Failures.checkCondition(combinedSize <= MAX_COMBINED_SIZE, "The combined inputs for Levenshtein distance are too large", new Object[0]);

        // Single-row dynamic programme. Before processing left code point i,
        // distances[j] is the distance between the first i left code points and
        // the first j + 1 right code points. For i == 0 that is j + 1 insertions.
        int[] distances = new int[rightCodePoints.length];
        for (int j = 0; j < rightCodePoints.length; ++j) {
            distances[j] = j + 1;
        }

        for (int i = 0; i < leftCodePoints.length; ++i) {
            // Previous-row value of the cell being overwritten; it is the
            // diagonal neighbour of the next column.
            int leftUpDistance = distances[0];
            // Column 0: the diagonal neighbour is "i left code points vs. empty string" == i.
            if (leftCodePoints[i] == rightCodePoints[0]) {
                distances[0] = i;
            } else {
                distances[0] = Math.min(i, distances[0]) + 1;
            }
            for (int j = 1; j < rightCodePoints.length; ++j) {
                int leftUpDistanceNext = distances[j];
                if (leftCodePoints[i] == rightCodePoints[j]) {
                    // Match: carry the diagonal value over unchanged.
                    distances[j] = leftUpDistance;
                } else {
                    // Mismatch: cheapest of the left, diagonal and upper neighbours, plus one edit.
                    distances[j] = Math.min(distances[j - 1], Math.min(leftUpDistance, distances[j])) + 1;
                }
                leftUpDistance = leftUpDistanceNext;
            }
        }

        this.result.set(distances[rightCodePoints.length - 1]);
        return this.result;
    }

    /**
     * Decodes a UTF-8 slice into an array of code points.
     *
     * @param slice UTF-8 encoded input
     * @return one array element per code point, in order
     * @throws HiveException if {@link #safeCountCodePoints(Slice)} rejects the slice
     */
    private static int[] castToCodePoints(Slice slice) throws HiveException {
        // Counting first both sizes the array and validates the encoding,
        // so the decoding loop below can use the non-checking accessors.
        int[] codePoints = new int[safeCountCodePoints(slice)];
        int position = 0;
        for (int index = 0; index < codePoints.length; ++index) {
            codePoints[index] = SliceUtf8.getCodePointAt(slice, position);
            position += SliceUtf8.lengthOfCodePoint(slice, position);
        }
        return codePoints;
    }

    /**
     * Counts the code points in a UTF-8 slice, validating each one on the way.
     *
     * @param slice UTF-8 encoded input
     * @return number of code points in {@code slice}
     * @throws HiveException if {@code SliceUtf8.tryGetCodePointAt} returns a
     *         negative value for any position, which this method treats as
     *         invalid UTF-8
     */
    private static int safeCountCodePoints(Slice slice) throws HiveException {
        int codePoints = 0;
        int position = 0;
        while (position < slice.length()) {
            int codePoint = SliceUtf8.tryGetCodePointAt(slice, position);
            if (codePoint < 0) {
                throw new HiveException("Invalid UTF-8 encoding in characters: " + slice.toStringUtf8());
            }
            position += SliceUtf8.lengthOfCodePoint(codePoint);
            ++codePoints;
        }
        return codePoints;
    }
}

