/*
 * Decompiled with CFR 0.152.
 */
package boofcv.alg.filter.convolve.down;

import boofcv.alg.filter.convolve.down.UtilDownConvolve;
import boofcv.struct.convolve.Kernel1D_F32;
import boofcv.struct.convolve.Kernel2D_F32;
import boofcv.struct.image.ImageFloat32;

/**
 * Hand-unrolled convolution routines that read an {@link ImageFloat32}, convolve it with a
 * kernel of width 3, 5, 7, 9 or 11 and write only every {@code skip}-th result into the output
 * image (i.e. the output is down-sampled by {@code skip} along the convolved axis/axes).
 *
 * <p>The range of sampled coordinates is obtained from
 * {@link UtilDownConvolve#computeOffset(int, int)} (first coordinate) and
 * {@link UtilDownConvolve#computeMaxSide(int, int, int)} (last coordinate, inclusive). Output
 * pixels outside that range are never written by this class. NOTE(review): presumably those
 * bounds keep the kernel fully inside the input image - confirm against UtilDownConvolve.</p>
 *
 * <p>The per-width methods do not validate the kernel size; use the dispatching methods
 * {@link #horizontal}, {@link #vertical} and {@link #convolve} unless the width is known.</p>
 */
public class ConvolveDownNoBorderUnrolled_F32_F32 {

    /**
     * Dispatches to the unrolled horizontal convolution matching {@code kernel.width}.
     *
     * @param kernel 1D kernel
     * @param image  input image
     * @param dest   output image that receives the down-sampled result
     * @param skip   sampling interval along the x-axis
     * @return {@code true} if an unrolled implementation exists for the kernel width
     *         (3, 5, 7, 9, 11) and was executed, {@code false} if nothing was done
     */
    public static boolean horizontal(Kernel1D_F32 kernel, ImageFloat32 image, ImageFloat32 dest, int skip) {
        switch (kernel.width) {
            case 3:
                horizontal3(kernel, image, dest, skip);
                return true;
            case 5:
                horizontal5(kernel, image, dest, skip);
                return true;
            case 7:
                horizontal7(kernel, image, dest, skip);
                return true;
            case 9:
                horizontal9(kernel, image, dest, skip);
                return true;
            case 11:
                horizontal11(kernel, image, dest, skip);
                return true;
            default:
                return false;
        }
    }

    /**
     * Dispatches to the unrolled vertical convolution matching {@code kernel.width}.
     *
     * @param kernel 1D kernel
     * @param image  input image
     * @param dest   output image that receives the down-sampled result
     * @param skip   sampling interval along the y-axis
     * @return {@code true} if an unrolled implementation exists for the kernel width
     *         (3, 5, 7, 9, 11) and was executed, {@code false} if nothing was done
     */
    public static boolean vertical(Kernel1D_F32 kernel, ImageFloat32 image, ImageFloat32 dest, int skip) {
        switch (kernel.width) {
            case 3:
                vertical3(kernel, image, dest, skip);
                return true;
            case 5:
                vertical5(kernel, image, dest, skip);
                return true;
            case 7:
                vertical7(kernel, image, dest, skip);
                return true;
            case 9:
                vertical9(kernel, image, dest, skip);
                return true;
            case 11:
                vertical11(kernel, image, dest, skip);
                return true;
            default:
                return false;
        }
    }

    /**
     * Dispatches to the unrolled 2D convolution matching {@code kernel.width}.
     *
     * @param kernel 2D kernel
     * @param image  input image
     * @param dest   output image that receives the down-sampled result
     * @param skip   sampling interval along both axes
     * @return {@code true} if an unrolled implementation exists for the kernel width
     *         (3, 5, 7, 9, 11) and was executed, {@code false} if nothing was done
     */
    public static boolean convolve(Kernel2D_F32 kernel, ImageFloat32 image, ImageFloat32 dest, int skip) {
        switch (kernel.width) {
            case 3:
                convolve3(kernel, image, dest, skip);
                return true;
            case 5:
                convolve5(kernel, image, dest, skip);
                return true;
            case 7:
                convolve7(kernel, image, dest, skip);
                return true;
            case 9:
                convolve9(kernel, image, dest, skip);
                return true;
            case 11:
                convolve11(kernel, image, dest, skip);
                return true;
            default:
                return false;
        }
    }

    /**
     * Horizontal convolution with a 3-element kernel, sampling every {@code skip}-th column.
     * Every input row is processed; row {@code i} of the input is written to row {@code i}
     * of the output, starting at output column {@code offsetX / skip}.
     *
     * @param kernel 1D kernel; the first 3 elements of {@code kernel.data} are used
     * @param input  source image
     * @param output destination image
     * @param skip   sampling interval along the x-axis
     */
    public static void horizontal3(Kernel1D_F32 kernel, ImageFloat32 input, ImageFloat32 output, int skip) {
        final float[] dataSrc = input.data;
        final float[] dataDst = output.data;

        final float k1 = kernel.data[0];
        final float k2 = kernel.data[1];
        final float k3 = kernel.data[2];

        final int radius = kernel.getRadius();
        final int widthEnd = UtilDownConvolve.computeMaxSide(input.width, skip, radius);
        final int height = input.getHeight();
        final int offsetX = UtilDownConvolve.computeOffset(skip, radius);

        for (int i = 0; i < height; i++) {
            int indexDst = output.startIndex + i * output.stride + offsetX / skip;
            // j indexes the left-most pixel under the kernel, hence the "- radius"
            int j = input.startIndex + i * input.stride - radius;
            final int jEnd = j + widthEnd;

            for (j += offsetX; j <= jEnd; j += skip) {
                int indexSrc = j;
                float total = dataSrc[indexSrc++] * k1;
                total += dataSrc[indexSrc++] * k2;
                total += dataSrc[indexSrc] * k3;

                dataDst[indexDst++] = total;
            }
        }
    }

    /**
     * Horizontal convolution with a 5-element kernel, sampling every {@code skip}-th column.
     * Every input row is processed; row {@code i} of the input is written to row {@code i}
     * of the output, starting at output column {@code offsetX / skip}.
     *
     * @param kernel 1D kernel; the first 5 elements of {@code kernel.data} are used
     * @param input  source image
     * @param output destination image
     * @param skip   sampling interval along the x-axis
     */
    public static void horizontal5(Kernel1D_F32 kernel, ImageFloat32 input, ImageFloat32 output, int skip) {
        final float[] dataSrc = input.data;
        final float[] dataDst = output.data;

        final float k1 = kernel.data[0];
        final float k2 = kernel.data[1];
        final float k3 = kernel.data[2];
        final float k4 = kernel.data[3];
        final float k5 = kernel.data[4];

        final int radius = kernel.getRadius();
        final int widthEnd = UtilDownConvolve.computeMaxSide(input.width, skip, radius);
        final int height = input.getHeight();
        final int offsetX = UtilDownConvolve.computeOffset(skip, radius);

        for (int i = 0; i < height; i++) {
            int indexDst = output.startIndex + i * output.stride + offsetX / skip;
            // j indexes the left-most pixel under the kernel, hence the "- radius"
            int j = input.startIndex + i * input.stride - radius;
            final int jEnd = j + widthEnd;

            for (j += offsetX; j <= jEnd; j += skip) {
                int indexSrc = j;
                float total = dataSrc[indexSrc++] * k1;
                total += dataSrc[indexSrc++] * k2;
                total += dataSrc[indexSrc++] * k3;
                total += dataSrc[indexSrc++] * k4;
                total += dataSrc[indexSrc] * k5;

                dataDst[indexDst++] = total;
            }
        }
    }

    /**
     * Horizontal convolution with a 7-element kernel, sampling every {@code skip}-th column.
     * Every input row is processed; row {@code i} of the input is written to row {@code i}
     * of the output, starting at output column {@code offsetX / skip}.
     *
     * @param kernel 1D kernel; the first 7 elements of {@code kernel.data} are used
     * @param input  source image
     * @param output destination image
     * @param skip   sampling interval along the x-axis
     */
    public static void horizontal7(Kernel1D_F32 kernel, ImageFloat32 input, ImageFloat32 output, int skip) {
        final float[] dataSrc = input.data;
        final float[] dataDst = output.data;

        final float k1 = kernel.data[0];
        final float k2 = kernel.data[1];
        final float k3 = kernel.data[2];
        final float k4 = kernel.data[3];
        final float k5 = kernel.data[4];
        final float k6 = kernel.data[5];
        final float k7 = kernel.data[6];

        final int radius = kernel.getRadius();
        final int widthEnd = UtilDownConvolve.computeMaxSide(input.width, skip, radius);
        final int height = input.getHeight();
        final int offsetX = UtilDownConvolve.computeOffset(skip, radius);

        for (int i = 0; i < height; i++) {
            int indexDst = output.startIndex + i * output.stride + offsetX / skip;
            // j indexes the left-most pixel under the kernel, hence the "- radius"
            int j = input.startIndex + i * input.stride - radius;
            final int jEnd = j + widthEnd;

            for (j += offsetX; j <= jEnd; j += skip) {
                int indexSrc = j;
                float total = dataSrc[indexSrc++] * k1;
                total += dataSrc[indexSrc++] * k2;
                total += dataSrc[indexSrc++] * k3;
                total += dataSrc[indexSrc++] * k4;
                total += dataSrc[indexSrc++] * k5;
                total += dataSrc[indexSrc++] * k6;
                total += dataSrc[indexSrc] * k7;

                dataDst[indexDst++] = total;
            }
        }
    }

    /**
     * Horizontal convolution with a 9-element kernel, sampling every {@code skip}-th column.
     * Every input row is processed; row {@code i} of the input is written to row {@code i}
     * of the output, starting at output column {@code offsetX / skip}.
     *
     * @param kernel 1D kernel; the first 9 elements of {@code kernel.data} are used
     * @param input  source image
     * @param output destination image
     * @param skip   sampling interval along the x-axis
     */
    public static void horizontal9(Kernel1D_F32 kernel, ImageFloat32 input, ImageFloat32 output, int skip) {
        final float[] dataSrc = input.data;
        final float[] dataDst = output.data;

        final float k1 = kernel.data[0];
        final float k2 = kernel.data[1];
        final float k3 = kernel.data[2];
        final float k4 = kernel.data[3];
        final float k5 = kernel.data[4];
        final float k6 = kernel.data[5];
        final float k7 = kernel.data[6];
        final float k8 = kernel.data[7];
        final float k9 = kernel.data[8];

        final int radius = kernel.getRadius();
        final int widthEnd = UtilDownConvolve.computeMaxSide(input.width, skip, radius);
        final int height = input.getHeight();
        final int offsetX = UtilDownConvolve.computeOffset(skip, radius);

        for (int i = 0; i < height; i++) {
            int indexDst = output.startIndex + i * output.stride + offsetX / skip;
            // j indexes the left-most pixel under the kernel, hence the "- radius"
            int j = input.startIndex + i * input.stride - radius;
            final int jEnd = j + widthEnd;

            for (j += offsetX; j <= jEnd; j += skip) {
                int indexSrc = j;
                float total = dataSrc[indexSrc++] * k1;
                total += dataSrc[indexSrc++] * k2;
                total += dataSrc[indexSrc++] * k3;
                total += dataSrc[indexSrc++] * k4;
                total += dataSrc[indexSrc++] * k5;
                total += dataSrc[indexSrc++] * k6;
                total += dataSrc[indexSrc++] * k7;
                total += dataSrc[indexSrc++] * k8;
                total += dataSrc[indexSrc] * k9;

                dataDst[indexDst++] = total;
            }
        }
    }

    /**
     * Horizontal convolution with an 11-element kernel, sampling every {@code skip}-th column.
     * Every input row is processed; row {@code i} of the input is written to row {@code i}
     * of the output, starting at output column {@code offsetX / skip}.
     *
     * @param kernel 1D kernel; the first 11 elements of {@code kernel.data} are used
     * @param input  source image
     * @param output destination image
     * @param skip   sampling interval along the x-axis
     */
    public static void horizontal11(Kernel1D_F32 kernel, ImageFloat32 input, ImageFloat32 output, int skip) {
        final float[] dataSrc = input.data;
        final float[] dataDst = output.data;

        final float k1 = kernel.data[0];
        final float k2 = kernel.data[1];
        final float k3 = kernel.data[2];
        final float k4 = kernel.data[3];
        final float k5 = kernel.data[4];
        final float k6 = kernel.data[5];
        final float k7 = kernel.data[6];
        final float k8 = kernel.data[7];
        final float k9 = kernel.data[8];
        final float k10 = kernel.data[9];
        final float k11 = kernel.data[10];

        final int radius = kernel.getRadius();
        final int widthEnd = UtilDownConvolve.computeMaxSide(input.width, skip, radius);
        final int height = input.getHeight();
        final int offsetX = UtilDownConvolve.computeOffset(skip, radius);

        for (int i = 0; i < height; i++) {
            int indexDst = output.startIndex + i * output.stride + offsetX / skip;
            // j indexes the left-most pixel under the kernel, hence the "- radius"
            int j = input.startIndex + i * input.stride - radius;
            final int jEnd = j + widthEnd;

            for (j += offsetX; j <= jEnd; j += skip) {
                int indexSrc = j;
                float total = dataSrc[indexSrc++] * k1;
                total += dataSrc[indexSrc++] * k2;
                total += dataSrc[indexSrc++] * k3;
                total += dataSrc[indexSrc++] * k4;
                total += dataSrc[indexSrc++] * k5;
                total += dataSrc[indexSrc++] * k6;
                total += dataSrc[indexSrc++] * k7;
                total += dataSrc[indexSrc++] * k8;
                total += dataSrc[indexSrc++] * k9;
                total += dataSrc[indexSrc++] * k10;
                total += dataSrc[indexSrc] * k11;

                dataDst[indexDst++] = total;
            }
        }
    }

    /**
     * Vertical convolution with a 3-element kernel, sampling every {@code skip}-th row.
     * All {@code input.width} columns of each sampled row are processed; sampled input row
     * {@code y} is written to output row {@code y / skip}.
     *
     * @param kernel 1D kernel; the first 3 elements of {@code kernel.data} are used
     * @param input  source image
     * @param output destination image
     * @param skip   sampling interval along the y-axis
     */
    public static void vertical3(Kernel1D_F32 kernel, ImageFloat32 input, ImageFloat32 output, int skip) {
        final float[] dataSrc = input.data;
        final float[] dataDst = output.data;

        final float k1 = kernel.data[0];
        final float k2 = kernel.data[1];
        final float k3 = kernel.data[2];

        final int radius = kernel.getRadius();
        final int width = input.width;
        final int stride = input.stride;
        final int heightEnd = UtilDownConvolve.computeMaxSide(input.height, skip, radius);
        final int offsetY = UtilDownConvolve.computeOffset(skip, radius);

        for (int y = offsetY; y <= heightEnd; y += skip) {
            int indexDst = output.startIndex + (y / skip) * output.stride;
            // i starts at the top-most pixel under the kernel; it must be assigned before iEnd
            int i = input.startIndex + (y - radius) * stride;
            final int iEnd = i + width;

            for (; i < iEnd; i++) {
                int indexSrc = i;
                float total = dataSrc[indexSrc] * k1;
                total += dataSrc[indexSrc += stride] * k2;
                total += dataSrc[indexSrc += stride] * k3;

                dataDst[indexDst++] = total;
            }
        }
    }

    /**
     * Vertical convolution with a 5-element kernel, sampling every {@code skip}-th row.
     * All {@code input.width} columns of each sampled row are processed; sampled input row
     * {@code y} is written to output row {@code y / skip}.
     *
     * @param kernel 1D kernel; the first 5 elements of {@code kernel.data} are used
     * @param input  source image
     * @param output destination image
     * @param skip   sampling interval along the y-axis
     */
    public static void vertical5(Kernel1D_F32 kernel, ImageFloat32 input, ImageFloat32 output, int skip) {
        final float[] dataSrc = input.data;
        final float[] dataDst = output.data;

        final float k1 = kernel.data[0];
        final float k2 = kernel.data[1];
        final float k3 = kernel.data[2];
        final float k4 = kernel.data[3];
        final float k5 = kernel.data[4];

        final int radius = kernel.getRadius();
        final int width = input.width;
        final int stride = input.stride;
        final int heightEnd = UtilDownConvolve.computeMaxSide(input.height, skip, radius);
        final int offsetY = UtilDownConvolve.computeOffset(skip, radius);

        for (int y = offsetY; y <= heightEnd; y += skip) {
            int indexDst = output.startIndex + (y / skip) * output.stride;
            // i starts at the top-most pixel under the kernel; it must be assigned before iEnd
            int i = input.startIndex + (y - radius) * stride;
            final int iEnd = i + width;

            for (; i < iEnd; i++) {
                int indexSrc = i;
                float total = dataSrc[indexSrc] * k1;
                total += dataSrc[indexSrc += stride] * k2;
                total += dataSrc[indexSrc += stride] * k3;
                total += dataSrc[indexSrc += stride] * k4;
                total += dataSrc[indexSrc += stride] * k5;

                dataDst[indexDst++] = total;
            }
        }
    }

    /**
     * Vertical convolution with a 7-element kernel, sampling every {@code skip}-th row.
     * All {@code input.width} columns of each sampled row are processed; sampled input row
     * {@code y} is written to output row {@code y / skip}.
     *
     * @param kernel 1D kernel; the first 7 elements of {@code kernel.data} are used
     * @param input  source image
     * @param output destination image
     * @param skip   sampling interval along the y-axis
     */
    public static void vertical7(Kernel1D_F32 kernel, ImageFloat32 input, ImageFloat32 output, int skip) {
        final float[] dataSrc = input.data;
        final float[] dataDst = output.data;

        final float k1 = kernel.data[0];
        final float k2 = kernel.data[1];
        final float k3 = kernel.data[2];
        final float k4 = kernel.data[3];
        final float k5 = kernel.data[4];
        final float k6 = kernel.data[5];
        final float k7 = kernel.data[6];

        final int radius = kernel.getRadius();
        final int width = input.width;
        final int stride = input.stride;
        final int heightEnd = UtilDownConvolve.computeMaxSide(input.height, skip, radius);
        final int offsetY = UtilDownConvolve.computeOffset(skip, radius);

        for (int y = offsetY; y <= heightEnd; y += skip) {
            int indexDst = output.startIndex + (y / skip) * output.stride;
            // i starts at the top-most pixel under the kernel; it must be assigned before iEnd
            int i = input.startIndex + (y - radius) * stride;
            final int iEnd = i + width;

            for (; i < iEnd; i++) {
                int indexSrc = i;
                float total = dataSrc[indexSrc] * k1;
                total += dataSrc[indexSrc += stride] * k2;
                total += dataSrc[indexSrc += stride] * k3;
                total += dataSrc[indexSrc += stride] * k4;
                total += dataSrc[indexSrc += stride] * k5;
                total += dataSrc[indexSrc += stride] * k6;
                total += dataSrc[indexSrc += stride] * k7;

                dataDst[indexDst++] = total;
            }
        }
    }

    /**
     * Vertical convolution with a 9-element kernel, sampling every {@code skip}-th row.
     * All {@code input.width} columns of each sampled row are processed; sampled input row
     * {@code y} is written to output row {@code y / skip}.
     *
     * @param kernel 1D kernel; the first 9 elements of {@code kernel.data} are used
     * @param input  source image
     * @param output destination image
     * @param skip   sampling interval along the y-axis
     */
    public static void vertical9(Kernel1D_F32 kernel, ImageFloat32 input, ImageFloat32 output, int skip) {
        final float[] dataSrc = input.data;
        final float[] dataDst = output.data;

        final float k1 = kernel.data[0];
        final float k2 = kernel.data[1];
        final float k3 = kernel.data[2];
        final float k4 = kernel.data[3];
        final float k5 = kernel.data[4];
        final float k6 = kernel.data[5];
        final float k7 = kernel.data[6];
        final float k8 = kernel.data[7];
        final float k9 = kernel.data[8];

        final int radius = kernel.getRadius();
        final int width = input.width;
        final int stride = input.stride;
        final int heightEnd = UtilDownConvolve.computeMaxSide(input.height, skip, radius);
        final int offsetY = UtilDownConvolve.computeOffset(skip, radius);

        for (int y = offsetY; y <= heightEnd; y += skip) {
            int indexDst = output.startIndex + (y / skip) * output.stride;
            // i starts at the top-most pixel under the kernel; it must be assigned before iEnd
            int i = input.startIndex + (y - radius) * stride;
            final int iEnd = i + width;

            for (; i < iEnd; i++) {
                int indexSrc = i;
                float total = dataSrc[indexSrc] * k1;
                total += dataSrc[indexSrc += stride] * k2;
                total += dataSrc[indexSrc += stride] * k3;
                total += dataSrc[indexSrc += stride] * k4;
                total += dataSrc[indexSrc += stride] * k5;
                total += dataSrc[indexSrc += stride] * k6;
                total += dataSrc[indexSrc += stride] * k7;
                total += dataSrc[indexSrc += stride] * k8;
                total += dataSrc[indexSrc += stride] * k9;

                dataDst[indexDst++] = total;
            }
        }
    }

    /**
     * Vertical convolution with an 11-element kernel, sampling every {@code skip}-th row.
     * All {@code input.width} columns of each sampled row are processed; sampled input row
     * {@code y} is written to output row {@code y / skip}.
     *
     * @param kernel 1D kernel; the first 11 elements of {@code kernel.data} are used
     * @param input  source image
     * @param output destination image
     * @param skip   sampling interval along the y-axis
     */
    public static void vertical11(Kernel1D_F32 kernel, ImageFloat32 input, ImageFloat32 output, int skip) {
        final float[] dataSrc = input.data;
        final float[] dataDst = output.data;

        final float k1 = kernel.data[0];
        final float k2 = kernel.data[1];
        final float k3 = kernel.data[2];
        final float k4 = kernel.data[3];
        final float k5 = kernel.data[4];
        final float k6 = kernel.data[5];
        final float k7 = kernel.data[6];
        final float k8 = kernel.data[7];
        final float k9 = kernel.data[8];
        final float k10 = kernel.data[9];
        final float k11 = kernel.data[10];

        final int radius = kernel.getRadius();
        final int width = input.width;
        final int stride = input.stride;
        final int heightEnd = UtilDownConvolve.computeMaxSide(input.height, skip, radius);
        final int offsetY = UtilDownConvolve.computeOffset(skip, radius);

        for (int y = offsetY; y <= heightEnd; y += skip) {
            int indexDst = output.startIndex + (y / skip) * output.stride;
            // i starts at the top-most pixel under the kernel; it must be assigned before iEnd
            int i = input.startIndex + (y - radius) * stride;
            final int iEnd = i + width;

            for (; i < iEnd; i++) {
                int indexSrc = i;
                float total = dataSrc[indexSrc] * k1;
                total += dataSrc[indexSrc += stride] * k2;
                total += dataSrc[indexSrc += stride] * k3;
                total += dataSrc[indexSrc += stride] * k4;
                total += dataSrc[indexSrc += stride] * k5;
                total += dataSrc[indexSrc += stride] * k6;
                total += dataSrc[indexSrc += stride] * k7;
                total += dataSrc[indexSrc += stride] * k8;
                total += dataSrc[indexSrc += stride] * k9;
                total += dataSrc[indexSrc += stride] * k10;
                total += dataSrc[indexSrc += stride] * k11;

                dataDst[indexDst++] = total;
            }
        }
    }

    /**
     * 2D convolution with a 3x3 kernel, sampling every {@code skip}-th pixel along both axes.
     * The kernel is applied one kernel row at a time: the first kernel row overwrites the
     * output row and each following kernel row is accumulated onto it. Kernel coefficients
     * are read from {@code kernel.data} in row-major order with 3 values per row.
     *
     * @param kernel 2D kernel; the first 9 elements of {@code kernel.data} are used
     * @param input  source image
     * @param output destination image
     * @param skip   sampling interval along both axes
     */
    public static void convolve3(Kernel2D_F32 kernel, ImageFloat32 input, ImageFloat32 output, int skip) {
        final float[] dataSrc = input.data;
        final float[] dataDst = output.data;

        final int radius = kernel.getRadius();
        final int widthEnd = UtilDownConvolve.computeMaxSide(input.width, skip, radius);
        final int heightEnd = UtilDownConvolve.computeMaxSide(input.height, skip, radius);
        final int offset = UtilDownConvolve.computeOffset(skip, radius);

        for (int y = offset; y <= heightEnd; y += skip) {
            // start of the output row segment; identical for every kernel row
            final int indexDstRow = output.startIndex + (y / skip) * output.stride + offset / skip;

            // first kernel row: overwrite the destination
            float k1 = kernel.data[0];
            float k2 = kernel.data[1];
            float k3 = kernel.data[2];

            int indexDst = indexDstRow;
            int indexSrcRow = input.startIndex + (y - radius) * input.stride - radius;
            for (int x = offset; x <= widthEnd; x += skip) {
                int indexSrc = indexSrcRow + x;
                float total = 0.0f;
                total += dataSrc[indexSrc++] * k1;
                total += dataSrc[indexSrc++] * k2;
                total += dataSrc[indexSrc] * k3;

                dataDst[indexDst++] = total;
            }

            // remaining kernel rows: accumulate onto the destination
            for (int i = 1; i < 3; i++) {
                indexDst = indexDstRow;
                indexSrcRow = input.startIndex + (y + i - radius) * input.stride - radius;

                k1 = kernel.data[i * 3];
                k2 = kernel.data[i * 3 + 1];
                k3 = kernel.data[i * 3 + 2];

                for (int x = offset; x <= widthEnd; x += skip) {
                    int indexSrc = indexSrcRow + x;
                    float total = 0.0f;
                    total += dataSrc[indexSrc++] * k1;
                    total += dataSrc[indexSrc++] * k2;
                    total += dataSrc[indexSrc] * k3;

                    dataDst[indexDst++] += total;
                }
            }
        }
    }

    /**
     * 2D convolution with a 5x5 kernel, sampling every {@code skip}-th pixel along both axes.
     * The kernel is applied one kernel row at a time: the first kernel row overwrites the
     * output row and each following kernel row is accumulated onto it. Kernel coefficients
     * are read from {@code kernel.data} in row-major order with 5 values per row.
     *
     * @param kernel 2D kernel; the first 25 elements of {@code kernel.data} are used
     * @param input  source image
     * @param output destination image
     * @param skip   sampling interval along both axes
     */
    public static void convolve5(Kernel2D_F32 kernel, ImageFloat32 input, ImageFloat32 output, int skip) {
        final float[] dataSrc = input.data;
        final float[] dataDst = output.data;

        final int radius = kernel.getRadius();
        final int widthEnd = UtilDownConvolve.computeMaxSide(input.width, skip, radius);
        final int heightEnd = UtilDownConvolve.computeMaxSide(input.height, skip, radius);
        final int offset = UtilDownConvolve.computeOffset(skip, radius);

        for (int y = offset; y <= heightEnd; y += skip) {
            // start of the output row segment; identical for every kernel row
            final int indexDstRow = output.startIndex + (y / skip) * output.stride + offset / skip;

            // first kernel row: overwrite the destination
            float k1 = kernel.data[0];
            float k2 = kernel.data[1];
            float k3 = kernel.data[2];
            float k4 = kernel.data[3];
            float k5 = kernel.data[4];

            int indexDst = indexDstRow;
            int indexSrcRow = input.startIndex + (y - radius) * input.stride - radius;
            for (int x = offset; x <= widthEnd; x += skip) {
                int indexSrc = indexSrcRow + x;
                float total = 0.0f;
                total += dataSrc[indexSrc++] * k1;
                total += dataSrc[indexSrc++] * k2;
                total += dataSrc[indexSrc++] * k3;
                total += dataSrc[indexSrc++] * k4;
                total += dataSrc[indexSrc] * k5;

                dataDst[indexDst++] = total;
            }

            // remaining kernel rows: accumulate onto the destination
            for (int i = 1; i < 5; i++) {
                indexDst = indexDstRow;
                indexSrcRow = input.startIndex + (y + i - radius) * input.stride - radius;

                k1 = kernel.data[i * 5];
                k2 = kernel.data[i * 5 + 1];
                k3 = kernel.data[i * 5 + 2];
                k4 = kernel.data[i * 5 + 3];
                k5 = kernel.data[i * 5 + 4];

                for (int x = offset; x <= widthEnd; x += skip) {
                    int indexSrc = indexSrcRow + x;
                    float total = 0.0f;
                    total += dataSrc[indexSrc++] * k1;
                    total += dataSrc[indexSrc++] * k2;
                    total += dataSrc[indexSrc++] * k3;
                    total += dataSrc[indexSrc++] * k4;
                    total += dataSrc[indexSrc] * k5;

                    dataDst[indexDst++] += total;
                }
            }
        }
    }

    /**
     * 2D convolution with a 7x7 kernel, sampling every {@code skip}-th pixel along both axes.
     * The kernel is applied one kernel row at a time: the first kernel row overwrites the
     * output row and each following kernel row is accumulated onto it. Kernel coefficients
     * are read from {@code kernel.data} in row-major order with 7 values per row.
     *
     * @param kernel 2D kernel; the first 49 elements of {@code kernel.data} are used
     * @param input  source image
     * @param output destination image
     * @param skip   sampling interval along both axes
     */
    public static void convolve7(Kernel2D_F32 kernel, ImageFloat32 input, ImageFloat32 output, int skip) {
        final float[] dataSrc = input.data;
        final float[] dataDst = output.data;

        final int radius = kernel.getRadius();
        final int widthEnd = UtilDownConvolve.computeMaxSide(input.width, skip, radius);
        final int heightEnd = UtilDownConvolve.computeMaxSide(input.height, skip, radius);
        final int offset = UtilDownConvolve.computeOffset(skip, radius);

        for (int y = offset; y <= heightEnd; y += skip) {
            // start of the output row segment; identical for every kernel row
            final int indexDstRow = output.startIndex + (y / skip) * output.stride + offset / skip;

            // first kernel row: overwrite the destination
            float k1 = kernel.data[0];
            float k2 = kernel.data[1];
            float k3 = kernel.data[2];
            float k4 = kernel.data[3];
            float k5 = kernel.data[4];
            float k6 = kernel.data[5];
            float k7 = kernel.data[6];

            int indexDst = indexDstRow;
            int indexSrcRow = input.startIndex + (y - radius) * input.stride - radius;
            for (int x = offset; x <= widthEnd; x += skip) {
                int indexSrc = indexSrcRow + x;
                float total = 0.0f;
                total += dataSrc[indexSrc++] * k1;
                total += dataSrc[indexSrc++] * k2;
                total += dataSrc[indexSrc++] * k3;
                total += dataSrc[indexSrc++] * k4;
                total += dataSrc[indexSrc++] * k5;
                total += dataSrc[indexSrc++] * k6;
                total += dataSrc[indexSrc] * k7;

                dataDst[indexDst++] = total;
            }

            // remaining kernel rows: accumulate onto the destination
            for (int i = 1; i < 7; i++) {
                indexDst = indexDstRow;
                indexSrcRow = input.startIndex + (y + i - radius) * input.stride - radius;

                k1 = kernel.data[i * 7];
                k2 = kernel.data[i * 7 + 1];
                k3 = kernel.data[i * 7 + 2];
                k4 = kernel.data[i * 7 + 3];
                k5 = kernel.data[i * 7 + 4];
                k6 = kernel.data[i * 7 + 5];
                k7 = kernel.data[i * 7 + 6];

                for (int x = offset; x <= widthEnd; x += skip) {
                    int indexSrc = indexSrcRow + x;
                    float total = 0.0f;
                    total += dataSrc[indexSrc++] * k1;
                    total += dataSrc[indexSrc++] * k2;
                    total += dataSrc[indexSrc++] * k3;
                    total += dataSrc[indexSrc++] * k4;
                    total += dataSrc[indexSrc++] * k5;
                    total += dataSrc[indexSrc++] * k6;
                    total += dataSrc[indexSrc] * k7;

                    dataDst[indexDst++] += total;
                }
            }
        }
    }

    /**
     * 2D convolution with a 9x9 kernel, sampling every {@code skip}-th pixel along both axes.
     * The kernel is applied one kernel row at a time: the first kernel row overwrites the
     * output row and each following kernel row is accumulated onto it. Kernel coefficients
     * are read from {@code kernel.data} in row-major order with 9 values per row.
     *
     * @param kernel 2D kernel; the first 81 elements of {@code kernel.data} are used
     * @param input  source image
     * @param output destination image
     * @param skip   sampling interval along both axes
     */
    public static void convolve9(Kernel2D_F32 kernel, ImageFloat32 input, ImageFloat32 output, int skip) {
        final float[] dataSrc = input.data;
        final float[] dataDst = output.data;

        final int radius = kernel.getRadius();
        final int widthEnd = UtilDownConvolve.computeMaxSide(input.width, skip, radius);
        final int heightEnd = UtilDownConvolve.computeMaxSide(input.height, skip, radius);
        final int offset = UtilDownConvolve.computeOffset(skip, radius);

        for (int y = offset; y <= heightEnd; y += skip) {
            // start of the output row segment; identical for every kernel row
            final int indexDstRow = output.startIndex + (y / skip) * output.stride + offset / skip;

            // first kernel row: overwrite the destination
            float k1 = kernel.data[0];
            float k2 = kernel.data[1];
            float k3 = kernel.data[2];
            float k4 = kernel.data[3];
            float k5 = kernel.data[4];
            float k6 = kernel.data[5];
            float k7 = kernel.data[6];
            float k8 = kernel.data[7];
            float k9 = kernel.data[8];

            int indexDst = indexDstRow;
            int indexSrcRow = input.startIndex + (y - radius) * input.stride - radius;
            for (int x = offset; x <= widthEnd; x += skip) {
                int indexSrc = indexSrcRow + x;
                float total = 0.0f;
                total += dataSrc[indexSrc++] * k1;
                total += dataSrc[indexSrc++] * k2;
                total += dataSrc[indexSrc++] * k3;
                total += dataSrc[indexSrc++] * k4;
                total += dataSrc[indexSrc++] * k5;
                total += dataSrc[indexSrc++] * k6;
                total += dataSrc[indexSrc++] * k7;
                total += dataSrc[indexSrc++] * k8;
                total += dataSrc[indexSrc] * k9;

                dataDst[indexDst++] = total;
            }

            // remaining kernel rows: accumulate onto the destination
            for (int i = 1; i < 9; i++) {
                indexDst = indexDstRow;
                indexSrcRow = input.startIndex + (y + i - radius) * input.stride - radius;

                k1 = kernel.data[i * 9];
                k2 = kernel.data[i * 9 + 1];
                k3 = kernel.data[i * 9 + 2];
                k4 = kernel.data[i * 9 + 3];
                k5 = kernel.data[i * 9 + 4];
                k6 = kernel.data[i * 9 + 5];
                k7 = kernel.data[i * 9 + 6];
                k8 = kernel.data[i * 9 + 7];
                k9 = kernel.data[i * 9 + 8];

                for (int x = offset; x <= widthEnd; x += skip) {
                    int indexSrc = indexSrcRow + x;
                    float total = 0.0f;
                    total += dataSrc[indexSrc++] * k1;
                    total += dataSrc[indexSrc++] * k2;
                    total += dataSrc[indexSrc++] * k3;
                    total += dataSrc[indexSrc++] * k4;
                    total += dataSrc[indexSrc++] * k5;
                    total += dataSrc[indexSrc++] * k6;
                    total += dataSrc[indexSrc++] * k7;
                    total += dataSrc[indexSrc++] * k8;
                    total += dataSrc[indexSrc] * k9;

                    dataDst[indexDst++] += total;
                }
            }
        }
    }

    /**
     * 2D convolution with an 11x11 kernel, sampling every {@code skip}-th pixel along both
     * axes. The kernel is applied one kernel row at a time: the first kernel row overwrites
     * the output row and each following kernel row is accumulated onto it. Kernel
     * coefficients are read from {@code kernel.data} in row-major order with 11 values per row.
     *
     * @param kernel 2D kernel; the first 121 elements of {@code kernel.data} are used
     * @param input  source image
     * @param output destination image
     * @param skip   sampling interval along both axes
     */
    public static void convolve11(Kernel2D_F32 kernel, ImageFloat32 input, ImageFloat32 output, int skip) {
        final float[] dataSrc = input.data;
        final float[] dataDst = output.data;

        final int radius = kernel.getRadius();
        final int widthEnd = UtilDownConvolve.computeMaxSide(input.width, skip, radius);
        final int heightEnd = UtilDownConvolve.computeMaxSide(input.height, skip, radius);
        final int offset = UtilDownConvolve.computeOffset(skip, radius);

        for (int y = offset; y <= heightEnd; y += skip) {
            // start of the output row segment; identical for every kernel row
            final int indexDstRow = output.startIndex + (y / skip) * output.stride + offset / skip;

            // first kernel row: overwrite the destination
            float k1 = kernel.data[0];
            float k2 = kernel.data[1];
            float k3 = kernel.data[2];
            float k4 = kernel.data[3];
            float k5 = kernel.data[4];
            float k6 = kernel.data[5];
            float k7 = kernel.data[6];
            float k8 = kernel.data[7];
            float k9 = kernel.data[8];
            float k10 = kernel.data[9];
            float k11 = kernel.data[10];

            int indexDst = indexDstRow;
            int indexSrcRow = input.startIndex + (y - radius) * input.stride - radius;
            for (int x = offset; x <= widthEnd; x += skip) {
                int indexSrc = indexSrcRow + x;
                float total = 0.0f;
                total += dataSrc[indexSrc++] * k1;
                total += dataSrc[indexSrc++] * k2;
                total += dataSrc[indexSrc++] * k3;
                total += dataSrc[indexSrc++] * k4;
                total += dataSrc[indexSrc++] * k5;
                total += dataSrc[indexSrc++] * k6;
                total += dataSrc[indexSrc++] * k7;
                total += dataSrc[indexSrc++] * k8;
                total += dataSrc[indexSrc++] * k9;
                total += dataSrc[indexSrc++] * k10;
                total += dataSrc[indexSrc] * k11;

                dataDst[indexDst++] = total;
            }

            // remaining kernel rows: accumulate onto the destination
            for (int i = 1; i < 11; i++) {
                indexDst = indexDstRow;
                indexSrcRow = input.startIndex + (y + i - radius) * input.stride - radius;

                k1 = kernel.data[i * 11];
                k2 = kernel.data[i * 11 + 1];
                k3 = kernel.data[i * 11 + 2];
                k4 = kernel.data[i * 11 + 3];
                k5 = kernel.data[i * 11 + 4];
                k6 = kernel.data[i * 11 + 5];
                k7 = kernel.data[i * 11 + 6];
                k8 = kernel.data[i * 11 + 7];
                k9 = kernel.data[i * 11 + 8];
                k10 = kernel.data[i * 11 + 9];
                k11 = kernel.data[i * 11 + 10];

                for (int x = offset; x <= widthEnd; x += skip) {
                    int indexSrc = indexSrcRow + x;
                    float total = 0.0f;
                    total += dataSrc[indexSrc++] * k1;
                    total += dataSrc[indexSrc++] * k2;
                    total += dataSrc[indexSrc++] * k3;
                    total += dataSrc[indexSrc++] * k4;
                    total += dataSrc[indexSrc++] * k5;
                    total += dataSrc[indexSrc++] * k6;
                    total += dataSrc[indexSrc++] * k7;
                    total += dataSrc[indexSrc++] * k8;
                    total += dataSrc[indexSrc++] * k9;
                    total += dataSrc[indexSrc++] * k10;
                    total += dataSrc[indexSrc] * k11;

                    dataDst[indexDst++] += total;
                }
            }
        }
    }
}

