/*
 * Decompiled with CFR 0.152.
 */
package info.debatty.java.stringsimilarity;

import java.io.Serializable;
import java.security.InvalidParameterException;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Pattern;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * A set of k-shingles: every element is a string of exactly {@code k} characters.
 *
 * <p>Shingles are collected with {@link #parse(String)}, which first collapses every run of
 * whitespace into a single space. The vector methods ({@link #booleanVectorOf(String)},
 * {@link #integerSetOf(String)}, {@link #profileOf(String)}) apply the same whitespace
 * normalization to their argument so that a string is always compared against the
 * shingles it would itself produce.
 *
 * <p>Positions in the returned vectors follow the iteration order of this set. Because the
 * class extends {@link HashSet}, that order is not the insertion order and may change when
 * shingles are added; vectors are only comparable with each other when they were computed
 * from the same, unmodified set.
 *
 * <p>Changing {@code k} with {@link #setK(int)} does not touch shingles that are already
 * stored, so it should be done before any shingle is added.
 */
public class KShingling extends HashSet<String> implements Serializable {

    /** Length of every shingle held by this set (5 unless configured otherwise). */
    protected int k = 5;

    /** Matches any run of whitespace; such runs are collapsed to one space. */
    private static final Pattern spaceReg = Pattern.compile("\\s+");

    /**
     * Small demonstration: parses two strings, prints the shingle set and a few vectors,
     * and finally adds a string of the wrong length, which makes {@link #add(String)} throw.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String s1 = "my string,  \n  my song";
        String s2 = "another string, from a song";
        KShingling ks = new KShingling(4);
        ks.parse(s1);
        ks.parse(s2);
        System.out.println(ks.toString());
        KShingling.printArray(ks.booleanVectorOf(s1));
        KShingling.printArray(ks.booleanVectorOf(s2));
        KShingling.printArray(ks.profileOf(s1));
        // Deliberate: the length differs from k, so add() throws InvalidParameterException.
        ks.add("This should trigger an exception!");
    }

    /**
     * Counts the non-overlapping occurrences of {@code substring} in {@code str}, scanning
     * from left to right (for example {@code "aa"} is counted twice in {@code "aaaa"}).
     *
     * @param substring the text to look for; must not be empty
     * @param str the text to search in
     * @return the number of non-overlapping occurrences
     * @throws InvalidParameterException if {@code substring} is empty (the count would be
     *         undefined and the computation would otherwise divide by zero)
     */
    public static int countOccurences(String substring, String str) {
        if (substring.length() == 0) {
            throw new InvalidParameterException("substring should not be empty!");
        }
        int removedChars = str.length() - str.replace(substring, "").length();
        return removedChars / substring.length();
    }

    /**
     * Prints a boolean vector on one line as {@code [0110...]}.
     *
     * @param a the vector to print
     */
    public static void printArray(boolean[] a) {
        System.out.print("[");
        for (boolean b : a) {
            System.out.print(b ? "1" : "0");
        }
        System.out.println("]");
    }

    /**
     * Prints an int vector on one line, each value followed by a tab, enclosed in brackets.
     *
     * @param a the vector to print
     */
    public static void printArray(int[] a) {
        System.out.print("[");
        for (int i : a) {
            System.out.print("" + i + "\t");
        }
        System.out.println("]");
    }

    /** Creates an empty shingle set using the default shingle length. */
    public KShingling() {
    }

    /**
     * Creates an empty shingle set with the given shingle length.
     *
     * @param k the shingle length; must be positive
     * @throws InvalidParameterException if {@code k <= 0}
     */
    public KShingling(int k) {
        this.setK(k);
    }

    /**
     * @return the shingle length used by this set
     */
    public int getK() {
        return this.k;
    }

    /**
     * Sets the shingle length. Shingles already stored are left as they are.
     *
     * @param k the new shingle length; must be positive
     * @throws InvalidParameterException if {@code k <= 0}
     */
    public final void setK(int k) {
        if (k <= 0) {
            throw new InvalidParameterException("k should be positive!");
        }
        this.k = k;
    }

    /**
     * Collapses whitespace runs in {@code s} to single spaces and adds every window of
     * {@code k} consecutive characters to this set. A string shorter than {@code k} adds
     * nothing.
     *
     * @param s the text to cut into shingles
     * @return always {@code true}
     */
    public boolean parse(String s) {
        String text = normalize(s);
        int lastStart = text.length() - this.k;
        for (int start = 0; start <= lastStart; ++start) {
            this.add(text.substring(start, start + this.k));
        }
        return true;
    }

    /**
     * Adds a shingle, rejecting strings whose length is not exactly {@code k}.
     *
     * @param s the shingle to add
     * @return {@code true} if the shingle was not already present
     * @throws InvalidParameterException if {@code s.length() != k}
     */
    @Override
    public boolean add(String s) {
        if (s.length() != this.k) {
            throw new InvalidParameterException("This size of this String (" + s.length() + ") is different from k (" + this.k + ")");
        }
        return super.add(s);
    }

    /**
     * Builds the presence vector of {@code s}: entry {@code i} is {@code true} when the
     * i-th shingle (in this set's iteration order) occurs in the whitespace-normalized
     * form of {@code s}.
     *
     * @param s the text to describe
     * @return a vector with one entry per shingle in this set
     */
    public boolean[] booleanVectorOf(String s) {
        // Normalize exactly like parse() does, otherwise shingles spanning collapsed
        // whitespace would never be found in the string they were extracted from.
        String text = normalize(s);
        boolean[] r = new boolean[this.size()];
        int i = 0;
        for (String shingle : this) {
            r[i] = text.contains(shingle);
            ++i;
        }
        return r;
    }

    /**
     * Builds the sparse form of {@link #booleanVectorOf(String)}: the positions (in this
     * set's iteration order) of the shingles that occur in the whitespace-normalized form
     * of {@code s}.
     *
     * @param s the text to describe
     * @return the set of positions of matching shingles
     */
    public Set<Integer> integerSetOf(String s) {
        String text = normalize(s);
        Set<Integer> set = new HashSet<Integer>();
        int i = 0;
        for (String shingle : this) {
            if (text.contains(shingle)) {
                set.add(i);
            }
            ++i;
        }
        return set;
    }

    /**
     * Builds the profile of {@code s}: entry {@code i} is the number of windows of the
     * whitespace-normalized form of {@code s} that equal the i-th shingle (in this set's
     * iteration order). Overlapping windows are all counted, matching the way
     * {@link #parse(String)} slides over the text.
     *
     * @param s the text to describe
     * @return a vector with one count per shingle in this set
     */
    public int[] profileOf(String s) {
        String text = normalize(s);
        int[] p = new int[this.size()];
        int i = 0;
        for (String shingle : this) {
            p[i] = countWindows(shingle, text);
            ++i;
        }
        return p;
    }

    /** Collapses every run of whitespace in {@code s} into a single space. */
    private static String normalize(String s) {
        return spaceReg.matcher(s).replaceAll(" ");
    }

    /** Counts every start position of {@code shingle} in {@code text}, overlaps included. */
    private static int countWindows(String shingle, String text) {
        int count = 0;
        // Advance by one character (not by the shingle length) so overlaps are counted.
        for (int at = text.indexOf(shingle); at >= 0; at = text.indexOf(shingle, at + 1)) {
            ++count;
        }
        return count;
    }
}

