/*
 * Decompiled with CFR 0.152.
 */
package org.apache.jackrabbit.oak.plugins.index.lucene.util.fv;

import java.io.Reader;
import org.apache.jackrabbit.oak.plugins.index.lucene.util.fv.FVTokenizer;
import org.apache.jackrabbit.oak.plugins.index.lucene.util.fv.FeaturePositionTokenFilter;
import org.apache.jackrabbit.oak.plugins.index.lucene.util.fv.MinHashFilter;
import org.apache.jackrabbit.oak.plugins.index.lucene.util.fv.TruncateTokenFilter;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.shingle.ShingleFilter;
import org.apache.lucene.util.Version;

/**
 * Analyzer that tokenizes its input with {@link FVTokenizer}, post-processes the
 * tokens, groups them into fixed-size shingles and finally passes the shingles
 * through a {@link MinHashFilter}.
 *
 * <p>The filter chain built by {@link #createComponents(String, Reader)} is, in order:
 * {@link FVTokenizer} &rarr; {@link TruncateTokenFilter} &rarr;
 * {@link FeaturePositionTokenFilter} &rarr; {@link ShingleFilter} &rarr;
 * {@link MinHashFilter}.
 */
public class LSHAnalyzer extends Analyzer {
    /** Shingle size used as both the minimum and the maximum by the default constructor. */
    private static final int DEFAULT_SHINGLE_SIZE = 5;
    /** Default value handed to {@link MinHashFilter} as its hash count. */
    private static final int DEFAULT_HASH_COUNT = 1;
    /** Default value handed to {@link MinHashFilter} as its bucket count. */
    private static final int DEFAULT_BUCKET_COUNT = 512;
    /** Default value handed to {@link MinHashFilter} as its hash set size. */
    private static final int DEFAULT_HASH_SET_SIZE = 1;

    private final int min;
    private final int max;
    private final int hashCount;
    private final int bucketCount;
    private final int hashSetSize;

    /**
     * Creates an analyzer with explicit settings.
     *
     * @param min         minimum shingle size passed to {@link ShingleFilter}
     * @param max         maximum shingle size passed to {@link ShingleFilter}
     * @param hashCount   hash count passed to {@link MinHashFilter}
     * @param bucketCount bucket count passed to {@link MinHashFilter}
     * @param hashSetSize hash set size passed to {@link MinHashFilter}
     */
    private LSHAnalyzer(int min, int max, int hashCount, int bucketCount, int hashSetSize) {
        this.min = min;
        this.max = max;
        this.hashCount = hashCount;
        this.bucketCount = bucketCount;
        this.hashSetSize = hashSetSize;
    }

    /**
     * Creates an analyzer with the default settings: shingles of exactly
     * {@value #DEFAULT_SHINGLE_SIZE} tokens, {@value #DEFAULT_HASH_COUNT} hash,
     * {@value #DEFAULT_BUCKET_COUNT} buckets and a hash set size of
     * {@value #DEFAULT_HASH_SET_SIZE}.
     */
    public LSHAnalyzer() {
        this(DEFAULT_SHINGLE_SIZE, DEFAULT_SHINGLE_SIZE, DEFAULT_HASH_COUNT, DEFAULT_BUCKET_COUNT, DEFAULT_HASH_SET_SIZE);
    }

    /**
     * Builds the tokenizer and filter chain for one field.
     *
     * @param fieldName name of the field being analyzed; not used, the same chain
     *                  is built for every field
     * @param reader    source of the text to analyze
     * @return components whose source is the {@link FVTokenizer} and whose result
     *         stream is the {@link MinHashFilter} at the end of the chain
     */
    @Override
    protected Analyzer.TokenStreamComponents createComponents(String fieldName, Reader reader) {
        FVTokenizer source = new FVTokenizer(Version.LUCENE_47, reader);
        // NOTE(review): 3 is presumably the length each token is truncated to - confirm against TruncateTokenFilter.
        TruncateTokenFilter truncate = new TruncateTokenFilter(source, 3);
        FeaturePositionTokenFilter featurePos = new FeaturePositionTokenFilter(truncate);

        // Emit shingles only: unigram output is switched off, including the
        // fallback that would otherwise emit unigrams when no shingle can be formed.
        ShingleFilter shingleFilter = new ShingleFilter(featurePos, this.min, this.max);
        shingleFilter.setTokenSeparator(" ");
        shingleFilter.setOutputUnigrams(false);
        shingleFilter.setOutputUnigramsIfNoShingles(false);

        // The last argument is true only when more than one bucket is configured.
        // NOTE(review): presumably MinHashFilter's rotation flag - confirm against MinHashFilter.
        MinHashFilter filter = new MinHashFilter(shingleFilter, this.hashCount, this.bucketCount, this.hashSetSize, this.bucketCount > 1);
        return new Analyzer.TokenStreamComponents(source, filter);
    }
}

