/*
 * Decompiled with CFR 0.152.
 */
package org.apache.jackrabbit.oak.plugins.index.lucene.util.fv;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.jackrabbit.oak.plugins.index.lucene.util.fv.LSHAnalyzer;
import org.apache.jackrabbit.oak.plugins.index.search.FieldNames;
import org.apache.jackrabbit.oak.plugins.index.search.PropertyDefinition;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.BytesRef;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class SimSearchUtils {
    private static final Logger log = LoggerFactory.getLogger(SimSearchUtils.class);

    public static String toDoubleString(byte[] bytes) {
        double[] a = SimSearchUtils.toDoubleArray(bytes);
        StringBuilder builder = new StringBuilder();
        double[] dArray = a;
        int n = dArray.length;
        for (int i = 0; i < n; ++i) {
            Double d = dArray[i];
            if (builder.length() > 0) {
                builder.append(' ');
            }
            builder.append(d);
        }
        return builder.toString();
    }

    public static List<Double> toDoubles(byte[] array) {
        int blockSize = 8;
        ByteBuffer wrap = ByteBuffer.wrap(array);
        int capacity = array.length / blockSize;
        ArrayList<Double> doubles = new ArrayList<Double>(capacity);
        for (int i = 0; i < capacity; ++i) {
            double e = wrap.getDouble(i * blockSize);
            doubles.add(e);
        }
        return doubles;
    }

    private static double[] toDoubleArray(byte[] array) {
        int blockSize = 8;
        ByteBuffer wrap = ByteBuffer.wrap(array);
        int capacity = array.length / blockSize;
        double[] doubles = new double[capacity];
        for (int i = 0; i < capacity; ++i) {
            double e;
            doubles[i] = e = wrap.getDouble(i * blockSize);
        }
        return doubles;
    }

    private static Collection<BytesRef> getTokens(Analyzer analyzer, String field, String sampleTextString) throws IOException {
        LinkedList<BytesRef> tokens = new LinkedList<BytesRef>();
        TokenStream ts = analyzer.tokenStream(field, sampleTextString);
        ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            CharTermAttribute charTermAttribute = ts.getAttribute(CharTermAttribute.class);
            String token = new String(charTermAttribute.buffer(), 0, charTermAttribute.length());
            tokens.add(new BytesRef(token));
        }
        ts.end();
        ts.close();
        return tokens;
    }

    static Query getSimQuery(Analyzer analyzer, String fieldName, String text) throws IOException {
        return SimSearchUtils.createLSHQuery(fieldName, SimSearchUtils.getTokens(analyzer, fieldName, text), 1.0f, 1.0f);
    }

    public static byte[] toByteArray(List<Double> values) {
        int blockSize = 8;
        byte[] bytes = new byte[values.size() * blockSize];
        int i = 0;
        int j = 0;
        while (i < values.size()) {
            ByteBuffer.wrap(bytes, j, blockSize).putDouble(values.get(i));
            ++i;
            j += blockSize;
        }
        return bytes;
    }

    public static byte[] toByteArray(String value) {
        LinkedList<Double> doubles = new LinkedList<Double>();
        for (String dv : value.split(",")) {
            doubles.add(Double.parseDouble(dv));
        }
        return SimSearchUtils.toByteArray(doubles);
    }

    public static Query getSimilarityQuery(List<PropertyDefinition> sp, IndexReader reader, String queryString) {
        try {
            log.debug("parsing similarity query on {}", (Object)queryString);
            BooleanQuery similarityQuery = null;
            String text = null;
            for (String param : queryString.split("&")) {
                String[] keyValuePair = param.split("=");
                if (keyValuePair.length != 2 || keyValuePair[0] == null || keyValuePair[1] == null) {
                    throw new RuntimeException("Unparsable native Lucene query for fv similarity: " + queryString);
                }
                if (!"stream.body".equals(keyValuePair[0])) continue;
                text = keyValuePair[1];
                break;
            }
            if (text != null && !sp.isEmpty()) {
                log.debug("generating similarity query for {}", text);
                BooleanQuery booleanQuery = new BooleanQuery(true);
                LSHAnalyzer analyzer = new LSHAnalyzer();
                IndexSearcher searcher = new IndexSearcher(reader);
                TermQuery q = new TermQuery(new Term(":path", text));
                TopDocs top = searcher.search((Query)q, 1);
                if (top.totalHits > 0) {
                    ScoreDoc d = top.scoreDocs[0];
                    Document doc = reader.document(d.doc);
                    for (PropertyDefinition pd : sp) {
                        log.debug("adding similarity clause for property {}", (Object)pd.name);
                        String similarityFieldName = FieldNames.createSimilarityFieldName(pd.name);
                        String fvString = doc.get(similarityFieldName);
                        if (fvString != null && fvString.trim().length() > 0) {
                            log.trace("generating sim query on field {} and text {}", (Object)similarityFieldName, (Object)fvString);
                            Query simQuery = SimSearchUtils.getSimQuery(analyzer, similarityFieldName, fvString);
                            booleanQuery.add(new BooleanClause(simQuery, BooleanClause.Occur.SHOULD));
                            log.trace("similarity query generated for {}", (Object)pd.name);
                            continue;
                        }
                        log.warn("could not create query for similarity field {}", (Object)fvString);
                    }
                }
                if (booleanQuery.clauses().size() > 0) {
                    similarityQuery = booleanQuery;
                    log.trace("final similarity query is {}", (Object)similarityQuery);
                }
            }
            return similarityQuery;
        }
        catch (Exception e) {
            throw new RuntimeException("could not handle similarity query " + queryString);
        }
    }

    private static Query createLSHQuery(String field, Collection<BytesRef> minhashes, float similarity, float expectedTruePositive) {
        TermQuery tq;
        int bandSize = 1;
        if (expectedTruePositive < 1.0f) {
            bandSize = SimSearchUtils.computeBandSize(minhashes.size(), similarity, expectedTruePositive);
        }
        BooleanQuery builder = new BooleanQuery();
        BooleanQuery childBuilder = new BooleanQuery();
        int rowInBand = 0;
        for (BytesRef minHash : minhashes) {
            tq = new TermQuery(new Term(field, minHash));
            if (bandSize == 1) {
                builder.add(new ConstantScoreQuery(tq), BooleanClause.Occur.SHOULD);
                continue;
            }
            childBuilder.add(new ConstantScoreQuery(tq), BooleanClause.Occur.MUST);
            if (++rowInBand != bandSize) continue;
            builder.add(new ConstantScoreQuery(childBuilder), BooleanClause.Occur.SHOULD);
            childBuilder = new BooleanQuery();
            rowInBand = 0;
        }
        if (childBuilder.clauses().size() > 0) {
            for (BytesRef token : minhashes) {
                tq = new TermQuery(new Term(field, token.toString()));
                childBuilder.add(new ConstantScoreQuery(tq), BooleanClause.Occur.MUST);
                if (++rowInBand != bandSize) continue;
                builder.add(new ConstantScoreQuery(childBuilder), BooleanClause.Occur.SHOULD);
                break;
            }
        }
        if ((double)expectedTruePositive >= 1.0 && similarity < 1.0f) {
            builder.setMinimumNumberShouldMatch((int)Math.ceil((float)minhashes.size() * similarity));
        }
        log.trace("similarity query with bands : {}, minShouldMatch : {}, no. of clauses : {}", new Object[]{bandSize, builder.getMinimumNumberShouldMatch(), builder.clauses().size()});
        return builder;
    }

    private static int computeBandSize(int numHash, double similarity, double expectedTruePositive) {
        for (int bands = 1; bands <= numHash; ++bands) {
            int rowsInBand = numHash / bands;
            double truePositive = 1.0 - Math.pow(1.0 - Math.pow(similarity, rowsInBand), bands);
            if (!(truePositive > expectedTruePositive)) continue;
            return rowsInBand;
        }
        return 1;
    }

    public static void bruteForceFVRerank(List<PropertyDefinition> sp, TopDocs docs, IndexSearcher indexSearcher) throws IOException {
        double distSum = 0.0;
        double counter = 0.0;
        HashMap<Integer, Double> distances = new HashMap<Integer, Double>();
        int k = 15;
        ScoreDoc inputDoc = docs.scoreDocs[0];
        LinkedList<Integer> toDiscard = new LinkedList<Integer>();
        for (PropertyDefinition pd : sp) {
            String fieldName = FieldNames.createBinSimilarityFieldName(pd.name);
            BytesRef binaryValue = indexSearcher.doc(inputDoc.doc).getBinaryValue(fieldName);
            if (binaryValue == null) continue;
            double[] inputVector = SimSearchUtils.toDoubleArray(binaryValue.bytes);
            for (int j = 0; j < docs.scoreDocs.length; ++j) {
                double[] currentVector = SimSearchUtils.toDoubleArray(indexSearcher.doc((int)docs.scoreDocs[j].doc).getBinaryValue((String)fieldName).bytes);
                double distance = SimSearchUtils.dist(inputVector, currentVector) + 1.0E-10;
                if (Double.isNaN(distance) || Double.isInfinite(distance)) {
                    toDiscard.add(docs.scoreDocs[j].doc);
                    continue;
                }
                distSum += distance;
                counter += 1.0;
                distances.put(docs.scoreDocs[j].doc, distance);
                docs.scoreDocs[j].score += (float)(1.0 / distance);
            }
        }
        if (!toDiscard.isEmpty()) {
            docs.scoreDocs = (ScoreDoc[])Arrays.stream(docs.scoreDocs).filter(e -> !toDiscard.contains(e.doc)).toArray(ScoreDoc[]::new);
        }
        double distanceThreshold = 10.0 * distSum / counter;
        docs.scoreDocs = (ScoreDoc[])Arrays.stream(docs.scoreDocs).filter(e -> distances.containsKey(e.doc) && (Double)distances.get(e.doc) < distanceThreshold).toArray(ScoreDoc[]::new);
        Arrays.parallelSort(docs.scoreDocs, 0, docs.scoreDocs.length, (o1, o2) -> -1 * Double.compare(o1.score, o2.score));
        if (docs.scoreDocs.length > k) {
            docs.scoreDocs = Arrays.copyOfRange(docs.scoreDocs, 0, k);
        }
        if (docs.scoreDocs.length > 0) {
            docs.setMaxScore(docs.scoreDocs[0].score);
        }
    }

    private static double dist(double[] x, double[] y) {
        double d = 0.0;
        for (int i = 0; i < x.length; ++i) {
            d += Math.pow(y[i] - x[i], 2.0);
        }
        return Math.sqrt(d);
    }
}

