/*
 * Decompiled with CFR 0.152.
 */
package org.apache.ctakes.utils.wiki;

import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.HashMap;
import org.apache.ctakes.utils.struct.CounterMap;
import org.apache.ctakes.utils.wiki.ApproximateMath;
import org.apache.ctakes.utils.wiki.ApproximateSimilarity;
import org.apache.ctakes.utils.wiki.Cache;
import org.apache.ctakes.utils.wiki.SearchResult;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;

/**
 * Wrapper around a Lucene index of wiki articles that supports keyword
 * search and a TF-IDF based cosine similarity between two query strings.
 *
 * <p>Typical usage: construct, call {@link #initialize()}, issue queries via
 * {@link #search(String)} or {@link #getCosineSimilarity(String, String)},
 * and finally call {@link #close()}.
 *
 * <p>The documents in the index are read through the fields {@code "title"},
 * {@code "redirect"} and {@code "text"}.
 *
 * <p>NOTE: the single-entry query cache is read and written without any
 * synchronization.
 */
public class WikiIndex {
    /** Maximum number of hits used by the no-argument constructor. */
    public static int defaultMaxHits = 10;
    /** Index location used by the no-argument constructor. */
    public static String defaultIndexPath = "/home/dima/i2b2/wiki-index/index_nometa";
    /** Search field used by the no-argument constructor. */
    public static String defaultSearchField = "text";

    private final int maxHits;
    private final String indexPath;
    private final String searchField;
    // Kept so that close() can release it; closing the reader alone does not close the directory.
    private Directory indexDirectory;
    private IndexReader indexReader;
    private IndexSearcher indexSearcher;
    private Analyzer standardAnalyzer;
    private QueryParser queryParser;
    private final ClassicSimilarity similarity;
    private int numDocs;
    private final boolean useCache = true;
    // Remembers the two most recent query strings and their TF-IDF vectors.
    private Cache lastQuery = null;

    /**
     * Creates an index wrapper; no file is opened until {@link #initialize()} is called.
     *
     * @param maxHits     maximum number of documents retrieved per query
     * @param indexPath   file system path of the Lucene index directory
     * @param searchField name of the field that free-text queries are parsed against
     * @param approximate if {@code true}, tf/idf weights are computed with
     *                    {@link ApproximateSimilarity}; otherwise with {@link ClassicSimilarity}
     */
    public WikiIndex(int maxHits, String indexPath, String searchField, boolean approximate) {
        this.maxHits = maxHits;
        this.indexPath = indexPath;
        this.searchField = searchField;
        this.similarity = approximate ? new ApproximateSimilarity() : new ClassicSimilarity();
    }

    /**
     * Creates an index wrapper that uses {@link ClassicSimilarity} for tf/idf weights.
     *
     * @param maxHits     maximum number of documents retrieved per query
     * @param indexPath   file system path of the Lucene index directory
     * @param searchField name of the field that free-text queries are parsed against
     */
    public WikiIndex(int maxHits, String indexPath, String searchField) {
        this(maxHits, indexPath, searchField, false);
    }

    /**
     * Creates an index wrapper from the current values of {@link #defaultMaxHits},
     * {@link #defaultIndexPath} and {@link #defaultSearchField}.
     */
    public WikiIndex() {
        // Delegate so that the similarity is always set; leaving it null made
        // getCosineSimilarity() fail with a NullPointerException.
        this(defaultMaxHits, defaultIndexPath, defaultSearchField);
    }

    /**
     * Opens the index and prepares the searcher, analyzer, query parser and cache.
     * If this instance was initialized before, the previously opened resources
     * are released first.
     *
     * @throws CorruptIndexException if the index is corrupt
     * @throws IOException           if the index cannot be opened or read
     */
    public void initialize() throws CorruptIndexException, IOException {
        this.releaseResources();

        Directory directory = FSDirectory.open(new File(this.indexPath).toPath());
        boolean opened = false;
        try {
            this.indexReader = DirectoryReader.open(directory);
            this.indexDirectory = directory;
            opened = true;
        } finally {
            // Do not leak the directory handle when the reader cannot be opened.
            if (!opened) {
                directory.close();
            }
        }

        this.numDocs = this.indexReader.numDocs();
        this.indexSearcher = new IndexSearcher(this.indexReader);
        this.standardAnalyzer = new StandardAnalyzer();
        this.queryParser = new QueryParser(this.searchField, this.standardAnalyzer);
        this.lastQuery = new Cache();
    }

    /**
     * Searches the index and returns the titles of the best matching articles.
     * Hits that carry a {@code "redirect"} field are replaced by the article they
     * redirect to (when it can be found); in that case the reported score is the
     * score of the redirect lookup, not of the original query.
     *
     * @param queryText free text; query syntax characters are escaped before parsing
     * @return at most {@code maxHits} results, in the order returned by the searcher
     * @throws ParseException if the escaped text cannot be parsed as a query
     * @throws IOException    if reading the index fails
     */
    public ArrayList<SearchResult> search(String queryText) throws ParseException, IOException {
        ArrayList<SearchResult> articleTitles = new ArrayList<SearchResult>();
        Query query = this.queryParser.parse(QueryParser.escape(queryText));
        ScoreDoc[] scoreDocs = this.indexSearcher.search(query, this.maxHits).scoreDocs;
        for (ScoreDoc scoreDoc : scoreDocs) {
            ScoreDoc resolved = this.handlePossibleRedirect(scoreDoc);
            Document doc = this.indexSearcher.doc(resolved.doc);
            articleTitles.add(new SearchResult(doc.get("title"), resolved.score));
        }
        return articleTitles;
    }

    /**
     * Computes the cosine similarity between the TF-IDF vectors built from the
     * documents retrieved for each of the two query strings.
     *
     * @param queryText1 first query string
     * @param queryText2 second query string
     * @return the cosine of the two vectors, or {@code 0.0} when either query
     *         retrieves no documents, yields an empty vector, or has a zero norm
     * @throws ParseException if a query cannot be parsed
     * @throws IOException    if reading the index fails
     */
    public double getCosineSimilarity(String queryText1, String queryText2) throws ParseException, IOException {
        HashMap<String, Double> vector1 = this.lookupOrBuildVector(queryText1);
        if (vector1 == null || vector1.isEmpty()) {
            return 0.0;
        }
        HashMap<String, Double> vector2 = this.lookupOrBuildVector(queryText2);
        if (vector2 == null || vector2.isEmpty()) {
            return 0.0;
        }

        if (this.useCache) {
            this.lastQuery.t1 = queryText1;
            this.lastQuery.v1 = vector1;
            this.lastQuery.t2 = queryText2;
            this.lastQuery.v2 = vector2;
        }

        double dotProduct = this.computeDotProduct(vector1, vector2);
        double denominator = this.computeEuclideanNorm(vector1) * this.computeEuclideanNorm(vector2);
        if (denominator == 0.0) {
            // Avoid returning NaN/Infinity; treat a degenerate vector like "no information".
            return 0.0;
        }
        return dotProduct / denominator;
    }

    /**
     * Returns the TF-IDF vector for a query string, reusing a cached vector when
     * the string equals one of the two most recently cached query strings.
     *
     * @return the vector, or {@code null} when the query retrieves no documents
     */
    private HashMap<String, Double> lookupOrBuildVector(String queryText) throws ParseException, IOException {
        if (this.useCache) {
            if (this.lastQuery.t1 != null && this.lastQuery.t1.equals(queryText)) {
                return this.lastQuery.v1;
            }
            if (this.lastQuery.t2 != null && this.lastQuery.t2.equals(queryText)) {
                return this.lastQuery.v2;
            }
        }
        ArrayList<Terms> termFreqVectors = this.getTermFreqVectors(queryText);
        if (termFreqVectors.isEmpty()) {
            return null;
        }
        return this.makeTfIdfVector(termFreqVectors);
    }

    /**
     * Retrieves the {@code "text"} term vectors of the documents matching a query,
     * following redirects in the same way as {@link #search(String)}.
     *
     * @param queryString free text; query syntax characters are escaped before parsing
     * @return one entry per hit; an entry is whatever the reader returns for that
     *         document and is handled as possibly {@code null} by the consumer
     * @throws ParseException if the escaped text cannot be parsed as a query
     * @throws IOException    if reading the index fails
     */
    public ArrayList<Terms> getTermFreqVectors(String queryString) throws ParseException, IOException {
        Query query = this.queryParser.parse(QueryParser.escape(queryString));
        ScoreDoc[] scoreDocs = this.indexSearcher.search(query, this.maxHits).scoreDocs;
        ArrayList<Terms> termFreqVectors = new ArrayList<Terms>();
        for (ScoreDoc scoreDoc : scoreDocs) {
            ScoreDoc resolved = this.handlePossibleRedirect(scoreDoc);
            // NOTE(review): the field is hard-coded to "text" rather than searchField — confirm this is intended.
            termFreqVectors.add(this.indexReader.getTermVector(resolved.doc, "text"));
        }
        return termFreqVectors;
    }

    /**
     * If the given hit has a {@code "redirect"} field, looks up the redirect
     * target by title and returns the top hit of that lookup; otherwise, or when
     * the lookup finds nothing, returns the hit unchanged.
     */
    private ScoreDoc handlePossibleRedirect(ScoreDoc scoreDoc) throws ParseException, CorruptIndexException, IOException {
        Document doc = this.indexSearcher.doc(scoreDoc.doc);
        String redirectTitle = doc.get("redirect");
        if (redirectTitle == null) {
            return scoreDoc;
        }

        QueryParser redirectQueryParser = new QueryParser("title", this.standardAnalyzer);
        String redirectTitleQuoted = "\"" + redirectTitle.replace('_', ' ') + "\"";
        // NOTE(review): escaping after quoting presumably escapes the surrounding quotes as well,
        // so this may not be parsed as a phrase query. Left as is because changing it would
        // alter which article a redirect resolves to — confirm the intent.
        String redirectTitleEscaped = QueryParser.escape(redirectTitleQuoted);
        Query redirectQuery = redirectQueryParser.parse(redirectTitleEscaped);

        ScoreDoc[] redirectScoreDocs = this.indexSearcher.search(redirectQuery, 1).scoreDocs;
        if (redirectScoreDocs.length < 1) {
            // Report the redirecting article on the left-hand side (previously the target was printed twice).
            System.out.println("failed redirect: " + doc.get("title") + " -> " + redirectTitle);
            return scoreDoc;
        }
        return redirectScoreDocs[0];
    }

    /**
     * Builds one TF-IDF vector from the term vectors of several documents.
     * Each term is counted once per document whose term vector contains it;
     * the count is passed to the similarity's {@code tf}, and the document
     * frequency of the term in field {@code "text"} to its {@code idf}.
     *
     * @param termFreqVectors per-document term vectors; {@code null} entries are skipped
     * @return map from term to tf * idf weight
     */
    private HashMap<String, Double> makeTfIdfVector(ArrayList<Terms> termFreqVectors) throws IOException {
        CounterMap<String> countVector = new CounterMap<String>();
        for (Terms terms : termFreqVectors) {
            if (terms == null) {
                continue;
            }
            TermsEnum termsEnum = terms.iterator();
            while (termsEnum.next() != null) {
                countVector.add(termsEnum.term().utf8ToString());
            }
        }

        // Weights are computed once, after all documents have been counted. Doing this
        // inside the loop above gave the same final values but repeated every docFreq
        // lookup for every document.
        HashMap<String, Double> tfIdfVector = new HashMap<String, Double>();
        for (String key : countVector.keySet()) {
            double tf = this.similarity.tf((float) countVector.get(key).intValue());
            double idf = this.similarity.idf((long) this.indexReader.docFreq(new Term("text", key)), (long) this.numDocs);
            tfIdfVector.put(key, tf * idf);
        }
        return tfIdfVector;
    }

    /**
     * Returns the square root of the sum of squared weights, as computed by
     * {@link ApproximateMath#asqrt}.
     */
    private double computeEuclideanNorm(HashMap<String, Double> tfIdfVector) {
        double sumOfSquares = 0.0;
        for (double tfidf : tfIdfVector.values()) {
            sumOfSquares += tfidf * tfidf;
        }
        // NOTE(review): asqrt is used regardless of the 'approximate' constructor flag — confirm this is intended.
        return ApproximateMath.asqrt(sumOfSquares);
    }

    /**
     * Returns the dot product of two sparse vectors, i.e. the sum of the
     * products of the weights of the terms present in both.
     */
    private double computeDotProduct(HashMap<String, Double> vector1, HashMap<String, Double> vector2) {
        // Iterate over the smaller map so fewer lookups are needed.
        HashMap<String, Double> smallSet = vector1.size() > vector2.size() ? vector2 : vector1;
        HashMap<String, Double> largeSet = smallSet == vector1 ? vector2 : vector1;

        double dotProduct = 0.0;
        for (String term : smallSet.keySet()) {
            Double other = largeSet.get(term);
            if (other != null) {
                dotProduct += smallSet.get(term) * other;
            }
        }
        return dotProduct;
    }

    /**
     * Returns the element-wise sum of two sparse vectors. A term present in only
     * one vector keeps its weight (the other vector is treated as 0 for it);
     * previously such terms were dropped from the result.
     */
    private HashMap<String, Double> addVectors(HashMap<String, Double> vector1, HashMap<String, Double> vector2) {
        HashMap<String, Double> smallSet = vector1.size() > vector2.size() ? vector2 : vector1;
        HashMap<String, Double> largeSet = smallSet == vector1 ? vector2 : vector1;

        // Start from a copy of the larger vector and fold the smaller one into it.
        HashMap<String, Double> sum = new HashMap<String, Double>(largeSet);
        for (String term : smallSet.keySet()) {
            double addend = smallSet.get(term);
            Double existing = sum.get(term);
            sum.put(term, existing == null ? addend : existing + addend);
        }
        return sum;
    }

    /**
     * Releases the index reader, the analyzer and the index directory.
     * Does nothing for resources that were never opened.
     *
     * @throws IOException if closing the reader or the directory fails
     */
    public void close() throws IOException {
        this.releaseResources();
    }

    /**
     * Closes whatever has been opened so far. Every resource is attempted even
     * if closing an earlier one throws.
     */
    private void releaseResources() throws IOException {
        try {
            if (this.indexReader != null) {
                this.indexReader.close();
            }
        } finally {
            try {
                if (this.standardAnalyzer != null) {
                    this.standardAnalyzer.close();
                }
            } finally {
                if (this.indexDirectory != null) {
                    this.indexDirectory.close();
                }
            }
        }
    }
}

