/*
 * Decompiled with CFR 0.152.
 */
package com.tencent.tcvectordb.examples;

import com.tencent.tcvdbtext.encoder.SparseVectorBm25Encoder;
import com.tencent.tcvectordb.client.VectorDBClient;
import com.tencent.tcvectordb.examples.CommonService;
import com.tencent.tcvectordb.model.Collection;
import com.tencent.tcvectordb.model.Database;
import com.tencent.tcvectordb.model.DocField;
import com.tencent.tcvectordb.model.Document;
import com.tencent.tcvectordb.model.param.collection.CreateCollectionParam;
import com.tencent.tcvectordb.model.param.collection.Embedding;
import com.tencent.tcvectordb.model.param.collection.FieldType;
import com.tencent.tcvectordb.model.param.collection.FilterIndex;
import com.tencent.tcvectordb.model.param.collection.FilterIndexConfig;
import com.tencent.tcvectordb.model.param.collection.HNSWParams;
import com.tencent.tcvectordb.model.param.collection.IndexType;
import com.tencent.tcvectordb.model.param.collection.MetricType;
import com.tencent.tcvectordb.model.param.collection.SparseVectorIndex;
import com.tencent.tcvectordb.model.param.collection.VectorIndex;
import com.tencent.tcvectordb.model.param.dml.AnnOption;
import com.tencent.tcvectordb.model.param.dml.HybridSearchParam;
import com.tencent.tcvectordb.model.param.dml.InsertParam;
import com.tencent.tcvectordb.model.param.dml.MatchOption;
import com.tencent.tcvectordb.model.param.dml.WeightRerankParam;
import com.tencent.tcvectordb.model.param.entity.AffectRes;
import com.tencent.tcvectordb.model.param.enums.EmbeddingModelEnum;
import com.tencent.tcvectordb.utils.JsonUtils;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;

public class VectorDBWithHybridSearchEmbeddingExample {
    private static final String DBNAME = "book_hybrid_embedding";
    private static final String COLL_NAME = "book_embedding_collection";

    public static void main(String[] args) throws InterruptedException {
        VectorDBClient client = CommonService.initClient();
        CommonService.anySafe(() -> client.dropDatabase(DBNAME));
        VectorDBWithHybridSearchEmbeddingExample.createDatabaseAndCollection(client);
        VectorDBWithHybridSearchEmbeddingExample.upsertData(client);
        VectorDBWithHybridSearchEmbeddingExample.searchData(client);
        VectorDBWithHybridSearchEmbeddingExample.deleteAndDrop(client);
    }

    private static void createDatabaseAndCollection(VectorDBClient client) {
        System.out.println("---------------------- createDatabase ----------------------");
        Database db = client.createDatabase(DBNAME);
        System.out.println("---------------------- listCollections ----------------------");
        List<String> database = client.listDatabase();
        for (String s : database) {
            System.out.println("\tres: " + s);
        }
        System.out.println("---------------------- createCollection ----------------------");
        CreateCollectionParam collectionParam = VectorDBWithHybridSearchEmbeddingExample.initCreateCollectionParam(COLL_NAME);
        db.createCollection(collectionParam);
        System.out.println("book_embedding_collection exists: " + db.IsExistsCollection(COLL_NAME));
    }

    private static void upsertData(VectorDBClient client) throws InterruptedException {
        Database database = client.database(DBNAME);
        Collection collection = database.describeCollection(COLL_NAME);
        SparseVectorBm25Encoder bm25Encoder = SparseVectorBm25Encoder.getDefaultBm25Encoder();
        List<String> texts = Arrays.asList("\u817e\u8baf\u4e91\u5411\u91cf\u6570\u636e\u5e93\uff08Tencent Cloud VectorDB\uff09\u662f\u4e00\u6b3e\u5168\u6258\u7ba1\u7684\u81ea\u7814\u4f01\u4e1a\u7ea7\u5206\u5e03\u5f0f\u6570\u636e\u5e93\u670d\u52a1\uff0c\u4e13\u7528\u4e8e\u5b58\u50a8\u3001\u7d22\u5f15\u3001\u68c0\u7d22\u3001\u7ba1\u7406\u7531\u6df1\u5ea6\u795e\u7ecf\u7f51\u7edc\u6216\u5176\u4ed6\u673a\u5668\u5b66\u4e60\u6a21\u578b\u751f\u6210\u7684\u5927\u91cf\u591a\u7ef4\u5d4c\u5165\u5411\u91cf\u3002", "\u4f5c\u4e3a\u4e13\u95e8\u4e3a\u5904\u7406\u8f93\u5165\u5411\u91cf\u67e5\u8be2\u800c\u8bbe\u8ba1\u7684\u6570\u636e\u5e93\uff0c\u5b83\u652f\u6301\u591a\u79cd\u7d22\u5f15\u7c7b\u578b\u548c\u76f8\u4f3c\u5ea6\u8ba1\u7b97\u65b9\u6cd5\uff0c\u5355\u7d22\u5f15\u652f\u630110\u4ebf\u7ea7\u5411\u91cf\u89c4\u6a21\uff0c\u9ad8\u8fbe\u767e\u4e07\u7ea7 QPS \u53ca\u6beb\u79d2\u7ea7\u67e5\u8be2\u5ef6\u8fdf\u3002", "\u4e0d\u4ec5\u80fd\u4e3a\u5927\u6a21\u578b\u63d0\u4f9b\u5916\u90e8\u77e5\u8bc6\u5e93\uff0c\u63d0\u9ad8\u5927\u6a21\u578b\u56de\u7b54\u7684\u51c6\u786e\u6027\uff0c\u8fd8\u53ef\u5e7f\u6cdb\u5e94\u7528\u4e8e\u63a8\u8350\u7cfb\u7edf\u3001NLP \u670d\u52a1\u3001\u8ba1\u7b97\u673a\u89c6\u89c9\u3001\u667a\u80fd\u5ba2\u670d\u7b49 AI \u9886\u57df\u3002", "\u817e\u8baf\u4e91\u5411\u91cf\u6570\u636e\u5e93\uff08Tencent Cloud VectorDB\uff09\u4f5c\u4e3a\u4e00\u79cd\u4e13\u95e8\u5b58\u50a8\u548c\u68c0\u7d22\u5411\u91cf\u6570\u636e\u7684\u670d\u52a1\u63d0\u4f9b\u7ed9\u7528\u6237\uff0c \u5728\u9ad8\u6027\u80fd\u3001\u9ad8\u53ef\u7528\u3001\u5927\u89c4\u6a21\u3001\u4f4e\u6210\u672c\u3001\u7b80\u5355\u6613\u7528\u3001\u7a33\u5b9a\u53ef\u9760\u7b49\u65b9\u9762\u4f53\u73b0\u51fa\u663e\u8457\u4f18\u52bf\u3002 ", "\u817e\u8baf\u4e91\u5411\u91cf\u6570\u636e\u5e93\u53ef\u4ee5\u548c\u5927\u8bed\u8a00\u6a21\u578b LLM 
\u914d\u5408\u4f7f\u7528\u3002\u4f01\u4e1a\u7684\u79c1\u57df\u6570\u636e\u5728\u7ecf\u8fc7\u6587\u672c\u5206\u5272\u3001\u5411\u91cf\u5316\u540e\uff0c\u53ef\u4ee5\u5b58\u50a8\u5728\u817e\u8baf\u4e91\u5411\u91cf\u6570\u636e\u5e93\u4e2d\uff0c\u6784\u5efa\u8d77\u4f01\u4e1a\u4e13\u5c5e\u7684\u5916\u90e8\u77e5\u8bc6\u5e93\uff0c\u4ece\u800c\u5728\u540e\u7eed\u7684\u68c0\u7d22\u4efb\u52a1\u4e2d\uff0c\u4e3a\u5927\u6a21\u578b\u63d0\u4f9b\u63d0\u793a\u4fe1\u606f\uff0c\u8f85\u52a9\u5927\u6a21\u578b\u751f\u6210\u66f4\u52a0\u51c6\u786e\u7684\u7b54\u6848\u3002");
        List sparseVectors = bm25Encoder.encodeTexts(texts);
        ArrayList<Document> documentList = new ArrayList<Document>(Arrays.asList(Document.newBuilder().withId("0001").addDocField(new DocField("text", texts.get(0))).withSparseVector((List)sparseVectors.get(0)).build(), Document.newBuilder().withId("0002").withSparseVector((List)sparseVectors.get(1)).addDocField(new DocField("text", texts.get(1))).build(), Document.newBuilder().withId("0003").withSparseVector((List)sparseVectors.get(2)).addDocField(new DocField("text", texts.get(2))).build(), Document.newBuilder().withId("0004").withSparseVector((List)sparseVectors.get(3)).addDocField(new DocField("text", texts.get(3))).build(), Document.newBuilder().withId("0005").withSparseVector((List)sparseVectors.get(4)).addDocField(new DocField("text", texts.get(4))).build()));
        System.out.println("---------------------- upsert ----------------------");
        InsertParam insertParam = InsertParam.newBuilder().withDocuments(documentList).build();
        AffectRes affectRes = client.upsert(DBNAME, COLL_NAME, insertParam);
        System.out.println(JsonUtils.toJsonString(affectRes));
        Thread.sleep(5000L);
    }

    private static void searchData(VectorDBClient client) {
        System.out.println("---------------------- hybridSearch ----------------------");
        SparseVectorBm25Encoder encoder = SparseVectorBm25Encoder.getBm25Encoder((String)"zh");
        HybridSearchParam hybridSearchParam = HybridSearchParam.newBuilder().withAnn(AnnOption.newBuilder().withFieldName("text").withTextData("\u4ec0\u4e48\u662f\u817e\u8baf\u4e91\u5411\u91cf\u6570\u636e\u5e93").withLimit(2).build()).withMatch(MatchOption.newBuilder().withFieldName("sparse_vector").withData(encoder.encodeQueries(Arrays.asList("\u4ec0\u4e48\u662f\u817e\u8baf\u4e91\u5411\u91cf\u6570\u636e\u5e93"))).withCutoffFrequency(0.1).withTerminateAfter(4000).withLimit(2).build()).withRerank(new WeightRerankParam(Arrays.asList("vector", "sparse_vector"), Arrays.asList(1, 1))).withLimit(3).withRetrieveVector(false).build();
        List<Document> siDocs = client.hybridSearch(DBNAME, COLL_NAME, hybridSearchParam).getDocuments();
        int i = 0;
        for (Document docs : siDocs) {
            System.out.println("\tres: " + i++ + ((Object)docs).toString());
        }
    }

    private static void deleteAndDrop(VectorDBClient client) {
        Database database = client.database(DBNAME);
        System.out.println("---------------------- truncate collection ----------------------");
        database.dropCollection(COLL_NAME);
        System.out.println("---------------------- delete database ----------------------");
        client.dropDatabase(DBNAME);
    }

    private static CreateCollectionParam initCreateCollectionParam(String collName) {
        return CreateCollectionParam.newBuilder().withName(collName).withShardNum(1).withReplicaNum(1).withDescription("test hybrid embedding collection").addField(new FilterIndex("id", FieldType.String, IndexType.PRIMARY_KEY)).addField(new VectorIndex("vector", 768, FieldType.Vector, IndexType.HNSW, MetricType.IP, new HNSWParams(16, 200))).addField(new SparseVectorIndex("sparse_vector", IndexType.INVERTED, MetricType.IP)).withEmbedding(Embedding.newBuilder().withVectorField("vector").withField("text").withModelName(EmbeddingModelEnum.BGE_BASE_ZH.getModelName()).build()).withFilterIndexConfig(FilterIndexConfig.newBuilder().withFilterAll(true).build()).build();
    }
}

