/*
 * Decompiled with CFR 0.152.
 */
package io.quarkiverse.langchain4j.jlama;

import com.github.tjake.jlama.model.AbstractModel;
import com.github.tjake.jlama.model.functions.Generator;
import com.github.tjake.jlama.safetensors.DType;
import com.github.tjake.jlama.safetensors.prompt.PromptContext;
import com.github.tjake.jlama.safetensors.prompt.PromptSupport;
import dev.langchain4j.data.message.AiMessage;
import dev.langchain4j.data.message.ChatMessage;
import dev.langchain4j.internal.RetryUtils;
import dev.langchain4j.model.StreamingResponseHandler;
import dev.langchain4j.model.chat.StreamingChatLanguageModel;
import dev.langchain4j.model.output.FinishReason;
import dev.langchain4j.model.output.Response;
import dev.langchain4j.model.output.TokenUsage;
import io.quarkiverse.langchain4j.jlama.JlamaModel;
import io.quarkiverse.langchain4j.jlama.JlamaModelRegistry;
import io.quarkiverse.langchain4j.runtime.VertxUtil;
import java.nio.file.Path;
import java.util.List;
import java.util.Optional;
import java.util.UUID;

public class JlamaStreamingChatModel
implements StreamingChatLanguageModel {
    private final AbstractModel model;
    private final Float temperature;
    private final Integer maxTokens;
    private final UUID id = UUID.randomUUID();

    public JlamaStreamingChatModel(JlamaStreamingChatModelBuilder builder) {
        JlamaModelRegistry registry = JlamaModelRegistry.getOrCreate(builder.modelCachePath);
        JlamaModel jlamaModel = (JlamaModel)RetryUtils.withRetry(() -> registry.downloadModel(builder.modelName, Optional.ofNullable(builder.authToken)), (int)3);
        JlamaModel.Loader loader = jlamaModel.loader();
        if (builder.quantizeModelAtRuntime != null && builder.quantizeModelAtRuntime.booleanValue()) {
            loader = loader.quantized();
        }
        if (builder.workingQuantizedType != null) {
            loader = loader.workingQuantizationType(builder.workingQuantizedType);
        }
        if (builder.threadCount != null) {
            loader = loader.threadCount(builder.threadCount);
        }
        if (builder.workingDirectory != null) {
            loader = loader.workingDirectory(builder.workingDirectory);
        }
        this.model = loader.load();
        this.temperature = Float.valueOf(builder.temperature == null ? 0.7f : builder.temperature.floatValue());
        this.maxTokens = builder.maxTokens == null ? this.model.getConfig().contextLength : builder.maxTokens;
    }

    public static JlamaStreamingChatModelBuilder builder() {
        return new JlamaStreamingChatModelBuilder();
    }

    public void generate(List<ChatMessage> messages, final StreamingResponseHandler<AiMessage> handler) {
        final PromptContext promptContext = this.createPromptContext(messages);
        VertxUtil.runOutEventLoop((Runnable)new Runnable(){
            final /* synthetic */ JlamaStreamingChatModel this$0;
            {
                this.this$0 = this$0;
            }

            @Override
            public void run() {
                this.this$0.internalGenerate((StreamingResponseHandler<AiMessage>)handler, promptContext);
            }
        });
    }

    private void internalGenerate(StreamingResponseHandler<AiMessage> handler, PromptContext promptContext) {
        try {
            Generator.Response r = this.model.generate(this.id, promptContext, this.temperature.floatValue(), this.maxTokens.intValue(), (token, time) -> handler.onNext(token));
            handler.onComplete(Response.from((Object)AiMessage.from((String)r.responseText), (TokenUsage)new TokenUsage(Integer.valueOf(r.promptTokens), Integer.valueOf(r.generatedTokens)), (FinishReason)JlamaModel.toFinishReason(r.finishReason)));
        }
        catch (Throwable t) {
            handler.onError(t);
        }
    }

    private PromptContext createPromptContext(List<ChatMessage> messages) {
        if (this.model.promptSupport().isEmpty()) {
            throw new UnsupportedOperationException("This model does not support chat generation");
        }
        PromptSupport.Builder promptBuilder = ((PromptSupport)this.model.promptSupport().get()).builder();
        block5: for (ChatMessage message : messages) {
            switch (message.type()) {
                case SYSTEM: {
                    promptBuilder.addSystemMessage(message.text());
                    continue block5;
                }
                case USER: {
                    promptBuilder.addUserMessage(message.text());
                    continue block5;
                }
                case AI: {
                    promptBuilder.addAssistantMessage(message.text());
                    continue block5;
                }
            }
            throw new IllegalArgumentException("Unsupported message type: " + String.valueOf(message.type()));
        }
        return promptBuilder.build();
    }

    public static class JlamaStreamingChatModelBuilder {
        private Optional<Path> modelCachePath;
        private String modelName;
        private String authToken;
        private Integer threadCount;
        private Path workingDirectory;
        private Boolean quantizeModelAtRuntime;
        private DType workingQuantizedType;
        private Float temperature;
        private Integer maxTokens;

        public JlamaStreamingChatModelBuilder modelCachePath(Optional<Path> modelCachePath) {
            this.modelCachePath = modelCachePath;
            return this;
        }

        public JlamaStreamingChatModelBuilder modelName(String modelName) {
            this.modelName = modelName;
            return this;
        }

        public JlamaStreamingChatModelBuilder authToken(String authToken) {
            this.authToken = authToken;
            return this;
        }

        public JlamaStreamingChatModelBuilder threadCount(Integer threadCount) {
            this.threadCount = threadCount;
            return this;
        }

        public JlamaStreamingChatModelBuilder workingDirectory(Path workingDirectory) {
            this.workingDirectory = workingDirectory;
            return this;
        }

        public JlamaStreamingChatModelBuilder quantizeModelAtRuntime(Boolean quantizeModelAtRuntime) {
            this.quantizeModelAtRuntime = quantizeModelAtRuntime;
            return this;
        }

        public JlamaStreamingChatModelBuilder workingQuantizedType(DType workingQuantizedType) {
            this.workingQuantizedType = workingQuantizedType;
            return this;
        }

        public JlamaStreamingChatModelBuilder temperature(Float temperature) {
            this.temperature = temperature;
            return this;
        }

        public JlamaStreamingChatModelBuilder maxTokens(Integer maxTokens) {
            this.maxTokens = maxTokens;
            return this;
        }

        public JlamaStreamingChatModel build() {
            return new JlamaStreamingChatModel(this);
        }
    }
}

