/**************************************************************************
 * (C) 2019-2024 SAP SE or an SAP affiliate company. All rights reserved. *
 **************************************************************************/
package com.sap.cds.services.impl.outbox.persistence.collectors;

import static com.sap.cds.services.impl.model.DynamicModelProvider.STATIC_MODEL_ACCESS_FEATURE;

import java.io.PrintWriter;
import java.io.StringWriter;
import java.time.Duration;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Supplier;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.fasterxml.jackson.core.Version;
import com.fasterxml.jackson.core.util.VersionUtil;
import com.sap.cds.CdsLockTimeoutException;
import com.sap.cds.Result;
import com.sap.cds.Struct;
import com.sap.cds.impl.parser.JsonParser;
import com.sap.cds.ql.CQL;
import com.sap.cds.ql.Delete;
import com.sap.cds.ql.Predicate;
import com.sap.cds.ql.Select;
import com.sap.cds.ql.Update;
import com.sap.cds.ql.cqn.CqnSelect;
import com.sap.cds.services.environment.CdsProperties.Outbox.OutboxServiceConfig;
import com.sap.cds.services.impl.outbox.Messages;
import com.sap.cds.services.impl.outbox.Messages_;
import com.sap.cds.services.impl.outbox.persistence.PersistentOutbox;
import com.sap.cds.services.impl.outbox.persistence.TelemetryData;
import com.sap.cds.services.outbox.OutboxMessage;
import com.sap.cds.services.outbox.OutboxMessageEventContext;
import com.sap.cds.services.outbox.OutboxService;
import com.sap.cds.services.persistence.PersistenceService;
import com.sap.cds.services.runtime.CdsRuntime;
import com.sap.cds.services.utils.OpenTelemetryUtils;
import com.sap.cds.services.utils.lib.mt.TenantUtils;

import io.opentelemetry.api.trace.Span;
import io.opentelemetry.api.trace.SpanKind;
import io.opentelemetry.context.Scope;


/**
 * Outbox collector implementation for a specific outbox partition.
 */
public class PartitionCollector implements Runnable {

	private static final Logger LOG = LoggerFactory.getLogger(PartitionCollector.class);

	private PersistenceService db;
	private final CdsRuntime runtime;
	private final OutboxService outboxService;
	private final TelemetryData telemetryData;
	private final int chunkSize;
	private final String target;

	private volatile boolean shutdown;
	private volatile boolean isRunning;
	private Thread job;

	private final Object pauseMonitor = new Object();
	private final AtomicInteger pauseCount = new AtomicInteger(5);
	private volatile boolean pause = false; // flag to synchronize wakeup

	private final long maxPauseMillis;
	private final long emitTimeoutSeconds;
	private final int maxPublishAttempts;
	private final boolean storeLastError;
	private final boolean ordered;

	private final Supplier<List<String>> tenantSupplier;

	private final boolean checkVersion;
	private final String appVersion;
	private final Version appVersionParsed;
	private final Set<String> suspendedTenants = new HashSet<>();

	public PartitionCollector(CdsRuntime runtime, PersistentOutbox outboxService, OutboxServiceConfig config, String appVersion, Supplier<List<String>> tenantSupplier, TelemetryData telemetryData) {
		this.runtime = runtime;
		this.outboxService = outboxService;
		this.telemetryData = telemetryData;
		this.target = outboxService.getName();

		this.chunkSize = config.getChunkSize();
		this.maxPauseMillis = config.getMaxPause().getSeconds() * 1000;
		this.emitTimeoutSeconds = config.getEmitTimeout().getSeconds();
		this.maxPublishAttempts = config.getMaxAttempts();
		this.storeLastError = config.getStoreLastError().isEnabled();
		this.ordered = config.isOrdered();

		this.tenantSupplier = tenantSupplier;
		this.checkVersion = config.isCheckVersion();
		this.appVersion = appVersion;
		this.appVersionParsed = checkVersion ? VersionUtil.parseVersion(appVersion, null, null) : null;

		if (!storeLastError) {
			LOG.debug("Storing errors for outbox '{}' is disabled.", outboxService.getName());
		}
	}

	@Override
	public void run() {
		shutdown = false;
		isRunning = true;
		try {
			this.db = runtime.getServiceCatalog().getService(PersistenceService.class, PersistenceService.DEFAULT_NAME);
			processPartition();
		} finally {
			isRunning = false;
			shutdown = false;
		}
	}

	public void start() {
		if (!isRunning) {
			LOG.debug("Starting collector of the outbox '{}'", outboxService.getName());
			job = new Thread(this, outboxService.getName() + "-collector");
			job.setDaemon(true);
			job.start();
		}
	}

	public void stop(long millis) throws InterruptedException {
		if (!shutdown && isRunning) {
			LOG.debug("Stopping collector of the outbox '{}'", outboxService.getName());
			shutdown = true;
			unpause();
			job.join(millis);
			job = null;
		}
	}

	public boolean isRunning() {
		return isRunning;
	}

	private boolean isShutdown() {
		return shutdown || Thread.currentThread().isInterrupted();
	}

	private void pause(long pauseInMillis) {
		synchronized(pauseMonitor) {
			if (isShutdown()) return;
			pause = true;
			try {
				LOG.debug("Pausing collector '{}' for {} ms", target, pauseInMillis);
				pauseMonitor.wait(pauseInMillis);
			} catch (InterruptedException e) {
				LOG.debug("Collector '{}' interrupted", target);
				Thread.currentThread().interrupt();
			}
			pause = false;
		}
	}

	public void unpause() {
		// ensures that the next pause is short
		pauseCount.set(0);
		// wakes up a currently sleeping collector
		if (pause) {
			synchronized (pauseMonitor) {
				if(pause) {
					pause = false;
					pauseMonitor.notifyAll();
					LOG.debug("Notified paused collector '{}'", target);
				}
			}
		}
	}

	private boolean isNotEmptyOutbox(String tenant) {
		LOG.debug("Checking tenant '{}' for outbox entries in collector '{}'", tenant, target);
		return runtime.requestContext().featureToggles(STATIC_MODEL_ACCESS_FEATURE).systemUser(tenant).run(req -> {
			CqnSelect select = Select.from(Messages_.class)
					.columns(c -> c.ID())
					.where(e -> e.target().eq(target)
							.and(e.attempts().lt(this.maxPublishAttempts)))
					.limit(1);
			Result res = db.run(select);
			return res.rowCount() != 0;
		});
	}

	private void processPartition() {
		while (!isShutdown()) {
			try {
				LOG.debug("Executing collector '{}'", target);
				AtomicBoolean doPause = new AtomicBoolean(true);
				List<String> tenants = new ArrayList<>(this.tenantSupplier.get());
				tenants.removeAll(suspendedTenants);
				// shuffle the list to avoid processing the same tenant first every time
				Collections.shuffle(tenants);
				for (String tenant : tenants) {
					Optional<Span> span = OpenTelemetryUtils.createSpan(OpenTelemetryUtils.CdsSpanType.OUTBOX, SpanKind.SERVER);
					try (Scope scope = span.map(Span::makeCurrent).orElse(null)) {
						span.ifPresent(s -> {
							s.updateName("Outbox Collector (" + target + ")");
							s.setAttribute(OpenTelemetryUtils.CDS_TENANT, tenant);
							s.setAttribute(OpenTelemetryUtils.CDS_OUTBOX_TARGET, target);
						});

						if (isNotEmptyOutbox(tenant)) {
							LOG.debug("Processing tenant '{}' in collector '{}'", tenant, target);
							boolean interrupted = runtime.requestContext().systemUser(tenant).run(req -> {
								return runtime.changeSetContext().run(ctx -> {
									Predicate where = CQL.get(Messages.TARGET).eq(target);
									// for compatibility
									if (OutboxService.PERSISTENT_UNORDERED_NAME.equals(target)) {
										where = CQL.or(where, CQL.get(Messages.TARGET).startsWith("auditlog/"));
									} else if (OutboxService.PERSISTENT_ORDERED_NAME.equals(target)) {
										where = CQL.or(where, CQL.get(Messages.TARGET).startsWith("messaging/"));
									}
									where = CQL.and(where, CQL.get(Messages.ATTEMPTS).lt(this.maxPublishAttempts));

									long skip = calculateOffset(where);
									CqnSelect select = Select.from(Messages_.class).where(where)
											.orderBy(e -> e.timestamp().asc(), e -> e.ID().asc())
											.limit(this.chunkSize, skip)
											.lock(0);

									Result res = db.run(select);
									// at least one tenant still has more messages to process
									if(skip != 0 || res.rowCount() >= select.top()) {
										doPause.set(false);
									}

									if(res.rowCount() > 0) {
										// track start of dispatch to interrupt if sequential dispatching takes too long
										final Instant startOfDispatch = Instant.now();
										for (Messages msg : res.listOf(Messages.class)) {
											// sequential publishing
											PublishState state = publish(msg, startOfDispatch);

											if (state == PublishState.SUCCESS) {
												db.run(Delete.from(Messages_.class).where(e -> e.ID().eq(msg.getId())));
												telemetryData.recordOutgoingMessages(tenant, 1);
											} else if (state == PublishState.TIMEOUT) {
												break;
											} else if (state == PublishState.INVALID_VERSION) {
												suspendedTenants.add(tenant);
												break;
											} else if (state == PublishState.INTERRUPTED) {
												return true;
											}

											// track time of dispatch process and interrupt if threshold has been reached
											if (Duration.between(startOfDispatch, Instant.now()).getSeconds() > this.emitTimeoutSeconds) {
												break;
											}
										}
									}
									return false;
								});
							});
							if (interrupted || isShutdown()) {
								doPause.set(false);
								break;
							}
							// record statistics after processing outbox
							telemetryData.recordStatistics(runtime, db, tenant);
						} else {
							LOG.debug("The outbox for the tenant '{}' in collector '{}' is empty", tenant, target);
						}
					} catch (Exception e) {
						OpenTelemetryUtils.recordException(span, e);

						if (isLockTimeoutException(e)) {
							LOG.debug("Collector '{}' timed out waiting for table lock for tenant '{}'", target, tenant);
						} else if (TenantUtils.isUnknownTenant(e)) {
							LOG.debug("Unknown tenant '{}' for the outbox collector", tenant);
						} else {
							LOG.warn("Exception occurred for tenant '{}' in collector '{}'", tenant, target, e);
						}
					} finally {
						OpenTelemetryUtils.endSpan(span);
					}
				}

				if(doPause.get()) {
					pause(getPauseMillis(pauseCount.get(), maxPauseMillis));
					if(pauseCount.get() < 20) {
						pauseCount.addAndGet(2);
					}
				} else {
					pauseCount.set(0);
				}
			} catch (Throwable e) {
				LOG.warn("Unexpected exception occurred in collector '{}'", target, e);
			}
		}
	}

	private long calculateOffset(Predicate whereClause) {
		if (this.ordered) {
			return 0;
		}

		CqnSelect select = Select
				.from(Messages_.class)
				.columns(c -> CQL.count().as("count"))
				.where(whereClause);
		Result res = db.run(select);
		long count = ((Number) res.single().get("count")).longValue();
		long chunks = count / this.chunkSize;
		// no secure random number needed, because the offset is not used for security relevant operations
		long offset = chunks < 2 ? 0 : ThreadLocalRandom.current().nextLong(chunks) * this.chunkSize; // NOSONAR

		LOG.debug("Calculated offset for unordered processing of outbox collector '{}' is {}", target, offset);

		return offset;
	}

	@SuppressWarnings("unchecked")
	private PublishState publish(final Messages msg, final Instant startOfDispatch) {
		// by publishing a retry message we have to check whether the retry pause is reached before trying
		if (msg.getAttempts() != 0 && msg.getLastAttemptTimestamp() != null && (Duration.between(msg.getLastAttemptTimestamp(), Instant.now()).toMillis() < getPauseMillis(msg.getAttempts(), Integer.MAX_VALUE))) {
			LOG.debug("Message '{}' cannot be republished until the retry waiting time is reached", msg.getId());
			return PublishState.TIMEOUT;
		}

		String outboxEvent = null;
		Map<String, Object> message = JsonParser.map(JsonParser.parseJson(msg.getMsg()));
		// backward compatibility
		if (msg.getTarget().startsWith("messaging/") ||
				msg.getTarget().startsWith("auditlog/")) {
			outboxEvent = msg.getTarget().substring(msg.getTarget().indexOf('/') + 1);
		} else {
			outboxEvent = (String) message.get(PersistentOutbox.ATTR_EVENT);
			String messageVersion = (String) message.get(PersistentOutbox.ATTR_VERSION);
			message = (Map<String, Object>) message.get(PersistentOutbox.ATTR_MESSAGE);

			// check message version
			if (checkVersion && !Objects.equals(appVersion, messageVersion)) {
				Version messageVersionParsed = VersionUtil.parseVersion(messageVersion, null, null);
				if (appVersionParsed.compareTo(messageVersionParsed) < 0) {
					LOG.debug("Found newer version '{}' in outbox message. Suspending collector with version '{}'.", messageVersion, appVersion);
					return PublishState.INVALID_VERSION;
				}
			}
		}

		LOG.debug("Publishing outbox message for target '{}' with event '{}'", msg.getTarget(), outboxEvent);

		OutboxMessageEventContext ctx = OutboxMessageEventContext.create(outboxEvent);
		ctx.setIsInbound(true);
		ctx.setTimestamp(msg.getTimestamp());
		ctx.setMessage(Struct.access(message).as(OutboxMessage.class));

		// recover the boxed context
		while (!isShutdown()) {
			try {
				outboxService.emit(ctx);
				return PublishState.SUCCESS;
			} catch (Throwable e) { // NOSONAR
				// we should may be check the CdsErrorStatuses.NO_ON_HANDLER exception in order
				// to ignore the entry without handling it as an error.
				LOG.warn("Failed to emit Outbox message with id '{}' for target '{}' with event '{}'", msg.getId(), msg.getTarget(), outboxEvent, e);

				int currentAttempts = msg.getAttempts();

				// re-attempt to publish
				msg.setAttempts(++currentAttempts);
				msg.setLastAttemptTimestamp(Instant.now());
				Map<String, Object> data = new HashMap<>();
				data.put(Messages.ATTEMPTS, msg.getAttempts());
				data.put(Messages.LAST_ATTEMPT_TIMESTAMP, msg.getLastAttemptTimestamp());

				if (storeLastError) {
					StringWriter stringWriter = new StringWriter();
					e.printStackTrace(new PrintWriter(stringWriter));

					data.put(Messages.LAST_ERROR, stringWriter.toString());
				}

				db.run(Update.entity(Messages_.class).data(data).where(m -> m.ID().eq(msg.getId())));

				if (currentAttempts < this.maxPublishAttempts) {
					// exponential backoff in ms for re-attempt
					long pauseInMillis = getPauseMillis(currentAttempts, Integer.MAX_VALUE);

					// check time + pause of dispatch process and interrupt if threshold has been reached
					if (Duration.between(startOfDispatch, Instant.now().plusMillis(pauseInMillis)).getSeconds() > this.emitTimeoutSeconds) {
						LOG.debug("The retry waiting time of message '{}' would exceed the emit timeout, therefore release lock and commit transaction", msg.getId());
						return PublishState.TIMEOUT;
					}

					// wait till next try
					pause(pauseInMillis);
				} else {
					// giving up to publish
					LOG.warn("Reached maximum number of attempts to emit Outbox message with id '{}' to target '{}' with event '{}'",
							msg.getId(), msg.getTarget(), outboxEvent);
					return PublishState.FAILED;
				}
			}
		}
		return PublishState.INTERRUPTED;
	}

	private static long getPauseMillis(int pauseCount, long maxTimeoutMillis) {
		long retryInMillis = Math.round(Math.pow(2d, pauseCount) * 1000 + ThreadLocalRandom.current().nextLong(1001));
		return Math.min(retryInMillis, maxTimeoutMillis);
	}

	private static boolean isLockTimeoutException(Throwable t) {
		while (t != null) {
			if (t instanceof CdsLockTimeoutException) {
				return true;
			}
			t = t.getCause();
		}
		return false;
	}

	private static enum PublishState {
		SUCCESS,
		FAILED,
		TIMEOUT,
		INTERRUPTED,
		INVALID_VERSION
	}
}
