/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.sling.discovery.impl.common.heartbeat;

import java.util.Calendar;
import java.util.Date;
import java.util.Iterator;
import java.util.Set;
import java.util.UUID;

import javax.jcr.Session;

import org.apache.felix.scr.annotations.Activate;
import org.apache.felix.scr.annotations.Component;
import org.apache.felix.scr.annotations.Deactivate;
import org.apache.felix.scr.annotations.Reference;
import org.apache.felix.scr.annotations.Service;
import org.apache.sling.api.resource.LoginException;
import org.apache.sling.api.resource.ModifiableValueMap;
import org.apache.sling.api.resource.PersistenceException;
import org.apache.sling.api.resource.Resource;
import org.apache.sling.api.resource.ResourceResolver;
import org.apache.sling.api.resource.ResourceResolverFactory;
import org.apache.sling.commons.scheduler.Scheduler;
import org.apache.sling.discovery.impl.Config;
import org.apache.sling.discovery.impl.DiscoveryServiceImpl;
import org.apache.sling.discovery.impl.cluster.voting.VotingHandler;
import org.apache.sling.discovery.impl.cluster.voting.VotingHelper;
import org.apache.sling.discovery.impl.cluster.voting.VotingView;
import org.apache.sling.discovery.impl.common.ViewHelper;
import org.apache.sling.discovery.impl.common.resource.ResourceHelper;
import org.apache.sling.discovery.impl.topology.announcement.AnnouncementRegistry;
import org.apache.sling.discovery.impl.topology.connector.ConnectorRegistry;
import org.apache.sling.launchpad.api.StartupListener;
import org.apache.sling.launchpad.api.StartupMode;
import org.apache.sling.settings.SlingSettingsService;
import org.osgi.framework.BundleException;
import org.osgi.service.component.ComponentContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * The heartbeat handler is responsible for issuing both local and remote
 * heartbeats, and registers a periodic job with the scheduler for doing so.
 * <p>
 * Local heartbeats are stored in the repository. Remote heartbeats are POSTs to
 * remote TopologyConnectorServlets.
 */
@Component
@Service(value = { HeartbeatHandler.class, StartupListener.class })
public class HeartbeatHandler implements Runnable, StartupListener {

    /** logger of this component **/
    private final Logger logger = LoggerFactory.getLogger(this.getClass());

    /**
     * the name used for the periodic job with the scheduler - starts out as a
     * plain prefix and gets the local sling id appended in activate()
     **/
    private String NAME = "discovery.impl.heartbeat.runner.";

    /** used in activate() to look up the sling id of the local instance **/
    @Reference
    private SlingSettingsService slingSettingsService;

    /** used to obtain the resource resolvers for all repository access of this class **/
    @Reference
    private ResourceResolverFactory resourceResolverFactory;

    /** the outgoing topology connectors are pinged through this registry (remote heartbeat) **/
    @Reference
    private ConnectorRegistry connectorRegistry;

    /** asked to check for expired announcements at the start of each view check **/
    @Reference
    private AnnouncementRegistry announcementRegistry;

    /** runs the periodic heartbeat job and the one-off jobs fired by triggerHeartbeat() **/
    @Reference
    private Scheduler scheduler;

    /** provides the heartbeat interval, the cluster instances path and the leader election settings **/
    @Reference
    private Config config;

    /** analyzes and cleans up votings as part of the view check **/
    @Reference
    private VotingHandler votingHandler;

    /**
     * the discovery service reference is used to get properties updated before heartbeats are sent -
     * it is not an @Reference but handed over via initialize()
     **/
    private DiscoveryServiceImpl discoveryService;

    /** the sling id of the local instance - set in activate() **/
    private String slingId;

    /**
     * the id which is to be used for the next voting - overwritten by initialize()
     * and replaced by a new random UUID whenever a voting is started
     **/
    private String nextVotingId = UUID.randomUUID().toString();

    /** whether or not to reset the leaderElectionId at next heartbeat time **/
    private boolean resetLeaderElectionId = false;

    /** lock object for synchronizing activate(), initialize(), startupFinished() and run() **/
    private final Object lock = new Object();

    /** SLING-2892: remember first heartbeat written to repository by this instance (millis, -1 meaning none yet) **/
    private long firstHeartbeatWritten = -1;

    /** SLING-2892: remember the value of the heartbeat this instance has written the last time **/
    private Calendar lastHeartbeatWritten = null;

    /**
     * SLING-2895: avoid heartbeats after deactivation - set to true at the end of
     * activate(), set to false in deactivate() and when this instance disables
     * itself after detecting a duplicate sling.id
     **/
    private volatile boolean activated = false;

    /** SLING-2901: the runtimeId is a unique id, set on activation, used for robust duplicate sling.id detection **/
    private String runtimeId;

    /** keep a reference to the component context - used to stop the bundle / disable the components **/
    private ComponentContext context;

    /** SLING-2968 : start issuing remote heartbeats only after startup finished **/
    private boolean startupFinished = false;

    /**
     * SLING-3382 : force ping instructs the servlet to start the backoff from scratch again -
     * set by triggerHeartbeat() and reset after the outgoing connectors have been pinged
     **/
    private boolean forcePing;

    /**
     * Startup callback: hands over to {@link #startupFinished(StartupMode)}
     * as soon as the startup is reported as finished and does nothing
     * otherwise.
     *
     * @param mode the startup mode, passed on unchanged
     * @param finished whether or not the startup has finished
     */
    public void inform(StartupMode mode, boolean finished) {
        if (!finished) {
            // startup still in progress - nothing to do yet
            return;
        }
        startupFinished(mode);
    }

    /**
     * Marks the startup as finished and issues a heartbeat right away,
     * both while holding the lock.
     *
     * @param mode the startup mode (not evaluated here)
     */
    public void startupFinished(StartupMode mode) {
        synchronized (lock) {
            // SLING-2968: from here on remote heartbeats are no longer held back
            this.startupFinished = true;
            issueHeartbeat();
        }
    }

    /**
     * Startup progress callback - intentionally a no-op.
     *
     * @param ratio the reported progress (ignored)
     */
    public void startupProgress(float ratio) {
        // progress reports are of no interest to the heartbeat handler
    }

    /**
     * Activates this component: remembers the component context, resolves the
     * local sling id, derives the scheduler job name from it and resets all
     * per-activation heartbeat state. Everything happens under the lock and
     * the 'activated' flag is raised as the very last step.
     *
     * @param context the component context of this component
     */
    @Activate
    protected void activate(ComponentContext context) {
        synchronized (lock) {
            this.context = context;

            final String localSlingId = slingSettingsService.getSlingId();
            this.slingId = localSlingId;
            this.NAME = "discovery.impl.heartbeat.runner." + localSlingId;

            // SLING-2895: start without any heartbeat history to avoid
            // unnecessary log.error
            this.lastHeartbeatWritten = null;
            this.firstHeartbeatWritten = -1;

            // a fresh runtimeId per activation (SLING-2901)
            this.runtimeId = UUID.randomUUID().toString();

            // request a reset of the 'leaderElectionId' property with the next
            // heartbeat: a node which left the cluster should not become leader
            // when it joins again - resetting the leaderElectionId to the
            // current time ensures that.
            this.resetLeaderElectionId = true;

            this.activated = true;
        }
    }

    /**
     * Deactivates this component: lowers the 'activated' flag - so that
     * {@link #run()} stops issuing heartbeats - and removes the periodic
     * heartbeat job from the scheduler.
     */
    @Deactivate
    protected void deactivate() {
        // SLING-3365 : dont synchronize on deactivate
        activated = false;
        try {
            scheduler.removeJob(NAME);
        } catch (java.util.NoSuchElementException ignored) {
            // the periodic job is only registered in initialize() - if that was
            // never called, or scheduling failed there, there is nothing to
            // remove and deactivation must not fail because of it
        }
    }

    /**
     * The initialize method is called by the DiscoveryServiceImpl.activate
     * as we require the discoveryService (and the discoveryService has
     * a reference on us - but we cant have circular references in osgi).
     * <p>
     * The initialVotingId is used to avoid an unnecessary topologyChanged event
     * when switching from isolated to established view but with only the local
     * instance in the view.
     * <p>
     * A first heartbeat is issued immediately (under the lock); afterwards the
     * periodic heartbeat job is registered with the scheduler. A failure to
     * register the job is logged and not propagated.
     *
     * @param discoveryService the discovery service to update properties on
     * before each heartbeat
     * @param initialVotingId the id to be used for the first voting started
     * by this instance
     */
    public void initialize(final DiscoveryServiceImpl discoveryService,
            final String initialVotingId) {
        synchronized(lock) {
            this.discoveryService = discoveryService;
            this.nextVotingId = initialVotingId;
            issueHeartbeat();
        }

        try {
            final long interval = config.getHeartbeatInterval();
            logger.info("initialize: starting periodic heartbeat job for "+slingId+" with interval "+interval+" sec.");
            scheduler.addPeriodicJob(NAME, this,
                    null, interval, false);
        } catch (Exception e) {
            // fixed log prefix: this is emitted from initialize, not from activate
            logger.error("initialize: Could not start heartbeat runner: " + e, e);
        }
    }

    /**
     * The scheduler job body: while this component is activated, issues a
     * heartbeat and then checks the view - both under the lock.
     */
    public void run() {
        synchronized (lock) {
            // SLING:2895: no heartbeats (and no view check) unless activated
            if (activated) {
                issueHeartbeat();
                checkView();
            }
        }
    }

    /**
     * Obtain an administrative ResourceResolver from the factory.
     *
     * @return a new resolver - to be closed by the caller - or null (after
     * logging an error) if no resourceResolverFactory is available
     * @throws LoginException if the administrative login fails
     */
    private ResourceResolver getResourceResolver() throws LoginException {
        if (resourceResolverFactory != null) {
            return resourceResolverFactory.getAdministrativeResourceResolver(null);
        }
        logger.error("getResourceResolver: resourceResolverFactory is null!");
        return null;
    }

    /**
     * Calculate the local cluster instance path.
     *
     * @return the configured cluster instances path, a slash and the local sling id
     */
    private String getLocalClusterNodePath() {
        final String clusterInstancesPath = config.getClusterInstancesPath();
        return clusterInstancesPath + "/" + slingId;
    }

    /**
     * Trigger the issuance of the next heartbeat asap instead of at next
     * heartbeat interval.
     * <p>
     * Sets the force-ping flag and fires this handler as a one-off scheduler
     * job. A failure to fire the job is logged at info level only.
     */
    public void triggerHeartbeat() {
        forcePing = true;
        try {
            // 'fireJobAt' is used instead of 'fireJob': 'fireJob' checks for an
            // already existing job of the same job-class, whereas 'fireJobAt'
            // takes a job name - which is made unique here, so it can never
            // conflict with an existing job and can always be triggered
            final String oneOffJobName = NAME + UUID.randomUUID();
            // a date one second in the past makes sure it gets triggered immediately
            final Date alreadyDue = new Date(System.currentTimeMillis() - 1000);
            scheduler.fireJobAt(oneOffJobName, this, null, alreadyDue);
        } catch (Exception e) {
            logger.info("triggerHeartbeat: Could not trigger heartbeat: " + e);
        }
    }

    /**
     * Issue a heartbeat.
     * <p>
     * Three steps, in this order: the local properties are updated via the
     * discovery service, a cluster-local heartbeat is stored in the
     * repository, and a remote heartbeat is sent to the topology connectors
     * which announce this part of the topology to others.
     * <p>
     * A missing discovery service is logged as an error but does not prevent
     * the two heartbeats from being issued.
     */
    private void issueHeartbeat() {
        if (discoveryService != null) {
            discoveryService.updateProperties();
        } else {
            logger.error("issueHeartbeat: discoveryService is null");
        }
        issueClusterLocalHeartbeat();
        issueRemoteHeartbeats();
    }

    /**
     * Issue a remote heartbeat using the topology connectors.
     * <p>
     * Nothing is sent while the connector registry is missing or the startup
     * has not finished yet. Otherwise all outgoing connectors are pinged with
     * the current force-ping flag, which is cleared afterwards.
     */
    private void issueRemoteHeartbeats() {
        if (connectorRegistry == null) {
            logger.error("issueRemoteHeartbeats: connectorRegistry is null");
        } else if (!startupFinished) {
            logger.debug("issueRemoteHeartbeats: not issuing remote heartbeat yet, startup not yet finished");
        } else {
            if (logger.isDebugEnabled()) {
                logger.debug("issueRemoteHeartbeats: pinging outgoing topology connectors (if there is any) for "+slingId);
            }
            connectorRegistry.pingOutgoingConnectors(forcePing);
            // the forced ping has been delivered - back to regular pings
            forcePing = false;
        }
    }

    /**
     * Issue a cluster local heartbeat (into the repository).
     * <p>
     * Writes the current time as 'lastHeartbeat' to the resource of the local
     * instance, on the first write of an activation also the 'runtimeId', and -
     * if a reset was requested or the property is missing - a new
     * 'leaderElectionId'. The in-memory bookkeeping (lastHeartbeatWritten,
     * firstHeartbeatWritten) is only updated after a successful commit.
     * <p>
     * Once a heartbeat has been written, two duplicate sling.id checks run
     * before each further write:
     * <ul>
     * <li>SLING-2892: if 'lastHeartbeat' in the repository is not what this
     * instance wrote last, an error is logged (only after a grace period of
     * two heartbeat intervals).</li>
     * <li>SLING-2901: if 'runtimeId' in the repository is not the one of this
     * instance, discovery.impl disables itself and no heartbeat is written.</li>
     * </ul>
     * Login and persistence failures are logged and not propagated.
     */
    private void issueClusterLocalHeartbeat() {
        if (logger.isDebugEnabled()) {
            logger.debug("issueClusterLocalHeartbeat: storing cluster-local heartbeat to repository for "+slingId);
        }
        ResourceResolver resourceResolver = null;
        final String myClusterNodePath = getLocalClusterNodePath();
        final Calendar currentTime = Calendar.getInstance();
        try {
            resourceResolver = getResourceResolver();
            if (resourceResolver == null) {
                logger.error("issueClusterLocalHeartbeat: no resourceresolver available!");
                return;
            }

            final Resource resource = ResourceHelper.getOrCreateResource(
                    resourceResolver, myClusterNodePath);
            final ModifiableValueMap resourceMap = resource.adaptTo(ModifiableValueMap.class);
            if (resourceMap == null) {
                // adaptTo may return null - without a modifiable map nothing can be written
                logger.error("issueClusterLocalHeartbeat: could not adapt " + myClusterNodePath
                        + " to a ModifiableValueMap, not writing a heartbeat");
                return;
            }

            if (firstHeartbeatWritten!=-1 && lastHeartbeatWritten!=null) {
                // SLING-2892: additional paranoia check
                // after the first heartbeat, check if there's someone else using
                // the same sling.id in this cluster
                final long timeSinceFirstHeartbeat =
                        System.currentTimeMillis() - firstHeartbeatWritten;
                // the heartbeat interval is handled in seconds elsewhere in this
                // class (see initialize), timeSinceFirstHeartbeat is in millis -
                // convert before comparing
                final long gracePeriodMillis = 2 * 1000L * config.getHeartbeatInterval();
                if (timeSinceFirstHeartbeat > gracePeriodMillis) {
                    // but wait at least 2 heartbeat intervals to handle the situation
                    // where a bundle is refreshed, and startup cases.
                    final Calendar lastHeartbeat = resourceMap.get("lastHeartbeat", Calendar.class);
                    // if there is a heartbeat value, check if it is what I've written
                    // the last time
                    if (lastHeartbeat!=null
                            && !lastHeartbeatWritten.getTime().equals(lastHeartbeat.getTime())) {
                        // then we've likely hit the situation where there is another
                        // sling instance accessing the same repository (ie in the same cluster)
                        // using the same sling.id - hence writing to the same
                        // resource
                        logger.error("issueClusterLocalHeartbeat: SLING-2892: Detected unexpected, concurrent update of: "+
                                myClusterNodePath+" 'lastHeartbeat'. If not done manually, " +
                                "this likely indicates that there is more than 1 instance running in this cluster" +
                                " with the same sling.id. My sling.id is "+slingId+"." +
                                " Check for sling.id.file in your installation of all instances in this cluster " +
                                "to verify this! Duplicate sling.ids are not allowed within a cluster!");
                    }
                }

                // SLING-2901 : robust paranoia check: on first heartbeat write, the
                //              'runtimeId' is set as a property (ignoring any former value).
                //              If in subsequent calls the value of 'runtimeId' changes, then
                //              there is someone else around with the same slingId.
                final String readRuntimeId = resourceMap.get("runtimeId", String.class);
                if ( readRuntimeId == null ) { // SLING-3977
                    // someone deleted the resource property - treat the write below
                    // as a first heartbeat again, so that the runtimeId is re-written
                    firstHeartbeatWritten = -1;
                } else if (!runtimeId.equals(readRuntimeId)) {
                    selfDisableOnDuplicateSlingId();
                    return;
                }
            }
            resourceMap.put("lastHeartbeat", currentTime);
            if (firstHeartbeatWritten==-1) {
                resourceMap.put("runtimeId", runtimeId);
            }
            if (resetLeaderElectionId || !resourceMap.containsKey("leaderElectionId")) {
                final String newLeaderElectionId = createLeaderElectionId(resourceResolver);
                resourceMap.put("leaderElectionId", newLeaderElectionId);
                resourceMap.put("leaderElectionIdCreatedAt", new Date());
                logger.debug("issueClusterLocalHeartbeat: set leaderElectionId to "+newLeaderElectionId);
                resetLeaderElectionId = false;
            }
            resourceResolver.commit();

            // SLING-2892: only in success case: remember the last heartbeat value written
            lastHeartbeatWritten = currentTime;
            // and set the first heartbeat written value - if it is not already set
            if (firstHeartbeatWritten==-1) {
                firstHeartbeatWritten = System.currentTimeMillis();
            }

        } catch (LoginException e) {
            logger.error("issueHeartbeat: could not log in administratively: "
                    + e, e);
        } catch (PersistenceException e) {
            logger.error("issueHeartbeat: Got a PersistenceException: "
                    + myClusterNodePath + " " + e, e);
        } finally {
            if (resourceResolver != null) {
                resourceResolver.close();
            }
        }
    }

    /**
     * SLING-2901: reacts to a detected duplicate sling.id - logs the problem,
     * asks the discovery service for a forced shutdown (if there is one),
     * lowers the 'activated' flag and then stops the bundle, falling back to
     * disabling the components if the bundle cannot be stopped.
     */
    private void selfDisableOnDuplicateSlingId() {
        logger.error("issueClusterLocalHeartbeat: SLING-2901: Detected more than 1 instance running in this cluster " +
                " with the same sling.id. My sling.id is "+slingId+", " +
                " Check for sling.id.file in your installation of all instances in this cluster " +
                "to verify this! Duplicate sling.ids are not allowed within a cluster!");
        logger.error("issueClusterLocalHeartbeat: sending TOPOLOGY_CHANGING before self-disabling.");
        if (discoveryService != null) {
            discoveryService.forcedShutdown();
        } else {
            // issueHeartbeat tolerates a missing discoveryService - so must the self-disabling
            logger.error("issueClusterLocalHeartbeat: discoveryService is null, cannot send TOPOLOGY_CHANGING");
        }
        logger.error("issueClusterLocalHeartbeat: disabling discovery.impl");
        activated = false;
        if (context!=null) {
            // disable all components
            try {
                context.getBundleContext().getBundle().stop();
            } catch (BundleException e) {
                logger.warn("issueClusterLocalHeartbeat: could not stop bundle: "+e, e);
                // then disable all components instead
                context.disableComponent(null);
            }
        }
    }

    /**
     * Builds a new leaderElectionId of the form
     * {@code <prefix>_<zero-padded current time millis>_<slingId>}.
     * <p>
     * The prefix is "0" by default and "1" if the configured repository
     * descriptor has the value "true" (ignoring case) - or exactly the other
     * way round if the configuration asks to invert the descriptor.
     *
     * @param resourceResolver the resolver used to get at the repository descriptors
     * @return the new leaderElectionId
     */
    private String createLeaderElectionId(final ResourceResolver resourceResolver) {
        final int maxLongLength = String.valueOf(Long.MAX_VALUE).length();
        // Locale.ROOT: the id is compared as a string, so the digits must not
        // depend on the default locale of the instance
        final String currentTimeMillisStr = String.format(java.util.Locale.ROOT,
                "%0" + maxLongLength + "d", System.currentTimeMillis());

        final boolean shouldInvertRepositoryDescriptor = config.shouldInvertRepositoryDescriptor();
        String prefix = (shouldInvertRepositoryDescriptor ? "1" : "0");

        final String leaderElectionRepositoryDescriptor = config.getLeaderElectionRepositoryDescriptor();
        if (leaderElectionRepositoryDescriptor!=null && leaderElectionRepositoryDescriptor.length()!=0) {
            // when this property is configured, check the value of the repository descriptor
            // and if that value is set, include it in the leader election id
            final Session session = resourceResolver.adaptTo(Session.class);
            if ( session != null ) {
                final String value = session.getRepository()
                        .getDescriptor(leaderElectionRepositoryDescriptor);
                if ("true".equalsIgnoreCase(value)) {
                    prefix = (shouldInvertRepositoryDescriptor ? "0" : "1");
                }
            }
        }
        return prefix + "_" + currentTimeMillisStr + "_" + slingId;
    }

    /**
     * Check whether the established view matches the reality, ie matches the
     * heartbeats.
     * <p>
     * First lets the announcement registry check for expired announcements
     * (the remotes), then runs the actual view check with a fresh resource
     * resolver, which is closed again in any case. Login and persistence
     * failures are logged and not propagated.
     */
    private void checkView() {
        // check the remotes first
        if (announcementRegistry == null) {
            logger.error("announcementRegistry is null");
            return;
        }
        announcementRegistry.checkExpiredAnnouncements();

        ResourceResolver resourceResolver = null;
        try {
            resourceResolver = getResourceResolver();
            if (resourceResolver == null) {
                // getResourceResolver returns null if there is no factory -
                // the view cannot be checked without a resolver
                logger.error("checkView: no resourceresolver available!");
                return;
            }
            doCheckView(resourceResolver);
        } catch (LoginException e) {
            logger.error("checkView: could not log in administratively: " + e,
                    e);
        } catch (PersistenceException e) {
            logger.error(
                    "checkView: encountered a persistence exception during view check: "
                            + e, e);
        } finally {
            if (resourceResolver != null) {
                resourceResolver.close();
            }
        }
    }

    /**
     * Do the established-against-heartbeat view check using the given
     * resourceResolver.
     * <p>
     * Votings are analyzed and timed out ones cleaned up first. As long as
     * there is a winning or any open non-winning voting, nothing else is
     * done. Otherwise the live instances are determined and, if the
     * established view does not match them, a new voting is started.
     *
     * @param resourceResolver the resolver to use for all repository access
     * @throws PersistenceException if accessing the repository fails
     */
    private void doCheckView(final ResourceResolver resourceResolver) throws PersistenceException {

        if (votingHandler != null) {
            votingHandler.analyzeVotings(resourceResolver);
            try {
                votingHandler.cleanupTimedoutVotings(resourceResolver);
            } catch (Exception e) {
                logger.warn("doCheckView: Exception occurred while cleaning up votings: "+e, e);
            }
        } else {
            logger.info("doCheckView: votingHandler is null! slingId="+slingId);
        }

        final VotingView winner = VotingHelper.getWinningVoting(resourceResolver, config);
        final int openNonWinningCount =
                VotingHelper.listOpenNonWinningVotings(resourceResolver, config).size();
        final boolean votingsPending = (winner != null) || (openNonWinningCount > 0);
        if (votingsPending) {
            // votings are pending - wait for them to settle
            if (logger.isDebugEnabled()) {
                logger.debug("doCheckView: "
                        + openNonWinningCount
                        + " ongoing votings, no one winning yet - I shall wait for them to settle.");
            }
            return;
        }

        final String clusterInstancesPath = config.getClusterInstancesPath();
        final Resource instancesResource =
                ResourceHelper.getOrCreateResource(resourceResolver, clusterInstancesPath);
        final Set<String> liveInstances =
                ViewHelper.determineLiveInstances(instancesResource, config);

        if (!ViewHelper.establishedViewMatches(resourceResolver, config, liveInstances)) {
            if (logger.isDebugEnabled()) {
                logger.debug("doCheckView: no pending nor winning votes. But: view does not match established or no established yet. Initiating a new voting");
                for (final String liveInstance : liveInstances) {
                    logger.debug("doCheckView: one of the live instances is: "
                            + liveInstance);
                }
            }

            // this instance seems to be the first to realize that the
            // currently established view doesnt match the currently live
            // instances - hence initiate a new voting
            doStartNewVoting(resourceResolver, liveInstances);
        } else {
            // the normal case: the established view matches what is seen
            logger.debug("doCheckView: no pending nor winning votes. view is fine. we're all happy.");
        }
    }

    /**
     * Starts a new voting for the given live instances, initiated by the
     * local instance. The voting uses the id currently stored as next voting
     * id, which is replaced by a new random UUID before the voting is created.
     *
     * @param resourceResolver the resolver handed on to the voting creation
     * @param liveInstances the sling ids of the instances to include in the voting
     * @throws PersistenceException if creating the voting fails
     */
    private void doStartNewVoting(final ResourceResolver resourceResolver,
            final Set<String> liveInstances) throws PersistenceException {
        final String idForThisVoting = this.nextVotingId;
        // have a new id ready for the voting after this one
        this.nextVotingId = UUID.randomUUID().toString();

        VotingView.newVoting(resourceResolver, config, idForThisVoting, slingId, liveInstances);
    }

    /**
     * Management function to trigger the otherwise algorithm-dependent
     * start of a new voting.
     * This can make sense when explicitly trying to force a leader
     * change (which is otherwise not allowed by the discovery API)
     * <p>
     * The voting is started for the currently live instances. Login and
     * persistence failures, as well as a missing resource resolver, are
     * logged and not propagated.
     */
    public void startNewVoting() {
        logger.info("startNewVoting: explicitly starting new voting...");
        ResourceResolver resourceResolver = null;
        try {
            resourceResolver = getResourceResolver();
            if (resourceResolver == null) {
                // getResourceResolver returns null if there is no factory -
                // no voting can be created without a resolver
                logger.error("startNewVoting: no resourceresolver available!");
                return;
            }
            final Resource clusterNodesRes = ResourceHelper.getOrCreateResource(
                    resourceResolver, config.getClusterInstancesPath());
            final Set<String> liveInstances = ViewHelper.determineLiveInstances(
                    clusterNodesRes, config);
            doStartNewVoting(resourceResolver, liveInstances);
            logger.info("startNewVoting: explicit new voting was started.");
        } catch (LoginException e) {
            logger.error("startNewVoting: could not log in administratively: " + e,
                    e);
        } catch (PersistenceException e) {
            logger.error(
                    "startNewVoting: encountered a persistence exception during view check: "
                            + e, e);
        } finally {
            if (resourceResolver != null) {
                resourceResolver.close();
            }
        }
    }

}
