/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at
 * docs/licenses/cddl.txt
 * or http://www.opensource.org/licenses/cddl1.php.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at
 * docs/licenses/cddl.txt.  If applicable,
 * add the following below this CDDL HEADER, with the fields enclosed
 * by brackets "[]" replaced with your own identifying information:
 *      Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 *
 *
 *      Copyright 2011-2019 Ping Identity Corporation
 */
package com.unboundid.directory.sdk.sync.scripting;


import java.io.Serializable;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.atomic.AtomicLong;

import com.unboundid.directory.sdk.common.internal.Configurable;
import com.unboundid.directory.sdk.sync.config.SyncSourceConfig;
import com.unboundid.directory.sdk.sync.internal.SynchronizationServerExtension;
import com.unboundid.directory.sdk.sync.types.EndpointException;
import com.unboundid.directory.sdk.sync.types.SetStartpointOptions;
import com.unboundid.directory.sdk.sync.types.SyncOperation;
import com.unboundid.directory.sdk.sync.types.SyncServerContext;
import com.unboundid.directory.sdk.sync.types.ChangeRecord;
import com.unboundid.ldap.sdk.Entry;
import com.unboundid.util.Extensible;
import com.unboundid.util.args.ArgumentException;
import com.unboundid.util.args.ArgumentParser;



/**
 * This class defines an API that must be implemented by extensions in order to
 * synchronize data from a generic (non-LDAP and non-JDBC) endpoint. Since the
 * ${SYNC_SERVER_PRODUCT_NAME} is LDAP-centric, this API allows you to take
 * generic content and convert it into LDAP entries which can then be processed
 * by the ${SYNC_SERVER_BASE_NAME}. The lifecycle of a sync operation is as
 * follows:
 * <ol>
 * <li>Detect change at the synchronization source</li>
 * <li>Fetch full source entry</li>
 * <li>Perform any mappings and compute the equivalent destination entry</li>
 * <li>Fetch full destination entry</li>
 * <li>Diff the computed destination entry and actual (fetched) destination
 * entry</li>
 * <li>Apply the minimal set of changes at the destination to bring it in sync
 * </li>
 * </ol>
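 * This implies that the
 * {@link #fetchEntry(SyncOperation)} method will be called once for every
 * change that is returned by
 * {@link #getNextBatchOfChanges(int, AtomicLong)}, unless the change record
 * already carries its full entry, in which case the fetch step is skipped.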
 * <p>
 * This is a generic interface and there is no protocol-specific connection
 * management provided. It is expected that implementers will provide their own
 * libraries for talking to the source endpoint and handle the connection
 * lifecycle in the {@link #initializeSyncSource(SyncServerContext,
 * SyncSourceConfig, ArgumentParser)} and {@link #finalizeSyncSource()}
 * methods of this extension.
 * <p>
 * During realtime synchronization (i.e. when a Sync Pipe is running), there is
 * a sliding window of changes being processed, and this API provides a
 * distinction between some different points along that window:
 * <ul>
 *   <li><b>Old changes</b>: These are changes that the Sync Server has
 *       processed and acknowledged back to the Sync Source. The Sync Source is
 *       under no obligation to re-detect these changes.</li>
 *   <li><b>Startpoint</b>: This marks where the Sync Source will start
 *       detecting changes if it is restarted.</li>
 *   <li><b>Detected but unacknowledged</b>: These changes have been returned by
 *       <code>getNextBatchOfChanges()</code> but not completely processed and
 *       acknowledged back to the Sync Source.</li>
 *   <li><b>Undetected changes</b>: The next call to
 *       <code>getNextBatchOfChanges()</code> should return the first changes
 *       that have not been detected.  This should be somewhere at or ahead of
 *       the startpoint.</li>
 * </ul>
 * <p>
 * Several of these methods throw {@link EndpointException}, which should be
 * used in the case of any connection or endpoint error. For other types of
 * errors, runtime exceptions may be used (IllegalStateException,
 * NullPointerException, etc.). The ${SYNC_SERVER_BASE_NAME} will automatically
 * retry operations that fail, up to a configurable number of attempts. The
 * EndpointException class allows you to specify a retry policy as well.
 * <BR>
 * <H2>Configuring Groovy Scripted Sync Sources</H2>
 * In order to configure a Sync Source based on this API and written in Groovy,
 * use a command like:
 * <PRE>
 *      dsconfig create-sync-source \
 *           --source-name "<I>{source-name}</I>" \
 *           --type groovy-scripted \
 *           --set "script-class:<I>{class-name}</I>" \
 *           --set "script-argument:<I>{name=value}</I>"
 * </PRE>
 * where "<I>{source-name}</I>" is the name to use for the Sync Source
 * instance, "<I>{class-name}</I>" is the fully-qualified
 * name of the Groovy class written using this API, and "<I>{name=value}</I>"
 * represents name-value pairs for any arguments to provide to the sync
 * source.  If multiple arguments should be provided to the sync source,
 * then the "<CODE>--set script-argument:<I>{name=value}</I></CODE>" option
 * should be provided multiple times.
 */
@Extensible()
@SynchronizationServerExtension(appliesToLocalContent=false,
                                appliesToSynchronizedContent=true)
public abstract class ScriptedSyncSource implements Configurable
{
  /**
   * {@inheritDoc}
   * <p>
   * The default implementation does not define any arguments.
   */
  @Override
  public void defineConfigArguments(final ArgumentParser parser)
         throws ArgumentException
  {
    // No arguments will be allowed by default.
  }



  /**
   * This hook is called when a Sync Pipe first starts up, when the
   * <i>resync</i> process first starts up, or when the set-startpoint
   * subcommand is called from the <i>realtime-sync</i> command line tool.
   * Any initialization of this sync source should be performed here. This
   * method should generally store the {@link SyncServerContext} in a class
   * member so that it can be used elsewhere in the implementation.
   * <p>
   * The default implementation is empty.
   *
   * @param  serverContext  A handle to the server context for the server in
   *                        which this extension is running.
   * @param  config         The general configuration for this sync source.
   * @param  parser         The argument parser which has been initialized from
   *                        the configuration for this scripted sync source.
   */
  public void initializeSyncSource(final SyncServerContext serverContext,
                                   final SyncSourceConfig config,
                                   final ArgumentParser parser)
  {
    // No initialization will be performed by default.
  }



  /**
   * This hook is called when a Sync Pipe shuts down, when the <i>resync</i>
   * process shuts down, or when the set-startpoint subcommand (from the
   * <i>realtime-sync</i> command line tool) is finished. Any cleanup of this
   * sync source should be performed here.
   * <p>
   * The default implementation is empty.
   */
  public void finalizeSyncSource()
  {
    // No cleanup is performed by default.
  }



  /**
   * Return the URL or path identifying the source endpoint from which this
   * extension is transmitting data. This is used for logging purposes only, so
   * it could just be a server name or hostname and port, etc.
   *
   * @return the URL or path identifying the source endpoint
   */
  public abstract String getCurrentEndpointURL();



  /**
   * This method should effectively set the starting point for synchronization
   * to the place specified by the <code>options</code> parameter. This should
   * cause all changes previous to the specified start point to be disregarded
   * and only changes after that point to be returned by
   * {@link #getNextBatchOfChanges(int, AtomicLong)}.
   * <p>
   * There are several different startpoint types (see
   * {@link SetStartpointOptions}), and this implementation is not required to
   * support them all. If the specified startpoint type is unsupported, this
   * method should throw an {@link UnsupportedOperationException}.
   * <p>
   * <b>IMPORTANT</b>: The <code>RESUME_AT_SERIALIZABLE</code> startpoint type
   * must be supported by your implementation, because this is used when a Sync
   * Pipe first starts up. The {@link Serializable} in this case is the same
   * type that is returned by {@link #getStartpoint()}; the Sync Server persists
   * it and passes it back in on a restart.
   * <p>
   * This method can be called from two different contexts:
   * <ul>
   * <li>When the 'set-startpoint' subcommand of the realtime-sync CLI is used
   *     (the Sync Pipe is required to be stopped in this context)</li>
   * <li>Immediately after a Sync Pipe starts up and a connection is first
   *     established to the source server (i.e. before the first call to
   *     {@link #getNextBatchOfChanges(int, AtomicLong)})</li>
   * </ul>
   *
   * @param options
   *          an object which indicates where exactly to start synchronizing
   *          (e.g. the end of the changelog, specific change number, a certain
   *          time ago, etc)
   * @throws EndpointException
   *           if there is any error while setting the start point
   * @throws UnsupportedOperationException
   *           if the specified startpoint type is not supported by this
   *           implementation
   */
  public abstract void setStartpoint(final SetStartpointOptions options)
                                        throws EndpointException;



  /**
   * Gets the current value of the startpoint for change detection. This is the
   * "bookmark" which indicates which changes have already been processed and
   * which have not. In most cases, a change number is used to detect changes
   * and is managed by the ${SYNC_SERVER_BASE_NAME}, in which case this
   * implementation needs only to return the latest acknowledged
   * change number. In other cases, the return value may correspond to a
   * different value, such as the SYS_CHANGE_VERSION in Microsoft SQL Server.
   * In any case, this method should return the value that is updated by
   * {@link #acknowledgeCompletedOps(LinkedList)}.
   * <p>
   * This method is called periodically and the return value is saved in the
   * persistent state for the Sync Pipe that uses this extension as its Sync
   * Source.
   * <p>
   * <b>IMPORTANT</b>: The internal value for the startpoint should only be
   * updated after a sync operation is acknowledged back to this script (via
   * {@link #acknowledgeCompletedOps(LinkedList)}).
   * Otherwise it will be possible for changes to be missed when the
   * ${SYNC_SERVER_BASE_NAME} is restarted or a connection error occurs.
   *
   * @return a value to store in the persistent state for the Sync Pipe. This is
   *         usually a change number, but if change numbers are not used to
   *         detect changes, this value should represent some other token to
   *         pass into {@link #setStartpoint(SetStartpointOptions)}
   *         when the sync pipe starts up.
   */
  public abstract Serializable getStartpoint();



  /**
   * Return the next batch of change records from the source. Change records
   * are usually just hints that a change happened; they do not include
   * the full contents of the target entry. In an effort to never synchronize
   * stale data, the ${SYNC_SERVER_BASE_NAME} will go back and fetch the full
   * target entry for each change record.
   * <p>
   * On the first invocation, this should return changes starting from the
   * startpoint that was set by
   * {@link #setStartpoint(SetStartpointOptions)}. This method is also
   * responsible for updating the internal state such that subsequent
   * invocations do not return duplicate changes.
   * <p>
   * The resulting list should be limited by <code>maxChanges</code>. The
   * <code>numStillPending</code> reference should be set to the estimated
   * number of changes that haven't yet been retrieved from the source endpoint
   * when this method returns, or zero if all the current changes have been
   * retrieved.
   * <p>
   * <b>IMPORTANT</b>: While this method needs to keep track of which changes
   * have already been returned so that it does not return them again, it should
   * <b>NOT</b> modify the official startpoint. The internal value for the
   * startpoint should only be updated after a sync operation is acknowledged
   * back to this script (via
   * {@link #acknowledgeCompletedOps(LinkedList)}).
   * Otherwise it will be possible for changes to be missed when the
   * ${SYNC_SERVER_BASE_NAME} is restarted or a connection error occurs. The
   * startpoint should not change as a result of this method.
   * <p>
   * This method <b>does not need to be thread-safe</b>. It will be invoked
   * repeatedly by a single thread, based on the polling interval set in the
   * Sync Pipe configuration.
   *
   * @param maxChanges
   *          the maximum number of changes to retrieve
   * @param numStillPending
   *          this should be set to the number of unretrieved changes that
   *          are still pending after this batch has been retrieved. This will
   *          be passed in as zero, and may be left that way if the actual
   *          value cannot be determined.
   * @return a list of {@link ChangeRecord} instances, each
   *         corresponding to a single change at the source endpoint.
   *         If there are no new changes to return, this method should return
   *         an empty list.
   * @throws EndpointException
   *           if there is any error while retrieving the next batch of changes
   */
  public abstract List<ChangeRecord> getNextBatchOfChanges(
                                              final int maxChanges,
                                              final AtomicLong numStillPending)
                                                      throws EndpointException;



  /**
   * Return a full source entry (in LDAP form) from the source, corresponding
   * to the {@link ChangeRecord} that is passed in through the
   * {@link SyncOperation}. This method should perform any queries necessary to
   * gather the latest values for all the attributes to be synchronized.
   * <p>
   * This method <b>must be thread safe</b>, as it will be called repeatedly and
   * concurrently by each of the Sync Pipe worker threads as they process
   * entries.
   * <p>
   * If the original ChangeRecord has the full entry already set on it (which
   * can be done using <code>ChangeRecord.Builder#fullEntry(Entry)</code>),
   * then this method will not get called, and the Sync Server will
   * automatically use the full entry from the ChangeRecord. In this case, the
   * implementation can always return {@code null}.
   *
   * @param operation
   *          the SyncOperation which identifies the source "entry" to
   *          fetch. The ChangeRecord can be obtained by calling
   *          <code>operation.getChangeRecord()</code>.
   *          These ChangeRecords are generated by
   *          {@link #getNextBatchOfChanges(int, AtomicLong)}
   *          or by
   *          {@link #listAllEntries(BlockingQueue)}.
   *
   * @return a full LDAP Entry, or {@code null} if no such entry exists.
   * @throws EndpointException
   *           if there is an error fetching the entry
   */
  public abstract Entry fetchEntry(final SyncOperation operation)
                                      throws EndpointException;



  /**
   * Provides a way for the ${SYNC_SERVER_BASE_NAME} to acknowledge back to the
   * script which sync operations it has processed. This method should update
   * the official startpoint which was set by
   * {@link #setStartpoint(SetStartpointOptions)} and is
   * returned by {@link #getStartpoint()}.
   * <p>
   * <b>IMPORTANT</b>: The internal value for the startpoint should only be
   * updated after a sync operation is acknowledged back to this extension (via
   * this method). Otherwise it will be possible for changes to be missed when
   * the ${SYNC_SERVER_BASE_NAME} is restarted or a connection error occurs.
   *
   * @param completedOps
   *          a list of {@link SyncOperation}s that have finished processing.
   *          The records are listed in the order they were first detected.
   * @throws EndpointException
   *           if there is an error acknowledging the changes back to the
   *           source endpoint
   */
  public abstract void acknowledgeCompletedOps(
                                   final LinkedList<SyncOperation> completedOps)
                                      throws EndpointException;



  /**
   * Streams every entry in the source endpoint into the supplied queue. The
   * 'resync' command line tool relies on this method. Unless a subclass
   * overrides it, this method throws an
   * {@link UnsupportedOperationException}, so it only has to be implemented
   * when the resync functionality is needed.
   * <p>
   * Each item placed on the <code>outputQueue</code> should be a
   * {@link ChangeRecord} whose <code>ChangeType</code> is <code>null</code>,
   * which marks it as a resync operation.
   * <p>
   * Do not return from this method until every entry at the source has been
   * added to the output queue. Separate threads drain the queue concurrently
   * and process what they find. The queue normally should not hold full
   * entries; it should hold ChangeRecord objects that identify them, and each
   * of those is later handed to {@link #fetchEntry(SyncOperation)}. Every
   * ChangeRecord must therefore carry enough identifiable information (e.g.
   * primary keys) for its entry to be found again.
   * <p>
   * Resync follows a lifecycle similar to that of real-time sync, with a few
   * differences:
   * <ol>
   * <li>Stream out a list of identifiers for the entries in the source
   *     endpoint, using a ChangeRecord as the identifier</li>
   * <li>Fetch the full source entry for a ChangeRecord</li>
   * <li>Perform any mappings and compute the equivalent destination entry</li>
   * <li>Fetch the full destination entry</li>
   * <li>Diff the computed destination entry and the actual destination
   *     entry</li>
   * <li>Apply the minimal set of changes at the destination to bring it in
   *     sync</li>
   * </ol>
   * <p>
   * As an alternative, this method may set the full entry on the ChangeRecord
   * itself. The "fetch full entry" step is then skipped and the Sync Server
   * simply uses the entry specified on the ChangeRecord.
   * <p>
   * When the total set of entries is very large, the work may be split into
   * multiple network queries within this method. The queue cannot grow out of
   * control because it blocks once it is full; its capacity is fixed at 1000.
   *
   * @param outputQueue
   *          a queue of ChangeRecord objects which will be individually
   *          fetched via {@link #fetchEntry(SyncOperation)}
   * @throws EndpointException
   *           if there is an error retrieving the list of entries to resync
   */
  public void listAllEntries(final BlockingQueue<ChangeRecord> outputQueue)
                                throws EndpointException
  {
    // Name the concrete subclass so the message points at the extension that
    // still needs to provide an implementation.
    final String implementingClass = getClass().getName();
    final String message =
         "The listAllEntries(BlockingQueue) method must be implemented in " +
         "the " + implementingClass + " extension.";
    throw new UnsupportedOperationException(message);
  }



  /**
   * Streams into the supplied queue the source entries selected by a given
   * file input. The 'resync' command line tool relies on this method. Unless a
   * subclass overrides it, this method throws an
   * {@link UnsupportedOperationException}, so it only has to be implemented
   * when the resync functionality is needed for specific records, which can be
   * specified in the input file.
   * <p>
   * The format of the <code>inputLines</code> (e.g. the content of the file)
   * is user-defined; it may be key/value pairs, primary keys, or full SQL
   * statements, for example. This method is triggered by the
   * <i>--sourceInputFile</i> argument on the resync CLI. Each item placed on
   * the <code>outputQueue</code> should be a {@link ChangeRecord} whose
   * <code>ChangeType</code> is <code>null</code>, which marks it as a resync
   * operation.
   * <p>
   * Do not return from this method until every entry specified by the input
   * file has been added to the output queue. Separate threads drain the queue
   * concurrently and process what they find. The queue normally should not
   * hold full entries; it should hold ChangeRecord objects that identify them,
   * and each of those is later handed to {@link #fetchEntry(SyncOperation)}.
   * Every ChangeRecord must therefore carry enough identifiable information
   * (e.g. primary keys) for its entry to be found again.
   * <p>
   * Resync follows a lifecycle similar to that of real-time sync, with a few
   * differences:
   * <ol>
   * <li>Stream out a list of identifiers for entries in the source endpoint,
   *     using the given input file as the basis for which entries to
   *     resync</li>
   * <li>Fetch the full source entry for an identifier</li>
   * <li>Perform any mappings and compute the equivalent destination entry</li>
   * <li>Fetch the full destination entry</li>
   * <li>Diff the computed destination entry and the actual destination
   *     entry</li>
   * <li>Apply the minimal set of changes at the destination to bring it in
   *     sync</li>
   * </ol>
   * <p>
   * As an alternative, this method may set the full entry on the ChangeRecord
   * itself. The "fetch full entry" step is then skipped and the Sync Server
   * simply uses the entry specified on the ChangeRecord.
   * <p>
   * When the total set of entries is very large, the work may be split into
   * multiple network queries within this method. The queue cannot grow out of
   * control because it blocks once it is full; its capacity is fixed at 1000.
   *
   * @param inputLines
   *          an Iterator containing the lines from the specified input file to
   *          resync (this is specified on the CLI for the resync command).
   *          These lines can be any format, for example a set of primary keys,
   *          a set of WHERE clauses, a set of full SQL queries, etc.
   * @param outputQueue
   *          a queue of ChangeRecord objects which will be individually
   *          fetched via {@link #fetchEntry(SyncOperation)}
   * @throws EndpointException
   *           if there is an error retrieving the list of entries to resync
   */
  public void listAllEntries(final Iterator<String> inputLines,
                             final BlockingQueue<ChangeRecord> outputQueue)
                                    throws EndpointException
  {
    // Name the concrete subclass so the message points at the extension that
    // still needs to provide an implementation.
    final String implementingClass = getClass().getName();
    final String message =
         "The listAllEntries(Iterator,BlockingQueue) method must be " +
         "implemented in the " + implementingClass + " extension.";
    throw new UnsupportedOperationException(message);
  }
}
