package com.ksyun.kmr.hadoop.fs.ks3.committer;

import com.ksyun.kmr.hadoop.fs.ks3.Ks3FileStatus;
import com.ksyun.kmr.hadoop.fs.ks3.Ks3FileSystem;
import com.ksyun.kmr.hadoop.fs.ks3.Ks3FileSystemStore;
import com.ksyun.kmr.hadoop.fs.ks3.parallel.EngineShutter;
import com.ksyun.kmr.hadoop.fs.ks3.parallel.conveyor.CommitAction;
import com.ksyun.kmr.hadoop.fs.ks3.parallel.conveyor.DestroyAction;
import com.ksyun.kmr.hadoop.fs.ks3.requestbuilder.ListFileStatus;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.concurrent.atomic.AtomicReference;


/**
 * Output committer that commits small "commit info" files using the v1
 * (task commit followed by job commit) directory layout. At job commit time
 * those info files are used to commit the actual KS3 files. Some operations
 * go directly through the {@link Ks3FileSystemStore} instead of the file
 * system API in order to reduce the load on KS3.
 *
 * <p>Paths used by this committer, relative to the job output directory:
 * <pre>
 *   _direct_output_committer_temporary/                       pending job attempts
 *     &lt;appAttemptId&gt;/                                          job attempt
 *       &lt;taskId&gt; + PendingCommitList.EXT                      committed task info
 *       _direct_output_committer_tasks/&lt;taskAttemptId&gt;/        task attempt work dir
 * </pre>
 *
 * <p>When the committer is created with a {@code null} output path, no file
 * system is resolved and every lifecycle method is a logged no-op.
 */
@InterfaceAudience.Public
@InterfaceStability.Stable
public class CommitInfoFileCommitter extends FileOutputCommitter {
    private static final Log LOG = LogFactory.getLog(CommitInfoFileCommitter.class);

    /** Name of the directory, directly under the output path, that holds all pending job attempts. */
    public static final String PENDING_DIR_NAME = "_direct_output_committer_temporary";

    /** Name of the directory, under a job attempt directory, that holds the task attempt directories. */
    public static final String TASK_DIR_NAME = "_direct_output_committer_tasks";

    /** Name of the marker file created in the output directory by {@link #commitJob(JobContext)}. */
    public static final String SUCCEEDED_FILE_NAME = "_SUCCESS";

    /** Configuration key controlling whether the success marker is written (read with default {@code true}). */
    public static final String SUCCESSFUL_JOB_OUTPUT_DIR_MARKER =
            "mapreduce.fileoutputcommitter.marksuccessfuljobs";

    /** Qualified job output path, or {@code null} when the job has no output path. */
    private Path outputPath = null;
    /** Task attempt directory; only set when the committer was built from a task attempt context. */
    private Path workPath = null;
    /** File system of the output path; {@code null} when {@link #outputPath} is {@code null}. */
    private Ks3FileSystem fs;
    /** Store backing {@link #fs}; {@code null} when {@link #outputPath} is {@code null}. */
    private Ks3FileSystemStore store;

    /**
     * Creates the committer.
     *
     * @param outputPath job output directory; may be {@code null}
     * @param context    job context, or a {@link TaskAttemptContext} when created on the task side
     * @throws IOException if the file system for {@code outputPath} cannot be obtained
     * @throws ClassCastException if the file system of {@code outputPath} is not a {@link Ks3FileSystem}
     */
    public CommitInfoFileCommitter(Path outputPath,
                                   JobContext context) throws IOException {
        super(outputPath, context);

        if (outputPath != null) {
            fs = (Ks3FileSystem) FileSystem.get(outputPath.toUri(), context.getConfiguration());

            store = fs.getStore();
            this.outputPath = fs.makeQualified(outputPath);

            // Use the type system rather than the class name: task contexts are
            // not guaranteed to have "TaskAttemptContext" in their class name,
            // and an unrelated class containing that text would fail the cast.
            if (context instanceof TaskAttemptContext) {
                workPath = getTaskAttemptPath((TaskAttemptContext) context, outputPath);
            }
        }
    }

    /** Returns the pending-job-attempts directory under this committer's output path. */
    private Path getPendingJobAttemptsPath() {
        return getPendingJobAttemptsPath(getOutputPath());
    }

    /** Returns the pending-job-attempts directory under {@code out}. */
    private static Path getPendingJobAttemptsPath(Path out) {
        return new Path(out, PENDING_DIR_NAME);
    }

    /** Reads the application attempt id from the job configuration, defaulting to {@code 0}. */
    private static int getAppAttemptId(JobContext context) {
        return context.getConfiguration().getInt(
                MRJobConfig.APPLICATION_ATTEMPT_ID, 0);
    }

    /**
     * Returns the directory of the current job attempt under this committer's output path.
     *
     * @param context job context supplying the application attempt id
     */
    @Override
    public Path getJobAttemptPath(JobContext context) {
        return getJobAttemptPath(context, getOutputPath());
    }

    /**
     * Returns the directory of the current job attempt under {@code out}.
     *
     * @param context job context supplying the application attempt id
     * @param out     job output directory
     */
    public static Path getJobAttemptPath(JobContext context, Path out) {
        return getJobAttemptPath(getAppAttemptId(context), out);
    }

    /** Returns the directory of job attempt {@code appAttemptId} under this committer's output path. */
    @Override
    protected Path getJobAttemptPath(int appAttemptId) {
        return getJobAttemptPath(appAttemptId, getOutputPath());
    }

    /** Returns the directory of job attempt {@code appAttemptId} under {@code out}. */
    private static Path getJobAttemptPath(int appAttemptId, Path out) {
        return new Path(getPendingJobAttemptsPath(out), String.valueOf(appAttemptId));
    }

    /** Returns the directory holding all task attempts of the current job attempt. */
    private Path getPendingTaskAttemptsPath(JobContext context) {
        return getPendingTaskAttemptsPath(context, getOutputPath());
    }

    /** Returns the directory holding all task attempts of the current job attempt under {@code out}. */
    private static Path getPendingTaskAttemptsPath(JobContext context, Path out) {
        return new Path(getJobAttemptPath(context, out), TASK_DIR_NAME);
    }

    /**
     * Returns the work directory of the given task attempt under this committer's output path.
     *
     * @param context task attempt context supplying the task attempt id
     */
    @Override
    public Path getTaskAttemptPath(TaskAttemptContext context) {
        return new Path(getPendingTaskAttemptsPath(context),
                String.valueOf(context.getTaskAttemptID()));
    }

    /**
     * Returns the work directory of the given task attempt under {@code out}.
     *
     * @param context task attempt context supplying the task attempt id
     * @param out     job output directory
     */
    public static Path getTaskAttemptPath(TaskAttemptContext context, Path out) {
        return new Path(getPendingTaskAttemptsPath(context, out),
                String.valueOf(context.getTaskAttemptID()));
    }

    /**
     * Returns the committed-task path (named after the task id, not the attempt id)
     * inside the current job attempt directory.
     */
    @Override
    public Path getCommittedTaskPath(TaskAttemptContext context) {
        return getCommittedTaskPath(getAppAttemptId(context), context);
    }

    /** Returns the committed-task path inside the directory of job attempt {@code appAttemptId}. */
    @Override
    protected Path getCommittedTaskPath(int appAttemptId, TaskAttemptContext context) {
        return new Path(getJobAttemptPath(appAttemptId),
                String.valueOf(context.getTaskAttemptID().getTaskID()));
    }

    /** Accepts paths whose file name ends with {@code PendingCommit.EXT}. */
    public static class PendingCommitFilter implements PathFilter {
        @Override
        public boolean accept(Path path) {
            return path.getName().endsWith(PendingCommit.EXT);
        }
    }

    /** Accepts paths whose file name ends with {@code PendingCommitList.EXT}. */
    public static class PendingCommitListFilter implements PathFilter {
        @Override
        public boolean accept(Path path) {
            return path.getName().endsWith(PendingCommitList.EXT);
        }
    }

    /**
     * Returns the task attempt work directory, or {@code null} when this committer
     * was not created from a task attempt context (or has no output path).
     */
    @Override
    public Path getWorkPath() {
        return workPath;
    }

    /**
     * Prepares the job by removing any leftover pending directory.
     *
     * @throws IOException if the cleanup fails
     */
    @Override
    public void setupJob(JobContext context) throws IOException {
        cleanupJob(context);
    }

    /**
     * Commits the job: feeds every committed-task info file found in the job
     * attempt directory to the commit engines, removes the pending directory and,
     * unless disabled by {@link #SUCCESSFUL_JOB_OUTPUT_DIR_MARKER}, writes the
     * {@link #SUCCEEDED_FILE_NAME} marker.
     *
     * @throws IOException if listing, committing, cleanup or marker creation fails,
     *                     or if a commit/destroy worker recorded a failure
     */
    @Override
    public void commitJob(JobContext context) throws IOException {
        if (outputPath == null) {
            LOG.warn("Output Path is null in commitJob()");
            return;
        }

        Path jobAttemptPath = getJobAttemptPath(context);
        Ks3FileStatus jobAttemptDirStatus = null;

        try {
            jobAttemptDirStatus = fs.getFileStatus(jobAttemptPath);
        } catch (FileNotFoundException e) {
            // Spark SQL has no job id, so the job attempt directory may not exist.
            LOG.warn("No Output found for commitJob");
        }

        if (jobAttemptDirStatus != null) {
            ListFileStatus listFileStatus = new ListFileStatus(fs, jobAttemptPath, false, jobAttemptDirStatus);
            // Shared failure slot handed to both engines and to the listing stream.
            AtomicReference<Exception> exceptionAtomicReference = new AtomicReference<>();
            DestroyAction destroyAction = new DestroyAction(store, exceptionAtomicReference);
            CommitAction commitAction = new CommitAction(store, exceptionAtomicReference);

            try {
                commitAction.startEngines();
                destroyAction.startEngines();

                // Whatever the commit action emits is forwarded to the destroy action.
                commitAction.sink = (dataLoc) -> {
                    destroyAction.sendData(dataLoc);
                };

                listFileStatus.genStream(commitAction.getExceptionAtomicReference()).map(batch -> {
                    List<Ks3FileStatus> fss = listFileStatus.wrapToFileStatus(batch, new PendingCommitListFilter());
                    LOG.info("wrap batch finished");
                    return fss;
                }).forEach(stats -> {
                    for (Ks3FileStatus stat : stats) {
                        // Stop feeding this batch as soon as the source rejects an item.
                        if (!commitAction.source().sendData(Collections.singletonMap("data", stat.getPath()))) {
                            break;
                        }
                    }
                    LOG.info("message batch finished");
                });
            } finally {
                EngineShutter.shutdownAll(commitAction, destroyAction);
            }

            // Safeguard: never delete the pending data or write _SUCCESS after a
            // recorded worker failure. NOTE(review): EngineShutter.shutdownAll may
            // already rethrow; in that case this check is simply never reached.
            Exception failure = exceptionAtomicReference.get();
            if (failure != null) {
                throw new IOException("Failed to commit job output under " + jobAttemptPath, failure);
            }
        }

        cleanupJob(context);
        if (context.getConfiguration().getBoolean(
                SUCCESSFUL_JOB_OUTPUT_DIR_MARKER, true)) {
            Path markerPath = new Path(outputPath, SUCCEEDED_FILE_NAME);
            fs.create(markerPath).close();
        }
    }

    /**
     * Deletes the whole pending directory of this job through the store.
     *
     * @throws IOException if the delete fails
     */
    @Override
    @Deprecated
    public void cleanupJob(JobContext context) throws IOException {
        if (outputPath == null) {
            LOG.warn("Output Path is null in cleanupJob()");
            return;
        }

        Path pendingJobAttemptsPath = getPendingJobAttemptsPath();
        // NOTE(review): the boolean flag presumably makes the delete tolerate
        // missing objects (404) - confirm against Ks3FileSystemStore.deleteDir.
        store.deleteDir(fs.pathToKey(pendingJobAttemptsPath), true);
    }

    /**
     * Aborts the job by removing the pending directory.
     *
     * @throws IOException if the cleanup fails
     */
    @Override
    public void abortJob(JobContext context, JobStatus.State state)
            throws IOException {
        cleanupJob(context);
    }

    /** No per-task setup is performed. */
    @Override
    public void setupTask(TaskAttemptContext context) throws IOException {}

    /**
     * Commits the task attempt identified by {@code context}.
     *
     * @throws IOException if the task commit fails
     */
    @Override
    public void commitTask(TaskAttemptContext context)
            throws IOException {
        commitTask(context, null);
    }

    /**
     * Commits a task attempt: loads every pending-commit file in the task attempt
     * directory, stores them as one {@code PendingCommitList} object at the
     * committed-task key, then deletes the individual files and the task attempt
     * directory key.
     *
     * @param context         task attempt context
     * @param taskAttemptPath ignored; the path passed in by callers is wrong, so
     *                        it is always recomputed from {@code context}
     * @throws IOException if listing, loading, storing or deleting fails
     */
    @Override
    @Private
    public void commitTask(TaskAttemptContext context, Path taskAttemptPath)
            throws IOException {
        context.progress();
        TaskAttemptID attemptId = context.getTaskAttemptID();

        if (outputPath == null) {
            LOG.warn("Output Path is null in commitTask()");
            return;
        }

        Path attemptDir = getTaskAttemptPath(context);
        Ks3FileStatus attemptDirStatus;

        try {
            attemptDirStatus = fs.getFileStatus(attemptDir);
        } catch (FileNotFoundException e) {
            attemptDirStatus = null;
        }

        if (attemptDirStatus == null) {
            LOG.warn("No Output found for " + attemptId);
            return;
        }

        Path committedTaskPath = getCommittedTaskPath(context);
        PendingCommitList pendingCommitList = new PendingCommitList();
        List<String> pendingCommitPaths = new LinkedList<>();

        Ks3FileStatus[] fss = new ListFileStatus(fs, attemptDir, true, attemptDirStatus).listStatus(new PendingCommitFilter());

        for (Ks3FileStatus status : fss) {
            Path itemPath = status.getPath();
            pendingCommitPaths.add(fs.pathToKey(itemPath));
            PendingCommit itemCommit = PendingCommit.load(fs, itemPath);
            pendingCommitList.data.add(itemCommit);
        }
        // Also remove the key of the task attempt directory itself.
        pendingCommitPaths.add(fs.pathToKey(attemptDir) + "/");

        // Publish the aggregated list first; only then remove the per-file inputs.
        store.putObject(fs.pathToKey(committedTaskPath) + PendingCommitList.EXT, pendingCommitList.toBytes());
        store.deleteObjects(pendingCommitPaths);
    }

    /**
     * Aborts the task attempt identified by {@code context}.
     *
     * @throws IOException if the delete fails
     */
    @Override
    public void abortTask(TaskAttemptContext context) throws IOException {
        abortTask(context, null);
    }

    /**
     * Aborts a task attempt by deleting its task attempt directory.
     *
     * <p>In Spark, abort task and abort job may run at the same time, so the
     * delete has to skip 404 responses. Abort task must not delete the
     * committed-task info key, because abort task may run after commit task.
     *
     * @param context         task attempt context
     * @param taskAttemptPath ignored; the path passed in by callers is wrong, so
     *                        it is always recomputed from {@code context}
     * @throws IOException if the delete fails
     */
    @Override
    @Private
    public void abortTask(TaskAttemptContext context, Path taskAttemptPath) throws IOException {
        context.progress();

        if (outputPath == null) {
            LOG.warn("Output Path is null in abortTask()");
            return;
        }

        Path attemptDir = getTaskAttemptPath(context);
        store.deleteDir(fs.pathToKey(attemptDir), true);
    }

    /** Always reports that the task needs a commit. Spark does not call this method. */
    @Override
    public boolean needsTaskCommit(TaskAttemptContext context
    ) throws IOException {
        return true;
    }

    /** Always reports that the task needs a commit. Spark does not call this method. */
    @Override
    @Private
    public boolean needsTaskCommit(TaskAttemptContext context, Path taskAttemptPath
    ) throws IOException {
        return true;
    }

    /** Recovery is not supported. Spark does not call this method. */
    @Override
    @Deprecated
    public boolean isRecoverySupported() {
        return false;
    }

    /** Job commit is not repeatable. Spark does not call this method. */
    @Override
    public boolean isCommitJobRepeatable(JobContext context) throws IOException {
        return false;
    }

    /**
     * Task recovery is not supported. Spark does not call this method.
     *
     * @throws IOException always
     */
    @Override
    public void recoverTask(TaskAttemptContext context)
            throws IOException {
        LOG.warn("Cannot recover task {" + context.getTaskAttemptID() + "}");
        throw new IOException("unsupport recover task");
    }
}
