/*
 * Decompiled with CFR 0.152.
 */
package org.broadinstitute.hellbender.tools.genomicsdb;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.util.Interval;
import htsjdk.samtools.util.IntervalList;
import htsjdk.samtools.util.Locatable;
import htsjdk.tribble.AbstractFeatureReader;
import htsjdk.tribble.CloseableTribbleIterator;
import htsjdk.tribble.FeatureCodec;
import htsjdk.tribble.FeatureReader;
import htsjdk.tribble.TribbleException;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFCodec;
import htsjdk.variant.vcf.VCFFileReader;
import htsjdk.variant.vcf.VCFHeader;
import htsjdk.variant.vcf.VCFHeaderLine;
import htsjdk.variant.vcf.VCFUtils;
import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.concurrent.CompletionException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadFactory;
import java.util.stream.Collectors;
import org.broadinstitute.barclay.argparser.Advanced;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineException;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.cmdline.programgroups.ShortVariantDiscoveryProgramGroup;
import org.broadinstitute.hellbender.engine.GATKTool;
import org.broadinstitute.hellbender.exceptions.GATKException;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.tools.genomicsdb.GenomicsDBUtils;
import org.broadinstitute.hellbender.utils.IntervalUtils;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.gcs.BucketUtils;
import org.broadinstitute.hellbender.utils.io.IOUtils;
import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils;
import org.genomicsdb.importer.GenomicsDBImporter;
import org.genomicsdb.model.BatchCompletionCallbackFunctionArgument;
import org.genomicsdb.model.Coordinates;
import org.genomicsdb.model.GenomicsDBCallsetsMapProto;
import org.genomicsdb.model.GenomicsDBImportConfiguration;
import org.genomicsdb.model.GenomicsDBVidMapProto;
import org.genomicsdb.model.ImportConfig;

@DocumentedFeature
@CommandLineProgramProperties(summary="Import VCFs to GenomicsDB", oneLineSummary="Import VCFs to GenomicsDB", programGroup=ShortVariantDiscoveryProgramGroup.class)
public final class GenomicsDBImport
extends GATKTool {
    // Default values for the buffer-size, segment-size and batch-size arguments declared below
    // (the same literals appear as the initial values of those argument fields).
    private static final long DEFAULT_VCF_BUFFER_SIZE_PER_SAMPLE = 16384L;
    private static final long DEFAULT_SEGMENT_SIZE = 0x100000L;
    private static final int DEFAULT_ZERO_BATCH_SIZE = 0;
    // Long names of the command-line arguments. The same string values appear as the fullName
    // (and mutex entries) of the @Argument annotations on the fields below.
    public static final String WORKSPACE_ARG_LONG_NAME = "genomicsdb-workspace-path";
    public static final String INCREMENTAL_WORKSPACE_ARG_LONG_NAME = "genomicsdb-update-workspace-path";
    public static final String SEGMENT_SIZE_ARG_LONG_NAME = "genomicsdb-segment-size";
    public static final String OVERWRITE_WORKSPACE_LONG_NAME = "overwrite-existing-genomicsdb-workspace";
    public static final String INTERVAL_LIST_LONG_NAME = "output-interval-list-to-file";
    public static final String VCF_BUFFER_SIZE_ARG_NAME = "genomicsdb-vcf-buffer-size";
    public static final String BATCHSIZE_ARG_LONG_NAME = "batch-size";
    public static final String CONSOLIDATE_ARG_NAME = "consolidate";
    public static final String SAMPLE_NAME_MAP_LONG_NAME = "sample-name-map";
    public static final String VALIDATE_SAMPLE_MAP_LONG_NAME = "validate-sample-name-map";
    public static final String MERGE_INPUT_INTERVALS_LONG_NAME = "merge-input-intervals";
    public static final String VCF_INITIALIZER_THREADS_LONG_NAME = "reader-threads";
    public static final String MAX_NUM_INTERVALS_TO_IMPORT_IN_PARALLEL = "max-num-intervals-to-import-in-parallel";
    public static final String MERGE_CONTIGS_INTO_NUM_PARTITIONS = "merge-contigs-into-num-partitions";
    // NOTE(review): presumably the interval count above which the tool warns about large
    // interval lists; the use site is not visible in this part of the file - confirm.
    public static final int INTERVAL_LIST_SIZE_WARNING_THRESHOLD = 100;
    // NOTE(review): these look like the indices of the begin/end entries in the long[] returned by
    // getArrayColumnBounds (the visible code indexes that array with literal 0 and 1) - confirm.
    public static final int ARRAY_COLUMN_BOUNDS_START = 0;
    public static final int ARRAY_COLUMN_BOUNDS_END = 1;
    public static final String SHARED_POSIXFS_OPTIMIZATIONS = "genomicsdb-shared-posixfs-optimizations";
    // ---- Command-line arguments. Each field is populated by the argument parser from its @Argument annotation. ----

    // Workspace path for a fresh import. When an update workspace is supplied instead,
    // initializeWorkspaceAndToolMode() overwrites this field with that path.
    @Argument(fullName="genomicsdb-workspace-path", doc="Workspace for GenomicsDB. Can be a POSIX file system absolute or relative path or a HDFS/GCS URL. Use this argument when creating a new GenomicsDB workspace. Either this or genomicsdb-update-workspace-path must be specified. Must be an empty or non-existent directory.", mutex={"genomicsdb-update-workspace-path", "output-interval-list-to-file"})
    private String workspace;
    // A non-empty value switches the tool into incremental-import mode (see initializeWorkspaceAndToolMode()).
    @Argument(fullName="genomicsdb-update-workspace-path", doc="Workspace when updating GenomicsDB. Can be a POSIX file system absolute or relative path or a HDFS/GCS URL. Use this argument when adding new samples to an existing GenomicsDB workspace or when using the output-interval-list-to-file option. Either this or genomicsdb-workspace-path must be specified. Must point to an existing workspace.", mutex={"genomicsdb-workspace-path"})
    private String incrementalImportWorkspace;
    @Argument(fullName="genomicsdb-segment-size", doc="Buffer size in bytes allocated for GenomicsDB attributes during import. Should be large enough to hold data from one site. ", optional=true)
    private long segmentSize = 0x100000L;
    @Argument(fullName="variant", shortName="V", doc="GVCF files to be imported to GenomicsDB. Each file must contain data for only a single sample. Either this or sample-name-map must be specified.", optional=true, mutex={"sample-name-map"})
    private List<String> variantPaths;
    @Argument(fullName="genomicsdb-vcf-buffer-size", doc="Buffer size in bytes to store variant contexts. Larger values are better as smaller values cause frequent disk writes. Defaults to 16384 which was empirically determined to work well for many inputs.", optional=true, minValue=1024.0, minRecommendedValue=10240.0)
    private long vcfBufferSizePerSample = 16384L;
    @Argument(fullName="overwrite-existing-genomicsdb-workspace", doc="Will overwrite given workspace if it exists. Otherwise a new workspace is created. Cannot be set to true if genomicsdb-update-workspace-path is also set. Defaults to false", optional=true)
    private Boolean overwriteExistingWorkspace = false;
    // A value of 0 is replaced by the total sample count in traverse(), i.e. a single batch.
    @Argument(fullName="batch-size", doc="Batch size controls the number of samples for which readers are open at once and therefore provides a way to minimize memory consumption. However, it can take longer to complete. Use the consolidate flag if more than a hundred batches were used. This will improve feature read time. batchSize=0 means no batching (i.e. readers for all samples will be opened at once) Defaults to 0", optional=true)
    private int batchSize = 0;
    @Argument(fullName="consolidate", doc="Boolean flag to enable consolidation. If importing data in batches, a new fragment is created for each batch. In case thousands of fragments are created, GenomicsDB feature readers will try to open ~20x as many files. Also, internally GenomicsDB would consume more memory to maintain bookkeeping data from all fragments. Use this flag to merge all fragments into one. Merging can potentially improve read performance, however overall benefit might not be noticeable as the top Java layers have significantly higher overheads. This flag has no effect if only one batch is used. Defaults to false", optional=true)
    private Boolean doConsolidation = false;
    @Advanced
    @Argument(fullName="sample-name-map", doc="Path to file containing a mapping of sample name to file uri in tab delimited format.  If this is specified then the header from the first sample will be treated as the merged header rather than merging the headers, and the sample names will be taken from this file.  This may be used to rename input samples. This is a performance optimization that relaxes the normal checks for consistent headers.  Using vcfs with incompatible headers may result in silent data corruption.", optional=true, mutex={"variant"})
    private String sampleNameMapFile;
    @Argument(fullName="validate-sample-name-map", doc="Boolean flag to enable checks on the sampleNameMap file. If true, tool checks whetherfeature readers are valid and shows a warning if sample names do not match with the headers. Defaults to false", optional=true)
    private Boolean validateSampleToReaderMap = false;
    // When true, transformTraversalIntervals() replaces the input intervals with their spanning intervals.
    @Argument(fullName="merge-input-intervals", doc="Boolean flag to import all data in between intervals.  Improves performance using large lists of intervals, as in exome sequencing, especially if GVCF data only exists for specified intervals.")
    private boolean mergeInputIntervals = false;
    // Only honoured together with an update workspace (see initializeWorkspaceAndToolMode()).
    @Argument(fullName="output-interval-list-to-file", doc="Path to output file where intervals from existing workspace should be written.If this option is specified, the tools outputs the interval_list of the workspace pointed to by genomicsdb-update-workspace-path at the path specified here in a Picard-style interval_list with a sequence dictionary header", optional=true, mutex={"genomicsdb-workspace-path"})
    private String intervalListOutputPathString;
    @Advanced
    @Argument(fullName="reader-threads", doc="How many simultaneous threads to use when opening VCFs in batches; higher values may improve performance when network latency is an issue. Multiple reader threads are not supported when running with multiple intervals.", optional=true, minValue=1.0)
    private int vcfInitializerThreads = 1;
    @Advanced
    @Argument(fullName="max-num-intervals-to-import-in-parallel", doc="Max number of intervals to import in parallel; higher values may improve performance, but require more memory and a higher number of file descriptors open at the same time", optional=true, minValue=1.0)
    private int maxNumIntervalsToImportInParallel = 1;
    @Advanced
    @Argument(fullName="merge-contigs-into-num-partitions", shortName="merge-contigs-into-num-partitions", doc="Number of GenomicsDB arrays to merge input intervals into. Defaults to 0, which disables this merging. This option can only be used if entire contigs are specified as intervals. The tool will not split up a contig into multiple arrays, which means the actual number of partitions may be less than what is specified for this argument. This can improve performance in the case where the user is trying to import a very large number of contigs - larger than 100", optional=true, minValue=0.0)
    private int mergeContigsIntoNumPartitions = 0;
    @Argument(fullName="genomicsdb-shared-posixfs-optimizations", doc="Allow for optimizations to improve the usability and performance for shared Posix Filesystems(e.g. NFS, Lustre). If set, file level locking is disabled and file system writes are minimized by keeping a higher number of file descriptors open for longer periods of time. Use with batch-size option if keeping a large number of file descriptors open is an issue", optional=true)
    private boolean sharedPosixFSOptimizations = false;
    // ---- Runtime state (not command-line arguments) ----

    // Thread pool for opening feature readers in parallel; null means readers are created serially
    // (see createSampleToReaderMap() and initializeInputPreloadExecutorService()).
    private ExecutorService inputPreloadExecutorService;
    // Intervals to import or to write out. traverse() fills this from the vid map when it is null/empty
    // in interval-list-output mode. NOTE(review): presumably first set by initializeIntervals(), which is
    // not visible in this part of the file - confirm.
    private List<SimpleInterval> intervals;
    // Sample name -> VCF location, kept sorted by sample name.
    private SortedMap<String, URI> sampleNameToVcfPath = new TreeMap<String, URI>();
    // Header lines handed to the importer; initializeHeaderAndSampleMappings() leaves this null
    // in interval-list-output mode.
    private Set<VCFHeaderLine> mergedHeaderLines = null;
    // Sequence dictionary obtained from the merged/first/workspace header; must be non-null after
    // initializeHeaderAndSampleMappings().
    private SAMSequenceDictionary mergedHeaderSequenceDictionary;
    // Paths of the vidmap.json, callset.json and vcfheader.vcf files inside the workspace; set in onTraversalStart().
    private String vidMapJSONFile;
    private String callsetMapJSONFile;
    private String vcfHeaderFile;
    // NOTE(review): not referenced by the code visible here; presumably the callset mapping of the
    // workspace - confirm against the rest of the file.
    private GenomicsDBCallsetsMapProto.CallsetMappingPB callsetMappingPB;
    // Number of the batch currently being imported; logMessageOnBatchCompletion() advances it to
    // (completed batch + 1).
    private int batchCount = 1;
    // Tool-mode flags derived from the arguments in initializeWorkspaceAndToolMode().
    private Boolean doIncrementalImport = false;
    private Boolean getIntervalsFromExistingWorkspace = false;

    /**
     * Optionally collapses the traversal intervals into their spanning intervals.
     *
     * @param getIntervals the intervals requested for traversal
     * @param sequenceDictionary dictionary passed through to {@link IntervalUtils#getSpanningIntervals}
     * @return the spanning intervals when {@code merge-input-intervals} is set, otherwise
     *         {@code getIntervals} unchanged
     */
    @Override
    protected List<SimpleInterval> transformTraversalIntervals(List<SimpleInterval> getIntervals, SAMSequenceDictionary sequenceDictionary) {
        return this.mergeInputIntervals
                ? IntervalUtils.getSpanningIntervals(getIntervals, sequenceDictionary)
                : getIntervals;
    }

    /**
     * Overrides the default cloud prefetch buffer size for this tool.
     *
     * @return always 0. NOTE(review): presumably 0 turns prefetching off in the engine - confirm against GATKTool.
     */
    @Override
    public int getDefaultCloudPrefetchBufferSize() {
        return 0;
    }

    /**
     * Overrides the default cloud index prefetch buffer size for this tool.
     *
     * @return always 0. NOTE(review): presumably 0 turns index prefetching off in the engine - confirm against GATKTool.
     */
    @Override
    public int getDefaultCloudIndexPrefetchBufferSize() {
        return 0;
    }

    /**
     * Label for the records counted by the progress meter. This tool updates the progress meter once
     * per completed batch (see {@code logMessageOnBatchCompletion}), hence "batches".
     *
     * @return the literal label {@code "batches"}
     */
    @Override
    public String getProgressMeterRecordLabel() {
        return "batches";
    }

    /**
     * Validates the argument combination and prepares headers, sample mappings and intervals
     * before delegating to the superclass startup.
     */
    @Override
    public void onStartup() {
        // Must run first: it sets doIncrementalImport / getIntervalsFromExistingWorkspace, which the
        // assertions and the header initialization below read.
        this.initializeWorkspaceAndToolMode();
        this.assertVariantPathsOrSampleNameFileWasSpecified();
        this.assertOverwriteWorkspaceAndIncrementalImportMutuallyExclusive();
        this.initializeHeaderAndSampleMappings();
        // NOTE(review): initializeIntervals() is defined outside the visible part of the file; it presumably
        // relies on the sequence dictionary established by the previous call - confirm.
        this.initializeIntervals();
        super.onStartup();
    }

    /**
     * Derives the tool mode from the workspace arguments.
     * <p>
     * If an update workspace was given, the tool switches to incremental-import mode and uses that
     * path as its workspace. If, in addition, an interval-list output path was given, the tool is
     * flagged to only export the intervals of the existing workspace.
     */
    private void initializeWorkspaceAndToolMode() {
        final String updateWorkspace = this.incrementalImportWorkspace;
        if (updateWorkspace == null || updateWorkspace.isEmpty()) {
            // Fresh import: nothing to adjust.
            return;
        }
        this.doIncrementalImport = true;
        this.workspace = updateWorkspace;

        final String intervalListOutput = this.intervalListOutputPathString;
        if (intervalListOutput != null && !intervalListOutput.isEmpty()) {
            this.getIntervalsFromExistingWorkspace = true;
        }
    }

    /**
     * Rejects the combination of overwriting a workspace and incrementally updating one.
     *
     * @throws CommandLineException if both the overwrite flag and incremental-import mode are active
     */
    private void assertOverwriteWorkspaceAndIncrementalImportMutuallyExclusive() {
        if (!this.overwriteExistingWorkspace) {
            return;
        }
        if (this.doIncrementalImport) {
            throw new CommandLineException("overwrite-existing-genomicsdb-workspace cannot be set to true when genomicsdb-update-workspace-path is set");
        }
    }

    /**
     * Ensures that some source of input samples was supplied, unless the tool is only exporting
     * the interval list of an existing workspace.
     *
     * @throws CommandLineException.MissingArgument if neither variant paths nor a sample name map
     *         were given and interval-list export mode is not active
     */
    private void assertVariantPathsOrSampleNameFileWasSpecified() {
        if (this.variantPaths != null && !this.variantPaths.isEmpty()) {
            return;
        }
        if (this.sampleNameMapFile != null) {
            return;
        }
        if (this.getIntervalsFromExistingWorkspace) {
            return;
        }
        throw new CommandLineException.MissingArgument("variant", "One of --variant or --sample-name-map must be specified");
    }

    /**
     * Verifies that every input interval covers a whole contig as recorded in the importer's vid mapping.
     * An interval qualifies when it starts at 1 and its end is at least the contig length.
     *
     * @param importer importer whose protobuf vid mapping supplies the contig names and lengths
     * @param intervals input intervals to check
     * @throws UserException if the vid mapping is unavailable, if an interval names a contig that is
     *         absent from the vid mapping, or if an interval does not span its entire contig
     */
    private static void assertIntervalsCoverEntireContigs(GenomicsDBImporter importer, List<SimpleInterval> intervals) {
        GenomicsDBVidMapProto.VidMappingPB vidMapPB = importer.getProtobufVidMapping();
        if (vidMapPB == null) {
            throw new UserException("Could not get protobuf vid mappping object from GenomicsDBImporter");
        }
        Map<String, GenomicsDBVidMapProto.Chromosome> vidContigs = vidMapPB.getContigsList().stream().collect(Collectors.toMap(item -> item.getName(), item -> item));
        for (SimpleInterval interval : intervals) {
            GenomicsDBVidMapProto.Chromosome vidContig = vidContigs.get(interval.getContig());
            if (vidContig == null) {
                // Previously this surfaced as a bare NullPointerException on getLength().
                throw new UserException("--merge-contigs-into-num-partitions requires that entire contigs be specified for input intervals. Input interval contig " + interval.getContig() + " was not found in the vid mapping");
            }
            long contigLength = vidContig.getLength();
            boolean coversWholeContig = interval.getStart() == 1 && (long)interval.getEnd() >= contigLength;
            if (coversWholeContig) {
                continue;
            }
            String inputInterval = String.format("Contig:%s, Start:%d, End:%d", interval.getContig(), interval.getStart(), interval.getEnd());
            String vidInterval = String.format("Contig:%s, Start:%d, End:%d", vidContig.getName(), 1, contigLength);
            throw new UserException("--merge-contigs-into-num-partitions requires that entire contigs be specified for input intervals. Input interval contained: " + inputInterval + " while reference contig was: " + vidInterval);
        }
    }

    /**
     * Establishes the sample-to-VCF mapping, the merged header lines and the sequence dictionary
     * according to the input mode:
     * <ul>
     *   <li>variant paths given: every header is read, checked to contain exactly one sample, and the
     *       headers are merged;</li>
     *   <li>sample name map given: the map is loaded in sorted order and the header of the first
     *       entry is used as the merged header;</li>
     *   <li>interval-list export mode: only the sequence dictionary is read, from the workspace's
     *       {@code vcfheader.vcf}; {@code mergedHeaderLines} is left untouched.</li>
     * </ul>
     *
     * @throws UserException on duplicate samples, malformed URIs, failure to stage the workspace header,
     *         missing inputs, or when no sequence dictionary could be obtained
     */
    private void initializeHeaderAndSampleMappings() {
        if (this.variantPaths != null && this.variantPaths.size() > 0) {
            List<VCFHeader> headers = new ArrayList<VCFHeader>(this.variantPaths.size());
            for (String variantPathString : this.variantPaths) {
                Path variantPath = IOUtils.getPath(variantPathString);
                VCFHeader header = this.getHeaderFromPath(variantPath);
                Utils.validate(header != null, "Null header was found in " + variantPath + ".");
                GenomicsDBImport.assertGVCFHasOnlyOneSample(variantPathString, header);
                headers.add(header);
                String sampleName = header.getGenotypeSamples().get(0);
                try {
                    URI previousPath = this.sampleNameToVcfPath.put(sampleName, new URI(variantPathString));
                    if (previousPath != null) {
                        throw new UserException("Duplicate sample: " + sampleName + ". Sample was found in both " + variantPath.toUri() + " and " + previousPath + ".");
                    }
                }
                catch (URISyntaxException e) {
                    throw new UserException("Malformed URI " + e.toString(), e);
                }
            }
            this.mergedHeaderLines = VCFUtils.smartMergeHeaders(headers, true);
            // Take the dictionary from the merged input headers before the tool's own header lines are appended.
            this.mergedHeaderSequenceDictionary = new VCFHeader(this.mergedHeaderLines).getSequenceDictionary();
            this.mergedHeaderLines.addAll(this.getDefaultToolVCFHeaderLines());
        } else if (this.sampleNameMapFile != null) {
            this.sampleNameToVcfPath = GenomicsDBImport.loadSampleNameMapFileInSortedOrder(IOUtils.getPath(this.sampleNameMapFile));
            // Headers are not merged in this mode: the first sample (in sorted order) supplies the header.
            Path firstHeaderPath = IOUtils.getPath(this.sampleNameToVcfPath.values().iterator().next().toString());
            VCFHeader header = this.getHeaderFromPath(firstHeaderPath);
            // Same null check as the --variant branch, instead of an unexplained NullPointerException below.
            Utils.validate(header != null, "Null header was found in " + firstHeaderPath + ".");
            this.mergedHeaderLines = new LinkedHashSet<VCFHeaderLine>(header.getMetaDataInInputOrder());
            this.mergedHeaderSequenceDictionary = header.getSequenceDictionary();
            this.mergedHeaderLines.addAll(this.getDefaultToolVCFHeaderLines());
        } else if (this.getIntervalsFromExistingWorkspace.booleanValue()) {
            String vcfHeader = IOUtils.appendPathToDir(this.workspace, "vcfheader.vcf");
            IOUtils.assertPathsAreReadable(vcfHeader);
            String header = org.genomicsdb.GenomicsDBUtils.readEntireFile(vcfHeader);
            try {
                // The header text is staged in a local temp file so that VCFFileReader can extract the dictionary.
                File tempHeader = File.createTempFile("tempheader", ".vcf");
                tempHeader.deleteOnExit();
                Files.write(tempHeader.toPath(), header.getBytes(StandardCharsets.UTF_8));
                this.mergedHeaderSequenceDictionary = VCFFileReader.getSequenceDictionary(tempHeader);
            }
            catch (IOException e) {
                // Keep the underlying I/O failure as the cause so it is not lost from the stack trace.
                throw new UserException("Unable to create temporary header file to get sequence dictionary", e);
            }
        } else {
            throw new UserException("variant or sample-name-map must be specified unless output-interval-list-to-file is specified");
        }
        if (this.mergedHeaderSequenceDictionary == null) {
            throw new UserException("The merged vcf header has no sequence dictionary. Please provide a header that contains a sequence dictionary.");
        }
    }

    /**
     * Opens a feature reader on the given path just long enough to fetch its VCF header.
     *
     * @param variantPath location of the VCF to read
     * @return the header reported by the reader, cast to {@link VCFHeader}
     * @throws UserException if an {@link IOException} occurs while reading or closing the reader
     */
    private VCFHeader getHeaderFromPath(Path variantPath) {
        try (FeatureReader<VariantContext> headerReader = this.getReaderFromPath(variantPath)) {
            return (VCFHeader)headerReader.getHeader();
        }
        catch (IOException ioe) {
            throw new UserException("Error while reading vcf header from " + variantPath.toUri(), ioe);
        }
    }

    /**
     * Checks that a GVCF header declares exactly one genotype sample.
     *
     * @param variantPath path of the input, used only in the error message
     * @param header header whose genotype sample count is inspected
     * @throws UserException if the header does not contain exactly one sample
     */
    private static void assertGVCFHasOnlyOneSample(String variantPath, VCFHeader header) {
        final int sampleCount = header.getNGenotypeSamples();
        if (sampleCount == 1) {
            return;
        }
        throw new UserException("Input GVCF: " + variantPath + " was expected to contain a single sample but actually contained " + sampleCount + " samples.");
    }

    /**
     * Loads a tab-delimited sample-name-to-URI mapping file, preserving the order of the lines.
     * <p>
     * Every line must have exactly two tab-separated fields. The sample name must be non-empty and
     * must not begin or end with whitespace; the file field must not be blank and is trimmed before
     * being parsed as a URI.
     *
     * @param sampleToFileMapPath path of the mapping file
     * @return insertion-ordered map from sample name to URI
     * @throws UserException.BadInput if the file is empty, a line is malformed, or a sample appears twice
     * @throws UserException if a file field is not a valid URI (the {@link URISyntaxException} is kept as the cause)
     * @throws UserException.CouldNotReadInputFile if the file cannot be read
     */
    @VisibleForTesting
    static LinkedHashMap<String, URI> loadSampleNameMapFile(Path sampleToFileMapPath) {
        try {
            List<String> lines = Files.readAllLines(sampleToFileMapPath);
            if (lines.isEmpty()) {
                throw new UserException.BadInput("At least 1 sample is required but none were found in the sample mapping file");
            }
            LinkedHashMap<String, URI> sampleToFilename = new LinkedHashMap<String, URI>();
            for (String line : lines) {
                // Limit -1 keeps trailing empty fields, so "sample\t" still counts as two fields.
                String[] split = line.split("\\t", -1);
                if (split.length != 2) {
                    throw new UserException.BadInput("Expected a file with 2 fields per line in the format\nSample\tFile\n but found line: \"" + line + "\" with " + split.length + " fields");
                }
                if (!split[0].trim().equals(split[0]) || split[0].trim().isEmpty() || split[1].trim().isEmpty()) {
                    throw new UserException.BadInput("Expected a file of format\nSample\tFile\n but found line: '" + line + "'\nValid sample names must be non-empty strings that cannot begin or end with whitespace and valid file names must be non-empty and not all whitespace");
                }
                String sample = split[0];
                String path = split[1].trim();
                URI uri;
                try {
                    uri = new URI(path);
                }
                catch (URISyntaxException e) {
                    // Preserve the parse failure as the cause, matching the handling of --variant paths.
                    throw new UserException("Malformed URI " + e.toString(), e);
                }
                URI oldPath = sampleToFilename.put(sample, uri);
                if (oldPath != null) {
                    throw new UserException.BadInput("Found two mappings for the same sample: " + sample + "\n" + path + "\n" + oldPath);
                }
            }
            return sampleToFilename;
        }
        catch (IOException e) {
            throw new UserException.CouldNotReadInputFile(sampleToFileMapPath, "exception while reading sample->filename mapping file", (Throwable)e);
        }
    }

    /**
     * Loads the sample name map via {@link #loadSampleNameMapFile(Path)} and returns it ordered by
     * sample name (natural {@code String} ordering of a {@link TreeMap}).
     *
     * @param sampleToFileMapPath path of the mapping file
     * @return sorted map from sample name to URI
     */
    public static SortedMap<String, URI> loadSampleNameMapFileInSortedOrder(Path sampleToFileMapPath) {
        final Map<String, URI> inFileOrder = GenomicsDBImport.loadSampleNameMapFile(sampleToFileMapPath);
        final SortedMap<String, URI> bySampleName = new TreeMap<String, URI>();
        bySampleName.putAll(inFileOrder);
        return bySampleName;
    }

    /**
     * Writes the current {@code intervals} to the interval-list output path as an
     * {@link IntervalList} built on the best available sequence dictionary.
     */
    private void writeIntervalListToFile() {
        final IntervalList intervalList = new IntervalList(this.getBestAvailableSequenceDictionary());
        for (final SimpleInterval simpleInterval : this.intervals) {
            final Locatable location = simpleInterval;
            intervalList.add(new Interval(location));
        }
        final Path destination = IOUtils.getPath(this.intervalListOutputPathString);
        intervalList.write(destination);
    }

    /**
     * Resolves the workspace directory, derives the paths of the workspace's JSON/header files,
     * logs what is about to happen for the active tool mode, and sets up the reader thread pool.
     */
    @Override
    public void onTraversalStart() {
        final String checkedWorkspace = this.overwriteCreateOrCheckWorkspace();
        final String workspaceDir = BucketUtils.makeFilePathAbsolute(checkedWorkspace);

        this.vidMapJSONFile = IOUtils.appendPathToDir(workspaceDir, "vidmap.json");
        this.callsetMapJSONFile = IOUtils.appendPathToDir(workspaceDir, "callset.json");
        this.vcfHeaderFile = IOUtils.appendPathToDir(workspaceDir, "vcfheader.vcf");

        final boolean exportIntervalsOnly = this.getIntervalsFromExistingWorkspace;
        if (exportIntervalsOnly) {
            this.logger.info("Interval file list will be written out to " + this.intervalListOutputPathString);
        } else {
            final boolean incremental = this.doIncrementalImport;
            if (incremental) {
                this.logger.info("Callset Map JSON file will be re-written to " + this.callsetMapJSONFile);
                this.logger.info("Incrementally importing to workspace - " + workspaceDir);
            } else {
                this.logger.info("Vid Map JSON file will be written to " + this.vidMapJSONFile);
                this.logger.info("Callset Map JSON file will be written to " + this.callsetMapJSONFile);
                this.logger.info("Complete VCF Header will be written to " + this.vcfHeaderFile);
                this.logger.info("Importing to workspace - " + workspaceDir);
            }
        }

        this.initializeInputPreloadExecutorService();
    }

    /**
     * Creates the daemon thread pool used to open VCF readers in parallel, or leaves the executor
     * {@code null} (serial reader creation) when only one reader thread was requested or when the
     * tool is not running on exactly one interval.
     */
    private void initializeInputPreloadExecutorService() {
        // Default: serial initialization.
        this.inputPreloadExecutorService = null;

        if (this.vcfInitializerThreads <= 1) {
            return;
        }

        final boolean exactlyOneInterval = this.intervals != null && this.intervals.size() == 1;
        if (!exactlyOneInterval) {
            this.logger.warn("GenomicsDBImport cannot use multiple VCF reader threads for initialization when the number of intervals is greater than 1. Falling back to serial VCF reader initialization.");
            return;
        }

        final ThreadFactory readerThreadFactory = new ThreadFactoryBuilder()
                .setNameFormat("readerInitializer-thread-%d")
                .setDaemon(true)
                .build();
        this.inputPreloadExecutorService = Executors.newFixedThreadPool(this.vcfInitializerThreads, readerThreadFactory);
    }

    /**
     * Creates the feature readers for one batch of samples, in parallel when the reader thread pool
     * exists and serially otherwise.
     *
     * @param sampleNameToVcfPath sample name to VCF URI mapping; cast to {@link SortedMap} on the parallel path
     * @param batchSize number of samples per batch, forwarded to the reader factory method
     * @param index sample index forwarded to the reader factory method
     * @return map from sample name to its feature reader
     */
    private Map<String, FeatureReader<VariantContext>> createSampleToReaderMap(Map<String, URI> sampleNameToVcfPath, int batchSize, int index) {
        if (this.inputPreloadExecutorService == null) {
            return this.getFeatureReadersSerially(sampleNameToVcfPath, batchSize, index);
        }
        final SortedMap<String, URI> sortedSampleToPath = (SortedMap<String, URI>)sampleNameToVcfPath;
        return this.getFeatureReadersInParallel(sortedSampleToPath, batchSize, index);
    }

    /**
     * Batch-completion callback: ticks the progress meter, logs the finished batch and records the
     * number of the next batch in {@code batchCount}.
     *
     * @param arg callback argument carrying the completed batch number and the total batch count
     * @return always {@code null}
     */
    private Void logMessageOnBatchCompletion(BatchCompletionCallbackFunctionArgument arg) {
        this.progressMeter.update(null);
        final String progress = arg.batchCount + "/" + arg.totalBatchCount;
        this.logger.info("Done importing batch " + progress);
        this.batchCount = arg.batchCount + 1;
        return null;
    }

    /**
     * Builds a partition of the current workspace with the given column bounds, with array-name
     * generation from the partition bounds enabled.
     *
     * @param begin first column of the partition
     * @param end last column of the partition
     * @return the built partition message
     */
    private GenomicsDBImportConfiguration.Partition createPartitionWithBeginAndEnd(Coordinates.GenomicsDBColumn begin, Coordinates.GenomicsDBColumn end) {
        return GenomicsDBImportConfiguration.Partition.newBuilder()
                .setBegin(begin)
                .setEnd(end)
                .setWorkspace(this.workspace)
                .setGenerateArrayNameFromPartitionBounds(true)
                .build();
    }

    /**
     * Builds one partition per GenomicsDB array found in the workspace, in sorted array-name order,
     * using each array's column bounds as the partition's begin and end.
     *
     * @return partitions describing the arrays of the existing workspace
     */
    private List<GenomicsDBImportConfiguration.Partition> generatePartitionListFromWorkspace() {
        final String[] arrayNames = org.genomicsdb.GenomicsDBUtils.listGenomicsDBArrays(this.workspace);
        Arrays.sort(arrayNames);

        final List<GenomicsDBImportConfiguration.Partition> workspacePartitions = new ArrayList<GenomicsDBImportConfiguration.Partition>();
        for (final String arrayName : arrayNames) {
            final long[] columnBounds = org.genomicsdb.GenomicsDBUtils.getArrayColumnBounds(this.workspace, arrayName);
            final Coordinates.GenomicsDBColumn firstColumn = Coordinates.GenomicsDBColumn.newBuilder()
                    .setTiledbColumn(columnBounds[0])
                    .build();
            final Coordinates.GenomicsDBColumn lastColumn = Coordinates.GenomicsDBColumn.newBuilder()
                    .setTiledbColumn(columnBounds[1])
                    .build();
            workspacePartitions.add(this.createPartitionWithBeginAndEnd(firstColumn, lastColumn));
        }
        return workspacePartitions;
    }

    /**
     * Builds one partition per input interval, expressed as contig positions
     * (contig name plus the interval's start and end).
     *
     * @return partitions in the same order as {@code intervals}
     */
    private List<GenomicsDBImportConfiguration.Partition> generatePartitionListFromIntervals() {
        final List<GenomicsDBImportConfiguration.Partition> intervalPartitions = new ArrayList<GenomicsDBImportConfiguration.Partition>();
        for (final SimpleInterval interval : this.intervals) {
            final Coordinates.ContigPosition startPosition = Coordinates.ContigPosition.newBuilder()
                    .setContig(interval.getContig())
                    .setPosition((long)interval.getStart())
                    .build();
            final Coordinates.ContigPosition endPosition = Coordinates.ContigPosition.newBuilder()
                    .setContig(interval.getContig())
                    .setPosition((long)interval.getEnd())
                    .build();
            final Coordinates.GenomicsDBColumn firstColumn = Coordinates.GenomicsDBColumn.newBuilder()
                    .setContigPosition(startPosition)
                    .build();
            final Coordinates.GenomicsDBColumn lastColumn = Coordinates.GenomicsDBColumn.newBuilder()
                    .setContigPosition(endPosition)
                    .build();
            intervalPartitions.add(this.createPartitionWithBeginAndEnd(firstColumn, lastColumn));
        }
        return intervalPartitions;
    }

    /**
     * Reconstructs whole-contig intervals for the existing workspace: for every array in the
     * workspace, each contig of the vid map whose tiledb column offset lies within the array's
     * column bounds (inclusive) yields the interval {@code contig:1-length}.
     *
     * @return whole-contig intervals, grouped by array in listing order and by vid-map order within an array
     * @throws UserException if the vid map JSON file cannot be read
     */
    private List<SimpleInterval> generateIntervalListFromVidMap() {
        try {
            final GenomicsDBVidMapProto.VidMappingPB vidMapping = GenomicsDBUtils.getProtobufVidMappingFromJsonFile(this.vidMapJSONFile);
            final String[] arrayNames = org.genomicsdb.GenomicsDBUtils.listGenomicsDBArrays(this.workspace);

            final List<SimpleInterval> wholeContigs = new ArrayList<SimpleInterval>();
            for (final String arrayName : arrayNames) {
                final long[] columnBounds = org.genomicsdb.GenomicsDBUtils.getArrayColumnBounds(this.workspace, arrayName);
                for (final GenomicsDBVidMapProto.Chromosome contig : vidMapping.getContigsList()) {
                    final long offset = contig.getTiledbColumnOffset();
                    if (offset < columnBounds[0] || offset > columnBounds[1]) {
                        continue;
                    }
                    wholeContigs.add(new SimpleInterval(contig.getName(), 1, Math.toIntExact(contig.getLength())));
                }
            }
            return wholeContigs;
        }
        catch (IOException ioe) {
            throw new UserException("Could not get vid map protobuf from file:" + this.vidMapJSONFile + ". Is the workspace corrupted?", ioe);
        }
    }

    /**
     * Derives intervals from the names of the workspace's arrays. An array name of the form
     * {@code contig$start$end} becomes the interval {@code contig:start-end}; names that do not split
     * into exactly three {@code $}-separated parts are skipped.
     *
     * @return intervals parsed from the array names, in listing order
     */
    private List<SimpleInterval> generateIntervalListFromWorkspace() {
        final String[] arrayNames = org.genomicsdb.GenomicsDBUtils.listGenomicsDBArrays(this.workspace);
        final List<SimpleInterval> parsedIntervals = new ArrayList<SimpleInterval>();
        for (final String arrayName : arrayNames) {
            final String[] fields = arrayName.split("\\$");
            if (fields.length != 3) {
                continue;
            }
            final int start = Integer.parseInt(fields[1]);
            final int end = Integer.parseInt(fields[2]);
            parsedIntervals.add(new SimpleInterval(fields[0], start, end));
        }
        return parsedIntervals;
    }

    /**
     * Assembles the {@link ImportConfig} handed to the GenomicsDB importer.
     * <p>
     * Partitions come from the input intervals when there are any, otherwise from the arrays of the
     * existing workspace. The import is configured to fail on updating unless the tool runs in
     * incremental-import mode.
     *
     * @param batchSize number of samples to import per batch
     * @return the fully populated import configuration
     */
    private ImportConfig createImportConfig(int batchSize) {
        final boolean noInputIntervals = this.intervals == null || this.intervals.isEmpty();
        final List<GenomicsDBImportConfiguration.Partition> columnPartitions = noInputIntervals
                ? this.generatePartitionListFromWorkspace()
                : this.generatePartitionListFromIntervals();

        final boolean incremental = this.doIncrementalImport;
        final GenomicsDBImportConfiguration.ImportConfiguration.Builder protoBuilder = GenomicsDBImportConfiguration.ImportConfiguration.newBuilder();
        protoBuilder.addAllColumnPartitions(columnPartitions);
        protoBuilder.setSizePerColumnPartition(this.vcfBufferSizePerSample);
        protoBuilder.setFailIfUpdating(!incremental);
        protoBuilder.setSegmentSize(this.segmentSize);
        protoBuilder.setConsolidateTiledbArrayAfterLoad(this.doConsolidation.booleanValue());
        protoBuilder.setEnableSharedPosixfsOptimizations(this.sharedPosixFSOptimizations);

        final boolean validateReaders = this.validateSampleToReaderMap;
        final ImportConfig config = new ImportConfig(protoBuilder.build(), validateReaders, true, batchSize, this.mergedHeaderLines, this.sampleNameToVcfPath, this::createSampleToReaderMap, incremental);
        config.setOutputCallsetmapJsonFile(this.callsetMapJSONFile);
        config.setOutputVidmapJsonFile(this.vidMapJSONFile);
        config.setOutputVcfHeaderFile(this.vcfHeaderFile);
        config.setUseSamplesInOrder(true);
        config.setFunctionToCallOnBatchCompletion(this::logMessageOnBatchCompletion);
        return config;
    }

    /**
     * Runs the tool. In interval-list export mode the intervals (taken from the vid map when none
     * were supplied) are written to file and nothing is imported. Otherwise a GenomicsDB importer is
     * configured and executed, optionally coalescing contigs into fewer partitions for fresh imports.
     *
     * @throws UserException if the importer cannot be initialized because of an I/O error
     * @throws GATKException if the importer reports an {@link IllegalArgumentException}
     */
    @Override
    public void traverse() {
        if (this.getIntervalsFromExistingWorkspace) {
            final boolean noIntervalsSupplied = this.intervals == null || this.intervals.isEmpty();
            if (noIntervalsSupplied) {
                this.intervals = this.generateIntervalListFromVidMap();
            }
            this.writeIntervalListToFile();
            return;
        }

        this.progressMeter.setRecordsBetweenTimeChecks(1L);
        // A batch size of 0 means "no batching": import all samples in one batch.
        final int totalSamples = this.sampleNameToVcfPath.size();
        final int effectiveBatchSize = this.batchSize != 0 ? this.batchSize : totalSamples;
        final ImportConfig config = this.createImportConfig(effectiveBatchSize);

        try {
            final GenomicsDBImporter genomicsDBImporter = new GenomicsDBImporter(config);
            GenomicsDBUtils.updateImportProtobufVidMapping(genomicsDBImporter);

            final boolean coalesceRequested = this.mergeContigsIntoNumPartitions != 0;
            if (coalesceRequested && this.doIncrementalImport) {
                this.logger.warn("genomicsdb-update-workspace-path was set, so ignoring merge-contigs-into-num-partitions. When updating workspaces, GenomicsDBImport must use the same partition boundaries/intervals as the original import");
            } else if (coalesceRequested) {
                GenomicsDBImport.assertIntervalsCoverEntireContigs(genomicsDBImporter, this.intervals);
                genomicsDBImporter.coalesceContigsIntoNumPartitions(this.mergeContigsIntoNumPartitions);
            }

            genomicsDBImporter.executeImport(this.maxNumIntervalsToImportInParallel);
        }
        catch (IOException ioe) {
            throw new UserException("Error initializing GenomicsDBImporter", ioe);
        }
        catch (IllegalArgumentException illegalArgument) {
            throw new GATKException("Null feature reader found in sampleNameMap file: " + this.sampleNameMapFile, illegalArgument);
        }
        catch (CompletionException completionFailure) {
            // Surface the underlying runtime failure when there is one, otherwise the wrapper itself.
            final Throwable underlying = completionFailure.getCause();
            if (underlying instanceof RuntimeException) {
                throw (RuntimeException)underlying;
            }
            throw completionFailure;
        }
    }

    /**
     * Logs a completion message matching the mode the tool ran in.
     *
     * @return always {@code true}
     */
    @Override
    public Object onTraversalSuccess() {
        final String completionMessage;
        if (this.getIntervalsFromExistingWorkspace.booleanValue()) {
            completionMessage = "Interval list generated!";
        } else if (this.batchSize != 0) {
            completionMessage = "Import of all batches to GenomicsDB completed!";
        } else {
            completionMessage = "Import completed!";
        }
        this.logger.info(completionMessage);
        return true;
    }

    /**
     * Opens the readers for one batch of samples on the preload executor and waits for all of them.
     *
     * <p>Each submitted task opens the sample's reader and wraps it in an
     * {@link InitializedQueryWrapper} whose query for the first interval is issued up front.
     * Results are collected in submission order.
     *
     * @param sampleNametoPath sample name to input URI, iterated in the map's key order
     * @param batchSize maximum number of samples to open in this batch
     * @param lowerSampleIndex index (into the key order) of the first sample of the batch
     * @return sample name to initialized reader for the samples in this batch
     * @throws UserException.CouldNotReadInputFile if a reader cannot be opened, a preload task
     *         fails, or the wait is interrupted
     */
    private SortedMap<String, FeatureReader<VariantContext>> getFeatureReadersInParallel(SortedMap<String, URI> sampleNametoPath, int batchSize, int lowerSampleIndex) {
        final SortedMap<String, FeatureReader<VariantContext>> sampleToReaderMap = new TreeMap<>();
        this.logger.info("Starting batch input file preload");

        // Typed futures: no raw Future and no unchecked cast when collecting the results.
        final Map<String, Future<FeatureReader<VariantContext>>> futures = new LinkedHashMap<>();
        final List<String> sampleNames = new ArrayList<>(sampleNametoPath.keySet());
        for (int i = lowerSampleIndex; i < sampleNametoPath.size() && i < lowerSampleIndex + batchSize; ++i) {
            final String name = sampleNames.get(i);
            futures.put(name, this.inputPreloadExecutorService.submit(() -> {
                final Path variantPath = IOUtils.getPath(sampleNametoPath.get(name).toString());
                try {
                    return new InitializedQueryWrapper(this.getReaderFromPath(variantPath), this.intervals.get(0));
                } catch (IOException e) {
                    throw new UserException.CouldNotReadInputFile("Couldn't read file: " + variantPath.toUri(), (Exception) e);
                }
            }));
        }

        for (final Map.Entry<String, Future<FeatureReader<VariantContext>>> pending : futures.entrySet()) {
            try {
                sampleToReaderMap.put(pending.getKey(), pending.getValue().get());
            } catch (InterruptedException e) {
                // Restore the interrupt flag so code further up the stack can still observe it.
                Thread.currentThread().interrupt();
                throw new UserException.CouldNotReadInputFile("Failure while waiting for FeatureReader to initialize ", e);
            } catch (ExecutionException e) {
                throw new UserException.CouldNotReadInputFile("Failure while waiting for FeatureReader to initialize ", e);
            }
        }

        this.logger.info("Finished batch preload");
        this.logger.info("Importing batch " + this.batchCount + " with " + sampleToReaderMap.size() + " samples");
        return sampleToReaderMap;
    }

    /**
     * Opens the readers for one batch of samples one after another on the calling thread.
     *
     * @param sampleNameToPath sample name to input URI, iterated in the map's key order
     * @param batchSize maximum number of samples to open in this batch
     * @param lowerSampleIndex index (into the key order) of the first sample of the batch
     * @return sample name to reader for the samples in this batch
     */
    private SortedMap<String, FeatureReader<VariantContext>> getFeatureReadersSerially(Map<String, URI> sampleNameToPath, int batchSize, int lowerSampleIndex) {
        final SortedMap<String, FeatureReader<VariantContext>> readersBySample = new TreeMap<>();
        final List<String> orderedNames = new ArrayList<>(sampleNameToPath.keySet());
        // The batch ends at whichever comes first: the end of the samples or a full batch.
        final int endExclusive = Math.min(sampleNameToPath.size(), lowerSampleIndex + batchSize);
        for (int idx = lowerSampleIndex; idx < endExclusive; idx++) {
            final String name = orderedNames.get(idx);
            final URI location = sampleNameToPath.get(name);
            readersBySample.put(name, this.getReaderFromPath(IOUtils.getPath(location.toString())));
        }
        this.logger.info("Importing batch " + this.batchCount + " with " + readersBySample.size() + " samples");
        return readersBySample;
    }

    /**
     * Opens a VCF reader for {@code variantPath} and wraps it so that every iterator it hands
     * out rejects MNP records.
     *
     * <p>All other calls are delegated unchanged to the underlying reader.
     *
     * @param variantPath location of the VCF to read
     * @return a reader whose iterators throw {@link UserException.BadInput} from {@code next()}
     *         on any record for which {@code GATKVariantContextUtils.isUnmixedMnpIgnoringNonRef}
     *         returns true
     * @throws UserException if creating the underlying reader raises a {@link TribbleException}
     */
    private FeatureReader<VariantContext> getReaderFromPath(final Path variantPath) {
        final String variantURI = variantPath.toAbsolutePath().toUri().toString();
        try {
            final FeatureReader<VariantContext> reader = AbstractFeatureReader.getFeatureReader(variantURI, null, new VCFCodec(), true, BucketUtils.getPrefetchingWrapper(this.cloudPrefetchBuffer), BucketUtils.getPrefetchingWrapper(this.cloudIndexPrefetchBuffer));

            // Anonymous delegating reader; only the iterator-producing methods add behaviour.
            return new FeatureReader<VariantContext>() {
                @Override
                public void close() throws IOException {
                    reader.close();
                }

                @Override
                public List<String> getSequenceNames() {
                    return reader.getSequenceNames();
                }

                @Override
                public Object getHeader() {
                    return reader.getHeader();
                }

                @Override
                public boolean isQueryable() {
                    return reader.isQueryable();
                }

                @Override
                public CloseableTribbleIterator<VariantContext> query(Locatable locus) throws IOException {
                    return new NoMnpIterator(reader.query(locus));
                }

                @Override
                public CloseableTribbleIterator<VariantContext> query(String chr, int start, int end) throws IOException {
                    return new NoMnpIterator(reader.query(chr, start, end));
                }

                @Override
                public CloseableTribbleIterator<VariantContext> iterator() throws IOException {
                    return new NoMnpIterator(reader.iterator());
                }

                /** Iterator decorator that fails on the first MNP record it is asked to return. */
                class NoMnpIterator implements CloseableTribbleIterator<VariantContext> {
                    private final CloseableTribbleIterator<VariantContext> inner;

                    NoMnpIterator(CloseableTribbleIterator<VariantContext> inner) {
                        this.inner = inner;
                    }

                    @Override
                    public void close() {
                        this.inner.close();
                    }

                    @Override
                    public Iterator<VariantContext> iterator() {
                        return this;
                    }

                    @Override
                    public boolean hasNext() {
                        return this.inner.hasNext();
                    }

                    @Override
                    public VariantContext next() {
                        if (!this.hasNext()) {
                            throw new NoSuchElementException();
                        }
                        final VariantContext vc = this.inner.next();
                        if (GATKVariantContextUtils.isUnmixedMnpIgnoringNonRef(vc)) {
                            // Plain %s/%d: the previous "%1s:%2d ... %3s" were width specifiers,
                            // which padded single-digit positions with a leading space.
                            throw new UserException.BadInput(String.format("GenomicsDBImport does not support GVCFs with MNPs. MNP found at %s:%d in VCF %s", vc.getContig(), vc.getStart(), variantPath.toAbsolutePath()));
                        }
                        return vc;
                    }
                }
            };
        } catch (TribbleException e) {
            throw new UserException("Failed to create reader from " + variantURI, e);
        }
    }

    /**
     * Creates the GenomicsDB workspace, or validates an existing one, and returns its absolute path.
     *
     * <p>A negative return code from the workspace creation call is treated as failure. A return
     * code of 1 without the overwrite flag is reported as "already exists", which is accepted
     * only for incremental imports. In every other case an incremental import is rejected
     * because it requires an existing workspace.
     *
     * @return the absolute workspace path
     * @throws UnableToCreateGenomicsDBWorkspace if the workspace cannot be created or already
     *         exists for a non-incremental import
     * @throws UserException if an incremental import is requested in any case other than an
     *         existing workspace that is not being overwritten
     */
    private String overwriteCreateOrCheckWorkspace() {
        final String absoluteWorkspace = BucketUtils.makeFilePathAbsolute(this.workspace);
        final int status = org.genomicsdb.GenomicsDBUtils.createTileDBWorkspace((String)absoluteWorkspace, (boolean)this.overwriteExistingWorkspace);

        if (status < 0) {
            // -1 carries a more specific explanation than the other negative codes.
            final String detail = status == -1 ? " already exists and is not a directory" : "";
            throw new UnableToCreateGenomicsDBWorkspace("Error creating GenomicsDB workspace: " + this.workspace + detail);
        }

        final boolean existingWorkspaceKept = status == 1 && !this.overwriteExistingWorkspace.booleanValue();
        final boolean incremental = this.doIncrementalImport.booleanValue();
        if (existingWorkspaceKept) {
            if (!incremental) {
                throw new UnableToCreateGenomicsDBWorkspace("Error creating GenomicsDB workspace: " + this.workspace + " already exists");
            }
            return absoluteWorkspace;
        }
        if (incremental) {
            throw new UserException("We require an existing valid workspace when incremental import is set");
        }
        return absoluteWorkspace;
    }

    /**
     * Populates the tool's intervals.
     *
     * <p>When listing intervals from an existing workspace or importing incrementally, the
     * intervals always come from the workspace and any user-specified intervals are ignored
     * with a warning. Otherwise user-specified intervals are required; they are resolved against
     * the best available sequence dictionary and optionally merged into spanning intervals.
     *
     * @throws UserException if no intervals are specified and none can be taken from a
     *         workspace, or if intervals are specified but no sequence dictionary is available
     */
    protected void initializeIntervals() {
        final boolean userGaveIntervals = this.intervalArgumentCollection.intervalsSpecified();
        final boolean takeFromWorkspace = this.getIntervalsFromExistingWorkspace.booleanValue() || this.doIncrementalImport.booleanValue();

        if (takeFromWorkspace) {
            if (userGaveIntervals) {
                this.logger.warn("genomicsdb-update-workspace-path was set, so ignoring specified intervals.The tool will use the intervals specified by the initial import");
            }
            this.intervals = this.generateIntervalListFromWorkspace();
            return;
        }
        if (!userGaveIntervals) {
            throw new UserException("No intervals specified");
        }

        final SAMSequenceDictionary intervalDictionary = this.getBestAvailableSequenceDictionary();
        if (intervalDictionary == null) {
            throw new UserException("We require at least one input source that has a sequence dictionary (reference or reads) when intervals are specified");
        }

        // Reset before parsing, so a failure below leaves an empty list rather than stale intervals.
        this.intervals = new ArrayList<SimpleInterval>();
        final List<SimpleInterval> requested = this.intervalArgumentCollection.getIntervals(intervalDictionary);

        final int recommendedMaxIntervals = 100;
        if (requested.size() > recommendedMaxIntervals && !this.mergeInputIntervals) {
            this.logger.warn(String.format("A large number of intervals were specified. Using more than %d intervals in a single import is not recommended and can cause performance to suffer. If GVCF data only exists within those intervals, performance can be improved by aggregating intervals with the merge-input-intervals argument.", recommendedMaxIntervals));
        }

        if (this.mergeInputIntervals) {
            this.intervals = IntervalUtils.getSpanningIntervals(requested, this.getBestAvailableSequenceDictionary());
        } else {
            this.intervals = requested;
        }
    }

    /**
     * Stops the input preload executor, if one was created, via {@code shutdownNow()}.
     */
    @Override
    public void onShutdown() {
        if (this.inputPreloadExecutorService == null) {
            return;
        }
        this.inputPreloadExecutorService.shutdownNow();
    }

    /**
     * Returns the sequence dictionary taken from the merged header when one is set, falling back
     * to the superclass's choice otherwise.
     *
     * @return the merged-header dictionary, or the superclass result when that is {@code null}
     */
    @Override
    public SAMSequenceDictionary getBestAvailableSequenceDictionary() {
        final SAMSequenceDictionary fromMergedHeader = this.mergedHeaderSequenceDictionary;
        return fromMergedHeader != null ? fromMergedHeader : super.getBestAvailableSequenceDictionary();
    }

    private static final class InitializedQueryWrapper
    implements FeatureReader<VariantContext> {
        private final FeatureReader<VariantContext> reader;
        private final SimpleInterval interval;
        private CloseableTribbleIterator<VariantContext> query;

        private InitializedQueryWrapper(FeatureReader<VariantContext> reader, Locatable interval) throws IOException {
            this.reader = reader;
            this.interval = new SimpleInterval(interval);
            this.query = reader.query(interval.getContig(), interval.getStart(), interval.getEnd());
        }

        public CloseableTribbleIterator<VariantContext> query(String chr, int start, int end) {
            SimpleInterval queryInterval = new SimpleInterval(chr, start, end);
            if (!this.interval.equals(queryInterval)) {
                throw new GATKException("Cannot call query with different interval, expected:" + this.interval + " queried with: " + queryInterval);
            }
            if (this.query != null) {
                CloseableTribbleIterator<VariantContext> tmp = this.query;
                this.query = null;
                return tmp;
            }
            throw new GATKException("Cannot call query twice on this wrapper.");
        }

        public CloseableTribbleIterator<VariantContext> iterator() {
            throw new UnsupportedOperationException("iterator() not supported, this should not have been called and indicates an issue with GenomicsDB integration");
        }

        public void close() throws IOException {
            this.reader.close();
        }

        public List<String> getSequenceNames() {
            throw new UnsupportedOperationException("getSequenceNames() not supported, this should not have been called and indicates an issue with GenomicsDB integration");
        }

        public Object getHeader() {
            return this.reader.getHeader();
        }
    }

    /**
     * {@link UserException} raised when the GenomicsDB workspace cannot be created or used.
     */
    static class UnableToCreateGenomicsDBWorkspace extends UserException {
        private static final long serialVersionUID = 1L;

        /**
         * @param message description of why the workspace could not be created
         */
        UnableToCreateGenomicsDBWorkspace(final String message) {
            super(message);
        }
    }
}

