/*
 * Decompiled with CFR 0.152.
 */
package org.broadinstitute.hellbender.tools.spark.pathseq;

import java.util.Collection;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.cmdline.CommandLineProgram;
import org.broadinstitute.hellbender.cmdline.programgroups.MetagenomicsProgramGroup;
import org.broadinstitute.hellbender.engine.GATKPath;
import org.broadinstitute.hellbender.engine.spark.datasources.ReferenceFileSparkSource;
import org.broadinstitute.hellbender.tools.spark.pathseq.PSKmerBloomFilter;
import org.broadinstitute.hellbender.tools.spark.pathseq.PSKmerSet;
import org.broadinstitute.hellbender.tools.spark.pathseq.PSKmerUtils;
import org.broadinstitute.hellbender.tools.spark.pathseq.PSUtils;
import org.broadinstitute.hellbender.tools.spark.sv.utils.SVKmerShort;
import org.broadinstitute.hellbender.tools.spark.utils.LargeLongHopscotchSet;
import org.broadinstitute.hellbender.tools.spark.utils.LongBloomFilter;

/**
 * Command-line tool that extracts k-mers from a host reference and writes them to a file
 * for later use by the PathSeq pipeline.
 *
 * <p>The k-mers are stored as a hash set by default. When a positive Bloom filter false
 * positive probability is supplied, a Bloom filter is written instead.
 */
@DocumentedFeature
@CommandLineProgramProperties(
        summary = "Produce a set of k-mers from the given host reference. The output file from this tool is required to run the PathSeq pipeline.",
        oneLineSummary = "Builds set of host reference k-mers",
        programGroup = MetagenomicsProgramGroup.class)
public final class PathSeqBuildKmers extends CommandLineProgram {

    // Command-line argument names, shared with the @Argument declarations below.
    public static final String REFERENCE_LONG_NAME = "reference";
    public static final String REFERENCE_SHORT_NAME = "R";
    public static final String BLOOM_FILTER_FALSE_POSITIVE_P_LONG_NAME = "bloom-false-positive-probability";
    public static final String BLOOM_FILTER_FALSE_POSITIVE_P_SHORT_NAME = "P";
    public static final String KMER_SIZE_LONG_NAME = "kmer-size";
    public static final String KMER_SIZE_SHORT_NAME = "SZ";
    public static final String KMER_MASK_LONG_NAME = "kmer-mask";
    public static final String KMER_MASK_SHORT_NAME = "M";
    public static final String KMER_SPACING_LONG_NAME = "kmer-spacing";
    public static final String KMER_SPACING_SHORT_NAME = "SP";

    /** Destination path for the generated k-mer file. */
    @Argument(
            doc = "File for k-mer set output. Extension will be automatically added if not present (.hss for hash set or .bfi for Bloom filter)",
            shortName = "O",
            fullName = "output")
    public String outputFile;

    /** Host reference the k-mers are read from. */
    @Argument(
            doc = "Reference FASTA file path on local disk",
            fullName = REFERENCE_LONG_NAME,
            shortName = REFERENCE_SHORT_NAME)
    public GATKPath reference;

    /** Any value greater than zero selects Bloom filter output; the default of zero selects a hash set. */
    @Argument(
            doc = "If non-zero, creates a Bloom filter with this false positive probability",
            fullName = BLOOM_FILTER_FALSE_POSITIVE_P_LONG_NAME,
            shortName = BLOOM_FILTER_FALSE_POSITIVE_P_SHORT_NAME,
            minValue = 0.0,
            maxValue = 1.0,
            maxRecommendedValue = 0.001,
            optional = true)
    public double bloomFpp = 0.0;

    /** Length of each k-mer. */
    @Argument(
            doc = "K-mer size, must be odd and less than 32",
            fullName = KMER_SIZE_LONG_NAME,
            shortName = KMER_SIZE_SHORT_NAME,
            minValue = 1.0,
            maxValue = 31.0,
            optional = true)
    public int kmerSize = 31;

    /** Textual mask specification; parsed together with {@link #kmerSize} in {@link #doWork()}. */
    @Argument(
            doc = "Comma-delimited list of base indices (starting with 0) to mask in each k-mer",
            fullName = KMER_MASK_LONG_NAME,
            shortName = KMER_MASK_SHORT_NAME,
            optional = true)
    public String kmerMaskString = "";

    /** Step between successive k-mers taken from the reference. */
    @Argument(
            doc = "Spacing between successive k-mers",
            fullName = KMER_SPACING_LONG_NAME,
            shortName = KMER_SPACING_SHORT_NAME,
            minValue = 1.0,
            optional = true)
    public int kmerSpacing = 1;

    /**
     * Loads the masked k-mers from the reference and writes them to {@link #outputFile},
     * as a Bloom filter when {@link #bloomFpp} is positive and as a hash set otherwise.
     *
     * @return always {@code null}
     */
    @Override
    protected Object doWork() {
        final ReferenceFileSparkSource referenceSource = new ReferenceFileSparkSource(reference);
        final SVKmerShort mask = SVKmerShort.getMask(PSUtils.parseMask(kmerMaskString, kmerSize), kmerSize);

        logger.info("Loading reference kmers...");
        final Collection<long[]> kmerChunks =
                PSKmerUtils.getMaskedKmersFromLocalReference(referenceSource, kmerSize, kmerSpacing, mask);
        // Size reported by the utility for the whole collection; both output builders take it.
        final long totalLongs = PSKmerUtils.longArrayCollectionSize(kmerChunks);

        if (bloomFpp > 0.0) {
            writeBloomFilter(kmerChunks, totalLongs, mask);
        } else {
            writeHashSet(kmerChunks, totalLongs, mask);
        }
        return null;
    }

    /** Builds a Bloom filter from the k-mer collection, logs its theoretical false positive rate, and writes it out. */
    private void writeBloomFilter(final Collection<long[]> kmerChunks, final long totalLongs, final SVKmerShort mask) {
        logger.info("Building Bloom filter with false positive probability " + bloomFpp + "...");
        final LongBloomFilter filter = PSKmerUtils.longArrayCollectionToBloomFilter(kmerChunks, totalLongs, bloomFpp);
        final PSKmerBloomFilter kmerFilter = new PSKmerBloomFilter(filter, kmerSize, mask, totalLongs);
        logger.info("Theoretical Bloom filter false positive probability: " + kmerFilter.getFalsePositiveProbability());
        PSKmerUtils.writeKmerBloomFilter(outputFile, kmerFilter);
    }

    /** Builds a hash set from the k-mer collection and writes it out. */
    private void writeHashSet(final Collection<long[]> kmerChunks, final long totalLongs, final SVKmerShort mask) {
        logger.info("Building kmer hash set...");
        final LargeLongHopscotchSet hopscotchSet = PSKmerUtils.longArrayCollectionToSet(kmerChunks, totalLongs);
        final PSKmerSet kmerSet = new PSKmerSet(hopscotchSet, kmerSize, mask);
        PSKmerUtils.writeKmerSet(outputFile, kmerSet);
    }
}

