/*
 * Decompiled with CFR 0.152.
 */
package org.broadinstitute.hellbender.tools.spark.sv.discovery;

import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMFileWriter;
import htsjdk.samtools.SAMFileWriterFactory;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFHeaderLine;
import java.io.Serializable;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.EnumMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.broadcast.Broadcast;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.ArgumentCollection;
import org.broadinstitute.barclay.argparser.BetaFeature;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.cmdline.programgroups.StructuralVariantDiscoveryProgramGroup;
import org.broadinstitute.hellbender.engine.filters.ReadFilter;
import org.broadinstitute.hellbender.engine.filters.ReadFilterLibrary;
import org.broadinstitute.hellbender.engine.spark.GATKSparkTool;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.tools.spark.sv.StructuralVariationDiscoveryArgumentCollection;
import org.broadinstitute.hellbender.tools.spark.sv.StructuralVariationDiscoveryPipelineSpark;
import org.broadinstitute.hellbender.tools.spark.sv.discovery.AnnotatedVariantProducer;
import org.broadinstitute.hellbender.tools.spark.sv.discovery.SvDiscoveryInputMetaData;
import org.broadinstitute.hellbender.tools.spark.sv.discovery.alignment.AssemblyContigAlignmentsRDDProcessor;
import org.broadinstitute.hellbender.tools.spark.sv.discovery.alignment.AssemblyContigWithFineTunedAlignments;
import org.broadinstitute.hellbender.tools.spark.sv.discovery.inference.CpxVariantInterpreter;
import org.broadinstitute.hellbender.tools.spark.sv.discovery.inference.SegmentedCpxVariantSimpleVariantExtractor;
import org.broadinstitute.hellbender.tools.spark.sv.discovery.inference.SimpleNovelAdjacencyInterpreter;
import org.broadinstitute.hellbender.tools.spark.sv.utils.SVIntervalTree;
import org.broadinstitute.hellbender.tools.spark.sv.utils.SVUtils;
import org.broadinstitute.hellbender.tools.spark.sv.utils.SVVCFWriter;
import org.broadinstitute.hellbender.utils.io.IOUtils;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import scala.Tuple2;

/**
 * Spark tool that reads aligned local-assembly contigs, classifies them by alignment signature,
 * interprets the simple and complex classes into variant calls, and writes the calls to VCF files
 * whose names are built from the user-supplied output prefix plus the sample id.
 *
 * <p>The merged, filtered call set is written to a file ending in {@link #MERGED_VCF_FILE_NAME};
 * intermediate call sets use the other {@code *_VCF_FILE_NAME} suffixes declared below.</p>
 */
@DocumentedFeature
@BetaFeature
@CommandLineProgramProperties(oneLineSummary="(Internal) Examines aligned contigs from local assemblies and calls structural variants or their breakpoints", summary="This tool is used in development and should not be of interest to most researchers. It is a prototype of structural variant calling, and has been under active developments. For more stable version, please see DiscoverVariantsFromContigAlignmentsSAMSpark. This tool takes a file containing the alignments of assembled contigs (typically the output file produced by FindBreakpointEvidenceSpark) and searches for reads with split alignments or large gaps indicating the presence of structural variation breakpoints. Variations' types are determined by analyzing the signatures of the split alignments, and are written to VCF files in the designated output directory.", programGroup=StructuralVariantDiscoveryProgramGroup.class)
public final class SvDiscoverFromLocalAssemblyContigAlignmentsSpark
extends GATKSparkTool {
    private static final long serialVersionUID = 1L;
    // Instance logger; handed to SvDiscoveryInputMetaData in runTool as the tool logger.
    private final Logger localLogger = LogManager.getLogger(SvDiscoverFromLocalAssemblyContigAlignmentsSpark.class);
    // Discovery-stage arguments; validated in validateParams() before any work starts.
    @ArgumentCollection
    private StructuralVariationDiscoveryArgumentCollection.DiscoverVariantsFromContigAlignmentsArgumentCollection discoverStageArgs = new StructuralVariationDiscoveryArgumentCollection.DiscoverVariantsFromContigAlignmentsArgumentCollection();
    // Optional; remains null when the argument is not supplied on the command line.
    @Argument(doc="file containing non-canonical chromosome names (e.g chrUn_KI270588v1) in the reference, human reference (hg19 or hg38) assumed when omitted", shortName="alt-tigs", fullName="non-canonical-contig-names-file", optional=true)
    private String nonCanonicalChromosomeNamesFile;
    // Raw user-supplied prefix; getOutputPrefix() appends the sample id to it.
    @Argument(doc="prefix for output files (including VCF files and if enabled, the signaling assembly contig's alignments); sample name will be appended after the provided argument", shortName="O", fullName="output")
    private String outputPrefix;
    // When true, dispatchJobs also writes BAM files for contigs classified as UNKNOWN.
    @Argument(doc="output query-name sorted SAM files for local assembly contigs whose alignment signature could not be used for emitting un-ambiguous calls", fullName="write-sam", optional=true)
    private boolean writeSAMFiles;
    // File-name suffixes appended to the sample-specific output prefix.
    public static final String SIMPLE_CHIMERA_VCF_FILE_NAME = "NonComplex.vcf";
    public static final String COMPLEX_CHIMERA_VCF_FILE_NAME = "Complex.vcf";
    public static final String REINTERPRETED_1_SEG_CALL_VCF_FILE_NAME = "cpx_reinterpreted_simple_1_seg.vcf";
    public static final String REINTERPRETED_MULTI_SEG_CALL_VCF_FILE_NAME = "cpx_reinterpreted_simple_multi_seg.vcf";
    public static final String MERGED_VCF_FILE_NAME = "merged_simple.vcf";

    /**
     * Always returns {@code true}; this tool reads the reference via {@code getReference()} in
     * {@link #runTool}.
     */
    @Override
    public boolean requiresReference() {
        return true;
    }

    /**
     * Always returns {@code true}; the contig alignments to analyze are obtained via
     * {@code getReads()} in {@link #runTool}.
     */
    @Override
    public boolean requiresReads() {
        return true;
    }

    /**
     * @return an immutable single-element list holding only {@code ReadFilterLibrary.MAPPED}
     */
    @Override
    public List<ReadFilter> getDefaultReadFilters() {
        return Collections.singletonList(ReadFilterLibrary.MAPPED);
    }

    /**
     * Tool entry point: validates arguments, bundles the inputs into a
     * {@link SvDiscoveryInputMetaData}, classifies the contigs, runs the interpreters, and writes
     * the filtered, merged call set to {@code <prefix>} + {@link #MERGED_VCF_FILE_NAME}.
     *
     * @param ctx the Spark context used for broadcasting and job dispatch
     */
    @Override
    protected void runTool(JavaSparkContext ctx) {
        validateParams();

        // Assemble everything the downstream interpreters need into one metadata bundle.
        final Broadcast<SVIntervalTree<VariantContext>> cnvCalls =
                StructuralVariationDiscoveryPipelineSpark.broadcastCNVCalls(ctx, getHeaderForReads(), discoverStageArgs.cnvCallsFile);
        final String samplePrefix = getOutputPrefix();
        final SvDiscoveryInputMetaData metaData = new SvDiscoveryInputMetaData(
                ctx, discoverStageArgs, nonCanonicalChromosomeNamesFile, samplePrefix,
                null, null, null,
                cnvCalls, getHeaderForReads(), getReference(), getDefaultToolVCFHeaderLines(), localLogger);

        // Classify contigs, interpret each class, then release the cached classification RDDs.
        final JavaRDD<GATKRead> rawAlignments = getReads();
        final AssemblyContigsClassifiedByAlignmentSignatures classifiedContigs = preprocess(metaData, rawAlignments);
        final List<VariantContext> allCalls = dispatchJobs(ctx, classifiedContigs, metaData, rawAlignments, writeSAMFiles);
        classifiedContigs.unpersist();

        // Filter the merged calls and emit the final VCF.
        final List<VariantContext> keptCalls =
                AnnotatedVariantProducer.filterMergedVariantList(allCalls, metaData.getDiscoverStageArgs());
        final SAMSequenceDictionary refDict =
                metaData.getReferenceData().getReferenceSequenceDictionaryBroadcast().getValue();
        SVVCFWriter.writeVCF(keptCalls, samplePrefix + MERGED_VCF_FILE_NAME, refDict,
                metaData.getDefaultToolVCFHeaderLines(), metaData.getToolLogger());
    }

    /** Delegates argument validation to the discovery-stage argument collection. */
    private void validateParams() {
        this.discoverStageArgs.validate();
    }

    /**
     * Builds the sample-specific output prefix from the user-supplied {@code outputPrefix}.
     *
     * <ul>
     *   <li>existing directory: {@code <dir>/<sampleId>_} (no doubled slash)</li>
     *   <li>existing regular file: rejected, to avoid clobbering it</li>
     *   <li>non-existent path: {@code <prefix>_<sampleId>_}, or {@code <prefix><sampleId>_}
     *       when the prefix already ends with a slash</li>
     * </ul>
     *
     * @return the prefix to which output file names are appended
     * @throws UserException if the prefix points to an existing non-directory file
     */
    private String getOutputPrefix() {
        final String joiner;
        if (!Files.exists(Paths.get(outputPrefix))) {
            joiner = outputPrefix.endsWith("/") ? "" : "_";
        } else if (Files.isDirectory(Paths.get(outputPrefix))) {
            joiner = outputPrefix.endsWith("/") ? "" : "/";
        } else {
            throw new UserException("Provided prefix for output is pointing to an existing file: " + outputPrefix);
        }
        // The sample id is only looked up once the prefix has been accepted.
        return outputPrefix + joiner + SVUtils.getSampleId(getHeaderForReads()) + "_";
    }

    /**
     * Reconstructs the alignment set for each assembly contig from the raw alignments, caches
     * and counts the result (logging the count), and classifies the contigs by alignment signature.
     *
     * @param svDiscoveryInputMetaData bundle supplying the reads header, canonical chromosome
     *                                 names and the tool logger
     * @param assemblyRawAlignments    raw alignments of the assembly contigs
     * @return the contigs split into unknown / simple / complex groups
     */
    public static AssemblyContigsClassifiedByAlignmentSignatures preprocess(SvDiscoveryInputMetaData svDiscoveryInputMetaData, JavaRDD<GATKRead> assemblyRawAlignments) {
        final SAMFileHeader readsHeader =
                svDiscoveryInputMetaData.getSampleSpecificData().getHeaderBroadcast().getValue();
        final Set<String> canonicalChromosomes =
                svDiscoveryInputMetaData.getReferenceData().getCanonicalChromosomesBroadcast().getValue();
        final Logger log = svDiscoveryInputMetaData.getToolLogger();

        final JavaRDD<AssemblyContigWithFineTunedAlignments> reconstructedContigs =
                AssemblyContigAlignmentsRDDProcessor
                        .createOptimalCoverageAlignmentSetsForContigs(assemblyRawAlignments, readsHeader, canonicalChromosomes, 0.0, log)
                        .cache();

        // count() is an action: it materializes the cached RDD before classification.
        final long contigCount = reconstructedContigs.count();
        log.info(contigCount + " contigs with chimeric alignments potentially giving SV signals.");

        return new AssemblyContigsClassifiedByAlignmentSignatures(reconstructedContigs);
    }

    /**
     * Runs the interpreters on the classified contigs and merges their simple-variant output.
     *
     * <p>Order of work: simple-chimera contigs, then complex contigs, then (optionally) BAM output
     * for unclassifiable contigs, then inversions.</p>
     *
     * @param ctx                       Spark context
     * @param contigsByPossibleRawTypes contigs grouped by alignment signature
     * @param svDiscoveryInputMetaData  shared discovery inputs; also supplies the output prefix
     * @param assemblyRawAlignments     raw alignments of the assembly contigs
     * @param writeSAMFiles             whether to write alignments of the "unknown" contigs
     * @return a new mutable list: simple calls, then calls re-interpreted from complex variants,
     *         then inversion calls
     */
    public static List<VariantContext> dispatchJobs(JavaSparkContext ctx, AssemblyContigsClassifiedByAlignmentSignatures contigsByPossibleRawTypes, SvDiscoveryInputMetaData svDiscoveryInputMetaData, JavaRDD<GATKRead> assemblyRawAlignments, boolean writeSAMFiles) {
        final String samplePrefix = svDiscoveryInputMetaData.getOutputPath();

        final List<VariantContext> simpleCalls = extractSimpleVariants(
                contigsByPossibleRawTypes.getContigsWithSignatureClassifiedAsSimpleChimera(),
                svDiscoveryInputMetaData, samplePrefix);

        final CpxAndReInterpretedSimpleVariants cpxCalls = extractCpxVariants(
                ctx, contigsByPossibleRawTypes.getContigsWithSignatureClassifiedAsComplex(),
                svDiscoveryInputMetaData, assemblyRawAlignments, samplePrefix);

        if (writeSAMFiles) {
            final SAMFileHeader readsHeader =
                    svDiscoveryInputMetaData.getSampleSpecificData().getHeaderBroadcast().getValue();
            contigsByPossibleRawTypes.writeSAMfilesForUnknown(samplePrefix, assemblyRawAlignments, readsHeader);
        }

        final List<VariantContext> inversionCalls = extractInversions();

        final List<VariantContext> allCalls = new ArrayList<>(simpleCalls);
        allCalls.addAll(cpxCalls.reInterpretedSimpleVariants);
        allCalls.addAll(inversionCalls);
        return allCalls;
    }

    /**
     * Interprets simple-chimera contigs into variant calls and writes them to
     * {@code <prefix>} + {@link #SIMPLE_CHIMERA_VCF_FILE_NAME}.
     *
     * @param contigsWithSimpleChimera   contigs classified as simple chimera
     * @param svDiscoveryInputMetaData   shared discovery inputs
     * @param outputPrefixWithSampleName prefix for the output VCF path
     * @return the interpreted calls (same list that was written)
     */
    private static List<VariantContext> extractSimpleVariants(JavaRDD<AssemblyContigWithFineTunedAlignments> contigsWithSimpleChimera, SvDiscoveryInputMetaData svDiscoveryInputMetaData, String outputPrefixWithSampleName) {
        final List<VariantContext> calls =
                SimpleNovelAdjacencyInterpreter.makeInterpretation(contigsWithSimpleChimera, svDiscoveryInputMetaData);

        // The VCF writer only receives a logger when running in debug mode; otherwise null.
        Logger debugLogger = null;
        if (svDiscoveryInputMetaData.getDiscoverStageArgs().runInDebugMode) {
            debugLogger = svDiscoveryInputMetaData.getToolLogger();
        }

        final String vcfPath = outputPrefixWithSampleName + SIMPLE_CHIMERA_VCF_FILE_NAME;
        final SAMSequenceDictionary refDict =
                svDiscoveryInputMetaData.getReferenceData().getReferenceSequenceDictionaryBroadcast().getValue();
        SVVCFWriter.writeVCF(calls, vcfPath, refDict, svDiscoveryInputMetaData.getDefaultToolVCFHeaderLines(), debugLogger);
        return calls;
    }

    /**
     * Interprets complex-signature contigs into complex variant calls, then re-interprets those
     * calls as simple variants. Three VCF files are written under the given prefix: the complex
     * calls, the zero-or-one-segment re-interpretations, and the multi-segment re-interpretations.
     *
     * @param ctx                        Spark context, used to parallelize the complex calls
     * @param contigsWithCpxAln          contigs classified as complex
     * @param svDiscoveryInputMetaData   shared discovery inputs
     * @param assemblyRawAlignments      raw alignments of the assembly contigs
     * @param outputPrefixWithSampleName prefix for the output VCF paths
     * @return the complex calls paired with the merged re-interpreted simple calls
     */
    private static CpxAndReInterpretedSimpleVariants extractCpxVariants(JavaSparkContext ctx, JavaRDD<AssemblyContigWithFineTunedAlignments> contigsWithCpxAln, SvDiscoveryInputMetaData svDiscoveryInputMetaData, JavaRDD<GATKRead> assemblyRawAlignments, String outputPrefixWithSampleName) {
        // The VCF writer only receives a logger when running in debug mode; otherwise null.
        Logger debugLogger = null;
        if (svDiscoveryInputMetaData.getDiscoverStageArgs().runInDebugMode) {
            debugLogger = svDiscoveryInputMetaData.getToolLogger();
        }
        final Set<VCFHeaderLine> headerLines = svDiscoveryInputMetaData.getDefaultToolVCFHeaderLines();
        final SAMSequenceDictionary refDict =
                svDiscoveryInputMetaData.getReferenceData().getReferenceSequenceDictionaryBroadcast().getValue();

        // Step 1: complex calls, written out as-is.
        final List<VariantContext> cpxCalls =
                CpxVariantInterpreter.makeInterpretation(contigsWithCpxAln, svDiscoveryInputMetaData);
        SVVCFWriter.writeVCF(cpxCalls, outputPrefixWithSampleName + COMPLEX_CHIMERA_VCF_FILE_NAME,
                refDict, headerLines, debugLogger);

        // Step 2: re-interpret the complex calls as simple variants.
        final JavaRDD<VariantContext> cpxCallsRDD = ctx.parallelize(cpxCalls);
        final SegmentedCpxVariantSimpleVariantExtractor.ExtractedSimpleVariants reInterpreted =
                SegmentedCpxVariantSimpleVariantExtractor.extract(cpxCallsRDD, svDiscoveryInputMetaData, assemblyRawAlignments);

        SVVCFWriter.writeVCF(reInterpreted.getReInterpretZeroOrOneSegmentCalls(),
                outputPrefixWithSampleName + REINTERPRETED_1_SEG_CALL_VCF_FILE_NAME,
                refDict, headerLines, debugLogger);
        SVVCFWriter.writeVCF(reInterpreted.getReInterpretMultiSegmentsCalls(),
                outputPrefixWithSampleName + REINTERPRETED_MULTI_SEG_CALL_VCF_FILE_NAME,
                refDict, headerLines, debugLogger);

        return new CpxAndReInterpretedSimpleVariants(cpxCalls, reInterpreted.getMergedReinterpretedCalls());
    }

    /**
     * Placeholder for inversion calling: currently performs no work.
     *
     * @return always an empty, immutable list
     */
    private static List<VariantContext> extractInversions() {
        return Collections.emptyList();
    }

    /**
     * Immutable pair holding the complex variant calls together with the simple variant calls
     * that were re-interpreted from them. The lists are stored as given (no defensive copy).
     */
    private static final class CpxAndReInterpretedSimpleVariants {
        /** Complex variant calls. */
        private final List<VariantContext> cpxVariants;
        /** Simple variant calls derived from the complex ones. */
        private final List<VariantContext> reInterpretedSimpleVariants;

        /**
         * @param complexCalls       the complex variant calls
         * @param reInterpretedCalls the simple calls re-interpreted from {@code complexCalls}
         */
        CpxAndReInterpretedSimpleVariants(List<VariantContext> complexCalls, List<VariantContext> reInterpretedCalls) {
            cpxVariants = complexCalls;
            reInterpretedSimpleVariants = reInterpretedCalls;
        }
    }

    /**
     * Assembly contigs partitioned by the basic type of their alignment signature
     * ({@code UNKNOWN}, {@code SIMPLE_CHIMERA}, {@code COMPLEX}). Each partition is a cached RDD;
     * call {@link #unpersist()} once the partitions are no longer needed.
     */
    public static final class AssemblyContigsClassifiedByAlignmentSignatures {
        private final JavaRDD<AssemblyContigWithFineTunedAlignments> unknown;
        private final JavaRDD<AssemblyContigWithFineTunedAlignments> simple;
        private final JavaRDD<AssemblyContigWithFineTunedAlignments> complex;

        /**
         * Splits {@code contigs} into three cached RDDs, one per alignment-signature basic type.
         *
         * @param contigs contigs to classify
         */
        private AssemblyContigsClassifiedByAlignmentSignatures(JavaRDD<AssemblyContigWithFineTunedAlignments> contigs) {
            this.unknown = contigs.filter(tig -> tig.getAlignmentSignatureBasicType().equals(AssemblyContigWithFineTunedAlignments.AlignmentSignatureBasicType.UNKNOWN)).cache();
            this.simple = contigs.filter(tig -> tig.getAlignmentSignatureBasicType().equals(AssemblyContigWithFineTunedAlignments.AlignmentSignatureBasicType.SIMPLE_CHIMERA)).cache();
            this.complex = contigs.filter(tig -> tig.getAlignmentSignatureBasicType().equals(AssemblyContigWithFineTunedAlignments.AlignmentSignatureBasicType.COMPLEX)).cache();
        }

        /** @return contigs whose alignment signature basic type is {@code UNKNOWN} */
        public JavaRDD<AssemblyContigWithFineTunedAlignments> getContigsWithSignatureClassifiedAsUnknown() {
            return this.unknown;
        }

        /** @return contigs whose alignment signature basic type is {@code SIMPLE_CHIMERA} */
        public JavaRDD<AssemblyContigWithFineTunedAlignments> getContigsWithSignatureClassifiedAsSimpleChimera() {
            return this.simple;
        }

        /** @return contigs whose alignment signature basic type is {@code COMPLEX} */
        public JavaRDD<AssemblyContigWithFineTunedAlignments> getContigsWithSignatureClassifiedAsComplex() {
            return this.complex;
        }

        /** Releases the three cached partitions without blocking. */
        public void unpersist() {
            this.simple.unpersist(false);
            this.complex.unpersist(false);
            this.unknown.unpersist(false);
        }

        /**
         * Writes the raw alignments of the {@code UNKNOWN} contigs to one BAM file per
         * classification-failure reason ({@code AMBIGUOUS}, {@code INCOMPLETE},
         * {@code UNINFORMATIVE}), named {@code <outputPrefix><REASON>.bam}. Records are sorted by
         * read name on the driver before being written.
         *
         * <p>All writers that were opened are closed even when opening a later writer or adding
         * a record fails, so no file handle is leaked on the error path.</p>
         *
         * @param outputPrefix          prefix of the BAM file paths
         * @param assemblyRawAlignments raw alignments of all assembly contigs
         * @param header                reads header; a queryname-sorted clone is used for the
         *                              output files, the original for record conversion
         */
        private void writeSAMfilesForUnknown(String outputPrefix, JavaRDD<GATKRead> assemblyRawAlignments, SAMFileHeader header) {
            final Map<String, AssemblyContigWithFineTunedAlignments.ReasonForAlignmentClassificationFailure> tigNameToReason =
                    this.unknown.mapToPair(tig -> new Tuple2<>(tig.getContigName(), tig.getReasonForAlignmentClassificationFailure())).collectAsMap();

            // Copy the key set into a HashSet so the closure shipped to executors captures a
            // plain serializable set rather than a view of the collected map.
            final Set<String> namesOfInterest = new HashSet<>(tigNameToReason.keySet());
            final List<GATKRead> contigRawAlignments =
                    new ArrayList<>(assemblyRawAlignments.filter(read -> namesOfInterest.contains(read.getName())).collect());
            contigRawAlignments.sort(Comparator.comparing(GATKRead::getName));

            final SAMFileHeader clone = header.clone();
            clone.setSortOrder(SAMFileHeader.SortOrder.queryname);

            // NOTE(review): index creation is requested although the output is queryname-sorted;
            // confirm htsjdk simply skips indexing for non-coordinate sort orders.
            final SAMFileWriterFactory factory = new SAMFileWriterFactory().setCreateIndex(true);

            final EnumMap<AssemblyContigWithFineTunedAlignments.ReasonForAlignmentClassificationFailure, SAMFileWriter> writerForEachCase =
                    new EnumMap<>(AssemblyContigWithFineTunedAlignments.ReasonForAlignmentClassificationFailure.class);
            final AssemblyContigWithFineTunedAlignments.ReasonForAlignmentClassificationFailure[] reasonsWithOutput = {
                    AssemblyContigWithFineTunedAlignments.ReasonForAlignmentClassificationFailure.AMBIGUOUS,
                    AssemblyContigWithFineTunedAlignments.ReasonForAlignmentClassificationFailure.INCOMPLETE,
                    AssemblyContigWithFineTunedAlignments.ReasonForAlignmentClassificationFailure.UNINFORMATIVE
            };
            try {
                for (final AssemblyContigWithFineTunedAlignments.ReasonForAlignmentClassificationFailure reason : reasonsWithOutput) {
                    // presorted == true: the records were sorted by name above.
                    writerForEachCase.put(reason, factory.makeSAMOrBAMWriter(clone, true, IOUtils.getPath(outputPrefix + reason.name() + ".bam")));
                }
                for (final GATKRead read : contigRawAlignments) {
                    final AssemblyContigWithFineTunedAlignments.ReasonForAlignmentClassificationFailure reason =
                            tigNameToReason.get(read.getName());
                    writerForEachCase.get(reason).addAlignment(read.convertToSAMRecord(header));
                }
            } finally {
                // Close whatever was opened, on success and on failure alike.
                writerForEachCase.values().forEach(SAMFileWriter::close);
            }
        }
    }
}

