/*
 * Decompiled with CFR 0.152.
 */
package org.broadinstitute.hellbender.tools.walkers.sv;

import com.google.common.annotations.VisibleForTesting;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.reference.ReferenceSequenceFile;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.GenotypeBuilder;
import htsjdk.variant.variantcontext.StructuralVariantType;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.vcf.VCFHeader;
import htsjdk.variant.vcf.VCFStandardHeaderLines;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.stream.Collectors;
import org.apache.commons.lang3.tuple.MutablePair;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.BetaFeature;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.hellbender.cmdline.programgroups.StructuralVariantDiscoveryProgramGroup;
import org.broadinstitute.hellbender.engine.GATKPath;
import org.broadinstitute.hellbender.engine.MultiVariantWalkerGroupedOnStart;
import org.broadinstitute.hellbender.engine.ReadsContext;
import org.broadinstitute.hellbender.engine.ReferenceContext;
import org.broadinstitute.hellbender.tools.spark.sv.utils.GATKSVVCFConstants;
import org.broadinstitute.hellbender.tools.spark.sv.utils.GATKSVVCFHeaderLines;
import org.broadinstitute.hellbender.tools.spark.sv.utils.SVUtils;
import org.broadinstitute.hellbender.tools.sv.SVCallRecord;
import org.broadinstitute.hellbender.tools.sv.SVCallRecordWithEvidence;
import org.broadinstitute.hellbender.tools.sv.SVClusterEngine;
import org.broadinstitute.hellbender.tools.sv.SVDepthOnlyCallDefragmenter;
import org.broadinstitute.hellbender.utils.GenomeLoc;
import org.broadinstitute.hellbender.utils.GenomeLocParser;
import org.broadinstitute.hellbender.utils.IntervalSetRule;
import org.broadinstitute.hellbender.utils.IntervalUtils;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.genotyper.IndexedSampleList;
import org.broadinstitute.hellbender.utils.reference.ReferenceUtils;
import org.broadinstitute.hellbender.utils.samples.PedigreeValidationType;
import org.broadinstitute.hellbender.utils.samples.SampleDB;
import org.broadinstitute.hellbender.utils.samples.Sex;
import org.broadinstitute.hellbender.utils.variant.GATKSVVariantContextUtils;
import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.hellbender.utils.variant.VariantContextGetters;

@BetaFeature
@CommandLineProgramProperties(summary="Gathers single-sample or multi-sample segmented gCNV VCFs, harmonizes breakpoints, and outputs a cohort VCF with genotypes.", oneLineSummary="Combine segmented gCNV VCFs.", programGroup=StructuralVariantDiscoveryProgramGroup.class)
public class JointGermlineCNVSegmentation
extends MultiVariantWalkerGroupedOnStart {
    // Sample names taken from the input variants; assigned in getVCFWriter() and iterated when genotypes are updated.
    private SortedSet<String> samples;
    // Output writer; created in onTraversalStart() and closed in closeTool().
    private VariantContextWriter vcfWriter;
    // Sequence dictionary used to build the engines and to sort calls before writing.
    private SAMSequenceDictionary dictionary;
    // Receives records when the input is not flagged as multi-sample; its output is fed to clusterEngine.
    private SVDepthOnlyCallDefragmenter defragmenter;
    // Receives records directly for multi-sample input, or the defragmenter's output otherwise.
    private SVClusterEngine clusterEngine;
    // Intersection of the model call intervals and the traversal intervals (see setIntervals()).
    private List<GenomeLoc> callIntervals;
    // Contig of the most recent batch seen by apply(); a change of contig triggers processClusters().
    private String currentContig;
    // Pedigree-derived sample information; consulted for sample sex on allosomal contigs.
    private SampleDB sampleDB;
    // Set in onTraversalStart(): true when the number of input samples is not exactly one.
    private boolean isMultiSampleInput = false;
    // Reference reader used to look up the reference base at each call's start position.
    private ReferenceSequenceFile reference;
    // Contigs on which ploidy is derived from sample sex instead of refAutosomalCopyNumber.
    private final Set<String> allosomalContigs = new LinkedHashSet<String>(Arrays.asList("X", "Y", "chrX", "chrY"));
    // Long names of the command-line arguments declared below.
    public static final String MIN_QUALITY_LONG_NAME = "minimum-qs-score";
    public static final String MIN_SAMPLE_NUM_OVERLAP_LONG_NAME = "min-sample-set-fraction-overlap";
    public static final String DEFRAGMENTATION_PADDING_LONG_NAME = "defragmentation-padding-fraction";
    public static final String MODEL_CALL_INTERVALS_LONG_NAME = "model-call-intervals";
    public static final String BREAKPOINT_SUMMARY_STRATEGY_LONG_NAME = "breakpoint-summary-strategy";
    // Passed to SVCallRecord.createDepthOnlyFromGCNV() for every input variant.
    @Argument(fullName="minimum-qs-score", doc="Minimum QS score to combine a variant segment", optional=true)
    private int minQS = 20;
    // Passed to the defragmenter constructor.
    @Argument(fullName="min-sample-set-fraction-overlap", doc="Minimum fraction of common samples for two variants to cluster together", optional=true)
    private double minSampleSetOverlap = 0.8;
    // Passed to the defragmenter constructor; defaults to the defragmenter's own default padding fraction.
    @Argument(fullName="defragmentation-padding-fraction", doc="Extend events by this fraction on each side when determining overlap to merge", optional=true)
    private double defragmentationPadding = SVDepthOnlyCallDefragmenter.getDefaultPaddingFraction();
    // Interval file that is intersected with the traversal intervals to produce callIntervals.
    @Argument(fullName="model-call-intervals", doc="gCNV model intervals created with the FilterIntervals tool.")
    private GATKPath modelCallIntervalList = null;
    // Passed to the SVClusterEngine constructor.
    @Argument(fullName="breakpoint-summary-strategy", doc="Strategy to use for choosing a representative value for a breakpoint cluster.", optional=true)
    private SVClusterEngine.BreakpointSummaryStrategy breakpointSummaryStrategy = SVClusterEngine.BreakpointSummaryStrategy.MEDIAN_START_MEDIAN_END;
    // Destination of the combined VCF.
    @Argument(fullName="output", shortName="O", doc="The combined output VCF")
    private GATKPath outputFile;
    // Ploidy assumed for every sample on contigs that are not in allosomalContigs.
    @Argument(doc="Reference copy-number on autosomal intervals.", fullName="autosomal-ref-copy-number", minValue=0.0, optional=true)
    private int refAutosomalCopyNumber = 2;
    // Pedigree file from which sampleDB is built in onTraversalStart().
    @Argument(fullName="pedigree", shortName="ped", doc="Pedigree file for samples")
    private GATKPath pedigreeFile = null;

    /**
     * Always answers {@code false} for this tool.
     *
     * @return {@code false}
     */
    @Override
    public boolean doDictionaryCrossValidation() {
        // NOTE(review): presumably makes the engine skip sequence-dictionary cross-validation — confirm against the base class.
        return false;
    }

    /**
     * Always answers {@code true}: the tool reads reference bases when it builds output records.
     *
     * @return {@code true}
     */
    @Override
    public boolean requiresReference() {
        return true;
    }

    /**
     * Initializes the tool before traversal: opens the reference, builds the sample database from the
     * pedigree, computes the call intervals, creates the defragmenter and cluster engine, opens the
     * output writer (writing its header), and records whether the input holds more than one sample.
     */
    @Override
    public void onTraversalStart() {
        this.reference = ReferenceUtils.createReferenceReader(this.referenceArguments.getReferenceSpecifier());
        this.sampleDB = SampleDB.createSampleDBFromPedigreeAndDataSources(
                this.pedigreeFile, this.getSamplesForVariants(), PedigreeValidationType.STRICT);
        this.dictionary = this.getBestAvailableSequenceDictionary();

        // callIntervals must be populated before the defragmenter is constructed.
        this.setIntervals(new GenomeLocParser(this.dictionary));
        this.defragmenter = new SVDepthOnlyCallDefragmenter(
                this.dictionary, this.minSampleSetOverlap, this.defragmentationPadding, this.callIntervals);
        this.clusterEngine = new SVClusterEngine(this.dictionary, true, this.breakpointSummaryStrategy);
        this.vcfWriter = this.getVCFWriter();

        // Anything other than exactly one sample is treated as multi-sample input.
        final boolean multiSample = this.getSamplesForVariants().size() != 1;
        if (multiSample) {
            this.logger.warn("Multi-sample VCFs found, which are assumed to be pre-clustered. Skipping defragmentation.");
        }
        this.isMultiSampleInput = multiSample;
    }

    /**
     * Sets {@code callIntervals} to the intersection of the model call intervals and the traversal
     * intervals. Leaves {@code callIntervals} untouched when no model interval list was supplied.
     *
     * @param parser parser used to turn both interval sources into {@link GenomeLoc}s
     */
    private void setIntervals(GenomeLocParser parser) {
        if (this.modelCallIntervalList == null) {
            return;
        }
        final List<GenomeLoc> modelIntervals =
                IntervalUtils.featureFileToIntervals(parser, this.modelCallIntervalList.getURIString());
        final List<GenomeLoc> traversalIntervals =
                IntervalUtils.genomeLocsFromLocatables(parser, this.getTraversalIntervals());
        this.callIntervals = IntervalUtils.mergeListsBySetOperator(
                modelIntervals, traversalIntervals, IntervalSetRule.INTERSECTION);
    }

    /**
     * Creates the output VCF writer and writes its header.
     *
     * <p>The header carries the input metadata lines, the default tool header lines, and the
     * SVLEN, SVTYPE, AF, AC and AN INFO lines. As a side effect, {@code samples} is set to the
     * samples of the input variants.
     *
     * @return the writer, with the header already written
     */
    private VariantContextWriter getVCFWriter() {
        this.samples = this.getSamplesForVariants();
        final VCFHeader inputVCFHeader = new VCFHeader(this.getHeaderForVariants().getMetaDataInInputOrder(), this.samples);

        // The element type must be VCFHeaderLine (not Object) for the VCFHeader constructor below;
        // the fully qualified name avoids touching the file's import block.
        final Set<htsjdk.variant.vcf.VCFHeaderLine> headerLines =
                new LinkedHashSet<htsjdk.variant.vcf.VCFHeaderLine>(inputVCFHeader.getMetaDataInInputOrder());
        headerLines.addAll(this.getDefaultToolVCFHeaderLines());
        headerLines.add(GATKSVVCFHeaderLines.getInfoLine("SVLEN"));
        headerLines.add(GATKSVVCFHeaderLines.getInfoLine("SVTYPE"));
        headerLines.add(VCFStandardHeaderLines.getInfoLine("AF"));
        headerLines.add(VCFStandardHeaderLines.getInfoLine("AC"));
        headerLines.add(VCFStandardHeaderLines.getInfoLine("AN"));

        final VariantContextWriter writer = this.createVCFWriter(this.outputFile);
        final Set<String> sampleNameSet = new IndexedSampleList(this.samples).asSetOfSamples();
        final VCFHeader vcfHeader = new VCFHeader(headerLines, new TreeSet<String>(sampleNameSet));
        writer.writeHeader(vcfHeader);
        return writer;
    }

    /**
     * Consumes one batch of variants. When the batch is on a different contig from the previous
     * batch, the pending clusters are flushed first. Each variant is converted to a depth-only call
     * record; variants for which no record is produced are skipped. Records go to the defragmenter
     * for single-sample input and straight to the cluster engine for multi-sample input.
     *
     * @param variantContexts  variants in this batch; the first element determines the batch contig
     * @param referenceContext unused
     * @param readsContexts    unused
     */
    @Override
    public void apply(List<VariantContext> variantContexts, ReferenceContext referenceContext, List<ReadsContext> readsContexts) {
        final String batchContig = variantContexts.get(0).getContig();
        if (this.currentContig != null && !batchContig.equals(this.currentContig)) {
            // Moving to a new contig: emit everything accumulated for the previous one.
            this.processClusters();
        }
        this.currentContig = batchContig;

        for (final VariantContext variant : variantContexts) {
            final SVCallRecord callRecord = SVCallRecord.createDepthOnlyFromGCNV(variant, this.minQS);
            if (callRecord == null) {
                continue;
            }
            final SVCallRecordWithEvidence withEvidence = new SVCallRecordWithEvidence(callRecord);
            if (this.isMultiSampleInput) {
                this.clusterEngine.add(withEvidence);
            } else {
                this.defragmenter.add(withEvidence);
            }
        }
    }

    /**
     * Flushes the clusters accumulated for the last contig once traversal has finished.
     *
     * @return always {@code null}
     */
    @Override
    public Object onTraversalSuccess() {
        this.processClusters();
        return null;
    }

    /**
     * Flushes pending calls: feeds any defragmented calls to the cluster engine (in the order the
     * defragmenter returns them), then writes the cluster engine's output.
     */
    private void processClusters() {
        if (!this.defragmenter.isEmpty()) {
            // Typed list (instead of a raw List) so each element is accepted by clusterEngine.add().
            final List<SVCallRecordWithEvidence> defragmentedCalls = this.defragmenter.getOutput();
            for (final SVCallRecordWithEvidence call : defragmentedCalls) {
                this.clusterEngine.add(call);
            }
        }
        final List<SVCallRecordWithEvidence> clusteredCalls = this.clusterEngine.getOutput();
        this.write(clusteredCalls);
    }

    /**
     * Sorts the calls in dictionary order, converts them to variant contexts, and writes them out in
     * groups of overlapping records. A group is closed when the next record starts at or beyond the
     * group's furthest end, or lies on a different contig; each closed group is passed through
     * {@link #resolveVariantContexts} before being written.
     *
     * @param calls clustered calls to write; nothing is written when empty
     */
    private void write(List<SVCallRecordWithEvidence> calls) {
        final Comparator<SVCallRecordWithEvidence> dictionaryOrder = Comparator.comparing(
                c -> new SimpleInterval(c.getContig(), c.getStart(), c.getEnd()),
                IntervalUtils.getDictionaryOrderComparator(this.dictionary));
        final List<VariantContext> sortedCalls = calls.stream()
                .sorted(dictionaryOrder)
                .map(record -> JointGermlineCNVSegmentation.buildVariantContext(record, this.reference))
                .collect(Collectors.toList());
        if (sortedCalls.isEmpty()) {
            return;
        }

        List<VariantContext> overlappingVCs = new ArrayList<VariantContext>(calls.size());
        int clusterEnd = -1;          // -1 means no record has been added to the current group yet
        String clusterContig = null;
        for (final VariantContext curr : sortedCalls) {
            final boolean startsPastGroup = clusterEnd != -1 && curr.getStart() >= clusterEnd;
            final boolean onOtherContig = clusterContig != null && !curr.getContig().equals(clusterContig);
            if (startsPastGroup || onOtherContig) {
                // Close the current group, then start a new one with this record.
                final List<VariantContext> resolvedVCs = this.resolveVariantContexts(
                        this.allosomalContigs, this.refAutosomalCopyNumber, this.sampleDB, this.samples, overlappingVCs);
                resolvedVCs.forEach(this.vcfWriter::add);
                overlappingVCs = new ArrayList<VariantContext>();
                overlappingVCs.add(curr);
                clusterEnd = curr.getEnd();
                clusterContig = curr.getContig();
            } else {
                overlappingVCs.add(curr);
                clusterEnd = Math.max(clusterEnd, curr.getEnd());
                if (clusterContig == null) {
                    clusterContig = curr.getContig();
                }
            }
        }

        // Flush the final group.
        final List<VariantContext> resolvedVCs = this.resolveVariantContexts(
                this.allosomalContigs, this.refAutosomalCopyNumber, this.sampleDB, this.samples, overlappingVCs);
        resolvedVCs.forEach(this.vcfWriter::add);
    }

    /**
     * Updates the genotypes of a group of overlapping variant contexts, in list order.
     *
     * <p>While walking the list, the copy number and END of each sample's most recently seen call
     * are remembered and handed to {@link #updateGenotypes} for the following records. A record's
     * own genotypes are recorded only after that record has been updated.
     *
     * @param allosomalContigs       contigs on which ploidy depends on sample sex
     * @param refAutosomalCopyNumber ploidy on all other contigs; also the fallback when reading CN
     * @param sampleDB               sample database forwarded to {@link #updateGenotypes}
     * @param samples                samples to emit genotypes for
     * @param overlappingVCs         records to resolve; must not be null
     * @return one updated variant context per input record, in the same order
     */
    @VisibleForTesting
    protected List<VariantContext> resolveVariantContexts(Set<String> allosomalContigs, int refAutosomalCopyNumber, SampleDB sampleDB, SortedSet<String> samples, List<VariantContext> overlappingVCs) {
        Utils.nonNull(overlappingVCs);
        final int recordCount = overlappingVCs.size();
        final List<VariantContext> resolved = new ArrayList<>(recordCount);
        final Map<String, CopyNumberAndEndRecord> latestCallBySample =
                new LinkedHashMap<>(SVUtils.hashMapCapacity(recordCount));

        for (final VariantContext vc : overlappingVCs) {
            resolved.add(JointGermlineCNVSegmentation.updateGenotypes(
                    allosomalContigs, refAutosomalCopyNumber, sampleDB, samples, vc, latestCallBySample));
            for (final Genotype genotype : vc.getGenotypes()) {
                if (genotype.hasAnyAttribute("CN")) {
                    final int copyNumber = VariantContextGetters.getAttributeAsInt(genotype, "CN", refAutosomalCopyNumber);
                    final int end = vc.getAttributeAsInt("END", vc.getStart());
                    latestCallBySample.put(genotype.getSampleName(), new CopyNumberAndEndRecord(copyNumber, end));
                }
            }
        }
        return resolved;
    }

    /**
     * Rebuilds a variant context so that every requested sample has a genotype with a CN attribute,
     * and sets the AC, AF and AN INFO attributes.
     *
     * <p>For each sample:
     * <ul>
     *   <li>no earlier call and no genotype in {@code vc}: reference alleles at the sample's ploidy,
     *       CN equal to the ploidy;</li>
     *   <li>an earlier call whose end lies beyond {@code vc}'s start: reference alleles, CN taken
     *       from the earlier call;</li>
     *   <li>otherwise, a genotype in {@code vc}: its CN (defaulting to the ploidy) and either its own
     *       alleles or, when its ploidy differs from the sample ploidy, alleles derived from CN;</li>
     *   <li>otherwise: CN equal to the ploidy with alleles derived from it.</li>
     * </ul>
     * Only samples that have a genotype in {@code vc} contribute to the allele counts. AN is the sum
     * of the sample ploidies, and AF is each allele's count divided by AN.
     *
     * @param allosomalContigs       contigs on which ploidy depends on sample sex
     * @param refAutosomalCopyNumber ploidy on all other contigs
     * @param sampleDB               sample database used to look up sample sex
     * @param samples                samples to emit genotypes for
     * @param vc                     variant context to update; alternate alleles may only be DEL and DUP
     * @param sampleCopyNumbers      copy number and end position of each sample's earlier call
     * @return the updated variant context
     * @throws IllegalArgumentException if {@code vc} has an alternate allele other than DEL or DUP
     */
    @VisibleForTesting
    protected static VariantContext updateGenotypes(Set<String> allosomalContigs, int refAutosomalCopyNumber, SampleDB sampleDB, SortedSet<String> samples, VariantContext vc, Map<String, CopyNumberAndEndRecord> sampleCopyNumbers) {
        final boolean hasUnsupportedAlt = vc.getAlternateAlleles().stream()
                .anyMatch(a -> !a.equals(GATKSVVCFConstants.DEL_ALLELE) && !a.equals(GATKSVVCFConstants.DUP_ALLELE));
        if (hasUnsupportedAlt) {
            throw new IllegalArgumentException("At site " + vc.getContig() + ":" + vc.getStart() + " variant context contains alternate alleles in addition to CNV <DEL> and <DUP> alleles: " + vc.getAlternateAlleles());
        }

        final VariantContextBuilder builder = new VariantContextBuilder(vc);
        final List<Genotype> newGenotypes = new ArrayList<Genotype>();
        final Allele vcRefAllele = vc.getReference();
        final Map<Allele, Long> alleleCountMap = new LinkedHashMap<Allele, Long>(2);
        alleleCountMap.put(GATKSVVCFConstants.DEL_ALLELE, 0L);
        alleleCountMap.put(GATKSVVCFConstants.DUP_ALLELE, 0L);

        int alleleNumber = 0;
        for (final String sample : samples) {
            final Genotype g = vc.getGenotype(sample);
            final GenotypeBuilder genotypeBuilder = g == null ? new GenotypeBuilder(sample) : new GenotypeBuilder(g);
            final int samplePloidy = JointGermlineCNVSegmentation.getSamplePloidy(allosomalContigs, refAutosomalCopyNumber, sampleDB, sample, vc.getContig(), g);
            alleleNumber += samplePloidy;

            // Sample has neither an earlier call nor a genotype here: emit a plain reference genotype.
            if (!sampleCopyNumbers.containsKey(sample) && !vc.hasGenotype(sample)) {
                genotypeBuilder.alleles(GATKVariantContextUtils.makePloidyLengthAlleleList(samplePloidy, vcRefAllele));
                genotypeBuilder.attribute("CN", samplePloidy);
                newGenotypes.add(genotypeBuilder.make());
                continue;
            }

            final int copyNumber;
            final List<Allele> alleles;
            if (sampleCopyNumbers.containsKey(sample) && sampleCopyNumbers.get(sample).getEndPosition() > vc.getStart()) {
                // An earlier call for this sample still extends over this record: carry its CN forward.
                copyNumber = sampleCopyNumbers.get(sample).getCopyNumber();
                alleles = GATKVariantContextUtils.makePloidyLengthAlleleList(samplePloidy, vcRefAllele);
            } else if (g != null) {
                copyNumber = VariantContextGetters.getAttributeAsInt(g, "CN", samplePloidy);
                alleles = samplePloidy == g.getPloidy()
                        ? g.getAlleles()
                        : GATKSVVariantContextUtils.makeGenotypeAllelesFromCopyNumber(copyNumber, samplePloidy, vcRefAllele);
            } else {
                copyNumber = samplePloidy;
                alleles = GATKSVVariantContextUtils.makeGenotypeAllelesFromCopyNumber(copyNumber, samplePloidy, vcRefAllele);
            }
            genotypeBuilder.attribute("CN", copyNumber);
            genotypeBuilder.alleles(alleles);
            newGenotypes.add(genotypeBuilder.make());

            // Only samples genotyped in this record contribute to the allele counts.
            if (!vc.hasGenotype(sample)) {
                continue;
            }
            if (alleles.contains(GATKSVVCFConstants.DEL_ALLELE)) {
                final long nonRefCount = alleles.stream().filter(Allele::isNonReference).count();
                alleleCountMap.put(GATKSVVCFConstants.DEL_ALLELE, alleleCountMap.get(GATKSVVCFConstants.DEL_ALLELE) + nonRefCount);
            } else if (copyNumber > samplePloidy) {
                // A duplication is counted once per sample.
                alleleCountMap.put(GATKSVVCFConstants.DUP_ALLELE, alleleCountMap.get(GATKSVVCFConstants.DUP_ALLELE) + 1L);
            }
        }
        builder.genotypes(newGenotypes);

        if (alleleNumber > 0) {
            if (vc.getAlternateAlleles().size() == 1) {
                final long AC = alleleCountMap.get(vc.getAlternateAllele(0));
                builder.attribute("AC", AC)
                        .attribute("AF", (double) AC / (double) alleleNumber)
                        .attribute("AN", alleleNumber);
            } else {
                final List<Long> alleleCounts = new ArrayList<Long>(vc.getNAlleles());
                final List<Double> alleleFreqs = new ArrayList<Double>(vc.getNAlleles());
                for (final Allele a : builder.getAlleles()) {
                    if (a.isReference()) {
                        continue;
                    }
                    final long count = alleleCountMap.get(a);
                    alleleCounts.add(count);
                    // AF is a frequency, so divide by AN exactly as the single-allele branch does
                    // (the raw count used to be stored here).
                    alleleFreqs.add((double) count / (double) alleleNumber);
                }
                builder.attribute("AC", alleleCounts)
                        .attribute("AF", alleleFreqs)
                        .attribute("AN", alleleNumber);
            }
        }
        return builder.make();
    }

    /**
     * Determines the expected ploidy of a sample on a contig.
     *
     * <p>Contigs outside {@code allosomalContigs} use {@code refAutosomalCopyNumber}. On allosomal
     * contigs, a sample missing from the sample database falls back to the ploidy of its genotype.
     * Otherwise the sample's sex decides: on X/chrX a female is 2 and anything else is 1; on
     * Y/chrY a female is 0 and anything else is 1.
     *
     * @param allosomalContigs       contigs on which ploidy depends on sample sex
     * @param refAutosomalCopyNumber ploidy returned for every other contig
     * @param sampleDB               sample database; may be null
     * @param sampleName             sample to look up
     * @param contig                 contig of the variant
     * @param g                      the sample's genotype, used as a fallback; may be null
     * @return the sample's ploidy on {@code contig}
     * @throws IllegalStateException    if the sample is not in the database and {@code g} is null
     * @throws IllegalArgumentException if the contig is allosomal but is not X, chrX, Y or chrY
     */
    @VisibleForTesting
    protected static int getSamplePloidy(Set<String> allosomalContigs, int refAutosomalCopyNumber, SampleDB sampleDB, String sampleName, String contig, Genotype g) {
        if (!allosomalContigs.contains(contig)) {
            return refAutosomalCopyNumber;
        }

        final boolean sampleInPedigree = sampleDB != null && sampleDB.getSample(sampleName) != null;
        if (!sampleInPedigree) {
            if (g == null) {
                throw new IllegalStateException("Sample " + sampleName + " is missing from the pedigree");
            }
            return g.getPloidy();
        }

        final Sex sampleSex = sampleDB.getSample(sampleName).getSex();
        switch (contig) {
            case "X":
            case "chrX":
                // Any sex other than female is treated as a single X copy.
                return sampleSex.equals(Sex.FEMALE) ? 2 : 1;
            case "Y":
            case "chrY":
                // Any sex other than female is treated as a single Y copy.
                return sampleSex.equals(Sex.FEMALE) ? 0 : 1;
            default:
                throw new IllegalArgumentException("Encountered unknown allosomal contig: " + contig + ". This tool only supports mammalian genomes with XX/XY sex determination.");
        }
    }

    /**
     * Converts a call record into a variant context.
     *
     * <p>The reference allele is the reference base at the call's start. CNV calls get both the DEL
     * and DUP alternate alleles and SVTYPE "MCNV"; other calls get a single symbolic allele named
     * after their type and carry the type itself as SVTYPE. END and SVLEN are set from the call.
     * Each genotype is copied, with its reference alleles swapped for the new reference allele.
     *
     * @param call      call record to convert; must not be null
     * @param reference reference used to read the base at the call's start; must not be null
     * @return the variant context built from {@code call}
     */
    @VisibleForTesting
    protected static VariantContext buildVariantContext(SVCallRecordWithEvidence call, ReferenceSequenceFile reference) {
        Utils.nonNull(call);
        Utils.nonNull(reference);

        final boolean isCNV = call.getType().equals(StructuralVariantType.CNV);
        final Allele refAllele = Allele.create(
                ReferenceUtils.getRefBaseAtPosition(reference, call.getContig(), call.getStart()), true);

        final List<Allele> outputAlleles = new ArrayList<>(3);
        outputAlleles.add(refAllele);
        if (isCNV) {
            outputAlleles.add(GATKSVVCFConstants.DEL_ALLELE);
            outputAlleles.add(GATKSVVCFConstants.DUP_ALLELE);
        } else {
            outputAlleles.add(Allele.create("<" + call.getType().name() + ">", false));
        }

        final VariantContextBuilder builder =
                new VariantContextBuilder("", call.getContig(), (long) call.getStart(), (long) call.getEnd(), outputAlleles);
        builder.attribute("END", call.getEnd());
        builder.attribute("SVLEN", call.getLength());
        final Object svType = isCNV ? "MCNV" : call.getType();
        builder.attribute("SVTYPE", svType);

        final List<Genotype> genotypes = new ArrayList<>(call.getGenotypes().size());
        for (final Genotype original : call.getGenotypes()) {
            // Point reference alleles at the freshly created reference allele; keep the others as they are.
            final List<Allele> remappedAlleles = original.getAlleles().stream()
                    .map(a -> a.isReference() ? refAllele : a)
                    .collect(Collectors.toList());
            final GenotypeBuilder genotypeBuilder = new GenotypeBuilder(original);
            genotypeBuilder.alleles(remappedAlleles);
            if (original.hasAnyAttribute("CN")) {
                genotypeBuilder.attribute("CN", original.getExtendedAttribute("CN"));
            }
            genotypes.add(genotypeBuilder.make());
        }
        builder.genotypes(genotypes);
        return builder.make();
    }

    /**
     * Closes the output writer, if one was created.
     */
    @Override
    public void closeTool() {
        if (this.vcfWriter == null) {
            return;
        }
        this.vcfWriter.close();
    }

    /**
     * Immutable holder for the copy number of a call together with the call's end position.
     */
    class CopyNumberAndEndRecord {
        private final int copyNumber;
        private final int endPosition;

        /**
         * @param copyNumber copy number of the call
         * @param end        end position of the call
         */
        public CopyNumberAndEndRecord(int copyNumber, int end) {
            this.copyNumber = copyNumber;
            this.endPosition = end;
        }

        /** @return the copy number given at construction */
        public int getCopyNumber() {
            return this.copyNumber;
        }

        /** @return the end position given at construction */
        public int getEndPosition() {
            return this.endPosition;
        }
    }
}

