/*
 * Decompiled with CFR 0.152.
 */
package org.broadinstitute.hellbender.tools.walkers.haplotypecaller.readthreading;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Lists;
import htsjdk.samtools.Cigar;
import htsjdk.samtools.CigarOperator;
import htsjdk.samtools.SAMFileHeader;
import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.broadinstitute.gatk.nativebindings.smithwaterman.SWOverhangStrategy;
import org.broadinstitute.hellbender.engine.AssemblyRegion;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AssemblyResult;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AssemblyResultSet;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.ReadErrorCorrector;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.graphs.AdaptiveChainPruner;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.graphs.BaseEdge;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.graphs.BaseGraph;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.graphs.BaseVertex;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.graphs.ChainPruner;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.graphs.GraphBasedKBestHaplotypeFinder;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.graphs.JTBestHaplotype;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.graphs.JunctionTreeKBestHaplotypeFinder;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.graphs.KBestHaplotype;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.graphs.LowWeightChainPruner;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.graphs.MultiSampleEdge;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.graphs.SeqGraph;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.graphs.SeqVertex;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.readthreading.AbstractReadThreadingGraph;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.readthreading.JunctionTreeLinkedDeBruijnGraph;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.readthreading.MultiDeBruijnVertex;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.readthreading.ReadThreadingGraph;
import org.broadinstitute.hellbender.utils.Histogram;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.clipping.ReadClipper;
import org.broadinstitute.hellbender.utils.haplotype.Haplotype;
import org.broadinstitute.hellbender.utils.param.ParamUtils;
import org.broadinstitute.hellbender.utils.read.CigarUtils;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.smithwaterman.SmithWatermanAligner;

public final class ReadThreadingAssembler {
    private static final Logger logger = LogManager.getLogger(ReadThreadingAssembler.class);
    static final int DEFAULT_NUM_PATHS_PER_GRAPH = 128;
    private static final int KMER_SIZE_ITERATION_INCREASE = 10;
    private static final int MAX_KMER_ITERATIONS_TO_ATTEMPT = 6;
    private final List<Integer> kmerSizes;
    private final boolean dontIncreaseKmerSizesForCycles;
    private final boolean allowNonUniqueKmersInRef;
    private final boolean generateSeqGraph;
    private boolean recoverHaplotypesFromEdgesNotCoveredInJunctionTrees = true;
    private final int numPruningSamples;
    private final int numBestHaplotypesPerGraph;
    private final boolean pruneBeforeCycleCounting;
    private boolean removePathsNotConnectedToRef = true;
    private boolean justReturnRawGraph = false;
    private static final boolean PRINT_FULL_GRAPH_FOR_DEBUGGING = true;
    private static final byte DEFAULT_MIN_BASE_QUALITY_TO_USE = 10;
    private static final int MIN_HAPLOTYPE_REFERENCE_LENGTH = 30;
    private boolean debug = false;
    private boolean debugGraphTransformations = false;
    private boolean recoverDanglingBranches = true;
    private boolean recoverAllDanglingBranches = false;
    private int minDanglingBranchLength = 0;
    protected byte minBaseQualityToUseInAssembly = (byte)10;
    private int pruneFactor;
    private final ChainPruner<MultiDeBruijnVertex, MultiSampleEdge> chainPruner;
    private int minMatchingBasesToDanglingEndRecovery;
    private File debugGraphOutputPath = null;
    private File graphOutputPath = null;
    private File graphHaplotypeHistogramPath = null;
    private Histogram haplotypeHistogram = null;
    private Histogram kmersUsedHistogram = null;

    public ReadThreadingAssembler(int maxAllowedPathsForReadThreadingAssembler, List<Integer> kmerSizes, boolean dontIncreaseKmerSizesForCycles, boolean allowNonUniqueKmersInRef, int numPruningSamples, int pruneFactor, boolean useAdaptivePruning, double initialErrorRateForPruning, double pruningLogOddsThreshold, double pruningSeedingLogOddsThreshold, int maxUnprunedVariants, boolean useLinkedDebruijnGraphs, boolean enableLegacyGraphCycleDetection, int minMachingBasesToDanglngEndRecovery) {
        Utils.validateArg(maxAllowedPathsForReadThreadingAssembler >= 1, "numBestHaplotypesPerGraph should be >= 1 but got " + maxAllowedPathsForReadThreadingAssembler);
        this.kmerSizes = kmerSizes.stream().sorted(Integer::compareTo).collect(Collectors.toList());
        this.dontIncreaseKmerSizesForCycles = dontIncreaseKmerSizesForCycles;
        this.allowNonUniqueKmersInRef = allowNonUniqueKmersInRef;
        this.numPruningSamples = numPruningSamples;
        this.pruneFactor = pruneFactor;
        this.generateSeqGraph = !useLinkedDebruijnGraphs;
        boolean bl = this.pruneBeforeCycleCounting = !enableLegacyGraphCycleDetection;
        if (!this.generateSeqGraph) {
            logger.error("JunctionTreeLinkedDeBruijnGraph is enabled.\n This is an experimental assembly graph mode that has not been fully validated\n\n");
        }
        this.chainPruner = useAdaptivePruning ? new AdaptiveChainPruner(initialErrorRateForPruning, pruningLogOddsThreshold, pruningSeedingLogOddsThreshold, maxUnprunedVariants) : new LowWeightChainPruner(pruneFactor);
        this.numBestHaplotypesPerGraph = maxAllowedPathsForReadThreadingAssembler;
        this.minMatchingBasesToDanglingEndRecovery = minMachingBasesToDanglngEndRecovery;
    }

    @VisibleForTesting
    ReadThreadingAssembler(int maxAllowedPathsForReadThreadingAssembler, List<Integer> kmerSizes, int pruneFactor) {
        this(maxAllowedPathsForReadThreadingAssembler, kmerSizes, true, true, 1, pruneFactor, false, 0.001, 2.0, 2.0, Integer.MAX_VALUE, false, false, 3);
    }

    @VisibleForTesting
    ReadThreadingAssembler() {
        this(128, Arrays.asList(25), 2);
    }

    @VisibleForTesting
    void setMinMatchingBasesToDanglingEndRecovery(int mimMatchingBases) {
        this.minMatchingBasesToDanglingEndRecovery = mimMatchingBases;
    }

    public AssemblyResultSet runLocalAssembly(AssemblyRegion assemblyRegion, Haplotype refHaplotype, byte[] fullReferenceWithPadding, SimpleInterval refLoc, ReadErrorCorrector readErrorCorrector, SAMFileHeader header, SmithWatermanAligner aligner) {
        Utils.nonNull(assemblyRegion, "Assembly engine cannot be used with a null AssemblyRegion.");
        Utils.nonNull(assemblyRegion.getPaddedSpan(), "Active region must have an extended location.");
        Utils.nonNull(refHaplotype, "Reference haplotype cannot be null.");
        Utils.nonNull(fullReferenceWithPadding, "fullReferenceWithPadding");
        Utils.nonNull(refLoc, "refLoc");
        Utils.nonNull(aligner, "aligner");
        Utils.validateArg(fullReferenceWithPadding.length == refLoc.size(), "Reference bases and reference loc must be the same size.");
        ParamUtils.isPositiveOrZero(this.pruneFactor, "Pruning factor cannot be negative");
        List<GATKRead> correctedReads = readErrorCorrector == null ? assemblyRegion.getReads() : readErrorCorrector.correctReads(assemblyRegion.getReads());
        correctedReads = correctedReads.stream().map(r -> ReadClipper.hardClipSoftClippedBases(r)).collect(Collectors.toList());
        LinkedList<AbstractReadThreadingGraph> nonRefRTGraphs = new LinkedList<AbstractReadThreadingGraph>();
        LinkedList<SeqGraph> nonRefSeqGraphs = new LinkedList<SeqGraph>();
        AssemblyResultSet resultSet = new AssemblyResultSet();
        resultSet.setRegionForGenotyping(assemblyRegion);
        resultSet.setFullReferenceWithPadding(fullReferenceWithPadding);
        resultSet.setPaddedReferenceLoc(refLoc);
        SimpleInterval activeRegionExtendedLocation = assemblyRegion.getPaddedSpan();
        refHaplotype.setGenomeLocation(activeRegionExtendedLocation);
        resultSet.add(refHaplotype);
        if (this.generateSeqGraph) {
            this.assembleKmerGraphsAndHaplotypeCall(refHaplotype, refLoc, header, aligner, correctedReads, nonRefSeqGraphs, resultSet, activeRegionExtendedLocation);
        } else {
            this.assembleGraphsAndExpandKmersGivenHaplotypes(refHaplotype, refLoc, header, aligner, correctedReads, nonRefRTGraphs, resultSet, activeRegionExtendedLocation);
        }
        if (resultSet.getHaplotypeList().isEmpty()) {
            logger.debug("Graph at position " + resultSet.getPaddedReferenceLoc() + " failed to assemble anything informative; emitting just the reference here");
        }
        if (this.graphOutputPath != null) {
            if (this.generateSeqGraph) {
                this.printGraphs(nonRefSeqGraphs);
            } else {
                this.printGraphs(nonRefRTGraphs);
            }
        }
        if (this.graphHaplotypeHistogramPath != null) {
            this.haplotypeHistogram.add(Double.valueOf(resultSet.getHaplotypeCount()));
        }
        return resultSet;
    }

    private void assembleKmerGraphsAndHaplotypeCall(Haplotype refHaplotype, SimpleInterval refLoc, SAMFileHeader header, SmithWatermanAligner aligner, List<GATKRead> correctedReads, List<SeqGraph> nonRefSeqGraphs, AssemblyResultSet resultSet, SimpleInterval activeRegionExtendedLocation) {
        HashMap<SeqGraph, AssemblyResult> assemblyResultBySeqGraph = new HashMap<SeqGraph, AssemblyResult>();
        for (AssemblyResult result : this.assemble(correctedReads, refHaplotype, header, aligner)) {
            if (result.getStatus() != AssemblyResult.Status.ASSEMBLED_SOME_VARIATION) continue;
            ReadThreadingAssembler.sanityCheckGraph(result.getSeqGraph(), refHaplotype);
            assemblyResultBySeqGraph.put(result.getSeqGraph(), result);
            nonRefSeqGraphs.add(result.getSeqGraph());
            if (this.graphHaplotypeHistogramPath == null) continue;
            this.kmersUsedHistogram.add(Double.valueOf(result.getKmerSize()));
        }
        this.findBestPaths(nonRefSeqGraphs, assemblyResultBySeqGraph, refHaplotype, refLoc, activeRegionExtendedLocation, resultSet, aligner);
    }

    private void assembleGraphsAndExpandKmersGivenHaplotypes(Haplotype refHaplotype, SimpleInterval refLoc, SAMFileHeader header, SmithWatermanAligner aligner, List<GATKRead> correctedReads, List<AbstractReadThreadingGraph> nonRefRTGraphs, AssemblyResultSet resultSet, SimpleInterval activeRegionExtendedLocation) {
        HashMap<AbstractReadThreadingGraph, AssemblyResult> assemblyResultByRTGraph = new HashMap<AbstractReadThreadingGraph, AssemblyResult>();
        ArrayList<AssemblyResult> savedAssemblyResults = new ArrayList<AssemblyResult>();
        boolean hasAdequatelyAssembledGraph = false;
        List<Integer> kmersToTry = this.getExpandedKmerList();
        for (int i = 0; i < kmersToTry.size(); ++i) {
            boolean isLastCycle;
            int kmerSize = kmersToTry.get(i);
            boolean bl = isLastCycle = i == kmersToTry.size() - 1;
            if (hasAdequatelyAssembledGraph) continue;
            AssemblyResult assembledResult = this.createGraph(correctedReads, refHaplotype, kmerSize, isLastCycle || this.dontIncreaseKmerSizesForCycles, isLastCycle || this.allowNonUniqueKmersInRef, header, aligner);
            if (assembledResult != null && assembledResult.getStatus() == AssemblyResult.Status.ASSEMBLED_SOME_VARIATION) {
                ReadThreadingAssembler.sanityCheckGraph(assembledResult.getThreadingGraph(), refHaplotype);
                assembledResult.getThreadingGraph().postProcessForHaplotypeFinding(this.debugGraphOutputPath, refHaplotype.getLocation());
                assemblyResultByRTGraph.put(assembledResult.getThreadingGraph(), assembledResult);
                nonRefRTGraphs.add(assembledResult.getThreadingGraph());
                if (this.graphHaplotypeHistogramPath != null) {
                    this.kmersUsedHistogram.add(Double.valueOf(assembledResult.getKmerSize()));
                }
                AbstractReadThreadingGraph graph = assembledResult.getThreadingGraph();
                this.findBestPaths(Collections.singletonList(graph), Collections.singletonMap(graph, assembledResult), refHaplotype, refLoc, activeRegionExtendedLocation, null, aligner);
                savedAssemblyResults.add(assembledResult);
                if (((AssemblyResult)savedAssemblyResults.get(savedAssemblyResults.size() - 1)).getDiscoveredHaplotypes().isEmpty() || assembledResult.containsSuspectHaplotypes()) continue;
                for (Haplotype h : assembledResult.getDiscoveredHaplotypes()) {
                    resultSet.add(h, assembledResult);
                }
                hasAdequatelyAssembledGraph = true;
                continue;
            }
            if (assembledResult == null || assembledResult.getStatus() != AssemblyResult.Status.JUST_ASSEMBLED_REFERENCE) continue;
            hasAdequatelyAssembledGraph = true;
        }
        if (!hasAdequatelyAssembledGraph) {
            for (AssemblyResult result : Lists.reverse(savedAssemblyResults)) {
                if (result.getDiscoveredHaplotypes().size() <= 1) continue;
                for (Haplotype h : result.getDiscoveredHaplotypes()) {
                    resultSet.add(h, result);
                }
            }
        }
    }

    private <V extends BaseVertex, E extends BaseEdge, T extends BaseGraph<V, E>> List<Haplotype> findBestPaths(Collection<T> graphs, Map<T, AssemblyResult> assemblyResultByGraph, Haplotype refHaplotype, SimpleInterval refLoc, SimpleInterval activeRegionWindow, AssemblyResultSet resultSet, SmithWatermanAligner aligner) {
        LinkedHashSet<Haplotype> returnHaplotypes = new LinkedHashSet<Haplotype>();
        int activeRegionStart = refHaplotype.getAlignmentStartHapwrtRef();
        int failedCigars = 0;
        for (BaseGraph graph : graphs) {
            AssemblyResult assemblyResult = assemblyResultByGraph.get((Object)graph);
            Object source = graph.getReferenceSourceVertex();
            Object sink = graph.getReferenceSinkVertex();
            Utils.validateArg(source != null && sink != null, () -> "Both source and sink cannot be null but got " + source + " and sink " + sink + " for graph " + (Object)((Object)graph));
            for (KBestHaplotype kBestHaplotype : (this.generateSeqGraph ? new GraphBasedKBestHaplotypeFinder(graph, source, sink) : new JunctionTreeKBestHaplotypeFinder(graph, source, sink, 3, this.recoverHaplotypesFromEdgesNotCoveredInJunctionTrees)).findBestHaplotypes(this.numBestHaplotypesPerGraph)) {
                Cigar cigar;
                if (kBestHaplotype instanceof JTBestHaplotype && ((JTBestHaplotype)kBestHaplotype).isWasPoorlyRecovered()) {
                    assemblyResult.setContainsSuspectHaplotypes(true);
                }
                Haplotype h = kBestHaplotype.haplotype();
                h.setKmerSize(kBestHaplotype.getGraph().getKmerSize());
                if (returnHaplotypes.contains((Object)h)) continue;
                if (kBestHaplotype.isReference()) {
                    refHaplotype.setScore(kBestHaplotype.score());
                }
                if ((cigar = CigarUtils.calculateCigar(refHaplotype.getBases(), h.getBases(), aligner, SWOverhangStrategy.SOFTCLIP)) == null) {
                    ++failedCigars;
                    continue;
                }
                if (cigar.isEmpty()) {
                    throw new IllegalStateException("Smith-Waterman alignment failure. Cigar = " + cigar + " with reference length " + cigar.getReferenceLength() + " but expecting reference length of " + refHaplotype.getCigar().getReferenceLength());
                }
                if (ReadThreadingAssembler.pathIsTooDivergentFromReference(cigar) || cigar.getReferenceLength() < 30) continue;
                if (cigar.getReferenceLength() != refHaplotype.getCigar().getReferenceLength()) {
                    Cigar cigarWithIndelStrategy = CigarUtils.calculateCigar(refHaplotype.getBases(), h.getBases(), aligner, SWOverhangStrategy.INDEL);
                    if (cigarWithIndelStrategy.getReferenceLength() == refHaplotype.getCigar().getReferenceLength()) {
                        ++failedCigars;
                        continue;
                    }
                    throw new IllegalStateException("Smith-Waterman alignment failure. Cigar = " + cigar + " with reference length " + cigar.getReferenceLength() + " but expecting reference length of " + refHaplotype.getCigar().getReferenceLength() + " ref = " + (Object)((Object)refHaplotype) + " path " + new String(h.getBases()));
                }
                h.setCigar(cigar);
                h.setAlignmentStartHapwrtRef(activeRegionStart);
                h.setGenomeLocation(activeRegionWindow);
                returnHaplotypes.add(h);
                if (resultSet != null) {
                    resultSet.add(h);
                }
                if (!this.debug) continue;
                logger.info("Adding haplotype " + h.getCigar() + " from graph with kmer " + assemblyResult.getKmerSize());
            }
            if (!returnHaplotypes.isEmpty() && !returnHaplotypes.contains((Object)refHaplotype)) {
                returnHaplotypes.add(refHaplotype);
            }
            assemblyResult.setDiscoveredHaplotypes(returnHaplotypes);
        }
        if (failedCigars != 0) {
            logger.debug(String.format("failed to align some haplotypes (%d) back to the reference (loc=%s); these will be ignored.", failedCigars, refLoc.toString()));
        }
        if (this.debug) {
            if (returnHaplotypes.size() > 1) {
                logger.info("Found " + returnHaplotypes.size() + " candidate haplotypes of " + returnHaplotypes.size() + " possible combinations to evaluate every read against.");
            } else {
                logger.info("Found only the reference haplotype in the assembly graph.");
            }
            for (Haplotype h : returnHaplotypes) {
                logger.info(h.toString());
                logger.info("> Cigar = " + h.getCigar() + " : " + h.getCigar().getReferenceLength() + " score " + h.getScore() + " ref " + h.isReference());
            }
        }
        return new ArrayList<Haplotype>(returnHaplotypes);
    }

    private static boolean pathIsTooDivergentFromReference(Cigar c) {
        return c.getCigarElements().stream().anyMatch(ce -> ce.getOperator() == CigarOperator.N);
    }

    private void printDebugGraphTransform(BaseGraph<?, ?> graph, String fileName) {
        File outputFile = new File(this.debugGraphOutputPath, fileName);
        graph.printGraph(outputFile, this.pruneFactor);
    }

    private AssemblyResult getResultSetForRTGraph(AbstractReadThreadingGraph rtGraph) {
        if (rtGraph.getReferenceSourceVertex() == null || rtGraph.getReferenceSinkVertex() == null) {
            return new AssemblyResult(AssemblyResult.Status.JUST_ASSEMBLED_REFERENCE, null, rtGraph);
        }
        return new AssemblyResult(AssemblyResult.Status.ASSEMBLED_SOME_VARIATION, null, rtGraph);
    }

    private AssemblyResult cleanupSeqGraph(SeqGraph seqGraph, Haplotype refHaplotype) {
        if (this.debugGraphTransformations) {
            this.printDebugGraphTransform(seqGraph, refHaplotype.getLocation() + "-sequenceGraph." + seqGraph.getKmerSize() + ".1.0.non_ref_removed.dot");
        }
        seqGraph.zipLinearChains();
        if (this.debugGraphTransformations) {
            this.printDebugGraphTransform(seqGraph, refHaplotype.getLocation() + "-sequenceGraph." + seqGraph.getKmerSize() + ".1.1.zipped.dot");
        }
        seqGraph.removeSingletonOrphanVertices();
        seqGraph.removeVerticesNotConnectedToRefRegardlessOfEdgeDirection();
        if (this.debugGraphTransformations) {
            this.printDebugGraphTransform(seqGraph, refHaplotype.getLocation() + "-sequenceGraph." + seqGraph.getKmerSize() + ".1.2.pruned.dot");
        }
        seqGraph.simplifyGraph();
        if (this.debugGraphTransformations) {
            this.printDebugGraphTransform(seqGraph, refHaplotype.getLocation() + "-sequenceGraph." + seqGraph.getKmerSize() + ".1.3.merged.dot");
        }
        if (seqGraph.getReferenceSourceVertex() == null || seqGraph.getReferenceSinkVertex() == null) {
            return new AssemblyResult(AssemblyResult.Status.JUST_ASSEMBLED_REFERENCE, seqGraph, null);
        }
        seqGraph.removePathsNotConnectedToRef();
        seqGraph.simplifyGraph();
        if (seqGraph.vertexSet().size() == 1) {
            SeqVertex complete = (SeqVertex)seqGraph.vertexSet().iterator().next();
            SeqVertex dummy = new SeqVertex("");
            seqGraph.addVertex(dummy);
            seqGraph.addEdge(complete, dummy, new BaseEdge(true, 0));
        }
        if (this.debugGraphTransformations) {
            this.printDebugGraphTransform(seqGraph, refHaplotype.getLocation() + "-sequenceGraph." + seqGraph.getKmerSize() + ".1.4.final.dot");
        }
        return new AssemblyResult(AssemblyResult.Status.ASSEMBLED_SOME_VARIATION, seqGraph, null);
    }

    private static <T extends BaseVertex, E extends BaseEdge> void sanityCheckGraph(BaseGraph<T, E> graph, Haplotype refHaplotype) {
        ReadThreadingAssembler.sanityCheckReferenceGraph(graph, refHaplotype);
    }

    private static <T extends BaseVertex, E extends BaseEdge> void sanityCheckReferenceGraph(BaseGraph<T, E> graph, Haplotype refHaplotype) {
        if (graph.getReferenceSourceVertex() == null) {
            throw new IllegalStateException("All reference graphs must have a reference source vertex.");
        }
        if (graph.getReferenceSinkVertex() == null) {
            throw new IllegalStateException("All reference graphs must have a reference sink vertex.");
        }
        if (!Arrays.equals(graph.getReferenceBytes(graph.getReferenceSourceVertex(), graph.getReferenceSinkVertex(), true, true), refHaplotype.getBases())) {
            throw new IllegalStateException("Mismatch between the reference haplotype and the reference assembly graph path. for graph " + graph + " graph = " + new String(graph.getReferenceBytes(graph.getReferenceSourceVertex(), graph.getReferenceSinkVertex(), true, true)) + " haplotype = " + new String(refHaplotype.getBases()));
        }
    }

    private static void addResult(Collection<AssemblyResult> results, AssemblyResult maybeNullResult) {
        if (maybeNullResult != null) {
            results.add(maybeNullResult);
        }
    }

    @VisibleForTesting
    List<AssemblyResult> assemble(List<GATKRead> reads, Haplotype refHaplotype, SAMFileHeader header, SmithWatermanAligner aligner) {
        LinkedList<AssemblyResult> results = new LinkedList<AssemblyResult>();
        for (int kmerSize : this.kmerSizes) {
            ReadThreadingAssembler.addResult(results, this.createGraph(reads, refHaplotype, kmerSize, this.dontIncreaseKmerSizesForCycles, this.allowNonUniqueKmersInRef, header, aligner));
        }
        if (results.isEmpty() && !this.dontIncreaseKmerSizesForCycles) {
            int kmerSize = ReadThreadingAssembler.arrayMaxInt(this.kmerSizes) + 10;
            for (int numIterations = 1; results.isEmpty() && numIterations <= 6; ++numIterations) {
                boolean lastAttempt = numIterations == 6;
                ReadThreadingAssembler.addResult(results, this.createGraph(reads, refHaplotype, kmerSize, lastAttempt, lastAttempt, header, aligner));
                kmerSize += 10;
            }
        }
        return results;
    }

    List<Integer> getExpandedKmerList() {
        ArrayList<Integer> returnList = new ArrayList<Integer>(this.kmerSizes);
        if (!this.dontIncreaseKmerSizesForCycles) {
            int kmerSize = ReadThreadingAssembler.arrayMaxInt(this.kmerSizes) + 10;
            for (int numIterations = 1; numIterations <= 6; ++numIterations) {
                returnList.add(kmerSize);
                kmerSize += 10;
            }
        }
        return returnList;
    }

    private static int arrayMaxInt(List<Integer> array) {
        return array.stream().mapToInt(Integer::intValue).max().orElseThrow(() -> new IllegalArgumentException("Array size cannot be 0!"));
    }

    private AssemblyResult createGraph(Iterable<GATKRead> reads, Haplotype refHaplotype, int kmerSize, boolean allowLowComplexityGraphs, boolean allowNonUniqueKmersInRef, SAMFileHeader header, SmithWatermanAligner aligner) {
        if (refHaplotype.length() < kmerSize) {
            return new AssemblyResult(AssemblyResult.Status.FAILED, null, null);
        }
        if (!allowNonUniqueKmersInRef && !ReadThreadingGraph.determineNonUniqueKmers(new AbstractReadThreadingGraph.SequenceForKmers("ref", refHaplotype.getBases(), 0, refHaplotype.getBases().length, 1, true), kmerSize).isEmpty()) {
            if (this.debug) {
                logger.info("Not using kmer size of " + kmerSize + " in read threading assembler because reference contains non-unique kmers");
            }
            return null;
        }
        AbstractReadThreadingGraph rtgraph = this.generateSeqGraph ? new ReadThreadingGraph(kmerSize, this.debugGraphTransformations, this.minBaseQualityToUseInAssembly, this.numPruningSamples, this.minMatchingBasesToDanglingEndRecovery) : new JunctionTreeLinkedDeBruijnGraph(kmerSize, this.debugGraphTransformations, this.minBaseQualityToUseInAssembly, this.numPruningSamples, this.minMatchingBasesToDanglingEndRecovery);
        rtgraph.setThreadingStartOnlyAtExistingVertex(!this.recoverDanglingBranches);
        rtgraph.addSequence("ref", refHaplotype.getBases(), true);
        for (GATKRead read : reads) {
            rtgraph.addRead(read, header);
        }
        rtgraph.buildGraphIfNecessary();
        if (this.debugGraphTransformations) {
            this.printDebugGraphTransform(rtgraph, refHaplotype.getLocation() + "-sequenceGraph." + kmerSize + ".0.0.raw_readthreading_graph.dot");
        }
        if (this.pruneBeforeCycleCounting) {
            this.chainPruner.pruneLowWeightChains(rtgraph);
        }
        if (this.generateSeqGraph && rtgraph.hasCycles()) {
            if (this.debug) {
                logger.info("Not using kmer size of " + kmerSize + " in read threading assembler because it contains a cycle");
            }
            return null;
        }
        if (!allowLowComplexityGraphs && rtgraph.isLowQualityGraph()) {
            if (this.debug) {
                logger.info("Not using kmer size of " + kmerSize + " in read threading assembler because it does not produce a graph with enough complexity");
            }
            return null;
        }
        AssemblyResult result = this.getAssemblyResult(refHaplotype, kmerSize, rtgraph, aligner);
        if (this.recoverAllDanglingBranches && rtgraph.hasCycles()) {
            return null;
        }
        return result;
    }

    private AssemblyResult getAssemblyResult(Haplotype refHaplotype, int kmerSize, AbstractReadThreadingGraph rtgraph, SmithWatermanAligner aligner) {
        if (!this.pruneBeforeCycleCounting) {
            this.chainPruner.pruneLowWeightChains(rtgraph);
        }
        if (this.debugGraphTransformations) {
            this.printDebugGraphTransform(rtgraph, refHaplotype.getLocation() + "-sequenceGraph." + kmerSize + ".0.1.chain_pruned_readthreading_graph.dot");
        }
        if (this.recoverDanglingBranches) {
            rtgraph.recoverDanglingTails(this.pruneFactor, this.minDanglingBranchLength, this.recoverAllDanglingBranches, aligner);
            rtgraph.recoverDanglingHeads(this.pruneFactor, this.minDanglingBranchLength, this.recoverAllDanglingBranches, aligner);
        }
        if (this.removePathsNotConnectedToRef) {
            rtgraph.removePathsNotConnectedToRef();
        }
        if (this.debugGraphTransformations) {
            this.printDebugGraphTransform(rtgraph, refHaplotype.getLocation() + "-sequenceGraph." + kmerSize + ".0.2.cleaned_readthreading_graph.dot");
        }
        if (this.generateSeqGraph) {
            SeqGraph initialSeqGraph = rtgraph.toSequenceGraph();
            if (this.debugGraphTransformations) {
                rtgraph.printGraph(new File(this.debugGraphOutputPath, refHaplotype.getLocation() + "-sequenceGraph." + kmerSize + ".0.3.initial_seqgraph.dot"), 10000);
            }
            if (this.justReturnRawGraph) {
                return new AssemblyResult(AssemblyResult.Status.ASSEMBLED_SOME_VARIATION, initialSeqGraph, null);
            }
            if (this.debug) {
                logger.info("Using kmer size of " + rtgraph.getKmerSize() + " in read threading assembler");
            }
            if (this.debugGraphTransformations) {
                this.printDebugGraphTransform(initialSeqGraph, refHaplotype.getLocation() + "-sequenceGraph." + kmerSize + ".0.4.initial_seqgraph.dot");
            }
            initialSeqGraph.cleanNonRefPaths();
            AssemblyResult cleaned = this.cleanupSeqGraph(initialSeqGraph, refHaplotype);
            AssemblyResult.Status status = cleaned.getStatus();
            return new AssemblyResult(status, cleaned.getSeqGraph(), rtgraph);
        }
        if (this.justReturnRawGraph) {
            return new AssemblyResult(AssemblyResult.Status.ASSEMBLED_SOME_VARIATION, null, rtgraph);
        }
        if (this.debug) {
            logger.info("Using kmer size of " + rtgraph.getKmerSize() + " in read threading assembler");
        }
        AssemblyResult cleaned = this.getResultSetForRTGraph(rtgraph);
        AssemblyResult.Status status = cleaned.getStatus();
        return new AssemblyResult(status, null, cleaned.getThreadingGraph());
    }

    public String toString() {
        return "ReadThreadingAssembler{kmerSizes=" + this.kmerSizes + '}';
    }

    private <T extends BaseGraph<?, ?>> void printGraphs(List<T> graphs) {
        int writeFirstGraphWithSizeSmallerThan = 50;
        try (PrintStream graphWriter = new PrintStream(this.graphOutputPath);){
            graphWriter.println("digraph assemblyGraphs {");
            for (BaseGraph graph : graphs) {
                if (this.debugGraphTransformations && graph.getKmerSize() >= 50) {
                    logger.info("Skipping writing of graph with kmersize " + graph.getKmerSize());
                    continue;
                }
                graph.printGraph(graphWriter, false, this.pruneFactor);
                if (!this.debugGraphTransformations) continue;
                break;
            }
            graphWriter.println("}");
        }
        catch (IOException e) {
            throw new UserException.CouldNotCreateOutputFile(this.graphOutputPath, (Exception)e);
        }
    }

    public void printDebugHistograms() {
        if (this.graphHaplotypeHistogramPath != null) {
            try (PrintStream histogramWriter = new PrintStream(this.graphHaplotypeHistogramPath);){
                histogramWriter.println("Histogram over the number of haplotypes recovered per active region:");
                histogramWriter.println(this.haplotypeHistogram.toString());
                histogramWriter.println("\nHistogram over the average kmer size used for assembly:");
                histogramWriter.println(this.kmersUsedHistogram.toString());
            }
            catch (IOException e) {
                throw new UserException.CouldNotCreateOutputFile(this.graphHaplotypeHistogramPath, (Exception)e);
            }
        }
    }

    public void setGraphWriter(File graphOutputPath) {
        this.graphOutputPath = graphOutputPath;
    }

    public byte getMinBaseQualityToUseInAssembly() {
        return this.minBaseQualityToUseInAssembly;
    }

    public void setMinBaseQualityToUseInAssembly(byte minBaseQualityToUseInAssembly) {
        this.minBaseQualityToUseInAssembly = minBaseQualityToUseInAssembly;
    }

    public boolean isDebug() {
        return this.debug;
    }

    public void setDebug(boolean debug) {
        this.debug = debug;
    }

    public boolean isDebugGraphTransformations() {
        return this.debugGraphTransformations;
    }

    public boolean isRecoverDanglingBranches() {
        return this.recoverDanglingBranches;
    }

    public void setDebugHistogramOutput(File file) {
        this.graphHaplotypeHistogramPath = file;
        this.haplotypeHistogram = new Histogram(1.0);
        this.kmersUsedHistogram = new Histogram(1.0);
    }

    public void setDebugGraphTransformations(boolean debugHaplotypeFinding) {
        this.debugGraphTransformations = debugHaplotypeFinding;
    }

    public void setDebugGraphOutputPath(File debugGraphOutputPath) {
        this.debugGraphOutputPath = debugGraphOutputPath;
    }

    public void setRecoverDanglingBranches(boolean recoverDanglingBranches) {
        this.recoverDanglingBranches = recoverDanglingBranches;
    }

    public void setRecoverAllDanglingBranches(boolean recoverAllDanglingBranches) {
        this.recoverAllDanglingBranches = recoverAllDanglingBranches;
        this.recoverDanglingBranches = true;
    }

    public void setMinDanglingBranchLength(int minDanglingBranchLength) {
        this.minDanglingBranchLength = minDanglingBranchLength;
    }

    @VisibleForTesting
    void setJustReturnRawGraph(boolean justReturnRawGraph) {
        this.justReturnRawGraph = justReturnRawGraph;
    }

    public void setRemovePathsNotConnectedToRef(boolean removePathsNotConnectedToRef) {
        this.removePathsNotConnectedToRef = removePathsNotConnectedToRef;
    }

    public void setArtificialHaplotypeRecoveryMode(boolean disableUncoveredJunctionTreeHaplotypeRecovery) {
        if (disableUncoveredJunctionTreeHaplotypeRecovery) {
            if (!this.generateSeqGraph) {
                throw new UserException("Argument '--experimental-dangling-branch-recover' requires '--linked-de-bruijn-graph' to be set");
            }
            this.recoverHaplotypesFromEdgesNotCoveredInJunctionTrees = false;
        }
    }
}

