/*
 * Decompiled with CFR 0.152.
 */
package picard.sam.markduplicates;

import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMFileWriter;
import htsjdk.samtools.SAMFileWriterFactory;
import htsjdk.samtools.SAMProgramRecord;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMTag;
import htsjdk.samtools.util.Histogram;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.IterableAdapter;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.ProgressLogger;
import java.io.File;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import picard.PicardException;
import picard.cmdline.CommandLineProgramProperties;
import picard.cmdline.Option;
import picard.cmdline.programgroups.SamOrBam;
import picard.sam.markduplicates.MarkDuplicatesWithMateCigarIterator;
import picard.sam.markduplicates.util.AbstractMarkDuplicatesCommandLineProgram;

@CommandLineProgramProperties(usage="Identifies duplicate reads, accounting for mate CIGAR.  This tool locates and tags duplicate reads (both PCR and optical) in a BAM or SAM file, where duplicate reads are defined as originating from the same original fragment of DNA, taking into account the CIGAR string of read mates. <br /><br />It is intended as an improvement upon the original MarkDuplicates algorithm, from which it differs in several ways, includingdifferences in how it breaks ties. It may be the most effective duplicate marking program available, as it handles all cases including clipped and gapped alignments and locates duplicate molecules using mate cigar information. However, please note that it is not yet used in the Broad's production pipeline, so use it at your own risk. <br /><br />Note also that this tool will not work with alignments that have large gaps or deletions, such as those from RNA-seq data.  This is due to the need to buffer small genomic windows to ensure integrity of the duplicate marking, while large skips (ex. skipping introns) in the alignment records would force making that window very large, thus exhausting memory. <br /><h4>Usage example:</h4><pre>java -jar picard.jar MarkDuplicatesWithMateCigar \\<br />      I=input.bam \\<br />      O=mark_dups_w_mate_cig.bam \\<br />      M=mark_dups_w_mate_cig_metrics.txt</pre><hr />", usageShort="Identifies duplicate reads, accounting for mate CIGAR.  ", programGroup=SamOrBam.class)
public class MarkDuplicatesWithMateCigar
extends AbstractMarkDuplicatesCommandLineProgram {
    static final String USAGE_SUMMARY = "Identifies duplicate reads, accounting for mate CIGAR.  ";
    static final String USAGE_DETAILS = "This tool locates and tags duplicate reads (both PCR and optical) in a BAM or SAM file, where duplicate reads are defined as originating from the same original fragment of DNA, taking into account the CIGAR string of read mates. <br /><br />It is intended as an improvement upon the original MarkDuplicates algorithm, from which it differs in several ways, includingdifferences in how it breaks ties. It may be the most effective duplicate marking program available, as it handles all cases including clipped and gapped alignments and locates duplicate molecules using mate cigar information. However, please note that it is not yet used in the Broad's production pipeline, so use it at your own risk. <br /><br />Note also that this tool will not work with alignments that have large gaps or deletions, such as those from RNA-seq data.  This is due to the need to buffer small genomic windows to ensure integrity of the duplicate marking, while large skips (ex. skipping introns) in the alignment records would force making that window very large, thus exhausting memory. <br /><h4>Usage example:</h4><pre>java -jar picard.jar MarkDuplicatesWithMateCigar \\<br />      I=input.bam \\<br />      O=mark_dups_w_mate_cig.bam \\<br />      M=mark_dups_w_mate_cig_metrics.txt</pre><hr />";
    private final Log log = Log.getInstance(MarkDuplicatesWithMateCigar.class);
    @Option(doc="The minimum distance to buffer records to account for clipping on the 5' end of the records.Set this number to -1 to use twice the first read's read length (or 100, whichever is smaller).", optional=true)
    public int MINIMUM_DISTANCE = -1;
    @Option(doc="Skip record pairs with no mate cigar and include them in the output.")
    boolean SKIP_PAIRS_WITH_NO_MATE_CIGAR = true;
    @Option(doc="The block size for use in the coordinate-sorted record buffer.", optional=true)
    public int BLOCK_SIZE = 100000;
    private boolean warnedNullProgramRecords = false;
    private boolean warnedMissingProgramRecords = false;

    public static void main(String[] stringArray) {
        new MarkDuplicatesWithMateCigar().instanceMainWithExit(stringArray);
    }

    @Override
    protected int doWork() {
        IOUtil.assertInputsAreValid((List)this.INPUT);
        IOUtil.assertFileIsWritable((File)this.OUTPUT);
        IOUtil.assertFileIsWritable((File)this.METRICS_FILE);
        AbstractMarkDuplicatesCommandLineProgram.SamHeaderAndIterator samHeaderAndIterator = this.openInputs();
        SAMFileHeader sAMFileHeader = samHeaderAndIterator.header;
        SAMFileHeader sAMFileHeader2 = sAMFileHeader.clone();
        if (sAMFileHeader2.getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
            throw new PicardException("This program requires inputs in coordinate SortOrder");
        }
        this.COMMENT.forEach(arg_0 -> ((SAMFileHeader)sAMFileHeader2).addComment(arg_0));
        this.setPGIdsSeen(sAMFileHeader2);
        Map<String, String> map = this.getChainedPgIds(sAMFileHeader2);
        SAMFileWriter sAMFileWriter = new SAMFileWriterFactory().makeSAMOrBAMWriter(sAMFileHeader2, true, this.OUTPUT);
        MarkDuplicatesWithMateCigarIterator markDuplicatesWithMateCigarIterator = new MarkDuplicatesWithMateCigarIterator(samHeaderAndIterator.header, samHeaderAndIterator.iterator, this.opticalDuplicateFinder, this.DUPLICATE_SCORING_STRATEGY, this.MINIMUM_DISTANCE, this.REMOVE_DUPLICATES, this.SKIP_PAIRS_WITH_NO_MATE_CIGAR, this.MAX_RECORDS_IN_RAM, this.BLOCK_SIZE, this.TMP_DIR);
        ProgressLogger progressLogger = new ProgressLogger(this.log, 1000000, "Read");
        for (SAMRecord sAMRecord : new IterableAdapter((Iterator)((Object)markDuplicatesWithMateCigarIterator))) {
            if (progressLogger.record(sAMRecord)) {
                markDuplicatesWithMateCigarIterator.logMemoryStats(this.log);
            }
            this.updateProgramRecord(sAMRecord, map);
            sAMFileWriter.addAlignment(sAMRecord);
        }
        markDuplicatesWithMateCigarIterator.close();
        sAMFileWriter.close();
        Histogram<Short> histogram = markDuplicatesWithMateCigarIterator.getOpticalDupesByLibraryId();
        this.log.info(new Object[]{"Processed " + progressLogger.getCount() + " records"});
        this.log.info(new Object[]{"Found " + markDuplicatesWithMateCigarIterator.getNumRecordsWithNoMateCigar() + " records with no mate cigar optional tag."});
        this.log.info(new Object[]{"Marking " + markDuplicatesWithMateCigarIterator.getNumDuplicates() + " records as duplicates."});
        this.log.info(new Object[]{"Found " + (long)histogram.getSumOfValues() + " optical duplicate clusters."});
        this.finalizeAndWriteMetrics(markDuplicatesWithMateCigarIterator.getLibraryIdGenerator());
        return 0;
    }

    private void updateProgramRecord(SAMRecord sAMRecord, Map<String, String> map) {
        if (this.PROGRAM_RECORD_ID != null) {
            String string = sAMRecord.getStringAttribute(SAMTag.PG.name());
            if (null == string) {
                if (!this.warnedNullProgramRecords) {
                    this.warnedNullProgramRecords = true;
                    this.log.warn(new Object[]{"Encountered a record with no program record, program group chaining will not occur for this read: " + sAMRecord});
                }
            } else if (!map.containsKey(string)) {
                if (!this.warnedMissingProgramRecords) {
                    this.warnedMissingProgramRecords = true;
                    this.log.warn(new Object[]{"Encountered a record with an intermediate program record, program group chaining will not occur for this read: " + sAMRecord});
                }
            } else {
                sAMRecord.setAttribute(SAMTag.PG.name(), (Object)map.get(string));
            }
        }
    }

    private void setPGIdsSeen(SAMFileHeader sAMFileHeader) {
        String string;
        HashSet<String> hashSet = new HashSet<String>();
        for (SAMProgramRecord sAMProgramRecord : sAMFileHeader.getProgramRecords()) {
            string = sAMProgramRecord.getPreviousProgramGroupId();
            if (null == string) continue;
            hashSet.add(string);
        }
        for (SAMProgramRecord sAMProgramRecord : sAMFileHeader.getProgramRecords()) {
            string = sAMProgramRecord.getId();
            if (hashSet.contains(string)) continue;
            this.pgIdsSeen.add(string);
        }
    }
}

