/*
 * Decompiled with CFR 0.152.
 */
package picard.analysis;

import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.reference.ReferenceSequence;
import htsjdk.samtools.util.CollectionUtil;
import htsjdk.samtools.util.Histogram;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import java.io.File;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Set;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import picard.PicardException;
import picard.analysis.MetricAccumulationLevel;
import picard.analysis.SinglePassSamProgram;
import picard.analysis.directed.InsertSizeMetricsCollector;
import picard.cmdline.programgroups.DiagnosticsAndQCProgramGroup;
import picard.util.RExecutor;

@CommandLineProgramProperties(summary="Collect metrics about the insert size distribution of a paired-end library. This tool provides useful metrics for validating library construction including the insert size distribution and read orientation of paired-end libraries.</p>The expected proportions of these metrics vary depending on the type of library preparation used, resulting from technical differences between pair-end libraries and mate-pair libraries. For a brief primer on paired-end sequencing and mate-pair reads, see the <a href='https://www.broadinstitute.org/gatk/guide/article?id=6327'>GATK Dictionary</a>.<p>The CollectInsertSizeMetrics tool outputs the percentages of read pairs in each of the three orientations (FR, RF, and TANDEM) as a histogram. In addition, the insert size distribution is output as both a histogram (.insert_size_Histogram.pdf) and as a data table (.insert_size_metrics.txt).</p><p>Note: Metrics labeled as percentages are actually expressed as fractions!</p><h4>Usage example:</h4><pre>java -jar picard.jar CollectInsertSizeMetrics \\<br />      I=input.bam \\<br />      O=insert_size_metrics.txt \\<br />      H=insert_size_histogram.pdf \\<br />      M=0.5</pre>Note: If processing a small file, set the minimum percentage option (M) to 0.5, otherwise an error may occur. <br /><br />Please see <a href='https://broadinstitute.github.io/picard/picard-metric-definitions.html#InsertSizeMetrics'>InsertSizeMetrics</a> for detailed explanations of each metric.<hr />", oneLineSummary="Collect metrics about the insert size distribution of a paired-end library. ", programGroup=DiagnosticsAndQCProgramGroup.class)
@DocumentedFeature
public class CollectInsertSizeMetrics
extends SinglePassSamProgram {
    static final String USAGE_SUMMARY = "Collect metrics about the insert size distribution of a paired-end library. ";
    static final String USAGE_DETAILED = "This tool provides useful metrics for validating library construction including the insert size distribution and read orientation of paired-end libraries.</p>The expected proportions of these metrics vary depending on the type of library preparation used, resulting from technical differences between pair-end libraries and mate-pair libraries. For a brief primer on paired-end sequencing and mate-pair reads, see the <a href='https://www.broadinstitute.org/gatk/guide/article?id=6327'>GATK Dictionary</a>.<p>The CollectInsertSizeMetrics tool outputs the percentages of read pairs in each of the three orientations (FR, RF, and TANDEM) as a histogram. In addition, the insert size distribution is output as both a histogram (.insert_size_Histogram.pdf) and as a data table (.insert_size_metrics.txt).</p><p>Note: Metrics labeled as percentages are actually expressed as fractions!</p><h4>Usage example:</h4><pre>java -jar picard.jar CollectInsertSizeMetrics \\<br />      I=input.bam \\<br />      O=insert_size_metrics.txt \\<br />      H=insert_size_histogram.pdf \\<br />      M=0.5</pre>Note: If processing a small file, set the minimum percentage option (M) to 0.5, otherwise an error may occur. <br /><br />Please see <a href='https://broadinstitute.github.io/picard/picard-metric-definitions.html#InsertSizeMetrics'>InsertSizeMetrics</a> for detailed explanations of each metric.<hr />";
    private static final Log log = Log.getInstance(CollectInsertSizeMetrics.class);
    protected static final String Histogram_R_SCRIPT = "picard/analysis/insertSizeHistogram.R";
    @Argument(shortName="H", doc="File to write insert size Histogram chart to.")
    public File Histogram_FILE;
    @Argument(doc="Generate mean, sd and plots by trimming the data down to MEDIAN + DEVIATIONS*MEDIAN_ABSOLUTE_DEVIATION. This is done because insert size data typically includes enough anomalous values from chimeras and other artifacts to make the mean and sd grossly misleading regarding the real distribution.")
    public double DEVIATIONS = 10.0;
    @Argument(shortName="W", doc="Explicitly sets the Histogram width, overriding automatic truncation of Histogram tail. Also, when calculating mean and standard deviation, only bins <= Histogram_WIDTH will be included.", optional=true)
    public Integer HISTOGRAM_WIDTH = null;
    @Argument(shortName="MW", doc="Minimum width of histogram plots. In the case when the histogram would otherwise betruncated to a shorter range of sizes, the MIN_HISTOGRAM_WIDTH will enforce a minimum range.", optional=true)
    public Integer MIN_HISTOGRAM_WIDTH = null;
    @Argument(shortName="M", doc="When generating the Histogram, discard any data categories (out of FR, TANDEM, RF) that have fewer than this percentage of overall reads. (Range: 0 to 1).")
    public float MINIMUM_PCT = 0.05f;
    @Argument(shortName="LEVEL", doc="The level(s) at which to accumulate metrics.  ")
    public Set<MetricAccumulationLevel> METRIC_ACCUMULATION_LEVEL = CollectionUtil.makeSet((Object[])new MetricAccumulationLevel[]{MetricAccumulationLevel.ALL_READS});
    @Argument(doc="If true, also include reads marked as duplicates in the insert size histogram.")
    public boolean INCLUDE_DUPLICATES = false;
    private InsertSizeMetricsCollector multiCollector;

    @Override
    protected String[] customCommandLineValidation() {
        ArrayList<String> errorMsgs = new ArrayList<String>();
        if (this.MINIMUM_PCT < 0.0f || (double)this.MINIMUM_PCT > 0.5) {
            errorMsgs.add("MINIMUM_PCT was set to " + this.MINIMUM_PCT + ". It must be between 0 and 0.5 so all data categories don't get discarded.");
        }
        if (!CollectInsertSizeMetrics.checkRInstallation(this.Histogram_FILE != null)) {
            errorMsgs.add("R is not installed on this machine. It is required for creating the chart.");
        }
        return errorMsgs.isEmpty() ? null : errorMsgs.toArray(new String[errorMsgs.size()]);
    }

    @Override
    protected boolean usesNoRefReads() {
        return false;
    }

    @Override
    protected void setup(SAMFileHeader header, File samFile) {
        IOUtil.assertFileIsWritable((File)this.OUTPUT);
        IOUtil.assertFileIsWritable((File)this.Histogram_FILE);
        this.multiCollector = new InsertSizeMetricsCollector(this.METRIC_ACCUMULATION_LEVEL, header.getReadGroups(), this.MINIMUM_PCT, this.HISTOGRAM_WIDTH, this.MIN_HISTOGRAM_WIDTH, this.DEVIATIONS, this.INCLUDE_DUPLICATES);
    }

    @Override
    protected void acceptRead(SAMRecord record, ReferenceSequence ref) {
        this.multiCollector.acceptRecord(record, ref);
    }

    @Override
    protected void finish() {
        this.multiCollector.finish();
        MetricsFile file = this.getMetricsFile();
        this.multiCollector.addAllLevelsToFile(file);
        if (file.getNumHistograms() == 0) {
            log.warn(new Object[]{"All data categories were discarded because they contained < " + this.MINIMUM_PCT + " of the total aligned paired data."});
            InsertSizeMetricsCollector.PerUnitInsertSizeMetricsCollector allReadsCollector = (InsertSizeMetricsCollector.PerUnitInsertSizeMetricsCollector)this.multiCollector.getAllReadsCollector();
            log.warn(new Object[]{"Total mapped pairs in all categories: " + (allReadsCollector == null ? allReadsCollector : Double.valueOf(allReadsCollector.getTotalInserts()))});
        } else {
            file.write(this.OUTPUT);
            ArrayList<String> plotArgs = new ArrayList<String>();
            Collections.addAll(plotArgs, this.OUTPUT.getAbsolutePath(), this.Histogram_FILE.getAbsolutePath().replaceAll("%", "%%"), this.INPUT.getName());
            if (this.HISTOGRAM_WIDTH != null) {
                plotArgs.add(String.valueOf(this.HISTOGRAM_WIDTH));
            } else if (this.MIN_HISTOGRAM_WIDTH != null) {
                int max = (int)file.getAllHistograms().stream().mapToDouble(Histogram::getMax).max().getAsDouble();
                plotArgs.add(String.valueOf(Math.max(max, this.MIN_HISTOGRAM_WIDTH)));
            }
            int rResult = RExecutor.executeFromClasspath(Histogram_R_SCRIPT, plotArgs.toArray(new String[0]));
            if (rResult != 0) {
                throw new PicardException("R script picard/analysis/insertSizeHistogram.R failed with return code " + rResult);
            }
        }
    }
}

