/*
 * Decompiled with CFR 0.152.
 */
package picard.analysis.directed;

import htsjdk.samtools.SAMReadGroupRecord;
import htsjdk.samtools.reference.ReferenceSequenceFile;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.IntervalList;
import htsjdk.samtools.util.StringUtil;
import java.io.File;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import picard.analysis.MetricAccumulationLevel;
import picard.analysis.directed.CollectTargetedMetrics;
import picard.analysis.directed.HsMetricCollector;
import picard.analysis.directed.HsMetrics;
import picard.cmdline.programgroups.DiagnosticsAndQCProgramGroup;

@CommandLineProgramProperties(summary="Collects hybrid-selection (HS) metrics for a SAM or BAM file.  <p>This tool takes a SAM/BAM file input and collects metrics that are specific for sequence datasets generated through hybrid-selection. Hybrid-selection (HS) is the most commonly used technique to capture exon-specific sequences for targeted sequencing experiments such as exome sequencing; for more information, please see the corresponding <a href='http://www.broadinstitute.org/gatk/guide/article?id=6331'>GATK Dictionary entry</a>. </p> <p>This tool requires an aligned SAM or BAM file as well as bait and target interval files in Picard interval_list format. You should use the bait and interval files that correspond to the capture kit that was used to generate the capture libraries for sequencing, which can generally be obtained from the kit manufacturer. If the baits and target intervals are provided in BED format, you can convert them to the Picard interval_list format using Picard's <a href='http://broadinstitute.github.io/picard/command-line-overview.html#BedToIntervalList'>BedToInterval</a> tool. </p><p>If a reference sequence is provided, this program will calculate both AT_DROPOUT and GC_DROPOUT metrics. Dropout metrics are an attempt to measure the reduced representation of reads, in regions that deviate from 50% G/C content. This reduction in the number of aligned reads is due to the increased numbers of errors associated with sequencing regions with excessive or deficient numbers of G/C bases, ultimately leading to poor mapping efficiencies and lowcoverage in the affected regions. </p><p>If you are interested in getting G/C content and mean sequence depth information for every target interval, use the PER_TARGET_COVERAGE option. </p><p>Note: Metrics labeled as percentages are actually expressed as fractions!</p>  <h4>Usage Example:</h4><pre>java -jar picard.jar CollectHsMetrics \\<br />      I=input_reads.bam \\<br />      O=output_hs_metrics.txt \\<br />      R=reference.fasta \\<br />      BAIT_INTERVALS=bait.interval_list \\<br />      TARGET_INTERVALS=target.interval_list</pre> <p>Please see <a href='http://broadinstitute.github.io/picard/picard-metric-definitions.html#HsMetrics'>CollectHsMetrics</a> for detailed descriptions of the output metrics produced by this tool.</p><hr />", oneLineSummary="Collects hybrid-selection (HS) metrics for a SAM or BAM file.  ", programGroup=DiagnosticsAndQCProgramGroup.class)
@DocumentedFeature
public class CollectHsMetrics
extends CollectTargetedMetrics<HsMetrics, HsMetricCollector> {
    static final String USAGE_SUMMARY = "Collects hybrid-selection (HS) metrics for a SAM or BAM file.  ";
    static final String USAGE_DETAILS = "<p>This tool takes a SAM/BAM file input and collects metrics that are specific for sequence datasets generated through hybrid-selection. Hybrid-selection (HS) is the most commonly used technique to capture exon-specific sequences for targeted sequencing experiments such as exome sequencing; for more information, please see the corresponding <a href='http://www.broadinstitute.org/gatk/guide/article?id=6331'>GATK Dictionary entry</a>. </p> <p>This tool requires an aligned SAM or BAM file as well as bait and target interval files in Picard interval_list format. You should use the bait and interval files that correspond to the capture kit that was used to generate the capture libraries for sequencing, which can generally be obtained from the kit manufacturer. If the baits and target intervals are provided in BED format, you can convert them to the Picard interval_list format using Picard's <a href='http://broadinstitute.github.io/picard/command-line-overview.html#BedToIntervalList'>BedToInterval</a> tool. </p><p>If a reference sequence is provided, this program will calculate both AT_DROPOUT and GC_DROPOUT metrics. Dropout metrics are an attempt to measure the reduced representation of reads, in regions that deviate from 50% G/C content. This reduction in the number of aligned reads is due to the increased numbers of errors associated with sequencing regions with excessive or deficient numbers of G/C bases, ultimately leading to poor mapping efficiencies and lowcoverage in the affected regions. </p><p>If you are interested in getting G/C content and mean sequence depth information for every target interval, use the PER_TARGET_COVERAGE option. </p><p>Note: Metrics labeled as percentages are actually expressed as fractions!</p>  <h4>Usage Example:</h4><pre>java -jar picard.jar CollectHsMetrics \\<br />      I=input_reads.bam \\<br />      O=output_hs_metrics.txt \\<br />      R=reference.fasta \\<br />      BAIT_INTERVALS=bait.interval_list \\<br />      TARGET_INTERVALS=target.interval_list</pre> <p>Please see <a href='http://broadinstitute.github.io/picard/picard-metric-definitions.html#HsMetrics'>CollectHsMetrics</a> for detailed descriptions of the output metrics produced by this tool.</p><hr />";
    @Argument(shortName="BI", doc="An interval list file that contains the locations of the baits used.", minElements=1)
    public List<File> BAIT_INTERVALS;
    @Argument(shortName="N", doc="Bait set name. If not provided it is inferred from the filename of the bait intervals.", optional=true)
    public String BAIT_SET_NAME;

    public CollectHsMetrics() {
        this.MINIMUM_MAPPING_QUALITY = 20;
        this.MINIMUM_BASE_QUALITY = 20;
        this.CLIP_OVERLAPPING_READS = true;
    }

    @Override
    protected IntervalList getProbeIntervals() {
        for (File file : this.BAIT_INTERVALS) {
            IOUtil.assertFileIsReadable((File)file);
        }
        return IntervalList.fromFiles(this.BAIT_INTERVALS);
    }

    @Override
    protected String getProbeSetName() {
        if (this.BAIT_SET_NAME != null) {
            return this.BAIT_SET_NAME;
        }
        TreeSet<String> baitSetNames = new TreeSet<String>();
        for (File file : this.BAIT_INTERVALS) {
            baitSetNames.add(CollectTargetedMetrics.renderProbeNameFromFile(file));
        }
        return StringUtil.join((String)".", baitSetNames);
    }

    public static void main(String[] argv) {
        System.exit(new CollectHsMetrics().instanceMain(argv));
    }

    @Override
    protected HsMetricCollector makeCollector(Set<MetricAccumulationLevel> accumulationLevels, List<SAMReadGroupRecord> samRgRecords, ReferenceSequenceFile refFile, File perTargetCoverage, File perBaseCoverage, IntervalList targetIntervals, IntervalList probeIntervals, String probeSetName, int nearProbeDistance) {
        return new HsMetricCollector(accumulationLevels, samRgRecords, refFile, perTargetCoverage, perBaseCoverage, targetIntervals, probeIntervals, probeSetName, nearProbeDistance, this.MINIMUM_MAPPING_QUALITY, this.MINIMUM_BASE_QUALITY, this.CLIP_OVERLAPPING_READS, true, this.COVERAGE_CAP, this.SAMPLE_SIZE);
    }
}

