/*
 * Decompiled with CFR 0.152.
 */
package picard.illumina.quality;

import htsjdk.samtools.metrics.MetricBase;
import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.programgroups.DiagnosticsAndQCProgramGroup;
import picard.illumina.parser.BaseIlluminaDataProvider;
import picard.illumina.parser.ClusterData;
import picard.illumina.parser.IlluminaDataProviderFactory;
import picard.illumina.parser.IlluminaDataType;
import picard.illumina.parser.ReadData;
import picard.illumina.parser.ReadStructure;
import picard.illumina.parser.readers.BclQualityEvaluationStrategy;

@CommandLineProgramProperties(summary="Classify PF-Failing reads in a HiSeqX Illumina Basecalling directory into various categories.<p>This tool categorizes the reads that did not pass filter (PF-Failing) into four groups.  These groups are based on a heuristic that was derived by looking at a few titration experiments. </p><p>After examining the called bases from the first 24 cycles of each read, the PF-Failed reads are grouped into the following four categories: <ul><li>MISALIGNED - The first 24 basecalls of a read are uncalled (numNs~24).   These types of reads appear to be flow cell artifacts because reads were only found near tile boundaries and were concentration (library) independent</li> <li>EMPTY - All 24 bases are called (numNs~0) but the number of bases with quality scores greater than two is less than or equal to eight (numQGtTwo<=8).  These reads were location independent within the tiles and were inversely proportional to the library concentration</li><li>POLYCLONAL - All 24 bases were called and numQGtTwo>=12, were independent of their location with the tiles, and were directly proportional to the library concentration.  These reads are likely the result of PCR artifacts </li><li>UNKNOWN - The remaining reads that are PF-Failing but did not fit into any of the groups listed above</li></ul></p>  <p>The tool defaults to the SUMMARY output which indicates the number of PF-Failed reads per tile and groups them into the categories described above accordingly.</p> <p>A DETAILED metrics option is also available that subdivides the SUMMARY outputs by the x- y- position of these reads within each tile.  To obtain the DETAILED metric table, you must add the PROB_EXPLICIT_READS option to your command line and set the value between 0 and 1.  This value represents the fractional probability of PF-Failed reads to send to output.  For example, if PROB_EXPLICIT_READS=0, then no metrics will be output.  
If PROB_EXPLICIT_READS=1, then it will provide detailed metrics for all (100%) of the reads.  It follows that setting the PROB_EXPLICIT_READS=0.5, will provide detailed metrics for half of the PF-Failed reads.</p> <p>Note: Metrics labeled as percentages are actually expressed as fractions!</p><h4>Usage example: (SUMMARY Metrics)</h4> <pre>java -jar picard.jar CollectHiSeqXPfFailMetrics \\<br />      BASECALLS_DIR=/BaseCalls/ \\<br />      OUTPUT=/metrics/ \\<br />      LANE=001</pre><h4>Usage example: (DETAILED Metrics)</h4><pre>java -jar picard.jar CollectHiSeqXPfFailMetrics \\<br />      BASECALLS_DIR=/BaseCalls/ \\<br />      OUTPUT=/Detail_metrics/ \\<br />      LANE=001 \\<br />      PROB_EXPLICIT_READS=1</pre>Please see our documentation on the <a href='https://broadinstitute.github.io/picard/picard-metric-definitions.html#CollectHiSeqXPfFailMetrics.PFFailSummaryMetric'>SUMMARY</a> and <a href='https://broadinstitute.github.io/picard/picard-metric-definitions.html#CollectHiSeqXPfFailMetrics.PFFailDetailedMetric'>DETAILED</a> metrics for comprehensive explanations of the outputs produced by this tool.<hr />", oneLineSummary="Classify PF-Failing reads in a HiSeqX Illumina Basecalling directory into various categories.", programGroup=DiagnosticsAndQCProgramGroup.class)
@DocumentedFeature
public class CollectHiSeqXPfFailMetrics
extends CommandLineProgram {
    static final String USAGE_SUMMARY = "Classify PF-Failing reads in a HiSeqX Illumina Basecalling directory into various categories.";
    static final String USAGE_DETAILS = "<p>This tool categorizes the reads that did not pass filter (PF-Failing) into four groups.  These groups are based on a heuristic that was derived by looking at a few titration experiments. </p><p>After examining the called bases from the first 24 cycles of each read, the PF-Failed reads are grouped into the following four categories: <ul><li>MISALIGNED - The first 24 basecalls of a read are uncalled (numNs~24).   These types of reads appear to be flow cell artifacts because reads were only found near tile boundaries and were concentration (library) independent</li> <li>EMPTY - All 24 bases are called (numNs~0) but the number of bases with quality scores greater than two is less than or equal to eight (numQGtTwo<=8).  These reads were location independent within the tiles and were inversely proportional to the library concentration</li><li>POLYCLONAL - All 24 bases were called and numQGtTwo>=12, were independent of their location with the tiles, and were directly proportional to the library concentration.  These reads are likely the result of PCR artifacts </li><li>UNKNOWN - The remaining reads that are PF-Failing but did not fit into any of the groups listed above</li></ul></p>  <p>The tool defaults to the SUMMARY output which indicates the number of PF-Failed reads per tile and groups them into the categories described above accordingly.</p> <p>A DETAILED metrics option is also available that subdivides the SUMMARY outputs by the x- y- position of these reads within each tile.  To obtain the DETAILED metric table, you must add the PROB_EXPLICIT_READS option to your command line and set the value between 0 and 1.  This value represents the fractional probability of PF-Failed reads to send to output.  For example, if PROB_EXPLICIT_READS=0, then no metrics will be output.  If PROB_EXPLICIT_READS=1, then it will provide detailed metrics for all (100%) of the reads.  
It follows that setting the PROB_EXPLICIT_READS=0.5, will provide detailed metrics for half of the PF-Failed reads.</p> <p>Note: Metrics labeled as percentages are actually expressed as fractions!</p><h4>Usage example: (SUMMARY Metrics)</h4> <pre>java -jar picard.jar CollectHiSeqXPfFailMetrics \\<br />      BASECALLS_DIR=/BaseCalls/ \\<br />      OUTPUT=/metrics/ \\<br />      LANE=001</pre><h4>Usage example: (DETAILED Metrics)</h4><pre>java -jar picard.jar CollectHiSeqXPfFailMetrics \\<br />      BASECALLS_DIR=/BaseCalls/ \\<br />      OUTPUT=/Detail_metrics/ \\<br />      LANE=001 \\<br />      PROB_EXPLICIT_READS=1</pre>Please see our documentation on the <a href='https://broadinstitute.github.io/picard/picard-metric-definitions.html#CollectHiSeqXPfFailMetrics.PFFailSummaryMetric'>SUMMARY</a> and <a href='https://broadinstitute.github.io/picard/picard-metric-definitions.html#CollectHiSeqXPfFailMetrics.PFFailDetailedMetric'>DETAILED</a> metrics for comprehensive explanations of the outputs produced by this tool.<hr />";
    // ---- Command-line arguments -------------------------------------------------------------

    /** Basecalls directory handed to the IlluminaDataProviderFactory in doWork(). */
    @Argument(doc="The Illumina basecalls directory. ", shortName="B")
    public File BASECALLS_DIR;

    /** Output basename; the summary/detailed extensions below are appended to it in doWork(). */
    @Argument(shortName="O", doc="Basename for metrics file. Resulting file will be <OUTPUT>.pffail_summary_metrics", optional=false)
    public File OUTPUT;

    /** Probability with which each PF-failing read is also reported in the detailed metrics. */
    @Argument(shortName="P", doc="The fraction of (non-PF) reads for which to output explicit classification. Output file will be <OUTPUT>.pffail_detailed_metrics (if PROB_EXPLICIT_READS != 0)", optional=true)
    public double PROB_EXPLICIT_READS = 0.0;

    /** Lane to process. */
    @Argument(doc="Lane number.", shortName="L")
    public Integer LANE;

    /**
     * Thread-pool size selector: a positive value is used as-is, 0 means "all available cores", and a
     * negative value is added to the number of available cores (see doWork()).
     */
    @Argument(shortName="NP", doc="Run this many PerTilePFMetricsExtractors in parallel.  If NUM_PROCESSORS = 0, number of cores is automatically set to the number of cores available on the machine. If NUM_PROCESSORS < 0 then the number of cores used will be the number available on the machine less the absolute value of NUM_PROCESSORS.", optional=true)
    public int NUM_PROCESSORS = 1;

    /** Number of template cycles to inspect per read. */
    @Argument(doc="Number of cycles to look at. At time of writing PF status gets determined at cycle 24 so numbers greater than this will yield strange results. In addition, PF status is currently determined at cycle 24, so running this with any other value is neither tested nor recommended.", optional=true)
    public int N_CYCLES = 24;

    // ---- Internal state ---------------------------------------------------------------------

    private static final Log LOG = Log.getInstance(CollectHiSeqXPfFailMetrics.class);

    /** Per-tile summary counters, kept in insertion (tile discovery) order. */
    private final Map<Integer, PFFailSummaryMetric> tileToSummaryMetrics = new LinkedHashMap<Integer, PFFailSummaryMetric>();

    /** Per-tile sampled detailed records, kept in insertion (tile discovery) order. */
    private final Map<Integer, List<PFFailDetailedMetric>> tileToDetailedMetrics = new LinkedHashMap<Integer, List<PFFailDetailedMetric>>();

    // Appending "T" to the cycle count describes a single template read of that length.
    // NOTE(review): this initializer runs when the instance is constructed, so it sees the
    // N_CYCLES initializer value (24). Presumably the command-line parser assigns N_CYCLES
    // only afterwards, in which case a user-supplied N_CYCLES is not reflected here - confirm.
    private final ReadStructure READ_STRUCTURE = new ReadStructure(this.N_CYCLES + "T");

    /** Suffix appended to OUTPUT for the detailed metrics file. */
    public static final String detailedMetricsExtension = ".pffail_detailed_metrics";

    /** Suffix appended to OUTPUT for the summary metrics file. */
    public static final String summaryMetricsExtension = ".pffail_summary_metrics";

    /**
     * Validates the argument values that need range checks.
     *
     * @return the collected error messages when N_CYCLES or PROB_EXPLICIT_READS is out of range;
     *         otherwise whatever the superclass validation returns
     */
    @Override
    protected String[] customCommandLineValidation() {
        final List<String> errors = new ArrayList<String>();
        // Reject zero as well as negatives: the message promises "greater than 0".
        if (this.N_CYCLES <= 0) {
            errors.add("Number of Cycles to look at must be greater than 0");
        }
        // Written as a negated range check so that NaN (which fails every comparison) is rejected too.
        if (!(this.PROB_EXPLICIT_READS >= 0.0 && this.PROB_EXPLICIT_READS <= 1.0)) {
            errors.add("PROB_EXPLICIT_READS must be a probability, i.e., 0 <= PROB_EXPLICIT_READS <= 1");
        }
        if (!errors.isEmpty()) {
            return errors.toArray(new String[0]);
        }
        return super.customCommandLineValidation();
    }

    /**
     * Runs one {@link PerTilePFMetricsExtractor} per available tile on a fixed-size thread pool, then
     * writes the summary metrics (an "All" row followed by one row per tile) and, when
     * PROB_EXPLICIT_READS is greater than zero, the detailed metrics.
     *
     * @return 0 on success; 2 if submitting to, or waiting on, the thread pool failed; 4 if any
     *         extractor recorded an exception
     */
    @Override
    protected int doWork() {
        // Build the read structure from the current N_CYCLES value. The READ_STRUCTURE field is
        // initialised at construction time and therefore only ever reflects the default cycle count.
        final ReadStructure readStructure = new ReadStructure(this.N_CYCLES + "T");
        final IlluminaDataProviderFactory factory = new IlluminaDataProviderFactory(
                this.BASECALLS_DIR,
                this.LANE,
                readStructure,
                new BclQualityEvaluationStrategy(2),
                new HashSet<IlluminaDataType>(Arrays.asList(
                        IlluminaDataType.BaseCalls,
                        IlluminaDataType.PF,
                        IlluminaDataType.QualityScores,
                        IlluminaDataType.Position)));

        final File summaryMetricsFileName = new File(this.OUTPUT + summaryMetricsExtension);
        final File detailedMetricsFileName = new File(this.OUTPUT + detailedMetricsExtension);
        // One flag decides both the writability check and the write itself.
        final boolean writeDetailed = this.PROB_EXPLICIT_READS > 0.0;

        IOUtil.assertFileIsWritable(summaryMetricsFileName);
        if (writeDetailed) {
            IOUtil.assertFileIsWritable(detailedMetricsFileName);
        }

        final int availableCores = Runtime.getRuntime().availableProcessors();
        final int numProcessors;
        if (this.NUM_PROCESSORS == 0) {
            numProcessors = availableCores;
        } else if (this.NUM_PROCESSORS < 0) {
            // A large negative value must not drive the pool size to zero or below,
            // which Executors.newFixedThreadPool rejects with IllegalArgumentException.
            numProcessors = Math.max(1, availableCores + this.NUM_PROCESSORS);
        } else {
            numProcessors = this.NUM_PROCESSORS;
        }
        LOG.info("Processing with " + numProcessors + " PerTilePFMetricsExtractor(s).");

        final List<PerTilePFMetricsExtractor> extractors =
                new ArrayList<PerTilePFMetricsExtractor>(factory.getAvailableTiles().size());
        for (final int tile : factory.getAvailableTiles()) {
            final PFFailSummaryMetric tileSummary = new PFFailSummaryMetric(Integer.toString(tile));
            final List<PFFailDetailedMetric> tileDetails = new ArrayList<PFFailDetailedMetric>();
            this.tileToSummaryMetrics.put(tile, tileSummary);
            this.tileToDetailedMetrics.put(tile, tileDetails);
            extractors.add(new PerTilePFMetricsExtractor(tile, tileSummary, tileDetails, factory, this.PROB_EXPLICIT_READS));
        }

        final ExecutorService pool = Executors.newFixedThreadPool(numProcessors);
        try {
            for (final PerTilePFMetricsExtractor extractor : extractors) {
                pool.submit(extractor);
            }
            pool.shutdown();
            // Effectively "wait forever": every tile must finish before results are merged.
            pool.awaitTermination(Long.MAX_VALUE, TimeUnit.DAYS);
        }
        catch (Throwable e) {
            if (e instanceof InterruptedException) {
                // Restore the interrupt flag so callers further up can still observe it.
                Thread.currentThread().interrupt();
            }
            LOG.error(e, "Parent thread encountered problem submitting extractors to thread pool or awaiting shutdown of threadpool.  Attempting to kill threadpool.");
            pool.shutdownNow();
            return 2;
        }
        LOG.info("Processed " + extractors.size() + " tiles.");

        // A single failed tile invalidates the aggregate, so nothing is written in that case.
        for (final PerTilePFMetricsExtractor extractor : extractors) {
            if (extractor.getException() != null) {
                LOG.error("Abandoning metrics calculation because one or more PerTilePFMetricsExtractors failed.");
                return 4;
            }
        }

        if (writeDetailed) {
            final MetricsFile<PFFailDetailedMetric, ?> detailedMetrics = this.getMetricsFile();
            for (final List<PFFailDetailedMetric> tileDetails : this.tileToDetailedMetrics.values()) {
                for (final PFFailDetailedMetric metric : tileDetails) {
                    detailedMetrics.addMetric(metric);
                }
            }
            detailedMetrics.write(detailedMetricsFileName);
        }

        // The "All" row is merged from the raw per-tile counters and is written first.
        final PFFailSummaryMetric totalMetric = new PFFailSummaryMetric("All");
        for (final PFFailSummaryMetric tileSummary : this.tileToSummaryMetrics.values()) {
            totalMetric.merge(tileSummary);
        }
        totalMetric.calculateDerivedFields();

        final MetricsFile<PFFailSummaryMetric, ?> summaryMetricsFile = this.getMetricsFile();
        summaryMetricsFile.addMetric(totalMetric);
        for (final PFFailSummaryMetric tileSummary : this.tileToSummaryMetrics.values()) {
            tileSummary.calculateDerivedFields();
            summaryMetricsFile.addMetric(tileSummary);
        }
        summaryMetricsFile.write(summaryMetricsFileName);
        return 0;
    }

    /**
     * Counts the elements of {@code array} that are equal to {@code toCount}.
     *
     * @param array   the bytes to scan
     * @param toCount the value to look for
     * @return the number of matching elements
     */
    private static int countEquals(byte[] array, byte toCount) {
        int matches = 0;
        for (int i = 0; i < array.length; i++) {
            if (array[i] == toCount) {
                matches++;
            }
        }
        return matches;
    }

    /**
     * Counts the elements of {@code array} that are strictly greater than {@code value}.
     *
     * @param array the bytes to scan
     * @param value the exclusive lower bound
     * @return the number of elements above {@code value}
     */
    private static int countGreaterThan(byte[] array, byte value) {
        int above = 0;
        for (int i = 0; i < array.length; i++) {
            if (array[i] > value) {
                above++;
            }
        }
        return above;
    }

    /**
     * Per-tile (or, for the row labelled "All", whole-lane) tally of PF-failing reads by category.
     * The PCT_* fields are fractions of READS, filled in by {@link #calculateDerivedFields()}.
     */
    @DocumentedFeature(groupName="Metrics", summary="Metrics")
    public static class PFFailSummaryMetric
    extends MetricBase {
        /** Tile label, or "All" for the merged row. */
        public String TILE = null;
        /** Number of clusters counted. */
        public int READS = 0;
        /** Number of clusters that did not pass filter. */
        public int PF_FAIL_READS = 0;
        /** PF_FAIL_READS as a fraction of READS. */
        public double PCT_PF_FAIL_READS = 0.0;
        /** Number of PF-failing reads classified as EMPTY. */
        public int PF_FAIL_EMPTY = 0;
        /** PF_FAIL_EMPTY as a fraction of READS. */
        public double PCT_PF_FAIL_EMPTY = 0.0;
        /** Number of PF-failing reads classified as POLYCLONAL. */
        public int PF_FAIL_POLYCLONAL = 0;
        /** PF_FAIL_POLYCLONAL as a fraction of READS. */
        public double PCT_PF_FAIL_POLYCLONAL = 0.0;
        /** Number of PF-failing reads classified as MISALIGNED. */
        public int PF_FAIL_MISALIGNED = 0;
        /** PF_FAIL_MISALIGNED as a fraction of READS. */
        public double PCT_PF_FAIL_MISALIGNED = 0.0;
        /** Number of PF-failing reads classified as UNKNOWN. */
        public int PF_FAIL_UNKNOWN = 0;
        /** PF_FAIL_UNKNOWN as a fraction of READS. */
        public double PCT_PF_FAIL_UNKNOWN = 0.0;

        /**
         * Creates an empty tally for the given tile label.
         *
         * @param tile the tile label to report
         */
        public PFFailSummaryMetric(String tile) {
            this.TILE = tile;
        }

        /** No-argument constructor; leaves every field at its default. */
        public PFFailSummaryMetric() {
        }

        /**
         * Adds the raw counters of {@code metric} to this one. The PCT_* fields are not touched;
         * call {@link #calculateDerivedFields()} afterwards.
         *
         * @param metric the tally to fold into this one
         */
        public void merge(PFFailSummaryMetric metric) {
            this.READS = this.READS + metric.READS;
            this.PF_FAIL_READS = this.PF_FAIL_READS + metric.PF_FAIL_READS;
            this.PF_FAIL_EMPTY = this.PF_FAIL_EMPTY + metric.PF_FAIL_EMPTY;
            this.PF_FAIL_POLYCLONAL = this.PF_FAIL_POLYCLONAL + metric.PF_FAIL_POLYCLONAL;
            this.PF_FAIL_MISALIGNED = this.PF_FAIL_MISALIGNED + metric.PF_FAIL_MISALIGNED;
            this.PF_FAIL_UNKNOWN = this.PF_FAIL_UNKNOWN + metric.PF_FAIL_UNKNOWN;
        }

        /**
         * Recomputes every PCT_* field as the matching counter divided by READS.
         * When READS is zero the PCT_* fields are left as they are.
         */
        public void calculateDerivedFields() {
            if (this.READS == 0) {
                return;
            }
            final double total = (double)this.READS;
            this.PCT_PF_FAIL_READS = this.PF_FAIL_READS / total;
            this.PCT_PF_FAIL_EMPTY = this.PF_FAIL_EMPTY / total;
            this.PCT_PF_FAIL_POLYCLONAL = this.PF_FAIL_POLYCLONAL / total;
            this.PCT_PF_FAIL_MISALIGNED = this.PF_FAIL_MISALIGNED / total;
            this.PCT_PF_FAIL_UNKNOWN = this.PF_FAIL_UNKNOWN / total;
        }
    }

    /**
     * One row of the detailed output: a single sampled PF-failing read, its position and
     * the counts that led to its classification.
     */
    @DocumentedFeature(groupName="Metrics", summary="Metrics")
    public static class PFFailDetailedMetric
    extends MetricBase {
        /** Tile the read came from. */
        public Integer TILE;
        /** X coordinate of the cluster. */
        public int X;
        /** Y coordinate of the cluster. */
        public int Y;
        /** Number of no-call bases counted in the read. */
        public int NUM_N;
        /** Number of bases whose quality is greater than two. */
        public int NUM_Q_GT_TWO;
        /** Category assigned by {@link ReadClassifier}. */
        public ReadClassifier.PfFailReason CLASSIFICATION;

        /**
         * Creates a fully populated row.
         *
         * @param TILE           tile the read came from
         * @param x              X coordinate of the cluster
         * @param y              Y coordinate of the cluster
         * @param NUM_N          number of no-call bases
         * @param NUM_Q_GT_TWO   number of bases with quality greater than two
         * @param CLASSIFICATION category assigned to the read
         */
        public PFFailDetailedMetric(Integer TILE, int x, int y, int NUM_N, int NUM_Q_GT_TWO, ReadClassifier.PfFailReason CLASSIFICATION) {
            // Location first, then the evidence, then the verdict.
            this.TILE = TILE;
            this.X = x;
            this.Y = y;

            this.NUM_N = NUM_N;
            this.NUM_Q_GT_TWO = NUM_Q_GT_TWO;

            this.CLASSIFICATION = CLASSIFICATION;
        }

        /** No-argument constructor; leaves every field at its default. */
        public PFFailDetailedMetric() {
        }
    }

    /**
     * Assigns a {@link PfFailReason} to a read from two counts: how many of its bases are the
     * no-call character '.', and how many of its qualities are greater than two.
     */
    protected static class ReadClassifier {
        private final int numNs;
        private final int numQGtTwo;
        private PfFailReason failClass;

        /**
         * Counts the no-calls and above-two qualities of {@code read} and classifies it.
         *
         * @param read the read to classify
         */
        public ReadClassifier(ReadData read) {
            final byte[] bases = read.getBases();
            final int length = bases.length;
            // (byte)'.' == 46, the value counted as a no-call.
            this.numNs = CollectHiSeqXPfFailMetrics.countEquals(bases, (byte)'.');
            this.numQGtTwo = CollectHiSeqXPfFailMetrics.countGreaterThan(read.getQualities(), (byte)2);
            this.failClass = ReadClassifier.classify(length, this.numNs, this.numQGtTwo);
        }

        /**
         * Applies the thresholds: at most one called base means MISALIGNED; otherwise, with at most
         * one no-call, at most a third high-quality bases means EMPTY and at least half means
         * POLYCLONAL; everything else is UNKNOWN. Thresholds use integer division of the length.
         */
        private static PfFailReason classify(int length, int noCalls, int qualityAboveTwo) {
            if (noCalls >= length - 1) {
                return PfFailReason.MISALIGNED;
            }
            if (noCalls > 1) {
                return PfFailReason.UNKNOWN;
            }
            if (qualityAboveTwo <= length / 3) {
                return PfFailReason.EMPTY;
            }
            if (qualityAboveTwo >= length / 2) {
                return PfFailReason.POLYCLONAL;
            }
            return PfFailReason.UNKNOWN;
        }

        /** Categories a PF-failing read can be assigned to. */
        public static enum PfFailReason {
            EMPTY,
            POLYCLONAL,
            MISALIGNED,
            UNKNOWN;

        }
    }

    private static class PerTilePFMetricsExtractor
    implements Runnable {
        private final int tile;
        private final PFFailSummaryMetric summaryMetric;
        final Collection<PFFailDetailedMetric> detailedMetrics;
        private Exception exception = null;
        private final BaseIlluminaDataProvider provider;
        private final double pWriteDetailed;
        private final Random random = new Random();

        public PerTilePFMetricsExtractor(int tile, PFFailSummaryMetric summaryMetric, Collection<PFFailDetailedMetric> detailedMetrics, IlluminaDataProviderFactory factory, double pWriteDetailed) {
            this.tile = tile;
            this.summaryMetric = summaryMetric;
            this.detailedMetrics = detailedMetrics;
            this.pWriteDetailed = pWriteDetailed;
            this.provider = factory.makeDataProvider(tile);
        }

        public Exception getException() {
            return this.exception;
        }

        @Override
        public void run() {
            try {
                LOG.info(new Object[]{"Extracting PF metrics for tile " + this.tile});
                block11: while (this.provider.hasNext()) {
                    ClusterData cluster = (ClusterData)this.provider.next();
                    ++this.summaryMetric.READS;
                    if (cluster.isPf().booleanValue()) continue;
                    ++this.summaryMetric.PF_FAIL_READS;
                    ReadClassifier readClassifier = new ReadClassifier(cluster.getRead(0));
                    if (this.random.nextDouble() < this.pWriteDetailed) {
                        this.detailedMetrics.add(new PFFailDetailedMetric(this.tile, cluster.getX(), cluster.getY(), readClassifier.numNs, readClassifier.numQGtTwo, readClassifier.failClass));
                    }
                    switch (readClassifier.failClass) {
                        case EMPTY: {
                            ++this.summaryMetric.PF_FAIL_EMPTY;
                            continue block11;
                        }
                        case MISALIGNED: {
                            ++this.summaryMetric.PF_FAIL_MISALIGNED;
                            continue block11;
                        }
                        case POLYCLONAL: {
                            ++this.summaryMetric.PF_FAIL_POLYCLONAL;
                            continue block11;
                        }
                        case UNKNOWN: {
                            ++this.summaryMetric.PF_FAIL_UNKNOWN;
                            continue block11;
                        }
                    }
                    LOG.error(new Object[]{"Got unexpected fail Reason"});
                }
            }
            catch (Exception e) {
                LOG.error((Throwable)e, new Object[]{"Error processing tile ", this.tile});
                this.exception = e;
            }
            finally {
                this.provider.close();
            }
        }
    }
}

