/*
 * Decompiled with CFR 0.152.
 */
package picard.illumina;

import htsjdk.samtools.util.CloserUtil;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.StringUtil;
import java.io.BufferedWriter;
import java.io.File;
import java.text.NumberFormat;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import picard.PicardException;
import picard.cmdline.programgroups.BaseCallingProgramGroup;
import picard.illumina.BarcodeExtractor;
import picard.illumina.BarcodeMetric;
import picard.illumina.ExtractBarcodesProgram;
import picard.illumina.parser.BaseIlluminaDataProvider;
import picard.illumina.parser.ClusterData;
import picard.illumina.parser.IlluminaDataProviderFactory;
import picard.illumina.parser.IlluminaDataType;
import picard.illumina.parser.ReadDescriptor;
import picard.illumina.parser.ReadStructure;
import picard.illumina.parser.ReadType;
import picard.util.IlluminaUtil;
import picard.util.ThreadPoolExecutorUtil;
import picard.util.ThreadPoolExecutorWithExceptions;

@CommandLineProgramProperties(summary="Tool determines the barcode for each read in an Illumina lane.  <p>This tool determines the numbers of reads containing barcode-matching sequences and provides statistics on the quality of these barcode matches.</p> <p>Illumina sequences can contain at least two types of barcodes, sample and molecular (index).  Sample barcodes (B in the read structure) are used to demultiplex pooled samples while index barcodes (M in the read structure) are used to differentiate multiple reads of a template when carrying out paired-end sequencing.  Note that this tool only extracts sample (B) and not molecular barcodes (M).</p><p>Barcodes can be provided in the form of a list (BARCODE_FILE) or a string representing the barcode (BARCODE).  The BARCODE_FILE contains multiple fields including 'barcode_sequence' (or 'barcode_sequence_1'), 'barcode_sequence_2' (optional), 'barcode_name', and 'library_name'. In contrast, the BARCODE argument is used for runs with reads containing a single barcode (nonmultiplexed) and can be added directly as a string of text e.g. BARCODE=CAATAGCG.</p><p>Data is output per lane/tile within the BaseCalls directory with the file name format of 's_{lane}_{tile}_barcode.txt'.  These files contain the following tab-separated columns:<ul> <li>Read subsequence at barcode position</li><li>Y or N indicating if there was a barcode match</li><li>Matched barcode sequence (empty if read did not match one of the barcodes)</li>  <li>The number of mismatches if there was a barcode match</li>  <li>The number of mismatches to the second best barcode if there was a barcode match</li>  </ul>If there is no match but we're close to the threshold of calling it a match, we output the barcode that would have been matched but in lower case.  Threshold values can be adjusted to accommodate barcode sequence mismatches from the reads.  
The metrics file produced by the ExtractIlluminaBarcodes program indicates the number of matches (and mismatches) between the barcode reads and the actual barcodes.  These metrics are provided both per-barcode and per lane and can be found in the BaseCalls directory.</p><p>For poorly matching barcodes, the order of specification of barcodes can cause arbitrary output differences.</p><h4>Usage example:</h4> <pre>java -jar picard.jar ExtractIlluminaBarcodes \\<br />              BASECALLS_DIR=/BaseCalls/ \\<br />              LANE=1 \\<br />          READ_STRUCTURE=25T8B25T \\<br />              BARCODE_FILE=barcodes.txt \\<br />              METRICS_FILE=metrics_output.txt </pre>Please see the ExtractIlluminaBarcodes.BarcodeMetric <a href='http://broadinstitute.github.io/picard/picard-metric-definitions.html#ExtractIlluminaBarcodes.BarcodeMetric'>definitions</a> for a complete description of the metrics produced by this tool.</p><hr />", oneLineSummary="Tool determines the barcode for each read in an Illumina lane.  ", programGroup=BaseCallingProgramGroup.class)
@DocumentedFeature
public class ExtractIlluminaBarcodes
extends ExtractBarcodesProgram {
    static final String USAGE_SUMMARY = "Tool determines the barcode for each read in an Illumina lane.  ";
    static final String USAGE_DETAILS = "<p>This tool determines the numbers of reads containing barcode-matching sequences and provides statistics on the quality of these barcode matches.</p> <p>Illumina sequences can contain at least two types of barcodes, sample and molecular (index).  Sample barcodes (B in the read structure) are used to demultiplex pooled samples while index barcodes (M in the read structure) are used to differentiate multiple reads of a template when carrying out paired-end sequencing.  Note that this tool only extracts sample (B) and not molecular barcodes (M).</p><p>Barcodes can be provided in the form of a list (BARCODE_FILE) or a string representing the barcode (BARCODE).  The BARCODE_FILE contains multiple fields including 'barcode_sequence' (or 'barcode_sequence_1'), 'barcode_sequence_2' (optional), 'barcode_name', and 'library_name'. In contrast, the BARCODE argument is used for runs with reads containing a single barcode (nonmultiplexed) and can be added directly as a string of text e.g. BARCODE=CAATAGCG.</p><p>Data is output per lane/tile within the BaseCalls directory with the file name format of 's_{lane}_{tile}_barcode.txt'.  These files contain the following tab-separated columns:<ul> <li>Read subsequence at barcode position</li><li>Y or N indicating if there was a barcode match</li><li>Matched barcode sequence (empty if read did not match one of the barcodes)</li>  <li>The number of mismatches if there was a barcode match</li>  <li>The number of mismatches to the second best barcode if there was a barcode match</li>  </ul>If there is no match but we're close to the threshold of calling it a match, we output the barcode that would have been matched but in lower case.  Threshold values can be adjusted to accommodate barcode sequence mismatches from the reads.  The metrics file produced by the ExtractIlluminaBarcodes program indicates the number of matches (and mismatches) between the barcode reads and the actual barcodes.  
These metrics are provided both per-barcode and per lane and can be found in the BaseCalls directory.</p><p>For poorly matching barcodes, the order of specification of barcodes can cause arbitrary output differences.</p><h4>Usage example:</h4> <pre>java -jar picard.jar ExtractIlluminaBarcodes \\<br />              BASECALLS_DIR=/BaseCalls/ \\<br />              LANE=1 \\<br />          READ_STRUCTURE=25T8B25T \\<br />              BARCODE_FILE=barcodes.txt \\<br />              METRICS_FILE=metrics_output.txt </pre>Please see the ExtractIlluminaBarcodes.BarcodeMetric <a href='http://broadinstitute.github.io/picard/picard-metric-definitions.html#ExtractIlluminaBarcodes.BarcodeMetric'>definitions</a> for a complete description of the metrics produced by this tool.</p><hr />";
    /**
     * Barcode definition file. During command-line validation this value is copied into
     * {@code INPUT_PARAMS_FILE}; when it is {@code null} the {@link #BARCODE} list is used instead.
     */
    @Argument(doc="Tab-delimited file of barcode sequences, barcode name and, optionally, library name.  Barcodes must be unique and all the same length.  Column headers must be 'barcode_sequence' (or 'barcode_sequence_1'), 'barcode_sequence_2' (optional), 'barcode_name', and 'library_name'.", mutex={"BARCODE"})
    public File BARCODE_FILE;
    /**
     * Barcodes given directly on the command line. Only consulted by command-line validation when
     * {@link #BARCODE_FILE} is not set; each entry becomes a key in {@code barcodeToMetrics}.
     */
    @Argument(doc="Barcode sequence.  These must be unique, and all the same length.  This cannot be used with reads that have more than one barcode; use BARCODE_FILE in that case. ", mutex={"BARCODE_FILE"})
    public List<String> BARCODE = new ArrayList<String>();
    /**
     * Requested worker count for the per-tile thread pool. In {@code doWork()} a value of zero
     * selects {@code Runtime.availableProcessors()}, and a negative value is added to that
     * processor count; a positive value is used as given.
     */
    @Argument(doc="Run this many PerTileBarcodeExtractors in parallel.  If NUM_PROCESSORS = 0, number of cores is automatically set to the number of cores available on the machine. If NUM_PROCESSORS < 0 then the number of cores used will be the number available on the machine less NUM_PROCESSORS.")
    public int NUM_PROCESSORS = 1;
    /**
     * Directory that receives the per-tile barcode files. When left {@code null},
     * {@code doWork()} sets it to {@code BASECALLS_DIR}.
     */
    @Argument(doc="Where to write _barcode.txt files.  By default, these are written to BASECALLS_DIR.", optional=true)
    public File OUTPUT_DIR;
    /** Logger shared by the tool and its nested per-tile extractor. */
    private static final Log LOG = Log.getInstance(ExtractIlluminaBarcodes.class);
    /**
     * Formats the tile number used in per-tile output file names; it is configured in the
     * constructor (minimum of four integer digits, grouping disabled).
     */
    private final NumberFormat tileNumberFormatter = NumberFormat.getNumberInstance();

    /**
     * Creates the tool and prepares the formatter used for tile numbers in output file names:
     * no grouping separators and at least four integer digits.
     */
    public ExtractIlluminaBarcodes() {
        // The two settings are independent of each other, so their order does not matter.
        tileNumberFormatter.setGroupingUsed(false);
        tileNumberFormatter.setMinimumIntegerDigits(4);
    }

    /**
     * Runs barcode extraction for every available tile of every requested lane, then merges the
     * per-tile metrics and writes the metrics file.
     *
     * <p>One {@link PerTileBarcodeExtractor} is created per lane/tile and submitted exactly once to
     * a fixed-size pool. After the pool drains, any extractor that recorded an exception aborts
     * the run before metrics are merged.
     *
     * @return 0 on success, or 4 when at least one per-tile extractor recorded an exception
     * @throws PicardException if the thread pool itself reports an error
     */
    @Override
    protected int doWork() {
        IOUtil.assertFileIsWritable((File)this.METRICS_FILE);
        if (this.OUTPUT_DIR == null) {
            this.OUTPUT_DIR = this.BASECALLS_DIR;
        }
        IOUtil.assertDirectoryIsWritable((File)this.OUTPUT_DIR);

        int availableCores = Runtime.getRuntime().availableProcessors();
        int numProcessors;
        if (this.NUM_PROCESSORS == 0) {
            numProcessors = availableCores;
        } else if (this.NUM_PROCESSORS < 0) {
            // A large negative request must not produce a pool size <= 0; always keep one worker.
            numProcessors = Math.max(1, availableCores + this.NUM_PROCESSORS);
        } else {
            numProcessors = this.NUM_PROCESSORS;
        }
        LOG.info("Processing with " + numProcessors + " PerTileBarcodeExtractor(s).");
        ThreadPoolExecutorWithExceptions pool = new ThreadPoolExecutorWithExceptions(numProcessors);
        BarcodeExtractor barcodeExtractor = this.createBarcodeExtractor();

        // Quality scores are only requested when a minimum base quality is in effect.
        HashSet<IlluminaDataType> datatypes = this.MINIMUM_BASE_QUALITY > 0
                ? new HashSet<IlluminaDataType>(Arrays.asList(IlluminaDataType.BaseCalls, IlluminaDataType.PF, IlluminaDataType.QualityScores))
                : new HashSet<IlluminaDataType>(Arrays.asList(IlluminaDataType.BaseCalls, IlluminaDataType.PF));

        ArrayList<PerTileBarcodeExtractor> extractors = new ArrayList<PerTileBarcodeExtractor>();
        for (Integer lane : this.LANE) {
            IlluminaDataProviderFactory factory = new IlluminaDataProviderFactory(this.BASECALLS_DIR, lane, this.inputReadStructure, this.bclQualityEvaluationStrategy, datatypes);
            // Collect this lane's extractors separately so that only they are submitted below;
            // submitting the cumulative list would re-run (and double-count) earlier lanes.
            List<PerTileBarcodeExtractor> laneExtractors = new ArrayList<PerTileBarcodeExtractor>();
            for (int tile : factory.getAvailableTiles()) {
                laneExtractors.add(new PerTileBarcodeExtractor(tile, this.getBarcodeFile(lane, tile), factory, barcodeExtractor));
            }
            for (PerTileBarcodeExtractor extractor : laneExtractors) {
                pool.submit(extractor);
            }
            extractors.addAll(laneExtractors);
        }
        pool.shutdown();
        ThreadPoolExecutorUtil.awaitThreadPoolTermination("Per tile extractor executor", pool, Duration.ofMinutes(5L));
        if (pool.hasError()) {
            throw new PicardException("Exceptions in tile processing. There were " + pool.shutdownNow().size() + " tasks that were still running or queued and have been cancelled. Errors: " + pool.exception.toString());
        }
        LOG.info("Processed " + extractors.size() + " tiles.");

        // Extractors catch their own exceptions, so check for failures before merging anything.
        for (PerTileBarcodeExtractor extractor : extractors) {
            if (extractor.getException() != null) {
                LOG.error("Abandoning metrics calculation because one or more PerTileBarcodeExtractors failed.");
                return 4;
            }
        }
        for (PerTileBarcodeExtractor extractor : extractors) {
            for (String key : this.barcodeToMetrics.keySet()) {
                ((BarcodeMetric)((Object)this.barcodeToMetrics.get(key))).merge(extractor.getMetrics().get(key));
            }
            this.noMatchMetric.merge(extractor.getNoMatchMetric());
        }
        ExtractIlluminaBarcodes.finalizeMetrics(this.barcodeToMetrics, this.noMatchMetric);
        for (Map.Entry<?, ?> entry : this.bclQualityEvaluationStrategy.getPoorQualityFrequencies().entrySet()) {
            LOG.warn(String.format("Observed low quality of %s %s times.", entry.getKey(), entry.getValue()));
        }
        this.bclQualityEvaluationStrategy.assertMinimumQualities();
        this.outputMetrics();
        return 0;
    }

    /**
     * Validates the barcode-related arguments and, when barcodes are given via {@code BARCODE},
     * registers one {@link BarcodeMetric} per barcode in {@code barcodeToMetrics}.
     *
     * <p>The read structure used for extraction is {@code READ_STRUCTURE} with every {@code T} and
     * {@code M} segment rewritten to {@code S}. Each {@code BARCODE} string is split into one
     * piece per barcode segment of that structure. A barcode that is too short to be split is
     * reported as a validation message instead of raising an index exception.
     *
     * @return the combined validation messages from this class and the superclass, as produced
     *         by {@code collectErrorMessages}
     */
    @Override
    protected String[] customCommandLineValidation() {
        ArrayList<String> messages = new ArrayList<String>();
        this.INPUT_PARAMS_FILE = this.BARCODE_FILE;
        this.inputReadStructure = new ReadStructure(this.READ_STRUCTURE.replaceAll("[TM]", "S"));
        if (this.INPUT_PARAMS_FILE == null) {
            int numBarcodes = this.inputReadStructure.sampleBarcodes.length();

            // Total number of barcode bases the read structure expects per barcode string.
            int expectedBarcodeBases = 0;
            for (ReadDescriptor rd : this.inputReadStructure.descriptors) {
                if (rd.type == ReadType.Barcode) {
                    expectedBarcodeBases += rd.length;
                }
            }

            HashSet<String> seenBarcodes = new HashSet<String>();
            for (String barcode : this.BARCODE) {
                // Set.add returns false when the barcode was already present.
                if (!seenBarcodes.add(barcode)) {
                    messages.add("Barcode " + barcode + " specified more than once.");
                }
                if (barcode.length() < expectedBarcodeBases) {
                    // substring() below would throw; report the problem as a validation error.
                    messages.add("Barcode " + barcode + " has " + barcode.length() + " base(s) but READ_STRUCTURE " + this.READ_STRUCTURE + " requires " + expectedBarcodeBases + ".");
                    continue;
                }
                int barcodeNum = 0;
                int pos = 0;
                String[] bcStrings = new String[numBarcodes];
                for (ReadDescriptor rd : this.inputReadStructure.descriptors) {
                    if (rd.type != ReadType.Barcode) {
                        continue;
                    }
                    bcStrings[barcodeNum] = barcode.substring(pos, pos + rd.length);
                    pos += rd.length;
                    ++barcodeNum;
                }
                BarcodeMetric metric = new BarcodeMetric(null, null, IlluminaUtil.barcodeSeqsToString(bcStrings), bcStrings);
                this.barcodeToMetrics.put(barcode, metric);
            }
        }
        String[] superErrors = super.customCommandLineValidation();
        if ((this.INPUT_PARAMS_FILE != null || !this.BARCODE.isEmpty()) && this.barcodeToMetrics.keySet().isEmpty()) {
            messages.add("No barcodes have been specified.");
        }
        return this.collectErrorMessages(messages, superErrors);
    }

    /**
     * Builds the output file for one lane/tile inside {@code OUTPUT_DIR}.
     *
     * @param lane lane number placed in the file name
     * @param tile tile number, rendered through the tile-number formatter
     * @return the file {@code s_<lane>_<tile>_barcode.txt}, with {@code .gz} appended when
     *         {@code COMPRESS_OUTPUTS} is set
     */
    private File getBarcodeFile(int lane, int tile) {
        String formattedTile = this.tileNumberFormatter.format(tile);
        String suffix = "";
        if (this.COMPRESS_OUTPUTS) {
            suffix = ".gz";
        }
        String fileName = "s_" + lane + "_" + formattedTile + "_barcode.txt" + suffix;
        return new File(this.OUTPUT_DIR, fileName);
    }

    public static class PerTileBarcodeExtractor
    implements Runnable {
        private final int tile;
        private final File barcodeFile;
        private final Map<String, BarcodeMetric> metrics;
        private final BarcodeMetric noMatch;
        private Exception exception = null;
        private final boolean usingQualityScores;
        private BaseIlluminaDataProvider provider;
        private final IlluminaDataProviderFactory factory;
        private final ReadStructure outputReadStructure;
        private final BarcodeExtractor barcodeExtractor;

        public PerTileBarcodeExtractor(int tile, File barcodeFile, IlluminaDataProviderFactory factory, BarcodeExtractor extractor) {
            this.barcodeExtractor = extractor;
            this.tile = tile;
            this.barcodeFile = barcodeFile;
            this.usingQualityScores = this.barcodeExtractor.getMinimumBaseQuality() > 0;
            this.metrics = new LinkedHashMap<String, BarcodeMetric>(this.barcodeExtractor.getMetrics().size());
            for (String key : this.barcodeExtractor.getMetrics().keySet()) {
                this.metrics.put(key, this.barcodeExtractor.getMetrics().get(key).copy());
            }
            this.noMatch = this.barcodeExtractor.getNoMatchMetric().copy();
            this.factory = factory;
            this.outputReadStructure = factory.getOutputReadStructure();
        }

        public synchronized Map<String, BarcodeMetric> getMetrics() {
            return this.metrics;
        }

        public synchronized BarcodeMetric getNoMatchMetric() {
            return this.noMatch;
        }

        public synchronized Exception getException() {
            return this.exception;
        }

        /*
         * WARNING - Removed try catching itself - possible behaviour change.
         */
        @Override
        public synchronized void run() {
            try {
                Object qualityScores;
                LOG.info(new Object[]{"Extracting barcodes for tile " + this.tile});
                this.provider = this.factory.makeDataProvider(this.tile);
                int[] barcodeIndices = this.outputReadStructure.sampleBarcodes.getIndices();
                BufferedWriter writer = IOUtil.openFileForBufferedWriting((File)this.barcodeFile);
                byte[][] barcodeSubsequences = new byte[barcodeIndices.length][];
                Object object = qualityScores = this.usingQualityScores ? (Object)new byte[barcodeIndices.length][] : (byte[][])null;
                while (this.provider.hasNext()) {
                    ClusterData cluster = (ClusterData)this.provider.next();
                    for (int i = 0; i < barcodeIndices.length; ++i) {
                        barcodeSubsequences[i] = cluster.getRead(barcodeIndices[i]).getBases();
                        if (!this.usingQualityScores) continue;
                        qualityScores[i] = cluster.getRead(barcodeIndices[i]).getQualities();
                    }
                    boolean passingFilter = cluster.isPf();
                    BarcodeExtractor.BarcodeMatch match = this.barcodeExtractor.findBestBarcode(barcodeSubsequences, (byte[][])qualityScores, false);
                    BarcodeExtractor.updateMetrics(match, passingFilter, this.metrics, this.noMatch);
                    String yOrN = match.isMatched() ? "Y" : "N";
                    for (byte[] bc : barcodeSubsequences) {
                        writer.write(StringUtil.bytesToString((byte[])bc));
                    }
                    writer.write("\t" + yOrN + "\t" + match.getBarcode() + "\t" + match.getMismatches() + "\t" + match.getMismatchesToSecondBest());
                    writer.newLine();
                }
                writer.close();
            }
            catch (Exception e) {
                LOG.error((Throwable)e, new Object[]{"Error processing tile ", this.tile});
                this.exception = e;
            }
            finally {
                CloserUtil.close((Object)this.provider);
                this.provider = null;
            }
        }
    }
}

