/*
 * Decompiled with CFR 0.152.
 */
package picard.analysis;

import htsjdk.samtools.SAMReadGroupRecord;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.filter.DuplicateReadFilter;
import htsjdk.samtools.filter.InsertSizeFilter;
import htsjdk.samtools.filter.NotPrimaryAlignmentFilter;
import htsjdk.samtools.metrics.MetricBase;
import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.reference.ReferenceSequenceFileWalker;
import htsjdk.samtools.util.CloserUtil;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.IntervalList;
import htsjdk.samtools.util.ListMap;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.SamLocusIterator;
import htsjdk.samtools.util.SequenceUtil;
import htsjdk.samtools.util.StringUtil;
import java.io.File;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import picard.PicardException;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.programgroups.DiagnosticsAndQCProgramGroup;
import picard.util.DbSnpBitSetUtil;

@CommandLineProgramProperties(summary="Collect metrics to assess oxidative artifacts.This tool collects metrics quantifying the error rate resulting from oxidative artifacts. For a brief primer on oxidative artifacts, see <a href='http://gatkforums.broadinstitute.org/discussion/6328/oxog-oxidative-artifacts'>the GATK Dictionary</a>.<br /><br />This tool calculates the Phred-scaled probability that an alternate base call results from an oxidation artifact. This probability score is based on base context, sequencing read orientation, and the characteristic low allelic frequency.  Please see the following reference for an in-depth <a href='http://nar.oxfordjournals.org/content/early/2013/01/08/nar.gks1443'>discussion</a> of the OxoG error rate.  <p>Lower probability values implicate artifacts resulting from 8-oxoguanine, while higher probability values suggest that an alternate base call is due to either some other type of artifact or is a real variant.</p><h4>Usage example:</h4><pre>java -jar picard.jar CollectOxoGMetrics \\<br />      I=input.bam \\<br />      O=oxoG_metrics.txt \\<br />      R=reference_sequence.fasta</pre><hr />", oneLineSummary="Collect metrics to assess oxidative artifacts.", programGroup=DiagnosticsAndQCProgramGroup.class)
@DocumentedFeature
public class CollectOxoGMetrics
extends CommandLineProgram {
    static final String USAGE_SUMMARY = "Collect metrics to assess oxidative artifacts.";
    static final String USAGE_DETAILS = "This tool collects metrics quantifying the error rate resulting from oxidative artifacts. For a brief primer on oxidative artifacts, see <a href='http://gatkforums.broadinstitute.org/discussion/6328/oxog-oxidative-artifacts'>the GATK Dictionary</a>.<br /><br />This tool calculates the Phred-scaled probability that an alternate base call results from an oxidation artifact. This probability score is based on base context, sequencing read orientation, and the characteristic low allelic frequency.  Please see the following reference for an in-depth <a href='http://nar.oxfordjournals.org/content/early/2013/01/08/nar.gks1443'>discussion</a> of the OxoG error rate.  <p>Lower probability values implicate artifacts resulting from 8-oxoguanine, while higher probability values suggest that an alternate base call is due to either some other type of artifact or is a real variant.</p><h4>Usage example:</h4><pre>java -jar picard.jar CollectOxoGMetrics \\<br />      I=input.bam \\<br />      O=oxoG_metrics.txt \\<br />      R=reference_sequence.fasta</pre><hr />";
    @Argument(shortName="I", doc="Input BAM file for analysis.")
    public File INPUT;
    @Argument(shortName="O", doc="Location of output metrics file to write.")
    public File OUTPUT;
    @Argument(doc="An optional list of intervals to restrict analysis to.", optional=true)
    public File INTERVALS;
    @Argument(doc="VCF format dbSNP file, used to exclude regions around known polymorphisms from analysis.", optional=true)
    public File DB_SNP;
    @Argument(shortName="Q", doc="The minimum base quality score for a base to be included in analysis.")
    public int MINIMUM_QUALITY_SCORE = 20;
    @Argument(shortName="MQ", doc="The minimum mapping quality score for a base to be included in analysis.")
    public int MINIMUM_MAPPING_QUALITY = 30;
    @Argument(shortName="MIN_INS", doc="The minimum insert size for a read to be included in analysis. Set of 0 to allow unpaired reads.")
    public int MINIMUM_INSERT_SIZE = 60;
    @Argument(shortName="MAX_INS", doc="The maximum insert size for a read to be included in analysis. Set of 0 to allow unpaired reads.")
    public int MAXIMUM_INSERT_SIZE = 600;
    @Argument(shortName="NON_PF", doc="Whether or not to include non-PF reads.")
    public boolean INCLUDE_NON_PF_READS = true;
    @Argument(doc="When available, use original quality scores for filtering.")
    public boolean USE_OQ = true;
    @Argument(doc="The number of context bases to include on each side of the assayed G/C base.")
    public int CONTEXT_SIZE = 1;
    @Argument(doc="The optional set of sequence contexts to restrict analysis to. If not supplied all contexts are analyzed.", optional=true)
    public Set<String> CONTEXTS = new HashSet<String>();
    @Argument(doc="For debugging purposes: stop after visiting this many sites with at least 1X coverage.")
    public int STOP_AFTER = Integer.MAX_VALUE;
    private final Log log = Log.getInstance(CollectOxoGMetrics.class);
    private static final String UNKNOWN_LIBRARY = "UnknownLibrary";
    private static final String UNKNOWN_SAMPLE = "UnknownSample";

    @Override
    protected boolean requiresReference() {
        return true;
    }

    @Override
    protected String[] customCommandLineValidation() {
        int size = 1 + 2 * this.CONTEXT_SIZE;
        ArrayList<String> messages = new ArrayList<String>();
        for (String ctx : this.CONTEXTS) {
            if (ctx.length() != size) {
                messages.add("Context " + ctx + " is not " + size + " long as implied by CONTEXT_SIZE=" + this.CONTEXT_SIZE);
                continue;
            }
            if (ctx.charAt(ctx.length() / 2) == 'C') continue;
            messages.add("Middle base of context sequence " + ctx + " must be C");
        }
        if (this.MINIMUM_INSERT_SIZE < 0) {
            messages.add("MINIMUM_INSERT_SIZE cannot be negative");
        }
        if (this.MAXIMUM_INSERT_SIZE < 0) {
            messages.add("MAXIMUM_INSERT_SIZE cannot be negative");
        }
        if (this.MAXIMUM_INSERT_SIZE < this.MINIMUM_INSERT_SIZE) {
            messages.add("MAXIMUM_INSERT_SIZE cannot be less than MINIMUM_INSERT_SIZE");
        }
        return messages.isEmpty() ? null : messages.toArray(new String[messages.size()]);
    }

    @Override
    protected int doWork() {
        SamLocusIterator iterator;
        IOUtil.assertFileIsReadable((File)this.INPUT);
        IOUtil.assertFileIsWritable((File)this.OUTPUT);
        if (this.INTERVALS != null) {
            IOUtil.assertFileIsReadable((File)this.INTERVALS);
        }
        IOUtil.assertFileIsReadable((File)this.REFERENCE_SEQUENCE);
        ReferenceSequenceFileWalker refWalker = new ReferenceSequenceFileWalker(this.REFERENCE_SEQUENCE);
        SamReader in = SamReaderFactory.makeDefault().open(this.INPUT);
        if (!in.getFileHeader().getSequenceDictionary().isEmpty()) {
            SequenceUtil.assertSequenceDictionariesEqual((SAMSequenceDictionary)in.getFileHeader().getSequenceDictionary(), (SAMSequenceDictionary)refWalker.getSequenceDictionary());
        }
        HashSet<String> samples = new HashSet<String>();
        HashSet<String> libraries = new HashSet<String>();
        if (in.getFileHeader().getReadGroups().isEmpty()) {
            throw new PicardException("This analysis requires a read group entry in the alignment file header");
        }
        for (SAMReadGroupRecord rec : in.getFileHeader().getReadGroups()) {
            samples.add(Optional.ofNullable(rec.getSample()).orElse(UNKNOWN_SAMPLE));
            libraries.add(Optional.ofNullable(rec.getLibrary()).orElse(UNKNOWN_LIBRARY));
        }
        Set<String> contexts = this.CONTEXTS.isEmpty() ? this.makeContextStrings(this.CONTEXT_SIZE) : this.CONTEXTS;
        ListMap calculators = new ListMap();
        for (String context : contexts) {
            for (String library : libraries) {
                calculators.add((Object)context, (Object)new Calculator(library, context));
            }
        }
        this.log.info(new Object[]{"Loading dbSNP File: " + this.DB_SNP});
        DbSnpBitSetUtil dbSnp = this.DB_SNP != null ? new DbSnpBitSetUtil(this.DB_SNP, in.getFileHeader().getSequenceDictionary()) : null;
        if (this.INTERVALS != null) {
            IntervalList intervals = IntervalList.fromFile((File)this.INTERVALS);
            iterator = new SamLocusIterator(in, intervals.uniqued(), false);
        } else {
            iterator = new SamLocusIterator(in);
        }
        iterator.setEmitUncoveredLoci(false);
        iterator.setMappingQualityScoreCutoff(this.MINIMUM_MAPPING_QUALITY);
        iterator.setIncludeNonPfReads(this.INCLUDE_NON_PF_READS);
        ArrayList<Object> filters = new ArrayList<Object>();
        filters.add(new NotPrimaryAlignmentFilter());
        filters.add(new DuplicateReadFilter());
        if (this.MINIMUM_INSERT_SIZE > 0 || this.MAXIMUM_INSERT_SIZE > 0) {
            filters.add(new InsertSizeFilter(this.MINIMUM_INSERT_SIZE, this.MAXIMUM_INSERT_SIZE));
        }
        iterator.setSamFilters(filters);
        this.log.info(new Object[]{"Starting iteration."});
        long nextLogTime = 0L;
        int sites = 0;
        for (SamLocusIterator.LocusInfo info : iterator) {
            long now;
            byte base;
            String chrom = info.getSequenceName();
            int pos = info.getPosition();
            int index = pos - 1;
            if (dbSnp != null && dbSnp.isDbSnpSite(chrom, pos)) continue;
            byte[] bases = refWalker.get(info.getSequenceIndex()).getBases();
            if (pos <= this.CONTEXT_SIZE || pos > bases.length - this.CONTEXT_SIZE || (base = StringUtil.toUpperCase((byte)bases[index])) != 67 && base != 71) continue;
            String tmp = StringUtil.bytesToString((byte[])bases, (int)(index - this.CONTEXT_SIZE), (int)(1 + 2 * this.CONTEXT_SIZE)).toUpperCase();
            String context = base == 67 ? tmp : SequenceUtil.reverseComplement((String)tmp);
            List calculatorsForContext = (List)calculators.get((Object)context);
            if (calculatorsForContext == null) continue;
            for (Calculator calc : calculatorsForContext) {
                calc.accept(info, base);
            }
            if (++sites % 100 == 0 && (now = System.currentTimeMillis()) > nextLogTime) {
                this.log.info(new Object[]{"Visited " + sites + " sites of interest. Last site: " + chrom + ":" + pos});
                nextLogTime = now + 60000L;
            }
            if (sites < this.STOP_AFTER) continue;
            break;
        }
        MetricsFile file = this.getMetricsFile();
        for (List calcs : calculators.values()) {
            for (Calculator calc : calcs) {
                CpcgMetrics m = calc.finish();
                m.SAMPLE_ALIAS = StringUtil.join((String)",", new ArrayList(samples));
                file.addMetric((MetricBase)m);
            }
        }
        file.write(this.OUTPUT);
        CloserUtil.close((Object)in);
        return 0;
    }

    private Set<String> makeContextStrings(int contextSize) {
        HashSet<String> contexts = new HashSet<String>();
        for (byte[] kmer : SequenceUtil.generateAllKmers((int)(2 * contextSize + 1))) {
            if (kmer[contextSize] != 67) continue;
            contexts.add(StringUtil.bytesToString((byte[])kmer));
        }
        this.log.info(new Object[]{"Generated " + contexts.size() + " context strings."});
        return contexts;
    }

    private class Calculator {
        private final String library;
        private final String context;
        int sites = 0;
        long refCcontrolA = 0L;
        long refCoxidatedA = 0L;
        long refCcontrolC = 0L;
        long refCoxidatedC = 0L;
        long refGcontrolA = 0L;
        long refGoxidatedA = 0L;
        long refGcontrolC = 0L;
        long refGoxidatedC = 0L;

        Calculator(String library, String context) {
            this.library = library;
            this.context = context;
        }

        void accept(SamLocusIterator.LocusInfo info, byte refBase) {
            Counts counts = this.computeAlleleFraction(info, refBase);
            if (counts.total() > 0) {
                ++this.sites;
                if (refBase == 67) {
                    this.refCcontrolA += (long)counts.controlA;
                    this.refCoxidatedA += (long)counts.oxidatedA;
                    this.refCcontrolC += (long)counts.controlC;
                    this.refCoxidatedC += (long)counts.oxidatedC;
                } else if (refBase == 71) {
                    this.refGcontrolA += (long)counts.controlA;
                    this.refGoxidatedA += (long)counts.oxidatedA;
                    this.refGcontrolC += (long)counts.controlC;
                    this.refGoxidatedC += (long)counts.oxidatedC;
                } else {
                    throw new IllegalStateException("Reference bases other than G and C not supported.");
                }
            }
        }

        CpcgMetrics finish() {
            CpcgMetrics m = new CpcgMetrics();
            m.LIBRARY = this.library;
            m.CONTEXT = this.context;
            m.TOTAL_SITES = this.sites;
            m.TOTAL_BASES = this.refCcontrolC + this.refCoxidatedC + this.refCcontrolA + this.refCoxidatedA + this.refGcontrolC + this.refGoxidatedC + this.refGcontrolA + this.refGoxidatedA;
            m.REF_OXO_BASES = this.refCoxidatedC + this.refGoxidatedC;
            m.REF_NONOXO_BASES = this.refCcontrolC + this.refGcontrolC;
            m.REF_TOTAL_BASES = m.REF_OXO_BASES + m.REF_NONOXO_BASES;
            m.ALT_NONOXO_BASES = this.refCcontrolA + this.refGcontrolA;
            m.ALT_OXO_BASES = this.refCoxidatedA + this.refGoxidatedA;
            m.OXIDATION_ERROR_RATE = (double)Math.max(m.ALT_OXO_BASES - m.ALT_NONOXO_BASES, 1L) / (double)m.TOTAL_BASES;
            m.OXIDATION_Q = -10.0 * Math.log10(m.OXIDATION_ERROR_RATE);
            m.C_REF_REF_BASES = this.refCcontrolC + this.refCoxidatedC;
            m.G_REF_REF_BASES = this.refGcontrolC + this.refGoxidatedC;
            m.C_REF_ALT_BASES = this.refCcontrolA + this.refCoxidatedA;
            m.G_REF_ALT_BASES = this.refGcontrolA + this.refGoxidatedA;
            double cRefErrorRate = (double)m.C_REF_ALT_BASES / (double)(m.C_REF_ALT_BASES + m.C_REF_REF_BASES);
            double gRefErrorRate = (double)m.G_REF_ALT_BASES / (double)(m.G_REF_ALT_BASES + m.G_REF_REF_BASES);
            m.C_REF_OXO_ERROR_RATE = Math.max(cRefErrorRate - gRefErrorRate, 1.0E-10);
            m.G_REF_OXO_ERROR_RATE = Math.max(gRefErrorRate - cRefErrorRate, 1.0E-10);
            m.C_REF_OXO_Q = -10.0 * Math.log10(m.C_REF_OXO_ERROR_RATE);
            m.G_REF_OXO_Q = -10.0 * Math.log10(m.G_REF_OXO_ERROR_RATE);
            return m;
        }

        private Counts computeAlleleFraction(SamLocusIterator.LocusInfo info, byte refBase) {
            Counts counts = new Counts();
            byte altBase = refBase == 67 ? (byte)65 : 84;
            for (SamLocusIterator.RecordAndOffset rec : info.getRecordAndOffsets()) {
                int read;
                byte[] oqs;
                SAMRecord samrec = rec.getRecord();
                byte qual = CollectOxoGMetrics.this.USE_OQ ? ((oqs = samrec.getOriginalBaseQualities()) != null ? oqs[rec.getOffset()] : rec.getBaseQuality()) : rec.getBaseQuality();
                if (qual < CollectOxoGMetrics.this.MINIMUM_QUALITY_SCORE || !this.library.equals(Optional.ofNullable(samrec.getReadGroup().getLibrary()).orElse(CollectOxoGMetrics.UNKNOWN_LIBRARY))) continue;
                byte base = rec.getReadBase();
                byte baseAsRead = samrec.getReadNegativeStrandFlag() ? SequenceUtil.complement((byte)base) : base;
                int n = read = samrec.getReadPairedFlag() && samrec.getSecondOfPairFlag() ? 2 : 1;
                if (base == refBase) {
                    if (baseAsRead == 71 && read == 1) {
                        ++counts.oxidatedC;
                        continue;
                    }
                    if (baseAsRead == 71 && read == 2) {
                        ++counts.controlC;
                        continue;
                    }
                    if (baseAsRead == 67 && read == 1) {
                        ++counts.controlC;
                        continue;
                    }
                    if (baseAsRead != 67 || read != 2) continue;
                    ++counts.oxidatedC;
                    continue;
                }
                if (base != altBase) continue;
                if (baseAsRead == 84 && read == 1) {
                    ++counts.oxidatedA;
                    continue;
                }
                if (baseAsRead == 84 && read == 2) {
                    ++counts.controlA;
                    continue;
                }
                if (baseAsRead == 65 && read == 1) {
                    ++counts.controlA;
                    continue;
                }
                if (baseAsRead != 65 || read != 2) continue;
                ++counts.oxidatedA;
            }
            return counts;
        }
    }

    private static class Counts {
        int controlA;
        int oxidatedA;
        int controlC;
        int oxidatedC;

        private Counts() {
        }

        int total() {
            return this.controlC + this.oxidatedC + this.controlA + this.oxidatedA;
        }
    }

    public static final class CpcgMetrics
    extends MetricBase {
        public String SAMPLE_ALIAS;
        public String LIBRARY;
        public String CONTEXT;
        public int TOTAL_SITES;
        public long TOTAL_BASES;
        public long REF_NONOXO_BASES;
        public long REF_OXO_BASES;
        public long REF_TOTAL_BASES;
        public long ALT_NONOXO_BASES;
        public long ALT_OXO_BASES;
        public double OXIDATION_ERROR_RATE;
        public double OXIDATION_Q;
        public long C_REF_REF_BASES;
        public long G_REF_REF_BASES;
        public long C_REF_ALT_BASES;
        public long G_REF_ALT_BASES;
        public double C_REF_OXO_ERROR_RATE;
        public double C_REF_OXO_Q;
        public double G_REF_OXO_ERROR_RATE;
        public double G_REF_OXO_Q;
    }
}

