/*
 * Decompiled with CFR 0.152.
 */
package picard.util;

import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.samtools.reference.ReferenceSequence;
import htsjdk.samtools.reference.ReferenceSequenceFile;
import htsjdk.samtools.reference.ReferenceSequenceFileFactory;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Interval;
import htsjdk.samtools.util.IntervalList;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.ProgressLogger;
import htsjdk.samtools.util.StringUtil;
import java.io.File;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.Set;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.CommandLineProgramProperties;
import picard.cmdline.Option;
import picard.cmdline.programgroups.Intervals;

@CommandLineProgramProperties(usage="Writes an interval list based on splitting the reference by Ns.  This tool identifies positions in the reference where the basecalls are Ns and writes out an interval list using the resulting coordinates (excluding the N bases). This can be used to create an interval list for whole genome sequence (WGS) for e.g. scatter-gather purposes, as an alternative to using fixed-length intervals. The number of contiguous Ns that can be tolerated before creating a break is adjustable from the command line.<br /><h4>Usage example:</h4><pre>java -jar picard.jar ScatterIntervalsByNs \\<br />      R=reference_sequence.fasta \\<br />      OT=BOTH \\<br />      O=output.interval_list</pre><hr />", usageShort="Writes an interval list based on splitting the reference by Ns.  ", programGroup=Intervals.class)
public class ScatterIntervalsByNs
extends CommandLineProgram {
    static final String USAGE_SUMMARY = "Writes an interval list based on splitting the reference by Ns.  ";
    static final String USAGE_DETAILS = "This tool identifies positions in the reference where the basecalls are Ns and writes out an interval list using the resulting coordinates (excluding the N bases). This can be used to create an interval list for whole genome sequence (WGS) for e.g. scatter-gather purposes, as an alternative to using fixed-length intervals. The number of contiguous Ns that can be tolerated before creating a break is adjustable from the command line.<br /><h4>Usage example:</h4><pre>java -jar picard.jar ScatterIntervalsByNs \\<br />      R=reference_sequence.fasta \\<br />      OT=BOTH \\<br />      O=output.interval_list</pre><hr />";
    @Option(shortName="R", doc="Reference sequence to use.")
    public File REFERENCE;
    @Option(shortName="O", doc="Output file for interval list.")
    public File OUTPUT;
    @Option(shortName="OT", doc="Type of intervals to output.", optional=true)
    public OutputType OUTPUT_TYPE = OutputType.BOTH;
    @Option(shortName="N", doc="Maximal number of contiguous N bases to tolerate, thereby continuing the current ACGT interval.", optional=true)
    public int MAX_TO_MERGE = 1;
    static final String ACGTmer = "ACGTmer";
    static final String Nmer = "Nmer";
    private static final Log log = Log.getInstance(ScatterIntervalsByNs.class);
    final ProgressLogger locusProgress = new ProgressLogger(log, 10000000, "examined", "loci");
    final ProgressLogger intervalProgress = new ProgressLogger(log, 10, "found", "intervals");

    public static void main(String[] stringArray) {
        new ScatterIntervalsByNs().instanceMainWithExit(stringArray);
    }

    @Override
    protected int doWork() {
        IOUtil.assertFileIsReadable((File)this.REFERENCE);
        IOUtil.assertFileIsWritable((File)this.OUTPUT);
        ReferenceSequenceFile referenceSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile((File)this.REFERENCE, (boolean)true);
        IntervalList intervalList = ScatterIntervalsByNs.segregateReference(referenceSequenceFile, this.MAX_TO_MERGE);
        log.info(new Object[]{String.format("Found %d intervals in %d loci during %s seconds", this.intervalProgress.getCount(), this.locusProgress.getCount(), this.locusProgress.getElapsedSeconds())});
        IntervalList intervalList2 = new IntervalList(intervalList.getHeader().clone());
        log.info(new Object[]{String.format("Collecting requested type of intervals (%s)", new Object[]{this.OUTPUT_TYPE})});
        for (Interval interval : intervalList.getIntervals()) {
            if (!this.OUTPUT_TYPE.accepts(interval.getName()).booleanValue()) continue;
            intervalList2.add(interval);
        }
        log.info(new Object[]{"Writing Intervals."});
        intervalList2.write(this.OUTPUT);
        log.info(new Object[]{String.format("Execution ending. Total time %d seconds", this.locusProgress.getElapsedSeconds())});
        return 0;
    }

    public static IntervalList segregateReference(ReferenceSequenceFile referenceSequenceFile, int n) {
        LinkedList<Interval> linkedList = new LinkedList<Interval>();
        SAMFileHeader sAMFileHeader = new SAMFileHeader();
        sAMFileHeader.setSequenceDictionary(referenceSequenceFile.getSequenceDictionary());
        sAMFileHeader.setSortOrder(SAMFileHeader.SortOrder.coordinate);
        IntervalList intervalList = new IntervalList(sAMFileHeader);
        for (SAMSequenceRecord sAMSequenceRecord : referenceSequenceFile.getSequenceDictionary().getSequences()) {
            ReferenceSequence referenceSequence = referenceSequenceFile.getSequence(sAMSequenceRecord.getSequenceName());
            byte[] byArray = referenceSequence.getBases();
            StringUtil.toUpperCase((byte[])byArray);
            boolean bl = byArray[0] == 78;
            int n2 = 0;
            for (int i = 0; i < byArray.length; ++i) {
                boolean bl2;
                boolean bl3 = bl2 = byArray[i] == 78;
                if (bl == bl2) continue;
                linkedList.add(new Interval(sAMSequenceRecord.getSequenceName(), n2 + 1, i, false, bl ? Nmer : ACGTmer));
                n2 = i;
                bl = !bl;
            }
            linkedList.add(new Interval(sAMSequenceRecord.getSequenceName(), n2 + 1, byArray.length, false, bl ? Nmer : ACGTmer));
        }
        while (!linkedList.isEmpty()) {
            if (linkedList.size() >= 3 && ((Interval)linkedList.get(0)).getName() == ACGTmer && ((Interval)linkedList.get(1)).getName() == Nmer && ((Interval)linkedList.get(2)).getName() == ACGTmer && ((Interval)linkedList.get(0)).abuts((Interval)linkedList.get(1)) && ((Interval)linkedList.get(1)).abuts((Interval)linkedList.get(2)) && ((Interval)linkedList.get(1)).length() <= n) {
                Interval interval = new Interval(((Interval)linkedList.get(0)).getSequence(), ((Interval)linkedList.get(0)).getStart(), ((Interval)linkedList.get(2)).getEnd(), false, ACGTmer);
                for (int i = 0; i < 3; ++i) {
                    linkedList.remove(0);
                }
                linkedList.add(0, interval);
                continue;
            }
            intervalList.add((Interval)linkedList.remove(0));
        }
        return intervalList;
    }

    private static enum OutputType {
        N("Nmer"),
        ACGT("ACGTmer"),
        BOTH("Nmer", "ACGTmer");

        private final Set acceptedTypes = new HashSet();

        public Boolean accepts(String string) {
            return this.acceptedTypes.contains(string);
        }

        private OutputType(String ... stringArray) {
            Collections.addAll(this.acceptedTypes, stringArray);
        }
    }
}

