/*
 * Decompiled with CFR 0.152.
 */
package picard.util;

import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.samtools.reference.ReferenceSequence;
import htsjdk.samtools.reference.ReferenceSequenceFile;
import htsjdk.samtools.reference.ReferenceSequenceFileFactory;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Interval;
import htsjdk.samtools.util.IntervalList;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.ProgressLogger;
import htsjdk.samtools.util.SequenceUtil;
import htsjdk.samtools.util.StringUtil;
import java.io.File;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.Set;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.argumentcollections.ReferenceArgumentCollection;
import picard.cmdline.programgroups.ReferenceProgramGroup;

@CommandLineProgramProperties(summary="Writes an interval list created by splitting a reference at Ns.A Program for breaking up a reference into intervals of alternating regions of N and ACGT bases.<br/><br/><br/>Used for creating a broken-up interval list that can be used for scattering a variant-calling pipeline in a way that will not cause problems at the edges of the intervals. By using large enough N blocks (so that the tools will not be able to anchor on both sides) we can be assured that the results of scattering and gathering the variants with the resulting interval list will be the same as calling with one large region.\n<br/><h3>Input</h3>- A reference file to use for creating the intervals (needs to have index and dictionary next to it.)\n- Which type of intervals to emit in the output (Ns only, ACGT only or both.)\n- An integer indicating the largest number of Ns in a contiguous block that will be \"tolerated\" and not converted into an N block.\n\n<h3>Output</h3>- An interval list (with a SAM header) where the names of the intervals are labeled (either N-block or ACGT-block) to indicate what type of block they define.\n\n<h3>Usage example</h3><h4>Create an interval list of intervals that do not contain any N blocks for use with haplotype caller on short reads</h4><pre>java -jar picard.jar ScatterIntervalsByNs \\\n      REFERENCE=reference_sequence.fasta \\\n      OUTPUT_TYPE=ACGT \\\n      OUTPUT=output.interval_list\n</pre>\n\n", oneLineSummary="Writes an interval list created by splitting a reference at Ns.", programGroup=ReferenceProgramGroup.class)
@DocumentedFeature
public class ScatterIntervalsByNs
extends CommandLineProgram {
    static final String USAGE_SUMMARY = "Writes an interval list created by splitting a reference at Ns.";
    static final String USAGE_DETAILS = "A Program for breaking up a reference into intervals of alternating regions of N and ACGT bases.<br/><br/><br/>Used for creating a broken-up interval list that can be used for scattering a variant-calling pipeline in a way that will not cause problems at the edges of the intervals. By using large enough N blocks (so that the tools will not be able to anchor on both sides) we can be assured that the results of scattering and gathering the variants with the resulting interval list will be the same as calling with one large region.\n<br/><h3>Input</h3>- A reference file to use for creating the intervals (needs to have index and dictionary next to it.)\n- Which type of intervals to emit in the output (Ns only, ACGT only or both.)\n- An integer indicating the largest number of Ns in a contiguous block that will be \"tolerated\" and not converted into an N block.\n\n<h3>Output</h3>- An interval list (with a SAM header) where the names of the intervals are labeled (either N-block or ACGT-block) to indicate what type of block they define.\n\n<h3>Usage example</h3><h4>Create an interval list of intervals that do not contain any N blocks for use with haplotype caller on short reads</h4><pre>java -jar picard.jar ScatterIntervalsByNs \\\n      REFERENCE=reference_sequence.fasta \\\n      OUTPUT_TYPE=ACGT \\\n      OUTPUT=output.interval_list\n</pre>\n\n";
    @Argument(shortName="O", doc="Output file for interval list.")
    public File OUTPUT;
    @Argument(shortName="OT", doc="Type of intervals to output.", optional=true)
    public OutputType OUTPUT_TYPE = OutputType.BOTH;
    @Argument(shortName="N", doc="Maximal number of contiguous N bases to tolerate, thereby continuing the current ACGT interval.", optional=true)
    public int MAX_TO_MERGE = 1;
    private static final String ACGTmer = "ACGTmer";
    private static final String Nmer = "Nmer";
    private static final Log log = Log.getInstance(ScatterIntervalsByNs.class);
    private static final ProgressLogger locusProgress = new ProgressLogger(log, 10000000, "examined", "loci");
    private static final ProgressLogger intervalProgress = new ProgressLogger(log, 10, "found", "intervals");

    public static void main(String[] args) {
        new ScatterIntervalsByNs().instanceMainWithExit(args);
    }

    @Override
    protected ReferenceArgumentCollection makeReferenceArgumentCollection() {
        return new ScatterIntervalsByNReferenceArgumentCollection();
    }

    @Override
    protected int doWork() {
        IOUtil.assertFileIsReadable((File)this.REFERENCE_SEQUENCE);
        IOUtil.assertFileIsWritable((File)this.OUTPUT);
        ReferenceSequenceFile refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile((File)this.REFERENCE_SEQUENCE, (boolean)true);
        if (!refFile.isIndexed()) {
            throw new IllegalStateException("Reference file must be indexed, but no index file was found");
        }
        if (refFile.getSequenceDictionary() == null) {
            throw new IllegalStateException("Reference file must include a dictionary, but no dictionary file was found");
        }
        IntervalList intervals = ScatterIntervalsByNs.segregateReference(refFile, this.MAX_TO_MERGE);
        log.info(new Object[]{String.format("Found %d intervals in %d loci during %s seconds", intervalProgress.getCount(), locusProgress.getCount(), locusProgress.getElapsedSeconds())});
        IntervalList outputIntervals = new IntervalList(intervals.getHeader().clone());
        log.info(new Object[]{String.format("Collecting requested type of intervals (%s)", new Object[]{this.OUTPUT_TYPE})});
        intervals.getIntervals().stream().filter(i -> this.OUTPUT_TYPE.accepts(i.getName())).forEach(arg_0 -> ((IntervalList)outputIntervals).add(arg_0));
        log.info(new Object[]{"Writing Intervals."});
        outputIntervals.write(this.OUTPUT);
        log.info(new Object[]{String.format("Execution ending. Total time %d seconds", locusProgress.getElapsedSeconds())});
        return 0;
    }

    static IntervalList segregateReference(ReferenceSequenceFile refFile, int maxNmerToMerge) {
        LinkedList<Interval> preliminaryIntervals = new LinkedList<Interval>();
        SAMFileHeader header = new SAMFileHeader();
        header.setSequenceDictionary(refFile.getSequenceDictionary());
        header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
        IntervalList finalIntervals = new IntervalList(header);
        for (SAMSequenceRecord rec : refFile.getSequenceDictionary().getSequences()) {
            ReferenceSequence ref = refFile.getSequence(rec.getSequenceName());
            byte[] bytes = ref.getBases();
            StringUtil.toUpperCase((byte[])bytes);
            boolean nBlockIsOpen = SequenceUtil.isNoCall((byte)bytes[0]);
            int start = 0;
            for (int i = 0; i < bytes.length; ++i) {
                locusProgress.record(rec.getSequenceName(), i);
                boolean currentBaseIsN = SequenceUtil.isNoCall((byte)bytes[i]);
                if (nBlockIsOpen == currentBaseIsN) continue;
                preliminaryIntervals.add(new Interval(rec.getSequenceName(), start + 1, i, false, nBlockIsOpen ? Nmer : ACGTmer));
                start = i;
                nBlockIsOpen = !nBlockIsOpen;
            }
            preliminaryIntervals.add(new Interval(rec.getSequenceName(), start + 1, bytes.length, false, nBlockIsOpen ? Nmer : ACGTmer));
        }
        while (!preliminaryIntervals.isEmpty()) {
            if (preliminaryIntervals.size() >= 3 && ((Interval)preliminaryIntervals.get(0)).getName() == ACGTmer && ((Interval)preliminaryIntervals.get(1)).getName() == Nmer && ((Interval)preliminaryIntervals.get(2)).getName() == ACGTmer && ((Interval)preliminaryIntervals.get(0)).abuts((Interval)preliminaryIntervals.get(1)) && ((Interval)preliminaryIntervals.get(1)).abuts((Interval)preliminaryIntervals.get(2)) && ((Interval)preliminaryIntervals.get(1)).length() <= maxNmerToMerge) {
                Interval temp = new Interval(((Interval)preliminaryIntervals.get(0)).getContig(), ((Interval)preliminaryIntervals.get(0)).getStart(), ((Interval)preliminaryIntervals.get(2)).getEnd(), false, ACGTmer);
                for (int i = 0; i < 3; ++i) {
                    preliminaryIntervals.remove(0);
                }
                preliminaryIntervals.add(0, temp);
                continue;
            }
            Interval remove = (Interval)preliminaryIntervals.remove(0);
            finalIntervals.add(remove);
            intervalProgress.record(remove.getContig(), remove.getStart());
        }
        return finalIntervals;
    }

    public static class ScatterIntervalsByNReferenceArgumentCollection
    implements ReferenceArgumentCollection {
        @Argument(shortName="R", doc="Reference sequence to use. Note: this tool requires that the reference fasta has both an associated index and a dictionary.")
        public File REFERENCE;

        @Override
        public File getReferenceFile() {
            return this.REFERENCE;
        }
    }

    private static enum OutputType {
        N("Nmer"),
        ACGT("ACGTmer"),
        BOTH("Nmer", "ACGTmer");

        private final Set<String> acceptedTypes = new HashSet<String>();

        public Boolean accepts(String string) {
            return this.acceptedTypes.contains(string);
        }

        private OutputType(String ... strings) {
            Collections.addAll(this.acceptedTypes, strings);
        }
    }
}

