/*
 * Decompiled with CFR 0.152.
 */
package org.broadinstitute.hellbender.tools.spark.pathseq;

import com.google.common.annotations.VisibleForTesting;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SAMTag;
import htsjdk.samtools.util.CollectionUtil;
import htsjdk.samtools.util.SequenceUtil;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.apache.spark.HashPartitioner;
import org.apache.spark.Partitioner;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.PairFunction;
import org.broadinstitute.hellbender.engine.filters.AmbiguousBaseReadFilter;
import org.broadinstitute.hellbender.engine.filters.ReadLengthReadFilter;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.tools.spark.pathseq.ContainsKmerReadFilterSpark;
import org.broadinstitute.hellbender.tools.spark.pathseq.HostAlignmentReadFilter;
import org.broadinstitute.hellbender.tools.spark.pathseq.PSBwaFilter;
import org.broadinstitute.hellbender.tools.spark.pathseq.PSFilterArgumentCollection;
import org.broadinstitute.hellbender.tools.spark.pathseq.PSPairedUnpairedSplitterSpark;
import org.broadinstitute.hellbender.tools.spark.pathseq.PSUtils;
import org.broadinstitute.hellbender.tools.spark.pathseq.loggers.PSFilterLogger;
import org.broadinstitute.hellbender.tools.spark.sv.utils.SVUtils;
import org.broadinstitute.hellbender.tools.spark.utils.ReadFilterSparkifier;
import org.broadinstitute.hellbender.tools.spark.utils.ReadTransformerSparkifier;
import org.broadinstitute.hellbender.transformers.AdapterTrimTransformer;
import org.broadinstitute.hellbender.transformers.BaseQualityClipReadTransformer;
import org.broadinstitute.hellbender.transformers.BaseQualityReadTransformer;
import org.broadinstitute.hellbender.transformers.DUSTReadTransformer;
import org.broadinstitute.hellbender.transformers.SimpleRepeatMaskTransformer;
import org.broadinstitute.hellbender.transformers.StripMateNumberTransformer;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.bwa.BwaMemIndexCache;
import org.broadinstitute.hellbender.utils.illumina.IlluminaAdapterPair;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.read.SAMRecordToGATKReadAdapter;
import scala.Tuple2;

/**
 * Runs the PathSeq read-filtering pipeline on a Spark RDD of reads.
 *
 * <p>{@link #doFilter} keeps primary reads, optionally drops reads according to their existing
 * alignments, strips alignment information, applies a series of read transformers and filters,
 * optionally applies k-mer and BWA based filtering, optionally removes duplicate sequences, and
 * finally splits the surviving reads into paired and unpaired sets.
 *
 * <p>Instances should be closed after use: {@link #close()} asks {@code BwaMemIndexCache} and
 * {@code ContainsKmerReadFilterSpark} to close all of their distributed instances.
 */
public final class PSFilter
implements AutoCloseable {
    private static final List<String> ADAPTER_SEQUENCES = CollectionUtil.makeList(
            IlluminaAdapterPair.SINGLE_END.get5PrimeAdapter(),
            IlluminaAdapterPair.SINGLE_END.get3PrimeAdapter(),
            IlluminaAdapterPair.PAIRED_END.get5PrimeAdapter(),
            IlluminaAdapterPair.PAIRED_END.get3PrimeAdapter(),
            IlluminaAdapterPair.INDEXED.get5PrimeAdapter(),
            IlluminaAdapterPair.INDEXED.get3PrimeAdapter());

    // Arguments for the two SimpleRepeatMaskTransformer passes applied in applyQualityFilters().
    private static final int REPEAT_WINDOW_SIZE_1 = 30;
    private static final int MAX_AT_CONTENT_1 = 29;
    private static final int MAX_GC_CONTENT_1 = 29;
    private static final int REPEAT_WINDOW_SIZE_2 = 100;
    private static final int MAX_AT_CONTENT_2 = 87;
    private static final int MAX_GC_CONTENT_2 = 89;

    private final JavaSparkContext ctx;
    private final PSFilterArgumentCollection filterArgs;
    private final SAMFileHeader header;

    /**
     * Creates a filter and validates its arguments against the header.
     *
     * @param ctx        Spark context, used by {@link #close()}; must not be null
     * @param filterArgs filtering options; must not be null
     * @param header     header of the input reads; must not be null
     * @throws UserException.BadInput if the input is flagged as aligned and a contig listed in
     *                                {@code alignmentContigsToIgnore} is absent from the header's
     *                                sequence dictionary
     */
    public PSFilter(final JavaSparkContext ctx, final PSFilterArgumentCollection filterArgs, final SAMFileHeader header) {
        Utils.nonNull(ctx, "JavaSparkContext cannot be null");
        Utils.nonNull(filterArgs, "Filter arguments cannot be null");
        // The header is dereferenced during validation, so reject null up front like the other arguments.
        Utils.nonNull(header, "SAM header cannot be null");
        this.ctx = ctx;
        this.filterArgs = filterArgs;
        this.header = header;
        this.validateFilterArguments();
    }

    /**
     * Repartitions the reads by name and then, within each partition, marks every read whose name
     * occurs without a partner as not paired. Reads found with a same-named partner are left as is.
     *
     * @param reads                  input reads
     * @param readsPerPartitionGuess initial capacity hint for the per-partition collections
     * @return the re-flagged reads
     */
    @VisibleForTesting
    static JavaRDD<GATKRead> setPairFlags(final JavaRDD<GATKRead> reads, final int readsPerPartitionGuess) {
        return PSFilter.repartitionReadsByName(reads)
                .mapPartitions(iter -> PSFilter.setPartitionUnpairedFlags(iter, readsPerPartitionGuess));
    }

    /**
     * Replaces every read with an unaligned copy built by {@link #clearReadAlignment}.
     */
    private static JavaRDD<GATKRead> clearAllAlignments(final JavaRDD<GATKRead> reads, final SAMFileHeader header) {
        return reads.map(read -> PSFilter.clearReadAlignment(read, header));
    }

    /**
     * Builds a new unmapped read carrying only the name, bases, base qualities, pairing flags and
     * read group of {@code read}. If the input is on the reverse strand, the bases are
     * reverse-complemented and the qualities reversed.
     *
     * @param read   read to copy from; it is not modified, assuming its getters return copies
     * @param header header attached to the newly created record
     * @return the new unaligned read
     */
    private static GATKRead clearReadAlignment(final GATKRead read, final SAMFileHeader header) {
        // GATKRead documents getBases()/getBaseQualities() as returning defensive copies, so the
        // re-orientation has to be done on these arrays *before* they are stored in the new read;
        // flipping the arrays returned by the new read's getters would be silently discarded.
        final byte[] bases = read.getBases();
        final byte[] qualities = read.getBaseQualities();
        if (read.isReverseStrand()) {
            SequenceUtil.reverseComplement(bases);
            SequenceUtil.reverseQualities(qualities);
        }

        final GATKRead newRead = new SAMRecordToGATKReadAdapter(new SAMRecord(header));
        newRead.setName(read.getName());
        newRead.setBases(bases);
        newRead.setBaseQualities(qualities);
        newRead.setIsUnmapped();
        newRead.setIsPaired(read.isPaired());
        if (read.isPaired()) {
            newRead.setMateIsUnmapped();
            if (read.isFirstOfPair()) {
                newRead.setIsFirstOfPair();
            } else if (read.isSecondOfPair()) {
                newRead.setIsSecondOfPair();
            }
        }
        final String readGroup = read.getReadGroup();
        if (readGroup != null) {
            newRead.setAttribute(SAMTag.RG.name(), readGroup);
        }
        return newRead;
    }

    /**
     * Marks the reads of one partition that have no same-named partner as not paired and returns
     * an iterator over the paired reads followed by the unpaired reads.
     */
    private static Iterator<GATKRead> setPartitionUnpairedFlags(final Iterator<GATKRead> iter, final int readsPerPartitionGuess) {
        final Tuple2<List<GATKRead>, List<GATKRead>> lists = PSFilter.getPairedAndUnpairedLists(iter, readsPerPartitionGuess);
        final List<GATKRead> pairedReadsList = lists._1;
        final List<GATKRead> unpairedReadsList = lists._2;
        for (final GATKRead unpairedRead : unpairedReadsList) {
            unpairedRead.setIsPaired(false);
        }
        final List<GATKRead> newPartitionList = new ArrayList<>(pairedReadsList.size() + unpairedReadsList.size());
        newPartitionList.addAll(pairedReadsList);
        newPartitionList.addAll(unpairedReadsList);
        return newPartitionList.iterator();
    }

    /**
     * Repartitions the reads by read name, keeping the current number of partitions.
     */
    static JavaRDD<GATKRead> repartitionReadsByName(final JavaRDD<GATKRead> reads) {
        return PSFilter.repartitionReadsByName(reads, reads.getNumPartitions());
    }

    /**
     * Repartitions the reads with a {@link HashPartitioner} keyed on read name, so that reads
     * sharing a name land in the same partition.
     *
     * @param reads         input reads
     * @param numPartitions number of partitions handed to the partitioner
     */
    static JavaRDD<GATKRead> repartitionReadsByName(final JavaRDD<GATKRead> reads, final int numPartitions) {
        return reads.mapToPair(read -> new Tuple2<>(read.getName(), read))
                .partitionBy(new HashPartitioner(numPartitions))
                .map(Tuple2::_2);
    }

    /**
     * Scans the reads once and separates them by whether a second read with the same name was seen.
     *
     * <p>When a name is seen for the second time, the current read and the previously stored one
     * are appended (in that order) to the paired list. Whatever is still waiting for a partner
     * when the iterator is exhausted forms the unpaired list.
     *
     * @param iter                   reads to scan
     * @param readsPerPartitionGuess initial capacity hint for the internal collections
     * @return tuple of (paired reads, unpaired reads)
     */
    static Tuple2<List<GATKRead>, List<GATKRead>> getPairedAndUnpairedLists(final Iterator<GATKRead> iter, final int readsPerPartitionGuess) {
        final ArrayList<GATKRead> pairedReadsList = new ArrayList<>(readsPerPartitionGuess);
        final HashMap<String, GATKRead> unpairedReads = new HashMap<>(readsPerPartitionGuess);
        while (iter.hasNext()) {
            final GATKRead read = iter.next();
            final String readName = read.getName();
            // Stored values are never null, so a single remove() both tests for and fetches the mate.
            final GATKRead mate = unpairedReads.remove(readName);
            if (mate == null) {
                unpairedReads.put(readName, read);
            } else {
                pairedReadsList.add(read);
                pairedReadsList.add(mate);
            }
        }
        final List<GATKRead> unpairedReadsList = new ArrayList<>(unpairedReads.values());
        pairedReadsList.trimToSize();
        return new Tuple2<>(pairedReadsList, unpairedReadsList);
    }

    /**
     * Groups reads by the key computed in {@link #canonicalizeRead} and keeps one read per group:
     * the first read in the group that is not flagged as paired, or otherwise the first read.
     */
    @VisibleForTesting
    static JavaRDD<GATKRead> filterDuplicateSequences(final JavaRDD<GATKRead> reads) {
        return reads.mapToPair(PSFilter::canonicalizeRead)
                .groupByKey()
                .values()
                .map(group -> {
                    for (final GATKRead read : group) {
                        if (!read.isPaired()) {
                            return read;
                        }
                    }
                    return group.iterator().next();
                });
    }

    /**
     * Checks, when the input is flagged as aligned, that every contig in
     * {@code alignmentContigsToIgnore} is present in the header's sequence dictionary.
     *
     * @throws UserException.BadInput on the first contig that is not found
     */
    private void validateFilterArguments() {
        if (!this.filterArgs.alignedInput) {
            return;
        }
        final SAMSequenceDictionary dictionary = this.header.getSequenceDictionary();
        final Set<String> contigsToIgnoreSet = new HashSet<>(this.filterArgs.alignmentContigsToIgnore);
        for (final String contig : contigsToIgnoreSet) {
            if (dictionary.getSequence(contig) == null) {
                throw new UserException.BadInput("Ignored sequence " + contig + " not found in input header.");
            }
        }
    }

    /**
     * Pairs a read with a strand-independent key: the smaller of the 64-bit hashes of its bases
     * and of their reverse complement.
     *
     * @param read read to key
     * @return tuple of (key, the same read)
     */
    @VisibleForTesting
    static Tuple2<Long, GATKRead> canonicalizeRead(final GATKRead read) {
        // Operates on the array returned by getBases(); presumably a copy, so the read is untouched.
        final byte[] bases = read.getBases();
        final long hashForward = SVUtils.fnvByteArray64(bases);
        SequenceUtil.reverseComplement(bases);
        final long hashReverse = SVUtils.fnvByteArray64(bases);
        return new Tuple2<>(Math.min(hashForward, hashReverse), read);
    }

    /**
     * Filters the reads with a {@link ContainsKmerReadFilterSpark} built from the given k-mer
     * library path and count threshold.
     */
    private static JavaRDD<GATKRead> doKmerFiltering(final JavaRDD<GATKRead> reads, final String kmerLibPath, final int countThresh) {
        return reads.filter(new ContainsKmerReadFilterSpark(kmerLibPath, countThresh));
    }

    /**
     * Runs a {@link PSBwaFilter} over each partition of the reads.
     *
     * @param reads         input reads
     * @param indexFileName BWA index image handed to the filter
     * @param minSeedLength minimum seed length handed to the filter
     * @param numThreads    thread count handed to the filter
     * @param minIdentity   identity threshold handed to the filter
     * @return the reads emitted by the filter
     */
    @VisibleForTesting
    static JavaRDD<GATKRead> doBwaFilter(final JavaRDD<GATKRead> reads, final String indexFileName, final int minSeedLength, final int numThreads, final int minIdentity) {
        return reads.mapPartitions(itr ->
                new PSBwaFilter(indexFileName, minIdentity, minSeedLength, numThreads, false).apply(itr));
    }

    /**
     * Runs the full filtering pipeline.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>keep primary reads;</li>
     *   <li>if the input is flagged as aligned, apply the {@link HostAlignmentReadFilter};</li>
     *   <li>clear alignments and strip mate numbers from read names;</li>
     *   <li>unless filters are skipped, apply the quality/complexity transformers and filters;</li>
     *   <li>if configured, apply k-mer filtering;</li>
     *   <li>unless skipped, repartition by read name;</li>
     *   <li>if configured, apply BWA filtering;</li>
     *   <li>if configured, fix pair flags and remove duplicate sequences;</li>
     *   <li>fix pair flags, clear alignments again, and split into paired and unpaired reads.</li>
     * </ol>
     * Read counts are reported to {@code filterLogger} between stages.
     *
     * @param reads        input reads; must not be null
     * @param filterLogger receiver of the intermediate read sets for logging; must not be null
     * @return tuple of (paired reads, unpaired reads)
     */
    public Tuple2<JavaRDD<GATKRead>, JavaRDD<GATKRead>> doFilter(JavaRDD<GATKRead> reads, final PSFilterLogger filterLogger) {
        Utils.nonNull(reads, "Input reads cannot be null");
        Utils.nonNull(filterLogger, "Filter logger cannot be null");

        reads = PSUtils.primaryReads(reads);
        filterLogger.logPrimaryReads(reads);

        if (this.filterArgs.alignedInput) {
            final Set<String> contigsToIgnoreSet = Collections.unmodifiableSet(new HashSet<>(this.filterArgs.alignmentContigsToIgnore));
            reads = reads.filter(new ReadFilterSparkifier(new HostAlignmentReadFilter(this.filterArgs.minIdentity, contigsToIgnoreSet)));
        }
        filterLogger.logReadsAfterPrealignedHostFilter(reads);

        reads = PSFilter.clearAllAlignments(reads, this.header);
        reads = reads.map(new ReadTransformerSparkifier(new StripMateNumberTransformer()));

        if (!this.filterArgs.skipFilters) {
            reads = this.applyQualityFilters(reads);
        }
        filterLogger.logReadsAfterQualityFilter(reads);

        if (this.filterArgs.kmerFilePath != null) {
            reads = PSFilter.doKmerFiltering(reads, this.filterArgs.kmerFilePath, this.filterArgs.hostKmerThresh);
        }
        if (!this.filterArgs.skipPreBwaRepartition) {
            reads = PSFilter.repartitionReadsByName(reads);
        }
        if (this.filterArgs.indexImageFile != null) {
            reads = PSFilter.doBwaFilter(reads, this.filterArgs.indexImageFile, this.filterArgs.minSeedLength, this.filterArgs.bwaThreads, this.filterArgs.minIdentity);
        }
        filterLogger.logReadsAfterHostFilter(reads);

        if (this.filterArgs.filterDuplicates) {
            reads = PSFilter.setPairFlags(reads, this.filterArgs.filterReadsPerPartition);
            reads = PSFilter.filterDuplicateSequences(reads);
        }
        filterLogger.logReadsAfterDeduplication(reads);

        // Earlier stages may have removed one read of a pair, so pairing flags are recomputed here.
        reads = PSFilter.setPairFlags(reads, this.filterArgs.filterReadsPerPartition);
        reads = PSFilter.clearAllAlignments(reads, this.header);

        final PSPairedUnpairedSplitterSpark splitter = new PSPairedUnpairedSplitterSpark(reads, this.filterArgs.filterReadsPerPartition, false);
        final JavaRDD<GATKRead> pairedReads = splitter.getPairedReads();
        final JavaRDD<GATKRead> unpairedReads = splitter.getUnpairedReads();
        filterLogger.logFinalPairedReads(pairedReads);
        return new Tuple2<>(pairedReads, unpairedReads);
    }

    /**
     * Applies, in order: adapter trimming, two simple-repeat masking passes, DUST masking,
     * base-quality clipping, a minimum read-length filter, a base-quality transformer, and an
     * ambiguous-base filter, each configured from the filter arguments.
     */
    private JavaRDD<GATKRead> applyQualityFilters(final JavaRDD<GATKRead> reads) {
        return reads
                .map(new ReadTransformerSparkifier(new AdapterTrimTransformer(this.filterArgs.maxAdapterMismatches, this.filterArgs.minAdapterLength, ADAPTER_SEQUENCES)))
                .map(new ReadTransformerSparkifier(new SimpleRepeatMaskTransformer(MAX_AT_CONTENT_1, MAX_GC_CONTENT_1, REPEAT_WINDOW_SIZE_1)))
                .map(new ReadTransformerSparkifier(new SimpleRepeatMaskTransformer(MAX_AT_CONTENT_2, MAX_GC_CONTENT_2, REPEAT_WINDOW_SIZE_2)))
                .map(new ReadTransformerSparkifier(new DUSTReadTransformer(this.filterArgs.dustMask, this.filterArgs.dustW, this.filterArgs.dustT)))
                .map(new ReadTransformerSparkifier(new BaseQualityClipReadTransformer(this.filterArgs.readTrimThresh)))
                .filter(new ReadFilterSparkifier(new ReadLengthReadFilter(this.filterArgs.minReadLength, Integer.MAX_VALUE)))
                .map(new ReadTransformerSparkifier(new BaseQualityReadTransformer(this.filterArgs.qualPhredThresh)))
                .filter(new ReadFilterSparkifier(new AmbiguousBaseReadFilter(this.filterArgs.maxAmbiguousBases)));
    }

    /**
     * Asks {@code BwaMemIndexCache} and {@code ContainsKmerReadFilterSpark} to close all of their
     * distributed instances for this filter's Spark context.
     */
    @Override
    public void close() {
        BwaMemIndexCache.closeAllDistributedInstances(this.ctx);
        ContainsKmerReadFilterSpark.closeAllDistributedInstances(this.ctx);
    }
}

