/*
 * Decompiled with CFR 0.152.
 */
package org.monarchinitiative.sgenes.gtf.io.impl;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Optional;
import java.util.Queue;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.zip.GZIPInputStream;
import org.monarchinitiative.sgenes.gtf.io.gtf.GtfRecord;
import org.monarchinitiative.sgenes.gtf.io.gtf.GtfRecordParser;
import org.monarchinitiative.sgenes.gtf.io.impl.GtfGeneData;
import org.monarchinitiative.sgenes.gtf.model.Biotype;
import org.monarchinitiative.sgenes.model.Gene;
import org.monarchinitiative.sgenes.model.GeneIdentifier;
import org.monarchinitiative.sgenes.model.Transcript;
import org.monarchinitiative.svart.CoordinateSystem;
import org.monarchinitiative.svart.Coordinates;
import org.monarchinitiative.svart.GenomicRegion;
import org.monarchinitiative.svart.assembly.GenomicAssembly;
import org.monarchinitiative.svart.assembly.SequenceRole;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Skeleton of an iterator that assembles genes from the records of a GTF file.
 * <p>
 * The file is consumed in batches (see {@link #readPrimaryContigAndSubsequentNonPrimaryContigRecords()}).
 * The records of a batch are grouped by gene ID, each group is turned into a gene with the help of the
 * abstract hooks implemented by the subclass, and the assembled genes are buffered in an internal queue
 * from which {@link #next()} serves them.
 * <p>
 * The underlying reader is closed by {@link #hasNext()} once no more genes are available. An iteration that
 * is abandoned before {@code hasNext()} returns {@code false} leaves the reader open.
 * <p>
 * NOTE: the constructor reads the first batch and therefore invokes the abstract hooks before the subclass
 * constructor body has run; hook implementations must not depend on subclass state that is initialized there.
 *
 * @param <GENE>     type of the genes produced by the iterator
 * @param <METADATA> type of the gene metadata produced by {@link #parseGeneMetadata(String, GtfRecord)}
 * @param <TX>       type of the transcripts produced by
 *                   {@link #processTranscript(String, GtfRecord, List, GtfRecord, GtfRecord)}
 */
abstract class BaseGeneIterator<GENE extends Gene, METADATA, TX extends Transcript>
implements Iterator<GENE> {
    private static final Logger LOGGER = LoggerFactory.getLogger(BaseGeneIterator.class);
    private static final String TRANSCRIPT_ID_ATTRIBUTE_KEY = "transcript_id";
    private static final String EXON_NUMBER_ATTRIBUTE_KEY = "exon_number";
    private static final Set<String> MANDATORY_TRANSCRIPT_ATTRIBUTE_NAMES = Set.of(TRANSCRIPT_ID_ATTRIBUTE_KEY);
    private static final Set<String> MANDATORY_EXON_ATTRIBUTE_NAMES = Set.of(TRANSCRIPT_ID_ATTRIBUTE_KEY);
    protected final GenomicAssembly genomicAssembly;
    /** Genes that were assembled but not yet handed out by {@link #next()}. Never contains {@code null}. */
    private final Queue<GENE> queue = new LinkedList<>();
    private final Set<String> mandatoryTranscriptAttributes;
    private final Set<String> mandatoryExonAttributes;
    /** Reader of the GTF file; stays {@code null} if the file could not be opened. */
    protected BufferedReader reader;
    /** Set once {@link #hasNext()} has closed the reader, so that the reader is closed only once. */
    private boolean readerClosed;
    private String currentContig;
    /** The record that terminated the previous batch; it becomes the first record of the next batch. */
    private GtfRecord firstRecordOfNextContig;

    /**
     * Open the GTF file and pre-load the first batch of genes.
     * <p>
     * A failure to open the file is logged, not thrown. The iterator is then empty.
     *
     * @param gencodeGtfPath                path to a plain-text or gzipped (name ending with {@code .gz}) GTF file
     * @param genomicAssembly               assembly passed to the GTF record parser
     * @param mandatoryTranscriptAttributes attributes required on transcript records, in addition to {@code transcript_id}
     * @param mandatoryExonAttributes       attributes required on exon records, in addition to {@code transcript_id}
     */
    protected BaseGeneIterator(Path gencodeGtfPath, GenomicAssembly genomicAssembly, Set<String> mandatoryTranscriptAttributes, Set<String> mandatoryExonAttributes) {
        this.genomicAssembly = genomicAssembly;
        this.mandatoryTranscriptAttributes = setUnion(mandatoryTranscriptAttributes, MANDATORY_TRANSCRIPT_ATTRIBUTE_NAMES);
        this.mandatoryExonAttributes = setUnion(mandatoryExonAttributes, MANDATORY_EXON_ATTRIBUTE_NAMES);
        try {
            this.reader = openForReading(gencodeGtfPath);
            this.readNextContig();
        } catch (IOException e) {
            // The trailing throwable makes SLF4J log the stack trace as well as the message.
            LOGGER.warn("Error opening GTF at `{}`: {}", gencodeGtfPath, e.getMessage(), e);
        }
    }

    /**
     * @return a new mutable set with the elements of both {@code left} and {@code right}
     */
    private static Set<String> setUnion(Set<String> left, Set<String> right) {
        Set<String> union = new HashSet<>(left);
        union.addAll(right);
        return union;
    }

    /**
     * Read batches of records and queue the genes assembled from them, until at least one gene is queued
     * or the input is exhausted.
     * <p>
     * Reading continues past a batch that yields no gene (e.g. because all its genes were rejected during
     * assembly). Otherwise the empty queue would make {@link #hasNext()} end the iteration while unread
     * records remain in the file.
     */
    private void readNextContig() {
        while (this.queue.isEmpty()) {
            List<GtfRecord> records = this.readPrimaryContigAndSubsequentNonPrimaryContigRecords();
            if (records.isEmpty()) {
                // End of input, or a read error that was already logged.
                return;
            }
            Map<String, List<GtfRecord>> recordsByGeneId = records.stream()
                    .collect(Collectors.groupingBy(GtfRecord::geneId, Collectors.toUnmodifiableList()));
            recordsByGeneId.forEach((geneId, geneRecords) -> this.assembleGene(geneId, geneRecords).ifPresent(this.queue::add));
        }
    }

    /**
     * Read the next batch of records.
     * <p>
     * A batch starts with the record that terminated the previous batch (if any) and continues with all
     * parsed records until a record is met whose contig name differs from the current contig and whose
     * contig has the {@link SequenceRole#ASSEMBLED_MOLECULE} role. That record is kept aside for the next batch.
     * Records of contigs with any other sequence role are added to the running batch regardless of the contig name.
     * Lines starting with {@code #} and lines the parser returns no record for are skipped.
     *
     * @return records of the batch; an empty list at the end of input or when an {@link IOException} occurs
     */
    private List<GtfRecord> readPrimaryContigAndSubsequentNonPrimaryContigRecords() {
        List<GtfRecord> contigRecords = new ArrayList<>();
        try {
            if (this.firstRecordOfNextContig != null) {
                contigRecords.add(this.firstRecordOfNextContig);
                this.firstRecordOfNextContig = null;
            }
            String line;
            while ((line = this.reader.readLine()) != null) {
                if (line.startsWith("#")) {
                    continue;
                }
                Optional<GtfRecord> gtfRecord = GtfRecordParser.parseLine(line, this.genomicAssembly);
                if (gtfRecord.isEmpty()) {
                    continue;
                }
                GtfRecord record = gtfRecord.get();
                if (this.currentContig == null) {
                    // The very first record of the file defines the first contig.
                    this.currentContig = record.contigName();
                }
                boolean startsNextBatch = !this.currentContig.equals(record.contigName())
                        && record.contig().sequenceRole().equals(SequenceRole.ASSEMBLED_MOLECULE);
                if (startsNextBatch) {
                    this.currentContig = record.contigName();
                    this.firstRecordOfNextContig = record;
                    break;
                }
                contigRecords.add(record);
            }
        } catch (IOException e) {
            LOGGER.warn("Error occurred during reading GTF file: {}", e.getMessage(), e);
            // Do not assemble genes from a partially read batch.
            contigRecords.clear();
        }
        return contigRecords;
    }

    /**
     * Assemble a gene from all records that share the gene ID.
     *
     * @param geneId  ID of the gene
     * @param records records with the gene ID
     * @return the gene, or an empty optional if the records could not be partitioned, the gene record is missing,
     * the identifier or the metadata could not be parsed, or no transcript could be processed
     */
    private Optional<GENE> assembleGene(String geneId, List<GtfRecord> records) {
        Optional<GtfGeneData> partitioned = this.partitionGtfLines(geneId, records);
        if (partitioned.isEmpty()) {
            return Optional.empty();
        }
        GtfGeneData gtfGeneData = partitioned.get();
        GtfRecord gene = gtfGeneData.gene();
        if (gene == null) {
            LOGGER.warn("Did not find gene GTF row for gene {}", geneId);
            return Optional.empty();
        }
        Optional<GeneIdentifier> geneIdentifier = this.parseGeneIdentifier(geneId, gene);
        if (geneIdentifier.isEmpty()) {
            return Optional.empty();
        }
        Optional<METADATA> metadata = this.parseGeneMetadata(geneId, gene);
        if (metadata.isEmpty()) {
            return Optional.empty();
        }
        List<GtfRecord> transcripts = gtfGeneData.transcripts();
        Map<String, List<GtfRecord>> exons = gtfGeneData.exonsByTxId();
        Map<String, GtfRecord> startCodons = gtfGeneData.startCodonByTxId();
        Map<String, GtfRecord> stopCodons = gtfGeneData.stopCodonByTxId();
        List<TX> txs = new ArrayList<>(transcripts.size());
        for (GtfRecord txRecord : transcripts) {
            String txId = txRecord.firstAttribute(TRANSCRIPT_ID_ATTRIBUTE_KEY);
            // The exon list and the codons are null if the transcript has no such records.
            this.processTranscript(txId, txRecord, exons.get(txId), startCodons.get(txId), stopCodons.get(txId))
                    .ifPresent(txs::add);
        }
        if (txs.isEmpty()) {
            LOGGER.warn("No transcripts could be parsed for gene `{}`", geneId);
            return Optional.empty();
        }
        return Optional.of(this.newGeneInstance(geneIdentifier.get(), gene.location(), txs, metadata.get()));
    }

    /**
     * Parse the gene identifier.
     *
     * @param var1 gene ID
     * @param var2 the gene record
     * @return the identifier; an empty optional makes the iterator skip the gene
     */
    protected abstract Optional<GeneIdentifier> parseGeneIdentifier(String var1, GtfRecord var2);

    /**
     * Parse the gene metadata.
     *
     * @param var1 gene ID
     * @param var2 the gene record
     * @return the metadata; an empty optional makes the iterator skip the gene
     */
    protected abstract Optional<METADATA> parseGeneMetadata(String var1, GtfRecord var2);

    /**
     * Create a transcript from its records.
     *
     * @param var1 transcript ID
     * @param var2 the transcript record
     * @param var3 exon records of the transcript, {@code null} if no exon record was seen for the transcript
     * @param var4 start codon record of the transcript, {@code null} if absent
     * @param var5 stop codon record of the transcript, {@code null} if absent
     * @return the transcript; an empty optional makes the iterator skip the transcript
     */
    protected abstract Optional<TX> processTranscript(String var1, GtfRecord var2, List<GtfRecord> var3, GtfRecord var4, GtfRecord var5);

    /**
     * Create the gene from the parsed parts.
     *
     * @param var1 gene identifier
     * @param var2 location of the gene record
     * @param var3 transcripts of the gene, never empty when called by this class
     * @param var4 gene metadata
     * @return the gene, must not be {@code null}
     */
    protected abstract GENE newGeneInstance(GeneIdentifier var1, GenomicRegion var2, List<TX> var3, METADATA var4);

    /**
     * Sort the records of a gene by the feature type.
     * <p>
     * Exons and start/stop codons are keyed by the {@code transcript_id} attribute. Start/stop codon records
     * without the attribute are logged and skipped. Records of other feature types are ignored.
     *
     * @param geneId  ID of the gene
     * @param records records with the gene ID
     * @return the partitioned records, or an empty optional if a second gene record is met, or if a transcript
     * or an exon record lacks a mandatory attribute. The gene of the returned data is {@code null} if no
     * gene record was seen.
     */
    protected Optional<GtfGeneData> partitionGtfLines(String geneId, List<GtfRecord> records) {
        GtfRecord gene = null;
        List<GtfRecord> transcripts = new LinkedList<>();
        Map<String, List<GtfRecord>> exons = new HashMap<>();
        Map<String, GtfRecord> startCodons = new HashMap<>();
        Map<String, GtfRecord> stopCodons = new HashMap<>();
        for (GtfRecord record : records) {
            switch (record.feature()) {
                case GENE: {
                    if (gene != null) {
                        LOGGER.warn("2nd gene record was seen for gene {}: `{}`", geneId, record);
                        return Optional.empty();
                    }
                    gene = record;
                    break;
                }
                case TRANSCRIPT: {
                    if (!record.attributes().containsAll(this.mandatoryTranscriptAttributes)) {
                        return reportMissingAttributesAndReturn(record, this.mandatoryTranscriptAttributes);
                    }
                    transcripts.add(record);
                    break;
                }
                case EXON: {
                    if (!record.attributes().containsAll(this.mandatoryExonAttributes)) {
                        return reportMissingAttributesAndReturn(record, this.mandatoryExonAttributes);
                    }
                    String exonTxId = record.firstAttribute(TRANSCRIPT_ID_ATTRIBUTE_KEY);
                    exons.computeIfAbsent(exonTxId, txId -> new ArrayList<>()).add(record);
                    break;
                }
                case START_CODON: {
                    String startCodonTxId = record.firstAttribute(TRANSCRIPT_ID_ATTRIBUTE_KEY);
                    if (startCodonTxId == null) {
                        LOGGER.warn("Missing `transcript_id` in start codon record for {}: `{}`", geneId, record);
                    } else {
                        startCodons.put(startCodonTxId, record);
                    }
                    break;
                }
                case STOP_CODON: {
                    String stopCodonTxId = record.firstAttribute(TRANSCRIPT_ID_ATTRIBUTE_KEY);
                    if (stopCodonTxId == null) {
                        LOGGER.warn("Missing `transcript_id` in stop codon record for {}: `{}`", geneId, record);
                    } else {
                        stopCodons.put(stopCodonTxId, record);
                    }
                    break;
                }
                default:
                    // Records of other feature types are not used for gene assembly.
                    break;
            }
        }
        return Optional.of(new GtfGeneData(gene, transcripts, exons, startCodons, stopCodons));
    }

    /**
     * Log the mandatory attributes that are missing in the record.
     *
     * @return always an empty optional, so that the caller can report and return in a single statement
     */
    private static <T> Optional<T> reportMissingAttributesAndReturn(GtfRecord record, Set<String> mandatoryGeneAttributeNames) {
        String missing = mandatoryGeneAttributeNames.stream()
                .filter(attribute -> !record.hasAttribute(attribute))
                .collect(Collectors.joining("`, `", "`", "`"));
        LOGGER.warn("Missing required attributes {} in record `{}`", missing, record);
        return Optional.empty();
    }

    /**
     * Open the GTF file for reading. The file is assumed to be gzipped if its name ends with {@code .gz}.
     * The content is decoded as UTF-8 in both cases.
     *
     * @param path path to the GTF file
     * @return reader of the (decompressed) file content
     * @throws IOException if the file cannot be opened or if the gzip header cannot be read
     */
    protected static BufferedReader openForReading(Path path) throws IOException {
        LOGGER.debug("Opening Gencode GTF file at `{}`", path.toAbsolutePath());
        if (path.toFile().getName().endsWith(".gz")) {
            LOGGER.debug("Assuming the file is GZipped");
            FileInputStream fileStream = new FileInputStream(path.toFile());
            try {
                // Explicit charset for consistency with `Files.newBufferedReader`, which decodes UTF-8.
                return new BufferedReader(new InputStreamReader(new GZIPInputStream(fileStream), StandardCharsets.UTF_8));
            } catch (IOException e) {
                // The GZIPInputStream constructor reads the header and may fail; do not leak the file handle.
                try {
                    fileStream.close();
                } catch (IOException closeError) {
                    e.addSuppressed(closeError);
                }
                throw e;
            }
        }
        LOGGER.debug("Opening GTF as a plain text");
        return Files.newBufferedReader(path);
    }

    /**
     * Check if more genes are available. The reader is closed the first time the method finds no more genes.
     *
     * @return {@code true} if {@link #next()} will return a gene
     */
    @Override
    public boolean hasNext() {
        if (!this.queue.isEmpty()) {
            return true;
        }
        this.closeReader();
        return false;
    }

    /** Close the reader unless it was never opened or it has been closed already. */
    private void closeReader() {
        if (this.reader == null || this.readerClosed) {
            return;
        }
        this.readerClosed = true;
        LOGGER.debug("Closing the file.");
        try {
            this.reader.close();
        } catch (IOException e) {
            LOGGER.warn("Error closing the reader: {}", e.getMessage(), e);
        }
    }

    /**
     * Get the next gene. The next batch of records is read eagerly when the last queued gene is handed out,
     * so that {@link #hasNext()} only needs to inspect the queue.
     *
     * @return the next gene
     * @throws NoSuchElementException if there are no more genes
     */
    @Override
    public GENE next() {
        GENE current = this.queue.poll();
        if (current == null) {
            // The queue never holds null, hence null means that the queue is empty.
            throw new NoSuchElementException("No more genes are available");
        }
        if (this.queue.isEmpty()) {
            this.readNextContig();
        }
        return current;
    }

    /**
     * Convert exon records into coordinates ordered by the {@code exon_number} attribute.
     * <p>
     * The coordinates of the exon with number <em>n</em> are stored at index <em>n - 1</em>. The returned list
     * has as many elements as there are exon records and it contains {@code null} at positions whose exon
     * number did not occur, e.g. when two records share an exon number.
     *
     * @param exonRecords exon records of a transcript
     * @return fixed-size list of exon coordinates
     * @throws NumberFormatException          if {@code exon_number} is missing or is not an integer
     * @throws ArrayIndexOutOfBoundsException if an exon number is less than 1 or greater than the number of exon records
     */
    protected static List<Coordinates> processExons(List<GtfRecord> exonRecords) {
        Coordinates[] exons = new Coordinates[exonRecords.size()];
        for (GtfRecord exon : exonRecords) {
            int exonNumber = Integer.parseInt(exon.firstAttribute(EXON_NUMBER_ATTRIBUTE_KEY));
            exons[exonNumber - 1] = Coordinates.of(exon.coordinateSystem(), exon.start(), exon.end());
        }
        return Arrays.asList(exons);
    }

    /**
     * Map a biotype label to {@link Biotype}. The matching is case-insensitive and locale-independent.
     *
     * @param geneType biotype label, e.g. {@code protein_coding}; must not be {@code null}
     * @return the matching biotype or {@link Biotype#unknown} for an unrecognized label; the optional is never empty
     */
    protected static Optional<Biotype> parseBiotype(String geneType) {
        // Locale.ROOT: under e.g. the Turkish locale the default lower-casing maps `I` to a dotless `ı`,
        // which would prevent matching of labels such as `IG_C_gene` or `miRNA`.
        switch (geneType.toLowerCase(Locale.ROOT)) {
            case "protein_coding":
                return Optional.of(Biotype.protein_coding);
            case "mirna":
                return Optional.of(Biotype.miRNA);
            case "rrna":
                return Optional.of(Biotype.rRNA);
            case "srna":
                return Optional.of(Biotype.sRNA);
            case "scrna":
                return Optional.of(Biotype.scRNA);
            case "snrna":
                return Optional.of(Biotype.snRNA);
            case "scarna":
                return Optional.of(Biotype.scaRNA);
            case "snorna":
                return Optional.of(Biotype.snoRNA);
            case "vault_rna":
                return Optional.of(Biotype.vaultRNA);
            case "misc_rna":
                return Optional.of(Biotype.misc_RNA);
            case "lncrna":
                return Optional.of(Biotype.lncRNA);
            case "pseudogene":
                return Optional.of(Biotype.pseudogene);
            case "processed_pseudogene":
                return Optional.of(Biotype.processed_pseudogene);
            case "transcribed_processed_pseudogene":
                return Optional.of(Biotype.transcribed_processed_pseudogene);
            case "translated_processed_pseudogene":
                return Optional.of(Biotype.translated_processed_pseudogene);
            case "transcribed_unprocessed_pseudogene":
                return Optional.of(Biotype.transcribed_unprocessed_pseudogene);
            case "translated_unprocessed_pseudogene":
                return Optional.of(Biotype.translated_unprocessed_pseudogene);
            case "unitary_pseudogene":
                return Optional.of(Biotype.unitary_pseudogene);
            case "transcribed_unitary_pseudogene":
                return Optional.of(Biotype.transcribed_unitary_pseudogene);
            case "unprocessed_pseudogene":
                return Optional.of(Biotype.unprocessed_pseudogene);
            case "polymorphic_pseudogene":
                return Optional.of(Biotype.polymorphic_pseudogene);
            case "ig_c_gene":
                return Optional.of(Biotype.IG_C_gene);
            case "ig_j_gene":
                return Optional.of(Biotype.IG_J_gene);
            case "ig_v_gene":
                return Optional.of(Biotype.IG_V_gene);
            case "ig_d_gene":
                return Optional.of(Biotype.IG_D_gene);
            case "ig_pseudogene":
                return Optional.of(Biotype.IG_pseudogene);
            case "ig_c_pseudogene":
                return Optional.of(Biotype.IG_C_pseudogene);
            case "ig_j_pseudogene":
                return Optional.of(Biotype.IG_J_pseudogene);
            case "ig_v_pseudogene":
                return Optional.of(Biotype.IG_V_pseudogene);
            case "tr_c_gene":
                return Optional.of(Biotype.TR_C_gene);
            case "tr_j_gene":
                return Optional.of(Biotype.TR_J_gene);
            case "tr_v_gene":
                return Optional.of(Biotype.TR_V_gene);
            case "tr_d_gene":
                return Optional.of(Biotype.TR_D_gene);
            case "tr_v_pseudogene":
                return Optional.of(Biotype.TR_V_pseudogene);
            case "tr_j_pseudogene":
                return Optional.of(Biotype.TR_J_pseudogene);
            case "mt_rrna":
                return Optional.of(Biotype.MT_rRNA);
            case "mt_trna":
                return Optional.of(Biotype.MT_tRNA);
            default:
                return Optional.of(Biotype.unknown);
        }
    }

    /**
     * Create coordinates spanning from the start of the start codon to the end of the stop codon,
     * expressed in the coordinate system of the transcript location.
     *
     * @param startCodon start codon record or {@code null}
     * @param stopCodon  stop codon record or {@code null}
     * @param txId       transcript ID, used for logging only
     * @param tx         the transcript record
     * @return the coordinates, or an empty optional if a codon is missing. A warning is logged when exactly
     * one of the codons is missing; nothing is logged when both are missing.
     */
    protected static Optional<Coordinates> createCdsCoordinates(GtfRecord startCodon, GtfRecord stopCodon, String txId, GtfRecord tx) {
        if (startCodon == null && stopCodon == null) {
            // No codon at all; no warning is logged for this case.
            return Optional.empty();
        }
        if (startCodon == null) {
            LOGGER.warn("Start codon is missing for transcript `{}`: {}", txId, tx);
            return Optional.empty();
        }
        if (stopCodon == null) {
            LOGGER.warn("Stop codon is missing for transcript `{}`: {}", txId, tx);
            return Optional.empty();
        }
        CoordinateSystem cs = tx.location().coordinateSystem();
        Coordinates cds = Coordinates.of(cs, startCodon.startWithCoordinateSystem(cs), stopCodon.endWithCoordinateSystem(cs));
        return Optional.of(cds);
    }
}

