/*
 * Decompiled with CFR 0.152.
 */
package org.broadinstitute.hellbender.tools.spark.pathseq;

import java.io.BufferedReader;
import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.ArgumentCollection;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.cmdline.CommandLineProgram;
import org.broadinstitute.hellbender.cmdline.argumentcollections.ReferenceInputArgumentCollection;
import org.broadinstitute.hellbender.cmdline.argumentcollections.RequiredReferenceInputArgumentCollection;
import org.broadinstitute.hellbender.cmdline.programgroups.MetagenomicsProgramGroup;
import org.broadinstitute.hellbender.engine.ReferenceDataSource;
import org.broadinstitute.hellbender.exceptions.GATKException;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.tools.spark.pathseq.PSBuildReferenceTaxonomyUtils;
import org.broadinstitute.hellbender.tools.spark.pathseq.PSPathogenReferenceTaxonProperties;
import org.broadinstitute.hellbender.tools.spark.pathseq.PSTaxonomyDatabase;
import org.broadinstitute.hellbender.tools.spark.pathseq.PSTree;
import org.broadinstitute.hellbender.tools.spark.pathseq.PSUtils;
import scala.Tuple2;

@DocumentedFeature
@CommandLineProgramProperties(summary="Build an annotated taxonomy datafile for a given microbe reference. The output file from this tool is required to run the PathSeq pipeline.", oneLineSummary="Builds a taxonomy datafile of the microbe reference", programGroup=MetagenomicsProgramGroup.class)
public class PathSeqBuildReferenceTaxonomy
extends CommandLineProgram {
    public static final String REFSEQ_CATALOG_LONG_NAME = "refseq-catalog";
    public static final String REFSEQ_CATALOG_SHORT_NAME = "RC";
    public static final String GENBANK_CATALOG_LONG_NAME = "genbank-catalog";
    public static final String GENBANK_CATALOG_SHORT_NAME = "GC";
    public static final String TAX_DUMP_LONG_NAME = "tax-dump";
    public static final String TAX_DUMP_SHORT_NAME = "TD";
    public static final String MIN_NON_VIRUS_CONTIG_LENGTH_LONG_NAME = "min-non-virus-contig-length";
    public static final String MIN_NON_VIRUS_CONTIG_LENGTH_SHORT_NAME = "min-non-virus-contig-length";
    @ArgumentCollection
    protected final ReferenceInputArgumentCollection referenceArguments = new RequiredReferenceInputArgumentCollection();
    @Argument(doc="Local path for the output file. By convention, the extension should be \".db\"", shortName="O", fullName="output")
    public String outputPath;
    @Argument(doc="Local path to catalog file (RefSeq-releaseXX.catalog.gz available at ftp://ftp.ncbi.nlm.nih.gov/refseq/release/release-catalog/)", fullName="refseq-catalog", shortName="RC", optional=true)
    public String refseqCatalogPath = null;
    @Argument(doc="Local path to Genbank catalog file (gbXXX.catalog.XXX.txt.gz at ftp://ftp.ncbi.nlm.nih.gov/genbank/catalog/)", fullName="genbank-catalog", shortName="GC", optional=true)
    public String genbankCatalogPath = null;
    @Argument(doc="Local path to taxonomy dump tarball (taxdump.tar.gz available at ftp://ftp.ncbi.nlm.nih.gov/pub/taxonomy/)", fullName="tax-dump", shortName="TD")
    public String taxdumpPath;
    @Argument(doc="Minimum reference contig length for non-viruses", fullName="min-non-virus-contig-length", shortName="min-non-virus-contig-length", minValue=0.0, minRecommendedValue=500.0, maxRecommendedValue=10000.0)
    public int minNonVirusContigLength = 0;

    @Override
    public Object doWork() {
        Throwable throwable;
        if (this.refseqCatalogPath == null && this.genbankCatalogPath == null) {
            throw new UserException.BadInput("At least one of --refseq-catalog or --genbank-catalog must be specified");
        }
        this.logger.info("Parsing reference and files... (this may take a few minutes)");
        ReferenceDataSource reference = ReferenceDataSource.of(this.referenceArguments.getReferencePath());
        if (reference.getSequenceDictionary() == null) {
            throw new UserException.BadInput("Reference sequence dictionary not found. Please build one using CreateSequenceDictionary.");
        }
        List referenceRecords = reference.getSequenceDictionary().getSequences();
        HashMap<Integer, PSPathogenReferenceTaxonProperties> taxIdToProperties = new HashMap<Integer, PSPathogenReferenceTaxonProperties>();
        Map<String, Tuple2<String, Long>> accessionToNameAndLength = PSBuildReferenceTaxonomyUtils.parseReferenceRecords(referenceRecords, taxIdToProperties);
        Set<String> accessionsNotFound = null;
        if (this.refseqCatalogPath != null) {
            try {
                throwable = null;
                try (BufferedReader refseqCatalogStreamReader = PSBuildReferenceTaxonomyUtils.getBufferedReaderGz(this.refseqCatalogPath);){
                    accessionsNotFound = PSBuildReferenceTaxonomyUtils.parseCatalog(refseqCatalogStreamReader, accessionToNameAndLength, taxIdToProperties, false, null);
                }
                catch (Throwable throwable2) {
                    throwable = throwable2;
                    throw throwable2;
                }
            }
            catch (IOException e) {
                throw new GATKException("Error reading RefSeq catalog", e);
            }
        }
        if (this.genbankCatalogPath != null) {
            try {
                throwable = null;
                try (BufferedReader genbankCatalogStreamReader = PSBuildReferenceTaxonomyUtils.getBufferedReaderGz(this.genbankCatalogPath);){
                    accessionsNotFound = PSBuildReferenceTaxonomyUtils.parseCatalog(genbankCatalogStreamReader, accessionToNameAndLength, taxIdToProperties, true, accessionsNotFound);
                }
                catch (Throwable throwable3) {
                    throwable = throwable3;
                    throw throwable3;
                }
            }
            catch (IOException e) {
                throw new GATKException("Error reading GenBank catalog", e);
            }
        }
        if (accessionsNotFound != null && !accessionsNotFound.isEmpty()) {
            PSUtils.logItemizedWarning(this.logger, accessionsNotFound, "Did not find entries in the catalog for the following reference accessions");
        }
        try {
            throwable = null;
            try (BufferedReader namesStreamReader = PSBuildReferenceTaxonomyUtils.getBufferedReaderTarGz(this.taxdumpPath, "names.dmp");){
                PSBuildReferenceTaxonomyUtils.parseNamesFile(namesStreamReader, taxIdToProperties);
            }
            catch (Throwable throwable4) {
                throwable = throwable4;
                throw throwable4;
            }
        }
        catch (IOException e) {
            throw new GATKException("Error reading taxdump names files", e);
        }
        try {
            throwable = null;
            try (BufferedReader nodesStreamReader = PSBuildReferenceTaxonomyUtils.getBufferedReaderTarGz(this.taxdumpPath, "nodes.dmp");){
                Collection<Integer> taxNotFound = PSBuildReferenceTaxonomyUtils.parseNodesFile(nodesStreamReader, taxIdToProperties);
                PSUtils.logItemizedWarning(this.logger, taxNotFound, "Did not find entry from reference sequence names or the names file for following some tax ID's. Setting name to tax_<tax ID>");
            }
            catch (Throwable throwable5) {
                throwable = throwable5;
                throw throwable5;
            }
        }
        catch (IOException e) {
            throw new GATKException("Error reading taxdump names files", e);
        }
        this.logger.info("Building taxonomic database...");
        PSTree tree = PSBuildReferenceTaxonomyUtils.buildTaxonomicTree(taxIdToProperties);
        PSBuildReferenceTaxonomyUtils.removeUnusedTaxIds(taxIdToProperties, tree);
        Map<String, Integer> accessionToTaxId = PSBuildReferenceTaxonomyUtils.buildAccessionToTaxIdMap(taxIdToProperties, tree, this.minNonVirusContigLength);
        PSBuildReferenceTaxonomyUtils.writeTaxonomyDatabase(this.outputPath, new PSTaxonomyDatabase(tree, accessionToTaxId));
        return null;
    }
}

