/*
 * Decompiled with CFR 0.152.
 */
package org.broadinstitute.hellbender.tools.spark.pathseq;

import com.esotericsoftware.kryo.Kryo;
import com.esotericsoftware.kryo.io.Output;
import htsjdk.samtools.SAMSequenceRecord;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.zip.GZIPInputStream;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.tools.spark.pathseq.PSPathogenReferenceTaxonProperties;
import org.broadinstitute.hellbender.tools.spark.pathseq.PSTaxonomyDatabase;
import org.broadinstitute.hellbender.tools.spark.pathseq.PSTree;
import org.broadinstitute.hellbender.tools.spark.pathseq.PSUtils;
import org.broadinstitute.hellbender.utils.io.IOUtils;
import scala.Tuple2;

public final class PSBuildReferenceTaxonomyUtils {
    /** Logger shared by the static helpers in this class. */
    protected static final Logger logger = LogManager.getLogger(PSBuildReferenceTaxonomyUtils.class);
    /** Regex matching one vertical bar plus any whitespace on either side; used to split bar-delimited fields. */
    private static final String VERTICAL_BAR_DELIMITER_REGEX = "\\s*\\|\\s*";

    /**
     * Classifies every reference sequence record by how its taxon can be determined.
     *
     * <p>Each sequence name is split on vertical bars (surrounding whitespace ignored) and scanned for tag/value
     * pairs. A {@code taxid} tag with a non-zero value assigns the record directly to that taxon in
     * {@code taxIdToProperties}, using the full sequence name as the accession. Any other record is returned
     * keyed by accession: the value following a {@code ref} tag if one was seen, otherwise the first
     * space-delimited word of the first field.
     *
     * @param dictionaryList    reference sequence records to classify
     * @param taxIdToProperties taxon properties map; updated in place for records carrying an explicit taxid
     * @return map from accession to (full sequence name, sequence length) for records without an explicit taxid
     * @throws UserException.BadInput if a taxid value is not an integer, or if no accession can be derived
     *                                from a sequence name
     */
    protected static Map<String, Tuple2<String, Long>> parseReferenceRecords(List<SAMSequenceRecord> dictionaryList, Map<Integer, PSPathogenReferenceTaxonProperties> taxIdToProperties) {
        final Map<String, Tuple2<String, Long>> accessionToNameAndLength = new HashMap<>();
        for (final SAMSequenceRecord record : dictionaryList) {
            final String recordName = record.getSequenceName();
            final long recordLength = record.getSequenceLength();
            final String[] tokens = recordName.split(VERTICAL_BAR_DELIMITER_REGEX);

            String recordAccession = null;
            int recordTaxId = 0; // 0 doubles as the "no taxid found yet" sentinel
            // The last token has no successor, so it can never act as a tag; stop once a taxid is found.
            for (int i = 0; i < tokens.length - 1 && recordTaxId == 0; ++i) {
                if (tokens[i].equals("ref")) {
                    recordAccession = tokens[i + 1];
                } else if (tokens[i].equals("taxid")) {
                    recordTaxId = PSBuildReferenceTaxonomyUtils.parseTaxonId(tokens[i + 1]);
                }
            }

            if (recordTaxId != 0) {
                PSBuildReferenceTaxonomyUtils.addReferenceAccessionToTaxon(recordTaxId, recordName, recordLength, taxIdToProperties);
                continue;
            }

            if (recordAccession == null) {
                // String.split drops trailing empty strings, so a name made only of delimiters or spaces
                // yields an empty array; report that as bad input instead of failing on index 0.
                final String[] words = tokens.length == 0 ? tokens : tokens[0].split(" ");
                if (words.length == 0) {
                    throw new UserException.BadInput("Could not determine an accession from reference sequence name \"" + recordName + "\"");
                }
                recordAccession = words[0];
            }
            accessionToNameAndLength.put(recordAccession, new Tuple2<>(recordName, Long.valueOf(recordLength)));
        }
        return accessionToNameAndLength;
    }

    /**
     * Converts the textual form of a taxonomy ID to an {@code int}.
     *
     * @param taxonId text expected to hold a base-10 integer
     * @return the parsed ID
     * @throws UserException.BadInput if {@code taxonId} cannot be parsed as an integer
     */
    private static int parseTaxonId(String taxonId) {
        final int parsedId;
        try {
            parsedId = Integer.parseInt(taxonId);
        }
        catch (NumberFormatException e) {
            throw new UserException.BadInput("Expected taxonomy ID to be an integer but found \"" + taxonId + "\"", e);
        }
        return parsedId;
    }

    /**
     * Reads a tab-delimited accession catalog and attaches every known accession to the taxon listed for it.
     *
     * <p>Reading stops at end of stream or at the first empty line. Each line is trimmed and split on tabs; the
     * taxonomy ID and accession are taken from the columns defined by the selected catalog format. Accessions
     * absent from {@code accessionToNameAndLength} are ignored.
     *
     * @param reader                   source of catalog lines
     * @param accessionToNameAndLength accession to (sequence name, length) for the reference sequences of interest
     * @param taxIdToProperties        taxon properties map; updated in place with matched accessions
     * @param bGenBank                 {@code true} to use the GenBank column layout, {@code false} for RefSeq
     * @param accessionsNotFoundIn     accessions still unmatched from a previous call, or {@code null} to start
     *                                 from every key of {@code accessionToNameAndLength}
     * @return a new set holding the accessions that remain unmatched after this catalog
     * @throws UserException.BadInput              if a line has too few columns or a non-integer taxonomy ID
     * @throws UserException.CouldNotReadInputFile if reading from {@code reader} fails
     */
    protected static Set<String> parseCatalog(BufferedReader reader, Map<String, Tuple2<String, Long>> accessionToNameAndLength, Map<Integer, PSPathogenReferenceTaxonProperties> taxIdToProperties, boolean bGenBank, Set<String> accessionsNotFoundIn) {
        final AccessionCatalogFormat catalogFormat = bGenBank ? new GenBankCatalogFormat() : new RefSeqCatalogFormat();
        // Name the catalog type actually being parsed so the column-count error is not misleading for RefSeq.
        final String catalogName = bGenBank ? "GenBank" : "RefSeq";
        final int taxIdColumnIndex = catalogFormat.getTaxIdColumn();
        final int accessionColumnIndex = catalogFormat.getAccessionColumn();
        final int minColumns = Math.max(taxIdColumnIndex, accessionColumnIndex) + 1;

        final Set<String> accessionsNotFoundOut = accessionsNotFoundIn == null
                ? new HashSet<>(accessionToNameAndLength.keySet())
                : new HashSet<>(accessionsNotFoundIn);
        try {
            String line;
            long lineNumber = 1L;
            while ((line = reader.readLine()) != null && !line.isEmpty()) {
                // Limit the split so columns beyond the ones needed are left unsplit in the final token.
                final String[] tokens = line.trim().split("\t", minColumns + 1);
                if (tokens.length < minColumns) {
                    throw new UserException.BadInput("Expected at least " + minColumns + " tab-delimited columns in " + catalogName + " catalog file, but only found " + tokens.length + " on line " + lineNumber);
                }
                final int taxId = PSBuildReferenceTaxonomyUtils.parseTaxonId(tokens[taxIdColumnIndex]);
                final String accession = tokens[accessionColumnIndex];
                final Tuple2<String, Long> nameAndLength = accessionToNameAndLength.get(accession);
                if (nameAndLength != null) {
                    PSBuildReferenceTaxonomyUtils.addReferenceAccessionToTaxon(taxId, nameAndLength._1, nameAndLength._2, taxIdToProperties);
                    accessionsNotFoundOut.remove(accession);
                }
                ++lineNumber;
            }
        }
        catch (IOException e) {
            throw new UserException.CouldNotReadInputFile("Error reading from catalog file", (Exception)e);
        }
        return accessionsNotFoundOut;
    }

    /**
     * Reads a taxonomy dump names file and records the scientific name of each taxon.
     *
     * <p>Lines are split on vertical bars. Only rows whose fourth column equals {@code scientific name} are
     * used: the first column is the taxonomy ID and the second is the name. An existing entry has its name
     * replaced; otherwise a new entry is created with that name.
     *
     * @param reader            source of names-file lines
     * @param taxIdToProperties taxon properties map, updated in place
     * @throws UserException.BadInput              if a line has fewer than 4 columns or a non-integer ID
     * @throws UserException.CouldNotReadInputFile if reading from {@code reader} fails
     */
    protected static void parseNamesFile(BufferedReader reader, Map<Integer, PSPathogenReferenceTaxonProperties> taxIdToProperties) {
        try {
            for (String row = reader.readLine(); row != null; row = reader.readLine()) {
                final String[] columns = row.split(VERTICAL_BAR_DELIMITER_REGEX);
                if (columns.length < 4) {
                    throw new UserException.BadInput("Expected at least 4 columns in tax dump names file but found " + columns.length);
                }
                if (columns[3].equals("scientific name")) {
                    final int taxId = PSBuildReferenceTaxonomyUtils.parseTaxonId(columns[0]);
                    final String scientificName = columns[1];
                    if (!taxIdToProperties.containsKey(taxId)) {
                        taxIdToProperties.put(taxId, new PSPathogenReferenceTaxonProperties(scientificName));
                    } else {
                        taxIdToProperties.get(taxId).setName(scientificName);
                    }
                }
            }
        }
        catch (IOException e) {
            throw new UserException.CouldNotReadInputFile("Error reading from taxonomy dump names file", (Exception)e);
        }
    }

    /**
     * Reads a taxonomy dump nodes file and records the rank and parent of each taxon.
     *
     * <p>Lines are split on vertical bars; the first three columns are the taxonomy ID, the parent ID and the
     * rank. A taxon not yet in the map is created with the placeholder name {@code tax_<id>} and its ID is
     * reported in the returned collection. The parent is not set for taxon ID 1.
     *
     * @param reader            source of nodes-file lines
     * @param taxIdToProperties taxon properties map, updated in place
     * @return IDs seen in the nodes file that had no prior entry in {@code taxIdToProperties}
     * @throws UserException.BadInput              if a line has fewer than 3 columns or a non-integer ID
     * @throws UserException.CouldNotReadInputFile if reading from {@code reader} fails
     */
    protected static Collection<Integer> parseNodesFile(BufferedReader reader, Map<Integer, PSPathogenReferenceTaxonProperties> taxIdToProperties) {
        final List<Integer> missingTaxIds = new ArrayList<>();
        try {
            for (String row = reader.readLine(); row != null; row = reader.readLine()) {
                final String[] columns = row.split(VERTICAL_BAR_DELIMITER_REGEX);
                if (columns.length < 3) {
                    throw new UserException.BadInput("Expected at least 3 columns in tax dump nodes file but found " + columns.length);
                }
                final int taxId = PSBuildReferenceTaxonomyUtils.parseTaxonId(columns[0]);
                final int parentTaxId = PSBuildReferenceTaxonomyUtils.parseTaxonId(columns[1]);
                final String rank = columns[2];

                final boolean alreadyKnown = taxIdToProperties.containsKey(taxId);
                final PSPathogenReferenceTaxonProperties properties = alreadyKnown
                        ? taxIdToProperties.get(taxId)
                        : new PSPathogenReferenceTaxonProperties("tax_" + taxId);
                if (!alreadyKnown) {
                    missingTaxIds.add(taxId);
                }

                properties.setRank(rank);
                // Taxon 1 is the ID used for the tree root elsewhere in this class, so it gets no parent.
                if (taxId != 1) {
                    properties.setParent(parentTaxId);
                }
                taxIdToProperties.put(taxId, properties);
            }
        }
        catch (IOException e) {
            throw new UserException.CouldNotReadInputFile("Error reading from taxonomy dump nodes file", (Exception)e);
        }
        return missingTaxIds;
    }

    /**
     * Registers an accession and its length under the given taxon, creating the taxon's entry if needed.
     *
     * @param taxId             taxonomy ID that owns the accession
     * @param accession         accession (or sequence name) to register
     * @param length            length of the accession's sequence
     * @param taxIdToProperties taxon properties map, updated in place
     */
    private static void addReferenceAccessionToTaxon(int taxId, String accession, long length, Map<Integer, PSPathogenReferenceTaxonProperties> taxIdToProperties) {
        // computeIfAbsent builds a properties object only when the taxon is new, rather than allocating a
        // throwaway instance on every call as putIfAbsent(taxId, new ...) does.
        taxIdToProperties.computeIfAbsent(taxId, id -> new PSPathogenReferenceTaxonProperties()).addAccession(accession, length);
    }

    /**
     * Drops every entry of {@code taxIdToProperties} whose taxonomy ID is not a node of {@code tree}.
     *
     * @param taxIdToProperties taxon properties map, pruned in place
     * @param tree              tree whose node IDs are the ones to keep
     */
    static void removeUnusedTaxIds(Map<Integer, PSPathogenReferenceTaxonProperties> taxIdToProperties, PSTree tree) {
        // keySet() is a live view of the map, so removing keys from it removes the map entries.
        final Set<Integer> mappedTaxIds = taxIdToProperties.keySet();
        mappedTaxIds.retainAll(tree.getNodeIDs());
    }

    /**
     * Builds the lookup from accession to the taxonomy ID it belongs to.
     *
     * <p>A taxon is treated as viral when its path in {@code tree} contains node 10239. Accessions of viral taxa
     * are always included; accessions of other taxa are included only when their length is at least
     * {@code minNonVirusContigLength}.
     *
     * @param taxIdToProperties       taxon properties holding each taxon's accessions and lengths
     * @param tree                    taxonomic tree used to decide whether a taxon is viral
     * @param minNonVirusContigLength minimum length for a non-viral accession to be kept
     * @return map from accession to taxonomy ID
     */
    protected static Map<String, Integer> buildAccessionToTaxIdMap(Map<Integer, PSPathogenReferenceTaxonProperties> taxIdToProperties, PSTree tree, int minNonVirusContigLength) {
        final Map<String, Integer> accessionToTaxId = new HashMap<>();
        for (final Map.Entry<Integer, PSPathogenReferenceTaxonProperties> entry : taxIdToProperties.entrySet()) {
            final int taxId = entry.getKey();
            // NOTE(review): 10239 is presumably the NCBI taxonomy ID of the Viruses root; confirm.
            final boolean isVirus = tree.getPathOf(taxId).contains(10239);
            final PSPathogenReferenceTaxonProperties properties = entry.getValue();
            for (final String accession : properties.getAccessions()) {
                if (isVirus || properties.getAccessionLength(accession) >= (long)minNonVirusContigLength) {
                    accessionToTaxId.put(accession, taxId);
                }
            }
        }
        return accessionToTaxId;
    }

    /**
     * Assembles the taxonomic tree from the collected taxon properties and prunes it to the relevant taxa.
     *
     * <p>Steps, in order: create a tree rooted at ID 1; add every other taxon that has a name, a non-zero parent
     * and a rank (the rest are logged as a warning); remove and log unreachable nodes; check the tree
     * structure; then keep only nodes lying on the path of some taxon that has at least one accession.
     *
     * @param taxIdToProperties taxon properties keyed by taxonomy ID
     * @return the pruned tree
     * @throws UserException.BadInput if no taxon with accessions is present in the tree
     */
    protected static PSTree buildTaxonomicTree(Map<Integer, PSPathogenReferenceTaxonProperties> taxIdToProperties) {
        final PSTree tree = new PSTree(1);

        // Pass 1: add every fully described taxon and remember the incomplete ones.
        final Set<Integer> invalidIds = new HashSet<>(taxIdToProperties.size());
        for (final Map.Entry<Integer, PSPathogenReferenceTaxonProperties> entry : taxIdToProperties.entrySet()) {
            final int taxId = entry.getKey();
            if (taxId == 1) {
                continue; // the root already exists
            }
            final PSPathogenReferenceTaxonProperties properties = entry.getValue();
            final boolean fullyDescribed = properties.getName() != null && properties.getParent() != 0 && properties.getRank() != null;
            if (fullyDescribed) {
                tree.addNode(taxId, properties.getName(), properties.getParent(), properties.getTotalLength(), properties.getRank());
            } else {
                invalidIds.add(taxId);
            }
        }
        PSUtils.logItemizedWarning(logger, invalidIds, "The following taxonomic IDs did not have name/taxonomy information (this may happen when the catalog and taxdump files are inconsistent)");

        final Set<Integer> unreachableNodes = tree.removeUnreachableNodes();
        if (!unreachableNodes.isEmpty()) {
            PSUtils.logItemizedWarning(logger, unreachableNodes, "Removed " + unreachableNodes.size() + " unreachable tree nodes");
        }
        tree.checkStructure();

        // Pass 2: collect the nodes on the path of each taxon that owns at least one reference accession.
        final HashSet<Integer> relevantNodes = new HashSet<>();
        for (final Map.Entry<Integer, PSPathogenReferenceTaxonProperties> entry : taxIdToProperties.entrySet()) {
            final int taxonId = entry.getKey();
            if (!entry.getValue().getAccessions().isEmpty() && tree.hasNode(taxonId)) {
                relevantNodes.addAll(tree.getPathOf(taxonId));
            }
        }
        if (relevantNodes.isEmpty()) {
            throw new UserException.BadInput("Did not find any taxa corresponding to reference sequence names.\n\nCheck that reference names follow one of the required formats:\n\n\t...|ref|<accession.version>|...\n\t...|taxid|<taxonomy_id>|...\n\t<accession.version><mask>...");
        }
        tree.retainNodes(relevantNodes);
        return tree;
    }

    /**
     * Opens a buffered reader over the file at {@code path} via {@code IOUtils.makeReaderMaybeGzipped}.
     *
     * @param path location of the file to open
     * @return a buffered reader over the file contents; the caller is responsible for closing it
     * @throws UserException.BadInput if the file cannot be opened
     */
    public static BufferedReader getBufferedReaderGz(String path) {
        final BufferedReader reader;
        try {
            reader = new BufferedReader(IOUtils.makeReaderMaybeGzipped(IOUtils.getPath(path)));
        }
        catch (IOException e) {
            throw new UserException.BadInput("Could not open file " + path, e);
        }
        return reader;
    }

    /**
     * Opens a buffered reader positioned at the start of one entry inside a gzip-compressed tar archive.
     *
     * <p>The archive is scanned entry by entry until one whose name equals {@code fileName} is found. On
     * success the returned reader owns the underlying archive stream, so closing the reader closes the
     * archive. On any failure the archive stream is closed before the exception propagates.
     *
     * @param tarPath  path of the {@code .tar.gz} archive on the local file system
     * @param fileName exact entry name to locate within the archive
     * @return a buffered reader over the entry's contents; the caller is responsible for closing it
     * @throws UserException.BadInput if the archive cannot be opened or read, or has no entry named
     *                                {@code fileName}
     */
    public static BufferedReader getBufferedReaderTarGz(String tarPath, String fileName) {
        // Outermost stream opened so far; closed in the finally block unless ownership passes to the caller.
        InputStream openedStream = null;
        boolean handedOff = false;
        try {
            openedStream = new FileInputStream(tarPath);
            final TarArchiveInputStream tarStream = new TarArchiveInputStream((InputStream)new GZIPInputStream(openedStream));
            openedStream = tarStream;
            for (TarArchiveEntry entry = tarStream.getNextTarEntry(); entry != null; entry = tarStream.getNextTarEntry()) {
                if (entry.getName().equals(fileName)) {
                    // NOTE(review): decodes with the platform default charset, as before; confirm whether an
                    // explicit charset should be used for taxonomy dump files.
                    final BufferedReader reader = new BufferedReader(new InputStreamReader((InputStream)tarStream));
                    handedOff = true;
                    return reader;
                }
            }
            throw new UserException.BadInput("Could not find file " + fileName + " in tarball " + tarPath);
        }
        catch (IOException e) {
            throw new UserException.BadInput("Could not open compressed tarball file " + fileName + " in " + tarPath, e);
        }
        finally {
            if (!handedOff && openedStream != null) {
                try {
                    openedStream.close();
                }
                catch (IOException ignored) {
                    // Best-effort cleanup: the exception already in flight describes the real failure.
                }
            }
        }
    }

    /**
     * Serializes the taxonomy database to a file with Kryo (reference tracking disabled).
     *
     * @param filePath         destination file path
     * @param taxonomyDatabase database object to serialize
     * @throws UserException.CouldNotCreateOutputFile if the destination file cannot be opened for writing
     */
    public static void writeTaxonomyDatabase(String filePath, PSTaxonomyDatabase taxonomyDatabase) {
        final Kryo kryo = new Kryo();
        kryo.setReferences(false);
        // try-with-resources closes the output (and the file beneath it) even if serialization throws.
        try (Output output = new Output((OutputStream)new FileOutputStream(filePath))) {
            kryo.writeObject(output, (Object)taxonomyDatabase);
        }
        catch (FileNotFoundException e) {
            throw new UserException.CouldNotCreateOutputFile("Could not serialize objects to file", (Exception)e);
        }
    }

    /** Column layout applied when a catalog is parsed as GenBank: taxonomy ID in column 6, accession in column 1. */
    private static final class GenBankCatalogFormat implements AccessionCatalogFormat {
        private static final int TAX_ID_COLUMN = 6;
        private static final int ACCESSION_COLUMN = 1;

        private GenBankCatalogFormat() {
        }

        /** @return the column index holding the taxonomy ID */
        @Override
        public int getTaxIdColumn() {
            return TAX_ID_COLUMN;
        }

        /** @return the column index holding the accession */
        @Override
        public int getAccessionColumn() {
            return ACCESSION_COLUMN;
        }
    }

    /** Column layout applied when a catalog is parsed as RefSeq: taxonomy ID in column 0, accession in column 2. */
    private static final class RefSeqCatalogFormat implements AccessionCatalogFormat {
        private static final int TAX_ID_COLUMN = 0;
        private static final int ACCESSION_COLUMN = 2;

        private RefSeqCatalogFormat() {
        }

        /** @return the column index holding the taxonomy ID */
        @Override
        public int getTaxIdColumn() {
            return TAX_ID_COLUMN;
        }

        /** @return the column index holding the accession */
        @Override
        public int getAccessionColumn() {
            return ACCESSION_COLUMN;
        }
    }

    private static interface AccessionCatalogFormat {
        public int getTaxIdColumn();

        public int getAccessionColumn();
    }
}

