/*
 * Decompiled with CFR 0.152.
 */
package org.monarchinitiative.phenol.annotations.assoc;

import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.Multimap;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.EndElement;
import javax.xml.stream.events.XMLEvent;
import org.monarchinitiative.phenol.annotations.formats.Gene;
import org.monarchinitiative.phenol.base.PhenolRuntimeException;
import org.monarchinitiative.phenol.ontology.data.TermId;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Parses an Orpha disease-to-gene XML file into a map from ORPHA disease ids to {@link Gene}s.
 *
 * <p>All parsing happens in the constructor:
 * <ol>
 *   <li>a tab-separated mim2gene file is read to map MIM ids to NCBI Gene ids,</li>
 *   <li>a hard-coded gene symbol to NCBI Gene id table is set up as a fallback,</li>
 *   <li>the XML file is streamed and, for every {@code DisorderGeneAssociation} element that has a
 *       disorder code, a disorder name and a gene symbol, one disease/gene pair is stored.</li>
 * </ol>
 * The result is available through {@link #getOrphaDiseaseToGeneSymbolMap()}.
 */
public class OrphaGeneToDiseaseParser {
    private static final Logger logger = LoggerFactory.getLogger(OrphaGeneToDiseaseParser.class);

    /* Local names of the XML elements the parser reacts to. */
    private static final String DISORDER = "Disorder";
    private static final String ORPHA_NUMBER = "OrphaCode";
    private static final String GENE_LIST = "GeneList";
    private static final String GENE = "Gene";
    private static final String NAME = "Name";
    private static final String DISORDER_GENE_ASSOCIATION_LIST = "DisorderGeneAssociationList";
    private static final String DISORDER_GENE_ASSOCIATION = "DisorderGeneAssociation";
    private static final String SYMBOL = "Symbol";
    private static final String EXTERNAL_REFERENCE = "ExternalReference";
    private static final String REFERENCE = "Reference";
    private static final String SOURCE = "Source";

    /** Text of a {@code Source} element that marks the enclosing external reference as an OMIM id. */
    private static final String OMIM = "OMIM";
    /** Prefix of the gene term ids built from the mim2gene file. */
    private static final String NCBI_GENE_PREFIX = "NCBIGene";
    /** Prefix of the disease term ids built from the XML file. */
    private static final String ORPHA_PREFIX = "ORPHA";

    /*
     * Parser state: each flag is true while the element of the same name is open.
     * No element is open before parsing starts, so every flag starts out false.
     */
    private boolean inDisorder = false;
    private boolean inGeneList = false;
    private boolean inGene = false;
    private boolean inDisorderGeneAssociation = false;
    private boolean inDisorderGeneAssociationList = false;
    private boolean inExternalReference = false;
    /** True after a {@code Source} element with text "OMIM" was seen in the open external reference. */
    private boolean inOMIMReference = false;

    /** Result: ORPHA disease id to the genes associated with it. */
    private final Multimap<TermId, Gene> orphaDiseaseToGeneMultiMap = ArrayListMultimap.create();
    /** MIM id (first column of the mim2gene file) to NCBI Gene id, for rows of type "gene". */
    private final Map<String, TermId> mimIdToGeneIdMap = new HashMap<>();
    /** Fallback used when no gene id can be found via the MIM id: gene symbol to NCBI Gene id. */
    private final Map<String, TermId> symbolToGeneIdMap = new HashMap<>();

    /**
     * Reads both input files and builds the disease-to-gene map.
     *
     * @param orphaGeneXMLfile the Orpha disease-to-gene XML file
     * @param mim2geneFile tab-separated file with MIM id, gene id and entry type in its first
     *     three columns
     * @throws PhenolRuntimeException if either file cannot be read or the XML cannot be parsed
     */
    public OrphaGeneToDiseaseParser(File orphaGeneXMLfile, File mim2geneFile) {
        this.parseMim2GeneMedgen(mim2geneFile);
        this.initMissingGeneIdMap();
        this.parseOrphaGeneXml(orphaGeneXMLfile);
    }

    /**
     * @return the multimap from ORPHA disease id to associated genes. This is the parser's own
     *     map instance, not a copy.
     */
    public Multimap<TermId, Gene> getOrphaDiseaseToGeneSymbolMap() {
        return this.orphaDiseaseToGeneMultiMap;
    }

    /**
     * Fills {@link #mimIdToGeneIdMap} from the tab-separated mim2gene file. Only lines whose third
     * column is exactly {@code "gene"} are used; lines with fewer than three columns are logged
     * and skipped.
     *
     * @param mim2geneFile the file to read
     * @throws PhenolRuntimeException if the file cannot be read
     */
    private void parseMim2GeneMedgen(File mim2geneFile) {
        try (BufferedReader br = new BufferedReader(new FileReader(mim2geneFile))) {
            String line;
            while ((line = br.readLine()) != null) {
                String[] fields = line.split("\t");
                if (fields.length < 3) {
                    logger.error("Malformed line (only {} fields): {}.", fields.length, line);
                    continue;
                }
                if (!"gene".equals(fields[2])) {
                    continue;
                }
                this.mimIdToGeneIdMap.put(fields[0], TermId.of(NCBI_GENE_PREFIX, fields[1]));
            }
        } catch (IOException e) {
            throw parseFailure("Could not parse mim2gene file because of I/O exception: ", e);
        }
        logger.info("Parsed {} OMIM id to NCBI Gene id mappings.", this.mimIdToGeneIdMap.size());
    }

    /**
     * Streams the XML file and fills {@link #orphaDiseaseToGeneMultiMap}. The input stream and the
     * XML reader are closed before this method returns, whether or not parsing succeeded.
     *
     * @param orphaGeneXMLfile the XML file to read
     * @throws PhenolRuntimeException if the file cannot be read or is not well-formed XML
     */
    private void parseOrphaGeneXml(File orphaGeneXMLfile) {
        XMLInputFactory inputFactory = XMLInputFactory.newInstance();
        // The parser needs neither DTDs nor external entities; switching them off keeps a
        // crafted input file from triggering external entity resolution (XXE).
        inputFactory.setProperty(XMLInputFactory.SUPPORT_DTD, Boolean.FALSE);
        inputFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);
        try (FileInputStream in = new FileInputStream(orphaGeneXMLfile)) {
            XMLEventReader eventReader = inputFactory.createXMLEventReader(in);
            try {
                this.readEvents(eventReader);
            } finally {
                // XMLEventReader is not AutoCloseable, so it cannot go into the resource list.
                eventReader.close();
            }
        } catch (IOException | XMLStreamException e) {
            throw parseFailure("Could not parse orpha disease to gene xml: ", e);
        }
    }

    /**
     * Event loop of the XML parser. Tracks which elements are open, collects the disorder code,
     * disorder name, gene symbol and OMIM reference, and stores one disease/gene pair each time a
     * {@code DisorderGeneAssociation} element closes.
     *
     * @param eventReader reader positioned at the start of the document
     * @throws XMLStreamException if the document is not well-formed
     */
    private void readEvents(XMLEventReader eventReader) throws XMLStreamException {
        String currentOrphanum = null;
        String currentDiseasename = null;
        String currentGeneSymbol = null;
        String currentOmimId = null;
        while (eventReader.hasNext()) {
            XMLEvent event = eventReader.nextEvent();
            if (event.isStartElement()) {
                if (this.inGeneList) {
                    // Everything inside a GeneList is ignored until the list is closed.
                    continue;
                }
                String localPart = event.asStartElement().getName().getLocalPart();
                // True for direct disorder data, i.e. not inside the association list.
                boolean atDisorderLevel = this.inDisorder && !this.inDisorderGeneAssociationList;
                if (localPart.equals(DISORDER)) {
                    this.inDisorder = true;
                } else if (atDisorderLevel && localPart.equals(ORPHA_NUMBER)) {
                    currentOrphanum = readText(eventReader);
                } else if (atDisorderLevel && localPart.equals(NAME)) {
                    currentDiseasename = readText(eventReader);
                } else if (atDisorderLevel && localPart.equals(GENE_LIST)) {
                    this.inGeneList = true;
                } else if (localPart.equals(DISORDER_GENE_ASSOCIATION_LIST)) {
                    this.inDisorderGeneAssociationList = true;
                } else if (this.inDisorderGeneAssociationList && localPart.equals(DISORDER_GENE_ASSOCIATION)) {
                    this.inDisorderGeneAssociation = true;
                } else if (this.inDisorderGeneAssociation && localPart.equals(GENE)) {
                    this.inGene = true;
                } else if (this.inDisorderGeneAssociation && localPart.equals(EXTERNAL_REFERENCE)) {
                    this.inExternalReference = true;
                } else if (this.inExternalReference && localPart.equals(SOURCE)) {
                    if (OMIM.equals(readText(eventReader))) {
                        this.inOMIMReference = true;
                    }
                } else if (this.inOMIMReference && localPart.equals(REFERENCE)) {
                    currentOmimId = readText(eventReader);
                } else if (this.inDisorder && this.inDisorderGeneAssociation
                        && this.inDisorderGeneAssociationList && this.inGene
                        && localPart.equals(SYMBOL)) {
                    currentGeneSymbol = readText(eventReader);
                }
            } else if (event.isEndElement()) {
                switch (event.asEndElement().getName().getLocalPart()) {
                    case DISORDER:
                        this.inDisorder = false;
                        break;
                    case GENE_LIST:
                        this.inGeneList = false;
                        break;
                    case EXTERNAL_REFERENCE:
                        this.inExternalReference = false;
                        this.inOMIMReference = false;
                        break;
                    case DISORDER_GENE_ASSOCIATION_LIST:
                        this.inDisorderGeneAssociationList = false;
                        currentOrphanum = null;
                        currentOmimId = null;
                        currentDiseasename = null;
                        break;
                    case DISORDER_GENE_ASSOCIATION:
                        this.inDisorderGeneAssociation = false;
                        if (currentOrphanum != null && currentDiseasename != null && currentGeneSymbol != null) {
                            this.recordAssociation(currentOrphanum, currentGeneSymbol, currentOmimId);
                        }
                        // The next association must not inherit this one's gene data.
                        currentGeneSymbol = null;
                        currentOmimId = null;
                        break;
                    case GENE:
                        this.inGene = false;
                        break;
                    default:
                        break;
                }
            }
        }
    }

    /**
     * Returns the text content of the element whose start tag was just read and consumes its end
     * tag. None of the elements read this way has an end-tag handler in {@link #readEvents}, so
     * consuming the end tag does not affect the state tracking.
     *
     * @param eventReader reader whose last event was the element's start tag
     * @return the element text, or {@code null} if the element is empty
     * @throws XMLStreamException if the element contains child elements or the XML is malformed
     */
    private static String readText(XMLEventReader eventReader) throws XMLStreamException {
        String text = eventReader.getElementText();
        return text.isEmpty() ? null : text;
    }

    /**
     * Stores one disease/gene pair. The gene id is looked up by OMIM id first and by gene symbol
     * second; if neither lookup succeeds the pair is logged and dropped.
     *
     * @param orphanum disorder code used to build the ORPHA term id
     * @param geneSymbol symbol of the associated gene
     * @param omimId OMIM reference of the gene, may be {@code null}
     */
    private void recordAssociation(String orphanum, String geneSymbol, String omimId) {
        TermId geneId = omimId == null ? null : this.mimIdToGeneIdMap.get(omimId);
        if (geneId == null) {
            geneId = this.symbolToGeneIdMap.get(geneSymbol);
        }
        if (geneId == null) {
            logger.error("Could not find OMIM gene id for {}.", geneSymbol);
            return;
        }
        this.orphaDiseaseToGeneMultiMap.put(TermId.of(ORPHA_PREFIX, orphanum), new Gene(geneId, geneSymbol));
    }

    /**
     * Builds the exception thrown for an unreadable input file. The message is the given prefix
     * followed by the cause's message; the cause is attached through {@code initCause} so the
     * original stack trace is kept.
     */
    private static PhenolRuntimeException parseFailure(String messagePrefix, Exception cause) {
        PhenolRuntimeException failure = new PhenolRuntimeException(messagePrefix + cause.getMessage());
        failure.initCause(cause);
        return failure;
    }

    /** Adds one entry to the gene symbol fallback table. */
    private void addMissingGeneId(String symbol, String ncbiGeneId) {
        this.symbolToGeneIdMap.put(symbol, TermId.of(ncbiGeneId));
    }

    /**
     * Fills {@link #symbolToGeneIdMap} with hard-coded gene symbol to NCBI Gene id pairs. The
     * table is consulted only when no gene id is found through the OMIM reference.
     */
    private void initMissingGeneIdMap() {
        this.addMissingGeneId("ATP5F1D", "NCBIGene:513");
        this.addMissingGeneId("CHD1", "NCBIGene:1105");
        this.addMissingGeneId("CD55", "NCBIGene:1604");
        this.addMissingGeneId("DLST", "NCBIGene:1743");
        this.addMissingGeneId("EPHB4", "NCBIGene:2050");
        this.addMissingGeneId("HLA-DPA1", "NCBIGene:3113");
        this.addMissingGeneId("IGH", "NCBIGene:3492");
        this.addMissingGeneId("OPA2", "NCBIGene:4977");
        this.addMissingGeneId("PIK3C2A", "NCBIGene:5286");
        this.addMissingGeneId("ROS1", "NCBIGene:6098");
        this.addMissingGeneId("TRA", "NCBIGene:6955");
        this.addMissingGeneId("TRB", "NCBIGene:6957");
        this.addMissingGeneId("TRD", "NCBIGene:6964");
        this.addMissingGeneId("TRG", "NCBIGene:6965");
        this.addMissingGeneId("USH1E", "NCBIGene:7396");
        this.addMissingGeneId("RIPK1", "NCBIGene:8737");
        this.addMissingGeneId("PLAA", "NCBIGene:9373");
        this.addMissingGeneId("SH3PXD2A", "NCBIGene:9644");
        this.addMissingGeneId("CWC27", "NCBIGene:10283");
        this.addMissingGeneId("YME1L1", "NCBIGene:10730");
        this.addMissingGeneId("RPL35", "NCBIGene:11224");
        this.addMissingGeneId("RRAS2", "NCBIGene:22800");
        this.addMissingGeneId("MRAS", "NCBIGene:22808");
        this.addMissingGeneId("DUX4L1", "NCBIGene:22947");
        this.addMissingGeneId("NCAPD3", "NCBIGene:23310");
        this.addMissingGeneId("IGHV4-34", "NCBIGene:28395");
        this.addMissingGeneId("IGHV3-21", "NCBIGene:28444");
        this.addMissingGeneId("PUS7", "NCBIGene:54517");
        this.addMissingGeneId("TBC1D8B", "NCBIGene:54885");
        this.addMissingGeneId("TRPV6", "NCBIGene:55503");
        this.addMissingGeneId("RNPC3", "NCBIGene:55599");
        this.addMissingGeneId("SPG14", "NCBIGene:57309");
        this.addMissingGeneId("SPG16", "NCBIGene:57760");
        this.addMissingGeneId("RINT1", "NCBIGene:60561");
        this.addMissingGeneId("GINGF2", "NCBIGene:64644");
        this.addMissingGeneId("MMEL1", "NCBIGene:79258");
        this.addMissingGeneId("GREB1L", "NCBIGene:80000");
        this.addMissingGeneId("USP45", "NCBIGene:85015");
        this.addMissingGeneId("CFAP300", "NCBIGene:85016");
        this.addMissingGeneId("ALKBH8", "NCBIGene:91801");
        this.addMissingGeneId("STRADA", "NCBIGene:92335");
        this.addMissingGeneId("TIMM50", "NCBIGene:92609");
        this.addMissingGeneId("DYT13", "NCBIGene:93983");
        this.addMissingGeneId("GPRASP2", "NCBIGene:114928");
        this.addMissingGeneId("TDRD9", "NCBIGene:122402");
        this.addMissingGeneId("NAXE", "NCBIGene:128240");
        this.addMissingGeneId("TRIM71", "NCBIGene:131405");
        this.addMissingGeneId("SPG19", "NCBIGene:140907");
        this.addMissingGeneId("WDR66", "NCBIGene:144406");
        this.addMissingGeneId("POLR3H", "NCBIGene:171568");
        this.addMissingGeneId("CFAP221", "NCBIGene:200373");
        this.addMissingGeneId("RNU12", "NCBIGene:267010");
        this.addMissingGeneId("NUTM2E", "NCBIGene:283008");
        this.addMissingGeneId("DYT15", "NCBIGene:317714");
        this.addMissingGeneId("UBAC2", "NCBIGene:337867");
        this.addMissingGeneId("SPG24", "NCBIGene:338090");
        this.addMissingGeneId("SCA25", "NCBIGene:338435");
        this.addMissingGeneId("SPG25", "NCBIGene:387583");
        this.addMissingGeneId("LIPT2", "NCBIGene:387787");
        this.addMissingGeneId("SCA20", "NCBIGene:407973");
        this.addMissingGeneId("SPG27", "NCBIGene:414886");
        this.addMissingGeneId("SPG29", "NCBIGene:619379");
        this.addMissingGeneId("SPG32", "NCBIGene:724107");
        this.addMissingGeneId("SPG34", "NCBIGene:724110");
        this.addMissingGeneId("NUTM2A", "NCBIGene:728118");
        this.addMissingGeneId("NUTM2B", "NCBIGene:729262");
        this.addMissingGeneId("SPG36", "NCBIGene:791228");
        this.addMissingGeneId("SPG37", "NCBIGene:100049159");
        this.addMissingGeneId("SPG38", "NCBIGene:100049707");
        this.addMissingGeneId("DYT17", "NCBIGene:100216344");
        this.addMissingGeneId("USH1H", "NCBIGene:100271837");
        this.addMissingGeneId("SCA30", "NCBIGene:100359393");
        this.addMissingGeneId("DYT21", "NCBIGene:100885773");
        this.addMissingGeneId("USH1K", "NCBIGene:101180907");
        this.addMissingGeneId("IL12A-AS1", "NCBIGene:101928376");
        this.addMissingGeneId("SCA37", "NCBIGene:103753527");
    }
}

