/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.util;

import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.Triple;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.validation.Schema;
import javax.xml.validation.SchemaFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;

public class XMLUtils {
    /** Redwood logging channels through which this class reports its warnings. */
    private static final Redwood.RedwoodChannels log = Redwood.channels(XMLUtils.class);
    /** Tag names for which the {@code isBreaking} methods answer {@code true}. */
    private static final Set<String> breakingTags = Generics.newHashSet(Arrays.asList("blockquote", "br", "div", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "li", "ol", "p", "pre", "ul", "tr", "td"));
    /**
     * Reluctantly matches an ampersand, at least one further character, and the
     * first semicolon after that; {@code unescapeStringForXML} uses it to locate
     * entity-like spans.
     */
    private static final Pattern xmlEscapingPattern = Pattern.compile("&.+?;");

    /** Private constructor: code outside this class cannot instantiate it. */
    private XMLUtils() {
    }

    /**
     * Creates a {@link DocumentBuilderFactory} configured to resist XML external
     * entity (XXE) style attacks: DOCTYPE declarations are disallowed, external
     * DTDs and external general/parameter entities are switched off, entity
     * reference nodes are not created, and secure processing is enabled.
     *
     * <p>Each feature is applied on its own, so a feature the underlying parser
     * rejects is logged as a warning and does not prevent the remaining features
     * from being set.
     *
     * @return a new factory with every supported hardening feature applied
     */
    public static DocumentBuilderFactory safeDocumentBuilderFactory() {
        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        // Parallel arrays: features[i] is set to values[i].
        String[] features = {
            "http://apache.org/xml/features/disallow-doctype-decl",
            "http://apache.org/xml/features/nonvalidating/load-external-dtd",
            "http://xml.org/sax/features/external-general-entities",
            "http://xml.org/sax/features/external-parameter-entities",
            "http://apache.org/xml/features/dom/create-entity-ref-nodes",
            "http://javax.xml.XMLConstants/feature/secure-processing",
        };
        boolean[] values = {true, false, false, false, false, true};
        for (int i = 0; i < features.length; ++i) {
            try {
                dbf.setFeature(features[i], values[i]);
            }
            catch (ParserConfigurationException e) {
                // Keep going: one unsupported feature must not disable the rest of the hardening.
                log.warn(e);
            }
        }
        return dbf;
    }

    /**
     * Returns the text content of every element named {@code tag} in the XML
     * file {@code f}, as produced by
     * {@code getTextContentFromTagsFromFileSAXException}.
     *
     * @param f   the XML file to parse
     * @param tag the element name to look for
     * @return the collected strings; an empty list if parsing raised a
     *         {@link SAXException} (which is logged as a warning)
     */
    public static List<String> getTextContentFromTagsFromFile(File f, String tag) {
        // Declared as List: the delegate returns List<String>, which is not assignable to ArrayList.
        List<String> sents = Generics.newArrayList();
        try {
            sents = XMLUtils.getTextContentFromTagsFromFileSAXException(f, tag);
        }
        catch (SAXException e) {
            log.warn(e);
        }
        return sents;
    }

    /**
     * Parses {@code f} with a builder from {@code safeDocumentBuilderFactory()}
     * and collects, for each element named {@code tag}, its text content with
     * every run from a literal {@code '<'} through the next {@code '>'} removed.
     *
     * @param f   the XML file to parse
     * @param tag the element name to look for
     * @return one string per matching element, in document order; whatever was
     *         collected so far if an I/O or parser-configuration problem was
     *         logged
     * @throws SAXException if the document cannot be parsed
     */
    private static List<String> getTextContentFromTagsFromFileSAXException(File f, String tag) throws SAXException {
        List<String> collected = Generics.newArrayList();
        try {
            DocumentBuilder builder = safeDocumentBuilderFactory().newDocumentBuilder();
            Document doc = builder.parse(f);
            doc.getDocumentElement().normalize();
            NodeList matches = doc.getElementsByTagName(tag);
            for (int i = 0; i < matches.getLength(); ++i) {
                String raw = ((Element) matches.item(i)).getTextContent();
                StringBuilder kept = new StringBuilder();
                boolean insideTag = false;
                for (char ch : raw.toCharArray()) {
                    switch (ch) {
                        case '<':
                            // Opening bracket: start skipping, bracket itself is dropped.
                            insideTag = true;
                            break;
                        case '>':
                            // A stray '>' outside a tag is kept; either way skipping ends here.
                            if (!insideTag) {
                                kept.append(ch);
                            }
                            insideTag = false;
                            break;
                        default:
                            if (!insideTag) {
                                kept.append(ch);
                            }
                    }
                }
                collected.add(kept.toString());
            }
        }
        catch (IOException | ParserConfigurationException e) {
            log.warn(e);
        }
        return collected;
    }

    /**
     * Returns every element named {@code tag} found in the XML file {@code f}.
     *
     * @param f   the XML file to parse
     * @param tag the element name to look for
     * @return the matching elements; an empty list if parsing raised a
     *         {@link SAXException} (which is logged as a warning)
     */
    public static List<Element> getTagElementsFromFile(File f, String tag) {
        // Declared as List: the delegate returns List<Element>, which is not assignable to ArrayList.
        List<Element> sents = Generics.newArrayList();
        try {
            sents = XMLUtils.getTagElementsFromFileSAXException(f, tag);
        }
        catch (SAXException e) {
            log.warn(e);
        }
        return sents;
    }

    /**
     * Parses {@code f} with a builder from {@code safeDocumentBuilderFactory()}
     * and returns every element named {@code tag}, in the order the DOM reports
     * them.
     *
     * @param f   the XML file to parse
     * @param tag the element name to look for
     * @return the matching elements; whatever was collected so far if an I/O or
     *         parser-configuration problem was logged
     * @throws SAXException if the document cannot be parsed
     */
    private static List<Element> getTagElementsFromFileSAXException(File f, String tag) throws SAXException {
        List<Element> found = Generics.newArrayList();
        try {
            DocumentBuilder builder = safeDocumentBuilderFactory().newDocumentBuilder();
            Document doc = builder.parse(f);
            doc.getDocumentElement().normalize();
            NodeList matches = doc.getElementsByTagName(tag);
            int index = 0;
            while (index < matches.getLength()) {
                found.add((Element) matches.item(index));
                ++index;
            }
        }
        catch (IOException | ParserConfigurationException e) {
            log.warn(e);
        }
        return found;
    }

    /**
     * Returns, for every element named {@code tag} in the XML file {@code f}, a
     * triple of (text of preceding siblings, the element, text of following
     * siblings), as produced by {@code getTagElementTriplesFromFileSAXException}.
     *
     * @param f   the XML file to parse
     * @param tag the element name to look for
     * @return the triples; an empty list if parsing raised a
     *         {@link SAXException} (which is logged as a warning)
     */
    public static List<Triple<String, Element, String>> getTagElementTriplesFromFile(File f, String tag) {
        // Declared as List: the delegate returns a List, which is not assignable to ArrayList.
        List<Triple<String, Element, String>> sents = Generics.newArrayList();
        try {
            sents = XMLUtils.getTagElementTriplesFromFileSAXException(f, tag);
        }
        catch (SAXException e) {
            log.warn(e);
        }
        return sents;
    }

    /**
     * Same as {@code getTagElementTriplesFromFileNumBoundedSAXException}, except
     * that a {@link SAXException} is logged as a warning instead of being thrown.
     *
     * @param f   the XML file to parse
     * @param tag the element name to look for
     * @param num sibling bound passed through to the delegate
     * @return the triples; an empty list if parsing raised a {@link SAXException}
     */
    public static List<Triple<String, Element, String>> getTagElementTriplesFromFileNumBounded(File f, String tag, int num) {
        // Declared as List: the delegate returns a List, which is not assignable to ArrayList.
        List<Triple<String, Element, String>> sents = Generics.newArrayList();
        try {
            sents = XMLUtils.getTagElementTriplesFromFileNumBoundedSAXException(f, tag, num);
        }
        catch (SAXException e) {
            log.warn(e);
        }
        return sents;
    }

    /**
     * Convenience overload of
     * {@code getTagElementTriplesFromFileNumBoundedSAXException} that passes a
     * sibling bound of 2.
     *
     * @param f   the XML file to parse
     * @param tag the element name to look for
     * @return the triples built by the bounded variant
     * @throws SAXException if the document cannot be parsed
     */
    public static List<Triple<String, Element, String>> getTagElementTriplesFromFileSAXException(File f, String tag) throws SAXException {
        final int defaultSiblingBound = 2;
        return getTagElementTriplesFromFileNumBoundedSAXException(f, tag, defaultSiblingBound);
    }

    /**
     * Parses {@code f} and, for every element named {@code tag}, builds a triple
     * of (concatenated text of preceding siblings, the element itself,
     * concatenated text of following siblings).
     *
     * <p>On each side the walk stops once the running count exceeds
     * {@code numIncludedSiblings}, i.e. at most {@code numIncludedSiblings + 1}
     * siblings contribute text. Preceding text is assembled in document order.
     *
     * @param f                   the XML file to parse
     * @param tag                 the element name to look for
     * @param numIncludedSiblings bound on the sibling walk in each direction
     * @return one triple per matching element; whatever was collected so far if
     *         an I/O or parser-configuration problem was logged
     * @throws SAXException if the document cannot be parsed
     */
    public static List<Triple<String, Element, String>> getTagElementTriplesFromFileNumBoundedSAXException(File f, String tag, int numIncludedSiblings) throws SAXException {
        List<Triple<String, Element, String>> triples = Generics.newArrayList();
        try {
            DocumentBuilder builder = safeDocumentBuilderFactory().newDocumentBuilder();
            Document doc = builder.parse(f);
            doc.getDocumentElement().normalize();
            NodeList matches = doc.getElementsByTagName(tag);
            for (int i = 0; i < matches.getLength(); ++i) {
                Node match = matches.item(i);

                // Walk backwards, prepending so the text ends up in document order.
                String before = "";
                int taken = 0;
                Node sibling = match.getPreviousSibling();
                while (sibling != null && taken <= numIncludedSiblings) {
                    before = sibling.getTextContent() + before;
                    sibling = sibling.getPreviousSibling();
                    ++taken;
                }

                // Walk forwards, appending.
                String after = "";
                taken = 0;
                sibling = match.getNextSibling();
                while (sibling != null && taken <= numIncludedSiblings) {
                    after = after + sibling.getTextContent();
                    sibling = sibling.getNextSibling();
                    ++taken;
                }

                triples.add(new Triple<String, Element, String>(before, (Element) match, after));
            }
        }
        catch (IOException | ParserConfigurationException e) {
            log.warn(e);
        }
        return triples;
    }

    /**
     * Builds a non-validating {@link DocumentBuilder} from
     * {@code safeDocumentBuilderFactory()}, with DTD grammar loading and external
     * DTD loading switched off and a {@code SAXErrorHandler} installed.
     *
     * @return the builder, or {@code null} if it could not be created; failures
     *         are logged as warnings rather than thrown
     */
    public static DocumentBuilder getXmlParser() {
        final String loadDtdGrammar = "http://apache.org/xml/features/nonvalidating/load-dtd-grammar";
        final String loadExternalDtd = "http://apache.org/xml/features/nonvalidating/load-external-dtd";
        final String className = XMLUtils.class.getName();
        // Assigned as soon as the builder exists, so a later failure still returns it.
        DocumentBuilder parser = null;
        try {
            DocumentBuilderFactory factory = safeDocumentBuilderFactory();
            factory.setValidating(false);
            factory.setFeature(loadDtdGrammar, false);
            factory.setFeature(loadExternalDtd, false);
            parser = factory.newDocumentBuilder();
            parser.setErrorHandler(new SAXErrorHandler());
        }
        catch (ParserConfigurationException e) {
            log.warnf("%s: Unable to create XML parser\n", className);
            log.warn(e);
        }
        catch (UnsupportedOperationException e) {
            log.warnf("%s: API error while setting up XML parser. Check your JAXP version\n", className);
            log.warn(e);
        }
        return parser;
    }

    /**
     * Builds a {@link DocumentBuilder} from {@code safeDocumentBuilderFactory()}
     * that validates against the W3C XML Schema stored in {@code schemaFile},
     * with a {@code SAXErrorHandler} installed. The schema factory itself has
     * secure processing enabled.
     *
     * @param schemaFile file containing the XML Schema to validate against
     * @return the builder, or {@code null} if it could not be created; failures
     *         are logged as warnings rather than thrown
     */
    public static DocumentBuilder getValidatingXmlParser(File schemaFile) {
        final String className = XMLUtils.class.getName();
        // Assigned as soon as the builder exists, so a later failure still returns it.
        DocumentBuilder parser = null;
        try {
            DocumentBuilderFactory builderFactory = safeDocumentBuilderFactory();
            SchemaFactory schemaFactory = SchemaFactory.newInstance("http://www.w3.org/2001/XMLSchema");
            schemaFactory.setFeature("http://javax.xml.XMLConstants/feature/secure-processing", true);
            builderFactory.setSchema(schemaFactory.newSchema(schemaFile));
            parser = builderFactory.newDocumentBuilder();
            parser.setErrorHandler(new SAXErrorHandler());
        }
        catch (ParserConfigurationException e) {
            log.warnf("%s: Unable to create XML parser\n", className);
            log.warn(e);
        }
        catch (SAXException e) {
            log.warnf("%s: XML parsing exception while loading schema %s\n", className, schemaFile.getPath());
            log.warn(e);
        }
        catch (UnsupportedOperationException e) {
            log.warnf("%s: API error while setting up XML parser. Check your JAXP version\n", className);
            log.warn(e);
        }
        return parser;
    }

    /**
     * Reads everything from {@code r} and returns it with the tags removed,
     * alternating between {@code readUntilTag} and {@code readTag} until no
     * further tag is returned.
     *
     * <p>If {@code mapBack} is non-null it is cleared and then receives one entry
     * per output character: the running input position for a character copied
     * from the text, or the negated running position for a newline inserted in
     * place of a breaking tag.
     *
     * @param r              source of the marked-up text
     * @param mapBack        optional list to fill with position information; may
     *                       be {@code null}
     * @param markLineBreaks whether a tag for which {@code isBreaking} is true is
     *                       replaced by a {@code "\n"} in the output
     * @return the text with tags stripped; whatever was accumulated so far if an
     *         {@link IOException} was logged
     */
    public static String stripTags(Reader r, List<Integer> mapBack, boolean markLineBreaks) {
        if (mapBack != null) {
            mapBack.clear();
        }
        StringBuilder stripped = new StringBuilder();
        try {
            int offset = 0;
            for (;;) {
                // Plain text up to the next tag (possibly empty).
                String text = readUntilTag(r);
                int textLength = text.length();
                stripped.append(text);
                if (mapBack != null) {
                    for (int k = 0; k < textLength; ++k) {
                        mapBack.add(offset + k);
                    }
                }
                offset += textLength;

                String tag = readTag(r);
                if (tag == null) {
                    break;
                }
                if (markLineBreaks && isBreaking(parseTag(tag))) {
                    stripped.append("\n");
                    if (mapBack != null) {
                        // Negative entry marks a character that was inserted, not copied.
                        mapBack.add(-offset);
                    }
                }
                offset += tag.length();
            }
        }
        catch (IOException e) {
            log.warn("Error reading string");
            log.warn(e);
        }
        return stripped.toString();
    }

    /**
     * Tells whether {@code tag} is one of the names held in {@code breakingTags}.
     *
     * @param tag the tag name to test
     * @return {@code true} if the name is in the breaking-tag set
     */
    public static boolean isBreaking(String tag) {
        final boolean breaking = breakingTags.contains(tag);
        return breaking;
    }

    /**
     * Tells whether the {@code name} field of {@code tag} is one of the names
     * held in {@code breakingTags}.
     *
     * @param tag the parsed tag to test
     * @return {@code true} if the tag's name is in the breaking-tag set
     */
    public static boolean isBreaking(XMLTag tag) {
        final boolean breaking = breakingTags.contains(tag.name);
        return breaking;
    }

    /**
     * Reads characters from {@code r} up to, but not including, the next
     * {@code '<'} or the end of the stream. The {@code '<'} itself is consumed.
     *
     * @param r the reader to consume from
     * @return the characters read before the {@code '<'}; the empty string if
     *         {@code r.ready()} is false on entry
     * @throws IOException if reading from {@code r} fails
     */
    public static String readUntilTag(Reader r) throws IOException {
        if (!r.ready()) {
            return "";
        }
        StringBuilder text = new StringBuilder();
        for (int ch = r.read(); ch >= 0 && ch != '<'; ch = r.read()) {
            text.append((char) ch);
        }
        return text.toString();
    }

    /**
     * Reads the next tag from {@code r} with {@code readTag} and wraps it in an
     * {@code XMLTag}.
     *
     * @param r the reader to consume from
     * @return the parsed tag; {@code null} if {@code readTag} returned
     *         {@code null} or if constructing the {@code XMLTag} threw (in which
     *         case a warning naming the offending text is logged)
     * @throws IOException if reading from {@code r} fails
     */
    public static XMLTag readAndParseTag(Reader r) throws IOException {
        String rawTag = readTag(r);
        if (rawTag == null) {
            return null;
        }
        try {
            return new XMLTag(rawTag);
        }
        catch (Exception e) {
            // Best effort: an unparseable tag is reported and treated as absent.
            log.warn("Failed to handle |" + rawTag + "|");
            return null;
        }
    }

    /**
     * Replaces every span of {@code s} matched by {@code xmlEscapingPattern}
     * with the single character {@code translate} returns for it, copying all
     * other text through unchanged.
     *
     * @param s the text containing entity-like spans
     * @return the text with each matched span replaced by one character
     */
    public static String unescapeStringForXML(String s) {
        StringBuilder out = new StringBuilder();
        Matcher entity = xmlEscapingPattern.matcher(s);
        int copiedUpTo = 0;
        while (entity.find()) {
            // Literal text between the previous match and this one.
            out.append(s, copiedUpTo, entity.start());
            out.append(translate(entity.group()));
            copiedUpTo = entity.end();
        }
        out.append(s, copiedUpTo, s.length());
        return out.toString();
    }

    /**
     * Maps one entity reference, written with its leading {@code '&'} and
     * trailing {@code ';'} (for example {@code "&amp;"}), to a single character.
     *
     * <p>Besides the usual markup, Latin-1, punctuation and Greek entities, the
     * table flattens a number of accented Latin letters to their unaccented
     * ASCII base letter (for example {@code "&ccaron;"} becomes {@code 'c'}).
     *
     * <p>Every key ends in {@code ';'} with no trailing whitespace, because the
     * caller only passes spans that end at a semicolon; keys that violate this
     * can never match.
     *
     * @param s the entity reference, including {@code '&'} and {@code ';'}
     * @return the mapped character, or a space if {@code s} is not in the table
     */
    private static char translate(String s) {
        switch (s) {
            // Markup and basic ASCII punctuation
            case "&amp;": return '&';
            case "&lt;": case "&Lt;": return '<';
            case "&gt;": case "&Gt;": return '>';
            case "&quot;": return '\"';
            case "&apos;": return '\'';
            case "&ast;": return '*';
            case "&sharp;": return '\u266f';
            case "&equals;": return '=';

            // Latin-1 symbols
            case "&nbsp;": return '\u00a0';
            case "&iexcl;": return '\u00a1';
            case "&cent;": case "&shilling;": return '\u00a2';
            case "&pound;": return '\u00a3';
            case "&curren;": return '\u00a4';
            case "&yen;": return '\u00a5';
            case "&brvbar;": return '\u00a6';
            case "&sect;": return '\u00a7';
            case "&uml;": return '\u00a8';
            case "&copy;": return '\u00a9';
            case "&ordf;": return '\u00aa';
            case "&laquo;": return '\u00ab';
            case "&not;": return '\u00ac';
            case "&shy;": return '\u00ad';
            case "&reg;": return '\u00ae';
            case "&macr;": return '\u00af';
            case "&deg;": return '\u00b0';
            case "&plusmn;": return '\u00b1';
            case "&sup2;": return '\u00b2';
            case "&sup3;": return '\u00b3';
            case "&acute;": return '\u00b4';
            case "&micro;": return '\u00b5';
            case "&middot;": return '\u00b7';
            case "&cedil;": return '\u00b8';
            case "&sup1;": return '\u00b9';
            case "&ordm;": return '\u00ba';
            case "&raquo;": return '\u00bb';
            case "&frac14;": return '\u00bc';
            case "&frac12;": return '\u00bd';
            case "&frac34;": return '\u00be';
            case "&iquest;": return '\u00bf';

            // Latin-1 upper-case letters
            case "&Agrave;": return '\u00c0';
            case "&Aacute;": return '\u00c1';
            case "&Acirc;": return '\u00c2';
            case "&Atilde;": return '\u00c3';
            case "&Auml;": return '\u00c4';
            case "&Aring;": return '\u00c5';
            case "&AElig;": return '\u00c6';
            case "&Ccedil;": return '\u00c7';
            case "&Egrave;": return '\u00c8';
            case "&Eacute;": return '\u00c9';
            case "&Ecirc;": return '\u00ca';
            case "&Euml;": return '\u00cb';
            case "&Igrave;": return '\u00cc';
            case "&Iacute;": return '\u00cd';
            case "&Icirc;": return '\u00ce';
            case "&Iuml;": return '\u00cf';
            case "&ETH;": return '\u00d0';
            case "&Ntilde;": return '\u00d1';
            case "&Ograve;": return '\u00d2';
            case "&Oacute;": return '\u00d3';
            case "&Ocirc;": return '\u00d4';
            case "&Otilde;": return '\u00d5';
            case "&Ouml;": return '\u00d6';
            case "&times;": return '\u00d7';
            case "&Oslash;": return '\u00d8';
            case "&Ugrave;": return '\u00d9';
            case "&Uacute;": return '\u00da';
            case "&Ucirc;": return '\u00db';
            case "&Uuml;": return '\u00dc';
            case "&Yacute;": return '\u00dd';
            case "&THORN;": return '\u00de';
            case "&szlig;": return '\u00df';

            // Latin-1 lower-case letters
            case "&agrave;": return '\u00e0';
            case "&aacute;": return '\u00e1';
            case "&acirc;": return '\u00e2';
            case "&atilde;": return '\u00e3';
            case "&auml;": return '\u00e4';
            case "&aring;": return '\u00e5';
            case "&aelig;": return '\u00e6';
            case "&ccedil;": return '\u00e7';
            case "&egrave;": return '\u00e8';
            case "&eacute;": return '\u00e9';
            case "&ecirc;": return '\u00ea';
            case "&euml;": return '\u00eb';
            case "&igrave;": return '\u00ec';
            case "&iacute;": return '\u00ed';
            case "&icirc;": return '\u00ee';
            case "&iuml;": return '\u00ef';
            case "&eth;": return '\u00f0';
            case "&ntilde;": return '\u00f1';
            case "&ograve;": return '\u00f2';
            case "&oacute;": return '\u00f3';
            case "&ocirc;": return '\u00f4';
            case "&otilde;": return '\u00f5';
            case "&ouml;": return '\u00f6';
            case "&divide;": return '\u00f7';
            case "&oslash;": return '\u00f8';
            case "&ugrave;": return '\u00f9';
            case "&uacute;": return '\u00fa';
            case "&ucirc;": return '\u00fb';
            case "&uuml;": return '\u00fc';
            case "&yacute;": return '\u00fd';
            case "&thorn;": return '\u00fe';
            case "&yuml;": return '\u00ff';

            // Extended Latin, spacing modifiers, directional marks
            case "&OElig;": return '\u0152';
            case "&oelig;": return '\u0153';
            case "&Scaron;": return '\u0160';
            case "&scaron;": return '\u0161';
            case "&Yuml;": return '\u0178';
            case "&circ;": return '\u02c6';
            case "&tilde;": return '\u02dc';
            case "&lrm;": return '\u200e';
            case "&rlm;": return '\u200f';

            // General punctuation, math and arrows
            case "&ndash;": return '\u2013';
            case "&mdash;": return '\u2014';
            case "&lsquo;": return '\u2018';
            case "&rsquo;": return '\u2019';
            case "&sbquo;": return '\u201a';
            case "&ldquo;": case "&bquo;": case "&bq;": return '\u201c';
            case "&rdquo;": case "&equo;": return '\u201d';
            case "&bdquo;": return '\u201e';
            case "&sim;": return '\u223c';
            case "&radic;": return '\u221a';
            case "&le;": return '\u2264';
            case "&ge;": return '\u2265';
            case "&larr;": return '\u2190';
            case "&darr;": return '\u2193';
            case "&rarr;": return '\u2192';
            case "&hellip;": return '\u2026';
            case "&prime;": return '\u2032';
            case "&Prime;": case "&ins;": return '\u2033';
            case "&trade;": return '\u2122';

            // Greek upper-case letters
            case "&Alpha;": case "&Agr;": return '\u0391';
            case "&Beta;": case "&Bgr;": return '\u0392';
            case "&Gamma;": case "&Ggr;": return '\u0393';
            case "&Delta;": case "&Dgr;": return '\u0394';
            case "&Epsilon;": case "&Egr;": return '\u0395';
            case "&Zeta;": case "&Zgr;": return '\u0396';
            case "&Eta;": return '\u0397';
            case "&Theta;": case "&THgr;": return '\u0398';
            case "&Iota;": case "&Igr;": return '\u0399';
            case "&Kappa;": case "&Kgr;": return '\u039a';
            case "&Lambda;": case "&Lgr;": return '\u039b';
            case "&Mu;": case "&Mgr;": return '\u039c';
            case "&Nu;": case "&Ngr;": return '\u039d';
            case "&Xi;": case "&Xgr;": return '\u039e';
            case "&Omicron;": case "&Ogr;": return '\u039f';
            case "&Pi;": case "&Pgr;": return '\u03a0';
            case "&Rho;": case "&Rgr;": return '\u03a1';
            case "&Sigma;": case "&Sgr;": return '\u03a3';
            case "&Tau;": case "&Tgr;": return '\u03a4';
            case "&Upsilon;": case "&Ugr;": return '\u03a5';
            case "&Phi;": case "&PHgr;": return '\u03a6';
            case "&Chi;": case "&KHgr;": return '\u03a7';
            case "&Psi;": case "&PSgr;": return '\u03a8';
            case "&Omega;": case "&OHgr;": return '\u03a9';

            // Greek lower-case letters
            case "&alpha;": case "&agr;": return '\u03b1';
            case "&beta;": case "&bgr;": return '\u03b2';
            case "&gamma;": case "&ggr;": return '\u03b3';
            case "&delta;": case "&dgr;": return '\u03b4';
            case "&epsilon;": case "&egr;": return '\u03b5';
            case "&zeta;": case "&zgr;": return '\u03b6';
            case "&eta;": case "&eegr;": return '\u03b7';
            case "&theta;": case "&thgr;": return '\u03b8';
            case "&iota;": case "&igr;": return '\u03b9';
            case "&kappa;": case "&kgr;": return '\u03ba';
            case "&lambda;": case "&lgr;": return '\u03bb';
            case "&mu;": case "&mgr;": return '\u03bc';
            case "&nu;": case "&ngr;": return '\u03bd';
            case "&xi;": case "&xgr;": return '\u03be';
            case "&omicron;": case "&ogr;": return '\u03bf';
            case "&pi;": case "&pgr;": return '\u03c0';
            case "&rho;": case "&rgr;": return '\u03c1';
            case "&sigma;": case "&sgr;": return '\u03c3';
            case "&tau;": case "&tgr;": return '\u03c4';
            case "&upsilon;": case "&ugr;": return '\u03c5';
            case "&phi;": case "&phgr;": return '\u03c6';
            case "&chi;": case "&khgr;": return '\u03c7';
            case "&psi;": case "&psgr;": return '\u03c8';
            case "&omega;": case "&ohgr;": return '\u03c9';

            // Miscellaneous ASCII
            case "&bull;": return '\u2022';
            case "&percnt;": return '%';
            case "&plus;": return '+';
            case "&dash;": return '-';

            // Accented Latin letters flattened to their ASCII base letter
            case "&abreve;": case "&amacr;": case "&ape;": case "&aogon;": return 'a';
            case "&Amacr;": return 'A';
            case "&cacute;": case "&ccaron;": case "&ccirc;": return 'c';
            case "&Ccaron;": return 'C';
            case "&dcaron;": return 'd';
            case "&ecaron;": case "&emacr;": case "&eogon;": return 'e';
            case "&Emacr;": case "&Ecaron;": return 'E';
            case "&lacute;": return 'l';
            case "&Lacute;": return 'L';
            case "&nacute;": case "&ncaron;": case "&ncedil;": return 'n';
            case "&rcaron;": case "&racute;": return 'r';
            case "&Rcaron;": return 'R';
            case "&omacr;": return 'o';
            case "&imacr;": return 'i';
            case "&sacute;": case "&scedil;": case "&scirc;": return 's';
            case "&Sacute;": case "&Scedil;": return 'S';
            case "&tcaron;": case "&tcedil;": return 't';
            case "&umacr;": case "&uring;": return 'u';
            case "&wcirc;": return 'w';
            case "&Ycirc;": return 'Y';
            case "&ycirc;": return 'y';
            case "&zcaron;": case "&zacute;": return 'z';
            case "&Zcaron;": return 'Z';

            // Remaining symbols
            case "&hearts;": return '\u2665';
            case "&infin;": return '\u221e';
            case "&dollar;": return '$';
            case "&sub;": case "&lcub;": return '\u2282';
            case "&sup;": case "&rcub;": return '\u2283';
            case "&lsqb;": return '[';
            case "&rsqb;": return ']';

            // Anything unrecognised collapses to a single space.
            default: return ' ';
        }
    }

    /**
     * Returns {@code in} with the five XML special characters replaced by their
     * predefined entities: {@code &} , {@code <}, {@code >}, double quote and
     * single quote. All other characters are copied unchanged.
     *
     * @param in the text to escape
     * @return the escaped text
     */
    public static String escapeXML(String in) {
        StringBuilder escaped = new StringBuilder(in.length());
        for (char ch : in.toCharArray()) {
            switch (ch) {
                case '&':
                    escaped.append("&amp;");
                    break;
                case '<':
                    escaped.append("&lt;");
                    break;
                case '>':
                    escaped.append("&gt;");
                    break;
                case '\"':
                    escaped.append("&quot;");
                    break;
                case '\'':
                    escaped.append("&apos;");
                    break;
                default:
                    escaped.append(ch);
            }
        }
        return escaped.toString();
    }

    /**
     * Returns {@code in} with {@code &}, {@code <} and {@code >} replaced by
     * their predefined entities. Quote characters and everything else are copied
     * unchanged.
     *
     * @param in the text to escape
     * @return the escaped text
     */
    public static String escapeElementXML(String in) {
        StringBuilder escaped = new StringBuilder(in.length());
        for (char ch : in.toCharArray()) {
            switch (ch) {
                case '&':
                    escaped.append("&amp;");
                    break;
                case '<':
                    escaped.append("&lt;");
                    break;
                case '>':
                    escaped.append("&gt;");
                    break;
                default:
                    escaped.append(ch);
            }
        }
        return escaped.toString();
    }

    /**
     * Escapes text for use inside a double-quoted XML attribute value. Only
     * {@code &} and {@code "} are replaced (by {@code &amp;} and
     * {@code &quot;}); every other character, including {@code <}, {@code >}
     * and {@code '}, is copied unchanged.
     *
     * @param in the text to escape; must not be null
     * @return the escaped text
     */
    public static String escapeAttributeXML(String in) {
        final int length = in.length();
        final StringBuilder escaped = new StringBuilder(length);
        int pos = 0;
        while (pos < length) {
            final char ch = in.charAt(pos++);
            if (ch == '&') {
                escaped.append("&amp;");
            } else if (ch == '"') {
                escaped.append("&quot;");
            } else {
                escaped.append(ch);
            }
        }
        return escaped.toString();
    }

    /**
     * Escapes the text between tags while leaving the tags themselves untouched.
     * <p>
     * The input is consumed by alternating {@code readUntilTag} (whose result is
     * passed through {@link #escapeXML(String)}) and {@code readAndParseTag}
     * (whose result is appended via its {@code toString()}). Processing stops the
     * first time {@code readAndParseTag} returns {@code null}.
     * <p>
     * An {@link IOException} is logged as a warning and whatever has been built
     * so far is returned.
     *
     * @param s the string containing text interleaved with tags
     * @return the string with the non-tag text escaped
     */
    public static String escapeTextAroundXMLTags(String s) {
        final StringBuilder out = new StringBuilder();
        final StringReader reader = new StringReader(s);
        try {
            XMLTag tag;
            do {
                // Text first, then the tag that terminated it (if any).
                out.append(XMLUtils.escapeXML(XMLUtils.readUntilTag(reader)));
                tag = XMLUtils.readAndParseTag(reader);
                if (tag != null) {
                    out.append(tag);
                }
            } while (tag != null);
        }
        catch (IOException e) {
            log.warn("Error reading string");
            log.warn(e);
        }
        return out.toString();
    }

    /**
     * Finds the first space-like character at or after {@code begin}, where
     * space-like means either an ASCII space (U+0020) or a non-breaking space
     * (U+00A0).
     *
     * @param haystack the string to search
     * @param begin the index to start searching from
     * @return the index of the earliest such character, or -1 if neither occurs
     */
    public static int findSpace(String haystack, int begin) {
        final int plainSpace = haystack.indexOf(' ', begin);
        final int noBreakSpace = haystack.indexOf('\u00a0', begin);
        if (plainSpace < 0) {
            // Either the nbsp position, or -1 when neither was found.
            return noBreakSpace;
        }
        if (noBreakSpace < 0) {
            return plainSpace;
        }
        return plainSpace < noBreakSpace ? plainSpace : noBreakSpace;
    }

    /**
     * Reads the remainder of a tag from {@code r}, up to and including the next
     * {@code >} (or to end of input if no {@code >} is found).
     * <p>
     * The returned string always starts with a {@code <} that this method
     * supplies itself; it is not read from the reader.
     *
     * @param r the reader to consume from
     * @return {@code "<"} followed by the characters read, or {@code null} if the
     *         reader is not ready or no characters could be read
     * @throws IOException if reading fails
     */
    public static String readTag(Reader r) throws IOException {
        if (!r.ready()) {
            return null;
        }
        final StringBuilder tagText = new StringBuilder("<");
        for (int ch = r.read(); ch >= 0; ch = r.read()) {
            tagText.append((char) ch);
            if (ch == '>') {
                break;
            }
        }
        // Only the synthetic '<' is present: nothing was actually read.
        return tagText.length() == 1 ? null : tagText.toString();
    }

    /**
     * Wraps a tag string in an {@link XMLTag} if it looks like a tag.
     *
     * @param tagString the candidate tag text
     * @return a new {@link XMLTag}, or {@code null} when {@code tagString} is
     *         null, empty, does not start with {@code <} or does not end with
     *         {@code >}
     */
    public static XMLTag parseTag(String tagString) {
        final boolean looksLikeTag = tagString != null
                && !tagString.isEmpty()
                && tagString.charAt(0) == '<'
                && tagString.charAt(tagString.length() - 1) == '>';
        return looksLikeTag ? new XMLTag(tagString) : null;
    }

    /**
     * Parses the XML file at {@code filename} into a DOM {@link Document}.
     * <p>
     * The parser comes from {@link #safeDocumentBuilderFactory()}, is not
     * namespace-aware, and reports problems through {@code SAXErrorHandler}.
     * The file reader is closed before this method returns, whether parsing
     * succeeds or fails.
     * <p>
     * NOTE: the file is read through a {@link FileReader}, i.e. as a character
     * stream decoded by {@code FileReader}'s charset rather than as raw bytes.
     *
     * @param filename path of the XML file to read
     * @return the parsed document
     * @throws Exception if the file cannot be opened, the parser cannot be
     *         configured, or the content cannot be parsed
     */
    public static Document readDocumentFromFile(String filename) throws Exception {
        // try-with-resources: previously the reader was never closed, and it
        // leaked outright if builder creation failed before parse() ran.
        try (Reader reader = new FileReader(filename)) {
            InputSource in = new InputSource(reader);
            DocumentBuilderFactory factory = XMLUtils.safeDocumentBuilderFactory();
            factory.setNamespaceAware(false);
            DocumentBuilder db = factory.newDocumentBuilder();
            db.setErrorHandler(new SAXErrorHandler());
            return db.parse(in);
        }
    }

    /**
     * Parses XML held in a string into a DOM {@link Document}, using a
     * non-namespace-aware parser obtained from
     * {@link #safeDocumentBuilderFactory()}. No custom error handler is
     * installed on the builder.
     *
     * @param s the XML text
     * @return the parsed document
     * @throws Exception if the parser cannot be configured or the text cannot
     *         be parsed
     */
    public static Document readDocumentFromString(String s) throws Exception {
        final StringReader xmlText = new StringReader(s);
        final DocumentBuilderFactory dbf = XMLUtils.safeDocumentBuilderFactory();
        dbf.setNamespaceAware(false);
        final DocumentBuilder builder = dbf.newDocumentBuilder();
        return builder.parse(new InputSource(xmlText));
    }

    /**
     * Small command-line driver.
     * <ul>
     *   <li>{@code -readDoc <file>}: parses the file with
     *       {@link #readDocumentFromFile(String)} and prints the document.</li>
     *   <li>{@code <file>}: loads the file into a string and prints each tag
     *       found by repeatedly calling {@code readUntilTag} then
     *       {@link #readTag(Reader)}. The result of the very first
     *       {@code readTag} call only serves as the loop's entry condition and
     *       is not printed.</li>
     * </ul>
     *
     * @param args command-line arguments as described above
     * @throws Exception if reading or parsing fails
     */
    public static void main(String[] args) throws Exception {
        if (args[0].equals("-readDoc")) {
            System.out.println(XMLUtils.readDocumentFromFile(args[1]));
            return;
        }
        final String contents = IOUtils.slurpFile(args[0]);
        final StringReader reader = new StringReader(contents);
        String tag = XMLUtils.readTag(reader);
        while (tag != null && !tag.isEmpty()) {
            // Skip the text in front of the next tag, then read that tag.
            XMLUtils.readUntilTag(reader);
            tag = XMLUtils.readTag(reader);
            if (tag != null && !tag.isEmpty()) {
                System.out.println("got tag=" + new XMLTag(tag));
            }
        }
    }

    /**
     * SAX {@link ErrorHandler} that logs warnings and errors to this class's
     * log channel and rethrows fatal errors with a more informative message
     * (see {@link #makeBetterErrorString(String, SAXParseException)}).
     */
    private static class SAXErrorHandler
    implements ErrorHandler {
        private SAXErrorHandler() {
        }

        /**
         * Builds a one-sentence description of a parse problem in the form
         * {@code "<msg>: <exception message> at document line L, column C
         * [in entity from systemID S | in entity from publicID P]."}.
         * <p>
         * A single trailing period on the exception's own message is dropped so
         * the location can be appended before the final period. A null or empty
         * exception message is tolerated and contributes no text.
         *
         * @param msg severity prefix such as {@code "Warning"} or {@code "Error"}
         * @param ex the parse exception to describe
         * @return the formatted description
         */
        public static String makeBetterErrorString(String msg, SAXParseException ex) {
            StringBuilder sb = new StringBuilder(msg);
            sb.append(": ");
            String str = ex.getMessage();
            if (str != null) {
                // endsWith is safe on "", unlike comparing lastIndexOf('.') with
                // length() - 1, which matched (-1 == -1) and then called
                // substring(0, -1).
                if (str.endsWith(".")) {
                    str = str.substring(0, str.length() - 1);
                }
                sb.append(str);
            }
            sb.append(" at document line ").append(ex.getLineNumber());
            sb.append(", column ").append(ex.getColumnNumber());
            // The system ID takes precedence; the public ID is a fallback.
            if (ex.getSystemId() != null) {
                sb.append(" in entity from systemID ").append(ex.getSystemId());
            } else if (ex.getPublicId() != null) {
                sb.append(" in entity from publicID ").append(ex.getPublicId());
            }
            sb.append('.');
            return sb.toString();
        }

        /** Logs the warning; parsing continues. */
        @Override
        public void warning(SAXParseException exception) {
            log.warn(SAXErrorHandler.makeBetterErrorString("Warning", exception));
        }

        /** Logs the recoverable error; no exception is thrown from here. */
        @Override
        public void error(SAXParseException exception) {
            log.error(SAXErrorHandler.makeBetterErrorString("Error", exception));
        }

        /**
         * Rethrows a fatal error as a new {@link SAXParseException} carrying the
         * improved message and the original location information.
         *
         * @param ex the fatal parse exception reported by the parser
         * @throws SAXParseException always
         */
        @Override
        public void fatalError(SAXParseException ex) throws SAXParseException {
            throw new SAXParseException(SAXErrorHandler.makeBetterErrorString("Fatal Error", ex), ex.getPublicId(), ex.getSystemId(), ex.getLineNumber(), ex.getColumnNumber());
        }
    }

    /**
     * A lightweight, non-validating decomposition of a single tag string such as
     * {@code <a href="x">}, {@code </p>} or {@code <br/>} into its name,
     * attributes and open/close/self-closing flags.
     */
    public static class XMLTag {
        /** The complete tag text exactly as given to the constructor, brackets included. */
        public String text;
        /** The tag name: everything after {@code <} or {@code </} up to the first space or non-breaking space. */
        public String name;
        /** Attribute names mapped to their values; an attribute without {@code =} maps to the empty string. */
        public Map<String, String> attributes;
        /** True when the tag starts with {@code </}. */
        public boolean isEndTag;
        /** True when the tag ends with {@code />}. */
        public boolean isSingleTag;

        /**
         * Parses the given tag text.
         * <p>
         * Attribute values may be double-quoted (the value is the text between
         * the quotes) or bare (the value runs to the next space or non-breaking
         * space, or to the end of the tag). If a double-quoted value has no
         * closing quote, attribute parsing stops and that attribute is not
         * recorded.
         *
         * @param tag the tag text, which must start with {@code <} and end with {@code >}
         * @throws NullPointerException if {@code tag} is null or empty
         * @throws IllegalArgumentException if {@code tag} does not start with
         *         {@code <} or does not end with {@code >}
         */
        public XMLTag(String tag) {
            if (tag == null || tag.isEmpty()) {
                throw new NullPointerException("Attempted to parse empty/null tag");
            }
            if (tag.charAt(0) != '<') {
                throw new IllegalArgumentException("Tag did not start with <");
            }
            if (tag.charAt(tag.length() - 1) != '>') {
                throw new IllegalArgumentException("Tag did not end with >");
            }
            this.text = tag;
            int begin = 1;
            if (tag.charAt(1) == '/') {
                begin = 2;
                this.isEndTag = true;
            } else {
                this.isEndTag = false;
            }
            int end = tag.length() - 1;
            // Only treat the trailing '/' as a self-closing marker when it is not
            // the very same '/' that marked the end tag. Without this check the
            // input "</>" produced begin=2, end=1 and substring(2, 1) threw.
            int slashPos = tag.length() - 2;
            if (slashPos >= begin && tag.charAt(slashPos) == '/') {
                end = slashPos;
                this.isSingleTag = true;
            } else {
                this.isSingleTag = false;
            }
            // From here on 'tag' is only the inside of the tag: no brackets and
            // no leading/trailing slash.
            tag = tag.substring(begin, end);
            this.attributes = Generics.newHashMap();
            begin = 0;
            end = XMLUtils.findSpace(tag, 0);
            if (end < 0) {
                // No space at all: the whole thing is the name, no attributes.
                this.name = tag;
            } else {
                this.name = tag.substring(begin, end);
                do {
                    String att;
                    // Skip whitespace/control characters (anything below '!').
                    for (begin = end + 1; begin < tag.length() && tag.charAt(begin) < '!'; ++begin) {
                    }
                    if (begin == tag.length()) break;
                    end = tag.indexOf('=', begin);
                    if (end < 0) {
                        // No '=' left: the remainder is a single valueless attribute.
                        att = tag.substring(begin);
                        this.attributes.put(att, "");
                        break;
                    }
                    att = tag.substring(begin, end).trim();
                    String value;
                    // Skip whitespace between '=' and the value.
                    for (begin = end + 1; begin < tag.length() && tag.charAt(begin) < '!'; ++begin) {
                    }
                    if (begin < tag.length() && tag.charAt(begin) == '\"') {
                        // Quoted value: take everything up to the closing quote.
                        if ((end = tag.indexOf('\"', ++begin)) < 0) break;
                        value = tag.substring(begin, end);
                        ++end;
                    } else {
                        // Bare value: runs to the next space/nbsp or end of tag.
                        end = XMLUtils.findSpace(tag, begin);
                        if (end < 0) {
                            end = tag.length();
                        }
                        value = tag.substring(begin, end);
                    }
                    this.attributes.put(att, value);
                } while (end < tag.length() - 3);
            }
        }

        /**
         * Returns the original tag text.
         *
         * @return the value of {@link #text}
         */
        @Override
        public String toString() {
            return this.text;
        }

        /**
         * Returns the value of the first attribute in {@code attributesList}
         * that has a non-null value on this tag.
         *
         * @param attributesList attribute names to try, in priority order
         * @return the first non-null attribute value, or {@code null} if none
         *         of the names has one
         */
        public String getFirstNonNullAttributeFromList(List<String> attributesList) {
            for (String attribute : attributesList) {
                String value = this.attributes.get(attribute);
                if (value != null) {
                    return value;
                }
            }
            return null;
        }
    }
}

