/*  Sesame - Storage and Querying architecture for RDF and RDF Schema
 *  Copyright (C) 2001-2006 Aduna
 *
 *  Contact: 
 *  	Aduna
 *  	Prinses Julianaplein 14 b
 *  	3817 CS Amersfoort
 *  	The Netherlands
 *  	tel. +33 (0)33 465 99 87
 *  	fax. +33 (0)33 465 99 87
 *
 *  	http://aduna-software.com/
 *  	http://www.openrdf.org/
 *  
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2.1 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

package org.openrdf.rio.rdfxml;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Stack;

import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;

import org.openrdf.util.uri.URI;
import org.openrdf.util.xml.XmlUtil;
import org.openrdf.vocabulary.RDF;

import org.openrdf.rio.NamespaceListener;
import org.openrdf.rio.ParseLocationListener;

/**
 * A filter on SAX events to make life easier on the RDF parser
 * itself. This filter does things like combining a call to
 * startElement() that is directly followed by a call to
 * endElement() to a single call to emptyElement().
 **/
class SAXFilter implements ContentHandler {

/*----------+
| Variables |
+----------*/

	/**
	 * The RDF parser to supply the filtered SAX events to.
	 **/
	private RdfXmlParser _rdfParser;

	/**
	 * A Locator indicating a position in the text that is currently being
	 * parsed by the SAX parser. Set through setDocumentLocator() and reset
	 * to null by clear().
	 **/
	private Locator _locator;

	/**
	 * A listener that is interested in the progress of the SAX parser. May be
	 * null, in which case no location updates are reported.
	 **/
	private ParseLocationListener _locListener;

	/**
	 * A listener that is interested in the namespaces that are defined in the
	 * parsed RDF. May be null, in which case namespaces are not reported.
	 **/
	private NamespaceListener _nsListener;

	/**
	 * Stack of ElementInfo objects for the elements that are currently open.
	 * A deferred element is only pushed once its start tag is reported, and
	 * elements that are part of an XML literal are not pushed at all.
	 **/
	private Stack _elInfoStack = new Stack();

	/**
	 * StringBuffer used to collect text during parsing. While parsing an XML
	 * literal it also receives the serialized start and end tags.
	 **/
	private StringBuffer _charBuf = new StringBuffer(512);

	/**
	 * The document's URI. Used as the base URI of elements that do not have a
	 * parent element to inherit one from.
	 **/
	private URI _documentURI;

	/**
	 * Flag indicating whether the parser parses stand-alone RDF documents. In
	 * stand-alone documents, the rdf:RDF element is optional if it contains
	 * just one element.
	 **/
	private boolean _parseStandAloneDocuments = false;

	/**
	 * Variable used to defer reporting of start tags. Reporting start tags is
	 * deferred to be able to combine a start tag and an immediately following
	 * end tag to a single call to emptyElement().
	 **/
	private ElementInfo _deferredElement = null;

	/**
	 * New namespace mappings that have been reported for the next start tag by
	 * the SAX parser, but that are not yet assigned to an ElementInfo object.
	 **/
	private Map _newNamespaceMappings = new HashMap();

	/**
	 * Flag indicating whether we're currently parsing RDF elements.
	 **/
	private boolean _inRdfContext;
	
	/**
	 * The number of elements on the stack that are in the RDF context. The
	 * rdf:RDF element that opens the context is not counted: the counter is
	 * reset to 0 after that element has been pushed.
	 **/
	private int _rdfContextStackHeight;

	/**
	 * Flag indicating whether we're currently parsing an XML literal.
	 **/
	private boolean _parseLiteralMode = false;

	/**
	 * The nesting depth of the elements inside the XML literal that is
	 * currently being parsed. These elements are serialized to _charBuf
	 * rather than pushed onto _elInfoStack.
	 **/
	private int _xmlLiteralStackHeight;

	/**
	 * The prefixes that are defined in the XML literal itself (this in contrast
	 * to the namespaces from the XML literal's context).
	 **/
	private List _xmlLiteralPrefixes = new ArrayList();

	/**
	 * The prefixes that were used in an XML literal, but that were not defined
	 * in it (but rather in the XML literal's context).
	 **/
	private List _unknownPrefixesInXmlLiteral = new ArrayList();

/*-------------+
| Constructors |
+-------------*/

	/**
	 * Creates a new SAXFilter that supplies its filtered SAX events to the
	 * specified RDF parser.
	 *
	 * @param rdfParser The RDF parser that receives the filtered events.
	 **/
	public SAXFilter(RdfXmlParser rdfParser) {
		this._rdfParser = rdfParser;
	}

/*--------+
| Methods |
+--------*/

	/**
	 * Gets the Locator that was most recently supplied by the SAX parser.
	 *
	 * @return The current Locator, or null if none has been set (yet).
	 **/
	public Locator getLocator() {
		return this._locator;
	}

	/**
	 * Sets the listener that is to be informed about the parse location. If
	 * a Locator is already available, the listener is immediately informed
	 * of the current location.
	 *
	 * @param el The listener, or null to stop reporting parse locations.
	 **/
	public void setParseLocationListener(ParseLocationListener el) {
		_locListener = el;

		// Guard against a null listener: unsetting the listener while a
		// Locator is available must not result in a NullPointerException.
		if (_locListener != null && _locator != null) {
			_locListener.parseLocationUpdate(
					_locator.getLineNumber(), _locator.getColumnNumber());
		}
	}

	/**
	 * Gets the listener that is informed about the parse location.
	 *
	 * @return The registered listener, or null if there is none.
	 **/
	public ParseLocationListener getParseLocationListener() {
		return this._locListener;
	}

	/**
	 * Sets the listener that is to be informed about the namespace prefix
	 * mappings that are reported by the SAX parser.
	 *
	 * @param nl The listener, or null to stop reporting namespaces.
	 **/
	public void setNamespaceListener(NamespaceListener nl) {
		this._nsListener = nl;
	}

	/**
	 * Gets the listener that is informed about namespace prefix mappings.
	 *
	 * @return The registered listener, or null if there is none.
	 **/
	public NamespaceListener getNamespaceListener() {
		return this._nsListener;
	}

	/**
	 * Resets all parse state of this filter: the locator, the element stack,
	 * any collected text, the document URI and all RDF-context and
	 * XML-literal bookkeeping. The registered listeners and the stand-alone
	 * documents setting are left untouched.
	 **/
	public void clear() {
		// References to document-specific objects
		_locator = null;
		_documentURI = null;
		_deferredElement = null;

		// Collected elements, text and namespace declarations
		_elInfoStack.clear();
		_charBuf.setLength(0);
		_newNamespaceMappings.clear();

		// RDF context bookkeeping
		_inRdfContext = false;
		_rdfContextStackHeight = 0;

		// XML literal bookkeeping
		_parseLiteralMode = false;
		_xmlLiteralStackHeight = 0;
		_xmlLiteralPrefixes.clear();
		_unknownPrefixesInXmlLiteral.clear();
	}

	/**
	 * Sets the URI of the document that is being parsed.
	 *
	 * @param documentURI The document's URI, as a String.
	 **/
	public void setDocumentURI(String documentURI) {
		URI parsedURI = _createBaseURI(documentURI);
		this._documentURI = parsedURI;
	}

	/**
	 * Specifies whether the parsed documents are stand-alone RDF documents,
	 * in which the rdf:RDF root element may have been omitted.
	 *
	 * @param standAloneDocs true to parse stand-alone documents.
	 **/
	public void setParseStandAloneDocuments(boolean standAloneDocs) {
		this._parseStandAloneDocuments = standAloneDocs;
	}

	/**
	 * Checks whether this filter treats documents as stand-alone RDF
	 * documents.
	 *
	 * @return true if stand-alone documents are parsed, false otherwise.
	 **/
	public boolean getParseStandAloneDocuments() {
		return this._parseStandAloneDocuments;
	}

/*--------------------------------------+
| Methods from interface ContentHandler |
+--------------------------------------*/

	/**
	 * Stores the Locator supplied by the SAX parser and, if a parse location
	 * listener is registered, reports the locator's current position to it.
	 **/
	public void setDocumentLocator(Locator locator) {
		_locator = locator;

		if (_locListener == null) {
			// Nobody is interested in the parse location
			return;
		}

		int lineNo = locator.getLineNumber();
		int columnNo = locator.getColumnNumber();
		_locListener.parseLocationUpdate(lineNo, columnNo);
	}

	/**
	 * Start-of-document notification from the SAX parser. This filter does
	 * nothing with it.
	 **/
	public void startDocument() {
		// ignore
	}

	/**
	 * End-of-document notification from the SAX parser. This filter does
	 * nothing with it.
	 **/
	public void endDocument() {
		// ignore
	}

	/**
	 * Records a namespace prefix mapping for the start tag that is about to
	 * be reported by the SAX parser, and passes the mapping on to the
	 * namespace listener, if any.
	 **/
	public void startPrefixMapping(String prefix, String uri)
		throws SAXException
	{
		// A prefix mapping belongs to a new start tag, so the start tag that
		// is still deferred (if any) has to be reported first.
		if (_deferredElement != null) {
			_reportDeferredStartElement();
		}

		_newNamespaceMappings.put(prefix, uri);

		// Prefixes introduced inside an XML literal are tracked separately
		if (_parseLiteralMode) {
			_xmlLiteralPrefixes.add(prefix);
		}

		if (_nsListener == null) {
			return;
		}

		_nsListener.handleNamespace(prefix, uri);
	}

	/**
	 * Ends the scope of a namespace prefix mapping. Only relevant while
	 * parsing an XML literal, in which case the prefix is removed from the
	 * list of prefixes that are defined in the literal.
	 **/
	public void endPrefixMapping(String prefix) {
		if (!_parseLiteralMode) {
			return;
		}

		_xmlLiteralPrefixes.remove(prefix);
	}

	/**
	 * Handles a start tag. Depending on the current state, the element is
	 * either serialized as part of an XML literal, pushed on the element
	 * stack (when outside RDF context), or deferred so that it can be
	 * combined with an immediately following end tag into a single call to
	 * emptyElement().
	 **/
	public void startElement(String namespaceURI, String localName, String qName, Attributes attributes)
		throws SAXException
	{
		if (_deferredElement != null) {
			// The next call could set _parseLiteralMode to true!
			_reportDeferredStartElement();
		}

		if (_parseLiteralMode) {
			_appendStartTag(qName, attributes);

			// The pending namespace mappings belong to this start tag only and
			// have been serialized by now. Discard them so they are neither
			// repeated on subsequent start tags in the literal, nor assigned
			// to the first element following the literal.
			_newNamespaceMappings.clear();

			_xmlLiteralStackHeight++;
		}
		else {
			ElementInfo parent = _peekStack();
			ElementInfo elInfo = new ElementInfo(parent, qName, namespaceURI, localName);

			elInfo.setNamespaceMappings(_newNamespaceMappings);
			_newNamespaceMappings.clear();

			if (!_inRdfContext && _parseStandAloneDocuments &&
				(!localName.equals("RDF") || !namespaceURI.equals(RDF.NAMESPACE)))
			{
				// Stand-alone document that does not start with an rdf:RDF root
				// element. Assume this root element is omitted.
				_inRdfContext = true;
			}

			if (!_inRdfContext) {
				// Check for presence of xml:base and xml:lang attributes.
				for (int i = 0; i < attributes.getLength(); i++) {
					String attQName = attributes.getQName(i);

					if ("xml:base".equals(attQName)) {
						elInfo.setBaseURI( attributes.getValue(i) );
					}
					else if ("xml:lang".equals(attQName)) {
						elInfo.xmlLang = attributes.getValue(i);
					}
				}

				_elInfoStack.push(elInfo);

				// Check if we are entering RDF context now.
				if (localName.equals("RDF") && namespaceURI.equals(RDF.NAMESPACE)) {
					_inRdfContext = true;
					_rdfContextStackHeight = 0;
				}
			}
			else {
				// We're parsing RDF elements.
				_checkAndCopyAttributes(attributes, elInfo);

				// Don't report the new element to the RDF parser just yet.
				_deferredElement = elInfo;
			}
		}
	}

	/**
	 * Reports the deferred start tag to the RDF parser. The element is pushed
	 * on the element stack and the parser's base URI and xml:lang are set to
	 * the element's values before the start tag is reported. Callers must
	 * make sure that a deferred element is actually available.
	 **/
	private void _reportDeferredStartElement()
		throws SAXException
	{
		ElementInfo pending = _deferredElement;

		// The element is now a regular member of the RDF context
		_elInfoStack.push(pending);
		_rdfContextStackHeight++;

		// Supply the element's context to the parser before the element itself
		_rdfParser.setBaseURI(pending.baseURI);
		_rdfParser.setXmlLang(pending.xmlLang);

		_rdfParser.startElement(
				pending.namespaceURI, pending.localName, pending.qName, pending.atts);

		// Only cleared once the start tag has been reported successfully
		_deferredElement = null;
	}

	/**
	 * Handles an end tag. Outside RDF context the element is simply popped
	 * from the stack. Inside an XML literal the end tag is serialized. In
	 * RDF context, a still-deferred start tag is reported as an empty
	 * element; otherwise any collected text and the end tag are reported to
	 * the RDF parser.
	 **/
	public void endElement(String namespaceURI, String localName, String qName)
		throws SAXException
	{
		// FIXME: in parseLiteralMode we should also check if start- and
		// end-tags match but these start tags are not tracked yet.

		if (_rdfParser._verifyData && !_parseLiteralMode) {
			// Verify that the end tag matches the start tag.
			ElementInfo elInfo;

			if (_deferredElement != null) {
				elInfo = _deferredElement;
			}
			else {
				elInfo = _peekStack();
			}

			if (!qName.equals(elInfo.qName)) {
				_rdfParser.sendFatalError("expected end tag </" + elInfo.qName + ">, " + "found </" + qName + ">");
			}
		}

		if (!_inRdfContext) {
			_elInfoStack.pop();
			_charBuf.setLength(0);
			return;
		}

		if (_deferredElement == null && _rdfContextStackHeight == 0) {
			// This end tag removes the element that signaled the start
			// of the RDF context (i.e. <rdf:RDF>) from the stack.
			_inRdfContext = false;

			_elInfoStack.pop();
			_charBuf.setLength(0);
			return;
		}

		// We're still in RDF context.

		if (_parseLiteralMode && _xmlLiteralStackHeight > 0) {
			// End tag of an element inside the XML literal
			_appendEndTag(qName);
			_xmlLiteralStackHeight--;
			return;
		}

		// Check for any deferred start elements
		if (_deferredElement != null) {
			// Start element still deferred, this is an empty element
			_rdfParser.setBaseURI(_deferredElement.baseURI);
			_rdfParser.setXmlLang(_deferredElement.xmlLang);

			_rdfParser.emptyElement(
					_deferredElement.namespaceURI, _deferredElement.localName,
					_deferredElement.qName, _deferredElement.atts);

			_deferredElement = null;
		}
		else {
			if (_parseLiteralMode) {
				// Insert any used namespace prefixes from the XML literal's
				// context that are not defined in the XML literal itself.
				_insertUsedContextPrefixes();
			}

			// Check if any character data has been collected in the _charBuf
			String s = _charBuf.toString().trim();
			_charBuf.setLength(0);

			// An XML literal is always reported, even when it is empty
			if (s.length() > 0 || _parseLiteralMode) {
				_rdfParser.text(s);

				_parseLiteralMode = false;
			}

			// Handle the end tag
			_elInfoStack.pop();
			_rdfContextStackHeight--;

			_rdfParser.endElement(namespaceURI, localName, qName);
		}
	}

	/**
	 * Collects character data in _charBuf. Character data outside RDF
	 * context is ignored. A deferred start tag is reported first, as the
	 * element it belongs to is apparently not empty.
	 **/
	public void characters(char[] ch, int start, int length)
		throws SAXException
	{
		if (!_inRdfContext) {
			return;
		}

		if (_deferredElement != null) {
			_reportDeferredStartElement();
		}

		if (!_parseLiteralMode) {
			// Plain text, collect it as-is
			_charBuf.append(ch, start, length);
			return;
		}

		// Inside an XML literal, characters like '<', '>', and '&' must be
		// escaped to prevent breaking the XML text.
		String escapedText = XmlUtil.escapeCharacterData(new String(ch, start, length));
		_charBuf.append(escapedText);
	}

	/**
	 * Handles ignorable whitespace. The whitespace is only preserved when it
	 * is part of an XML literal.
	 **/
	public void ignorableWhitespace(char[] ch, int start, int length) {
		if (!_parseLiteralMode) {
			return;
		}

		_charBuf.append(ch, start, length);
	}

	/**
	 * Processing instruction notification from the SAX parser. This filter
	 * does nothing with it.
	 **/
	public void processingInstruction(String target, String data) {
		// ignore
	}

	/**
	 * Skipped entity notification from the SAX parser. This filter does
	 * nothing with it.
	 **/
	public void skippedEntity(String name) {
		// ignore
	}

	/**
	 * Copies the supplied SAX attributes to a new Atts object that is stored
	 * in the supplied ElementInfo. The values of xml:lang and xml:base
	 * attributes are stored in the ElementInfo directly; all other attributes
	 * whose name starts with "xml" are dropped. A warning is sent for the
	 * deprecated unqualified RDF attributes, which are moved to the RDF
	 * namespace, and, when data is being verified, an error is reported for
	 * any other unqualified attribute.
	 *
	 * @param attributes The attributes as reported by the SAX parser.
	 * @param elInfo The element to store the results in.
	 **/
	private void _checkAndCopyAttributes(Attributes attributes, ElementInfo elInfo)
		throws SAXException
	{
		int attCount = attributes.getLength();
		Atts copiedAtts = new Atts(attCount);

		for (int i = 0; i < attCount; i++) {
			String qName = attributes.getQName(i);
			String value = attributes.getValue(i);

			// attributes starting with "xml" should be ignored, except for the
			// ones that are handled by this parser (xml:lang and xml:base).
			if (qName.startsWith("xml")) {
				if ("xml:lang".equals(qName)) {
					elInfo.xmlLang = value;
				}
				else if ("xml:base".equals(qName)) {
					elInfo.setBaseURI(value);
				}

				continue;
			}

			String namespace = attributes.getURI(i);
			String localName = attributes.getLocalName(i);

			// A limited set of unqualified attributes must be supported by
			// parsers, as is specified in section 6.1.4 of the spec
			if ("".equals(namespace)) {
				boolean deprecatedRdfAttribute =
						localName.equals("ID") ||
						localName.equals("about") ||
						localName.equals("resource") ||
						localName.equals("parseType") ||
						localName.equals("type");

				if (deprecatedRdfAttribute) {
					_rdfParser.sendWarning(
							"use of unqualified attribute " + localName + " has been deprecated");
					namespace = RDF.NAMESPACE;
				}
			}

			// Any attribute that is still unqualified is not allowed
			if (_rdfParser._verifyData && "".equals(namespace)) {
				_rdfParser.sendError("unqualified attribute '" + qName + "' not allowed");
			}

			copiedAtts.addAtt(new Att(namespace, localName, qName, value));
		}

		elInfo.atts = copiedAtts;
	}

	/**
	 * Switches this filter to XML literal mode, in which start tags, end
	 * tags and character data are serialized to _charBuf. All XML literal
	 * bookkeeping is reset.
	 **/
	public void setParseLiteralMode() {
		// All currently known namespace prefixes are
		// new for this XML literal.
		_unknownPrefixesInXmlLiteral.clear();
		_xmlLiteralPrefixes.clear();

		_xmlLiteralStackHeight = 0;
		_parseLiteralMode = true;
	}

	/**
	 * Creates a URI object for the supplied URI string. URIs using the
	 * 'jar:' scheme that contain an exclamation mark get special treatment;
	 * all other URIs are parsed and normalized.
	 *
	 * @param uriString The URI, as a String.
	 * @return A URI object for the supplied String.
	 **/
	private URI _createBaseURI(String uriString) {
		boolean hasJarScheme =
				uriString.length() > 4 && uriString.regionMatches(true, 0, "jar:", 0, 4);

		if (hasJarScheme) {
			// uriString is e.g. jar:http://www.foo.com/bar/baz.jar!/COM/foo/Quux.class
			// Treat the part up to and including the exclamation mark as the scheme and
			// the rest as the path to enable 'correct' resolving of relative URIs
			int exclMarkIdx = uriString.indexOf('!');

			if (exclMarkIdx != -1) {
				int pathStart = exclMarkIdx + 1;
				return new URI(
						uriString.substring(0, pathStart), null,
						uriString.substring(pathStart), null, null);
			}
		}

		URI result = new URI(uriString);
		result.normalize();
		return result;
	}

/*--------------------------------+
| Methods related to XML literals |
+--------------------------------*/

	/**
	 * Appends a start tag to _charBuf. This method is used during the
	 * parsing of an XML Literal. Besides serializing the tag, it registers
	 * the namespace prefixes used by the element and by its attributes that
	 * are not defined in the XML literal itself, so that their declarations
	 * can later be copied from the literal's context.
	 *
	 * @param qName The qualified name of the element.
	 * @param attributes The element's attributes, as reported by SAX.
	 **/
	private void _appendStartTag(String qName, Attributes attributes) {
		// Write start of start tag
		_charBuf.append("<" + qName);

		// Write any new namespace prefix definitions
		Iterator mappings = _newNamespaceMappings.entrySet().iterator();
		while (mappings.hasNext()) {
			Map.Entry mapping = (Map.Entry)mappings.next();
			_appendNamespaceDecl(_charBuf, (String)mapping.getKey(), (String)mapping.getValue());
		}

		// Write attributes
		int attCount = attributes.getLength();
		for (int i = 0; i < attCount; i++) {
			_appendAttribute(_charBuf, attributes.getQName(i), attributes.getValue(i));
		}

		// Write end of start tag
		_charBuf.append(">");

		// Check for any used prefixes that are not defined in the XML literal
		// itself. An element without a prefix uses the default namespace.
		int colonIdx = qName.indexOf(':');
		String prefix = (colonIdx > 0) ? qName.substring(0, colonIdx) : "";
		_registerPrefixUsedInXmlLiteral(prefix);

		// Attributes can use context prefixes too. Unprefixed attributes are
		// not in any namespace, so the default namespace is irrelevant here.
		for (int i = 0; i < attCount; i++) {
			String attQName = attributes.getQName(i);
			int attColonIdx = attQName.indexOf(':');

			if (attColonIdx > 0) {
				String attPrefix = attQName.substring(0, attColonIdx);

				// 'xml' and 'xmlns' are reserved prefixes that are never declared
				if (!"xml".equals(attPrefix) && !"xmlns".equals(attPrefix)) {
					_registerPrefixUsedInXmlLiteral(attPrefix);
				}
			}
		}
	}

	/**
	 * Registers a prefix as being used in the current XML literal without
	 * being defined in it, unless the prefix is defined in the literal or
	 * has already been registered.
	 **/
	private void _registerPrefixUsedInXmlLiteral(String prefix) {
		if (!_xmlLiteralPrefixes.contains(prefix) &&
			!_unknownPrefixesInXmlLiteral.contains(prefix))
		{
			_unknownPrefixesInXmlLiteral.add(prefix);
		}
	}

	/**
	 * Serializes an end tag for the specified element name to _charBuf.
	 * Used while an XML Literal is being parsed.
	 **/
	private void _appendEndTag(String qName) {
		_charBuf.append("</").append(qName).append(">");
	}

	/**
	 * Adds namespace declarations to the collected XML Literal for all
	 * prefixes that are used in the literal but defined in its context only.
	 * The declarations are inserted just before the first '>' character in
	 * _charBuf. Afterwards, the list of such prefixes is cleared.
	 **/
	private void _insertUsedContextPrefixes() {
		if (!_unknownPrefixesInXmlLiteral.isEmpty()) {
			// The element on top of the stack provides the literal's context
			ElementInfo context = _peekStack();

			// Collect the declarations for all prefixes known in the context
			StringBuffer declarations = new StringBuffer(1024);

			Iterator prefixIter = _unknownPrefixesInXmlLiteral.iterator();
			while (prefixIter.hasNext()) {
				String prefix = (String)prefixIter.next();
				String namespace = context.getNamespace(prefix);

				if (namespace != null) {
					_appendNamespaceDecl(declarations, prefix, namespace);
				}
			}

			// Locate the first '>' character. This is done by hand because
			// StringBuffer.indexOf(String) requires JDK1.4 or newer.
			int insertPos;
			for (insertPos = 0; _charBuf.charAt(insertPos) != '>'; insertPos++) {
				// just scanning
			}

			_charBuf.insert(insertPos, declarations.toString());
		}

		_unknownPrefixesInXmlLiteral.clear();
	}

	/**
	 * Appends a namespace declaration to the supplied buffer: 'xmlns' for
	 * the empty prefix, 'xmlns:prefix' for any other prefix.
	 **/
	private void _appendNamespaceDecl(StringBuffer sb, String prefix, String namespace) {
		String attName = "".equals(prefix) ? "xmlns" : "xmlns:" + prefix;

		_appendAttribute(sb, attName, namespace);
	}

	/**
	 * Appends an attribute to the supplied buffer, preceded by a space. The
	 * value is escaped and enclosed in double quotes.
	 **/
	private void _appendAttribute(StringBuffer sb, String name, String value) {
		sb.append(" ").append(name).append("=\"")
			.append(XmlUtil.escapeDoubleQuotedAttValue(value))
			.append("\"");
	}

/*-----------------------------------------+
| Methods related to the ElementInfo stack |
+-----------------------------------------*/

	/**
	 * Gets the ElementInfo on top of the element stack without removing it.
	 *
	 * @return The top element, or null if the stack is empty.
	 **/
	private ElementInfo _peekStack() {
		if (_elInfoStack.empty()) {
			return null;
		}

		return (ElementInfo)_elInfoStack.peek();
	}

/*---------------------------+
| Internal class ElementInfo |
+---------------------------*/

	/**
	 * Bookkeeping for a single element: its names and attributes, its parent
	 * element, the namespace mappings that were declared on it and the base
	 * URI and xml:lang value that are in effect for it.
	 **/
	private class ElementInfo {

		public String qName;
		public String namespaceURI;
		public String localName;
		public Atts atts;

		public ElementInfo parent;

		// Namespace mappings declared on this element itself (prefix to
		// namespace), or null if there are none.
		private Map _namespaceMap;

		public URI baseURI;
		public String xmlLang;

		/**
		 * Creates an ElementInfo for an element without a parent.
		 **/
		public ElementInfo(String qName, String namespaceURI, String localName) {
			this(null, qName, namespaceURI, localName);
		}

		/**
		 * Creates an ElementInfo. The base URI and xml:lang value are
		 * inherited from the parent; without a parent, the document URI and
		 * an empty xml:lang value are used.
		 **/
		public ElementInfo(ElementInfo parent, String qName, String namespaceURI, String localName) {
			this.parent = parent;
			this.qName = qName;
			this.namespaceURI = namespaceURI;
			this.localName = localName;

			if (parent != null) {
				// Inherit baseURI and xmlLang from parent
				this.baseURI = parent.baseURI;
				this.xmlLang = parent.xmlLang;
			}
			else {
				this.baseURI = _documentURI;
				this.xmlLang = "";
			}
		}

		/**
		 * Sets the base URI of this element by resolving the specified URI
		 * against the inherited base URI. If no base URI has been inherited
		 * (e.g. because no document URI was set), the specified URI is used
		 * as-is.
		 **/
		public void setBaseURI(String uriString) {
			URI specifiedURI = _createBaseURI(uriString);

			if (baseURI == null) {
				// Nothing to resolve against; prevents a NullPointerException
				baseURI = specifiedURI;
			}
			else {
				// Resolve the specified base URI against the inherited base URI
				baseURI = baseURI.resolve(specifiedURI);
			}
		}

		/**
		 * Stores a copy of the supplied namespace mappings as the mappings
		 * that were declared on this element.
		 **/
		public void setNamespaceMappings(Map namespaceMappings) {
			if (namespaceMappings.isEmpty()) {
				_namespaceMap = null;
			}
			else {
				_namespaceMap = new HashMap(namespaceMappings);
			}
		}

		/**
		 * Gets the namespace for the specified prefix, searching this element
		 * first and then its ancestors.
		 *
		 * @return The namespace, or null if the prefix is not mapped.
		 **/
		public String getNamespace(String prefix) {
			for (ElementInfo el = this; el != null; el = el.parent) {
				if (el._namespaceMap != null) {
					String namespace = (String)el._namespaceMap.get(prefix);

					if (namespace != null) {
						return namespace;
					}
				}
			}

			return null;
		}
	}
}
