/*  Sesame - Storage and Querying architecture for RDF and RDF Schema
 *  Copyright (C) 2001-2006 Aduna
 *
 *  Contact: 
 *  	Aduna
 *  	Prinses Julianaplein 14 b
 *  	3817 CS Amersfoort
 *  	The Netherlands
 *  	tel. +33 (0)33 465 99 87
 *  	fax. +33 (0)33 465 99 87
 *
 *  	http://aduna-software.com/
 *  	http://www.openrdf.org/
 *  
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2.1 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

package org.openrdf.rio.ntriples;


import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.util.HashMap;
import java.util.Map;

import org.openrdf.util.xml.XmlDatatypeUtil;


import org.openrdf.model.BNode;
import org.openrdf.model.Literal;
import org.openrdf.model.Resource;
import org.openrdf.model.URI;
import org.openrdf.model.Value;
import org.openrdf.model.ValueFactory;
import org.openrdf.model.impl.ValueFactoryImpl;


import org.openrdf.rio.NamespaceListener;
import org.openrdf.rio.ParseErrorListener;
import org.openrdf.rio.ParseException;
import org.openrdf.rio.ParseLocationListener;
import org.openrdf.rio.Parser;
import org.openrdf.rio.StatementHandler;
import org.openrdf.rio.StatementHandlerException;

/**
 * Parser for N-Triples files. A specification of NTriples can be found in
 * <a href="http://www.w3.org/TR/rdf-testcases/#ntriples">this section</a> of
 * the RDF Test Cases document.
 *
 * <p>The parser reads its input one character at a time and keeps the state of
 * the current parse (reader, line number, blank node map, current triple) in
 * instance fields. It is therefore not safe to run two parses on one instance
 * concurrently; all public methods are synchronized on the parser instance.
 **/
public class NTriplesParser implements Parser {

/*--------------+
| Variables     |
+--------------*/

	/** Receiver of the parsed statements; may be <tt>null</tt>. */
	private StatementHandler _statementHandler;

	/** Receiver of fatal error notifications; may be <tt>null</tt>. */
	private ParseErrorListener _errListener;

	/** Receiver of line number updates; may be <tt>null</tt>. */
	private ParseLocationListener _locListener;

	/** The character source of the parse in progress, <tt>null</tt> otherwise. */
	private Reader _reader;
	
	/** 1-based number of the line that is currently being read. */
	private int _lineNo;

	/** Factory used to create all URIs, bNodes and literals. */
	private ValueFactory _valFactory;

	/**
	 * Mapping from bNode ID's as used in the RDF document to the
	 * object created for it by the ValueFactory. Cleared at the end of
	 * every parse.
	 **/
	private Map _bNodeIdMap;

	// Components of the triple that is currently being parsed.
	private Resource _subject;
	private URI _predicate;
	private Value _object;

	/**
	 * Indicates how datatyped literals should be handled. Legal
	 * values are <tt>DT_IGNORE</tt>, <tt>DT_VERIFY</tt> and
	 * <tt>DT_NORMALIZE</tt>. These constants are not declared in this class;
	 * presumably they are inherited from the Parser interface.
	 **/
	private int _datatypeHandling;

	/** 
	 * Flag indicating whether the parser should preserve bnode identifiers used
	 * in the source.
	 **/
	boolean _preserveBNodeIds = false;
	
/*--------------+
| Constructors  |
+--------------*/

	/**
	 * Creates a new NTriplesParser that will use a <tt>ValueFactoryImpl</tt> to
	 * create object for resources, bNodes and literals.
	 * @see org.openrdf.model.impl.ValueFactoryImpl
	 **/
	public NTriplesParser() {
		this(new ValueFactoryImpl());
	}

	/**
	 * Creates a new NTriplesParser that will use the supplied ValueFactory to
	 * create objects for resources, bNodes and literals. Datatype handling
	 * is initialized to <tt>DT_VERIFY</tt>.
	 *
	 * @param valueFactory A ValueFactory.
	 **/
	public NTriplesParser(ValueFactory valueFactory) {
		_valFactory = valueFactory;
		_bNodeIdMap = new HashMap();
		_datatypeHandling = DT_VERIFY;
	}

/*--------------+
| Methods       |
+--------------*/

	/**
	 * Implements Parser.setStatementHandler(StatementHandler). If no handler
	 * (or <tt>null</tt>) is set, the input is still parsed and checked, but
	 * the resulting statements are discarded.
	 **/
	public synchronized void setStatementHandler(StatementHandler sh) {
		_statementHandler = sh;
	}

	// Implements Parser.setParseErrorListener(ParseErrorListener)
	public synchronized void setParseErrorListener(ParseErrorListener el) {
		_errListener = el;
	}

	// Implements Parser.setParseLocationListener(ParseLocationListener)
	public synchronized void setParseLocationListener(ParseLocationListener el) {
		_locListener = el;
	}

	/**
	 * A NamespaceListener is of no use when parsing N-Triples as N-Triples
	 * doesn't have any namespace mechanism (yet). A call to this method will
	 * be ignored and the supplied NamespaceListener will not receive any events
	 * from this parser.
	 **/
	public synchronized void setNamespaceListener(NamespaceListener nl) {
		// ignore
	}

	// Implements Parser.setVerifyData(boolean); the setting is ignored.
	public synchronized void setVerifyData(boolean verifyData) {
		// ignore
	}

	/**
	 * Implements Parser.setPreserveBNodeIds(boolean). Synchronized because
	 * the flag is read while a parse holds the parser's lock.
	 **/
	public synchronized void setPreserveBNodeIds(boolean preserveBNodeIds) {
		_preserveBNodeIds = preserveBNodeIds;
	}

	// Implements Parser.setStopAtFirstError(boolean); the setting is ignored,
	// every parse error reported by this parser is fatal.
	public synchronized void setStopAtFirstError(boolean stopAtFirstError) {
		// ignore
	}

	/**
	 * Implements Parser.setDatatypeHandling(int). Synchronized because the
	 * setting is read while a parse holds the parser's lock.
	 **/
	public synchronized void setDatatypeHandling(int datatypeHandling) {
		_datatypeHandling = datatypeHandling;
	}

	/**
	 * Implementation of the <tt>parse(InputStream, String)</tt> method defined
	 * in the Parser interface. The bytes are decoded as US-ASCII and handed to
	 * {@link #parse(Reader, String)}.
	 * 
	 * @param in The InputStream from which to read the data. The InputStream is
	 * supposed to contain 7-bit US-ASCII characters, as per the N-Triples
	 * specification.
	 * @param baseURI The URI associated with the data in the InputStream.
	 * @exception IOException If an I/O error occurred while data was read
	 * from the InputStream.
	 * @exception ParseException If the parser has found an unrecoverable
	 * parse error.
	 * @exception StatementHandlerException If the configured statement handler
	 * encountered an unrecoverable error.
	 * @exception IllegalArgumentException If the supplied input stream or
	 * base URI is <tt>null</tt>.
	 **/
	public synchronized void parse(InputStream in, String baseURI)
		throws IOException, ParseException, StatementHandlerException
	{
		if (in == null) {
			throw new IllegalArgumentException("Input stream can not be 'null'");
		}
		// Note: baseURI will be checked in parse(Reader, String)

		try {
			parse(new InputStreamReader(in, "US-ASCII"), baseURI);
		}
		catch (UnsupportedEncodingException e) {
			// Every platform should support the US-ASCII encoding...
			throw new RuntimeException(e);
		}
	}

	/**
	 * Implementation of the <tt>parse(Reader, String)</tt> method defined in
	 * the Parser interface. Comment lines (starting with '#') and empty lines
	 * are skipped; every other line is parsed as a triple. The base URI is
	 * only checked for <tt>null</tt>, it is not used otherwise.
	 * 
	 * @param reader The Reader from which to read the data.
	 * @param baseURI The URI associated with the data in the Reader.
	 * @exception IOException If an I/O error occurred while data was read
	 * from the Reader.
	 * @exception ParseException If the parser has found an unrecoverable
	 * parse error.
	 * @exception StatementHandlerException If the configured statement handler
	 * encountered an unrecoverable error.
	 * @exception IllegalArgumentException If the supplied reader or base URI
	 * is <tt>null</tt>.
	 **/
	public synchronized void parse(Reader reader, String baseURI)
		throws IOException, ParseException, StatementHandlerException
	{
		if (reader == null) {
			throw new IllegalArgumentException("Reader can not be 'null'");
		}
		if (baseURI == null) {
			throw new IllegalArgumentException("base URI can not be 'null'");
		}

		_reader = reader;
		_lineNo = 1;

		if (_locListener != null) {
			_locListener.parseLocationUpdate(_lineNo, 1);
		}

		try {
			int c = _reader.read();
			c = _skipWhitespace(c);

			while (c != -1) {
				if (c == '#') {
					// Comment, ignore
					c = _skipLine(c);
				}
				else if (c == '\r' || c == '\n') {
					// Empty line, ignore
					c = _skipLine(c);
				}
				else {
					c = _parseTriple(c);
				}

				c = _skipWhitespace(c);
			}
		}
		finally {
			// Blank node identifiers are scoped to a single document.
			_bNodeIdMap.clear();

			// Do not hold on to the caller's reader, nor to a half-parsed
			// triple in case the parse was aborted by an exception.
			_reader = null;
			_subject = null;
			_predicate = null;
			_object = null;
		}
	}

	/**
	 * Reads characters from _reader until it finds a character that is not
	 * a space or tab, and returns this last character. In case the end of the
	 * character stream has been reached, -1 is returned.
	 *
	 * @param c The character that was read last.
	 **/
	private int _skipWhitespace(int c)
		throws IOException
	{
		while (c == ' ' || c == '\t') {
			c = _reader.read();
		}

		return c;
	}

	/**
	 * Reads characters from _reader until the first EOL has been read. The
	 * first character after the EOL is returned. In case the end of the
	 * character stream has been reached, -1 is returned. An EOL is either
	 * "\n", "\r" or "\r\n"; each of these advances the line number by one.
	 *
	 * @param c The character that was read last.
	 **/
	private int _skipLine(int c)
		throws IOException
	{
		while (c != -1 && c != '\r' && c != '\n') {
			c = _reader.read();
		}

		// c is equal to -1, \r or \n. In case of a \r, we should
		// check whether it is followed by a \n.

		if (c == '\n') {
			c = _reader.read();
			_nextLine();
		}
		else if (c == '\r') {
			c = _reader.read();

			if (c == '\n') {
				c = _reader.read();
			}

			_nextLine();
		}

		return c;
	}

	/**
	 * Advances the line counter by one and reports the start of the new line
	 * to the parse location listener, if any.
	 **/
	private void _nextLine() {
		_lineNo++;

		if (_locListener != null) {
			_locListener.parseLocationUpdate(_lineNo, 1);
		}
	}

	/**
	 * Parses one triple (subject, predicate, object and the closing '.'),
	 * skips the remainder of the line and reports the statement to the
	 * statement handler, if one has been set.
	 *
	 * @param c The first character of the triple.
	 * @return The first character of the next line, or -1 at end of stream.
	 **/
	private int _parseTriple(int c)
		throws IOException, ParseException, StatementHandlerException
	{
		c = _parseSubject(c);

		c = _skipWhitespace(c);

		c = _parsePredicate(c);

		c = _skipWhitespace(c);

		c = _parseObject(c);

		c = _skipWhitespace(c);

		if (c == -1) {
			_throwEOFException();
		}
		else if (c != '.') {
			_throwParseException("Expected '.', found: " + (char)c);
		}

		// Anything following the '.' on the same line is ignored.
		c = _skipLine(c);

		// The listeners are optional, so treat the handler the same way
		// instead of failing with a NullPointerException.
		if (_statementHandler != null) {
			_statementHandler.handleStatement(_subject, _predicate, _object);
		}

		_subject = null;
		_predicate = null;
		_object = null;

		return c;
	}

	/**
	 * Parses the subject of a triple and stores it in _subject.
	 *
	 * @param c The first character of the subject.
	 * @return The first character following the subject.
	 **/
	private int _parseSubject(int c)
		throws IOException, ParseException
	{
		StringBuffer buf = new StringBuffer(100);

		// subject is either an uriref (<foo://bar>) or a nodeID (_:node1)
		if (c == '<') {
			// subject is an uriref
			c = _parseUriRef(c, buf);
			_subject = _createURI(buf.toString());
		}
		else if (c == '_') {
			// subject is a bNode
			c = _parseNodeID(c, buf);
			_subject = _createBNode(buf.toString());
		}
		else if (c == -1) {
			_throwEOFException();
		}
		else {
			_throwParseException("Expected '<' or '_', found: " + (char)c);
		}

		return c;
	}

	/**
	 * Parses the predicate of a triple and stores it in _predicate.
	 *
	 * @param c The first character of the predicate.
	 * @return The first character following the predicate.
	 **/
	private int _parsePredicate(int c)
		throws IOException, ParseException
	{
		StringBuffer buf = new StringBuffer(100);

		// predicate must be an uriref (<foo://bar>)
		if (c == '<') {
			// predicate is an uriref
			c = _parseUriRef(c, buf);
			_predicate = _createURI(buf.toString());
		}
		else if (c == -1) {
			_throwEOFException();
		}
		else {
			_throwParseException("Expected '<', found: " + (char)c);
		}

		return c;
	}

	/**
	 * Parses the object of a triple and stores it in _object.
	 *
	 * @param c The first character of the object.
	 * @return The first character following the object.
	 **/
	private int _parseObject(int c)
		throws IOException, ParseException
	{
		StringBuffer buf = new StringBuffer(100);

		// object is either an uriref (<foo://bar>), a nodeID (_:node1) or a
		// literal ("foo"-en or "1"^^<xsd:integer>).
		if (c == '<') {
			// object is an uriref
			c = _parseUriRef(c, buf);
			_object = _createURI(buf.toString());
		}
		else if (c == '_') {
			// object is a bNode
			c = _parseNodeID(c, buf);
			_object = _createBNode(buf.toString());
		}
		else if (c == '"') {
			// object is a literal
			StringBuffer lang = new StringBuffer(8);
			StringBuffer datatype = new StringBuffer(40);
			c = _parseLiteral(c, buf, lang, datatype);
			_object = _createLiteral(buf.toString(), lang.toString(), datatype.toString());
		}
		else if (c == -1) {
			_throwEOFException();
		}
		else {
			_throwParseException("Expected '<', '_' or '\"', found: " + (char)c);
		}

		return c;
	}

	/**
	 * Reads an uriref. The raw (still escaped) characters between '<' and
	 * '>' are appended to the supplied buffer.
	 *
	 * @param c The opening '<' character.
	 * @param uriRef Buffer receiving the characters of the URI.
	 * @return The first character following the closing '>'.
	 **/
	private int _parseUriRef(int c, StringBuffer uriRef)
		throws IOException, ParseException
	{
		// Supplied char is '<', ignore it.

		// Read up to the next '>' character
		c = _reader.read();
		while (c != '>') {
			if (c == -1) {
				_throwEOFException();
			}
			uriRef.append( (char)c );
			c = _reader.read();
		}

		// c == '>', read next char
		c = _reader.read();

		return c;
	}

	/**
	 * Reads a nodeID of the form <tt>_:name</tt>, where the name starts with
	 * a letter followed by any number of letters and numbers. The name
	 * (without the "_:" prefix) is appended to the supplied buffer.
	 *
	 * @param c The leading '_' character.
	 * @param name Buffer receiving the name of the node.
	 * @return The first character that is not part of the name.
	 **/
	private int _parseNodeID(int c, StringBuffer name)
		throws IOException, ParseException
	{
		// Supplied char is '_', ignore it.

		c = _reader.read();
		if (c == -1) {
			_throwEOFException();
		}
		else if (c != ':') {
			_throwParseException("Expected ':', found: " + (char)c);
		}

		c = _reader.read();
		if (c == -1) {
			_throwEOFException();
		}
		else if (!NTriplesUtil.isLetter(c)) {
			_throwParseException("Expected a letter, found: " + (char)c);
		}
		name.append( (char)c );

		// Read all following letter and numbers, they are part of the name
		c = _reader.read();
		while (c != -1 && NTriplesUtil.isLetterOrNumber(c)) {
			name.append( (char)c );
			c = _reader.read();
		}

		return c;
	}

	/**
	 * Reads a literal, including an optional language tag (<tt>@lang</tt>)
	 * or datatype (<tt>^^&lt;uri&gt;</tt>). The label is stored in its raw,
	 * still escaped form.
	 *
	 * @param c The opening '"' character.
	 * @param value Buffer receiving the raw label of the literal.
	 * @param lang Buffer receiving the language tag, left empty if absent.
	 * @param datatype Buffer receiving the raw datatype URI, left empty if
	 * absent.
	 * @return The first character following the literal.
	 **/
	private int _parseLiteral(
		int c, StringBuffer value, StringBuffer lang, StringBuffer datatype)
		throws IOException, ParseException
	{
		// Supplied char is '"', ignore it.

		// Read up to the next '"' character
		c = _reader.read();
		while (c != '"') {
			if (c == -1) {
				_throwEOFException();
			}
			value.append( (char)c );

			if (c == '\\') {
				// This escapes the next character, which might be a double quote
				c = _reader.read();
				if (c == -1) {
					_throwEOFException();
				}
				value.append( (char)c );
			}

			c = _reader.read();
		}

		// c == '"', read next char
		c = _reader.read();

		if (c == '@') {
			// Read language: everything up to a '.', '^', space or tab
			c = _reader.read();
			while (c != -1 && c != '.' && c != '^' && c != ' ' && c != '\t') {
				lang.append( (char)c );
				c = _reader.read();
			}
		}
		else if (c == '^') {
			// Read datatype
			c = _reader.read();

			// c should be another '^'
			if (c == -1) {
				_throwEOFException();
			}
			else if (c != '^') {
				_throwParseException("Expected '^', found: " + (char)c);
			}

			c = _reader.read();

			// c should be a '<'
			if (c == -1) {
				_throwEOFException();
			}
			else if (c != '<') {
				_throwParseException("Expected '<', found: " + (char)c);
			}

			c = _parseUriRef(c, datatype);
		}

		return c;
	}

	/**
	 * Unescapes the supplied raw URI string and creates a URI object for it
	 * using the ValueFactory. Any failure is reported as a ParseException.
	 **/
	private URI _createURI(String uri)
		throws ParseException
	{
		try {
			uri = NTriplesUtil.unescapeString(uri);
		}
		catch (IllegalArgumentException e) {
			_throwParseException(e.getMessage());
		}

		try {
			return _valFactory.createURI(uri);
		}
		catch (Exception e) {
			_throwParseException(e);
			// Not reached, _throwParseException always throws.
			return null;
		}
	}

	/**
	 * Returns the BNode object for the supplied node ID. The same object is
	 * returned for every occurrence of a node ID within one parse. Depending
	 * on the _preserveBNodeIds flag, a new BNode is created with the ID from
	 * the document or with an ID chosen by the ValueFactory.
	 **/
	private BNode _createBNode(String nodeID)
		throws ParseException
	{
		// Maybe the node ID has been used before:
		BNode result = (BNode)_bNodeIdMap.get(nodeID);

		if (result == null) {
			// This is a new node ID, create a new BNode object for it
			try {
				if (_preserveBNodeIds) {
					result = _valFactory.createBNode(nodeID);
				}
				else {
					result = _valFactory.createBNode();
				}
			}
			catch (Exception e) {
				_throwParseException(e);
			}

			// Remember it, the nodeID might occur again.
			_bNodeIdMap.put(nodeID, result);
		}

		return result;
	}

	/**
	 * Creates a Literal from the raw values read from the document. The label
	 * is unescaped first. If a datatype is present, the label is verified or
	 * normalized according to the configured datatype handling; a datatype
	 * takes precedence over a language tag.
	 *
	 * @param label The raw (escaped) label.
	 * @param lang The language tag, or an empty string if there is none.
	 * @param datatype The raw datatype URI, or an empty string if there is
	 * none.
	 **/
	private Literal _createLiteral(String label, String lang, String datatype)
		throws ParseException
	{
		try {
			label = NTriplesUtil.unescapeString(label);
		}
		catch (IllegalArgumentException e) {
			_throwParseException(e.getMessage());
		}

		if (lang.length() == 0) {
			lang = null;
		}

		if (datatype.length() == 0) {
			datatype = null;
		}

		URI dtURI = null;
		if (datatype != null) {
			dtURI = _createURI(datatype);

			// NOTE(review): the checks below receive the raw datatype string,
			// not the unescaped URI held by dtURI. The two only differ when
			// the datatype URI contains escape sequences - confirm whether
			// that case matters.
			if (_datatypeHandling == DT_VERIFY) {
				if (!XmlDatatypeUtil.isValidValue(label, datatype)) {
					_throwParseException("'" + label + "' is not a valid value for datatype " + datatype);
				}
			}
			else if (_datatypeHandling == DT_NORMALIZE) {
				try {
					label = XmlDatatypeUtil.normalize(label, datatype);
				}
				catch (IllegalArgumentException e) {
					_throwParseException("'" + label + "' is not a valid value for datatype " + datatype + ": " + e.getMessage());
				}
			}
		}

		try {
			if (dtURI != null) {
				return _valFactory.createLiteral(label, dtURI);
			}
			else if (lang != null) {
				return _valFactory.createLiteral(label, lang);
			}
			else {
				return _valFactory.createLiteral(label);
			}
		}
		catch (Exception e) {
			_throwParseException(e);
			// Not reached, _throwParseException always throws.
			return null;
		}
	}

	/**
	 * Reports a fatal error with the supplied message to the error listener,
	 * if any, and throws a ParseException for the current line. The column
	 * is not tracked and is reported as -1. This method never returns
	 * normally.
	 **/
	private void _throwParseException(String msg)
		throws ParseException
	{
		if (_errListener != null) {
			_errListener.fatalError(msg, _lineNo, -1);
		}

		throw new ParseException(msg, _lineNo, -1);
	}

	/**
	 * Rethrows the supplied exception if it is a ParseException. Any other
	 * exception is reported to the error listener, if any, and wrapped in a
	 * new ParseException for the current line. This method never returns
	 * normally.
	 **/
	private void _throwParseException(Exception e)
		throws ParseException
	{
		if (e instanceof ParseException) {
			throw (ParseException)e;
		}
		else {
			if (_errListener != null) {
				_errListener.fatalError(e.getMessage(), _lineNo, -1);
			}

			throw new ParseException(e, _lineNo, -1);
		}
	}

	/**
	 * Reports an unexpected end of the input as a fatal parse error.
	 **/
	private void _throwEOFException()
		throws ParseException
	{
		_throwParseException("Unexpected end of file");
	}
}
