package it.unimi.dsi.mg4j.graph;

/*		 
 * MG4J: Managing Gigabytes for Java
 *
 * Copyright (C) 2007-2011 Paolo Boldi 
 *
 *  This library is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU Lesser General Public License as published by the Free
 *  Software Foundation; either version 3 of the License, or (at your option)
 *  any later version.
 *
 *  This library is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 *  for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 */

import it.unimi.dsi.mg4j.document.Document;
import it.unimi.dsi.mg4j.document.DocumentIterator;
import it.unimi.dsi.mg4j.document.DocumentSequence;
import it.unimi.dsi.mg4j.tool.Scan;
import it.unimi.dsi.mg4j.tool.Scan.VirtualDocumentFragment;
import it.unimi.dsi.mg4j.tool.VirtualDocumentResolver;
import it.unimi.dsi.fastutil.ints.IntAVLTreeSet;
import it.unimi.dsi.fastutil.ints.IntSortedSet;
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.fastutil.objects.ObjectIterator;
import it.unimi.dsi.fastutil.objects.ObjectList;
import it.unimi.dsi.webgraph.ImmutableGraph;
import it.unimi.dsi.webgraph.ImmutableSequentialGraph;
import it.unimi.dsi.webgraph.NodeIterator;

import java.io.IOException;
import java.util.NoSuchElementException;

/** Exposes a document sequence as a (sequentially accessible) immutable graph, according to some
 *  virtual field provided by the documents in the sequence. A suitable {@link VirtualDocumentResolver}
 *  is used to associate node numbers to each fragment.
 *  
 *  <p>More precisely, the graph will have as many nodes as there are documents in the sequence, the
 *  <var>k</var>-th document (starting from 0) representing node number <var>k</var>.
 *  The successors of a document are obtained by extracting the virtual field from the
 *  document, turning each {@linkplain it.unimi.dsi.mg4j.tool.Scan.VirtualDocumentFragment document specifier}
 *  into a document number (using the given {@linkplain VirtualDocumentResolver resolver},
 *  and discarding unresolved URLs).
 */
public class DocumentSequenceImmutableGraph extends ImmutableSequentialGraph {
	
	/** The underlying sequence. */
	private final DocumentSequence sequence;
	/** The number of the virtual field to be used. */
	private final int virtualField;
	/** The resolver to be used. */
	private final VirtualDocumentResolver resolver;
	
	/** Creates an immutable graph from a sequence.
	 * 
	 * @param sequence the sequence whence the immutable graph should be created.
	 * @param virtualField the number of the virtual field to be used to get the successors from.
	 * @param resolver the resolver to be used to map document specs to node numbers.
	 */
	public DocumentSequenceImmutableGraph( final DocumentSequence sequence, final int virtualField, final VirtualDocumentResolver resolver ) {
		this.sequence = sequence;
		this.virtualField = virtualField;
		this.resolver = resolver;
	}

	/** Creates a new immutable graph with the specified arguments.
	 * 
	 * <p>The first and third arguments are passed to {@link BinIO#loadObject(CharSequence)}, the second
	 * one is parsed with {@link Integer#parseInt(String)}.
	 * 
	 * @param arg a 3-element array: the first is the basename of a {@link DocumentSequence}, the second is an integer specifying the virtual
	 * field number, the third is the basename of a {@link VirtualDocumentResolver}.
	 * @throws IOException if loading one of the serialised objects fails.
	 * @throws ClassNotFoundException if the class of one of the serialised objects cannot be found.
	 */
	public DocumentSequenceImmutableGraph( final String... arg ) throws IOException, ClassNotFoundException {
		this( (DocumentSequence)BinIO.loadObject( arg[ 0 ] ), Integer.parseInt( arg[ 1 ] ), (VirtualDocumentResolver)BinIO.loadObject( arg[ 2 ] ) );
	}
	
	/** Unsupported operation.
	 * 
	 * @throws UnsupportedOperationException always.
	 */
	@Override
	public ImmutableGraph copy() {
		throw new UnsupportedOperationException();
	}

	/** Returns the number of nodes of this graph, that is, the number of documents 
	 * reported by the resolver. 
	 * 
	 * @return the number of documents known to the resolver.
	 */
	@Override
	public int numNodes() {
		// The resolver already reports an int, so no range check against Integer.MAX_VALUE can ever fail.
		return resolver.numberOfDocuments();
	}

	/** Returns false, as this graph can only be scanned sequentially.
	 * 
	 * @return false.
	 */
	@Override
	public boolean randomAccess() {
		return false;
	}
	
	/** Returns a node iterator that scans the underlying sequence from its first document.
	 * 
	 * <p>Each call to {@link NodeIterator#nextInt()} reads the virtual field of the current document,
	 * resolves every fragment using the resolver (with the current document as context), keeps the
	 * nonnegative results as a sorted, duplicate-free successor array, closes the document and
	 * fetches the next one. When the sequence is exhausted the underlying document iterator is closed.
	 * Every {@link IOException} is rethrown wrapped in a {@link RuntimeException}.
	 * 
	 * @return a node iterator over this graph.
	 */
	@Override
	public NodeIterator nodeIterator() {
		final DocumentIterator documentIterator;
		final Document firstDocument;
		try {
			documentIterator = sequence.iterator();
			firstDocument = documentIterator.nextDocument();
			// Empty sequence: nothing will ever be read, so release the iterator right away.
			if ( firstDocument == null ) documentIterator.close();
		}
		catch ( IOException e ) {
			throw new RuntimeException( e );
		}

		return new NodeIterator() {
			/** The document that will be processed by the next call to nextInt(), or null if there is none. */
			Document cachedDocument = firstDocument;
			/** The node number that will be returned by the next call to nextInt(). */
			int cachedDocumentNumber = 0;
			/** The successors of the last node returned, or null if no node has been returned yet. */
			int[] cachedSuccessors;
			/** A reusable set used to sort successors and remove duplicates. */
			final IntSortedSet succ = new IntAVLTreeSet();

			public boolean hasNext() {
				return cachedDocument != null;
			}
			
			@SuppressWarnings("unchecked")
			public int nextInt() {
				if ( !hasNext() ) throw new NoSuchElementException();
				ObjectList<Scan.VirtualDocumentFragment> vdf;
				try {
					vdf = (ObjectList<VirtualDocumentFragment>)cachedDocument.content( virtualField );
				}
				catch ( IOException exc1 ) {
					throw new RuntimeException( exc1 );
				}
				succ.clear();
				resolver.context( cachedDocument );
				ObjectIterator<VirtualDocumentFragment> it = vdf.iterator();
				while ( it.hasNext() ) {
					int successor = resolver.resolve( it.next().documentSpecifier() );
					// Negative values denote unresolved specifiers, which are discarded.
					if ( successor >= 0 ) succ.add( successor );
				}
				cachedSuccessors = succ.toIntArray();
				// Get ready for the next request
				try {
					cachedDocument.close();
					cachedDocument = documentIterator.nextDocument();
					// The sequence is exhausted: release the resources held by the document iterator.
					if ( cachedDocument == null ) documentIterator.close();
				}
				catch ( IOException e ) {
					throw new RuntimeException( e );
				}
				return cachedDocumentNumber++;
			}

			public int outdegree() {
				if ( cachedSuccessors == null ) throw new IllegalStateException( "nextInt() has not been called yet" );
				return cachedSuccessors.length;
			}
			
			public int[] successorArray() {
				if ( cachedSuccessors == null ) throw new IllegalStateException( "nextInt() has not been called yet" );
				return cachedSuccessors;
			}
			
		};
	}

}
