package it.unimi.dsi.mg4j.search;

/*		 
 * MG4J: Managing Gigabytes for Java
 *
 * Copyright (C) 2003-2011 Paolo Boldi and Sebastiano Vigna 
 *
 *  This library is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU Lesser General Public License as published by the Free
 *  Software Foundation; either version 3 of the License, or (at your option)
 *  any later version.
 *
 *  This library is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 *  for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 */

import it.unimi.dsi.fastutil.ints.IntIterator;
import it.unimi.dsi.fastutil.objects.Reference2ReferenceMap;
import it.unimi.dsi.fastutil.objects.ReferenceSet;
import it.unimi.dsi.mg4j.index.Index;
import it.unimi.dsi.mg4j.search.score.Scorer;
import it.unimi.dsi.mg4j.search.visitor.DocumentIteratorVisitor;
import it.unimi.dsi.util.Interval;

import java.io.IOException;
import java.util.Map;
import java.util.NoSuchElementException;

/** An iterator over documents (pointers) and their intervals.
 *
 * <p>Each call to {@link #nextDocument()}
 * will return a document pointer, or -1 if no more documents are available. Just
 * after the call to {@link #nextDocument()}, {@link #intervalIterator(Index)} will return an interval iterator
 * enumerating intervals in the last returned document for the specified index. The latter method may return, as a
 * special result, the {@link it.unimi.dsi.mg4j.search.IntervalIterators#TRUE TRUE} value: this means that,
 * albeit the current document satisfies the query, there is only a generic
 * empty witness to prove it (see {@link it.unimi.dsi.mg4j.search.IntervalIterators#TRUE TRUE} for some elaboration).
 *
 * <p>Note that this interface extends {@link IntIterator}. Nonetheless, for performance reasons,
 * the preferred access to the document pointers is {@link #nextDocument()}.
 *
 * <p>The {@link #iterator()} method <strong>must</strong> be an alias for {@link #intervalIterator()}, and shares
 * the same limitations.
 *
 * <p>A document iterator is usually structured as composite,
 * with operators as internal nodes and {@link it.unimi.dsi.mg4j.index.IndexIterator}s
 * as leaves. The methods {@link #accept(DocumentIteratorVisitor)}
 * and {@link #acceptOnTruePaths(DocumentIteratorVisitor)} implement the visitor pattern.
 *
 * <p>The {@link #dispose()} method is intended to recursively release all resources associated
 * with a composite document iterator. Note that this is not always what you want, as you might
 * be, say, pooling {@linkplain it.unimi.dsi.mg4j.index.IndexReader index readers} to reduce the number
 * of file open/close operations. For this reason, we intentionally avoid calling the method &ldquo;close&rdquo;.
 *
 * <p><strong>Warning:</strong> the interval enumeration can be carried out only just after a call
 * to {@link #nextDocument()}. Subsequent calls to {@link #nextDocument()} <em>or even to {@link java.util.Iterator#hasNext()}</em>
 * will reset the internal state of the iterator. In particular, trying to enumerate intervals after a call
 * to {@link java.util.Iterator#hasNext()} will usually throw an {@link java.lang.IllegalStateException}.
 */

public interface DocumentIterator extends IntIterator, Iterable<Interval> {

	// Note: all members of an interface are implicitly public (and fields are
	// implicitly static and final), so no explicit modifiers are used below.

	/** Returns the interval iterator of this document iterator for single-index queries.
	 *
	 * <p>This is a commodity method that can be used only for queries
	 * built over a single index.
	 *
	 * @return an interval iterator.
	 * @see #intervalIterator(Index)
	 * @throws IllegalStateException if this document iterator is not built on a single index.
	 * @throws IOException if an I/O error occurs.
	 */
	IntervalIterator intervalIterator() throws IOException;

	/** Returns the interval iterator of this document iterator for the given index.
	 *
	 * <p>After a call to {@link #nextDocument()}, this iterator
	 * can be used to retrieve the intervals in the current document (the
	 * one returned by {@link #nextDocument()}) for
	 * the index {@code index}.
	 *
	 * <p>Note that if all indices have positions,
	 * it is guaranteed that at least one index will return an interval.
	 * However, for disjunctive queries it cannot be guaranteed that <em>all</em>
	 * indices will return an interval.
	 *
	 * <p>Indices without positions always return {@link IntervalIterators#TRUE}.
	 * Thus, in presence of indices without positions it is possible that no
	 * intervals at all are available.
	 *
	 * @param index an index (must be one over which the query was built).
	 * @return an interval iterator over the current document in {@code index}.
	 * @throws IOException if an I/O error occurs.
	 */
	IntervalIterator intervalIterator( Index index ) throws IOException;

	/** Returns an unmodifiable map from indices to interval iterators.
	 *
	 * <p>After a call to {@link #nextDocument()}, this map
	 * can be used to retrieve the intervals in the current document. An invocation of {@link Map#get(java.lang.Object)}
	 * on this map with argument {@code index} yields the same result as
	 * {@link #intervalIterator(Index) intervalIterator(index)}.
	 *
	 * @return a map from indices to interval iterators over the current document.
	 * @throws UnsupportedOperationException if this index does not contain positions.
	 * @throws IOException if an I/O error occurs.
	 * @see #intervalIterator(Index)
	 */
	Reference2ReferenceMap<Index,IntervalIterator> intervalIterators() throws IOException;

	/** Returns the set of indices over which this iterator is built.
	 *
	 * @return the set of indices over which this iterator is built.
	 */
	ReferenceSet<Index> indices();

	/** Returns the next document.
	 *
	 * @deprecated As of MG4J 1.2, the suggested way of iterating over document iterators
	 * is {@link #nextDocument()}, which has been modified so to provide fully lazy
	 * iteration. After a couple of releases, however, this annotation will be removed, as it
	 * is very practical to have document iterators implementing {@link IntIterator}. Its
	 * main purpose is to warn people about performance issues solved by {@link #nextDocument()}.
	 * @see #nextDocument()
	 */
	@Deprecated
	int nextInt();

	/** Returns the next document provided by this document iterator, or -1 if no more documents are available.
	 *
	 * <p><strong>Warning</strong>: the specification of this method has significantly changed as of MG4J 1.2.
	 * The special return value -1 is used to mark the end of iteration (a {@link NoSuchElementException}
	 * would have been thrown before in that case, so no harm should be caused by this change). The reason
	 * for this change is providing <em>fully lazy</em> iteration over documents. Fully lazy iteration
	 * does not provide an {@code hasNext()} method&mdash;you have to actually ask for the next
	 * element and check the return value. Fully lazy iteration is much lighter on method calls (half) and
	 * in most (if not all) MG4J classes leads to a much simpler logic. Moreover, {@link #nextDocument()}
	 * can be specified as throwing an {@link IOException}, which avoids the pernicious proliferation
	 * of try/catch blocks in very short, low-level methods (it was having a detectable impact on performance).
	 *
	 * @return the next document, or -1 if no more documents are available.
	 * @throws IOException if an I/O error occurs.
	 */
	int nextDocument() throws IOException;

	/** Returns the last document returned by {@link #nextDocument()}.
	 *
	 * @return the last document returned by {@link #nextDocument()}, -1 if no document has been returned yet, and
	 * {@link #END_OF_LIST} if the list of results has been exhausted.
	 */
	int document();

	/** A special value denoting that the end of the list has been reached. */
	int END_OF_LIST = Integer.MAX_VALUE;

	/** Skips all documents smaller than {@code n}. If {@link #hasNext()} has been called returning
	 * true but {@link #nextDocument()} has <em>not</em> been called afterwards, then a call
	 * to {@link #skipTo(int)} will be implicitly preceded by
	 * a call to {@link #nextDocument()} (the only consequence is that skipping to the current
	 * document after a call to {@link #hasNext()} will return the <em>next</em> document).
	 *
	 * <p>Define the <em>current document</em> {@code k} associated with this document iterator
	 * as follows:
	 * <ul>
	 * <li>-1, if {@link #nextDocument()} and this method have never been called;
	 * <li>{@link #END_OF_LIST}, if a call to this method returned {@link #END_OF_LIST}, or
	 * {@link #nextDocument()} returned -1;
	 * <li>the last value returned by a call to {@link #nextDocument()} or this method, otherwise.
	 * </ul>
	 *
	 * <p>If {@code k} is larger than or equal to {@code n}, then
	 * this method does nothing and returns {@code k}. Otherwise, a
	 * call to this method is equivalent to
	 * <pre>{@code
	 * while( ( k = nextDocument() ) < n && k != -1 );
	 * return k == -1 ? END_OF_LIST : k;
	 * }</pre>
	 *
	 * <p>Thus, when a result {@code k} &ne; {@link #END_OF_LIST}
	 * is returned, the state of this iterator
	 * will be exactly the same as after a call to {@link #nextDocument()}
	 * that returned {@code k}.
	 * In particular, the first document larger than or equal to {@code n} (when returned
	 * by this method) will <em>not</em> be returned by the next call to
	 * {@link #nextDocument()}.
	 *
	 * @param n a document pointer.
	 * @return a document pointer larger than or equal to {@code n} if available, {@link #END_OF_LIST}
	 * otherwise.
	 * @throws IOException if an I/O error occurs.
	 */
	int skipTo( int n ) throws IOException;

	/** Accepts a visitor.
	 *
	 * <p>A document iterator is usually structured as composite,
	 * with operators as internal nodes and {@link it.unimi.dsi.mg4j.index.IndexIterator}s
	 * as leaves. This method implements the visitor pattern.
	 *
	 * @param <T> the type of the object resulting from the visit.
	 * @param visitor the visitor.
	 * @return an object resulting from the visit, or {@code null} if the visit was interrupted.
	 * @throws IOException if an I/O error occurs.
	 */
	<T> T accept( DocumentIteratorVisitor<T> visitor ) throws IOException;

	/** Accepts a visitor after a call to {@link #nextDocument()},
	 * limiting recursion to true paths.
	 *
	 * <p>After a call to {@link #nextDocument()}, a document iterator
	 * is positioned over a document. This call is equivalent to {@link #accept(DocumentIteratorVisitor)},
	 * but visits only along <em>true paths</em>.
	 *
	 * <p>We define a <em>true path</em> as a path from the root of the composite that passes only through
	 * nodes whose associated subtree is positioned on the same document of the root. Note that {@link OrDocumentIterator}s
	 * detach exhausted iterators from the composite tree, so true paths define the subtree that is causing
	 * the current document to satisfy the query represented by this document iterator.
	 *
	 * <p>For more elaboration, and the main application of this method, see {@link it.unimi.dsi.mg4j.search.visitor.CounterCollectionVisitor}.
	 *
	 * @param <T> the type of the object resulting from the visit.
	 * @param visitor the visitor.
	 * @return an object resulting from the visit, or {@code null} if the visit was interrupted.
	 * @throws IOException if an I/O error occurs.
	 * @see #accept(DocumentIteratorVisitor)
	 * @see it.unimi.dsi.mg4j.search.visitor.CounterCollectionVisitor
	 */
	<T> T acceptOnTruePaths( DocumentIteratorVisitor<T> visitor ) throws IOException;

	/** Returns the weight associated with this iterator.
	 *
	 * <p>The number returned by this method has no fixed semantics: different {@linkplain Scorer scorers}
	 * might choose different interpretations, or even ignore it.
	 *
	 * @return the weight associated with this iterator.
	 */
	double weight();

	/** Sets the weight of this document iterator.
	 *
	 * @param weight the weight of this document iterator.
	 * @return this document iterator.
	 */
	DocumentIterator weight( double weight );

	/** Disposes this document iterator, releasing all resources.
	 *
	 * <p>This method should propagate down to the underlying index iterators, where it should release resources
	 * such as open files and network connections. If you're doing your own resource tracking and pooling,
	 * then you do not need to call this method.
	 *
	 * @throws IOException if an I/O error occurs.
	 */
	void dispose() throws IOException;

	/** An alias for {@link #intervalIterator()}, that has the same limitations (i.e., it will work only if
	 *  there is just one index), and that catches {@link IOException}s.
	 *
	 *  @return an interval iterator.
	 */
	IntervalIterator iterator();

}
