package it.unimi.dsi.mg4j.search;

/*		 
 * MG4J: Managing Gigabytes for Java
 *
 * Copyright (C) 2003-2011 Paolo Boldi and Sebastiano Vigna 
 *
 *  This library is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU Lesser General Public License as published by the Free
 *  Software Foundation; either version 3 of the License, or (at your option)
 *  any later version.
 *
 *  This library is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 *  for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 */

import it.unimi.dsi.fastutil.objects.Reference2ReferenceArrayMap;
import it.unimi.dsi.fastutil.objects.Reference2ReferenceMap;
import it.unimi.dsi.fastutil.objects.Reference2ReferenceMaps;
import it.unimi.dsi.mg4j.index.Index;
import it.unimi.dsi.mg4j.index.IndexIterator;

import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Iterator;


/** An abstract iterator on documents, generating the intersection of the documents returned by 
 * a number of document iterators.
 * 
 * <P>To be usable, this class must be subclassed so as to also provide an iterator on intervals.
 * Such iterators must be instantiable using {@link #getComposedIntervalIterator(Index)}.
 * The latter is an example of a <em>non-static factory method</em>, that is, a factory method which
 * depends on the enclosing instance. This pattern makes it easy to specialise this class into iterators
 * that do different things, such as {@link it.unimi.dsi.mg4j.search.AndDocumentIterator} and
 * {@link it.unimi.dsi.mg4j.search.ConsecutiveDocumentIterator}, but that have similar semantics at the document
 * level (the semantics may in fact be slightly different: for instance, not all documents belonging
 * to all components will actually appear in a consecutive iterator, as there may be documents filtered
 * at the interval level).
 * 
 * <P>The important invariant is that <em>only</em> after a call to {@link #nextDocument()}, a call
 * to {@link #intervalIterator(Index)} will return an interval iterator over the document
 * just returned, and that for at least one index in {@link #indices()} the iterator will not be empty
 * or {@link it.unimi.dsi.mg4j.search.IntervalIterators#TRUE TRUE}.
 * 
 * <h2>The intersection algorithm</h2>
 * 
 * <p>Since MG4J 1.1, this class implements a new intersection algorithm that should be significantly
 * faster than the previous one. The main idea is that of letting sparser iterators interact as much
 * as possible to obtain a candidate common document, and then trying to align the others. At construction
 * time, the component iterators are sorted so that index iterators are separated, and sorted by frequency.
 * Then, each time we have to align the iterators we align them greedily starting from the index
 * iterators, in frequency order. This has the effect of skipping very quickly (and usually by
 * large jumps, which are handled nicely by indices with skips),
 * as the main interaction happens between low-frequency index iterators.
 * 
 * <p>Moreover, this class treats in a special way
 *  {@linkplain PayloadPredicateDocumentIterator index iterators coming from payload-based indices}. Such
 * iterators are checked at the end of the alignment process, 
 * after all standard index iterators (and general document iterators)
 * are aligned. At that point, the special method {@link PayloadPredicateDocumentIterator#skipUnconditionallyTo(int)}
 * is used to position unconditionally such iterators and check whether the payload predicate is satisfied.
 * If this doesn't happen, the current candidate (obtained by alignment of standard iterators) is increased and the
 * whole process is restarted. This procedure guarantees that we will never search exhaustively in a 
 * payload-based index a document record satisfying the predicate (unless, of course, we have a query
 * containing just {@link PayloadPredicateDocumentIterator}s), which is very efficient if the payload-based
 * index uses skipping.
 * 
 */

public abstract class AbstractIntersectionDocumentIterator extends AbstractCompositeDocumentIterator {
	private static final boolean DEBUG = false;
	private static final boolean ASSERTS = false;

	/** The composed interval iterators created so far, keyed by index. Entries are created lazily
	 * by {@link #intervalIterator(Index)} and reused (after a reset) on later documents. */
	protected final Reference2ReferenceArrayMap<Index,IntervalIterator> intervalIterators;
	/** The interval iterators already handed out for the current document, keyed by index. 
	 * An index is missing until its iterator is requested; the stored value may be 
	 * {@link IntervalIterators#TRUE TRUE} or {@link IntervalIterators#FALSE FALSE} rather than
	 * the composed iterator kept in {@link #intervalIterators}. The map is cleared each time
	 * this iterator moves. */
	protected final Reference2ReferenceArrayMap<Index,IntervalIterator> currentIterators;
	/** A read-only view of {@link #currentIterators}, returned by {@link #intervalIterators()}. */
	protected final Reference2ReferenceMap<Index,IntervalIterator> unmodifiableCurrentIterators;
	/** A reordered copy of the component iterators (see {@link ComponentOrder}). */
	private final DocumentIterator[] sortedIterator;
	/** The component in the last position of {@link #sortedIterator}; it drives the alignment. */
	private final DocumentIterator lastIterator;
	/** The first {@link #predicateStart} elements of {@link #sortedIterator}, already cast. */
	private final PayloadPredicateDocumentIterator[] payloadPredicateDocumentIterator;
	/** One more than the highest position of {@link #sortedIterator} holding 
	 * a {@link PayloadPredicateDocumentIterator} (zero if there is none). */
	private final int predicateStart;

	/** The order used to sort component iterators: payload-predicate iterators first (all equivalent),
	 * then iterators that are not index iterators (all equivalent), and finally index
	 * iterators by decreasing frequency, so that the least frequent term ends up last. */
	private static final class ComponentOrder implements Comparator<DocumentIterator> {
		public int compare( final DocumentIterator d0, final DocumentIterator d1 ) {
			final boolean predicate0 = d0 instanceof PayloadPredicateDocumentIterator;
			final boolean predicate1 = d1 instanceof PayloadPredicateDocumentIterator;
			if ( predicate0 || predicate1 ) {
				if ( predicate0 && predicate1 ) return 0;
				return predicate0 ? -1 : 1;
			}

			final boolean isIndexIterator0 = d0 instanceof IndexIterator;
			final boolean isIndexIterator1 = d1 instanceof IndexIterator;
			if ( ! isIndexIterator0 && ! isIndexIterator1 ) return 0;
			if ( isIndexIterator0 != isIndexIterator1 ) return isIndexIterator0 ? 1 : -1;

			try {
				// Both are index iterators: the larger frequency comes first.
				return ((IndexIterator)d1).frequency() - ((IndexIterator)d0).frequency();
			}
			catch ( IOException e ) {
				throw new RuntimeException( e );
			}
		}
	}

	/** Creates a new intersection iterator using a given array of iterators and a given index.
	 * 
	 * <P>The constructor sorts a copy of the components, moves each of them to its first document
	 * and then tries to align them. If some component has no documents, {@link #curr} is 
	 * set to {@link DocumentIterator#END_OF_LIST} and no alignment is attempted.
	 * 
	 *  @param index an index that will be passed to {@link AbstractCompositeDocumentIterator#AbstractCompositeDocumentIterator(Index, DocumentIterator...)}.
	 *  @param documentIterator the iterators to be intersected (at least one).
	 *  @throws IllegalArgumentException if <code>documentIterator</code> is empty.
	 */
	protected AbstractIntersectionDocumentIterator( final Index index, final DocumentIterator[] documentIterator ) throws IOException {
		super( index, documentIterator ); 
		if ( documentIterator.length == 0 ) throw new IllegalArgumentException( "The provided array of document iterators is empty." );

		final int numberOfIndices = indices.size();
		intervalIterators = new Reference2ReferenceArrayMap<Index,IntervalIterator>( numberOfIndices );
		currentIterators = new Reference2ReferenceArrayMap<Index,IntervalIterator>( numberOfIndices );
		unmodifiableCurrentIterators = Reference2ReferenceMaps.unmodifiable( currentIterators );

		// The caller's array is left untouched: we reorder a private copy.
		sortedIterator = documentIterator.clone();
		Arrays.sort( sortedIterator, new ComponentOrder() );
		lastIterator = sortedIterator[ n - 1 ];

		// Locate the highest position containing a payload-predicate iterator.
		int lastPredicate = n - 1;
		while( lastPredicate >= 0 && ! ( sortedIterator[ lastPredicate ] instanceof PayloadPredicateDocumentIterator ) ) lastPredicate--;
		predicateStart = lastPredicate + 1;

		payloadPredicateDocumentIterator = new PayloadPredicateDocumentIterator[ predicateStart ];
		for( int k = 0; k < predicateStart; k++ ) payloadPredicateDocumentIterator[ k ] = (PayloadPredicateDocumentIterator)sortedIterator[ k ];
		
		if ( DEBUG ) System.err.println( "Sorted iterators: " + Arrays.toString( sortedIterator ) );
		
		// Move every component onto its first document, from the last sorted position down to the first.
		for( int k = n - 1; k >= 0; k-- ) {
			if ( sortedIterator[ k ].nextDocument() == -1 ) {
				// An empty component makes the whole intersection empty.
				curr = END_OF_LIST;
				return;
			}
		}

		ahead = align();
	}

	/** Creates a new intersection iterator using a given array of iterators and a null index.
	 * 
	 *  @param documentIterator the iterators to be intersected (at least one).
	 */
	protected AbstractIntersectionDocumentIterator( final DocumentIterator[] documentIterator ) throws IOException {
		this( null, documentIterator );
	}
	
	/** Brings all component iterators onto a common document pointer, starting from the 
	 * current document of {@link #lastIterator}.
	 * 
	 * <P>Components are visited from the last sorted position down to the first. Each one is 
	 * asked to skip to the current candidate (payload-predicate iterators through 
	 * {@link PayloadPredicateDocumentIterator#skipUnconditionallyTo(int)}). As soon as a component
	 * answers with something different from the candidate, the candidate is updated and the scan 
	 * restarts from the last position.
	 * 
	 * @return true if all components agreed on a document, which is then stored in {@link #curr}; 
	 * false if some component returned {@link DocumentIterator#END_OF_LIST}, in which
	 * case {@link #curr} is set to {@link DocumentIterator#END_OF_LIST}.
	 */
	private boolean align() throws IOException {
		if ( ASSERTS ) assert curr != END_OF_LIST;
		if ( DEBUG ) System.err.println( this + ".align() [curr = " + curr + "]" );
		
		int candidate = lastIterator.document();
		
		restart: for(;;) {
			for( int pos = n - 1; pos >= 0; pos-- ) {
				final int reached;
				if ( pos < predicateStart ) reached = payloadPredicateDocumentIterator[ pos ].skipUnconditionallyTo( candidate );
				else reached = sortedIterator[ pos ].skipTo( candidate );

				if ( reached == END_OF_LIST ) {
					curr = END_OF_LIST;
					if ( DEBUG ) System.err.println( this + ".align() => false [curr = " + curr + "]" );
					return false;
				}
				
				if ( reached == candidate ) continue;

				if ( reached > candidate ) candidate = reached;
				else {
					// A smaller result is expected only from payload-predicate iterators (a negative value): try the next document.
					if ( ASSERTS ) assert reached < 0;
					candidate++;
				}
				continue restart;
			}
			
			// A full scan completed without disagreement.
			curr = candidate;
			if ( DEBUG ) System.err.println( this + ".align() => true [curr = " + curr + "]" );
			return true;
		}
	}

	/** Skips to the first common document whose pointer is at least <code>n</code>.
	 * 
	 * <P>If {@link #curr} is already at least <code>n</code> nothing moves. Otherwise
	 * {@link #lastIterator} is skipped first, and the remaining components are aligned on it.
	 * 
	 * @param n a document pointer.
	 * @return the new value of {@link #curr}, possibly {@link DocumentIterator#END_OF_LIST}.
	 */
	public int skipTo( final int n ) throws IOException {
		ahead = false;

		if ( curr < n ) {
			currentIterators.clear();
			curr = lastIterator.skipTo( n );
			if ( curr != END_OF_LIST ) align();
		}

		return curr;
	}
	
	/** Advances {@link #lastIterator} by one document and realigns the other components.
	 * 
	 * @return the result of applying <code>toNextDocument()</code> to the new value of {@link #curr}.
	 */
	protected int nextDocumentInternal() throws IOException {
		if ( DEBUG ) System.err.println( this + ".nextDocumentInternal()" );
		currentIterators.clear();

		if ( lastIterator.nextDocument() == -1 ) curr = END_OF_LIST;
		else align();

		if ( DEBUG ) System.err.println( this + ".nextDocumentInternal() => " + toNextDocument( curr ) );
		return toNextDocument( curr );
	}

	/** Requests the interval iterator of every index of this iterator, and returns them all.
	 * 
	 * @return an unmodifiable map from indices to the interval iterators for the current document.
	 */
	public Reference2ReferenceMap<Index,IntervalIterator> intervalIterators() throws IOException {
		for( final Iterator<Index> it = indices.iterator(); it.hasNext(); ) intervalIterator( it.next() );
		return unmodifiableCurrentIterators;
	}

	/** Returns the interval iterator of this iterator for the current document and the given index.
	 * 
	 * <P>The result is cached in {@link #currentIterators}, so repeated requests for the same index
	 * on the same document return the same object.
	 * 
	 * @param index an index.
	 * @return {@link IntervalIterators#FALSE FALSE} if <code>index</code> is not among the indices of
	 * this iterator or if every component is false for it; {@link IntervalIterators#TRUE TRUE} if every
	 * component is either true or false for it (and at least one is true); otherwise, the composed iterator
	 * provided by {@link #getComposedIntervalIterator(Index)}, reset.
	 */
	public IntervalIterator intervalIterator( final Index index ) throws IOException {
		ensureOnADocument();
		if ( DEBUG ) System.err.println( this + ".intervalIterator(" + index + ")" );
		if ( ! indices.contains( index ) ) return IntervalIterators.FALSE;

		// Already requested for this document?
		final IntervalIterator cached = currentIterators.get( index );
		if ( cached != null ) return cached;

		// Count the components that are TRUE and those that are FALSE for this index.
		int trueCount = 0, falseCount = 0;
			
		if ( indexIterator != null ) {
			// With index iterators there is no need to fetch interval iterators: comparing indices is enough.
			for( int k = 0; k < n; k++ ) {
				final Index componentIndex = indexIterator[ k ].index();
				if ( componentIndex != index ) falseCount++;
				else if ( ! indexIterator[ k ].index().hasPositions ) trueCount++;
			}
		}
		else {
			for( int k = 0; k < n; k++ ) {
				final IntervalIterator component = documentIterator[ k ].intervalIterator( index );
				if ( component == IntervalIterators.TRUE ) trueCount++;
				if ( component == IntervalIterators.FALSE ) falseCount++;
			}
		}
		
		// The case of a single nontrivial component cannot be shortcut because of gaps in ConsecutiveDocumentIterator.
		final IntervalIterator result;
		if ( falseCount == n ) result = IntervalIterators.FALSE;
		else if ( trueCount + falseCount == n ) result = IntervalIterators.TRUE;
		else {
			IntervalIterator composed = intervalIterators.get( index );
			if ( composed == null ) {
				composed = getComposedIntervalIterator( index );
				intervalIterators.put( index, composed );
			}
			composed.reset();
			result = composed;
		}

		currentIterators.put( index, result );	
		return result;
	}
	
	/** Creates the interval iterator that composes the interval iterators of the components for a given index.
	 * 
	 * <P>This is the non-static factory method that concrete subclasses must implement. It is 
	 * invoked at most once per index by {@link #intervalIterator(Index)}, which caches the result.
	 * 
	 * @param index an index.
	 * @return a composed interval iterator for <code>index</code>.
	 */
	abstract protected IntervalIterator getComposedIntervalIterator( Index index );
}
