package it.unimi.dsi.mg4j.io;

/*		 
* MG4J: Managing Gigabytes for Java
*
* Copyright (C) 2007-2011 Sebastiano Vigna 
*
*  This library is free software; you can redistribute it and/or modify it
*  under the terms of the GNU Lesser General Public License as published by the Free
*  Software Foundation; either version 3 of the License, or (at your option)
*  any later version.
*
*  This library is distributed in the hope that it will be useful, but
*  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
*  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
*  for more details.
*
*  You should have received a copy of the GNU Lesser General Public License
*  along with this program; if not, write to the Free Software
*  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
*/

import static it.unimi.dsi.io.OutputBitStream.DELTA;
import static it.unimi.dsi.io.OutputBitStream.GAMMA;
import static it.unimi.dsi.io.OutputBitStream.MAX_PRECOMPUTED;
import static it.unimi.dsi.mg4j.tool.Scan.Completeness.COUNTS;
import static it.unimi.dsi.mg4j.tool.Scan.Completeness.POSITIONS;
import it.unimi.dsi.bits.Fast;
import it.unimi.dsi.fastutil.bytes.ByteArrays;
import it.unimi.dsi.fastutil.ints.IntArrays;
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.io.InputBitStream;
import it.unimi.dsi.io.OutputBitStream;
import it.unimi.dsi.mg4j.index.BitStreamIndexWriter;
import it.unimi.dsi.mg4j.tool.Scan;
import it.unimi.dsi.mg4j.tool.Scan.Completeness;

import java.io.Closeable;
import java.io.File;
import java.io.Flushable;
import java.io.IOException;

/** Lightweight posting accumulator with format similar to that generated by {@link BitStreamIndexWriter}.
 * 
 * <p>This class is essentially a dirty trick: it borrows some code and precomputed tables from {@link OutputBitStream}
 * and exposes two simple methods ({@link #setDocumentPointer(int)} and {@link #addPosition(int)}) with obvious
 * semantics. The resulting posting list is compressed exactly like a {@link BitStreamIndexWriter} would do (also in this 
 * case, duplicating some logic found therein). As a result, after completing the calls and after a call to {@link #close()}
 * the internal {@link #buffer} can be written directly to a bit stream to build an index (but see {@link #stripPointers(OutputBitStream, long)}).
 * 
 * <p>{@link Scan} uses an instance of this class for each indexed term. Instances can be <em>differential</em>, in which
 * case they assume {@link #setDocumentPointer(int)} will be called with increasing values and store gaps rather
 * than document pointers. A {@linkplain Completeness completeness level} can be used to set whether an instance of this class
 * should store positions or counts.
 *
 * @author Sebastiano Vigna
 * @since 1.2
 */

public class ByteArrayPostingList implements Flushable, Closeable {
	private final static boolean DEBUG = false;

	/** If the enlargement of the backing array causes an out-of-memory error, we set {@link #outOfMemoryError} and try again with a very small increment. This
	 * should help in the unlikely (but entirely possible) circumstance that there is not enough memory to double a posting list. */
	private final static int POSTINGS_EMERGENCY_INCREMENT = 1024;
	/** If the enlargement of the position array causes an out-of-memory error, we set {@link #outOfMemoryError} and try again with a very small increment. This
	 * should help in the unlikely (but entirely possible) circumstance that there is not enough memory to enlarge the position cache in the standard way. */
	private final static int POSITIONS_EMERGENCY_INCREMENT = 64;

	/** The internal buffer. */
	public byte[] buffer;
	/** The current frequency (number of calls to {@link #setDocumentPointer(int)} that actually changed the current document pointer). */
	public int frequency;
	/** The current global count. */
	public long globCount;
	/** The number of bits used for positions. */
	public long posNumBits;
	/** The current count (number of calls to {@link #addPosition(int)} since the last {@linkplain #flush() flush}; if positions are stored, also the number of valid entries in {@link #position}). */
	private int count;
	/** The maximum count ever seen. */
	public int maxCount;
	/** If true, this list experienced an {@link OutOfMemoryError} during some buffer reallocation. */
	public boolean outOfMemoryError;
	/** Current bit buffer. */
	private int current;
	/** Current number of free bits in the bit buffer (the bits in the buffer are stored high). */
	private int free;
	/** Current position in the byte buffer. */
	private int pos;
	/** Current number of bytes available in the byte buffer. */
	private int avail;
	/** A small, local cache for positions (allocated only if positions are stored; set to <code>null</code> by {@link #close()}). */
	private int[] position;
	/** The last document pointer passed to {@link #setDocumentPointer(int)}, or -1 if no pointer has been set yet. */
	private int lastPointer;
	/** Whether this stream is differential. */
	private final boolean differential;
	/** The completeness level of this stream (more precisely, its {@linkplain Completeness#ordinal() ordinal}). */
	private final int completeness;
	
	
	/** Creates a new posting list wrapping a given byte array.
	 *
	 * @param a the byte array to wrap.
	 * @param differential whether this stream should be differential (e.g., whether it should store document pointers as gaps).
	 * @param completeness the completeness level: counts are written only at level {@link Completeness#COUNTS} or above,
	 * and positions are cached and written only at level {@link Completeness#POSITIONS} or above.
	 */
	public ByteArrayPostingList( final byte[] a, final boolean differential, final Completeness completeness ) {
		this.differential = differential;
		this.completeness = completeness.ordinal();
		free = 8;
		buffer = a;
		avail = a.length;
		if ( this.completeness >= POSITIONS.ordinal() ) position = new int[ 2 ];
		lastPointer = -1;
	}

	/** Appends a byte to the {@linkplain #buffer byte buffer}, enlarging the buffer if it is full.
	 * 
	 * <p>If the standard enlargement fails with an {@link OutOfMemoryError}, {@link #outOfMemoryError} is set,
	 * the buffer is dumped to a temporary file, a slightly larger array is allocated and the content is reloaded.
	 * 
	 * @param b the byte to append (it is narrowed to a <code>byte</code>).
	 * @throws RuntimeException wrapping an {@link IOException} thrown while dumping or reloading the buffer. 
	 */
	private void write( final int b ) {
		if ( avail == 0 ) {
			final int oldLength = buffer.length;
			try {
				buffer = ByteArrays.grow( buffer, buffer.length + 1 );
			}
			catch( OutOfMemoryError e ) {
				outOfMemoryError = true;
				try {
					// We try at all costs to avoid out-of-memory errors: we dump the buffer, try to allocate a slightly larger array and reload it.
					final File temp = File.createTempFile( ByteArrayPostingList.class.getSimpleName(), "dump" );
					// Just a safety net: the file is explicitly deleted below.
					temp.deleteOnExit();
					try {
						BinIO.storeBytes( buffer, temp );
						// Drop the old array before allocating the new one, so that the two never coexist.
						buffer = null;
						buffer = new byte[ oldLength + POSTINGS_EMERGENCY_INCREMENT ];
						BinIO.loadBytes( temp, buffer );
					}
					finally {
						// Remove the dump also when storing, reallocating or reloading fails.
						temp.delete();
					}
				}
				catch ( IOException f ) {
					throw new RuntimeException( f );
				}
			}
			avail += buffer.length - oldLength;
		}

		avail--;
		buffer[ pos++ ] = (byte)b;
	}

	/** Flushes the internal bit buffer to the {@linkplain #buffer byte buffer}.
	 * 
	 * <p>If the bit buffer is partially filled, it is padded with zeroes up to a byte boundary.
	 * 
	 * @return the number of bits written.
	 */
	
	public int align() {
		if ( free != 8 ) return writeInCurrent( 0, free );
		else return 0;
	}

	/*
	 * The code below is copied from OutputBitStream.
	 */
	
	/** Writes the lower bits of an integer into the bit buffer, dumping the bit buffer
	 * to the byte buffer if it becomes full.
	 * 
	 * @param b an integer whose lower <code>len</code> bits will be written.
	 * @param len the number of bits to write; it must not exceed {@link #free} (this is checked only if {@link #DEBUG} is true).
	 * @return <code>len</code>.
	 */
	private int writeInCurrent( final int b, final int len ) {
		if ( DEBUG ) if ( len > free ) throw new IllegalArgumentException( Integer.toString( len ) + " bit(s) to write, " + free + " available." );

		// Bits are stored high: the new bits go just below those already in the bit buffer.
		current |= ( b & ( ( 1 << len ) - 1 ) ) << ( free -= len );
		if ( free == 0 ) {
			write( current );
			free = 8;
			current = 0;
		}

		return len;
	}


	/** Writes the lower bits of an integer in fixed width.
	 * 
	 * @param x an integer whose lower <code>len</code> bits will be written.
	 * @param len the number of bits to write (between 0 and 32, inclusive).
	 * @return <code>len</code>.
	 * @throws IllegalArgumentException if <code>len</code> is negative or greater than 32.
	 */
	private int writeInt( int x, final int len ) {

		if ( len < 0 || len > 32 ) throw new IllegalArgumentException( "You cannot write " + len + " bits to an integer." );

		if ( len <= free ) return writeInCurrent( x, len );

		// Number of bits that will remain after filling the current bit buffer.
		int i = len - free;
		// Number of trailing bits that will not fill a whole byte.
		final int queue = i & 7;
		
		if ( free != 0 ) writeInCurrent( x >>> i, free );

		// Dirty trick: since queue < 8, we pre-write the last bits in the bit buffer.
		if ( queue != 0 ) {
			i -= queue;
			writeInCurrent( x, queue );
			x >>>= queue;
		}

		// The remaining i bits are a whole number of bytes, written from the most significant one.
		if ( i == 32 ) write( x >>> 24 );
		if ( i > 23 ) write( x >>> 16 );
		if ( i > 15 ) write( x >>> 8 );
		if ( i > 7 ) write( x );
		
		return len;
	}

	/** Writes a natural number in unary coding (<code>x</code> zeroes followed by a one).
	 * 
	 * @param x a natural number.
	 * @return the number of bits written (<code>x</code> + 1).
	 * @throws IllegalArgumentException if <code>x</code> is negative.
	 */
	private int writeUnary( int x ) {
		if ( x < 0 ) throw new IllegalArgumentException( "The argument " + x + " is negative" );

		if ( x < free ) return writeInCurrent( 1, x + 1 );

		// The free bits of the current bit buffer provide the first zeroes.
		final int shift = free;
		x -= shift;

		write( current );
		free = 8;
		current = 0;

		// Whole bytes of zeroes.
		int i = x >> 3;

		while( i-- != 0 ) write( 0 );

		writeInCurrent( 1, ( x & 7 ) + 1 );

		return x + shift + 1;
	}

	/** Writes a natural number in &gamma; coding, using the precomputed table
	 * of {@link OutputBitStream} for small values.
	 * 
	 * @param x a natural number.
	 * @return the number of bits written.
	 * @throws IllegalArgumentException if <code>x</code> is negative.
	 */
	private int writeGamma( int x ) {
		if ( x < 0 ) throw new IllegalArgumentException( "The argument " + x + " is negative" );
		// A table entry is passed to writeInt() both as the value and, through its upper six bits, as the length.
		if ( x < MAX_PRECOMPUTED ) return writeInt( GAMMA[ x ], GAMMA[ x ] >>> 26 );
		
		final int msb = Fast.mostSignificantBit( ++x );
		final int l = writeUnary( msb );
		return l + ( msb != 0 ? writeInt( x, msb ) : 0 );
	}

	/** Writes a natural number in &delta; coding, using the precomputed table
	 * of {@link OutputBitStream} for small values.
	 * 
	 * @param x a natural number.
	 * @return the number of bits written.
	 * @throws IllegalArgumentException if <code>x</code> is negative.
	 */
	private int writeDelta( int x ) {
		if ( x < 0 ) throw new IllegalArgumentException( "The argument " + x + " is negative" );
		// A table entry is passed to writeInt() both as the value and, through its upper six bits, as the length.
		if ( x < MAX_PRECOMPUTED ) return writeInt( DELTA[ x ], DELTA[ x ] >>> 26 );

		final int msb = Fast.mostSignificantBit( ++x );
		final int l = writeGamma( msb );
		return l + ( msb != 0 ? writeInt( x, msb ) : 0 );
	}

	/** Flushes the positions cached internally.
	 * 
	 * <p>If at least one position was added since the last flush, this method writes (depending
	 * on the completeness level) the count and the positions of the current document, and updates
	 * {@link #globCount}, {@link #maxCount} and {@link #posNumBits}. Otherwise, it does nothing.
	 * Note that the bit buffer is <em>not</em> {@linkplain #align() aligned}.
	 */

	@Override
	public void flush() {
		if ( count != 0 ) {
			if ( completeness >= COUNTS.ordinal() ) writeGamma( count - 1 );
			globCount += count;
			if ( maxCount < count ) maxCount = count;
			if ( completeness >= POSITIONS.ordinal() )  {
				// The first position is written as it is, the following ones as gaps (minus one).
				posNumBits += writeDelta( position[ 0 ] );
				for( int i = 1; i < count; i++ ) posNumBits += writeDelta( position[ i ] - position[ i - 1 ] - 1 );
			}
			count = 0;
		}
	}
	/** Sets the current document pointer.
	 * 
	 * <p>If the document pointer is changed since the last call, the positions currently
	 * stored are {@linkplain #flush() flushed} and the new pointer is written to the stream.
	 * 
	 * @param pointer a document pointer.
	 * @throws IllegalArgumentException if the value to be written (the pointer or, for differential
	 * lists, the gap from the previous pointer minus one) is negative. 
	 */
	public void setDocumentPointer( final int pointer ) {
		if ( pointer != lastPointer ) {
			flush();
			writeDelta( differential ? pointer - lastPointer - 1 : pointer );
			lastPointer = pointer;
			frequency++;
		}
	}

	/** Adds a new position for the current document pointer.
	 * 
	 * <p>It is mandatory that successive calls to this method for
	 * the same document pointer have increasing arguments.
	 * 
	 * <p>If this list does not store positions, the argument is ignored and just the count is updated.
	 * 
	 * @param pos a position.
	 * @throws IllegalStateException if no document pointer has been set.
	 * @throws RuntimeException wrapping an {@link IOException} thrown while dumping or reloading
	 * the position cache after an {@link OutOfMemoryError}. 
	 */

	public void addPosition( final int pos ) {
		if ( lastPointer == -1 ) throw new IllegalStateException( "No document pointer has been set: call setDocumentPointer() before addPosition()" );
		if ( completeness >= POSITIONS.ordinal() ) {
			if ( count == position.length ) {
				try {
					position = IntArrays.grow( position, count + 1 );
				}
				catch( OutOfMemoryError e ) {
					outOfMemoryError = true;
					try {
						// We try at all costs to avoid out-of-memory errors: we dump the position cache, try to allocate a slightly larger array and reload it.
						final File temp = File.createTempFile( ByteArrayPostingList.class.getSimpleName(), "dump" );
						// Just a safety net: the file is explicitly deleted below.
						temp.deleteOnExit();
						try {
							BinIO.storeInts( position, temp );
							final int oldLength = position.length;
							// Drop the old array before allocating the new one, so that the two never coexist.
							position = null;
							position = new int[ oldLength + POSITIONS_EMERGENCY_INCREMENT ];
							BinIO.loadInts( temp, position );
						}
						finally {
							// Remove the dump also when storing, reallocating or reloading fails.
							temp.delete();
						}
					}
					catch( IOException f ) {
						throw new RuntimeException( f );
					}
				}
			}
			position[ count ] = pos;
		}
		count++;
	}
	
	
	/** Returns the number of bits written by this posting list.
	 * 
	 * <p>The result includes the bits still floating in the bit buffer.
	 * 
	 * @return the number of bits written by this posting list.
	 */
	public long writtenBits() {
		return pos * 8L + 8 - free;
	}
	
	/** Writes the given number of bits of the internal buffer to the provided output bit stream,
	 * stripping all document pointers.
	 * 
	 * <p>This method is a horrible kluge solving the problem of terms appearing in all documents:
	 * {@link BitStreamIndexWriter} would <em>not</em> write pointers in this case, but we do not know 
	 * whether we will need pointers or not while we are filling the internal buffer. Thus, for
	 * those (hopefully few) terms appearing in all documents this method can be used to
	 * dump the internal buffer stripping all pointers.
	 * 
	 * <p>Note that the valid number of bits should be retrieved using {@link #writtenBits()}
	 * after a {@link #flush()}. Then, a call to {@link #align()} will dump to the buffer
	 * the bits still floating in the bit buffer; at that point this method can be called safely.
	 * 
	 * @param obs an output bit stream.
	 * @param bitLength the number of bits to be scanned.
	 * @throws IOException if reading from the internal buffer or writing to <code>obs</code> fails.
	 */
	public void stripPointers( final OutputBitStream obs, final long bitLength ) throws IOException {
		final InputBitStream ibs = new InputBitStream( buffer );
		// Number of positions of the posting being copied (a local, distinct from the count field).
		int occurrences;
		while( ibs.readBits() < bitLength ) {
			ibs.readDelta(); // Discard pointer
			if ( completeness >= COUNTS.ordinal() ) {
				occurrences = ibs.readGamma() + 1;
				obs.writeGamma( occurrences - 1 );
				// Positions are copied verbatim.
				if ( completeness >= POSITIONS.ordinal() ) while( occurrences-- != 0 ) obs.writeDelta( ibs.readDelta() );
			}
		}
	}
	
	/** Calls {@link #flush()} and then releases resources allocated by this byte-array posting list, keeping just the internal buffer.
	 * 
	 * <p>The position cache is discarded: if this list stores positions, {@link #addPosition(int)} must not be called after this method. 
	 */
	@Override
	public void close() {
		flush();
		position = null;
	}
}
