/**
 *
	Identiza - Fuzzy matching Libraries
    
    Copyright (C) 2019  Robert James Haynes (EntityStream KFT), Budapest Hungary

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as
    published by the Free Software Foundation, either version 3 of the
    License, or (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program.  If not, see https://www.gnu.org/licenses/agpl-3.0.en.html
 */
package com.entitystream.identiza.entity.extract;

import java.io.BufferedOutputStream;
import java.io.EOFException;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.OutputStream;
import java.io.Serializable;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Base64;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.Stack;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.regex.PatternSyntaxException;
import java.util.zip.GZIPInputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathFactory;

import org.apache.commons.compress.archivers.ArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
import org.apache.commons.net.ftp.FTPClient;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.CredentialsProvider;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.BasicCredentialsProvider;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;


import java.util.logging.Logger;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.helpers.DefaultHandler;

import com.entitystream.monster.db.Document;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import com.entitystream.identiza.db.Node;
import com.entitystream.identiza.entity.resolve.processing.IdentizaException;
import com.entitystream.identiza.entity.resolve.storage.RecordValues;
import com.entitystream.identiza.metadata.IdentizaSettings;
import au.com.bytecode.opencsv.CSVReader;

public class TizaReader implements Serializable {
	// XPath evaluator; transient, and not referenced anywhere in the visible part of this class.
	private transient XPath xPath = XPathFactory.newInstance().newXPath();
	protected static Logger logger = Logger.getLogger("com.entitystream.identiza.TizaReader");
	// Source kinds held in TYPE. The private constructors assign CSV, JDBC, OBJECTSTREAM, EXCEL, XML and MONGO;
	// EDGAR is not assigned anywhere in the visible code.
	public static final int CSV=0;
	public static final int JDBC=1;
	public static final int OBJECTSTREAM=2;
	public static final int EXCEL = 3;
	public static final int XML = 4;
	public static final int MONGO=5;
	public static final int EDGAR=6;
	// Kind of the underlying source; stays -1 until a constructor assigns one of the constants above.
	protected int TYPE=-1;
	// Human readable description of the source (file name, SQL text, ...), returned by getDisplay().
	private String display;
	// CSV separator and quote characters handed to CSVReader.
	char _delimiter;
	char _quote;
	// Path of the file being read (CSV, Excel, XML and object-stream sources).
	String filename;
	// Character set name supplied for CSV files.
	String encoding;
	transient CSVReader csv;
	transient ObjectInputStream objectStream;

	//jdbc fields
	transient private ResultSet rs;
	transient Connection dbConnection=null;
	String sql=null;
	// Column names of the current source; readNext() uses them as the keys of each record map.
	List<String> cols = new ArrayList<String>();
	private String driver;
	private String connection;
	private String username;
	private String password;
	// False once the source could not be opened; readNext() also synchronizes on this object.
	Boolean valid=true;
	// Excel state: the open workbook, the selected sheet and the index of the row read last.
	transient private Sheet sheet;
	transient private Workbook workbook;;
	private int rowcount=-1;
	private int sheetno;
	// Qualified name handed to the SAX handler in startXML(); the XML constructor overwrites this default.
	private String rootNode="lei:LEIRecord";
	// XML state: execRunning/started gate startXML(); recordQueue is polled by readXMLRow() and getXMLCols().
	// NOTE(review): presumably the background SAX task fills recordQueue and clears execRunning when done - confirm.
	boolean execRunning=false;
	private boolean started = false;
	private transient ExecutorService exec = Executors.newSingleThreadExecutor();
	protected LinkedBlockingQueue<Map<String, Object>> recordQueue = new LinkedBlockingQueue<Map<String, Object>>(10000);
	// Messages returned by getStatus().
	private ArrayList<String> errors=new ArrayList<String>();
	// isValid() reports true only when this equals 1; the JDBC constructor sets -1 on failure.
	protected int status;

	// Source documents and the iterator over them for MONGO readers.
	private Iterable<JsonObject> docs;
	private Iterator<JsonObject> docIterator;
	// Excel only: file name portion used to build the synthetic ROW value.
	private String shortFile;
	// Excel only: parsed metadata (sheetName, startRow, endRow, startCol, endCol).
	private JsonObject metaDoc;
	// Excel only: bounds of the region being read.
	private int XLSfirstRowNum=0;
	private int XLSlastRowNum;
	private int XLSfirstColNum=0;
	private int XLSlastColNum;

	/**
	 * Returns the messages collected while setting up this reader.
	 *
	 * @return the internal (live, not copied) list of error messages
	 */
	public ArrayList<String> getStatus() {
		final ArrayList<String> collected = this.errors;
		return collected;
	}

	/**
	 * Tells whether the reader finished its set-up successfully.
	 *
	 * @return {@code true} only when the status code equals 1
	 */
	public boolean isValid() {
		final boolean ready = (this.status == 1);
		return ready;
	}
	/**
	 * Builds a reader over one sheet of an Excel workbook.
	 *
	 * @param filename path of the workbook file
	 * @param sheetno  index of the sheet, used when the metadata names no sheet
	 * @param metadata JSON text passed to the Excel constructor; may be {@code null}
	 * @return the new reader
	 * @throws IOException declared by the underlying constructor
	 */
	public static TizaReader createXLSReader(String filename, int sheetno, String metadata) throws IOException {
		final TizaReader excelReader = new TizaReader(filename, sheetno, metadata);
		return excelReader;
	}

	/**
	 * Builds a reader over a file of serialized records.
	 *
	 * @param filename path of the object-stream file
	 * @return the new reader
	 * @throws IOException when the file cannot be opened as an object stream
	 */
	public static TizaReader createOBJECTReader(String filename) throws IOException {
		final TizaReader objectReader = new TizaReader(filename);
		return objectReader;
	}

	

	/**
	 * Builds a reader over a delimited text file.
	 *
	 * @param filename  path of the file
	 * @param quote     quote character specification understood by the CSV constructor
	 * @param delimiter column separator (the two characters backslash-t select a tab)
	 * @param encoding  character set name; {@code null} selects UTF-8
	 * @return the new reader
	 * @throws UnsupportedEncodingException when the encoding name is unknown
	 * @throws FileNotFoundException        when the file cannot be opened
	 */
	public static TizaReader createCSVReader(String filename, String quote, String delimiter, String encoding) throws UnsupportedEncodingException, FileNotFoundException {
		final TizaReader csvReader = new TizaReader(filename, quote, delimiter, encoding);
		return csvReader;
	}

	/**
	 * Builds a reader over the result of a SQL query.
	 *
	 * @param driver     fully qualified JDBC driver class name
	 * @param connection JDBC connection URL
	 * @param user       database user
	 * @param passwd     database password
	 * @param sql        query whose rows are read
	 * @return the new reader
	 * @throws SQLException when the query cannot be executed
	 */
	public static TizaReader createJDBCReader(String driver, String connection, String user, String passwd, String sql) throws SQLException {
		final TizaReader jdbcReader = new TizaReader(driver, connection, user, passwd, sql);
		return jdbcReader;
	}

	/**
	 * Builds a reader over an XML file.
	 *
	 * @param absolutePath path of the XML file
	 * @param rootNode     element name passed on to the XML constructor
	 * @return the new reader
	 * @throws IOException declared by the underlying constructor
	 */
	public static TizaReader createXMLReader(String absolutePath, String rootNode) throws IOException {
		final TizaReader xmlReader = new TizaReader(absolutePath, rootNode);
		return xmlReader;
	}

	/**
	 * Builds a reader over an in-memory sequence of JSON documents.
	 *
	 * @param docs the documents to iterate
	 * @return the new reader
	 * @throws IOException never thrown by the visible code; kept for interface compatibility
	 */
	public static TizaReader createMongoReader(Iterable<JsonObject> docs) throws IOException {
		final TizaReader documentReader = new TizaReader(docs);
		return documentReader;
	}

	/**
	 * Unpacks the archive named by the "filename" property and creates a reader for the FIRST extracted file.
	 * The reader type is chosen from the extracted file's extension (XLS, CSV, XML) and, failing that, from
	 * the "fileType" property (ZIP, CSV, XML).
	 *
	 * @param properties job settings; reads "filename", "extractPattern", "fileType", "sheetno", "quote",
	 *                   "delimiter", "encoding" and "rootNode". When the extracted file is itself an archive
	 *                   the "filename" property is replaced by the extracted path before unpacking again.
	 * @param groupName  passed through unchanged
	 * @return a reader for the first extracted file, or {@code null} when the archive yielded no file
	 * @throws IOException  when the archive or the extracted file cannot be read
	 * @throws SQLException declared for interface compatibility
	 */
	public static TizaReader createZIPReader(Properties properties, String groupName) throws IOException, SQLException {
		//unpack zip - only the first extracted file is used, every branch below returns
		List<String> filePaths=getZIPFile(properties.getProperty("extractPattern"), properties, groupName);
		// may be null when the property is not configured, so always compare with the constant on the left
		String fileType=properties.getProperty("fileType");
		for (String filePath : filePaths){
			if (filePath==null){
				return new TizaReader("Could not get a file from the zip");
			}
			String upperPath=filePath.toUpperCase();
			if (upperPath.endsWith("XLS")){
				return createXLSReader(filePath, Integer.parseInt(properties.getProperty("sheetno","0")), "{}");
			} else if (upperPath.endsWith("CSV")){
				return createCSVReader(filePath,  properties.getProperty("quote"),  properties.getProperty("delimiter"),  properties.getProperty("encoding"));
			} else if (upperPath.endsWith("XML")){
				return createXMLReader(filePath, properties.getProperty("rootNode"));
			} else if ("ZIP".equalsIgnoreCase(fileType)){
				// Nested archive: unpack the extracted file. Recursing with an unchanged "filename"
				// would unpack the same outer archive again and never terminate.
				if (filePath.equals(properties.getProperty("filename"))){
					logger.severe("File can not be processed, I simply cant tell what type it is");
					return new TizaReader("File can not be processed, I simply cant tell what type it is");
				}
				properties.setProperty("filename", filePath);
				return createZIPReader(properties,groupName);
			} else if ("CSV".equalsIgnoreCase(fileType)){
				return createCSVReader(filePath,  properties.getProperty("quote"),  properties.getProperty("delimiter"),  properties.getProperty("encoding"));
			} else if ("XML".equalsIgnoreCase(fileType)){
				return createXMLReader(filePath, properties.getProperty("rootNode"));
			} else {
				logger.severe("File can not be processed, I simply cant tell what type it is");
				return new TizaReader("File can not be processed, I simply cant tell what type it is");
			}
		}
		return null;
	}

	/**
	 * Unpacks the archive named by the "filename" property into the user temp directory.
	 *
	 * @param extractPattern regular expression selecting the entries to extract, or {@code null} for all
	 * @param properties     job settings; only "filename" is read
	 * @param groupName      not used
	 * @return the paths of the extracted files
	 * @throws IOException when the archive cannot be read
	 */
	private static List<String> getZIPFile(String extractPattern, Properties properties, String groupName) throws IOException {
		final String archivePath = properties.getProperty("filename");
		final String extractionDir = IdentizaSettings.getUserTempPath()+"/";
		return unzip(archivePath, extractionDir, extractPattern);
	}

	/**
	 * Extracts an archive. The format is chosen from the file name: "tar.gz", ".gz" (single compressed
	 * file, written next to the source with the ".gz" suffix removed) or, for anything else, zip.
	 *
	 * @param zipFilePath   path of the archive
	 * @param destDirectory directory that receives tar/zip entries; created when missing
	 * @param filter        regular expression matched against the full target path of each entry, or
	 *                      {@code null} to extract everything
	 * @return the paths of the files that were written
	 * @throws IOException when the archive cannot be read or a file cannot be written
	 */
	public static List<String> unzip(String zipFilePath, String destDirectory, String filter) throws IOException {
		ArrayList<String> ret=new ArrayList<String>();
		File destDir = new File(destDirectory);
		if (!destDir.exists()) {
			// mkdirs: the parent of the temp directory may not exist yet either
			destDir.mkdirs();
		}

		String upperPath = zipFilePath.toUpperCase();
		if (upperPath.endsWith("TAR.GZ")){
			// try-with-resources closes all three streams even when extraction fails half way
			try (FileInputStream fileIn = new FileInputStream(zipFilePath);
					GzipCompressorInputStream gzipIn = new GzipCompressorInputStream(fileIn);
					TarArchiveInputStream tarIn = new TarArchiveInputStream(gzipIn)) {
				TarArchiveEntry entry;
				while ((entry = (TarArchiveEntry)tarIn.getNextEntry()) != null) {
					extractArchiveEntry(tarIn, destDir, destDirectory, entry.getName(), entry.isDirectory(), filter, zipFilePath, ret);
				}
			}
		} else if (upperPath.endsWith(".GZ")){
			// Strip the three-character suffix whatever its case. The former regex replaceAll(".gz", "")
			// missed ".GZ" (so the output overwrote the input) and also removed "?gz" inside the name.
			String targetPath = zipFilePath.substring(0, zipFilePath.length()-3);
			try (FileInputStream fileIn = new FileInputStream(zipFilePath);
					GZIPInputStream gzipIn = new java.util.zip.GZIPInputStream(fileIn)) {
				logger.info("Extracting gz file " + zipFilePath);
				ret.add(extractFile(gzipIn, targetPath));
			}
		} else { //zip
			try (FileInputStream fileIn = new FileInputStream(zipFilePath);
					ZipInputStream zipIn = new ZipInputStream(fileIn)) {
				ZipEntry entry;
				while ((entry = zipIn.getNextEntry()) != null) {
					extractArchiveEntry(zipIn, destDir, destDirectory, entry.getName(), entry.isDirectory(), filter, zipFilePath, ret);
					zipIn.closeEntry();
				}
			}
		}

		return ret;
	}

	/**
	 * Handles one tar/zip entry: creates directories, extracts files accepted by the filter and skips
	 * entries whose name would place them outside the destination directory.
	 */
	private static void extractArchiveEntry(InputStream archiveIn, File destDir, String destDirectory, String entryName,
			boolean directory, String filter, String zipFilePath, List<String> extracted) throws IOException {
		String filePath = destDirectory + File.separator + entryName;
		File target = new File(filePath);

		// Archives arrive from remote servers: refuse names such as "../../x" that escape the destination.
		String destRoot = destDir.getCanonicalPath();
		String rootPrefix = destRoot.endsWith(File.separator) ? destRoot : destRoot + File.separator;
		String canonicalTarget = target.getCanonicalPath();
		if (!canonicalTarget.equals(destRoot) && !canonicalTarget.startsWith(rootPrefix)) {
			logger.severe("Skipping entry " + entryName + " of " + zipFilePath + " because it would be written outside " + destDirectory);
			return;
		}

		if (directory) {
			// if the entry is a directory, make the directory
			target.mkdirs();
			return;
		}
		// if the entry is a file, extracts it
		try{
			if(filter==null || filePath.matches(filter)){
				logger.info("Extracting " + filePath + " from zip file " + zipFilePath);
				// archives do not always list a directory before the files inside it
				File parent = target.getParentFile();
				if (parent!=null)
					parent.mkdirs();
				extracted.add(extractFile(archiveIn, filePath));
			}
		} catch (PatternSyntaxException w){
			logger.severe("Couldnt tell if we needed this file as the pattern is throwing an error " + w.toString());
		}
	}
	
	/**
	 * Copies the remaining bytes of a stream into a file. The input stream is left open so that the caller
	 * can move on to the next archive entry.
	 *
	 * @param zipIn    stream positioned at the start of the data to copy
	 * @param filePath path of the file to (over)write; missing parent directories are created
	 * @return {@code filePath}
	 * @throws IOException when reading or writing fails
	 */
	private static String extractFile(InputStream zipIn, String filePath) throws IOException {
		File parent = new File(filePath).getParentFile();
		if (parent != null && !parent.exists()) {
			parent.mkdirs();
		}
		// try-with-resources: the output file is closed even when the copy fails half way
		try (BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(filePath))) {
			byte[] bytesIn = new byte[30000];
			int read;
			while ((read = zipIn.read(bytesIn)) != -1) {
				bos.write(bytesIn, 0, read);
			}
		}
		return filePath;
	}

	/**
	 * Downloads a file over HTTP(S) and creates a reader for it. The reader type is chosen from the
	 * extension of the local file (ZIP, CSV, XML) and, failing that, from the "fileType" property.
	 *
	 * @param fileName   URL to download
	 * @param properties job settings; reads "httpheaders", "httpusername", "httppassword", "fileType", "quote",
	 *                   "delimiter", "encoding" and "rootNode"; "filename" is overwritten with the local path
	 * @param groupName  passed through to the ZIP reader
	 * @return the reader for the downloaded file
	 * @throws IOException  when the download or opening the file fails
	 * @throws SQLException declared for interface compatibility
	 */
	public static TizaReader createHTTPReader(String fileName,Properties properties, String groupName) throws IOException, SQLException {
		//get file
		String filePath;
		try {
			logger.info("Downloading a file from a site " + fileName);
			Document headers = new Document();
			if (properties.getProperty("httpheaders")!=null)
				headers = Document.parse((String)properties.getProperty("httpheaders"));
			filePath = getHTTPFile(fileName, properties.getProperty("httpusername"), properties.getProperty("httppassword"), headers);

			if (filePath!=null){
				properties.setProperty("filename", filePath);
				// may be null when the property is not configured, so compare with the constant on the left
				String fileType=properties.getProperty("fileType");
				String upperPath=filePath.toUpperCase();
				if (upperPath.endsWith("ZIP")){
					return createZIPReader(properties,groupName);
				} else if (upperPath.endsWith("CSV")){
					return createCSVReader(filePath,  properties.getProperty("quote"),  properties.getProperty("delimiter"),  properties.getProperty("encoding"));
				} else if (upperPath.endsWith("XML")){
					return createXMLReader(filePath, properties.getProperty("rootNode"));
				} else if ("ZIP".equalsIgnoreCase(fileType)){
					return createZIPReader(properties,groupName);
				} else if ("CSV".equalsIgnoreCase(fileType)){
					return createCSVReader(filePath,  properties.getProperty("quote"),  properties.getProperty("delimiter"),  properties.getProperty("encoding"));
				} else if ("XML".equalsIgnoreCase(fileType)){
					return createXMLReader(filePath, properties.getProperty("rootNode"));

				} else {
					return new TizaReader("File can not be processed " + fileType);
				}
			}
		} catch (URISyntaxException e) {
			logger.log(java.util.logging.Level.SEVERE, "The address " + fileName + " is not a valid URI", e);
		}
		return new TizaReader("An error occurred getting the file from the HTTP server");
	}

	/**
	 * Expands date placeholders in a string. Two notations are recognised, <code>$pattern;</code> and
	 * <code>{pattern}</code>, where pattern is a {@link SimpleDateFormat} pattern; each placeholder is
	 * replaced by the given date formatted with that pattern.
	 *
	 * @param url text that may contain placeholders
	 * @param d   date to format
	 * @return the text with every placeholder expanded
	 * @throws IllegalArgumentException when a placeholder is not a valid date pattern
	 */
	public static String decodeDates(String url, Date d){
		url = substituteDateTokens(url, '$', ';', d);
		url = substituteDateTokens(url, '{', '}', d);
		return url;
	}

	/**
	 * Replaces every open/pattern/close token of the text by the formatted date.
	 */
	private static String substituteDateTokens(String text, char open, char close, Date d){
		if (text.indexOf(open)<0)
			return text;
		HashMap<String, String> substitutions = new HashMap<String, String>();
		for (int i=0; i<text.length(); i++){
			if (text.charAt(i)!=open)
				continue;
			int end = text.indexOf(close, i+1);
			if (end<0)
				break; // no terminator after this position, so none after any later one either
			String bit = text.substring(i+1, end);
			substitutions.put(String.valueOf(open)+bit+close, new SimpleDateFormat(bit).format(d));
		}
		for (Map.Entry<String, String> substitution : substitutions.entrySet()){
			// Literal replace: the pattern may contain regex metacharacters and the formatted date may
			// contain '$' or '\', both of which the former replaceAll interpreted.
			text=text.replace(substitution.getKey(), substitution.getValue());
		}
		return text;
	}

	/**
	 * Downloads a URL into the user temp directory. The local name is derived from the hash code of the URL;
	 * when a non-empty file of that name already exists the download is skipped.
	 *
	 * @param url      address to fetch
	 * @param username user for HTTP authentication, or {@code null} for none
	 * @param password password for HTTP authentication
	 * @param headers  request headers to send, or {@code null}
	 * @return the local path of the file, or {@code null} when the server did not answer with status 200
	 * @throws URISyntaxException      when the URL is malformed
	 * @throws ClientProtocolException on an HTTP protocol error
	 * @throws IOException             when the transfer fails; a partially written file is removed
	 */
	public static String getHTTPFile(String url, String username, String password, Document headers) throws URISyntaxException, ClientProtocolException, IOException {
		String target="download"+url.hashCode();
		target=IdentizaSettings.getUserTempPath()+"/"+target;
		File targetFile = new File(target);
		if (targetFile.exists() && targetFile.length()>0L){
			logger.info("Target file was already available locally and it seems ok - aborting download ("+target+")");
			return target;
		}

		CloseableHttpClient httpClient;
		if (username!=null) {
			CredentialsProvider credsProvider = new BasicCredentialsProvider();
			credsProvider.setCredentials(
					new AuthScope(org.apache.http.auth.AuthScope.ANY_HOST, org.apache.http.auth.AuthScope.ANY_PORT),
					new UsernamePasswordCredentials(username, password));
			httpClient = HttpClients.custom()
					.setDefaultCredentialsProvider(credsProvider)
					.build();
		} else {
			httpClient = new DefaultHttpClient();
		}
		try {
			HttpGet http = new HttpGet();
			http.setURI(new URI(url));
			if (headers!=null)
				for (Object hd : headers.keySet())
					http.setHeader((String)hd, (String) headers.get(hd));

			HttpResponse resp = httpClient.execute(http);
			if (resp.getStatusLine().getStatusCode() != 200) {
				logger.severe("HTTP get Expected 200 but got " + resp.getStatusLine().getStatusCode() + ", for " + url);
				return null;
			}

			HttpEntity entity = resp.getEntity();
			if (entity==null) {
				logger.severe("HTTP get returned no content for " + url);
				return null;
			}
			boolean complete=false;
			try (InputStream is = entity.getContent();
					FileOutputStream fs = new FileOutputStream(targetFile)) {
				byte[] b = new byte[30000];
				int r;
				while ((r=is.read(b))!=-1){
					fs.write(b,0,r);
				}
				complete=true;
			} finally {
				// A truncated file would be taken for a finished download by the check at the top
				// of this method on the next call, so remove it.
				if (!complete)
					targetFile.delete();
			}
		} finally {
			// releases the connections held by the client, also on the early returns above
			httpClient.close();
		}
		return target;
	}
	
	/**
	 * Creates a reader for an address, picking the transport from its scheme: http/https, ftp/ftps, or a
	 * local file for everything else.
	 *
	 * @param url        address or local path of the source
	 * @param properties job settings handed to the transport-specific factory
	 * @param groupName  handed to the transport-specific factory
	 * @return whatever the selected factory returns
	 * @throws IOException  propagated from the selected factory
	 * @throws SQLException propagated from the selected factory
	 */
	public static TizaReader createReader(String url, Properties properties, String groupName) throws IOException, SQLException {
		final String lowered = url.toLowerCase();

		final boolean overHttp = lowered.startsWith("http://") || lowered.startsWith("https://");
		if (overHttp) {
			return createHTTPReader(url, properties, groupName);
		}

		final boolean overFtp = lowered.startsWith("ftp://") || lowered.startsWith("ftps://");
		return overFtp
				? createFTPReader(url, properties, groupName)
				: createFileReader(url, properties, groupName);
	}
	
	/**
	 * Creates a reader for a local file. The reader type is chosen from the (lower-case) extension of the
	 * path (zip, csv, xml) and, failing that, from the "fileType" property.
	 *
	 * @param filePath   path of the file; {@code null} yields {@code null}
	 * @param properties job settings; reads "fileType", "quote", "delimiter", "encoding" and "rootNode".
	 *                   The ZIP reader unpacks the file named by the "filename" property, not filePath.
	 * @param groupName  passed through to the ZIP reader
	 * @return the reader, or {@code null} when filePath is {@code null}
	 * @throws IOException  when the file cannot be opened or its type cannot be determined
	 * @throws SQLException declared for interface compatibility
	 */
	public static TizaReader createFileReader(String filePath, Properties properties, String groupName) throws IOException, SQLException {
		//get file
		// may be null when the property is not configured, so compare with the constant on the left
		String fileType= properties.getProperty("fileType");
		if (filePath==null) return null;
		if (filePath.endsWith("zip")){
			return createZIPReader(properties, groupName);
		} else if (filePath.endsWith("csv")){
			return createCSVReader(filePath,  properties.getProperty("quote"),  properties.getProperty("delimiter"),  properties.getProperty("encoding"));
		} else if (filePath.endsWith("xml")){
			return createXMLReader(filePath, properties.getProperty("rootNode"));
		} else if ("ZIP".equalsIgnoreCase(fileType)){
			return createZIPReader(properties,groupName);
		} else if ("CSV".equalsIgnoreCase(fileType)){
			return createCSVReader(filePath,  properties.getProperty("quote"),  properties.getProperty("delimiter"),  properties.getProperty("encoding"));
		} else if ("XML".equalsIgnoreCase(fileType)){
			return createXMLReader(filePath, properties.getProperty("rootNode"));

		} else return new TizaReader("File can not be processed");
	}


	/**
	 * Downloads a file over FTP and creates a reader for it. The reader type is chosen from the extension of
	 * the local file (zip, csv, xml) and, failing that, from the "fileType" property.
	 *
	 * @param fileName   FTP URL to download
	 * @param properties job settings; reads "extractPattern", "ftpusername", "ftppassword", "fileType",
	 *                   "quote", "delimiter", "encoding" and "rootNode". For archives "filename" is
	 *                   overwritten with the local path so that the ZIP reader unpacks the download.
	 * @param groupName  passed through to the ZIP reader
	 * @return the reader, or {@code null} when the download failed
	 * @throws IOException  when the downloaded file cannot be opened or its type cannot be determined
	 * @throws SQLException declared for interface compatibility
	 */
	public static TizaReader createFTPReader(String fileName, Properties properties, String groupName) throws IOException, SQLException {
		//get file
		String filePath = getFTPFile(properties.getProperty("extractPattern"),fileName,properties.getProperty("ftpusername"),properties.getProperty("ftppassword"));
		// may be null when the property is not configured, so compare with the constant on the left
		String fileType= properties.getProperty("fileType");
		if (filePath==null) return null;
		if (filePath.endsWith("zip") || "ZIP".equalsIgnoreCase(fileType) && !filePath.endsWith("csv") && !filePath.endsWith("xml")){
			// createZIPReader unpacks the file named by the "filename" property; point it at the
			// downloaded copy (as createHTTPReader does) instead of the remote address.
			properties.setProperty("filename", filePath);
			return createZIPReader(properties, groupName);
		} else if (filePath.endsWith("csv")){
			return createCSVReader(filePath,  properties.getProperty("quote"),  properties.getProperty("delimiter"),  properties.getProperty("encoding"));
		} else if (filePath.endsWith("xml")){
			return createXMLReader(filePath, properties.getProperty("rootNode"));
		} else if ("CSV".equalsIgnoreCase(fileType)){
			return createCSVReader(filePath,  properties.getProperty("quote"),  properties.getProperty("delimiter"),  properties.getProperty("encoding"));
		} else if ("XML".equalsIgnoreCase(fileType)){
			return createXMLReader(filePath, properties.getProperty("rootNode"));

		} else return new TizaReader("File can not be processed");
	}

	/**
	 * Downloads a file from an FTP server into the user temp directory. The local name is derived from the
	 * hash code of the address; when a non-empty file of that name already exists the download is skipped.
	 *
	 * @param extractPattern not used
	 * @param fileName       FTP URL of the file
	 * @param username       login name; the login is skipped unless both user and password are given
	 * @param password       login password
	 * @return the absolute local path, or {@code null} when the file could not be retrieved
	 */
	public static String getFTPFile(String extractPattern,String fileName, String username, String password) {
		String target="download"+fileName.hashCode();
		target=IdentizaSettings.getUserTempPath()+"/"+target;
		File targetFile = new File(target);
		if (targetFile.exists() && targetFile.length()>0L){
			logger.info("Target file was already available locally and it seems ok - aborting download ("+target+")");
			return targetFile.getAbsolutePath();
		}

		FTPClient ftp = new FTPClient();
		boolean retrieved=false;
		try {
			URL url = new URL(fileName);
			int port=21;
			if (url.getPort()!=-1)
				port=url.getPort();
			ftp.connect(url.getHost(), port);
			if (username!=null && password!=null)
				ftp.login(username, password);
			// FTPClient transfers in ASCII mode by default, which corrupts archives and other binary files
			ftp.setFileType(FTPClient.BINARY_FILE_TYPE);

			try (OutputStream targetFileStream = new FileOutputStream(targetFile)) {
				retrieved = ftp.retrieveFile(url.getFile(), targetFileStream);
			}
			if (!retrieved)
				logger.severe("FTP server refused to deliver " + fileName + ": " + ftp.getReplyString());
		} catch (Exception e) {
			logger.log(java.util.logging.Level.SEVERE, "Unable to download " + fileName, e);
		} finally {
			if (ftp.isConnected()) {
				try {
					ftp.disconnect();
				} catch (IOException ignored) {
					// the transfer outcome is already known; nothing useful can be done here
				}
			}
			// do not leave an empty or truncated file behind for callers or later runs to pick up
			if (!retrieved)
				targetFile.delete();
		}
		return retrieved ? targetFile.getAbsolutePath() : null;
	}

	/**
	 * Creates an Excel reader and opens the workbook.
	 *
	 * @param filename path of the workbook; backslashes are converted to forward slashes
	 * @param sheetno  index of the sheet, used when the metadata names no sheet
	 * @param metadata JSON object text, or {@code null} for an empty object
	 * @throws IOException declared for interface compatibility
	 */
	private TizaReader(String filename, int sheetno, String metadata) throws IOException{
		this.filename = filename.replaceAll("\\\\", "/");
		TYPE=EXCEL;
		display=filename;
		this.sheetno=sheetno;
		if (metadata!=null)
			this.metaDoc = (JsonObject) new JsonParser().parse(metadata);
		else this.metaDoc=new JsonObject();
		openExcel();
		// openExcel() reports failures by clearing 'valid'; mirror that in the status so that
		// isValid() does not claim success for a workbook that could not be opened
		if (valid){
			status=1;
			errors.clear();
		} else {
			status=-1;
			errors.add("Unable to open the Excel file " + this.filename);
		}
	}


	/**
	 * Creates an XML reader. Nothing is opened here; the visible code starts parsing from startXML().
	 *
	 * @param xmlfilename path of the XML file, stored as given
	 * @param rootNode    element name stored for the SAX handler
	 * @throws IOException never thrown by this body; kept for interface compatibility
	 */
	private TizaReader(String xmlfilename, String rootNode) throws IOException {
		this.TYPE = XML;
		this.rootNode = rootNode;
		this.filename = xmlfilename;
		this.display = xmlfilename;
		this.status = 1;
		this.errors.clear();
	}

	/**
	 * Creates a reader over a file of serialized records and opens it.
	 * Several factories in this class also pass an error message to this constructor; because the
	 * argument is opened as a file, such a call ends in a FileNotFoundException carrying the message.
	 *
	 * @param filename path of the object-stream file; backslashes are converted to forward slashes
	 * @throws IOException when the file is missing or does not start with a valid stream header
	 */
	private TizaReader(String filename) throws IOException{
		this.filename = filename.replaceAll("\\\\", "/");
		FileInputStream stream = new FileInputStream(this.filename);
		boolean opened=false;
		try {
			objectStream = new ObjectInputStream(stream);
			opened=true;
		} finally {
			// the ObjectInputStream constructor reads the stream header and may throw;
			// without this the file handle would stay open
			if (!opened)
				stream.close();
		}
		TYPE=OBJECTSTREAM;
		display=filename;
		status=1;
		errors.clear();
	}

	/**
	 * Creates a CSV reader, reads the headings from the first line and rewinds the file.
	 *
	 * @param filename  path of the file; backslashes are converted to forward slashes for later re-opens
	 * @param quote     quote character: a literal character, or its hexadecimal code optionally prefixed
	 *                  by '%' (for example %22); {@code null} selects the double quote
	 * @param delimiter column separator; the two characters backslash-t select a tab, otherwise the last
	 *                  character of the text is used; {@code null} selects the comma
	 * @param encoding  character set name; {@code null} selects UTF-8
	 * @throws UnsupportedEncodingException when the encoding name is unknown
	 * @throws FileNotFoundException        when the file cannot be opened
	 */
	private TizaReader(String filename, String quote, String delimiter, String encoding) throws UnsupportedEncodingException, FileNotFoundException{
		logger.info("TizaReader init'd");
		if (encoding==null)
			encoding="UTF-8";
		// store the effective value (after defaulting) so that later re-opens decode the file the same way
		this.encoding=encoding;
		this.filename = filename.replaceAll("\\\\", "/");

		// the factories pass property values straight through, so either may be missing
		if (delimiter==null)
			delimiter=",";
		if (quote==null)
			quote="\"";

		if (delimiter.equals("\\t"))
			delimiter="\t";
		if (delimiter.length()>0)
			_delimiter = delimiter.charAt(delimiter.length()-1);
		//convert hexquote to quote
		// NOTE(review): only the text before the first 'x' is looked at, so "0x22" yields character 0
		// rather than a double quote - confirm whether that is intended.
		String[] quoteBits = quote.split("x");
		if (quoteBits.length>0){
			String quoteBit=quoteBits[0];
			if (quoteBit.equalsIgnoreCase("\"") || quoteBit.equalsIgnoreCase("\'")) {
				_quote=quoteBit.charAt(0);
			} else {
				try {
					_quote = (char)Integer.parseInt(quoteBit.replaceAll("%", ""), 16);
				} catch (NumberFormatException notHex) {
					// not a hexadecimal code: take the text as the literal quote character
					if (quoteBit.length()>0)
						_quote=quoteBit.charAt(0);
				}
			}
		}

		InputStreamReader reader = new InputStreamReader(new FileInputStream(filename), encoding);
		csv = new CSVReader(reader, _delimiter, _quote);
		TYPE=CSV;
		display=filename;
		status=1;
		errors.clear();
		cols=this.getHeadings();
		try {
			this.reset();
		} catch (Exception e) {
			logger.log(java.util.logging.Level.SEVERE, "Unable to rewind " + filename, e);
		}
	}

	protected TizaReader(){
		logger.info("TizaReader init'd");
	}

	/**
	 * Creates a JDBC reader: loads the driver, connects, runs the query and records the column names.
	 * A driver or connection failure does not throw; it leaves the reader with status -1, valid=false and
	 * a message in the error list.
	 *
	 * @param driver     fully qualified JDBC driver class name
	 * @param connection JDBC connection URL
	 * @param user       database user
	 * @param passwd     database password
	 * @param sql        query whose rows are read
	 * @throws SQLException when the query cannot be executed
	 */
	private TizaReader(String driver, String connection, String user, String passwd, String sql) throws SQLException{
		logger.info("TizaReader init'd");
		this.sql=sql;
		this.driver=driver;
		// copy() rebuilds the reader from these fields, so the URL has to be kept as well
		this.connection=connection;
		this.username=user;
		this.password=passwd;
		//check it exists
		try {
			Class.forName(driver).newInstance(); //Or any other driver
		}
		catch(Exception x){
			logger.severe( "Unable to load the driver class!" );
			errors.add("Unable to load the driver class for the database");
			status=-1;
			valid=false;
			return;
		}

		try{
			dbConnection=DriverManager.getConnection(connection, user, passwd);
		}
		catch( SQLException x ){
			logger.severe( "Couldn't get connection!" );
			errors.add("I couldnt get a conection to the database");
			status=-1;
			valid=false;
			return;
		}

		try {
			Statement statement = dbConnection.createStatement();
			rs = statement.executeQuery(sql);
			int columnCount = rs.getMetaData().getColumnCount();
			for (int index=1; index<=columnCount; index++)
				cols.add(rs.getMetaData().getColumnName(index));
		} catch (SQLException x) {
			// construction fails, so nobody would ever close the connection opened above
			try {
				dbConnection.close();
			} catch (SQLException ignored) {
				// the original failure is the one worth reporting
			}
			dbConnection=null;
			throw x;
		}
		TYPE=JDBC;
		display=sql;
		// same success marker as the other constructors; isValid() tests for it
		status=1;
		errors.clear();
	}

	/**
	 * Creates a reader over an in-memory sequence of JSON documents. No iterator is obtained here;
	 * reset() does that.
	 *
	 * @param docs the documents to iterate
	 */
	private TizaReader(Iterable<JsonObject> docs) {
		this.docs = docs;
		this.display = "Reprocessing records";
		this.TYPE = MONGO;
	}

	/**
	 * Replaces the column names used as keys of the records returned by readNext().
	 *
	 * @param headings the new column names; the list is stored as is, not copied
	 */
	public void setColumns(List<String> headings) {
		final List<String> replacement = headings;
		this.cols = replacement;
	}

	/**
	 * Returns the kind of source behind this reader.
	 *
	 * @return one of the type constants of this class, or -1 when none has been configured
	 */
	public int getType() {
		final int sourceKind = this.TYPE;
		return sourceKind;
	}

	/**
	 * Custom serialization hook: writes the reader's configuration as a fixed sequence of objects.
	 * readObject() reads the values back in exactly this order, so the two methods must be changed together.
	 * Only the values listed below are written; for example connection, sheetno and metaDoc are not.
	 *
	 * @param out stream to write to
	 * @throws IOException when writing fails
	 */
	private void writeObject(ObjectOutputStream out) throws ClassNotFoundException, IOException {
		out.writeObject(new Character(_delimiter));
		out.writeObject(new Character(_quote));
		out.writeObject(cols);
		out.writeObject(display);
		out.writeObject(driver);
		out.writeObject(encoding);
		out.writeObject(filename);
		out.writeObject(password);
		out.writeObject(sql);
		out.writeObject(new Integer(TYPE));
		out.writeObject(username);
		out.writeObject(new Boolean(valid));
		out.writeObject(rootNode);
	}

	/**
	 * Custom deserialization hook: restores the values written by writeObject(), in the same order.
	 * Any failure while reading is printed and otherwise ignored, which leaves the remaining fields unset.
	 * NOTE(review): defaultReadObject() is not called, so fields that are not listed here (errors,
	 * recordQueue, exec, status, ...) are presumably left at null/0 on a deserialized instance - confirm.
	 *
	 * @param in stream to read from
	 */
	private void readObject(
			ObjectInputStream in) throws ClassNotFoundException, IOException {

		try {
			_delimiter=(Character) in.readObject();
			_quote=(Character) in.readObject();
			cols=(ArrayList<String>) in.readObject();
			display=(String) in.readObject();
			driver=(String) in.readObject();
			encoding=(String) in.readObject();
			filename=(String) in.readObject();
			password=(String) in.readObject();
			sql=(String) in.readObject();
			TYPE=(Integer) in.readObject();
			username=(String) in.readObject();
			valid=(Boolean) in.readObject();
			rootNode=(String) in.readObject();
		} catch (Exception e) {
			// swallowed on purpose by the original author: the object is returned partially populated
			e.printStackTrace();
		}
	}


	/**
	 * Rewinds the reader so that the next readNext() starts again at the first record. Does nothing when
	 * the reader is not valid.
	 * <ul>
	 * <li>CSV: re-opens the file with the configured encoding (UTF-8 when none is set)</li>
	 * <li>EXCEL: re-opens the workbook</li>
	 * <li>JDBC: runs the query again on the existing connection</li>
	 * <li>OBJECTSTREAM: re-opens the file</li>
	 * <li>XML: stops the background parser, empties the queue and starts parsing again</li>
	 * <li>MONGO: obtains a fresh iterator</li>
	 * </ul>
	 *
	 * @throws SQLException when the query cannot be executed again
	 * @throws IOException  when a file cannot be re-opened
	 */
	public void reset() throws SQLException, IOException{
		if (valid){
			if (TYPE==CSV){
				// release the handle of the previous pass before opening a new one
				if (csv!=null){
					try {
						csv.close();
					} catch (IOException ignored) {
						// the old reader is being discarded anyway
					}
				}
				// decode exactly like the constructor does; without a charset name the platform default
				// would be used and non-ASCII data would change between passes
				String charsetName = encoding!=null ? encoding : "UTF-8";
				InputStreamReader reader = new InputStreamReader(new FileInputStream(filename), charsetName);
				csv = new CSVReader(reader, _delimiter, _quote);
			} else if (TYPE==EXCEL){
				openExcel();
			}
			else if (TYPE==JDBC){
				// close the previous cursor and its statement so they do not pile up on the connection
				try {
					if (rs!=null){
						Statement previous = rs.getStatement();
						rs.close();
						if (previous!=null)
							previous.close();
					}
				} catch (SQLException ignored) {
					// the old cursor is being discarded anyway
				}
				Statement statement = dbConnection.createStatement();
				rs = statement.executeQuery(sql);
			} else if (TYPE==OBJECTSTREAM){
				if (objectStream!=null){
					try {
						objectStream.close();
					} catch (IOException ignored) {
						// the old stream is being discarded anyway
					}
				}
				this.filename = filename.replaceAll("\\\\", "/");
				//check to see if the download needs to occur?
				InputStream stream = new FileInputStream(filename);
				objectStream = new ObjectInputStream(stream);
			} else if (TYPE==XML){
				execRunning=false;
				// startXML() refuses to run once 'started' is set, so clear it to allow the restart below
				started=false;
				if (exec!=null)
					exec.shutdownNow();
				exec = Executors.newSingleThreadExecutor();
				if (recordQueue!=null)
					recordQueue.clear();
				else
					recordQueue=new LinkedBlockingQueue<Map<String, Object>>(2000);
				startXML();
			} else if (TYPE==MONGO){
				docIterator = docs.iterator();
			}
		}
	}


	/**
	 * Creates a new, independent reader over the same source by calling the constructor matching the type
	 * of this reader with the stored settings.
	 *
	 * @return the new reader, or {@code null} when this reader has no known type
	 * @throws SQLException when a JDBC source cannot be queried
	 * @throws IOException  when a file source cannot be opened
	 */
	public TizaReader copy() throws SQLException, IOException {
		if (TYPE==CSV)
			// The CSV constructor parses anything but " and ' as a hexadecimal code, so hand the
			// quote over as %<hex>; a raw character would fail there or be read as another character.
			return new TizaReader(filename, "%"+Integer.toHexString(_quote), ""+_delimiter, encoding);
		else if (TYPE==EXCEL)
			// metaDoc can be null on an instance that was not built by the Excel constructor
			return new TizaReader(filename, sheetno, metaDoc==null ? null : metaDoc.toString());
		else if (TYPE==JDBC)
			return new TizaReader(driver, connection, username, password, sql);
		else if (TYPE==OBJECTSTREAM)
			return new TizaReader(filename);
		else if (TYPE==XML)
			return new TizaReader(filename, rootNode);
		else if (TYPE==MONGO)
			return new TizaReader(docs);
		return null;
	}


	/**
	 * Reads the next record from the source.
	 *
	 * @return the record as a map from column name to value, or {@code null} when the source is exhausted
	 *         or the reader is not valid
	 * @throws Exception when the underlying source fails
	 */
	public Map<String, Object> readNext() throws Exception{
		Map<String, Object> retval=null;
		// NOTE(review): 'valid' is a Boolean, so this monitor is presumably the shared Boolean.TRUE/FALSE
		// object rather than something private to this reader - confirm before relying on it.
		synchronized(valid){
			if (valid){
				if (TYPE==CSV){
					String[] v = csv.readNext();
					if (v!=null){
						retval=new HashMap<String, Object>();
						int pos=0;
						for (String column : cols){
							// short line: the remaining columns are simply absent from the record
							if (pos>v.length-1)
								break;
							Object value=v[pos].trim();
							retval.put(column, value);
							pos++;
						}
					}
				}
				else if (TYPE==EXCEL){
					String[] v= readExcelRow();
					if (v!=null){
						retval=new HashMap<String, Object>();
						int pos=0;
						for (String column : cols){
							if (pos>v.length-1)
								break;
							Object value=v[pos];
							retval.put(column, value);
							pos++;
						}
					}
				}
				else if (TYPE==MONGO) {
					// the constructor does not create the iterator, so do it on first use
					if (docIterator==null && docs!=null)
						docIterator=docs.iterator();
					if (docIterator!=null && docIterator.hasNext()){
						retval=new HashMap<String, Object>();
						JsonObject d = docIterator.next();
						Node.flattenDoc("", d, retval, "");
					}
				}
				else if (TYPE==XML)
					retval=readXMLRow();
				else if(TYPE==JDBC){
					// next() itself reports whether a row is available; testing isLast() afterwards
					// dropped the final row of every result
					if (rs.next()){
						retval=new HashMap<String, Object>();
						int columnCount=rs.getMetaData().getColumnCount();
						// JDBC column indexes start at 1
						for(int c=1; c<=columnCount; c++)
							retval.put(rs.getMetaData().getColumnName(c), rs.getObject(c));
					}

				} else if (TYPE==OBJECTSTREAM){
					try{
						RecordValues rv = (RecordValues) objectStream.readObject();
						retval = rv.toHashMap();
					}
					catch (EOFException endOfStream){
						// end of the stream is the normal way for this source to finish: return null
					}
				}

			}
		}
		return retval;
	}

	/**
	 * Determines the columns of an XML source. When no columns are known yet the records produced by the
	 * background parser are scanned (list indexes such as [3] are stripped from the names) until a block of
	 * 1000 records adds no new column or the input is exhausted, after which the reader is reset.
	 * In addition, one entry "name (Group)" or "name (List)" is added for every intermediate level of the
	 * dotted column names: List when the level has exactly one distinct child, Group otherwise.
	 *
	 * @param forMetadata not used
	 * @return the sorted column list (the same list that is stored in the reader)
	 */
	private List<String> getXMLCols(boolean forMetadata) {
		//look at all records and determine the used columns
		HashSet<String> columns = new HashSet<String>();
		startXML();
		if (cols==null || cols.size()==0){
			try{

				//scan until the columns stop changing
				boolean nonStop=true;
				int lastcolcount=0;
				int c=0;

				LinkedBlockingQueue<Map<String, Object>> queue = this.recordQueue;
				// keep going after the parser has finished until the queue is drained, otherwise a file
				// that is parsed faster than it is consumed would contribute no columns at all
				while (nonStop && queue!=null && (execRunning || !queue.isEmpty())){
					// a short timed wait instead of a hot spin while the parser is catching up
					Map<String, Object> rec = queue.poll(10, java.util.concurrent.TimeUnit.MILLISECONDS);
					if (rec!=null){
						for (String col : rec.keySet()){
							columns.add(col.replaceAll("\\[[0-9]*?\\]", ""));
						}
						c++;
						if (c % 1000 == 0){
							if (columns.size()==lastcolcount)
								nonStop=false;
							lastcolcount=columns.size();
						}
					}
				}
				reset();
			} catch (InterruptedException e){
				// keep the interrupt visible to whoever owns this thread
				Thread.currentThread().interrupt();
			} catch (Exception e){
				logger.log(java.util.logging.Level.SEVERE, "Unable to determine the columns of " + filename, e);
			}
		}

		//post process the found columns adding the higher level structures with their types?
		//level1/level2/textnode1
		//level1/level2/textnode2
		//level1/level3/textnode
		//level1/level3/textnode
		//level1/textnode3
		//textnode4
		//should create :
		//level1: structure
		//level1/level2 : list
		//level1/level3 : list
		//all others are text
		//metadata represents the higher level elements only
		Map<String, Set<String>> metadata = new HashMap<String, Set<String>>();
		for (String col: columns){
			StringBuilder incrementalParts = new StringBuilder();
			String[] colparts= col.split("\\.");
			for (int pos=0; pos<colparts.length-1; pos++){
				String nextpart=colparts[pos+1]; //protected by loop being < -1
				incrementalParts.append(colparts[pos]).append('.');
				String prefix=incrementalParts.toString();
				Set<String> children=metadata.get(prefix);
				if (children==null){
					children=new HashSet<String>();
					metadata.put(prefix, children);
				}
				children.add(nextpart);
			}
		}
		for (Map.Entry<String, Set<String>> level : metadata.entrySet()){
			String key=level.getKey();
			//if the set contains only 1 item - then its a list - else structure
			if (!key.equalsIgnoreCase(".")){
				String type="Group";
				if (level.getValue().size()==1)
					type="List";
				if (key.endsWith("."))
					key=key.substring(0, key.length()-1);
				columns.add(key + " ("+type+")");
			}
		}

		// the guard above allows for a missing column list, so do not fail on it here
		if (cols==null)
			cols=new ArrayList<String>();
		cols.addAll(columns);
		Collections.sort(cols);
		return cols;
	}

	/**
	 * Starts the background XML parser unless it is running or has already been started. The 'started'
	 * flag keeps the per-record calls from readXMLRow() from launching the parser a second time.
	 */
	public void startXML(){
		if (!execRunning && !started)
		{
			// both are null on an instance that was deserialized (exec is transient and readObject()
			// restores neither), so create them on demand
			if (recordQueue==null)
				recordQueue=new LinkedBlockingQueue<Map<String, Object>>(10000);
			if (exec==null)
				exec=Executors.newSingleThreadExecutor();

			execRunning=true;
			started=true;
			recordQueue.clear();
			SAXHandler handler = new SAXHandler(rootNode);

			exec.submit(new BackgroundSAXTask(handler));
		}
	}


	/**
	 * Takes the next parsed XML record from the queue, starting the parser when necessary and waiting for
	 * it while it is still running.
	 *
	 * @return the next record, or {@code null} when the parser has stopped and the queue is empty, or when
	 *         the reader has been closed
	 * @throws InterruptedException when the thread is interrupted while waiting for a record
	 */
	private Map<String, Object> readXMLRow() throws InterruptedException {
		startXML();
		// work on a local reference: close() sets the field to null
		LinkedBlockingQueue<Map<String, Object>> queue = this.recordQueue;
		if (queue==null)
			return null;

		Map<String, Object> record = null;
		while (record == null){
			// a short timed wait instead of a hot spin while the parser is catching up
			record = queue.poll(10, java.util.concurrent.TimeUnit.MILLISECONDS);
			if (record==null && !execRunning){
				// The parser may have queued its last records between the poll above and this test;
				// look once more so they are not mistaken for the end of the data.
				record = queue.poll();
				break;
			}
		}

		return record;
	}

	/**
	 * Returns a description of the source for display purposes.
	 *
	 * @return the stored description, or the text "Starting job" when none has been set
	 */
	public String getDisplay() {
		return (display != null) ? display : "Starting job";
	}

	/**
	 * Returns the column names of the source.
	 * For CSV the file is rewound and its first line is read (names are trimmed and dots are replaced by
	 * underscores); an empty file yields an empty list. For XML the columns are discovered by scanning the
	 * records. The other file and database types return the columns collected when the reader was created.
	 *
	 * @return the column names, or {@code null} when the reader is not valid, the type does not support
	 *         headings (MONGO) or reading them failed
	 */
	public List<String> getHeadings() {
		if (valid){
			try {
				if (TYPE==CSV){
					reset();
					String[] cs=csv.readNext();
					cols=new ArrayList<String>();
					// readNext() returns null for an empty file
					if (cs!=null){
						for (String c: cs){
							cols.add(c.trim().replaceAll("\\.", "_"));
						}
					}

					return cols;
				} else if (TYPE==EXCEL || TYPE==JDBC || TYPE==OBJECTSTREAM){
					return cols;
				} else if(TYPE==XML){
					cols=getXMLCols(true);
					return cols;
				} else if (TYPE==MONGO){
					// caught right below: the caller only ever sees the null return value
					throw new IdentizaException("Please dont call get headings on a collection, its stupid");
				}
			} catch (Exception e) {
				logger.log(java.util.logging.Level.SEVERE, "Unable to read the headings", e);
			}
		}
		return null;
	}





	/**
	 * Releases the resources held for the current source. Failures are logged, never thrown.
	 * For JDBC the result set and its statement are closed.
	 * NOTE(review): the database connection is left open because reset() reuses it; confirm who is meant
	 * to close it.
	 */
	public void close() {
		try {
			if (TYPE==CSV){
				if (csv!=null)
					csv.close();
			}
			else if (TYPE==EXCEL){
				closeExcel();
			} else if (TYPE==JDBC){
				if (rs!=null){
					Statement statement=null;
					try {
						statement=rs.getStatement();
					} catch (SQLException ignored) {
						// cursor already closed: there is no statement left to reach
					}
					rs.close();
					if (statement!=null)
						statement.close();
				}
			} else if (TYPE==OBJECTSTREAM){
				if (objectStream!=null)
					objectStream.close();
			}else if (TYPE==XML){
				// dropping the reference alone would leave the worker thread of the executor alive
				if (exec!=null)
					exec.shutdownNow();
				recordQueue=null;
				exec=null;
				execRunning=false;
			}
		} catch (
				Exception e) {
			logger.log(java.util.logging.Level.WARNING, "Problem while closing the reader", e);
		}

	}

	//excel functions
	/**
	 * Opens the workbook, selects the sheet and reads the header row into the column list.
	 * With a "sheetName" entry in the metadata the sheet is looked up by name and the region is taken from
	 * the "startRow", "endRow", "startCol" and "endCol" entries; otherwise the sheet with index sheetno is
	 * used from its first row and column to its last row and the last cell of the header row.
	 * The column list starts with the synthetic column "ROW"; empty header cells yield a null name.
	 * Any failure is logged and marks the reader as not valid.
	 */
	private void openExcel() {
		try {
			logger.info("Opening excel sheet " + filename + " please increase all power in the building");
			// NOTE(review): the substring keeps the leading "/" of the file name - confirm that is intended
			if (filename.indexOf("/")>-1)
				shortFile=filename.substring(filename.lastIndexOf("/"));
			else shortFile=filename;
			File xlfile = new File(filename);
			if (!xlfile.exists()){
				logger.severe(filename + " does not exist");
				// without a sheet nothing can be read; flag it like every other open failure
				valid=false;
				return;
			}

			workbook = WorkbookFactory.create(xlfile, null, true);
			Row row=null;
			if (metaDoc!=null && metaDoc.get("sheetName")!=null){
				String sheetName=metaDoc.get("sheetName").getAsString();
				if (sheetName==null)
					sheetName="sheet1";
				sheet=workbook.getSheet(sheetName);
				if (sheet==null){
					logger.severe(filename + " has no sheet named " + sheetName);
					valid=false;
					return;
				}
				XLSlastRowNum=metaDoc.get("endRow").getAsInt();
				XLSfirstRowNum=metaDoc.get("startRow").getAsInt();
				XLSlastColNum=metaDoc.get("endCol").getAsInt();
				XLSfirstColNum=metaDoc.get("startCol").getAsInt();
				row = sheet.getRow(XLSfirstRowNum);
			} else {
				sheet = workbook.getSheetAt(sheetno);
				XLSfirstRowNum=0;
				XLSfirstColNum=0;
				XLSlastRowNum=sheet.getLastRowNum();
				row = sheet.getRow(XLSfirstRowNum);
				if (row!=null)
					XLSlastColNum=row.getLastCellNum();
			}
			if (row==null){
				logger.severe(filename + " has no header row at row " + XLSfirstRowNum);
				valid=false;
				return;
			}

			cols = new ArrayList<String>();
			cols.add("ROW");
			for (int c=XLSfirstColNum; c<XLSlastColNum; c++){
				Cell cell = row.getCell(c);
				if (cell!=null)
					cols.add(getCellAsString(cell));
				else
					cols.add(null);
			}
			// readExcelRow() increments before reading, so start one row before the region
			rowcount=XLSfirstRowNum-1;

		}catch (org.apache.poi.EmptyFileException efe){
			logger.severe(filename + " is empty");
			valid=false;
		} catch (Exception e) {
			logger.log(java.util.logging.Level.SEVERE, "Unable to open " + filename, e);
			valid=false;
		}

	}
	/**
	 * Advances to the next spreadsheet row and returns its values as strings.
	 * <p>
	 * Element 0 is a row identifier of the form
	 * {@code [shortFile]sheetName!rowNum}. The following elements hold the cells
	 * from {@code XLSfirstColNum} (inclusive) to {@code XLSlastColNum}
	 * (exclusive), stored relative to the first column so that they line up with
	 * the headings built in {@code openExcel()} even when the range does not
	 * start at column 0. Missing cells yield {@code null}.
	 *
	 * @return the row values, or {@code null} when the row index is past
	 *         {@code XLSlastRowNum}, the row does not exist, or reading it failed
	 */
	private String[] readExcelRow() {

		rowcount++;
		if (rowcount>XLSlastRowNum)
			return null;

		try{
			Row row = sheet.getRow(rowcount);
			if (row==null)
				return null;

			String[] values = new String[Math.max(cols.size()+1, row.getLastCellNum())];
			values[0]="["+shortFile+"]"+sheet.getSheetName()+"!"+row.getRowNum();
			for (int c=XLSfirstColNum; c<XLSlastColNum; c++){
				Cell cell = row.getCell(c);
				// Index relative to the first column: an absolute index shifts the
				// values away from their headings (and can overflow the array)
				// whenever the configured range starts after column 0.
				int target = c-XLSfirstColNum+1;
				values[target] = (cell!=null) ? getCellAsString(cell) : null;
			}
			return values;
		} catch (Exception ignored){
			// Best effort, as before: an unreadable row is reported as "no row".
			return null;
		}
	}


	/**
	 * Renders a spreadsheet cell as text.
	 * <p>
	 * String cells return their rich-text value and boolean cells
	 * {@code "true"}/{@code "false"}. Numeric cells formatted as dates are
	 * rendered with {@link DateFormat#getInstance()}; other numeric cells use
	 * {@link Double#toString(double)} with a trailing {@code ".0"} removed.
	 * Every other cell type, formulas included, falls back to
	 * {@code cell.toString()}.
	 *
	 * @param cell the cell to convert; must not be {@code null}
	 * @return the textual form of the cell
	 */
	private String getCellAsString(Cell cell){
		switch (cell.getCellType()) {
		case Cell.CELL_TYPE_BOOLEAN:
			return Boolean.toString(cell.getBooleanCellValue());

		case Cell.CELL_TYPE_STRING:
			return cell.getRichStringCellValue().getString();

		case Cell.CELL_TYPE_NUMERIC: {
			if (DateUtil.isCellDateFormatted(cell)) {
				return DateFormat.getInstance().format(cell.getDateCellValue());
			}
			String number = Double.toString(cell.getNumericCellValue());
			// Whole numbers come back as "12.0"; present them as "12".
			return number.endsWith(".0") ? number.replaceAll("\\.0$", "") : number;
		}

		default:
			// Formula cells and any remaining types use the cell's own rendering.
			return cell.toString();
		}
	}

	/**
	 * Closes the workbook if one was opened.
	 * <p>
	 * The workbook is still {@code null} when opening failed before it was
	 * created (for example a missing file), so that case is skipped instead of
	 * raising a {@link NullPointerException}. An {@link IOException} from the
	 * close itself is printed and otherwise ignored.
	 */
	private void closeExcel() {
		if (workbook == null) {
			return;
		}
		try {
			workbook.close();
		} catch (IOException e) {
			e.printStackTrace();
		}

	}

	/**
	 * Manual smoke test: loads the HSQLDB driver, connects to a local test
	 * database and creates the {@code DEMODATA} table, printing the result of
	 * each statement.
	 * <p>
	 * Driver-loading and connection failures are logged and end the method.
	 * SQL failures while creating the table are printed. The connection and
	 * statement are always closed.
	 *
	 * @param args unused
	 */
	public static void testJDBC (String[] args){
		try {
			// Loading the class is enough for the driver to register itself.
			Class.forName("org.hsqldb.jdbc.JDBCDriver"); //Or any other driver
		}
		catch(Exception x){
			logger.severe( "Unable to load the driver class!" );

			return;
		}

		Connection dbConnection2;
		try{
			dbConnection2 = DriverManager.getConnection("jdbc:hsqldb:hsql:/localhost/test", "sa", null);
		}
		catch( SQLException x ){
			logger.severe( "Couldn't get connection!" );
			return;
		}

		// try-with-resources closes the statement first, then the connection,
		// whether or not the DDL succeeds.
		try (Connection connection = dbConnection2;
				Statement statement = connection.createStatement()) {
			System.out.println(statement.execute("create table public.public.DEMODATA (acctid varchar(100), name varchar(255), stline1 varchar(255), stline2 varchar(255), city varchar(255), statecode varchar(255), postcode varchar(255))"));
			System.out.println(statement.execute("commit"));
		} catch (SQLException e) {
			e.printStackTrace();
		}

	}



	/**
	 * Runnable that parses the file named by the enclosing reader's
	 * {@code filename} with a SAX parser, feeding events to the given handler.
	 * <p>
	 * Whatever the outcome, {@code execRunning} is cleared when {@link #run()}
	 * finishes, so a consumer polling the record queue can stop waiting.
	 */
	class BackgroundSAXTask implements Runnable{
		private SAXHandler handler;
		private SAXParser parser;

		/**
		 * Creates the task and its SAX parser.
		 * <p>
		 * If the parser cannot be created the failure is printed and
		 * {@code parser} stays {@code null}; {@link #run()} then ends at once.
		 *
		 * @param handler receiver of the SAX events
		 */
		public BackgroundSAXTask(SAXHandler handler){
			this.handler = handler;
			try {
				SAXParserFactory parserFactor = SAXParserFactory.newInstance();
				parser = parserFactor.newSAXParser();
			} catch (ParserConfigurationException | SAXException e) {
				e.printStackTrace();
			}
		}

		/**
		 * Parses the file. A {@code SAXException} (including the handler's own
		 * "aborted" signal) discards any queued records; I/O failures are
		 * printed. The input stream is always closed.
		 */
		@Override
		public void run() {
			try{
				if (parser == null) {
					logger.severe("No SAX parser available, cannot read " + filename);
					return;
				}
				try (InputStream in = new FileInputStream(filename)) {
					parser.parse(in, handler);
				}
			} catch (org.xml.sax.SAXException e){
				// Raised on malformed input or when the handler aborts on stop/close.
				execRunning=false;
				// close() may already have dropped the queue reference.
				if (recordQueue != null)
					recordQueue.clear();
			} catch (IOException e) {
				// Also covers FileNotFoundException.
				e.printStackTrace();
			} finally {
				// Previously left set on I/O failure, which made the consumer
				// loop wait forever for records that would never arrive.
				execRunning=false;
			}
		}

	}

	/**
	 * SAX handler that flattens every occurrence of the configured root element
	 * into one record and offers it to the enclosing reader's record queue.
	 * <p>
	 * Record keys are dotted element paths with an occurrence index on each
	 * segment (for example {@code address[0].line[1]}); values are the trimmed
	 * element text. Namespace prefixes are ignored when building paths and when
	 * matching the root element, which is compared case-insensitively.
	 * <p>
	 * Every callback aborts the parse with a {@link SAXParseException} once the
	 * reader's {@code execRunning} flag has been cleared.
	 */
	class SAXHandler extends DefaultHandler  {
		String content;
		String rootNode;
		java.util.Stack<String> path=new Stack<String>();
		boolean inRoot=false;
		Map<String, Object> record = new HashMap<String, Object>();
		Map<String, Integer> pathCountList = new HashMap<String, Integer>();
		Integer pathCount=0;
		public int count;

		/**
		 * @param rootNode name of the element that delimits one record; a
		 *        {@code null} value is treated as the empty string
		 */
		public SAXHandler (String rootNode){
			this.rootNode=rootNode;
			content="";
			count=0;

		}

		/** Returns the element name without any namespace prefix. */
		private String stripPrefix(String qName) {
			return qName.substring(qName.lastIndexOf(':') + 1);
		}

		/**
		 * Enters a record when the root element starts; inside a record, pushes
		 * the indexed path of the element that is starting.
		 */
		@Override
		public void startElement(String uri, String localName,
				String qName, Attributes attributes)
						throws SAXParseException {
			if (!execRunning)
				throw new SAXParseException("XML Reader Aborted prematurely", null);

			if (rootNode==null)
				rootNode="";

			String sqName=stripPrefix(qName);

			if (!inRoot && (rootNode.equalsIgnoreCase(qName) || rootNode.equalsIgnoreCase(sqName))){
				inRoot=true;
				path=new Stack<String>();
			} else if (inRoot){
				String parentPath="";
				String lookuppath=parentPath;
				if (!path.isEmpty()){
					parentPath=path.peek();
					lookuppath=parentPath+"."+sqName;
				}
				// NOTE(review): direct children of the root share the empty lookup
				// key and therefore always get index 0 - confirm this is intended.
				pathCount=pathCountList.get(lookuppath);
				if (pathCount==null || lookuppath.length()==0){
					pathCount=0;
				}
				else{
					pathCount++;
				}
				pathCountList.put(lookuppath, pathCount);
				String delim="";
				if (parentPath.length()>0)
					delim=".";
				String thispath=parentPath+delim+sqName+"["+pathCount+"]";

				path.push(thispath);
			}

		}

		/**
		 * Stores the accumulated text under the path of the element that is
		 * ending and, when that element is the root, queues the finished record
		 * and resets the per-record state.
		 */
		@Override
		public void endElement(String uri, String localName,
				String qName) throws SAXParseException {
			if (!execRunning)
				throw new SAXParseException("XML Reader Aborted prematurely", null);
			if (inRoot){
				String thispath="";
				if (!path.isEmpty())
					thispath=path.pop();

				// Trim once per element rather than once per chunk, see characters().
				String text=content.trim();
				if (text.length()>0){
					record.put(thispath, text);
				}

				String sqName=stripPrefix(qName);
				if (rootNode.equalsIgnoreCase(qName) || rootNode.equalsIgnoreCase(sqName)){
					count++;
					// Retry until the queue accepts the record, but give up as soon
					// as the reader is stopped or this thread is interrupted.
					while (!recordQueue.offer(record)){
						try {
							Thread.sleep(1000);
						} catch (InterruptedException e) {
							Thread.currentThread().interrupt();
							throw new SAXParseException("XML Reader Aborted prematurely", null);
						}
						if (!execRunning)
							throw new SAXParseException("XML Reader Aborted prematurely", null);
					}

					record = new HashMap<String, Object>();
					pathCountList = new HashMap<String, Integer>();
					inRoot=false;
				}
			}
			content="";
		}

		/**
		 * Appends a chunk of character data to the pending text.
		 * <p>
		 * A parser may deliver one text node in several chunks (for instance
		 * around entity references), so the chunk is appended untrimmed;
		 * trimming each chunk would delete whitespace inside the value.
		 */
		@Override
		public void characters(char[] ch, int start, int length)
				throws SAXParseException {
			if (!execRunning)
				throw new SAXParseException("XML Reader Aborted prematurely", null);
			content += String.copyValueOf(ch, start, length);
		}

	}

	/**
	 * Requests that reading stop: resets {@code status} to 0 and clears the
	 * {@code execRunning} flag, which the SAX handler checks on every callback.
	 */
	public void stop(){
		status = 0;
		execRunning = false;
	}

	/**
	 * Ad-hoc driver: unzips a fixed archive under {@code /private/tmp/tmp},
	 * prints every string returned by {@code unzip}, then exits with status 0.
	 * Any exception is printed before exiting.
	 *
	 * @param args unused
	 */
	public static void main (String[] args){
		final String archive = "/private/tmp/tmp/b9efc25b-2fe0-4dd2-bb51-2f5b401ff05e.tar.gz";
		final String target = "/private/tmp/tmp/b9efc25b-2fe0-4dd2-bb51-2f5b401ff05e.tar";

		try {
			for (String entry : unzip(archive, target, null)) {
				System.out.println(entry);
			}
		} catch (Exception e) {
			e.printStackTrace();
		}

		System.exit(0);
	}






}
