/*******************************************************************************
 * Copyright notice
 * 
 * This source code is copyright of Robert James Haynes - (c) 2010, 2011. All rights reserved.
 * 
 * Any redistribution, reproduction or decompilation of part or all of the code in any form is prohibited 
 * 
 * You may not, except with our express written permission, distribute or commercially exploit the content. Nor may you transmit it or store it in or display it on any website or other form of electronic retrieval system.
 ******************************************************************************/
/**
 *
	Identiza - Fuzzy matching Libraries
    
    Copyright (C) 2019  Robert James Haynes (EntityStream KFT), Budapest Hungary

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as
    published by the Free Software Foundation, either version 3 of the
    License, or (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program.  If not, see https://www.gnu.org/licenses/agpl-3.0.en.html
 */
package com.entitystream.identiza.entity.resolve.types;

import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;

import com.entitystream.identiza.entity.resolve.metadata.Purpose;
import com.entitystream.identiza.wordlist.RuleSet;

//import java.util.logging.Logger;

import com.entitystream.identiza.wordlist.WordList;
import com.entitystream.identiza.wordlist.WordObject;



public class MatchBase implements MatchProcInterface, Serializable {

	protected RuleSet ruleSet;
	protected boolean partialKeys=true;

	protected String type="MatchBase";

	protected int gradient;
	protected boolean isMandatory;
	protected String ruleFunction;
	protected int maxScore = MatchProcInterface.MATCH_EXACT;	
	protected Logger logger = Logger.getLogger("com.identiza");
	protected int minWidth;
	protected int maxWidth;
	

	public MatchBase(String ruleFunction, int _minwidth, int _maxwidth) throws Exception{
		
		this.ruleFunction=ruleFunction;		
		this.minWidth=_minwidth;
		this.maxWidth=_maxwidth;	
	}
	
	public MatchBase(String ruleFunction){	
		
		this.ruleFunction=ruleFunction;	
	}

	public MatchBase(String ruleFunction, int minwidth, int maxwidth, String type) throws Exception{
		
		this.type = type;
		this.ruleFunction=ruleFunction;	
		this.minWidth=minwidth;
		this.maxWidth=maxwidth;
	}
	
	protected ArrayList<String> compressInitials(String[] in) {
		String accumulate="";
		ArrayList<String> out = new ArrayList<String>();
		for (String word : in){
			if (word.trim().length()==1){
				accumulate+=word.trim();
			} else {
				if (accumulate.length()>0){
					out.add(accumulate);
					accumulate="";
				}
				out.add(word);
			}
		}
		if (accumulate.length()>0){
			out.add(accumulate);
			accumulate="";
		}
		return out;
	}

	public void init(String ruleFunction){
		this.ruleFunction=ruleFunction;
	}

	public String getType() {
		return type;
	}


	@Override
	public void setRuleFunction(String ruleFunction) {
		// TODO Auto-generated method stub
		this.ruleFunction=ruleFunction;
	}

	public boolean isMandatory() {
		return isMandatory;
	}

	public void setMandatory(boolean isMandatory) {
		this.isMandatory = isMandatory;
	}

	//private transient Weights weights;
	public void setType(String type) {
		this.type = type;
	}


	public void setRuleSet(RuleSet ruleset) {
		this.ruleSet = ruleset;
	}

	public WordList getKeyCv() {
		return ruleSet.getKeyControlledVocabulary();
	}

	public WordList getKeyAnon() {
		return ruleSet.getKeyAnon();
	}
	
	public WordList getRuleAnon() {
		return ruleSet.getRuleAnon();
	}
	
	public WordList getRuleCv() {
		return ruleSet.getRuleControlledVocabulary();
	}
	
	public WordList getKeyLookup() {
		return ruleSet.getKeyLookup();
	}
	public WordList getRuleLookup() {
		return ruleSet.getRuleLookup();
	}


	protected HashSet<String> getAlternateWords(String word, WordList anon, WordList controlVocabulary) 
	{
		HashSet<String> alternates = new HashSet<String>();
		//add this word
		String key = this.getKey(word);
		if (key.length()>0)
			alternates.add(key);
		if (anon==null || anon.isUseful(word)){
			//now get alternate keys
			if (controlVocabulary!=null){
				String kkey = controlVocabulary.generateKey(word);
				if (kkey != null){
					WordObject wordlist = controlVocabulary.getWord(kkey);
					if (wordlist!=null){
						ArrayList<String> parents = wordlist.getSameParents();
						for (String parent : parents){
							WordObject wo = controlVocabulary.getWord(parent);
							String key2 = getKey(wo.getWord());
							if (key2.length()>0)
								alternates.add(key2);
						}
					}
				}
			}
		}
		return alternates;
	}

	@Override
	public Collection<String> getKeys(Object value, boolean batch){
		return getKeys(value, batch, false);
	}
	
	@Override
	public Collection<String> getKeys(Object value, boolean batch, boolean isSearch){
		/// 45 fraser rd killcare becomes
		//// FRASERKILLCARE
		//// KILLCAREFRASER
		//// ALTERNATEKILLCARE
		//// KILLCAREALTERNATE
		String[] tempwords = new String[0];
		if (value instanceof String){
			value = new String[]{(String)value};
		}
		//key is the original word, list of alternate keys
		HashMap<String, HashSet<String>> alternates = new HashMap<String, HashSet<String>>();
		for (int j=0; j<((String[])value).length; j++){
			if (((String[])value)[j]!=null) {
				tempwords = WordList.split(((String[])value)[j]);//.split("[.,!?:; ()&-]+\\s*");
				for (int i=0; i<tempwords.length; i++){
					String word = tempwords[i];
					if((ruleSet.getKeyAnon()!=null && ruleSet.getKeyAnon().isUsefulForComparison(word,1)) 
							|| (ruleSet.getKeyAnon()==null && WordList.sisUsefulForComparison(word,1))){						
						if (!alternates.containsKey(word)){	
							alternates.put(word, new HashSet<String>());
						}
						alternates.get(word).addAll(getAlternateWords(word, ruleSet.getKeyAnon(), ruleSet.getKeyControlledVocabulary()));
					}
				}
			}
		}		
		//construct key from all combinations		
		ArrayList<HashSet<String>> temp = new ArrayList<HashSet<String>>();				
		for (String origWord : alternates.keySet()){
			HashSet<String> res = alternates.get(origWord);
			if (res!=null && res.size()>0)
				temp.add(res);
		}

		if (temp.size()>0){			
			if (!isSearch)
			   return processKey(temp, 0, "", partialKeys, isSearch, minWidth);
			else
			   return processKey(temp, 0, "", partialKeys, isSearch, tempwords.length);
		}
		else
			return null;
	}


	//recursively creates key combinations
	protected HashSet<String> processKey(ArrayList<HashSet<String>> alternates, int startpos, String keySoFar, boolean partialKeys, boolean isSearch, int searchWidth){		
		HashSet<String> allKeys = new HashSet<String>();
		if (alternates.size()>0){
			for (String comb1 : alternates.get(startpos)){
				String newKey="";
				if (keySoFar.length()>0)
					newKey = keySoFar+":"+comb1;
				else
					newKey = comb1;
				if (startpos<alternates.size()-1)
					allKeys.addAll(processKey(alternates, startpos+1, newKey, partialKeys, isSearch, searchWidth));
				else {
					if (!isSearch) 				
					    allKeys.addAll(allCombs(newKey, partialKeys, false, minWidth, maxWidth));
				    else
				    	allKeys.addAll(allCombs(newKey, partialKeys, false, searchWidth, searchWidth));
				}
			}
		}

		return allKeys;
	}

	protected Collection<String> processKeyInOrder(ArrayList<ArrayList<String>> alternates, int startpos, String keySoFar, boolean partialKeys){		
		Set<String> allKeys = new HashSet<String>();
		if (alternates.size()>0){
			for (String comb1 : alternates.get(startpos)){
				String newKey="";
				if (keySoFar.length()>0)
					newKey = keySoFar+":"+comb1;
				else
					newKey = comb1;
				if (startpos<alternates.size()-1)
					allKeys.addAll(processKeyInOrder(alternates, startpos+1, newKey, partialKeys));
				else allKeys.add(newKey);
			}
		}

		return allKeys;
	}

	//mixes up keys so the firstname, middlename, lastname becomes lastname, middle, first, first,last, mid etc
	protected ArrayList<String> allCombs(String key, boolean partial, boolean alwaysUseFirst, int minWidth, int maxWidth){
		ArrayList<String> ret = new ArrayList<String>();
		if (key.startsWith(":"))
			key = key.substring(1);
		String[] combs = key.split(":");
		if (alwaysUseFirst && combs[0].length()>0)
			ret.add(combs[0]);		
		for (int i=0;i<combs.length; i++){
			String newkey="";
			int _width=0;
			for (int j=i;j<combs.length+i; j++){
				int k=0;
				if (j<combs.length)
					k=j;
				else
					k=j-combs.length;
				_width=_width+1;
				newkey=newkey+combs[k];
				if ((partial) || j<combs.length+i){
					if ((_width>=minWidth && _width<=maxWidth) || combs.length==1)// || (i==0 && j==i))
						if (newkey.length()>0)
							ret.add(newkey);
				}
			}				
		}
		if (!isMandatory)
			ret.add("");
		return ret;
	}

	public String sha256(String base) {
	    try{
	        MessageDigest digest = MessageDigest.getInstance("SHA-256");
	        byte[] hash = digest.digest(base.getBytes("UTF-8"));
	        StringBuffer hexString = new StringBuffer();

	        for (int i = 0; i < hash.length; i++) {
	            String hex = Integer.toHexString(0xff & hash[i]);
	            if(hex.length() == 1) hexString.append('0');
	            hexString.append(hex);
	        }

	        return hexString.toString();
	    } catch(Exception ex){
	       throw new RuntimeException(ex);
	    }
	}
	
	

	@Override
	public int getMinWidth() {
		// TODO Auto-generated method stub
		return minWidth;
	}

	@Override
	public int getMaxWidth() {
		// TODO Auto-generated method stub
		return maxWidth;
	}

	@Override
	public String getKey(String word) {
		// TODO Auto-generated method stub
		return null;
	}

	@Override
	public boolean isTextIndex() {
		return false;		
	}

	

	@Override
	public void setGradient(int gradientType) {
		this.gradient=gradientType;

	}

	@Override
	public Collection<String> getKeys(HashMap<String, String> value, boolean batch) {
		// TODO Auto-generated method stub
		return null;
	}

	@Override
	public double calculateComparisonScore(Standardized stdBase, Standardized stdComp, boolean isSearch, boolean asContent){
		//check match is valid first ie MR<>MRS
		if (ruleSet.getRuleControlledVocabulary()!=null && ruleSet.getRuleControlledVocabulary().isBadWords(stdBase.getComparitorWords(), stdComp.getComparitorWords()))
			return 0.0;
		try{
		return stdBase.compare(stdComp, ruleSet.getRuleAnon(), ruleSet.getRuleControlledVocabulary(), isSearch, asContent);
		} catch (Exception e){
			logger.severe("Compare Calculation failed: "+ e.toString());
			e.printStackTrace();
			return 0.0;
		}
	}
	
	@Override
	public Standardized standardise(String originalText, String[] words){
		return new StandardPhonetic(originalText, words, this.getRuleAnon(), this.getRuleLookup(), gradient,ruleFunction);
	}
	

	@Override 
	public ArrayList<String> getPurposeKeys(String in ){
		if (in!=null)
			return compressInitials(WordList.split(in.toUpperCase()));
		else
			return new ArrayList<String>();
	}

	@Override
	public Class<Standardized> getStandardClass() {
		return Standardized.class;
	}



	
}
