/**
 *
	Identiza - Fuzzy matching Libraries
    
    Copyright (C) 2019  Robert James Haynes (EntityStream KFT), Budapest Hungary

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as
    published by the Free Software Foundation, either version 3 of the
    License, or (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program.  If not, see https://www.gnu.org/licenses/agpl-3.0.en.html
 */
package com.entitystream.identiza.entity.resolve.types;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;

import com.entitystream.identiza.entity.resolve.utils.NYSIIS;
import com.entitystream.identiza.wordlist.AnonymousWordList;
import com.entitystream.identiza.wordlist.RuleSet;
import com.entitystream.identiza.wordlist.WordList;

/**
 * Match rule for person names.
 *
 * <p>Comparison and standardisation are delegated to {@link Standardized#compare} and
 * {@code StandardPersonName}; this class mainly contributes the blocking-key generation
 * in {@link #getKeys(Object, boolean, boolean)} and the surname encoding in
 * {@link #getKey(String)}.
 *
 * <p>Implementation note: the class is {@link Serializable} and declares no explicit
 * {@code serialVersionUID}, so no static initialiser or non-private member is added here
 * in order not to disturb the default computed serial version.
 */
public class MatchPersonName extends MatchBase  implements Serializable{

	/** Upper bound on how many distinct given names contribute to the generated keys. */
	private static final int MAX_KEYED_FIRST_NAMES = 2;

	/**
	 * Creates the matcher with the given name and width limits; all three arguments are
	 * passed straight to the superclass constructor.
	 *
	 * @param name     passed to the superclass
	 * @param minwidth passed to the superclass
	 * @param maxwidth passed to the superclass
	 * @throws Exception if the superclass constructor fails
	 */
	public MatchPersonName(String name, int minwidth, int maxwidth) throws Exception{
		super(name, minwidth, maxwidth);
	}

	/**
	 * Creates the matcher, registering it with the superclass under the fixed tag
	 * {@code "MATCHPERSONNAME"} and capping {@code maxScore} at
	 * {@link MatchProcInterface#MATCH_VCLOSE}.
	 *
	 * <p>NOTE(review): {@code start}, {@code end} and {@code gradient} are accepted but not
	 * used by this constructor; in particular the {@code gradient} argument is not stored,
	 * although {@link #standardise(String, String[])} reads a {@code gradient} field.
	 * Confirm whether that is intentional.
	 *
	 * @param name     passed to the superclass
	 * @param minwidth passed to the superclass
	 * @param maxwidth passed to the superclass
	 * @param start    currently unused
	 * @param end      currently unused
	 * @param gradient currently unused
	 * @throws Exception if the superclass constructor fails
	 */
	public MatchPersonName(String name, int minwidth, int maxwidth, int start, int end,int gradient)  throws Exception{
		super(name, minwidth, maxwidth, "MATCHPERSONNAME");
		maxScore = MatchProcInterface.MATCH_VCLOSE;
	}

	/**
	 * Creates the matcher with only a name and switches {@code partialKeys} off.
	 *
	 * @param name passed to the superclass
	 */
	public MatchPersonName(String name) {
		super(name);
		partialKeys=false;
	}

	/**
	 * Scores two standardised names by delegating to {@code stdBase.compare(...)} with this
	 * matcher's anonymous-word and conversion rules.
	 *
	 * @param stdBase   the standardised base value; must not be {@code null}
	 * @param stdComp   the standardised value to compare against
	 * @param isSearch  forwarded unchanged to the comparison
	 * @param asContent forwarded unchanged to the comparison
	 * @return the score returned by {@code stdBase.compare(...)}
	 */
	@Override
	public double calculateComparisonScore(Standardized stdBase, Standardized stdComp, boolean isSearch, boolean asContent){
		return stdBase.compare(stdComp, getRuleAnon(), getRuleCv(), isSearch, asContent);
	}

	/**
	 * Builds the standardised form of a name from its already-split words.
	 *
	 * @param originalText not used by this implementation
	 * @param words        the words making up the name
	 * @return a new {@code StandardPersonName} built from {@code words} and this matcher's
	 *         rules
	 */
	@Override
	public Standardized standardise(String originalText, String[] words){
		return new StandardPersonName(words, this.getRuleAnon(), this.getRuleLookup(), gradient,ruleFunction);
	}

	/**
	 * Generates the lookup keys for a person name.
	 *
	 * <p>The supplied strings are filtered for usefulness, joined into one name and split
	 * into words. The last word is treated as the surname and encoded with
	 * {@link #getKey(String)}; every alternate form of the leading given names (as returned
	 * by {@code getAlternateWords}) is prefixed to that encoded surname. Illustratively,
	 * "Robert James Haynes" yields keys built from the alternates of ROBERT and JAMES
	 * followed by the encoded form of HAYNES. When there are no given-name alternates the
	 * encoded surname alone is the key.
	 *
	 * <p>Only the first {@value #MAX_KEYED_FIRST_NAMES} distinct given names, in the order
	 * they are written, contribute. Earlier revisions picked them in hash-map iteration
	 * order, so which given names were used was arbitrary for names with three or more
	 * given names; keys stored from such names may need to be regenerated.
	 *
	 * @param value    a {@code String} or {@code String[]} holding the name parts;
	 *                 {@code null} yields an empty result
	 * @param batch    not used by this implementation
	 * @param isSearch not used by this implementation
	 * @return the set of generated keys; never {@code null} and never containing a
	 *         {@code null} or empty key
	 * @throws ClassCastException if {@code value} is neither a {@code String} nor a
	 *                            {@code String[]}
	 */
	@Override
	public Collection<String> getKeys(Object value, boolean batch, boolean isSearch){
		long sttime = System.currentTimeMillis();
		Set<String> ret = new HashSet<String>();
		if (value == null) {
			return ret;
		}

		String[] parts = (value instanceof String) ? new String[]{(String) value} : (String[]) value;
		String fullValue = joinUsefulWords(parts);

		if (fullValue.length() != 0) {
			String[] tempwords = WordList.split(fullValue);
			String rawLastname = tempwords.length > 0 ? tempwords[tempwords.length - 1] : "";
			String lastname = getKey(rawLastname);

			// An empty or null surname key would bucket unrelated records together, so a
			// name whose surname encodes to nothing produces no keys at all.
			if (lastname != null && lastname.length() > 0) {
				Set<String> firstNameKeys = collectFirstNameKeys(tempwords);
				if (firstNameKeys.isEmpty()) {
					ret.add(lastname);
				} else {
					for (String firstnameKey : firstNameKeys) {
						if (firstnameKey != null && firstnameKey.length() > 0) {
							ret.add(firstnameKey + lastname);
						}
					}
				}
			}
		}

		if (ret.size() > 50) {
			logger.info(fullValue + " returned a lot of keys " + ret.size());
		}
		long elapsed = System.currentTimeMillis() - sttime;
		if (elapsed > 1000) {
			logger.info(fullValue + " took a long time to get keys " + elapsed);
		}
		return ret;
	}

	/**
	 * Joins the non-null, trimmed parts that pass the usefulness check into one
	 * space-separated string. The key anonymous-word list is consulted when one is
	 * configured, otherwise the static {@code WordList} check is used.
	 */
	private String joinUsefulWords(String[] parts) {
		StringBuilder joined = new StringBuilder();
		for (int j = 0; j < parts.length; j++) {
			if (parts[j] == null) {
				continue;
			}
			String word = parts[j].trim();
			boolean useful = getKeyAnon() == null
					? WordList.sisUsefulForComparison(word, 1)
					: getKeyAnon().isUsefulForComparison(word, 1);
			if (useful) {
				joined.append(' ').append(word);
			}
		}
		return joined.toString().trim();
	}

	/**
	 * Collects the alternate forms of the leading given names, i.e. of every word except
	 * the last, stopping once {@link #MAX_KEYED_FIRST_NAMES} distinct words have been used.
	 */
	private Set<String> collectFirstNameKeys(String[] words) {
		Set<String> keys = new HashSet<String>();
		Set<String> seen = new HashSet<String>();
		for (int i = 0; i < words.length - 1 && seen.size() < MAX_KEYED_FIRST_NAMES; i++) {
			String word = words[i];
			// A repeated given name adds nothing new and must not use up the quota.
			if (seen.add(word)) {
				keys.addAll(getAlternateWords(word, getKeyAnon(), getKeyCv()));
			}
		}
		return keys;
	}

	/**
	 * Encodes a single word as a key: the word is upper-cased, stripped of everything
	 * except {@code A-Z} and {@code 0-9}, and passed to {@code NYSIIS.encode}.
	 *
	 * <p>Upper-casing uses {@link Locale#ROOT} so that the result does not depend on the
	 * JVM default locale (under a Turkish locale {@code "i"} would otherwise become a
	 * dotted capital and be stripped by the character filter).
	 *
	 * @param word the word to encode; may be {@code null}
	 * @return the encoded key, or {@code null} when {@code word} is {@code null}
	 */
	@Override
	public String getKey(String word){
		if (word == null) {
			return null;
		}
		return NYSIIS.encode(word.toUpperCase(Locale.ROOT).trim().replaceAll("[^A-Z0-9]", ""));
	}

	/**
	 * Small manual smoke test: configures a matcher with an empty rule set and prints the
	 * keys generated for a single-word name.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args){
		MatchPersonName mcn = new MatchPersonName("X");
		try {
			mcn.minWidth=1;
			mcn.maxWidth=3;
			mcn.setRuleSet(RuleSet.emptyRuleSet());
		} catch (Exception e) {
			// Demo entry point only: report the configuration failure and carry on.
			e.printStackTrace();
		}

		System.out.println(mcn.getKeys("FREDERICK", false));
	}
}
