/**
 *
	Identiza - Fuzzy matching Libraries
    
    Copyright (C) 2019  Robert James Haynes (EntityStream KFT), Budapest Hungary

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as
    published by the Free Software Foundation, either version 3 of the
    License, or (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program.  If not, see https://www.gnu.org/licenses/agpl-3.0.en.html
 */
package com.entitystream.identiza.entity.resolve.types;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;

import com.entitystream.identiza.entity.resolve.utils.NYSIIS;
import com.entitystream.identiza.entity.resolve.utils.Syllables;
import com.entitystream.identiza.wordlist.WordList;

/**
 * Person-name matcher whose index keys are built from the leading syllables of
 * each name part and then phonetically encoded with {@link NYSIIS}.
 *
 * <p>Comparison and standardisation are delegated to {@link Standardized} /
 * {@link StandardPersonName}; this class contributes the key generation only.
 *
 * <p>The class is {@link Serializable} but declares no {@code serialVersionUID};
 * only private helpers are used here so that the default computed identifier is
 * not disturbed.
 */
public class MatchPersonNameSyllables  extends MatchBase  implements Serializable {

	/**
	 * Creates a matcher, passing all arguments straight to the superclass.
	 *
	 * @param name     matcher name handed to the superclass
	 * @param minwidth minimum width handed to the superclass
	 * @param maxwidth maximum width handed to the superclass
	 * @throws Exception if the superclass constructor fails
	 */
	public MatchPersonNameSyllables(String name, int minwidth, int maxwidth) throws Exception{
		super(name, minwidth, maxwidth);
	}

	/**
	 * Creates a matcher registered under the {@code "MATCHPERSONNAME"} tag and
	 * caps its maximum score at {@link MatchProcInterface#MATCH_VCLOSE}.
	 *
	 * <p>NOTE(review): {@code start}, {@code end} and {@code gradient} are accepted
	 * but never read by this constructor - confirm whether they were meant to be
	 * forwarded to the superclass or stored.
	 *
	 * @param name     matcher name handed to the superclass
	 * @param minwidth minimum width handed to the superclass
	 * @param maxwidth maximum width handed to the superclass
	 * @param start    unused here
	 * @param end      unused here
	 * @param gradient unused here
	 * @throws Exception if the superclass constructor fails
	 */
	public MatchPersonNameSyllables(String name, int minwidth, int maxwidth, int start, int end,int gradient)  throws Exception{
		super(name, minwidth, maxwidth, "MATCHPERSONNAME");
		maxScore = MatchProcInterface.MATCH_VCLOSE;
	}

	/**
	 * Creates a matcher by name only and switches {@code partialKeys} off.
	 *
	 * @param name matcher name handed to the superclass
	 */
	public MatchPersonNameSyllables(String name) {
		super(name);
		partialKeys = false;
	}

	/**
	 * Scores two standardised names by delegating to
	 * {@code stdBase.compare(...)} with this matcher's key word list and key
	 * conversion settings.
	 *
	 * @param stdBase   the standardised base value
	 * @param stdComp   the standardised value to compare against
	 * @param isSearch  forwarded unchanged to {@code compare}
	 * @param asContent forwarded unchanged to {@code compare}
	 * @return the score returned by {@code stdBase.compare}
	 */
	@Override
	public double calculateComparisonScore(Standardized stdBase, Standardized stdComp, boolean isSearch, boolean asContent){
		return stdBase.compare(stdComp, getKeyAnon(), getKeyCv(), isSearch, asContent);
	}

	/**
	 * Wraps the given words in a {@link StandardPersonName} configured with this
	 * matcher's rule word list, rule lookup, gradient and rule function.
	 *
	 * @param originalText not used by this implementation
	 * @param words        the tokenised name
	 * @return a new {@link StandardPersonName}
	 */
	@Override
	public Standardized standardise(String originalText, String[] words){
		return new StandardPersonName(words, this.getRuleAnon(), this.getRuleLookup(), gradient,ruleFunction);
	}

	/**
	 * Builds the set of index keys for a person name.
	 *
	 * <p>The input parts are filtered for usefulness, joined and re-split with
	 * {@link WordList#split(String)}. The last resulting word is treated as the
	 * surname and every preceding word as a first name. For each alternate form
	 * of each first name (from {@code getAlternateWords}) one key is emitted:
	 * {@code getKey(syllablePrefix(alternate)) + getKey(syllablePrefix(surname))}.
	 * The exact key strings depend on {@link Syllables} and {@link NYSIIS}.
	 *
	 * <p>A name that yields fewer than two words produces no keys.
	 *
	 * @param value a {@code String}, a {@code String[]} of name parts, or
	 *              {@code null}; any other type fails with
	 *              {@link ClassCastException}
	 * @param batch not used by this implementation
	 * @return the generated keys; empty (never {@code null}) when nothing usable
	 *         was supplied
	 */
	@Override
	public Collection<String> getKeys(Object value, boolean batch){
		long sttime = System.currentTimeMillis();
		Set<String> ret = new HashSet<String>();
		if (value == null) {
			// Nothing to index; previously this dereferenced null.
			return ret;
		}

		String[] parts = (value instanceof String)
				? new String[]{(String) value}
				: (String[]) value;
		String fullValue = joinUsefulWords(parts);

		if (fullValue.length() != 0) {
			String[] words = WordList.split(fullValue);
			// At least one first name plus a surname is required to form a key.
			if (words != null && words.length > 1) {
				int lastIndex = words.length - 1;
				String lastNameSy = getKey(getBitOfWord(Syllables.getSyllablesFromWord(words[lastIndex])));
				for (int i = 0; i < lastIndex; i++) {
					for (String alt : getAlternateWords(words[i], getKeyAnon(), getKeyCv())) {
						ret.add(getKey(getBitOfWord(Syllables.getSyllablesFromWord(alt))) + lastNameSy);
					}
				}
			}
		}

		if (ret.size() > 50)
			logger.info(fullValue + " returned a lot of keys " + ret.size());
		long elapsed = System.currentTimeMillis() - sttime;
		if (elapsed > 1000)
			logger.info(fullValue + " took a long time to get keys " + elapsed);
		return ret;
	}

	/**
	 * Trims each non-null part, keeps those judged useful for comparison and
	 * joins them with single spaces.
	 *
	 * <p>Usefulness is decided by the key word list when one is configured,
	 * otherwise by the static {@link WordList#sisUsefulForComparison}.
	 */
	private String joinUsefulWords(String[] parts) {
		StringBuilder joined = new StringBuilder();
		for (String part : parts) {
			if (part == null) {
				continue;
			}
			String word = part.trim();
			boolean useful = (getKeyAnon() == null)
					? WordList.sisUsefulForComparison(word, 1)
					: getKeyAnon().isUsefulForComparison(word, 1);
			if (useful) {
				joined.append(' ').append(word);
			}
		}
		return joined.toString().trim();
	}

	/**
	 * Returns the first syllable plus the first character of the second
	 * syllable, or just the first syllable when no usable second one exists.
	 *
	 * @param syllablesFromWord syllables of one word, in order; may be
	 *                          {@code null} or empty
	 * @return the syllable prefix, or {@code ""} for a null/empty list
	 */
	private String getBitOfWord(List<String> syllablesFromWord) {
		if (syllablesFromWord == null || syllablesFromWord.isEmpty()) {
			return "";
		}
		String first = syllablesFromWord.get(0);
		if (syllablesFromWord.size() > 1) {
			String second = syllablesFromWord.get(1);
			// Guard against an empty second syllable: charAt(0) would throw.
			if (second != null && !second.isEmpty()) {
				return first + second.charAt(0);
			}
		}
		return first;
	}

	/**
	 * Normalises a word to upper-case ASCII letters and digits and encodes it
	 * with {@link NYSIIS}.
	 *
	 * <p>Upper-casing uses {@link Locale#ROOT} so that keys do not vary with the
	 * JVM default locale (for example Turkish dotted/dotless i).
	 *
	 * @param word the word to encode; must not be {@code null}
	 * @return the NYSIIS encoding of the normalised word
	 */
	@Override
	public String getKey(String word){
		return NYSIIS.encode(word.toUpperCase(Locale.ROOT).trim().replaceAll("[^A-Z0-9]", ""));
	}

	/**
	 * Manual smoke test: prints the keys generated for "ROBERT HAYNES" using a
	 * matcher with no word list configured.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args){
		MatchPersonNameSyllables mcn = new MatchPersonNameSyllables("X");
		mcn.minWidth = 2;
		mcn.maxWidth = 3;
		System.out.println(mcn.getKeys("ROBERT HAYNES", false));
	}

}
