/**
 *
	Identiza - Fuzzy matching Libraries
    
    Copyright (C) 2019  Robert James Haynes (EntityStream KFT), Budapest Hungary

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as
    published by the Free Software Foundation, either version 3 of the
    License, or (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program.  If not, see https://www.gnu.org/licenses/agpl-3.0.en.html
 */
package com.entitystream.identiza.entity.resolve.types;

import java.io.Serializable;
import java.text.ParseException;
import java.util.Calendar;
import java.util.Collection;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Pattern;

import org.apache.commons.lang3.time.DateUtils;

import com.entitystream.identiza.entity.resolve.utils.NYSIIS;
import com.entitystream.identiza.wordlist.WordList;

public class MatchPersonNameXDOB extends MatchBase  implements Serializable{
	Calendar cal = new GregorianCalendar();
	final String[] possibleFormats = {
			"dd-MM-yyyy", 
			"dd/MM/yyyy", 
			"dd-MM-yy", 
			"dd/MM/yyyy","MM-dd-yyyy", "MM/dd/yyyy", "MM-dd-yy", "MM/dd/yyyy",
			"EEE, dd MMM yyyy HH:mm:ss z", // RFC_822
			"EEE, dd MMM yyyy HH:mm zzzz",
			"yyyy-MM-dd'T'HH:mm:ssZ",
			"yyyy-MM-dd'T'HH:mm:ss.SSSzzzz", // Blogger Atom feed has millisecs also
			"yyyy-MM-dd'T'HH:mm:sszzzz",
			"yyyy-MM-dd'T'HH:mm:ss z",
			"yyyy-MM-dd'T'HH:mm:ssz", // ISO_8601
			"yyyy-MM-dd'T'HH:mm:ss",
			"yyyy-MM-dd'T'HHmmss.SSSz",
			"yyyy-MM-dd" 		        
	};
	public MatchPersonNameXDOB(String name, int minwidth, int maxwidth) throws Exception{
		super(name,minwidth, maxwidth);
	}

	public MatchPersonNameXDOB(String name, int minwidth, int maxwidth, int start, int end,int gradient)  throws Exception{
		super(name, minwidth, maxwidth, "MATCHPERSONNAMEXDOB");
		maxScore = MatchProcInterface.MATCH_VCLOSE;
	}

	public MatchPersonNameXDOB(String name) {
		super(name);
		partialKeys=false;		
	}

	@Override
	public Collection<String> getKeys(Object value, boolean batch){
		/// Robert James Haynes 05/01/1971
		/// Robert Haynes 05/01/1971
		//becomes
		//RABHAN0501
		//RABHAN0105
		//HANRAB0501
		//HANRAB0105
		//BABHAN0501
		//BABHAN0105
		//HANBAB0501
		//HANBAB0105
		long sttime=System.currentTimeMillis();
		if (value instanceof String){
			value = new String[]{(String)value};
		}
		Set<String> ret = new HashSet<String>();
		//find the token that starts with a number
		String dob = "";
		String firstname ="";
		String lastname="";
		StringBuilder nameall = new StringBuilder();
		for (String val : (String[])value){
			if (val!=null && val.length()>0){
				if ("0123456789".contains(val.subSequence(0, 1))){
					dob = val;
				} else {
					String word = val.trim();
					if((getKeyAnon()==null && WordList.sisUsefulForComparison(word,1)) ||
							(getKeyAnon()!=null && getKeyAnon().isUsefulForComparison(word,1))){
						nameall.append(word + " ");
					}					
				}
			}
		}

		String fullValue=nameall.toString().trim();

		if (fullValue.length()!=0) {
			String[] tempwords = WordList.split(fullValue);//.split("[.,!?:; ()&-]+\\s*");
			if (tempwords.length>0)
			    lastname = tempwords[tempwords.length-1];
			if (tempwords.length>1)
				firstname = tempwords[0];			   							
		}	

		HashSet<String> fnAlts = getAlternateWords(firstname, getKeyAnon(), getKeyCv());

		//process the dob
		Date date = null;
		try {
			date=DateUtils.parseDate(dob, possibleFormats);
		} catch (ParseException e) {}
		String dm="0000";
		if (date!=null){
			cal.setTime(date);
			String month = String.format("%02d",cal.get(Calendar.MONTH)+1);
			String day = String.format("%02d",cal.get(Calendar.DAY_OF_MONTH));
			dm=day+month;
			try{
				int d=Integer.parseInt(day);
				int m=Integer.parseInt(month);
				if (d>m)
					dm=month+day;
			} catch (Exception e){}
		}
		//how long is the extract?
		int len = this.getMaxWidth()/10;
		//create the keys		
		String lastn=getKey(lastname);
		for (String firstn : fnAlts){
			String fb= firstn.substring(0,Math.min(len,firstn.length()));
			String lb= lastn.substring(0,Math.min(len,lastn.length()));
			ret.add(fb+lb+dm);
			ret.add(lb+fb+dm);
			ret.add(fb+lb+"0000");
		}

		if (System.currentTimeMillis()-sttime>1000)
			logger.info(fullValue + " took a long time to get keys " + (System.currentTimeMillis()-sttime));
		return ret;
	}




	@Override
	public String getKey(String word){
		return NYSIIS.encode(word.toUpperCase().trim().replaceAll("[^A-Z0-9]", ""));
	}


}
