/**
 *
	Identiza - Fuzzy matching Libraries
    
    Copyright (C) 2019  Robert James Haynes (EntityStream KFT), Budapest Hungary

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as
    published by the Free Software Foundation, either version 3 of the
    License, or (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program.  If not, see https://www.gnu.org/licenses/agpl-3.0.en.html
 */
package com.entitystream.identiza.entity.resolve.types;

import java.io.Serializable;
import java.util.ArrayList;

import com.entitystream.identiza.entity.resolve.processing.GradientGenerator;
import com.entitystream.identiza.entity.resolve.utils.EditDistance;
import com.entitystream.identiza.wordlist.AnonymousWordList;
import com.entitystream.identiza.wordlist.ControlledVocabularyWordList;
import com.entitystream.identiza.wordlist.WordList;

/**
 * Standardized, tokenized representation of a person name that can be scored
 * against another {@code StandardPersonName}.
 *
 * <p>The score is derived from a gradient-weighted edit distance between the two
 * token lists, with extra handling for honorific tokens ({@code MR}, {@code MRS},
 * {@code MS}, {@code MISS}, {@code DR}).
 */
public class StandardPersonName extends StandardizedSerialized  {
	/** Gradient setting passed to {@code GradientGenerator.generate} when building the per-token weights. */
	private int gradient;

	/** Creates an empty instance; {@code gradient} keeps its default value of 0. */
	public StandardPersonName(){

	}

	/**
	 * Creates a standardized person name.
	 *
	 * @param words    the name tokens, forwarded to the superclass
	 * @param anon     word list forwarded to the superclass
	 * @param cv       word list forwarded to the superclass
	 * @param gradient gradient setting, forwarded to the superclass and also kept locally for {@link #compare}
	 * @param name     forwarded to the superclass
	 */
	public StandardPersonName(String[] words, WordList anon, WordList cv, int gradient, String name){
		super(words, anon, cv, gradient,name);
		this.gradient=gradient;
	}

	/**
	 * Scores this name against another standardized person name.
	 *
	 * <p>The result is {@code 100 * (divisor - edit) / divisor}, where {@code divisor} is the sum
	 * of the generated gradients and {@code edit} is the weighted edit distance after the
	 * honorific adjustments below. Returns 0 when either token list is missing or empty, or
	 * when no gradients could be generated.
	 *
	 * @param otherstd  the name to compare with; must be a {@code StandardPersonName}
	 * @param anon      word list passed through to the edit-distance routine
	 * @param cv        word list passed through to the edit-distance routine
	 * @param isSearch  when {@code true}, a non-zero edit distance is reduced by the difference
	 *                  in token counts (never below zero)
	 * @param asContent when {@code true} the "Haynes" edit distance is used, otherwise the simple one
	 * @return the similarity score; 0 when the honorifics conflict
	 * @throws ClassCastException if {@code otherstd} is not a {@code StandardPersonName}
	 */
	@Override
	public double compare(Standardized otherstd, WordList anon, WordList cv,
			 boolean isSearch, boolean asContent) {
		ArrayList<String> comparitorTokens = ((StandardPersonName)otherstd).baseTokens;

		// The honorific checks below read the first token of both lists, so a name
		// without tokens cannot be scored; treat it as "no similarity" instead of throwing.
		if (baseTokens == null || baseTokens.isEmpty()
				|| comparitorTokens == null || comparitorTokens.isEmpty()) {
			return 0;
		}

		int shorterSize = Math.min(comparitorTokens.size(), baseTokens.size());
		int longerSize = Math.max(comparitorTokens.size(), baseTokens.size());
		double[] gradients = GradientGenerator.generate(shorterSize, longerSize, 2.0, 0.5, 1.0, gradient);
		if (gradients == null) {
			return 0;
		}

		// NOTE(review): if the gradients sum to 0 the final division yields NaN or -Infinity — confirm
		// whether GradientGenerator can produce such a set.
		double divisor = GradientGenerator.sum(gradients);

		double edit;
		if (asContent) {
			edit = EditDistance.getHaynesEditDistanceOfArrayList(baseTokens, comparitorTokens, anon, cv, gradients, true);
		} else {
			edit = EditDistance.getSimpleEditDistanceOfArrayList(baseTokens, comparitorTokens, anon, cv, gradients);
		}

		// MR on one side and MRS/MISS/MS on the other marks the names as different outright.
		boolean conflictingTitles =
				(baseTokens.contains("MR") && containsMrsMissOrMs(comparitorTokens))
				|| (comparitorTokens.contains("MR") && containsMrsMissOrMs(baseTokens));

		if (!conflictingTitles
				&& isTitle(baseTokens.get(0)) && isTitle(comparitorTokens.get(0))
				&& baseTokens.size() > 1 && comparitorTokens.size() > 1) {
			// Both names lead with a title: the tokens that follow must start alike.
			// Up to two leading characters are compared, ignoring case; fewer when a token is shorter.
			String baseSecond = baseTokens.get(1);
			String compSecond = comparitorTokens.get(1);
			int prefixLength = Math.min(2, Math.min(baseSecond.length(), compSecond.length()));
			if (prefixLength > 0 && !baseSecond.regionMatches(true, 0, compSecond, 0, prefixLength)) {
				edit = edit + 2;
			}
		}

		// An initials-based cross-check (see checkInitials) used to run here when edit == 1.0;
		// it is currently disabled.

		if (isSearch) {
			// Searches tolerate a difference in token count.
			if (edit > 0) {
				edit = edit - Math.abs(baseTokens.size() - comparitorTokens.size());
			}
			if (edit < 0) {
				edit = 0;
			}
		}

		if (conflictingTitles) {
			// Force the maximum distance so the score collapses to 0.
			edit = divisor;
		}
		return 100 * (divisor - edit) / divisor;
	}

	/** Returns whether the token is one of the recognised leading titles. */
	private static boolean isTitle(String token) {
		return "MR".equals(token) || "MRS".equals(token) || "MS".equals(token)
				|| "MISS".equals(token) || "DR".equals(token);
	}

	/** Returns whether the tokens contain any of {@code MRS}, {@code MISS} or {@code MS}. */
	private static boolean containsMrsMissOrMs(ArrayList<String> tokens) {
		return tokens.contains("MRS") || tokens.contains("MISS") || tokens.contains("MS");
	}

	/**
	 * Checks whether every initial of the name with fewer initials also occurs among the
	 * initials of the other name (e.g. "J" against "JR" matches, "MC" against "DC" does not).
	 * Tokens found in {@code anon} are ignored. Not called from the active code of this class
	 * at present.
	 *
	 * @param baseTokens tokens of the first name
	 * @param compTokens tokens of the second name
	 * @param cv         unused
	 * @param anon       tokens in this list are skipped; may be {@code null}
	 * @return {@code true} when all initials of the shorter set are found in the longer set
	 */
	private boolean checkInitials(ArrayList<String> baseTokens,
			ArrayList<String> compTokens, WordList cv, WordList anon) {
		String baseInitials = initialsOf(baseTokens, anon);
		String compInitials = initialsOf(compTokens, anon);

		String shortest;
		String longest;
		if (baseInitials.length() > compInitials.length()) {
			shortest = compInitials;
			longest = baseInitials;
		} else {
			shortest = baseInitials;
			longest = compInitials;
		}

		// Every letter of the shortest set must appear somewhere in the longest set.
		for (int i = 0; i < shortest.length(); i++) {
			if (longest.indexOf(shortest.charAt(i)) < 0) {
				return false;
			}
		}
		return true;
	}

	/** Concatenates the first character of every non-empty token that is not in {@code anon}. */
	private static String initialsOf(ArrayList<String> tokens, WordList anon) {
		StringBuilder initials = new StringBuilder();
		for (String token : tokens) {
			if (token.length() > 0 && (anon == null || !anon.inlist(token))) {
				initials.append(token.charAt(0));
			}
		}
		return initials.toString();
	}

}
