/**
 *
	Identiza - Fuzzy matching Libraries
    
    Copyright (C) 2019  Robert James Haynes (EntityStream KFT), Budapest Hungary

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as
    published by the Free Software Foundation, either version 3 of the
    License, or (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program.  If not, see https://www.gnu.org/licenses/agpl-3.0.en.html
 */
package com.entitystream.identiza.entity.resolve.types;

import java.io.Serializable;
import java.util.ArrayList;

import com.entitystream.identiza.wordlist.WordList;

/**
 * A standardized value whose tokens are compared as plain text, optionally
 * helped by a {@link WordList} that scores how closely two strings are related.
 */
public class StandardString extends StandardizedSerialized {

	/**
	 * No-argument constructor; performs no initialisation of its own.
	 */
	public StandardString(){

	}

	/**
	 * Passes every argument unchanged to the superclass constructor.
	 */
	public StandardString(String[] words, WordList anon, WordList cv, int gradient, String name){
		super(words, anon, cv, gradient, name);

	}

	/**
	 * Scores how similar this value is to {@code otherstd} on a 0-100 scale.
	 *
	 * <p>The comparison runs in three stages:
	 * <ol>
	 *   <li>Both token lists are joined with single spaces; if the results are
	 *       equal ignoring case the score is 100.</li>
	 *   <li>If {@code cv} is supplied, the two joined strings are passed to
	 *       {@code cv.isStemmed}. Any result other than 1.0 is treated as a
	 *       distance and returned as {@code (1 - distance) * 100}.</li>
	 *   <li>Otherwise each token of this value is matched against every token
	 *       of the other value and the smallest distance per token (0 for a
	 *       case-insensitive match, the {@code cv.isStemmed} result when
	 *       {@code cv} is supplied, 1 otherwise) is summed. The sum is turned
	 *       into a percentage of the longer token list's size.</li>
	 * </ol>
	 *
	 * @param otherstd  the value to compare against; it is cast to
	 *                  {@code StandardString}, so any other type raises a
	 *                  {@code ClassCastException}
	 * @param anon      not used by this implementation
	 * @param cv        optional word list used for stem scoring; may be {@code null}
	 * @param isSearch  not used by this implementation
	 * @param asContent not used by this implementation
	 * @return a similarity score where 100 means identical text
	 */
	@Override
	public double compare(Standardized otherstd, WordList anon, WordList cv,
			boolean isSearch, boolean asContent) {
		//we need to protect matching from bad words not being the same ie MR & MRS, LTD & LLP
		ArrayList<String> comparitorTokens=((StandardString)otherstd).baseTokens;

		//try the whole thing first!
		String one=joinTokens(baseTokens);
		String two=joinTokens(comparitorTokens);
		if (one.equalsIgnoreCase(two))
			return 100.0d;
		if (cv!=null){
			// Compare the other value's text with THIS value's text; the argument
			// order (other, this) mirrors the per-token call further down.
			double sc = cv.isStemmed(two, one);
			if (sc!=1.0)
				return (1-sc)*100.d;
		}

		// Token-by-token pass: accumulate the best (smallest) distance found
		// for each of this value's tokens.
		double edit=0;
		for (String outer : baseTokens){
			double best=1.0d;
			for (String inner : comparitorTokens){
				double thisScore=1.0d;
				if (inner.equalsIgnoreCase(outer)) {
					thisScore = 0.0d;
				} else if (cv!=null) {
					thisScore = cv.isStemmed(inner, outer);
				}
				// Must run for every pair, whichever branch set thisScore.
				if (thisScore<best) {
					best=thisScore;
				}
			}
			edit+=best;
		}

		double divisor=Math.max(comparitorTokens.size(), baseTokens.size());
		// One side has no tokens at all: treat as a complete mismatch.
		if (comparitorTokens.size()==0 || baseTokens.size()==0)
			edit=divisor;
		if (edit<0) edit=0;
		return 100*(divisor-edit)/divisor;
	}

	/**
	 * Joins the tokens with single spaces and trims the result.
	 */
	private static String joinTokens(ArrayList<String> tokens) {
		StringBuilder joined = new StringBuilder();
		for (String token : tokens) {
			joined.append(token).append(' ');
		}
		return joined.toString().trim();
	}
}
