/**
 *
	Identiza - Fuzzy matching Libraries
    
    Copyright (C) 2019  Robert James Haynes (EntityStream KFT), Budapest Hungary

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as
    published by the Free Software Foundation, either version 3 of the
    License, or (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program.  If not, see https://www.gnu.org/licenses/agpl-3.0.en.html
 */
package com.entitystream.identiza.entity.resolve.types;

import java.util.ArrayList;

import com.entitystream.identiza.entity.resolve.processing.GradientGenerator;
import com.entitystream.identiza.entity.resolve.utils.EditDistance;
import com.entitystream.identiza.wordlist.WordList;

/**
 * Standardized company name whose {@link #compare} method scores the similarity of two
 * token lists as a percentage derived from a gradient-weighted edit distance, and forces
 * the score to zero when one of several "cannot be the same entity" rules fires.
 */
public class StandardCompanyName extends StandardizedSerialized  {
	/** Serialization version identifier. */
	private static final long serialVersionUID = -6958439718017970962L;

	/** Gradient setting received by the constructor and forwarded to {@code GradientGenerator.generate}. */
	private int gradient;

	/** Pipe-delimited personal titles that are skipped when looking for the first significant word. */
	private static final String badlist = "|MR|MRS|MISS|MS|DR|";


	/** Creates an empty instance; no tokens are set by this class. */
	public StandardCompanyName(){
		
	}

	/**
	 * Delegates to the superclass constructor and remembers the gradient for later comparisons.
	 *
	 * @param words    passed through to the superclass
	 * @param anon     passed through to the superclass
	 * @param cv       passed through to the superclass
	 * @param gradient passed through to the superclass and kept for {@link #compare}
	 * @param name     passed through to the superclass
	 */
	public StandardCompanyName(String[] words, WordList anon, WordList cv, int gradient, String name){
		super(words, anon, cv, gradient,name);
		this.gradient=gradient;
	}

	/**
	 * Scores this name against another {@code StandardCompanyName}.
	 *
	 * <p>The score is {@code 100 * (divisor - edit) / divisor}, where {@code divisor} is the sum
	 * of the generated gradients and {@code edit} is the simple edit distance of the two token
	 * lists. When any of the mismatch rules fires (conflicting number-like tokens, different
	 * initial of the first significant word outside search mode, or "MR" against a female title)
	 * {@code edit} is replaced by {@code divisor}, which yields 0.
	 *
	 * @param otherstd  the other name; must be a {@code StandardCompanyName} (any other non-null
	 *                  type still raises {@code ClassCastException})
	 * @param anon      word list of words to ignore when picking the first significant word; may be null
	 * @param cv        word list forwarded to the edit-distance routine
	 * @param isSearch  when true the initial-letter rule is skipped and a negative edit distance
	 *                  is clamped to 0
	 * @param asContent forwarded to {@code GradientGenerator.generate}
	 * @return the similarity score, or 0 when {@code otherstd} or either token list is null, when
	 *         no gradients are generated, or when the gradient sum is 0
	 */
	@Override
	public double compare(Standardized otherstd, WordList anon, WordList cv,
			boolean isSearch, boolean asContent) {
		if (otherstd == null) {
			return 0;
		}
		ArrayList<String> comparitorTokens = ((StandardCompanyName) otherstd).baseTokens;
		if (baseTokens == null || comparitorTokens == null) {
			return 0;
		}

		int shorter = Math.min(comparitorTokens.size(), baseTokens.size());
		int longer = Math.max(comparitorTokens.size(), baseTokens.size());
		double[] gradients = GradientGenerator.generate(shorter, longer, 2.0, 1.0, 1.0, gradient, asContent);
		if (gradients == null) {
			return 0;
		}

		double divisor = GradientGenerator.sum(gradients);
		if (divisor == 0d) {
			// Dividing by a zero weight sum would produce NaN or an infinite score.
			return 0;
		}

		double edit = EditDistance.getSimpleEditDistanceOfArrayList(baseTokens, comparitorTokens, anon, cv, gradients);

		// Rules are evaluated in the same order as before; later rules are skipped once one fires.
		// NOTE(review): assumes the WordList lookups are side-effect free, so skipping them is harmless.
		boolean notSame = hasNumberConflict(baseTokens, comparitorTokens)
				|| (!isSearch && hasInitialConflict(baseTokens, comparitorTokens, anon))
				|| hasTitleConflict(baseTokens, comparitorTokens);

		// NOTE(review): an earlier revision also tried to add 1 to the edit distance when both first
		// significant words were titles and the second tokens started with different letters. That
		// branch could never run because the first significant word excludes every title in badlist,
		// so it has been removed without changing any score. Confirm whether the rule should be revived.

		if (notSame) {
			edit = divisor;
		} else if (isSearch && edit < 0) {
			edit = 0;
		}
		return 100 * (divisor - edit) / divisor;
	}

	/**
	 * Reports whether some number-like token on the left was compared with at least one
	 * number-like token on the right in a similar position (counted from the end of each list,
	 * offset difference below 3) and matched none of them.
	 */
	private static boolean hasNumberConflict(ArrayList<String> leftTokens, ArrayList<String> rightTokens) {
		for (int leftpos = 0; leftpos < leftTokens.size(); leftpos++) {
			String left = leftTokens.get(leftpos);
			int leftFromEnd = leftTokens.size() - (leftpos + 1);
			boolean compared = false;
			boolean matched = false;
			for (int rightpos = 0; rightpos < rightTokens.size() && !matched; rightpos++) {
				int rightFromEnd = rightTokens.size() - (rightpos + 1);
				if (Math.abs(leftFromEnd - rightFromEnd) >= 3) {
					continue;
				}
				String right = rightTokens.get(rightpos);
				if (isNumberLike(left) && isNumberLike(right)) {
					compared = true;
					matched = WordList.getNumber(left).equalsIgnoreCase(WordList.getNumber(right));
				}
			}
			if (compared && !matched) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Reports whether a token contains a number or is a single letter A-Z (either case).
	 * The letter test uses {@code Character.toUpperCase} so it does not depend on the default locale.
	 */
	private static boolean isNumberLike(String token) {
		if (WordList.containsAnyNumbers(token)) {
			return true;
		}
		if (token.length() != 1) {
			return false;
		}
		char upper = Character.toUpperCase(token.charAt(0));
		return upper >= 'A' && upper <= 'Z';
	}

	/**
	 * Reports whether both lists have a first significant word and those words start with
	 * different characters (case-sensitive).
	 */
	private static boolean hasInitialConflict(ArrayList<String> leftTokens, ArrayList<String> rightTokens, WordList anon) {
		String leftWord = firstSignificantWord(leftTokens, anon);
		String rightWord = firstSignificantWord(rightTokens, anon);
		if (leftWord == null || rightWord == null) {
			return false;
		}
		String leftKey = leftWord.substring(0, Math.min(1, leftWord.length()));
		String rightKey = rightWord.substring(0, Math.min(1, rightWord.length()));
		// An empty key is a prefix of everything, so empty words never conflict.
		return !leftKey.startsWith(rightKey) && !rightKey.startsWith(leftKey);
	}

	/**
	 * Returns the first token that is neither a title from {@code badlist} nor, when {@code anon}
	 * is supplied, present in {@code anon}; returns null when there is no such token.
	 */
	private static String firstSignificantWord(ArrayList<String> tokens, WordList anon) {
		for (String word : tokens) {
			if (word == null) {
				continue;
			}
			boolean isTitle = badlist.contains("|" + word + "|");
			if (!isTitle && (anon == null || !anon.inlist(word))) {
				return word;
			}
		}
		return null;
	}

	/** Reports whether one list contains "MR" while the other contains "MRS", "MISS" or "MS". */
	private static boolean hasTitleConflict(ArrayList<String> leftTokens, ArrayList<String> rightTokens) {
		return (leftTokens.contains("MR") && hasFemaleTitle(rightTokens))
				|| (rightTokens.contains("MR") && hasFemaleTitle(leftTokens));
	}

	/** Reports whether the list contains "MRS", "MISS" or "MS". */
	private static boolean hasFemaleTitle(ArrayList<String> tokens) {
		return tokens.contains("MRS") || tokens.contains("MISS") || tokens.contains("MS");
	}


}
