/*******************************************************************************
 * Copyright notice
 * 
 * This source code is copyright of Robert James Haynes - (c) 2010, 2011. All rights reserved.
 * 
 * Any redistribution, reproduction or decompilation of part or all of the code in any form is prohibited 
 * 
 * You may not, except with our express written permission, distribute or commercially exploit the content. Nor may you transmit it or store it in or display it on any website or other form of electronic retrieval system.
 ******************************************************************************/
/**
 *
	Identiza - Fuzzy matching Libraries
    
    Copyright (C) 2019  Robert James Haynes (EntityStream KFT), Budapest Hungary

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as
    published by the Free Software Foundation, either version 3 of the
    License, or (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program.  If not, see https://www.gnu.org/licenses/agpl-3.0.en.html
 */
package com.entitystream.identiza.entity.resolve.types;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Locale;

import com.entitystream.identiza.metadata.IdentizaSettings;
import com.entitystream.identiza.wordlist.WordList;

/**
 * Match type for plain strings.
 *
 * <p>Keys are built by splitting each input value into words, keeping only the
 * words considered useful for comparison, normalising each one with
 * {@link #getKey(String)} and concatenating the results into a single key.
 *
 * <p>Implementation note: the helpers added here are deliberately {@code private}
 * and no initialised static fields are introduced, so the default
 * {@code serialVersionUID} of this {@link Serializable} class is not disturbed.
 */
public class MatchString extends MatchBase implements Serializable{

	/**
	 * Creates a string matcher with the given width limits; partial keys are disabled.
	 *
	 * @param name     name passed through to the base class
	 * @param minwidth minimum width passed through to the base class
	 * @param maxwidth maximum width passed through to the base class
	 * @throws Exception if the base class constructor fails
	 */
	public MatchString(String name, int minwidth, int maxwidth) throws Exception{
		super(name, minwidth, maxwidth);
		partialKeys=false;
	}

	/**
	 * Creates a string matcher registered under the "MATCHSTR" type; partial keys are disabled.
	 *
	 * <p>NOTE(review): {@code start}, {@code end}, {@code gradient} and {@code keyField}
	 * are accepted but not used by this constructor. In particular the {@code gradient}
	 * parameter shadows the {@code gradient} used by {@link #standardise(String, String[])}
	 * and is never stored - confirm whether that is intentional.
	 *
	 * @param name     name passed through to the base class
	 * @param minwidth minimum width passed through to the base class
	 * @param maxwidth maximum width passed through to the base class
	 * @param start    currently ignored
	 * @param end      currently ignored
	 * @param gradient currently ignored
	 * @param keyField currently ignored
	 * @throws Exception if the base class constructor fails
	 */
	public MatchString (String name, int minwidth, int maxwidth, int start, int end,int gradient, String keyField)  throws Exception{
		super(name, minwidth, maxwidth, "MATCHSTR");
		partialKeys=false;
	}

	/**
	 * Creates a string matcher with only a name; partial keys are disabled.
	 *
	 * @param name name passed through to the base class
	 */
	public MatchString(String name){
		super(name);	
		partialKeys=false;
	}

	/**
	 * Normalises a single word into its key form.
	 *
	 * <p>When a key CV is configured and its lookup is available, the word is first
	 * replaced by the stem of its upper-cased, trimmed form (if the lookup yields one).
	 * The result is then upper-cased, trimmed and stripped of every character outside
	 * {@code A-Z} and {@code 0-9}.
	 *
	 * @param word the word to normalise, may be {@code null}
	 * @return the normalised key, or an empty string when {@code word} is {@code null}
	 */
	@Override
	public String getKey(String word) {
		if (word==null) {
			return "";
		}
		// The lookup is only consulted when a key CV is configured (as before), but it is
		// now also null-checked, matching the guard used in getKeys(Object, boolean, boolean).
		if (getKeyCv()!=null && getKeyLookup()!=null){
			String stem = getKeyLookup().getStem(upperTrim(word));
			// getStem may yield null (getKeys checks for it too); keep the original word then.
			if (stem!=null) {
				word = stem;
			}
		}
		return upperTrim(word).replaceAll("[^A-Z0-9]", "");
	}

	/**
	 * Scores two standardised values by delegating to {@code stdBase.compare}.
	 *
	 * @param stdBase   the base value; performs the comparison
	 * @param stdComp   the value compared against the base
	 * @param isSearch  passed through to the comparison
	 * @param asContent passed through to the comparison
	 * @return the score returned by {@code stdBase.compare}
	 */
	@Override
	public double calculateComparisonScore(Standardized stdBase, Standardized stdComp, boolean isSearch, boolean asContent){   
		return stdBase.compare(stdComp, getRuleAnon(), getRuleCv(), isSearch, asContent);
	}

	/**
	 * Wraps the given words in a {@code StandardString} using this matcher's rule settings.
	 *
	 * @param originalText not used by this implementation
	 * @param words        the words to standardise
	 * @return a new {@code StandardString} for {@code words}
	 */
	@Override
	public Standardized standardise(String originalText, String[] words){
		return new StandardString(words, this.getRuleAnon(), this.getRuleLookup(), gradient,ruleFunction);
	}

	/**
	 * Builds a single concatenated key from all useful words of the given value(s).
	 *
	 * @param value a {@code String}, a {@code String[]}, or {@code null} (treated as no values)
	 * @param batch not used by this implementation
	 * @return a collection holding exactly one key (possibly the empty string)
	 * @throws ClassCastException if {@code value} is neither a {@code String} nor a {@code String[]}
	 */
	@Override
	public Collection<String> getKeys(Object value, boolean batch){
		StringBuilder key = new StringBuilder();
		for (String text : asStringArray(value)){
			if (text!=null) {
				appendWordKeys(key, text);
			}
		}
		return singleKey(key);
	}

	/**
	 * Builds a single concatenated key, preferring a whole-value stem where one exists.
	 *
	 * <p>For each non-null value: if a key lookup is available and it yields a stem that
	 * differs (ignoring case) from the value, that stem is appended as-is; otherwise the
	 * value is split into words and the keys of its useful words are appended.
	 *
	 * @param value    a {@code String}, a {@code String[]}, or {@code null} (treated as no values)
	 * @param batch    not used by this implementation
	 * @param isSearch not used by this implementation
	 * @return a collection holding exactly one key (possibly the empty string)
	 * @throws ClassCastException if {@code value} is neither a {@code String} nor a {@code String[]}
	 */
	@Override
	public Collection<String> getKeys(Object value, boolean batch, boolean isSearch){
		StringBuilder key = new StringBuilder();
		for (String text : asStringArray(value)){
			if (text==null) {
				continue;
			}
			String stem = null;
			if (getKeyLookup()!=null) {
				stem = getKeyLookup().getStem(text);
			}
			if (stem!=null && !stem.equalsIgnoreCase(text)) {
				sb_append(key, stem);
			} else {
				appendWordKeys(key, text);
			}
		}
		return singleKey(key);
	}

	/** Upper-cases locale-independently (so e.g. a Turkish default locale cannot turn 'i' into a non A-Z letter) and trims. */
	private static String upperTrim(String text) {
		return text.toUpperCase(Locale.ROOT).trim();
	}

	/** Normalises the accepted value shapes to an array: String becomes a one-element array, null becomes an empty array. */
	private static String[] asStringArray(Object value) {
		if (value==null) {
			return new String[0];
		}
		if (value instanceof String) {
			return new String[]{(String)value};
		}
		return (String[])value;
	}

	/** Appends a pre-computed stem unchanged (stems are not passed through getKey, as before). */
	private static void sb_append(StringBuilder key, String stem) {
		key.append(stem);
	}

	/** Splits the text into words and appends the key of every word that is useful for comparison. */
	private void appendWordKeys(StringBuilder key, String text) {
		for (String word : WordList.split(text)){
			if (isUsefulForKey(word)) {
				key.append(getKey(word));
			}
		}
	}

	/** Asks the configured key word list whether the word is useful, falling back to the static WordList check when none is configured. */
	private boolean isUsefulForKey(String word) {
		return getKeyAnon()!=null
				? getKeyAnon().isUsefulForComparison(word,0)
				: WordList.sisUsefulForComparison(word,0);
	}

	/** Wraps the accumulated key in the single-element collection callers expect. */
	private static Collection<String> singleKey(StringBuilder key) {
		ArrayList<String> ret = new ArrayList<String>();
		ret.add(key.toString());
		return ret;
	}

	/**
	 * Manual smoke test: prints the key generated for a sample phrase.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args){
		MatchString matcher = new MatchString("");
		// The former stem lookup on a WordList variable that was always null has been
		// removed: it threw NullPointerException before anything was printed.
		System.out.println(matcher.getKeys("United States of America", false, false));
	}
}
