
/*******************************************************************************
 * Copyright notice
 * 
 * This source code is copyright of Robert James Haynes - (c) 2010, 2011. All rights reserved.
 * 
 * Any redistribution, reproduction or decompilation of part or all of the code in any form is prohibited 
 * 
 * You may not, except with our express written permission, distribute or commercially exploit the content. Nor may you transmit it or store it in or display it on any website or other form of electronic retrieval system.
 ******************************************************************************/
/**
 *
	Identiza - Fuzzy matching Libraries
    
    Copyright (C) 2019  Robert James Haynes (EntityStream KFT), Budapest Hungary

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as
    published by the Free Software Foundation, either version 3 of the
    License, or (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program.  If not, see https://www.gnu.org/licenses/agpl-3.0.en.html
 */
package com.entitystream.identiza.entity.resolve.types;

import java.io.Serializable;
import java.util.Collection;
import java.util.HashSet;
import java.util.Locale;

import com.entitystream.identiza.wordlist.WordList;

/**
 * Match type for free-text content.
 *
 * <p>Index keys are produced by splitting the input text into words with
 * {@link WordList#split}, normalising each word with {@link #getKey(String)}
 * and emitting a single key string made of the retained words followed by
 * their alternate words. Comparison is delegated to the {@code Standardized}
 * instances, and {@link #standardise(String, String[])} wraps the words in a
 * {@code StandardContent}.
 */
public class MatchContent extends MatchBase implements Serializable{


	/**
	 * Creates a content matcher by delegating to the three-argument
	 * {@code MatchBase} constructor.
	 *
	 * @param name     passed through to the base class
	 * @param minwidth passed through to the base class
	 * @param maxwidth passed through to the base class
	 * @throws Exception declared because the base constructor is invoked; see {@code MatchBase}
	 */
	public MatchContent(String name, int minwidth, int maxwidth) throws Exception{
		super(name, minwidth, maxwidth);
	}

	/**
	 * Creates a content matcher, passing the fixed type name {@code "TXTCONTENT"}
	 * to the base class.
	 *
	 * <p>NOTE(review): {@code start}, {@code end}, {@code gradient} and
	 * {@code keyField} are accepted but never used by this constructor. In
	 * particular the {@code gradient} argument is not stored, although
	 * {@link #standardise(String, String[])} reads a {@code gradient} member.
	 * Confirm against {@code MatchBase} whether these should be forwarded.
	 *
	 * @param name     passed through to the base class
	 * @param minwidth passed through to the base class
	 * @param maxwidth passed through to the base class
	 * @param start    currently ignored
	 * @param end      currently ignored
	 * @param gradient currently ignored
	 * @param keyField currently ignored
	 * @throws Exception declared because the base constructor is invoked; see {@code MatchBase}
	 */
	public MatchContent (String name, int minwidth, int maxwidth, int start, int end,int gradient, String keyField)  throws Exception{
		super(name, minwidth, maxwidth, "TXTCONTENT");
	}

	/**
	 * Creates a content matcher by delegating to the single-argument
	 * {@code MatchBase} constructor.
	 *
	 * @param name passed through to the base class
	 */
	public MatchContent(String name){
		super(name);
	}

	/**
	 * Builds the index key for a piece of content.
	 *
	 * <p>The text is split into words, each word is normalised with
	 * {@link #getKey(String)}, and a word is kept when there is no key
	 * anonymiser ({@code getKeyAnon()} is {@code null}) or the anonymiser
	 * reports it as useful for comparison. Kept words and their alternates
	 * (excluding alternates equal to the word itself) are collected separately
	 * and joined into one key: {@code "<words> <alternates>"}.
	 *
	 * @param value a {@code String}, a {@code String[]} (null elements are
	 *              skipped) or {@code null} (treated as no text); any other
	 *              type results in a {@link ClassCastException}
	 * @param batch not used by this implementation
	 * @return a set containing exactly one key string
	 */
	@Override
	public Collection<String> getKeys(Object value, boolean batch){
		String[] split = WordList.split(joinText(value));

		StringBuilder retText = new StringBuilder();
		StringBuilder altText = new StringBuilder();
		for (String val : split){
			val = getKey(val);
			if (getKeyAnon()==null || getKeyAnon().isUsefulForComparison(val, 3)){
				retText.append(val).append(" ");
				HashSet<String> alts = getAlternateWords(val, getKeyAnon(), getKeyCv());
				for (String alt : alts){
					if (!alt.equals(val)){
						altText.append(alt).append(" ");
					}
				}
			}
		}

		HashSet<String> ret = new HashSet<String>();
		// NOTE(review): the separator is always emitted, so the key carries a
		// trailing (or leading) space when one half is empty. Kept as-is because
		// changing the key format could invalidate keys that were already stored.
		ret.add(retText.toString().trim() + " " + altText.toString().trim());
		return ret;
	}

	/**
	 * Joins the textual parts of {@code value} with single spaces and trims the result.
	 * A {@code null} value or {@code null} array elements contribute no text.
	 */
	private static String joinText(Object value){
		if (value == null){
			return "";
		}
		String[] parts = (value instanceof String) ? new String[]{(String)value} : (String[])value;
		StringBuilder fullText = new StringBuilder();
		for (String part : parts){
			if (part != null){
				fullText.append(' ').append(part);
			}
		}
		return fullText.toString().trim();
	}

	/**
	 * Scores two standardised values by delegating to
	 * {@code stdBase.compare(...)} with this matcher's rule anonymiser and rule
	 * {@code getRuleCv()} value.
	 *
	 * @param stdBase   the value whose {@code compare} method is invoked
	 * @param stdComp   the value it is compared against
	 * @param isSearch  forwarded unchanged to {@code compare}
	 * @param asContent forwarded unchanged to {@code compare}
	 * @return the score returned by {@code stdBase.compare}
	 */
	@Override
	public double calculateComparisonScore(Standardized stdBase, Standardized stdComp, boolean isSearch, boolean asContent){
		return stdBase.compare(stdComp, getRuleAnon(), getRuleCv(), isSearch, asContent);
	}

	/**
	 * Wraps the given words in a {@code StandardContent} configured with this
	 * matcher's rule anonymiser, rule lookup, {@code gradient} and
	 * {@code ruleFunction} (both presumably inherited from {@code MatchBase}).
	 *
	 * @param originalText not used by this implementation
	 * @param words        the words handed to {@code StandardContent}
	 * @return a new {@code StandardContent}
	 */
	@Override
	public Standardized standardise(String originalText, String[] words){
		return new StandardContent(words, this.getRuleAnon(), this.getRuleLookup(), gradient,ruleFunction);
	}

	/**
	 * Normalises a word for use in a key: lower-cases it, trims it and removes
	 * every character other than {@code a-z} and {@code 0-9}.
	 *
	 * <p>Lower-casing uses {@link Locale#ROOT} so the result does not depend on
	 * the JVM default locale (with a Turkish default locale, {@code "I"} would
	 * otherwise become a dotless i and be stripped from the key).
	 *
	 * @param word the word to normalise; must not be {@code null}
	 * @return the normalised word, possibly empty
	 */
	@Override
	public String getKey(String word) {
		return word.toLowerCase(Locale.ROOT).trim().replaceAll("[^a-z0-9]", "");
	}


	/**
	 * @return always {@code true}
	 */
	@Override
	public boolean isTextIndex() {
		return true;
	}

}
