/**
 *
	Identiza - Fuzzy matching Libraries
    
    Copyright (C) 2019  Robert James Haynes (EntityStream KFT), Budapest Hungary

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as
    published by the Free Software Foundation, either version 3 of the
    License, or (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program.  If not, see https://www.gnu.org/licenses/agpl-3.0.en.html
 */
package com.entitystream.identiza.entity.resolve.match;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;

import com.entitystream.monster.db.Document;
import com.entitystream.identiza.db.WorkTypes;
import com.entitystream.identiza.entity.resolve.metadata.IPurpose;
import com.entitystream.identiza.entity.resolve.metadata.IRule;
import com.entitystream.identiza.entity.resolve.metadata.ISchemaMeta;
import com.entitystream.identiza.entity.resolve.metadata.ITable;
import com.entitystream.identiza.entity.resolve.metadata.Purpose;
import com.entitystream.identiza.entity.resolve.metadata.PurposeColumn;
import com.entitystream.identiza.entity.resolve.metadata.PurposeColumnMap;
import com.entitystream.identiza.entity.resolve.metadata.RulePurpose;
import com.entitystream.identiza.entity.resolve.metadata.ISchemaMeta;
import com.entitystream.identiza.entity.resolve.storage.RecordInterface;
import com.entitystream.identiza.entity.resolve.types.MatchProcInterface;
import com.entitystream.identiza.entity.resolve.types.Standardized;
import com.entitystream.identiza.entity.resolve.types.StandardizedSerialized;

public class MatchRule implements Serializable, MatchRuleInterface{

	private Logger logger = Logger.getLogger("com.identiza");
	private IRule rule;
	private Map<String, IPurpose> ps;
	private ArrayList<RulePurpose> rulePurposes;
	private Map<String, Boolean> isTableInternal;
	private Map<String, String> tablekeys;
	private Map<String, List<PurposeColumnMap>> ppurposeColumnMaps;
	private String schemaName;

	public MatchRule(ISchemaMeta schDoc, IRule rule) {
		// for each rule calculate the score - FOR ALL RULES - DONT STOP IF
		// MATCHED
		this.rule=rule;
		ppurposeColumnMaps = new HashMap<String, List<PurposeColumnMap>> ();
		ps = new  HashMap<String, IPurpose>();
		schemaName=schDoc.getName();

		isTableInternal = new HashMap<String, Boolean>();
		tablekeys=new HashMap<String, String>();
		for (ITable base : schDoc.getTables()) {
			isTableInternal.put(base.getTableName(), !base.isTarget());
			tablekeys.put(base.getTableName(), base.getKeyField());
		}

		if (rule == null) {
			logger.severe("Rule is not valid, I cant create the match service for it");
		} else {

			rulePurposes = rule.getRulePurpose();
			for (RulePurpose rp : rulePurposes) {
				logger.fine("Running " + rp.getPurposeName());
				String basePurpose = rp.getPurposeName();
				IPurpose purpose = schDoc.getPurpose(basePurpose);
				ps.put(basePurpose, purpose);
				for (PurposeColumn pc : purpose.getPurposeColumns()) {

					for (ITable t :schDoc.getTables()) {
						List<PurposeColumnMap> map1 = schDoc.getPurposeColumnMaps(pc.getPurposeName(), pc.getColumn(), t.getTableName());
						ppurposeColumnMaps.put(basePurpose +":"+ pc.getColumn() +":"+t.getTableName(), map1);
					}
				}

			}
		}
	}
	private boolean canRun(String base, String comparitor) {
		Boolean ruleCanRun=false;

		if (!isTableInternal.get(base) || !isTableInternal.get(comparitor)) {
			if (!isTableInternal.get(base) && !isTableInternal.get(comparitor)) {
				if (!base.equals(comparitor)) 
					return false;
			} else return false;
		}


		if (!rule.isMatchSameSystem()){
			if (base.equalsIgnoreCase(comparitor))       
				ruleCanRun=false;
			else 
				ruleCanRun=true;

		} else ruleCanRun=true;



		return ruleCanRun;

	}

	/* (non-Javadoc)
	 * @see com.entitystream.identiza.entity.resolve.match.MatchRuleInterface#canRun(com.entitystream.identiza.entity.resolve.storage.RecordInterface, com.entitystream.identiza.entity.resolve.storage.RecordInterface)
	 */
	@Override
	public boolean canRun(RecordInterface base, RecordInterface comparitor) {
		return canRun(base.getTableName(),comparitor.getTableName());
	}

	/* (non-Javadoc)
	 * @see com.entitystream.identiza.entity.resolve.match.MatchRuleInterface#canRun(com.entitystream.monster.db.Document, com.entitystream.monster.db.Document)
	 */
	@Override
	public boolean canRun(Document base, Document comparitor) {
		return canRun(base.getString("Table"),comparitor.getString("Table"));
	}



	/* (non-Javadoc)
	 * @see com.entitystream.identiza.entity.resolve.match.MatchRuleInterface#score(com.entitystream.monster.db.Document, com.entitystream.monster.db.Document, boolean, boolean, boolean)
	 */
	@Override
	public MatchRecord score(Document base, Document comparitor, boolean forSearch, boolean asContent, boolean matchScoring)  {
		long starttime=System.currentTimeMillis();
		double hiScore = -1;
		MatchRecord hiScoreMr = null;

		if (rule.isActive() ) {

			double score = 0;
			double weightsum = 0;
			boolean match = true;

			for (RulePurpose rp : rulePurposes) {

				IPurpose p = ps.get(rp.getPurposeName());

				if (p != null) {
					double nextscore;
					try {
						nextscore = calculateScore(p, base,
								comparitor, forSearch,
								asContent, matchScoring);


						if (forSearch && nextscore==0.0d)
							nextscore=-1.0d;

						if (nextscore != -1.0 || (rp.isMandatory() && !forSearch ) || (rp.isMandatory() && (forSearch && matchScoring))) {
							double _acceptweight = 1.0;
							double _rejectweight = 1.0;
							if (rp.isNegate())
								nextscore = 100 - nextscore;
							if (!forSearch || (forSearch && matchScoring)) {
								_acceptweight = rp.getAcceptWeight();
								_rejectweight = rp.getRejectWeight();
							}

							// if the purposes match ie nextval > 0
							// use accept weight
							// if they dont use 1 as the score and
							// the reject weight

							double _weight = 1.0;
							score = score
									+ (Math.max(nextscore, 0.0) * _acceptweight);
							if (nextscore > 0) {
								_weight = _acceptweight;
							} else {
								_weight = _rejectweight;
							}
							weightsum = weightsum + _weight;
						}
					} catch (Exception e) {

						e.printStackTrace();
					}
				}
			}

			logger.fine(weightsum + " match " + match + " score " + score);
			if (weightsum != 0)
				score = score / weightsum;
			else
				score = 0;


			

			if (score > hiScore) {
				logger.fine(score + "% Rule #" + rule.getHashKey() + " score between " + base.getString("_id")
				+ " and " + comparitor.getString("_id") + " = " + score + " timed: " + (System.currentTimeMillis()-starttime));

				hiScore = score;
				String action= rule.getAction();
				String actionText=rule.getActionText();
				String table1=base.getString("Table");
				String table2=comparitor.getString("Table");
				//prevent merging across tables
				if (!table1.equalsIgnoreCase(table2)) {
				    if (!action.equalsIgnoreCase("LINK")){
					    action="EID";
				    }
				}
				hiScoreMr = new MatchRecord(comparitor, base, score, rule, action, actionText, comparitor.getString(tablekeys.get(table2)),comparitor.getString(tablekeys.get(table1)));
			
			
			}
		}
		return hiScoreMr;
	}


	public static List<Standardized> deserialise(List<Document> bsonlist) {
		List<Standardized> ret = new ArrayList<Standardized>();
		try {
			for (Document d : bsonlist){
				Standardized std = StandardizedSerialized.fromDocument(d);
				ret.add(std);
			}
		} catch (Exception e) {
			return null;
		}
		return ret;
	}


	private List<Standardized> getStandardised(Document baseNode, String purposeColumnName) throws Exception {
		List<Standardized> standardsarray=null;
		try {
			//is it cached?
			if (baseNode!=null) {

				Document stds = baseNode.getAsDocument("standardized");
				if (stds!=null) {
					List<Document> stdvals = (List<Document>) stds.get(purposeColumnName);
					if (stdvals!=null)
						standardsarray= (List<Standardized>) deserialise(stdvals);
				}
			}
		} catch (Exception e) {
			e.printStackTrace();
		}
		return standardsarray;
	}

	private double calculateScore(IPurpose p, Document basein, Document comparitorin,			
			boolean forSearch, boolean asContent, boolean matchScoring) throws Exception {

		double bestscore = -1;
		double weightsum = 0;
		List<PurposeColumn> pcs = p.getPurposeColumns();
		if (pcs.size() > 0) {
			double totalscore=0.0d;
			int numcols=0;
			for (PurposeColumn pc : pcs){
				MatchProcInterface proc = pc.createMatchProc();
				List<Standardized> baseStdList = getStandardised(basein, pc.getColumn());
				List<Standardized> compStdList = getStandardised(comparitorin, pc.getColumn());
				if (baseStdList !=null && compStdList != null)
					for (Standardized baseStd : baseStdList) {
						for (Standardized compStd : compStdList) {
							if (baseStd.getComparitorWords().length>0 && compStd.getComparitorWords().length>0){
								double score = proc.calculateComparisonScore(baseStd,
										compStd, forSearch, asContent);
								if (score > bestscore) {
									bestscore = score;
								}
							}
						}
					}
				totalscore+=bestscore;
				numcols++;
			}
			if (numcols>0)
				bestscore=totalscore/numcols;
			else
				bestscore=0.0d;
			Logger.getLogger("com.identiza").fine(p.getPurposeName() + " score: " + bestscore);
		} else Logger.getLogger("com.identiza").fine(p.getPurposeName() + " was ignored");
		return bestscore;
	}
}
