/**
 *
	Identiza - Fuzzy matching Libraries
    
    Copyright (C) 2019  Robert James Haynes (EntityStream KFT), Budapest Hungary

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as
    published by the Free Software Foundation, either version 3 of the
    License, or (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program.  If not, see https://www.gnu.org/licenses/agpl-3.0.en.html
 */
package com.entitystream.identiza.entity.resolve.types;

import java.io.Serializable;
import java.util.ArrayList;

import com.entitystream.identiza.entity.resolve.processing.GradientGenerator;
import com.entitystream.identiza.entity.resolve.utils.EditDistance;
import com.entitystream.identiza.entity.resolve.utils.NYSIIS;
import com.entitystream.identiza.wordlist.WordList;

/**
 * Structured view of a tokenised postal address.
 * <p>
 * The constructor applies a set of positional heuristics to the supplied tokens and fills in
 * whichever of the fields (PO box, unit, house number range, street name, locality, post code)
 * it can recognise. Exactly one of three parsing strategies is used, chosen in this order:
 * <ol>
 * <li>a PO box designator was found - only {@link #poboxNumber} and {@link #locality} are set;</li>
 * <li>a street type token (ROAD, STREET, ...) was found - all street fields are set;</li>
 * <li>neither was found - a leading unit/house number is extracted and the rest becomes
 * {@link #locality}.</li>
 * </ol>
 * {@link #compare} scores two instances against each other.
 */
public class StandardAddress  extends StandardizedSerialized {

	private static final long serialVersionUID = -7045577794471196382L;

	/** Designators that introduce a PO box number. */
	private static final String[] POBOX_TOKENS = new String[]{"BOX", "POBOX", "PO", "PBOX"};

	/**
	 * Street type designators; the token matched from this list marks the end of the street name.
	 * NOTE(review): "CULS-DE-SAC " carries a trailing space and "GARDENS"/"TER" are listed twice;
	 * the list is kept byte-identical because the matching rules of EditDistance.findToken are
	 * not visible here.
	 */
	private static final String[] STREET_TYPE_TOKENS = new String[] {"ALLEY", "ALY", "APR", "APPROACH", "AUTOBAHN", "AUTO-ESTRADA", "AUTOROUTE", "AUTOSTRADA", "AUTOSTRASSE", "AVE", "AVENUE", "BAY", "BLVD", "BOULEVARD", "BYP", "BYPASS", "BYWAY", "CANYON", "CAUSEWAY", "CIR", "CIRC", "CIRCLE", "CIRCUS", "CLOSE", "COURT", "COVE", "CRES", "CRESCENT", "CROFT", "CSWY", "CT", "CULS-DE-SAC ", "CV", "CYN", "DELL", "DR", "DRIVE", "ESP", "ESPLANADE", "EXPRESSWAY", "EXPY", "FREEWAY", "FRONTAGE", "FWY", "GARDENS", "GARTH", "GATE", "GARDENS", "GDNS", "GRADE", "GREEN", "GRN", "GROVE", "GRV", "HEIGHTS", "HIGHLANDS", "HIGHWAY", "HILL", "HL", "HTS", "HWY", "KEY", "KNL", "KNOLL", "LANE", "LAWN", "LN", "LOOP", "MANOR", "MEWS", "MNR", "MOTORWAY", "MOUNT", "MT", "MTRWY", "MWAY", "NENE", "NOOK", "OVAL", "PARADE", "PARK", "PARKWAY", "PASSAGE", "PATHWAY", "PDE", "PIKE", "PKWY", "PL", "PLACE", "PLAZA", "PLZ", "PROM", "PROMENADE", "PSGE", "PTHWY", "QUADRANT", "QUAY", "RD", "RISE", "ROAD", "ROW", "SPUR", "SQ", "SQUARE", "ST", "STRA", "STRAVENUE", "STREET", "TCE", "TER", "TERRACE", "TPK", "TRACE", "TRAIL", "TRCE", "TRL", "TURNPIKE", "VALE", "VIEW", "VW", "WALK", "WAY", "RTT", "ROUNDABOUT", "UTCA", "UT", "KRT","TER"};

	/** Designators that introduce (or, for "/", follow) a unit/flat number. */
	private static final String[] FLAT_TOKENS = new String[] {"FLAT", "FL", "UNIT", "U", "APT", "APARTMENT", "SHOP", "BUILDING", "BLDG", "SUITE", "SUITES","/"};

	/** Unit/flat number; -1 means "no unit" (see {@link #getAlternates()}). */
	int unitNumber=-1;
	/** First house number of the range; equal to {@link #endHouseNumber} for a single number. */
	int startHouseNumber;
	/** Last house number of the range. */
	int endHouseNumber;
	/** Never assigned by this class. */
	String houseName;
	/** Token following a PO box designator, or null when the address is not a PO box. */
	String poboxNumber;
	/** Words between the house number and the street type token, space separated. */
	String streetName;
	/** Remaining non-numeric words (city etc.), space separated. */
	String locality;
	/** Concatenation (without separator) of everything from the first digit-bearing word after the street type. */
	String postCode;
	/** Gradient selector handed to GradientGenerator.generate when comparing. */
	int gradient;

	/** Creates an empty address; no field is parsed. */
	public StandardAddress()
	{

	}

	/**
	 * Parses the given tokens into the address fields.
	 *
	 * @param words        address tokens; individual entries may be null or empty
	 * @param anon         passed through to the superclass
	 * @param cv           passed through to the superclass
	 * @param gradient     gradient selector, stored and passed through to the superclass
	 * @param ruleFunction passed through to the superclass
	 */
	public StandardAddress(String[] words, WordList anon, WordList cv, int gradient, String ruleFunction){
		super(words, anon, cv, gradient, ruleFunction);
		this.gradient=gradient;
		if (words == null) {
			return;
		}

		int poboxpos = EditDistance.findToken(words, POBOX_TOKENS);
		int streetTypePos = EditDistance.findToken(words, STREET_TYPE_TOKENS);
		int flatpos = EditDistance.findToken(words, FLAT_TOKENS);

		if (poboxpos > -1) {
			parsePoBox(words, poboxpos);
		} else if (streetTypePos > -1) {
			parseStreetAddress(words, streetTypePos, flatpos);
		} else {
			parseUnstructured(words, flatpos);
		}
	}

	/** Result of looking for the unit and house number in front of the street name. */
	private static final class NumberScan {
		/** Index of the token holding the house number, or -1 when none was found. */
		int numberPos = -1;
		/** Raw unit number text, empty when none was found. */
		String unitText = "";
		/** Raw house number text (may be a range such as "3-5"), empty when none was found. */
		String houseText = "";
	}

	/** PO box strategy: first numeric token after the designator is the box, the rest is the locality. */
	private void parsePoBox(String[] words, int poboxpos) {
		for (int i = poboxpos + 1; i < words.length; i++) {
			if (startsWithDigit(words[i])) {
				poboxNumber = words[i];
				// Everything after the box number is the city. (The previous code started this
				// loop at the street type position, which pulled the whole address - including
				// the PO box itself - into the locality whenever no street type was present.)
				locality = joinWords(words, i + 1, words.length);
				return;
			}
		}
	}

	/** Street strategy: [unit] number street-name STREET-TYPE city... postcode... */
	private void parseStreetAddress(String[] words, int streetTypePos, int flatpos) {
		NumberScan scan = scanNumbers(words, flatpos, streetTypePos);
		unitNumber = EditDistance.niceParseInt(scan.unitText);
		setHouseNumberRange(scan.houseText);

		// street name sits between the house number and the street type
		streetName = joinWords(words, scan.numberPos + 1, streetTypePos);

		// After the street type: words go to the city until the first word containing a digit;
		// from there on everything is glued together as the post code.
		StringBuilder city = new StringBuilder();
		StringBuilder postcode = new StringBuilder();
		boolean inPostcode = false;
		for (int i = streetTypePos + 1; i < words.length; i++) {
			String word = words[i];
			if (word == null) {
				continue;
			}
			if (inPostcode || word.matches(".*\\d.*")) {
				postcode.append(word);
				inPostcode = true;
			} else {
				city.append(word).append(' ');
			}
		}
		if (city.length() > 0) {
			city.setLength(city.length() - 1);
		}
		locality = city.toString();
		postCode = postcode.toString();
	}

	/** Fallback strategy when there is no street type: [unit] number, everything else is the locality. */
	private void parseUnstructured(String[] words, int flatpos) {
		NumberScan scan = scanNumbers(words, flatpos, words.length);
		// Only overwrite the "no unit" default when a unit was actually seen. (Previously the
		// unit found via a flat designator was read but never stored in this branch.)
		if (scan.unitText.length() > 0) {
			unitNumber = EditDistance.niceParseInt(scan.unitText);
		}
		setHouseNumberRange(scan.houseText);

		// if there were no numbers then numberPos is -1 and all of it is taken
		String rest = joinWords(words, scan.numberPos + 1, words.length);
		if (rest.length() > 0) {
			locality = rest;
		}
	}

	/**
	 * Locates the unit number (via the flat designator) and the first numeric token before
	 * {@code limit}, which is taken as the house number.
	 * <p>
	 * Convention used throughout: for "unit/house" the number in front of the slash is the unit.
	 *
	 * @param flatpos index of the flat designator, or a negative value when there is none
	 * @param limit   exclusive upper bound of the house number search
	 */
	private static NumberScan scanNumbers(String[] words, int flatpos, int limit) {
		NumberScan scan = new NumberScan();
		int searchFrom = 0;
		if (flatpos > -1 && flatpos < words.length) {
			if ("/".equals(words[flatpos]) && flatpos > 0) {
				// "4 / 45 ..." - unit precedes the slash, house number follows it directly
				scan.unitText = words[flatpos - 1] == null ? "" : words[flatpos - 1];
				searchFrom = flatpos + 1;
			} else if (flatpos + 1 < words.length) {
				// "UNIT 4 45 ..." - unit follows the designator, house number comes after that
				scan.unitText = words[flatpos + 1] == null ? "" : words[flatpos + 1];
				searchFrom = flatpos + 2;
			}
			// a designator with nothing after it is ignored and the search starts at 0
		}

		for (int i = searchFrom; i < limit && i < words.length; i++) {
			String word = words[i];
			if (!startsWithDigit(word)) {
				continue;
			}
			scan.numberPos = i;
			if (word.contains("\\") || word.contains("/")) {
				// split on either a backslash or a forward slash
				String[] parts = word.split("[\\\\/]");
				if (parts.length > 1) {
					scan.unitText = parts[0];
					scan.houseText = parts[parts.length - 1];
				} else if (parts.length == 1) {
					scan.houseText = parts[0];
				}
			} else {
				scan.houseText = word;
			}
			break;
		}
		return scan;
	}

	/** Sets start/end house number from a single number ("45") or a range ("3-5"). */
	private void setHouseNumberRange(String houseText) {
		int single = EditDistance.niceParseInt(houseText);
		startHouseNumber = single;
		endHouseNumber = single;
		if (houseText.contains("-")) {
			String[] bounds = houseText.split("-");
			if (bounds.length > 1) {
				startHouseNumber = EditDistance.niceParseInt(bounds[0]);
				endHouseNumber = EditDistance.niceParseInt(bounds[bounds.length - 1]);
			}
		}
	}

	/** True when the word, ignoring surrounding whitespace, begins with an ASCII digit; null/blank safe. */
	private static boolean startsWithDigit(String word) {
		if (word == null) {
			return false;
		}
		String trimmed = word.trim();
		if (trimmed.length() == 0) {
			return false;
		}
		char first = trimmed.charAt(0);
		return first >= '0' && first <= '9';
	}

	/** Joins words[from..to) with single spaces, skipping null entries; empty string for an empty range. */
	private static String joinWords(String[] words, int from, int to) {
		StringBuilder joined = new StringBuilder();
		boolean first = true;
		for (int i = Math.max(from, 0); i < to && i < words.length; i++) {
			if (words[i] == null) {
				continue;
			}
			if (!first) {
				joined.append(' ');
			}
			joined.append(words[i]);
			first = false;
		}
		return joined.toString();
	}

	/**
	 * Renders either "PO&lt;box&gt;,locality,postcode" or "unit/start-end,street,locality,postcode".
	 * NOTE(review): numbers are printed through Math.abs, so a missing unit (-1) shows up as 1;
	 * left unchanged because callers may depend on the exact text.
	 */
	@Override
	public String toString(){
		if (poboxNumber!=null && poboxNumber.length()>0) {
			return "PO"+poboxNumber + "," + locality + "," + postCode;
		}
		return Math.abs(unitNumber)+"/"+Math.abs(startHouseNumber)+"-"+Math.abs(endHouseNumber)+","+ streetName + "," + locality + "," + postCode;
	}

	/**
	 * Builds the token lists this address may be compared as: one per house number in the range
	 * (or one for a PO box / number-less address), plus the original words as a fallback in case
	 * structuring failed.
	 */
	private ArrayList<ArrayList<String>> getAlternates(){
		ArrayList<ArrayList<String>> ret = new ArrayList<ArrayList<String>>();
		if (poboxNumber!=null){
			ArrayList<String> item = new ArrayList<String>();
			item.add(poboxNumber);
			addIfPresent(item, locality);
			addIfPresent(item, postCode);
			ret.add(item);
		} else if (startHouseNumber!=-1){
			for (int number=startHouseNumber; number<=endHouseNumber; number++ ){
				ArrayList<String> item = new ArrayList<String>();
				if (unitNumber!=-1) {
					item.add(""+unitNumber);
				}
				item.add(""+number);
				addIfPresent(item, streetName);
				addIfPresent(item, locality);
				addIfPresent(item, postCode);
				ret.add(item);
			}
		} else {
			ArrayList<String> item = new ArrayList<String>();
			addIfPresent(item, streetName);
			addIfPresent(item, locality);
			addIfPresent(item, postCode);
			ret.add(item);
		}

		//we may have failed to structure the address - so add the ordered text
		if (originalWords != null) {
			ArrayList<String> item = new ArrayList<String>();
			for (String word : originalWords) {
				item.add(word);
			}
			ret.add(item);
		}
		return ret;
	}

	/** Appends value to target unless it is null. */
	private static void addIfPresent(ArrayList<String> target, String value) {
		if (value != null) {
			target.add(value);
		}
	}

	/**
	 * Scores this address against another one.
	 * <p>
	 * The best edit-distance based score over all pairs of alternates is computed first (pairs
	 * where the shorter alternate has three or fewer tokens are skipped). When both sides have a
	 * street name, that score is only returned if the street names are NYSIIS-equal, the unit
	 * numbers are equal and the house number ranges overlap; otherwise 0 is returned.
	 *
	 * @param otherstd  the address to compare with; anything that is not a StandardAddress scores 0
	 * @param anon      passed through to the edit distance calculation
	 * @param cv        passed through to the edit distance calculation
	 * @param isSearch  not used by this implementation
	 * @param asContent not used by this implementation
	 * @return the best alternate score, or 0
	 */
	@Override
	public double compare(Standardized otherstd, WordList anon, WordList cv,  boolean isSearch, boolean asContent){
		if (!(otherstd instanceof StandardAddress)) {
			return 0;
		}
		StandardAddress other = (StandardAddress)otherstd;

		ArrayList<ArrayList<String>> thisAlts = getAlternates();
		ArrayList<ArrayList<String>> otherAlts = other.getAlternates();
		double hiscore=0;
		for (ArrayList<String> thisAlt : thisAlts){
			for (ArrayList<String> otherAlt : otherAlts){
				int min=Math.min(thisAlt.size(), otherAlt.size());
				int max=Math.max(thisAlt.size(), otherAlt.size());
				//the following check stops badly standardized records comparing
				if (min<=3) {
					continue;
				}
				double[] gradients = GradientGenerator.generate(min, max, 2, 1, 0.2, gradient);
				if (gradients==null) {
					continue;
				}
				double div = GradientGenerator.sum(gradients);
				if (div <= 0) {
					// avoid NaN/Infinity poisoning hiscore
					continue;
				}
				double score = EditDistance.getSimpleEditDistanceOfArrayList(thisAlt, otherAlt, anon, cv, gradients);
				hiscore=Math.max(((div-score)/div)*100, hiscore);
			}
		}

		// house number ranges overlap when either range contains the other's start
		boolean numberSame =
				(this.startHouseNumber >= other.startHouseNumber && this.startHouseNumber<= other.endHouseNumber) ||
				(other.startHouseNumber >= this.startHouseNumber && other.startHouseNumber<= this.endHouseNumber);

		if (this.streetName==null || other.streetName==null) {
			return hiscore;
		}
		if (!NYSIIS.isEncodeEqual(this.streetName, other.streetName)) {
			return 0;
		}
		if (this.unitNumber==other.unitNumber && numberSame) {
			return hiscore;
		}
		return 0;
	}


	/**
	 * Manual smoke test: standardises two sample addresses and prints their comparison score.
	 * Further sample inputs that have been used with this class:
	 * "Shop 1130 Carousel Shopping Centre 1386 Albany HWY CANNINGTON 6107",
	 * "Shop 58 Centro Maddington 43 Attfield ST MADDINGTON 6109",
	 * "56 Mayer Gardens, SHenley Lodge, Milton Keynes, MK5 7EW",
	 * "The old post office, watling street, little brickhill, Milton Keynes, MK12 7EW",
	 * "45 fraser road, killcare, nsw 2257",
	 * "4/45 fraser road, killcare, nsw 2257",
	 * "unit 12 45 fraser road, killcare, nsw 2257".
	 */
	public static void main(String[] args){
		MatchAddress mcn = new MatchAddress("X");
		mcn.gradient=GradientGenerator.ADDRESS;

		ArrayList<String> one = new ArrayList<String>();
		ArrayList<String> two = new ArrayList<String>();
		for (String v : WordList.split("3-5 PENN STREET, DARWIN, NT. 800")) {
			if (v!=null) {
				one.add(v);
			}
		}
		System.out.println(mcn.standardise("",one.toArray(new String[one.size()])));
		for (String v : WordList.split("4 PENN STREET DARWIN")) {
			if (v!=null) {
				two.add(v);
			}
		}
		System.out.println(mcn.standardise("",two.toArray(new String[two.size()])));

		System.out.println(mcn.calculateComparisonScore(
				new StandardAddress(one.toArray(new String[one.size()]), null,null,GradientGenerator.ADDRESS,""),
				new StandardAddress(two.toArray(new String[two.size()]), null,null,GradientGenerator.ADDRESS,""), false, false));
	}

}

