/*
 * Decompiled with CFR 0.152.
 */
package com.aliasi.test.unit.spell;

import com.aliasi.lm.CompiledNGramProcessLM;
import com.aliasi.lm.NGramProcessLM;
import com.aliasi.spell.CompiledSpellChecker;
import com.aliasi.spell.FixedWeightEditDistance;
import com.aliasi.util.AbstractExternalizable;
import java.io.IOException;
import java.util.Collections;
import java.util.HashSet;
import junit.framework.Assert;
import org.junit.Test;

/**
 * Unit tests for {@link CompiledSpellChecker#didYouMean(String)}.
 *
 * <p>Each test trains a 5-gram {@link NGramProcessLM} on a single repeated
 * phrase, compiles it, builds a spell checker over a small token set, and
 * then asserts the correction returned for a number of queries.
 */
public class CompiledSpellCheckerTest {

    /** N-gram length used for every language model built in this class. */
    private static final int NGRAM_LENGTH = 5;

    /**
     * Checks that a misspelled last token ("SCEDULE") is corrected when the
     * query contains the three-character token "ACT" in the middle, with the
     * minimum token length to correct set first to 2 and then to 3.
     */
    @Test
    public void testShortTokenMidQuery() throws ClassNotFoundException, IOException {
        CompiledNGramProcessLM clm = trainAndCompile(" FINANCE ACT SCHEDULE ", 9999);
        // Arguments per the LingPipe API: match, delete, insert, substitute, transpose weights.
        FixedWeightEditDistance editDistance = new FixedWeightEditDistance(0.0, -1.0, -1.0, -1.0, -1.0);
        HashSet<String> tokenSet = new HashSet<String>();
        tokenSet.add("FINANCE");
        tokenSet.add("ACT");
        tokenSet.add("SCHEDULE");

        CompiledSpellChecker sc = new CompiledSpellChecker(clm, editDistance, tokenSet);
        sc.setFirstCharEditCost(-1.0);
        sc.setSecondCharEditCost(-1.0);
        sc.setNBest(32);
        sc.setKnownTokenEditCost(-1.0);
        sc.setNumConsecutiveInsertionsAllowed(1);
        sc.setAllowDelete(true);
        sc.setAllowInsert(true);
        sc.setAllowMatch(true);
        sc.setAllowSubstitute(true);
        sc.setAllowTranspose(true);

        sc.setMinimumTokenLengthToCorrect(2);
        assertCorrection(sc, "FINANCE ACT SCEDULE", "FINANCE ACT SCHEDULE");
        sc.setMinimumTokenLengthToCorrect(3);
        assertCorrection(sc, "FINANCE ACT SCEDULE", "FINANCE ACT SCHEDULE");
    }

    /**
     * Checks that the two-character query "ac" is returned unchanged by a
     * checker whose only known token is "ab" and whose minimum token length
     * to correct is 2.
     */
    @Test
    public void testShortToken() throws ClassNotFoundException, IOException {
        CompiledNGramProcessLM clm = trainAndCompile(" ab ", 999);
        FixedWeightEditDistance editDistance = new FixedWeightEditDistance(0.0, -1.0, -1.0, -1.0, -1.0);
        HashSet<String> tokenSet = new HashSet<String>();
        tokenSet.add("ab");

        CompiledSpellChecker sc = new CompiledSpellChecker(clm, editDistance, tokenSet);
        sc.setMinimumTokenLengthToCorrect(2);
        assertCorrection(sc, "ac", "ac");
    }

    /**
     * Exercises a checker whose only known token is "Smith": single-token
     * misspellings, multi-token queries, do-not-edit tokens, the minimum
     * token length to correct, a prohibitive first-character edit cost, and
     * finally a second checker built with an empty token set.
     */
    @Test
    public void testTwo() throws ClassNotFoundException, IOException {
        CompiledNGramProcessLM clm = trainAndCompile(" Smith ", 9999);
        FixedWeightEditDistance editDistance = new FixedWeightEditDistance(0.0, -2.0, -2.0, -2.0, -2.0);
        HashSet<String> tokenSet = new HashSet<String>();
        tokenSet.add("Smith");
        CompiledSpellChecker sc = new CompiledSpellChecker(clm, editDistance, tokenSet);

        // Single-token misspellings are all expected to come back as "Smith".
        assertCorrection(sc, "Smythe", "Smith");
        assertCorrection(sc, "mith", "Smith");
        assertCorrection(sc, "Tmith", "Smith");
        assertCorrection(sc, "mSith", "Smith");
        assertCorrection(sc, "Stith", "Smith");
        assertCorrection(sc, "Skth", "Smith");

        // Multi-token queries, including a missing space and a spurious space.
        assertCorrection(sc, "mith Smith", "Smith Smith");
        assertCorrection(sc, "Smith mith", "Smith Smith");
        assertCorrection(sc, "SmithSmith", "Smith Smith");
        assertCorrection(sc, "Smi th", "Smith");

        // Tokens registered as do-not-edit are expected to be left untouched.
        HashSet<String> doNotEditTokens = new HashSet<String>();
        doNotEditTokens.add("mith");
        sc.setDoNotEditTokens(doNotEditTokens);
        assertCorrection(sc, "mith", "mith");
        assertCorrection(sc, "Smith mith", "Smith mith");

        // Two-character tokens: unchanged with a minimum length of 3, merged with 1.
        sc.setMinimumTokenLengthToCorrect(3);
        assertCorrection(sc, "Sm th", "Sm th");
        sc.setMinimumTokenLengthToCorrect(1);
        assertCorrection(sc, "Sm th", "Smith");

        // The same query is expected unchanged once both pieces are do-not-edit tokens.
        doNotEditTokens.add("Sm");
        doNotEditTokens.add("th");
        sc.setDoNotEditTokens(doNotEditTokens);
        assertCorrection(sc, "Sm th", "Sm th");

        // Clear the do-not-edit set (typed empty set instead of the raw EMPTY_SET),
        // then make first-character edits prohibitively expensive.
        sc.setDoNotEditTokens(Collections.<String>emptySet());
        sc.setFirstCharEditCost(-1000.0);
        assertCorrection(sc, "mith", "mith");
        assertCorrection(sc, "Tmith", "Tmith");

        // A checker constructed with an empty token set is expected to leave "Smth" as is.
        CompiledSpellChecker sc2 = new CompiledSpellChecker(clm, editDistance, new HashSet<String>());
        assertCorrection(sc2, "Smth", "Smth");
    }

    /**
     * Asserts that {@code sc.didYouMean(query)} equals the expected string.
     *
     * @param sc                 spell checker under test
     * @param query              text handed to {@code didYouMean}
     * @param expectedCorrection string the checker is expected to return
     */
    void assertCorrection(CompiledSpellChecker sc, String query, String expectedCorrection) {
        String correction = sc.didYouMean(query);
        // Include the query in the failure message so a failing case is identifiable.
        Assert.assertEquals("didYouMean(\"" + query + "\")", expectedCorrection, correction);
    }

    /**
     * Debugging aid: prints the log2 estimate that the checker's language
     * model assigns to {@code query} surrounded by single spaces.
     *
     * @param msg   prefix for the printed line
     * @param query text to score
     * @param sc    spell checker whose language model is queried
     */
    void displayPs(String msg, String query, CompiledSpellChecker sc) {
        CompiledNGramProcessLM lm = sc.languageModel();
        System.out.println(msg + "  log2 P(" + query + ")=" + lm.log2Estimate(" " + query + " "));
    }

    /**
     * Trains a fresh {@link NGramProcessLM} on {@code text} the given number
     * of times and returns its compiled form.
     *
     * @param text   training text, passed to {@code train} on every pass
     * @param passes number of times {@code text} is trained
     * @return the result of {@code AbstractExternalizable.compile} cast to
     *         {@link CompiledNGramProcessLM}
     */
    private static CompiledNGramProcessLM trainAndCompile(String text, int passes)
            throws ClassNotFoundException, IOException {
        NGramProcessLM lm = new NGramProcessLM(NGRAM_LENGTH);
        for (int pass = 0; pass < passes; ++pass) {
            lm.train(text);
        }
        return (CompiledNGramProcessLM) AbstractExternalizable.compile(lm);
    }
}

