/*
 * Decompiled with CFR 0.152.
 */
package com.aliasi.test.unit.dict;

import com.aliasi.chunk.Chunk;
import com.aliasi.chunk.ChunkFactory;
import com.aliasi.chunk.Chunking;
import com.aliasi.chunk.ChunkingImpl;
import com.aliasi.dict.DictionaryEntry;
import com.aliasi.dict.ExactDictionaryChunker;
import com.aliasi.dict.MapDictionary;
import com.aliasi.dict.TrieDictionary;
import com.aliasi.tokenizer.IndoEuropeanTokenizerFactory;
import com.aliasi.tokenizer.LowerCaseTokenizerFactory;
import com.aliasi.tokenizer.RegExTokenizerFactory;
import com.aliasi.tokenizer.TokenizerFactory;
import com.aliasi.util.AbstractExternalizable;
import java.io.IOException;
import java.util.Set;
import junit.framework.Assert;
import org.junit.Test;

public class ExactDictionaryChunkerTest {
    TokenizerFactory TOKENIZER_FACTORY = IndoEuropeanTokenizerFactory.INSTANCE;
    String regex = "[a-zA-Z]+|[0-9]+";
    TokenizerFactory REGEX_TOKENIZER_FACTORY = new RegExTokenizerFactory(this.regex);

    @Test
    public void testComposedFactories() {
        TokenizerFactory tf = new RegExTokenizerFactory("([a-z]+)|([A-Z]+)|([0-9]+)");
        tf = new LowerCaseTokenizerFactory(tf);
        MapDictionary<String> mapDict = new MapDictionary<String>();
        mapDict.addEntry(new DictionaryEntry<String>("p-53", "entry1"));
        ExactDictionaryChunker chunker = new ExactDictionaryChunker(mapDict, tf, true, true);
        String test = "bar mP53wt.";
        Chunking chunking = chunker.chunk(test);
        Set<Chunk> chunkSet = chunking.chunkSet();
        Assert.assertEquals((int)1, (int)chunkSet.size());
        Chunk chunk = chunkSet.iterator().next();
        Assert.assertEquals((String)"entry1", (String)chunk.type());
    }

    @Test
    public void testNulls() {
        Assert.assertNotNull((Object)this.REGEX_TOKENIZER_FACTORY);
        String test1 = "P53 should match both as should p53.";
        Assert.assertNotNull((Object)this.REGEX_TOKENIZER_FACTORY.tokenizer(test1.toCharArray(), 0, test1.length()).tokenize());
    }

    @Test
    public void testCaseSensitivity2() {
        TrieDictionary<String> trie = new TrieDictionary<String>();
        trie.addEntry(new DictionaryEntry<String>("P53", "human"));
        trie.addEntry(new DictionaryEntry<String>("p53", "mouse"));
        ExactDictionaryChunker chunker = new ExactDictionaryChunker(trie, this.REGEX_TOKENIZER_FACTORY, true, false);
        String test1 = "P53 should match both as should p53.";
        Chunking chunking = chunker.chunk(test1);
        Chunk human1 = ChunkFactory.createChunk(0, 3, "mouse", 1.0);
        Chunk mouse1 = ChunkFactory.createChunk(0, 3, "human", 1.0);
        Chunk human2 = ChunkFactory.createChunk(32, 35, "mouse", 1.0);
        Chunk mouse2 = ChunkFactory.createChunk(32, 35, "human", 1.0);
        this.assertChunking(chunker, test1, new Chunk[]{human1, mouse1, human2, mouse2});
    }

    @Test
    public void testTokenSensitivity() {
        TrieDictionary<String> trie = new TrieDictionary<String>();
        trie.addEntry(new DictionaryEntry<String>("p-53", "human"));
        trie.addEntry(new DictionaryEntry<String>("p53", "mouse"));
        ExactDictionaryChunker chunker = new ExactDictionaryChunker(trie, this.REGEX_TOKENIZER_FACTORY, true, true);
        String test1 = "p53 should match both as should p-53.";
        Chunking chunking = chunker.chunk(test1);
        Chunk human1 = ChunkFactory.createChunk(0, 3, "mouse", 1.0);
        Chunk mouse1 = ChunkFactory.createChunk(0, 3, "human", 1.0);
        Chunk human2 = ChunkFactory.createChunk(32, 36, "mouse", 1.0);
        Chunk mouse2 = ChunkFactory.createChunk(32, 36, "human", 1.0);
        this.assertChunking(chunker, test1, new Chunk[]{human1, mouse1, human2, mouse2});
    }

    @Test
    public void testEmptyDictionary() {
        MapDictionary<String> dictionary = new MapDictionary<String>();
        ExactDictionaryChunker caseInsensitiveChunker = new ExactDictionaryChunker(dictionary, this.TOKENIZER_FACTORY, true, false);
        caseInsensitiveChunker.chunk("John ran");
    }

    @Test
    public void testCaseSensitivity() {
        MapDictionary<String> dictionary = new MapDictionary<String>();
        dictionary.addEntry(new DictionaryEntry<String>("50 Cent", "PERSON", 1.0));
        dictionary.addEntry(new DictionaryEntry<String>("xyz120 DVD Player", "DB_ID_1232", 1.0));
        String text = "50 Cent is worth more than 50 cent.";
        Chunk capChunk = ChunkFactory.createChunk(0, 7, "PERSON", 1.0);
        Chunk lowChunk = ChunkFactory.createChunk(27, 34, "PERSON", 1.0);
        ExactDictionaryChunker caseInsensitiveChunker = new ExactDictionaryChunker(dictionary, this.TOKENIZER_FACTORY, true, false);
        this.assertChunking(caseInsensitiveChunker, text, new Chunk[]{lowChunk, capChunk});
        ExactDictionaryChunker caseSensitiveChunker = new ExactDictionaryChunker(dictionary, this.TOKENIZER_FACTORY, true, true);
        this.assertChunking(caseSensitiveChunker, text, new Chunk[]{capChunk});
    }

    @Test
    public void testOverlapsCase() {
        MapDictionary<String> dictionary = new MapDictionary<String>();
        dictionary.addEntry(new DictionaryEntry<String>("john smith", "PER", 7.0));
        dictionary.addEntry(new DictionaryEntry<String>("smith and barney", "ORG", 3.0));
        dictionary.addEntry(new DictionaryEntry<String>("smith", "LOC", 2.0));
        dictionary.addEntry(new DictionaryEntry<String>("smith", "PER", 5.0));
        Chunk chunk_0_10_PER = ChunkFactory.createChunk(0, 10, "PER", 7.0);
        Chunk chunk_5_10_PER = ChunkFactory.createChunk(5, 10, "PER", 5.0);
        Chunk chunk_5_10_LOC = ChunkFactory.createChunk(5, 10, "LOC", 2.0);
        Chunk chunk_5_21_ORG = ChunkFactory.createChunk(5, 21, "ORG", 3.0);
        Chunk[] allChunks = new Chunk[]{chunk_0_10_PER, chunk_5_10_PER, chunk_5_10_LOC, chunk_5_21_ORG};
        Chunk[] casedChunks = new Chunk[]{chunk_5_10_PER, chunk_5_10_LOC};
        Chunk[] singleChunks = new Chunk[]{chunk_0_10_PER};
        Chunk[] singleCaseChunks = new Chunk[]{chunk_5_10_PER};
        ExactDictionaryChunker chunker = new ExactDictionaryChunker(dictionary, this.TOKENIZER_FACTORY, true, true);
        this.assertChunking(chunker, "john smith and barney", allChunks);
        this.assertChunking(chunker, "JohN smith AND Barney", casedChunks);
        chunker = new ExactDictionaryChunker(dictionary, this.TOKENIZER_FACTORY, false, true);
        this.assertChunking(chunker, "john smith and barney", singleChunks);
        this.assertChunking(chunker, "JohN smith AND Barney", singleCaseChunks);
        chunker = new ExactDictionaryChunker(dictionary, this.TOKENIZER_FACTORY, true, false);
        this.assertChunking(chunker, "john smith and barney", allChunks);
        this.assertChunking(chunker, "JohN smith AND Barney", allChunks);
        chunker = new ExactDictionaryChunker(dictionary, this.TOKENIZER_FACTORY, false, false);
        this.assertChunking(chunker, "john smith and barney", singleChunks);
        this.assertChunking(chunker, "JohN smith AND Barney", singleChunks);
    }

    @Test
    public void testBoundaries() {
        MapDictionary<String> dictionary = new MapDictionary<String>();
        dictionary.addEntry(new DictionaryEntry<String>("john smith", "PER", 7.0));
        ExactDictionaryChunker chunker = new ExactDictionaryChunker(dictionary, this.TOKENIZER_FACTORY, true, true);
        Chunk[] noChunks = new Chunk[]{};
        this.assertChunking(chunker, "john", noChunks);
        this.assertChunking(chunker, "smith john", noChunks);
        this.assertChunking(chunker, "john smith", new Chunk[]{ChunkFactory.createChunk(0, 10, "PER", 7.0)});
        this.assertChunking(chunker, "john smith smith", new Chunk[]{ChunkFactory.createChunk(0, 10, "PER", 7.0)});
        this.assertChunking(chunker, "john smith frank", new Chunk[]{ChunkFactory.createChunk(0, 10, "PER", 7.0)});
        this.assertChunking(chunker, "then john smith", new Chunk[]{ChunkFactory.createChunk(5, 15, "PER", 7.0)});
        this.assertChunking(chunker, "john john smith", new Chunk[]{ChunkFactory.createChunk(5, 15, "PER", 7.0)});
        this.assertChunking(chunker, "john john smith smith", new Chunk[]{ChunkFactory.createChunk(5, 15, "PER", 7.0)});
    }

    void assertChunking(ExactDictionaryChunker chunker, String in, Chunk[] chunks) {
        Chunking chunking = chunker.chunk(in);
        ChunkingImpl chunkingExpected = new ChunkingImpl(in);
        for (int i = 0; i < chunks.length; ++i) {
            chunkingExpected.add(chunks[i]);
        }
        Assert.assertEquals((Object)chunkingExpected, (Object)chunking);
    }

    @Test
    public void testSuffixes() {
        MapDictionary<String> dictionary = new MapDictionary<String>();
        dictionary.addEntry(new DictionaryEntry<String>("john smith", "PER", 7.0));
        dictionary.addEntry(new DictionaryEntry<String>("Barry J. Jones III", "PER", 5.0));
        dictionary.addEntry(new DictionaryEntry<String>("Barry", "PER", 5.0));
        dictionary.addEntry(new DictionaryEntry<String>("Jones", "PER", 5.0));
        dictionary.addEntry(new DictionaryEntry<String>("J. Barry Johnson", "PER", 5.0));
        dictionary.addEntry(new DictionaryEntry<String>("I.B.M.", "ORG", 5.0));
        dictionary.addEntry(new DictionaryEntry<String>("Dean Witter", "PER", 3.0));
        dictionary.addEntry(new DictionaryEntry<String>("Dean Witter", "ORG", 7.0));
        dictionary.addEntry(new DictionaryEntry<String>("a b c", "PER", 7.0));
        dictionary.addEntry(new DictionaryEntry<String>("b", "PER", 5.2));
        ExactDictionaryChunker chunker = new ExactDictionaryChunker(dictionary, this.TOKENIZER_FACTORY, true, true);
        this.assertChunking(chunker, "a b d", new Chunk[]{ChunkFactory.createChunk(2, 3, "PER", 5.2)});
        this.assertChunking(chunker, "J. Barry Warwick", new Chunk[]{ChunkFactory.createChunk(3, 8, "PER", 5.0)});
    }

    @Test
    public void testSerialization() throws IOException, ClassNotFoundException {
        MapDictionary<String> dictionary = new MapDictionary<String>();
        dictionary.addEntry(new DictionaryEntry<String>("john smith", "PER", 7.0));
        ExactDictionaryChunker chunker = new ExactDictionaryChunker(dictionary, this.TOKENIZER_FACTORY, true, true);
        Assert.assertTrue((boolean)chunker.caseSensitive());
        Assert.assertTrue((boolean)chunker.returnAllMatches());
        Chunk[] noChunks = new Chunk[]{};
        this.assertChunking(chunker, "john", noChunks);
        ExactDictionaryChunker chunker2 = (ExactDictionaryChunker)AbstractExternalizable.serializeDeserialize(chunker);
        Assert.assertTrue((boolean)chunker2.caseSensitive());
        Assert.assertTrue((boolean)chunker2.returnAllMatches());
        this.assertChunking(chunker2, "john", noChunks);
        this.assertChunking(chunker2, "smith john", noChunks);
        this.assertChunking(chunker2, "john smith", new Chunk[]{ChunkFactory.createChunk(0, 10, "PER", 7.0)});
        this.assertChunking(chunker2, "john smith smith", new Chunk[]{ChunkFactory.createChunk(0, 10, "PER", 7.0)});
        this.assertChunking(chunker2, "john smith frank", new Chunk[]{ChunkFactory.createChunk(0, 10, "PER", 7.0)});
        this.assertChunking(chunker2, "then john smith", new Chunk[]{ChunkFactory.createChunk(5, 15, "PER", 7.0)});
        this.assertChunking(chunker2, "john john smith", new Chunk[]{ChunkFactory.createChunk(5, 15, "PER", 7.0)});
        this.assertChunking(chunker2, "john john smith smith", new Chunk[]{ChunkFactory.createChunk(5, 15, "PER", 7.0)});
    }

    @Test
    public void testSerialization2() throws IOException, ClassNotFoundException {
        MapDictionary<String> dictionary = new MapDictionary<String>();
        dictionary.addEntry(new DictionaryEntry<String>("john smith", "PER", 7.0));
        dictionary.addEntry(new DictionaryEntry<String>("Barry J. Jones III", "PER", 5.0));
        dictionary.addEntry(new DictionaryEntry<String>("Barry", "PER", 5.0));
        dictionary.addEntry(new DictionaryEntry<String>("Jones", "PER", 5.0));
        dictionary.addEntry(new DictionaryEntry<String>("J. Barry Johnson", "PER", 5.0));
        dictionary.addEntry(new DictionaryEntry<String>("I.B.M.", "ORG", 5.0));
        dictionary.addEntry(new DictionaryEntry<String>("Dean Witter", "PER", 3.0));
        dictionary.addEntry(new DictionaryEntry<String>("Dean Witter", "ORG", 7.0));
        ExactDictionaryChunker chunker = new ExactDictionaryChunker(dictionary, this.TOKENIZER_FACTORY, true, true);
        Assert.assertTrue((boolean)chunker.caseSensitive());
        Assert.assertTrue((boolean)chunker.returnAllMatches());
        ExactDictionaryChunker chunker2 = (ExactDictionaryChunker)AbstractExternalizable.serializeDeserialize(chunker);
        Assert.assertTrue((boolean)chunker2.caseSensitive());
        Assert.assertTrue((boolean)chunker2.returnAllMatches());
        Chunk[] noChunks = new Chunk[]{};
        this.assertChunking(chunker2, "john", noChunks);
        this.assertChunking(chunker2, "smith john", noChunks);
        this.assertChunking(chunker2, "J. Barry Warwick", new Chunk[]{ChunkFactory.createChunk(3, 8, "PER", 5.0)});
        this.assertChunking(chunker2, "john smith smith", new Chunk[]{ChunkFactory.createChunk(0, 10, "PER", 7.0)});
        this.assertChunking(chunker2, "john smith frank", new Chunk[]{ChunkFactory.createChunk(0, 10, "PER", 7.0)});
        this.assertChunking(chunker2, "then john smith", new Chunk[]{ChunkFactory.createChunk(5, 15, "PER", 7.0)});
        this.assertChunking(chunker2, "john john smith", new Chunk[]{ChunkFactory.createChunk(5, 15, "PER", 7.0)});
        this.assertChunking(chunker2, "john john smith smith", new Chunk[]{ChunkFactory.createChunk(5, 15, "PER", 7.0)});
    }
}

