/*
 * Decompiled with CFR 0.152.
 */
package smile.nlp.tokenizer;

import java.text.BreakIterator;
import java.util.ArrayList;
import java.util.Locale;
import smile.nlp.tokenizer.Tokenizer;

/**
 * Word tokenizer that delegates boundary detection to a
 * {@link BreakIterator} word instance.
 *
 * <p>The input is cut at every boundary the iterator reports. Each segment has its
 * surrounding whitespace removed, and segments that are empty afterwards are dropped.
 * No other filtering is applied, so every non-blank segment reported by the iterator
 * is returned as a token.
 *
 * <p>Each instance owns a single {@link BreakIterator} that {@link #split(String)}
 * re-targets and advances on every call, so one instance must not be used by several
 * threads at the same time without external synchronization.
 */
public class BreakIteratorTokenizer
implements Tokenizer {
    /** Word-boundary iterator; its text and position are overwritten by every {@code split} call. */
    private final BreakIterator boundary;

    /**
     * Creates a tokenizer using {@link BreakIterator#getWordInstance()}.
     */
    public BreakIteratorTokenizer() {
        this.boundary = BreakIterator.getWordInstance();
    }

    /**
     * Creates a tokenizer using {@link BreakIterator#getWordInstance(Locale)}.
     *
     * @param locale the locale whose word-boundary rules should be used
     */
    public BreakIteratorTokenizer(Locale locale) {
        this.boundary = BreakIterator.getWordInstance(locale);
    }

    /**
     * Splits {@code text} into tokens at the boundaries reported by the word iterator.
     *
     * @param text the text to tokenize; it is handed directly to
     *             {@link BreakIterator#setText(String)}, so it is expected to be non-null
     * @return the non-blank segments in order of appearance; an empty array if there are none
     */
    @Override
    public String[] split(String text) {
        this.boundary.setText(text);
        ArrayList<String> words = new ArrayList<String>();
        int start = this.boundary.first();
        for (int end = this.boundary.next(); end != BreakIterator.DONE; end = this.boundary.next()) {
            String word = trimBlank(text, start, end);
            if (!word.isEmpty()) {
                words.add(word);
            }
            // The end of this segment is the start of the next one.
            start = end;
        }
        return words.toArray(new String[0]);
    }

    /**
     * Returns {@code text[from, to)} without leading and trailing blank characters.
     *
     * <p>Unlike {@link String#trim()}, which only removes characters up to U+0020, this
     * also removes Unicode spaces, so segments consisting solely of such characters do
     * not leak out as tokens.
     */
    private static String trimBlank(String text, int from, int to) {
        while (from < to && isBlank(text.charAt(from))) {
            from++;
        }
        while (to > from && isBlank(text.charAt(to - 1))) {
            to--;
        }
        return text.substring(from, to);
    }

    /**
     * Tells whether {@code c} should be trimmed: everything {@link String#trim()} removes
     * (characters up to U+0020) plus Unicode whitespace and space separators, including
     * the non-breaking ones.
     */
    private static boolean isBlank(char c) {
        return c <= ' ' || Character.isWhitespace(c) || Character.isSpaceChar(c);
    }
}

