/*
 * Decompiled with CFR 0.152.
 */
package org.languagetool.tokenizers.ru;

import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;
import java.util.regex.Pattern;
import org.languagetool.tokenizers.WordTokenizer;

/**
 * Word tokenizer for Russian text.
 *
 * <p>Extends the delimiter set inherited from {@link WordTokenizer} with the
 * apostrophe and the full stop, and shields a handful of character sequences
 * from being split at those delimiters by swapping them for placeholder
 * markers before splitting and swapping them back afterwards.
 */
public class RussianWordTokenizer
extends WordTokenizer {
    /** The two slash-containing abbreviations that are kept as single tokens. */
    private static final String BU_ABBREVIATION = "\u0431/\u0443";
    private static final String BN_ABBREVIATION = "\u0431/\u043d";

    /** Dot sequences that are protected before the generic "space + dot" rule runs. */
    private static final String SPACED_DOUBLE_DOT = " .. ";
    private static final String SPACED_DOT = " . ";
    private static final String SPACE_THEN_DOT = " .";

    /** Placeholder markers temporarily substituted for the sequences above. */
    private static final String BU_MARKER = "\u0001\u0001SOCR_BU\u0001\u0001";
    private static final String BN_MARKER = "\u0001\u0001SOCR_BN\u0001\u0001";
    private static final String SPACED_DOUBLE_DOT_MARKER = "\u0001\u0001SP_DDOT_SP\u0001\u0001";
    private static final String SPACED_DOT_MARKER = "\u0001\u0001SP_DOT_SP\u0001\u0001";
    private static final String DOT_MARKER = "\u0001\u0001SP_DOT\u0001\u0001";

    // Matchers for the original text fragments. The dot sequences are compiled
    // as literals so that '.' is not treated as a regex wildcard.
    private static final Pattern BU_TEXT = Pattern.compile(BU_ABBREVIATION);
    private static final Pattern BN_TEXT = Pattern.compile(BN_ABBREVIATION);
    private static final Pattern SPACED_DOUBLE_DOT_TEXT = Pattern.compile(SPACED_DOUBLE_DOT, Pattern.LITERAL);
    private static final Pattern SPACED_DOT_TEXT = Pattern.compile(SPACED_DOT, Pattern.LITERAL);
    private static final Pattern SPACE_THEN_DOT_TEXT = Pattern.compile(SPACE_THEN_DOT, Pattern.LITERAL);

    // Matchers for the placeholder markers, used when restoring the text.
    private static final Pattern SPACED_DOUBLE_DOT_MARK = Pattern.compile(SPACED_DOUBLE_DOT_MARKER);
    private static final Pattern SPACED_DOT_MARK = Pattern.compile(SPACED_DOT_MARKER);
    private static final Pattern BU_MARK = Pattern.compile(BU_MARKER);
    private static final Pattern BN_MARK = Pattern.compile(BN_MARKER);
    private static final Pattern DOT_MARK = Pattern.compile(DOT_MARKER);

    /**
     * Returns the delimiter characters used when splitting text.
     *
     * @return the superclass's tokenizing characters followed by the
     *         apostrophe and the full stop
     */
    public String getTokenizingCharacters() {
        return super.getTokenizingCharacters() + "'.";
    }

    /**
     * Splits {@code text} into tokens; delimiters are returned as tokens too.
     *
     * @param text the text to split
     * @return the tokens after the superclass's e-mail/URL joining step
     */
    public List<String> tokenize(String text) {
        // Hide the abbreviations so their '/' is not seen by the splitter.
        String guarded = BU_TEXT.matcher(text).replaceAll(BU_MARKER);
        guarded = BN_TEXT.matcher(guarded).replaceAll(BN_MARKER);

        // Set " .. " and " . " aside first so the "space + dot" rule below
        // cannot touch them; the order of these three steps matters.
        guarded = SPACED_DOUBLE_DOT_TEXT.matcher(guarded).replaceAll(SPACED_DOUBLE_DOT_MARKER);
        guarded = SPACED_DOT_TEXT.matcher(guarded).replaceAll(SPACED_DOT_MARKER);
        // Any remaining dot that directly follows a space is swapped for a
        // marker; the space itself stays in place.
        guarded = SPACE_THEN_DOT_TEXT.matcher(guarded).replaceAll(" " + DOT_MARKER);

        // The two spaced dot forms go back in before splitting, so they are
        // tokenized like ordinary text.
        guarded = SPACED_DOUBLE_DOT_MARK.matcher(guarded).replaceAll(SPACED_DOUBLE_DOT);
        guarded = SPACED_DOT_MARK.matcher(guarded).replaceAll(SPACED_DOT);

        List<String> tokens = new ArrayList<>();
        StringTokenizer splitter = new StringTokenizer(guarded, getTokenizingCharacters(), true);
        while (splitter.hasMoreTokens()) {
            String token = splitter.nextToken();
            // Turn any remaining markers inside the token back into real text.
            token = BU_MARK.matcher(token).replaceAll(BU_ABBREVIATION);
            token = BN_MARK.matcher(token).replaceAll(BN_ABBREVIATION);
            token = DOT_MARK.matcher(token).replaceAll(".");
            tokens.add(token);
        }
        return joinEMailsAndUrls(tokens);
    }
}

