/*
 * Decompiled with CFR 0.152.
 */
package com.apple.foundationdb.record.provider.common.text;

import com.apple.foundationdb.annotation.API;
import com.apple.foundationdb.record.provider.common.text.TextTokenizer;
import java.text.BreakIterator;
import java.text.Normalizer;
import java.util.Iterator;
import java.util.Locale;
import java.util.NoSuchElementException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;

/**
 * Tokenizer that splits text at the word boundaries reported by a
 * {@link BreakIterator} created for {@link Locale#ROOT} and normalizes every
 * resulting word.
 *
 * <p>Each segment between two consecutive boundaries is processed as follows:</p>
 * <ol>
 *   <li>it is normalized to Unicode form NFKD;</li>
 *   <li>it is discarded unless it contains at least one letter or digit, which
 *       filters out segments made only of whitespace or punctuation;</li>
 *   <li>it is lower-cased using {@link Locale#ROOT};</li>
 *   <li>all combining marks ({@code \p{M}}) are removed.</li>
 * </ol>
 *
 * <p>The class is a singleton; use {@link #instance()} to obtain it.</p>
 */
@API(value=API.Status.EXPERIMENTAL)
public class DefaultTextTokenizer
implements TextTokenizer {
    /** The only instance of this tokenizer. */
    @Nonnull
    private static final DefaultTextTokenizer INSTANCE = new DefaultTextTokenizer();
    /** Normalization form applied to every segment before it is inspected. */
    @Nonnull
    private static final Normalizer.Form NORMALIZED_FORM = Normalizer.Form.NFKD;
    /** Matches runs of combining marks, which are stripped from every token. */
    @Nonnull
    private static final Pattern DIACRITICAL_PATTERN = Pattern.compile("\\p{M}+");
    /** The name reported by {@link #getName()}. */
    @Nonnull
    public static final String NAME = "default";

    private DefaultTextTokenizer() {
    }

    /**
     * Get the singleton instance of this tokenizer.
     *
     * @return the shared tokenizer instance
     */
    @Nonnull
    public static DefaultTextTokenizer instance() {
        return INSTANCE;
    }

    /**
     * Split the given text into normalized tokens.
     *
     * <p>Tokens are produced lazily: the text is only scanned as the returned
     * iterator is advanced.</p>
     *
     * @param text the text to tokenize
     * @param version the tokenizer version; it is passed to {@code validateVersion}
     *     (not defined in this file) before any work is done
     * @param mode not consulted by this implementation
     * @return an iterator over the normalized tokens of {@code text}
     */
    @Override
    @Nonnull
    public Iterator<String> tokenize(@Nonnull String text, int version, @Nonnull TextTokenizer.TokenizerMode mode) {
        this.validateVersion(version);
        BreakIterator breakIterator = BreakIterator.getWordInstance(Locale.ROOT);
        breakIterator.setText(text);
        return new BreakIteratorWrapper(breakIterator, text);
    }

    /**
     * Get the name of this tokenizer.
     *
     * @return the value of {@link #NAME}
     */
    @Override
    @Nonnull
    public String getName() {
        return NAME;
    }

    /**
     * Get the maximum supported version, which for this tokenizer is the same
     * value as its minimum version.
     *
     * @return the result of {@code getMinVersion()}
     */
    @Override
    public int getMaxVersion() {
        return this.getMinVersion();
    }

    /**
     * Iterator that walks the boundaries of a {@link BreakIterator} and yields the
     * normalized form of every segment that contains a letter or a digit.
     */
    private static final class BreakIteratorWrapper
    implements Iterator<String> {
        @Nonnull
        private final BreakIterator underlying;
        @Nonnull
        private final String text;
        /** Token computed by {@link #hasNext()} but not yet handed out by {@link #next()}. */
        @Nullable
        private String nextToken = null;
        /** Boundary at which the most recently examined segment ended. */
        private int lastBreak;
        /** Reusable matcher for {@link #DIACRITICAL_PATTERN}; reset for every token. */
        @Nonnull
        private final Matcher matcher;

        private BreakIteratorWrapper(@Nonnull BreakIterator underlying, @Nonnull String text) {
            this.underlying = underlying;
            this.text = text;
            this.lastBreak = underlying.first();
            this.matcher = DIACRITICAL_PATTERN.matcher("");
        }

        /**
         * Advance through the text until a token is found or the text is exhausted.
         *
         * @return {@code true} if another token is available
         */
        @Override
        public boolean hasNext() {
            if (this.nextToken != null) {
                return true;
            }
            // Position the break iterator on the first boundary after the last
            // segment we examined, so repeated calls resume where we left off.
            int nextBreak = this.underlying.following(this.lastBreak);
            while (this.nextToken == null && nextBreak != BreakIterator.DONE) {
                String token = this.text.substring(this.lastBreak, nextBreak);
                if (!Normalizer.isNormalized(token, NORMALIZED_FORM)) {
                    token = Normalizer.normalize(token, NORMALIZED_FORM);
                }
                // Segments without any letter or digit (whitespace, punctuation)
                // are skipped rather than reported as tokens.
                if (containsLetterOrDigit(token)) {
                    this.nextToken = this.matcher.reset(token.toLowerCase(Locale.ROOT)).replaceAll("");
                }
                this.lastBreak = nextBreak;
                nextBreak = this.underlying.next();
            }
            return this.nextToken != null;
        }

        /**
         * Return the next token and clear the look-ahead slot.
         *
         * @return the next normalized token
         * @throws NoSuchElementException if the text contains no further tokens
         */
        @Override
        @Nonnull
        public String next() {
            if (this.hasNext()) {
                String next = this.nextToken;
                this.nextToken = null;
                return next;
            }
            throw new NoSuchElementException("No more tokens found in text");
        }

        /**
         * Determine whether the segment contains at least one letter or digit.
         *
         * <p>The segment is scanned by code point rather than by {@code char}:
         * {@link Character#isLetterOrDigit(char)} reports {@code false} for both
         * halves of a surrogate pair, so a segment consisting only of
         * supplementary-plane letters or digits would otherwise be rejected.</p>
         *
         * @param segment the normalized segment to inspect
         * @return {@code true} if any code point in {@code segment} is a letter or digit
         */
        private static boolean containsLetterOrDigit(@Nonnull String segment) {
            int index = 0;
            while (index < segment.length()) {
                final int codePoint = segment.codePointAt(index);
                if (Character.isLetterOrDigit(codePoint)) {
                    return true;
                }
                index += Character.charCount(codePoint);
            }
            return false;
        }
    }
}

