/*
 * Decompiled with CFR 0.152.
 */
package org.codelibs.fess.crawler.util;

import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.HashSet;
import java.util.Set;
import org.codelibs.fess.crawler.util.UnsafeStringBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Static helpers for normalizing text read from a {@link Reader}.
 *
 * <p>Normalization collapses runs of ISO control characters and configured
 * "space" code points into a single ASCII space, optionally limits the length
 * of ASCII alphanumeric terms and ASCII symbol terms, and optionally drops
 * terms that have already appeared earlier in the same input. The actual work
 * is done by {@link TextNormalizeContext#execute()}.</p>
 */
public final class TextUtil {
    private static final Logger logger = LoggerFactory.getLogger(TextUtil.class);

    /** Code point of the ASCII space used as the single separator in normalized output. */
    private static final int SPACE = ' ';

    private TextUtil() {
        // Utility class; not meant to be instantiated.
    }

    /**
     * Creates a normalization context for the given reader. Configure it through
     * its fluent setters and call {@link TextNormalizeContext#execute()}.
     *
     * @param reader the source of characters; may be {@code null}, in which case
     *               {@code execute()} returns an empty string
     * @return a new context bound to {@code reader}
     */
    public static TextNormalizeContext normalizeText(Reader reader) {
        return new TextNormalizeContext(reader);
    }

    /**
     * Normalizes the given string.
     *
     * @param str                 the text to normalize; {@code null} yields an empty string
     * @param initialCapacity     initial capacity of the output buffer
     * @param maxAlphanumTermSize maximum length of an ASCII alphanumeric term, or a negative value for no limit
     * @param maxSymbolTermSize   maximum length of an ASCII symbol term, or a negative value for no limit
     * @param removeDuplication   whether terms that already appeared should be removed
     * @return the normalized text, or an empty string if {@code str} is {@code null}
     *         or closing the internal reader fails
     * @deprecated use {@link #normalizeText(Reader)} and configure the returned
     *             {@link TextNormalizeContext} instead
     */
    @Deprecated
    public static String normalizeText(String str, int initialCapacity, int maxAlphanumTermSize, int maxSymbolTermSize, boolean removeDuplication) {
        if (str == null) {
            return "";
        }
        // The resource is declared as Reader (not StringReader) on purpose:
        // Reader.close() declares IOException, which keeps the catch clause
        // below legal. StringReader.close() declares no checked exception, so
        // catching IOException for it would not compile.
        try (Reader reader = new StringReader(str)) {
            return normalizeText(reader, initialCapacity, maxAlphanumTermSize, maxSymbolTermSize, removeDuplication);
        } catch (IOException e) {
            if (logger.isDebugEnabled()) {
                logger.debug("Failed to close reader.", e);
            }
            return "";
        }
    }

    /**
     * Normalizes the characters supplied by the given reader.
     *
     * @param reader              the source of characters; {@code null} yields an empty string
     * @param initialCapacity     initial capacity of the output buffer
     * @param maxAlphanumTermSize maximum length of an ASCII alphanumeric term, or a negative value for no limit
     * @param maxSymbolTermSize   maximum length of an ASCII symbol term, or a negative value for no limit
     * @param removeDuplication   whether terms that already appeared should be removed
     * @return the normalized text
     * @deprecated use {@link #normalizeText(Reader)} and configure the returned
     *             {@link TextNormalizeContext} instead
     */
    @Deprecated
    public static String normalizeText(Reader reader, int initialCapacity, int maxAlphanumTermSize, int maxSymbolTermSize, boolean removeDuplication) {
        return new TextNormalizeContext(reader)
                .initialCapacity(initialCapacity)
                .maxAlphanumTermSize(maxAlphanumTermSize)
                .maxSymbolTermSize(maxSymbolTermSize)
                .duplicateTermRemoved(removeDuplication)
                .execute();
    }

    /** Returns {@code true} if the buffer is non-empty and its last character is an ASCII space. */
    private static boolean isLastSpaceChar(UnsafeStringBuilder buf) {
        final int length = buf.length();
        return length > 0 && buf.charAt(length - 1) == ' ';
    }

    /**
     * Checks the last {@code size} characters of the buffer against the terms
     * seen so far. A term seen for the first time is recorded and left in place.
     * A repeated term is cut off the end of the buffer, and a single space is
     * appended unless a separator is already there.
     *
     * @param buf       the output buffer; its last {@code size} characters form the term
     * @param size      length of the term at the end of the buffer
     * @param isSpace   the caller's current "last output was a space" flag
     * @param termCache the terms already seen in this run; updated in place
     * @return the new value for the caller's "last output was a space" flag
     */
    private static boolean removeLastDuplication(UnsafeStringBuilder buf, int size, boolean isSpace, Set<String> termCache) {
        final String target = buf.rightString(size);
        // Set.add returns true only when the term was not present yet.
        if (termCache.add(target)) {
            return isSpace;
        }
        buf.setLength(buf.length() - size);
        if (!isSpace && !isLastSpaceChar(buf)) {
            buf.appendCodePoint(SPACE);
            return true;
        }
        return isSpace;
    }

    /**
     * Fluent configuration and execution of a single text normalization run.
     *
     * <p>Defaults: initial buffer capacity 10000, no limit on alphanumeric or
     * symbol term length (negative limit), duplicate term removal disabled, and
     * space characters U+0020, U+00A0, U+3000 and U+FFFD.</p>
     */
    public static class TextNormalizeContext {
        private final Reader reader;
        private int initialCapacity = 10000;
        private int maxAlphanumTermSize = -1;
        private int maxSymbolTermSize = -1;
        private boolean duplicateTermRemoved = false;
        // ASCII space, no-break space, ideographic space, replacement character.
        private int[] spaceChars = new int[] { ' ', '\u00A0', '\u3000', '\uFFFD' };

        /**
         * @param reader the source of characters; may be {@code null}
         */
        public TextNormalizeContext(Reader reader) {
            this.reader = reader;
        }

        /**
         * Reads the reader to its end and returns the normalized text.
         *
         * <p>Each value returned by {@link Reader#read()} is classified as one of:
         * separator (ISO control or configured space character), ASCII
         * alphanumeric, ASCII symbol, or anything else. Separators are collapsed
         * into one space. Alphanumeric and symbol runs are counted as terms and
         * truncated to the configured maximum length. When duplicate removal is
         * enabled, a finished term that was already seen is removed. This method
         * does not close the reader.</p>
         *
         * @return the normalized, trimmed text; an empty string if the reader is
         *         {@code null} or reading fails with an {@link IOException}
         */
        public String execute() {
            if (this.reader == null) {
                return "";
            }
            final UnsafeStringBuilder buf = new UnsafeStringBuilder(this.initialCapacity);
            final Set<String> termCache = new HashSet<>(1000);
            boolean isSpace = false;
            // Length of the term currently at the end of the buffer. At most one
            // of these is positive at any time.
            int alphanumSize = 0;
            int symbolSize = 0;
            try {
                int c;
                while ((c = this.reader.read()) != -1) {
                    if (Character.isISOControl(c) || isSpaceChar(c)) {
                        // A separator ends whatever term is pending.
                        if (this.duplicateTermRemoved) {
                            isSpace = removePendingTerm(buf, alphanumSize, symbolSize, isSpace, termCache);
                        }
                        if (!isSpace && !TextUtil.isLastSpaceChar(buf)) {
                            buf.appendCodePoint(SPACE);
                            isSpace = true;
                        }
                        alphanumSize = 0;
                        symbolSize = 0;
                    } else if (isAsciiAlphanumeric(c)) {
                        // An alphanumeric character ends a pending symbol term.
                        if (this.duplicateTermRemoved && symbolSize > 0) {
                            isSpace = TextUtil.removeLastDuplication(buf, symbolSize, isSpace, termCache);
                        }
                        // Characters beyond the configured limit are dropped.
                        if (this.maxAlphanumTermSize < 0 || alphanumSize < this.maxAlphanumTermSize) {
                            buf.appendCodePoint(c);
                            ++alphanumSize;
                        }
                        isSpace = false;
                        symbolSize = 0;
                    } else if (isAsciiSymbol(c)) {
                        // A symbol ends a pending alphanumeric term.
                        if (this.duplicateTermRemoved && alphanumSize > 0) {
                            isSpace = TextUtil.removeLastDuplication(buf, alphanumSize, isSpace, termCache);
                        }
                        // Characters beyond the configured limit are dropped.
                        if (this.maxSymbolTermSize < 0 || symbolSize < this.maxSymbolTermSize) {
                            buf.appendCodePoint(c);
                            ++symbolSize;
                        }
                        isSpace = false;
                        alphanumSize = 0;
                    } else {
                        // Any other character ends the pending term and is copied
                        // as-is; it is never counted as part of a term.
                        if (this.duplicateTermRemoved) {
                            isSpace = removePendingTerm(buf, alphanumSize, symbolSize, isSpace, termCache);
                        }
                        buf.appendCodePoint(c);
                        isSpace = false;
                        alphanumSize = 0;
                        symbolSize = 0;
                    }
                }
                // The input may end in the middle of a term; check that one too.
                if (this.duplicateTermRemoved) {
                    removePendingTerm(buf, alphanumSize, symbolSize, isSpace, termCache);
                }
            } catch (IOException e) {
                if (logger.isDebugEnabled()) {
                    logger.debug("Failed to read data.", e);
                }
                return "";
            }
            return buf.toUnsafeString().trim();
        }

        /**
         * Runs duplicate removal for whichever term (alphanumeric first, then
         * symbol) is pending at the end of the buffer. Returns the updated
         * "last output was a space" flag, or {@code isSpace} unchanged if no term is pending.
         */
        private static boolean removePendingTerm(UnsafeStringBuilder buf, int alphanumSize, int symbolSize, boolean isSpace,
                Set<String> termCache) {
            if (alphanumSize > 0) {
                return TextUtil.removeLastDuplication(buf, alphanumSize, isSpace, termCache);
            }
            if (symbolSize > 0) {
                return TextUtil.removeLastDuplication(buf, symbolSize, isSpace, termCache);
            }
            return isSpace;
        }

        /** Returns {@code true} for the ASCII characters 0-9, A-Z and a-z. */
        private static boolean isAsciiAlphanumeric(int c) {
            return c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z';
        }

        /** Returns {@code true} for printable ASCII characters that are neither alphanumeric nor space. */
        private static boolean isAsciiSymbol(int c) {
            return c >= '!' && c <= '/' || c >= ':' && c <= '@' || c >= '[' && c <= '`' || c >= '{' && c <= '~';
        }

        /** Returns {@code true} if {@code c} is one of the configured space characters. */
        private boolean isSpaceChar(int c) {
            for (final int spaceChar : this.spaceChars) {
                if (c == spaceChar) {
                    return true;
                }
            }
            return false;
        }

        /**
         * Sets the initial capacity of the output buffer.
         *
         * @param initialCapacity the initial capacity passed to the buffer
         * @return this context
         */
        public TextNormalizeContext initialCapacity(int initialCapacity) {
            this.initialCapacity = initialCapacity;
            return this;
        }

        /**
         * Sets the maximum length of an ASCII alphanumeric term.
         *
         * @param maxAlphanumTermSize the limit; a negative value disables it
         * @return this context
         */
        public TextNormalizeContext maxAlphanumTermSize(int maxAlphanumTermSize) {
            this.maxAlphanumTermSize = maxAlphanumTermSize;
            return this;
        }

        /**
         * Sets the maximum length of an ASCII symbol term.
         *
         * @param maxSymbolTermSize the limit; a negative value disables it
         * @return this context
         */
        public TextNormalizeContext maxSymbolTermSize(int maxSymbolTermSize) {
            this.maxSymbolTermSize = maxSymbolTermSize;
            return this;
        }

        /**
         * Enables or disables removal of terms that already appeared in the input.
         *
         * @param duplicateTermRemoved {@code true} to remove repeated terms
         * @return this context
         */
        public TextNormalizeContext duplicateTermRemoved(boolean duplicateTermRemoved) {
            this.duplicateTermRemoved = duplicateTermRemoved;
            return this;
        }

        /**
         * Replaces the set of characters treated as spaces. The array is stored
         * by reference, not copied.
         *
         * @param spaceChars the space characters; must not be {@code null}, since
         *                   {@link #execute()} iterates over it and would fail
         *                   with a {@link NullPointerException}
         * @return this context
         */
        public TextNormalizeContext spaceChars(int[] spaceChars) {
            this.spaceChars = spaceChars;
            return this;
        }
    }
}

