/*
 * Decompiled with CFR 0.152.
 */
package com.johnsnowlabs.nlp.annotators.cleaners.util;

import java.io.Serializable;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import scala.Function1;
import scala.MatchError;
import scala.None$;
import scala.Option;
import scala.Predef$;
import scala.Some;
import scala.collection.Seq;
import scala.collection.TraversableOnce;
import scala.collection.immutable.List;
import scala.collection.immutable.List$;
import scala.collection.immutable.Set;
import scala.collection.immutable.StringOps;
import scala.collection.mutable.ArrayOps;
import scala.util.matching.Regex;

public final class CleanerHelper$ {
    public static CleanerHelper$ MODULE$;
    private final List<String> UNICODE_BULLETS;
    private final String BULLETS_PATTERN;
    private final Regex UNICODE_BULLETS_RE;
    private final String HTML_APOSTROPHE_ENTITY;
    private final Regex HEXADECIMAL_ESCAPE_SEQUENCE;
    private final String DOUBLE_PARAGRAPH_PATTERN;
    private final String BLOCK_SPLIT_PATTERN;

    static {
        new CleanerHelper$();
    }

    public List<String> UNICODE_BULLETS() {
        return this.UNICODE_BULLETS;
    }

    private String BULLETS_PATTERN() {
        return this.BULLETS_PATTERN;
    }

    private Regex UNICODE_BULLETS_RE() {
        return this.UNICODE_BULLETS_RE;
    }

    private String HTML_APOSTROPHE_ENTITY() {
        return this.HTML_APOSTROPHE_ENTITY;
    }

    private Regex HEXADECIMAL_ESCAPE_SEQUENCE() {
        return this.HEXADECIMAL_ESCAPE_SEQUENCE;
    }

    public String DOUBLE_PARAGRAPH_PATTERN() {
        return this.DOUBLE_PARAGRAPH_PATTERN;
    }

    public String BLOCK_SPLIT_PATTERN() {
        return this.BLOCK_SPLIT_PATTERN;
    }

    public byte[] parseEscapedBytes(String text) {
        Charset RawByteCharset = Charset.forName("ISO-8859-1");
        return this.HEXADECIMAL_ESCAPE_SEQUENCE().replaceAllIn((CharSequence)text, (Function1 & Serializable & scala.Serializable)m -> {
            String hexValue = m.group(1);
            return Character.toString((char)Integer.parseInt(hexValue, 16));
        }).getBytes(RawByteCharset);
    }

    /*
     * WARNING - void declaration
     */
    public String formatEncodingStr(String encoding) {
        void var2_2;
        block0: {
            String formattedEncoding = encoding.toLowerCase().replace("_", "-");
            Set annotatedEncodings = (Set)Predef$.MODULE$.Set().apply((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"iso-8859-6-i", "iso-8859-6-e", "iso-8859-8-i", "iso-8859-8-e"}));
            if (!annotatedEncodings.contains((Object)formattedEncoding)) break block0;
            formattedEncoding = (String)new StringOps(Predef$.MODULE$.augmentString(formattedEncoding)).dropRight(2);
        }
        return var2_2;
    }

    public String cleanTrailingPunctuation(String text) {
        return text.replaceAll("[.,:;]+$", "");
    }

    public String cleanDashes(String text) {
        Regex dashRegex = new StringOps(Predef$.MODULE$.augmentString("[-\u2013]")).r();
        return dashRegex.replaceAllIn((CharSequence)text, " ").trim();
    }

    public String cleanExtraWhitespace(String text) {
        String hexNbspReplaced = text.replaceAll("\\\\x[aA]0", " ");
        String normalizedText = hexNbspReplaced.replaceAll("\\p{Zs}", " ");
        Regex whitespaceRegex = new StringOps(Predef$.MODULE$.augmentString("\\s+")).r();
        return whitespaceRegex.replaceAllIn((CharSequence)normalizedText, " ").trim();
    }

    public String cleanBullets(String text) {
        String string;
        Regex manualBulletRegex = new Regex(new StringBuilder(4).append("^").append(this.UNICODE_BULLETS_RE()).append("\\s?").toString(), (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[0]));
        Option option = manualBulletRegex.findPrefixOf((CharSequence)text);
        if (option instanceof Some) {
            string = manualBulletRegex.replaceFirstIn((CharSequence)text, "").trim();
        } else if (None$.MODULE$.equals(option)) {
            string = text;
        } else {
            throw new MatchError((Object)option);
        }
        return string;
    }

    public String cleanNonAsciiChars(String text) {
        String decodedText = this.HEXADECIMAL_ESCAPE_SEQUENCE().replaceAllIn((CharSequence)text, (Function1 & Serializable & scala.Serializable)m -> Character.toString((char)Integer.parseInt(m.group(1), 16)));
        String entityReplacedText = decodedText.replace(this.HTML_APOSTROPHE_ENTITY(), "'");
        return entityReplacedText.replaceAll("[^ -~]", "");
    }

    public String cleanOrderedBullets(String text) {
        String[] textParts = text.split("\\s+", 2);
        if (textParts.length < 2) {
            return text;
        }
        String firstWord = textParts[0];
        String remainingText = textParts[1];
        if (!firstWord.contains(".") || firstWord.contains("..")) {
            return text;
        }
        String[] bulletParts = firstWord.split("\\.");
        String[] cleanedBulletParts = ((String)new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])bulletParts)).last()).isEmpty() ? (String[])new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])bulletParts)).dropRight(1) : bulletParts;
        return ((String)new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])cleanedBulletParts)).head()).length() > 2 ? text : remainingText.trim();
    }

    public String replaceUnicodeCharacters(String text) {
        String decodedText = this.HEXADECIMAL_ESCAPE_SEQUENCE().replaceAllIn((CharSequence)text, (Function1 & Serializable & scala.Serializable)m -> {
            String hexValue = m.group(1);
            byte byteValue = (byte)Integer.parseInt(hexValue, 16);
            return new String(new byte[]{byteValue}, Charset.forName("ISO-8859-1"));
        });
        String fullyDecodedText = new String(decodedText.getBytes(Charset.forName("ISO-8859-1")), Charset.forName("Windows-1252"));
        return fullyDecodedText.replace("\u2018", "\u2018").replace("\u2019", "\u2019").replace("\u201c", "\u201c").replace("\u201d", "\u201d").replace(this.HTML_APOSTROPHE_ENTITY(), "'").replace("\u00e2\u0080\u0099", "'").replace("\u00e2\u0080\u201c", "\u2014").replace("\u00e2\u0080\u201d", "\u2013").replace("\u00e2\u0080\u00a6", "\u2026");
    }

    public String removePunctuation(String text) {
        Regex punctuationRegex = new StringOps(Predef$.MODULE$.augmentString("\\p{P}")).r();
        return punctuationRegex.replaceAllIn((CharSequence)text, "");
    }

    public String cleanPrefix(String text, String pattern, boolean ignoreCase, boolean strip) {
        String regexStr = ignoreCase ? new StringBuilder(19).append("(?i)^").append(pattern).append("[\\p{Punct}\\s]*").toString() : new StringBuilder(15).append("^").append(pattern).append("[\\p{Punct}\\s]*").toString();
        Regex regex = new StringOps(Predef$.MODULE$.augmentString(regexStr)).r();
        String cleanedText = regex.replaceAllIn((CharSequence)text, "");
        return strip ? cleanedText.replaceAll("^\\s+", "") : cleanedText;
    }

    public String cleanPostfix(String text, String pattern, boolean ignoreCase, boolean strip) {
        Regex regex = ignoreCase ? new StringOps(Predef$.MODULE$.augmentString(new StringBuilder(5).append("(?i)").append(pattern).append("$").toString())).r() : new StringOps(Predef$.MODULE$.augmentString(new StringBuilder(1).append(pattern).append("$").toString())).r();
        String cleanedText = regex.replaceAllIn((CharSequence)text, "");
        return strip ? cleanedText.trim() : cleanedText;
    }

    public String bytesStringToString(String text, String encoding) {
        byte[] textBytes = this.parseEscapedBytes(text);
        String formattedEncoding = this.formatEncodingStr(encoding);
        return new String(textBytes, Charset.forName(formattedEncoding));
    }

    private CleanerHelper$() {
        MODULE$ = this;
        this.UNICODE_BULLETS = List$.MODULE$.apply((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"\u0095", "\u2022", "\u2023", "\u2043", "\u3164", "\u204c", "\u204d", "\u2219", "\u25cb", "\u25cf", "\u25d8", "\u25e6", "\u2619", "\u2765", "\u2767", "\u29be", "\u29bf", "-", "\uf0b7", "\\*", "\u0095", "\u00b7"}));
        this.BULLETS_PATTERN = ((TraversableOnce)this.UNICODE_BULLETS().map((Function1 & Serializable & scala.Serializable)x$1 -> Pattern.quote(x$1), List$.MODULE$.canBuildFrom())).mkString("|");
        this.UNICODE_BULLETS_RE = new Regex(new StringBuilder(4).append("(?:").append(this.BULLETS_PATTERN()).append(")").toString(), (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[0]));
        this.HTML_APOSTROPHE_ENTITY = "&apos;";
        this.HEXADECIMAL_ESCAPE_SEQUENCE = new StringOps(Predef$.MODULE$.augmentString("\\\\x([0-9A-Fa-f]{2})")).r();
        this.DOUBLE_PARAGRAPH_PATTERN = "(?:\\s*\\n\\s*){2,}";
        this.BLOCK_SPLIT_PATTERN = "\\n\\n+";
    }
}

