/*
 * Decompiled with CFR 0.152.
 */
package org.embulk.guess.csv;

import java.io.ByteArrayOutputStream;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.StreamSupport;
import org.embulk.config.ConfigDiff;
import org.embulk.config.ConfigSource;
import org.embulk.config.DataSource;
import org.embulk.parser.csv.CsvParserPlugin;
import org.embulk.spi.Buffer;
import org.embulk.spi.BufferAllocator;
import org.embulk.spi.Exec;
import org.embulk.spi.FileInput;
import org.embulk.spi.GuessPlugin;
import org.embulk.util.config.ConfigMapperFactory;
import org.embulk.util.csv.CsvTokenizer;
import org.embulk.util.csv.InvalidCsvQuotationException;
import org.embulk.util.csv.RecordDoesNotHaveExpectedColumnException;
import org.embulk.util.file.ListFileInput;
import org.embulk.util.guess.CharsetGuess;
import org.embulk.util.guess.GuesstimatedType;
import org.embulk.util.guess.LineGuessHelper;
import org.embulk.util.guess.NewlineGuess;
import org.embulk.util.guess.SchemaGuess;
import org.embulk.util.text.LineDecoder;
import org.embulk.util.text.LineDelimiter;
import org.embulk.util.text.Newline;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class CsvGuessPlugin
implements GuessPlugin {
    private static final ConfigMapperFactory CONFIG_MAPPER_FACTORY = ConfigMapperFactory.builder().addDefaultModules().build();
    private static final List<Character> DELIMITER_CANDIDATES = Collections.unmodifiableList(Arrays.asList(Character.valueOf(','), Character.valueOf('\t'), Character.valueOf('|'), Character.valueOf(';')));
    private static final List<Character> QUOTE_CANDIDATES = Collections.unmodifiableList(Arrays.asList(Character.valueOf('\"'), Character.valueOf('\'')));
    private static final List<String> ESCAPE_CANDIDATES = Collections.unmodifiableList(Arrays.asList("\\", "\""));
    private static final List<String> NULL_STRING_CANDIDATES = Collections.unmodifiableList(Arrays.asList("null", "NULL", "#N/A", "\\N"));
    private static final List<String> COMMENT_LINE_MARKER_CANDIDATES = Collections.unmodifiableList(Arrays.asList("#", "//"));
    private static final SchemaGuess SCHEMA_GUESS = SchemaGuess.of((ConfigMapperFactory)CONFIG_MAPPER_FACTORY);
    private static final int MAX_SKIP_LINES = 10;
    private static final int NO_SKIP_DETECT_LINES = 10;
    private static final Logger logger = LoggerFactory.getLogger(CsvGuessPlugin.class);

    public ConfigDiff guess(ConfigSource config, Buffer sample) {
        ConfigSource parserConfig = config.getNestedOrGetEmpty("parser");
        if (!parserConfig.has("charset")) {
            return CharsetGuess.of((ConfigMapperFactory)CONFIG_MAPPER_FACTORY).guess(sample);
        }
        if (!parserConfig.has("newline")) {
            return NewlineGuess.of((ConfigMapperFactory)CONFIG_MAPPER_FACTORY).guess(config, sample);
        }
        BufferAllocator bufferAllocator = Exec.getBufferAllocator();
        return this.guessLines(config, LineGuessHelper.of((ConfigMapperFactory)CONFIG_MAPPER_FACTORY).toLines(config, sample), bufferAllocator);
    }

    protected ConfigDiff guessLines(ConfigSource config, List<String> sampleLines, BufferAllocator bufferAllocator) {
        List<GuesstimatedType> columnTypes;
        boolean headerLine;
        String nullString;
        String quote;
        ConfigDiff configDiff = CsvGuessPlugin.newConfigDiff();
        if (!"csv".equals(config.getNestedOrGetEmpty("parser").get(String.class, "type", (Object)"csv"))) {
            return configDiff;
        }
        ConfigSource parserConfig = config.getNestedOrGetEmpty("parser");
        String delim = "csv".equals(parserConfig.get(String.class, "type", (Object)"csv")) && parserConfig.has("delimiter") ? (String)parserConfig.get(String.class, "delimiter") : this.guessDelimiter(sampleLines);
        ConfigDiff parserGuessed = CsvGuessPlugin.newConfigDiff();
        parserGuessed.merge((DataSource)parserConfig);
        parserGuessed.set("type", (Object)"csv");
        parserGuessed.set("delimiter", (Object)delim);
        if (!parserGuessed.has("quote")) {
            quote = CsvGuessPlugin.guessQuote(sampleLines, delim);
            if (quote == null) {
                parserGuessed.setNested("quote", null);
            } else {
                parserGuessed.set("quote", (Object)quote);
            }
        }
        if ("".equals(parserGuessed.get(String.class, "quote"))) {
            parserGuessed.set("quote", (Object)"\"");
        }
        if (!parserGuessed.has("escape") && (quote = (String)parserGuessed.get(String.class, "quote")) != null) {
            String escape = CsvGuessPlugin.guessEscape(sampleLines, delim, quote);
            if (escape == null) {
                parserGuessed.setNested("escape", null);
            } else {
                parserGuessed.set("escape", (Object)escape);
            }
        }
        if (!parserGuessed.has("null_string") && (nullString = CsvGuessPlugin.guessNullString(sampleLines, delim)) != null) {
            parserGuessed.set("null_string", (Object)nullString);
        }
        List<List<String>> sampleRecordsBeforeSkip = CsvGuessPlugin.splitLines(parserGuessed, false, sampleLines, delim, null, bufferAllocator);
        int skipHeaderLines = CsvGuessPlugin.guessSkipHeaderLines(sampleRecordsBeforeSkip);
        List<String> skippedSampleLines = sampleLines.subList(skipHeaderLines, sampleLines.size());
        List<List<String>> skippedSampleRecords = sampleRecordsBeforeSkip.subList(skipHeaderLines, sampleRecordsBeforeSkip.size());
        List<String> uncommentedSampleLines = parserGuessed.has("comment_line_marker") ? skippedSampleLines : CsvGuessPlugin.guessCommentLineMarker(skippedSampleLines, delim, (String)parserGuessed.get(String.class, "quote"), (String)parserGuessed.get(String.class, "null_string", null), parserGuessed);
        List<List<String>> sampleRecords = CsvGuessPlugin.splitLines(parserGuessed, true, uncommentedSampleLines, delim, null, bufferAllocator);
        if (sampleRecords == null || sampleRecords.isEmpty()) {
            return configDiff;
        }
        if (uncommentedSampleLines.size() == 1) {
            headerLine = false;
            if (parserGuessed.has("trim_if_not_quoted")) {
                columnTypes = CsvGuessPlugin.typesFromListRecords(sampleRecords.subList(0, 1));
            } else {
                List<List<String>> sampleRecordsTrimmed = CsvGuessPlugin.splitLines(parserGuessed, true, uncommentedSampleLines, delim, true, bufferAllocator);
                List<GuesstimatedType> columnTypesTrimmed = CsvGuessPlugin.typesFromListRecords(sampleRecordsTrimmed);
                List<GuesstimatedType> columnTypesUntrimmed = CsvGuessPlugin.typesFromListRecords(sampleRecords.subList(0, 1));
                if (columnTypesUntrimmed.equals(columnTypesTrimmed)) {
                    parserGuessed.set("trim_if_not_quoted", (Object)false);
                    columnTypes = columnTypesUntrimmed;
                } else {
                    parserGuessed.set("trim_if_not_quoted", (Object)true);
                    columnTypes = columnTypesTrimmed;
                }
            }
        } else {
            List<GuesstimatedType> otherTypes;
            List<GuesstimatedType> firstTypes = CsvGuessPlugin.typesFromListRecords(sampleRecords.subList(0, 1));
            List<GuesstimatedType> otherTypesUntrimmed = CsvGuessPlugin.typesFromListRecords(sampleRecords.subList(1, sampleRecords.size()));
            logger.debug("Types of the first line : {}", firstTypes);
            logger.debug("Types of the other lines (untrimmed): {}", otherTypesUntrimmed);
            if (parserGuessed.has("trim_if_not_quoted")) {
                otherTypes = otherTypesUntrimmed;
            } else {
                List<List<String>> sampleRecordsTrimmed = CsvGuessPlugin.splitLines(parserGuessed, true, uncommentedSampleLines, delim, true, bufferAllocator);
                List<GuesstimatedType> otherTypesTrimmed = CsvGuessPlugin.typesFromListRecords(sampleRecordsTrimmed.subList(1, sampleRecordsTrimmed.size()));
                if (otherTypesUntrimmed.equals(otherTypesTrimmed)) {
                    parserGuessed.set("trim_if_not_quoted", (Object)false);
                    otherTypes = otherTypesUntrimmed;
                } else {
                    parserGuessed.set("trim_if_not_quoted", (Object)true);
                    otherTypes = otherTypesTrimmed;
                }
            }
            logger.debug("Types of the other lines: {}", otherTypes);
            headerLine = !firstTypes.equals(otherTypes) && firstTypes.stream().allMatch(t -> GuesstimatedType.STRING.equals(t) || GuesstimatedType.BOOLEAN.equals(t)) || CsvGuessPlugin.guessStringHeaderLine(sampleRecords);
            logger.debug("headerLine: {}", (Object)headerLine);
            columnTypes = otherTypes;
        }
        if (columnTypes.isEmpty()) {
            return configDiff;
        }
        if (headerLine) {
            parserGuessed.set("skip_header_lines", (Object)(skipHeaderLines + 1));
        } else {
            parserGuessed.set("skip_header_lines", (Object)skipHeaderLines);
        }
        if (!parserGuessed.has("allow_extra_columns")) {
            parserGuessed.set("allow_extra_columns", (Object)false);
        }
        if (!parserGuessed.has("allow_optional_columns")) {
            parserGuessed.set("allow_optional_columns", (Object)false);
        }
        List columnNames = headerLine ? sampleRecords.get(0).stream().map(String::trim).collect(Collectors.toList()) : IntStream.range(0, columnTypes.size()).mapToObj(i -> "c" + i).collect(Collectors.toList());
        List schema = IntStream.range(0, Math.min(columnNames.size(), columnTypes.size())).mapToObj(i -> this.newColumn((String)columnNames.get(i), (GuesstimatedType)columnTypes.get(i))).collect(Collectors.toList());
        parserGuessed.set("columns", schema);
        configDiff.setNested("parser", (DataSource)parserGuessed);
        return configDiff;
    }

    protected ConfigDiff newColumn(String name, GuesstimatedType type) {
        ConfigDiff column = CsvGuessPlugin.newConfigDiff();
        column.set("name", (Object)name);
        column.set("type", (Object)type.toString());
        if (type.isTimestamp()) {
            column.set("format", (Object)type.getFormatOrTimeValue());
        }
        return column;
    }

    protected static ConfigDiff newConfigDiff() {
        return CONFIG_MAPPER_FACTORY.newConfigDiff();
    }

    private static List<List<String>> splitLines(ConfigDiff parserConfig, boolean skipEmptyLines, List<String> sampleLines, String delim, Boolean trimIfNotQuoted, BufferAllocator bufferAllocator) {
        try {
            String nullString = (String)parserConfig.get(String.class, "null_string", null);
            ConfigSource config = CONFIG_MAPPER_FACTORY.newConfigSource();
            config.merge((DataSource)parserConfig);
            if (trimIfNotQuoted != null) {
                config.set("trim_if_not_quoted", (Object)trimIfNotQuoted);
            }
            config.set("charset", (Object)"UTF-8");
            config.set("columns", new ArrayList());
            CsvParserPlugin.PluginTask parserTask = (CsvParserPlugin.PluginTask)CONFIG_MAPPER_FACTORY.createConfigMapper().map(config, CsvParserPlugin.PluginTask.class);
            byte[] data = CsvGuessPlugin.joinBytes(sampleLines, parserTask.getNewline());
            Buffer sample = bufferAllocator.allocate(data.length);
            sample.setBytes(0, data, 0, data.length);
            sample.limit(data.length);
            CsvTokenizer tokenizer = CsvGuessPlugin.buildCsvTokenizer(parserTask, sample);
            ArrayList rows = new ArrayList();
            while (tokenizer.nextRecord(skipEmptyLines)) {
                try {
                    ArrayList<String> columns = new ArrayList<String>();
                    try {
                        while (true) {
                            String column = tokenizer.nextColumn();
                            boolean quoted = tokenizer.wasQuotedColumn();
                            if (nullString != null && !quoted && nullString.equals(column)) {
                                columns.add(null);
                                continue;
                            }
                            columns.add(column);
                        }
                    }
                    catch (RecordDoesNotHaveExpectedColumnException ex) {
                        rows.add(Collections.unmodifiableList(columns));
                    }
                }
                catch (InvalidCsvQuotationException ex) {
                    tokenizer.skipCurrentLine();
                }
            }
            return Collections.unmodifiableList(rows);
        }
        catch (RuntimeException ex) {
            ArrayList<List<String>> rows = new ArrayList<List<String>>();
            for (String line : sampleLines) {
                String[] split = line.split(Pattern.quote(delim));
                rows.add(Collections.unmodifiableList(Arrays.asList(split)));
            }
            return Collections.unmodifiableList(rows);
        }
    }

    private static CsvTokenizer buildCsvTokenizer(CsvParserPlugin.PluginTask parserTask, Buffer sample) {
        CsvTokenizer.Builder builder = CsvTokenizer.builder((String)parserTask.getDelimiter());
        parserTask.getQuoteChar().ifPresent(q -> builder.setQuote(q.getCharacter()));
        parserTask.getEscapeChar().ifPresent(e -> builder.setEscape(e.getCharacter()));
        builder.setNewline(parserTask.getNewline().getString());
        if (parserTask.getTrimIfNotQuoted()) {
            builder.enableTrimIfNotQuoted();
        }
        if (parserTask.getQuotesInQuotedFields() == CsvParserPlugin.QuotesInQuotedFields.ACCEPT_STRAY_QUOTES_ASSUMING_NO_DELIMITERS_IN_FIELDS) {
            builder.acceptStrayQuotesAssumingNoDelimitersInFields();
        }
        builder.setMaxQuotedFieldLength(parserTask.getMaxQuotedSizeLimit());
        parserTask.getCommentLineMarker().ifPresent(m -> builder.setCommentLineMarker(m));
        parserTask.getNullString().ifPresent(n -> builder.setNullString(n));
        ArrayList<Buffer> listBuffer = new ArrayList<Buffer>();
        listBuffer.add(sample);
        ArrayList<ArrayList<Buffer>> listListBuffer = new ArrayList<ArrayList<Buffer>>();
        listListBuffer.add(listBuffer);
        LineDecoder decoder = LineDecoder.of((FileInput)new ListFileInput(listListBuffer), (Charset)parserTask.getCharset(), (LineDelimiter)parserTask.getLineDelimiterRecognized().orElse(null));
        decoder.nextFile();
        return builder.build(decoder.iterator());
    }

    private String guessDelimiter(List<String> sampleLines) {
        String selectedDelimiter = null;
        double mostWeight = 0.0;
        for (char delimiter : DELIMITER_CANDIDATES) {
            double weight;
            List<Integer> counts = StreamSupport.stream(sampleLines.spliterator(), false).map(line -> (int)line.chars().filter(c -> c == delimiter).count()).collect(Collectors.toList());
            int total = CsvGuessPlugin.sumOfList(counts);
            if (total <= 0 || !((weight = (double)total / CsvGuessPlugin.standardDeviationOfList(counts)) > mostWeight)) continue;
            selectedDelimiter = "" + delimiter;
            mostWeight = weight;
        }
        if (selectedDelimiter != null && mostWeight > 1.0) {
            return selectedDelimiter;
        }
        return "" + DELIMITER_CANDIDATES.get(0);
    }

    private static String guessQuote(List<String> sampleLines, String delim) {
        String selectedQuote = null;
        double mostWeight = 0.0;
        String delimRegex = Pattern.quote(delim);
        for (char q : QUOTE_CANDIDATES) {
            String quoteRegex = Pattern.quote("" + q);
            ArrayList<Integer> weights = new ArrayList<Integer>();
            for (String line : sampleLines) {
                long count = line.chars().filter(c -> c == q).count();
                if (count <= 0L) continue;
                weights.add((int)count + CsvGuessPlugin.weighQuote(line, delimRegex, quoteRegex));
            }
            double d = weights.isEmpty() ? 0.0 : CsvGuessPlugin.averageOfList(weights);
            double weight = d;
            if (!(weight > mostWeight)) continue;
            selectedQuote = "" + q;
            mostWeight = weight;
        }
        if (mostWeight >= 10.0) {
            return selectedQuote;
        }
        if (!CsvGuessPlugin.guessForceNoQuote(sampleLines, delim, "\"")) {
            return "\"";
        }
        return null;
    }

    private static boolean guessForceNoQuote(List<String> sampleLines, String delim, String quoteCandidate) {
        String delimRegex = Pattern.quote(delim);
        String quoteRegex = Pattern.quote(quoteCandidate);
        Pattern pattern = Pattern.compile(String.format("(?:\\A|%s)\\s*[^%s]+%s", delimRegex, quoteRegex, quoteRegex));
        for (String line : sampleLines) {
            if (!pattern.matcher(line).find()) continue;
            return true;
        }
        return false;
    }

    private static String guessEscape(List<String> sampleLines, String delim, String quote) {
        int maxCount = 0;
        String selectedEscape = null;
        for (String str : ESCAPE_CANDIDATES) {
            Pattern regex = Pattern.compile(String.format("%s(?:%s|%s)", Pattern.quote(str), Pattern.quote(delim), Pattern.quote(quote)));
            int count = StreamSupport.stream(sampleLines.spliterator(), false).mapToInt(line -> CsvGuessPlugin.countPattern(line, regex)).sum();
            if (count <= maxCount) continue;
            selectedEscape = str;
            maxCount = count;
        }
        if (selectedEscape == null) {
            if ("\"".equals(quote)) {
                return "\"";
            }
            return null;
        }
        return selectedEscape;
    }

    private static String guessNullString(List<String> sampleLines, String delim) {
        int maxCount = 0;
        String selectedNullString = null;
        for (String str : NULL_STRING_CANDIDATES) {
            Pattern regex = Pattern.compile(String.format("(?:^|%s)%s(?:$|%s)", Pattern.quote(delim), Pattern.quote(str), Pattern.quote(delim)));
            int count = StreamSupport.stream(sampleLines.spliterator(), false).mapToInt(line -> CsvGuessPlugin.countPattern(line, regex)).sum();
            if (count <= maxCount) continue;
            selectedNullString = str;
            maxCount = count;
        }
        return selectedNullString;
    }

    private static int guessSkipHeaderLines(List<List<String>> sampleRecords) {
        ArrayList<Integer> counts = new ArrayList<Integer>();
        for (List<String> records : sampleRecords) {
            counts.add(records.size());
        }
        for (int i = 1; i <= Math.min(10, counts.size() - 1); ++i) {
            int checkRowCount = (Integer)counts.get(i - 1);
            if (!counts.subList(i, Math.min(counts.size(), 10)).stream().allMatch(c -> c <= checkRowCount)) continue;
            return i - 1;
        }
        return 0;
    }

    private static List<String> guessCommentLineMarker(List<String> sampleLines, String delim, String quote, String nullString, ConfigDiff parserGuessed) {
        ArrayList<Pattern> exclude = new ArrayList<Pattern>();
        if (quote != null && !quote.isEmpty()) {
            exclude.add(Pattern.compile("^" + Pattern.quote(quote)));
        }
        if (nullString != null) {
            exclude.add(Pattern.compile(String.format("^%s(?:%s|$)", Pattern.quote(nullString), Pattern.quote(delim))));
        }
        String selectedCommentLineMarker = null;
        List selectedUnmatchLines = null;
        int maxMatchCount = 0;
        for (String str : COMMENT_LINE_MARKER_CANDIDATES) {
            Pattern regex = Pattern.compile("^" + Pattern.quote(str));
            List unmatchLines = sampleLines.stream().filter(line -> {
                for (Pattern ex : exclude) {
                    if (!ex.matcher((CharSequence)line).find()) continue;
                    return true;
                }
                return !regex.matcher((CharSequence)line).find();
            }).collect(Collectors.toList());
            int matchCount = sampleLines.size() - unmatchLines.size();
            if (matchCount <= maxMatchCount) continue;
            selectedCommentLineMarker = str;
            selectedUnmatchLines = unmatchLines;
            maxMatchCount = matchCount;
        }
        if (selectedCommentLineMarker != null) {
            parserGuessed.set("comment_line_marker", selectedCommentLineMarker);
            return selectedUnmatchLines;
        }
        return sampleLines;
    }

    private static boolean guessStringHeaderLine(List<List<String>> sampleRecords) {
        List<String> first = sampleRecords.get(0);
        for (int i = 0; i < first.size(); ++i) {
            double avg;
            List<Integer> sub;
            int columnIndex = i;
            List lengthsAtColumn = StreamSupport.stream(sampleRecords.spliterator(), false).map(row -> (String)row.get(columnIndex)).filter(element -> element != null).map(element -> element.length()).collect(Collectors.toList());
            if (lengthsAtColumn.size() <= 1 || !(CsvGuessPlugin.varianceOfList(sub = lengthsAtColumn.subList(1, lengthsAtColumn.size())) <= 0.2) || !((avg = CsvGuessPlugin.averageOfList(sub)) == 0.0 ? (Integer)lengthsAtColumn.get(0) > 1 : Math.abs(avg - (double)((Integer)lengthsAtColumn.get(0)).intValue()) / avg > 0.7)) continue;
            return true;
        }
        return false;
    }

    private static int sumOfList(List<Integer> integers) {
        return StreamSupport.stream(integers.spliterator(), false).mapToInt(i -> i).sum();
    }

    private static double averageOfList(List<Integer> integers) {
        return StreamSupport.stream(integers.spliterator(), false).mapToInt(i -> i).average().orElse(0.0);
    }

    private static double varianceOfList(List<Integer> integers) {
        double average = CsvGuessPlugin.averageOfList(integers);
        return StreamSupport.stream(integers.spliterator(), false).mapToDouble(i -> ((double)i.intValue() - average) * ((double)i.intValue() - average)).average().orElse(0.0);
    }

    private static double standardDeviationOfList(List<Integer> integers) {
        double result = Math.sqrt(CsvGuessPlugin.varianceOfList(integers));
        if (result < 1.0E-11) {
            return 1.0E-9;
        }
        return result;
    }

    private static int weighQuote(String line, String delimRegex, String quoteRegex) {
        Pattern patternQ = Pattern.compile(String.format("(?:\\A|%s)\\s*%s(?:(?!%s).)*\\s*%s(?:$|%s)", delimRegex, quoteRegex, quoteRegex, quoteRegex, delimRegex));
        Pattern patternD = Pattern.compile(String.format("(?:\\A|%s)\\s*%s(?:(?!%s).)*\\s*%s(?:$|%s)", delimRegex, quoteRegex, delimRegex, quoteRegex, delimRegex));
        return CsvGuessPlugin.countPattern(line, patternQ) * 20 + CsvGuessPlugin.countPattern(line, patternD) * 40;
    }

    private static int countPattern(String string, Pattern pattern) {
        Matcher matcher = pattern.matcher(string);
        int count = 0;
        while (matcher.find()) {
            ++count;
        }
        return count;
    }

    private static byte[] joinBytes(List<String> sampleLines, Newline newline) {
        ByteArrayOutputStream data = new ByteArrayOutputStream();
        boolean first = true;
        for (String line : sampleLines) {
            if (first) {
                first = false;
            } else {
                byte[] newlineBytes = newline.getString().getBytes(StandardCharsets.UTF_8);
                data.write(newlineBytes, 0, newlineBytes.length);
            }
            byte[] bytes = line.getBytes(StandardCharsets.UTF_8);
            data.write(bytes, 0, bytes.length);
        }
        return data.toByteArray();
    }

    private static List<GuesstimatedType> typesFromListRecords(List<List<String>> samples) {
        List<List<String>> sampleObjects = samples;
        return SCHEMA_GUESS.typesFromListRecords(sampleObjects);
    }
}

