/*
 * Decompiled with CFR 0.152.
 */
package org.embulk.util.guess;

import com.ibm.icu.text.CharsetDetector;
import com.ibm.icu.text.CharsetMatch;
import org.embulk.config.ConfigDiff;
import org.embulk.config.DataSource;
import org.embulk.spi.Buffer;
import org.embulk.util.config.ConfigMapperFactory;

/**
 * Guesses the character set of a sample buffer using ICU's {@link CharsetDetector} and reports
 * it as a {@code parser.charset} entry in a {@link ConfigDiff}.
 *
 * <p>Instances are created through {@link #of(ConfigMapperFactory)}.
 */
public final class CharsetGuess {
    /** Charset reported when detection yields no match or a match below {@link #MIN_CONFIDENCE}. */
    private static final String DEFAULT_CHARSET = "UTF-8";

    /**
     * Minimum detector confidence required to trust the detected charset; anything lower falls
     * back to {@link #DEFAULT_CHARSET}. ICU documents confidence as an integer from 0 to 100.
     */
    private static final int MIN_CONFIDENCE = 50;

    private final ConfigMapperFactory configMapperFactory;

    private CharsetGuess(final ConfigMapperFactory configMapperFactory) {
        this.configMapperFactory = configMapperFactory;
    }

    /**
     * Creates a {@code CharsetGuess} that builds its results with the given factory.
     *
     * @param configMapperFactory factory used to create the {@link ConfigDiff} instances returned
     *     by {@link #guess(Buffer)}
     * @return a new {@code CharsetGuess}
     */
    public static CharsetGuess of(final ConfigMapperFactory configMapperFactory) {
        return new CharsetGuess(configMapperFactory);
    }

    /**
     * Guesses the charset of the given sample.
     *
     * <p>The first {@code sample.limit()} bytes, read from index 0, are handed to the detector.
     * If the detector finds no match at all, or its best match has a confidence below
     * {@value #MIN_CONFIDENCE}, the result is {@value #DEFAULT_CHARSET}. Otherwise the detected
     * name is used after being passed through {@link #convertPredefined(String)}.
     *
     * @param sample buffer holding the sample bytes to inspect
     * @return a {@link ConfigDiff} of the shape {@code {parser: {charset: <name>}}}
     */
    public ConfigDiff guess(final Buffer sample) {
        final int sampleLength = sample.limit();
        final byte[] sampleArray = new byte[sampleLength];
        sample.getBytes(0, sampleArray, 0, sampleLength);

        final CharsetDetector detector = new CharsetDetector();
        detector.setText(sampleArray);
        // detect() returns null when no charset matches the input at all; treat that the same
        // as a low-confidence match instead of failing with a NullPointerException.
        final CharsetMatch bestMatch = detector.detect();

        final String charsetName;
        if (bestMatch == null || bestMatch.getConfidence() < MIN_CONFIDENCE) {
            charsetName = DEFAULT_CHARSET;
        } else {
            charsetName = convertPredefined(bestMatch.getName());
        }

        final ConfigDiff charset = this.configMapperFactory.newConfigDiff();
        charset.set("charset", charsetName);

        final ConfigDiff result = this.configMapperFactory.newConfigDiff();
        result.setNested("parser", charset);
        return result;
    }

    /**
     * Maps selected detector-reported charset names to the name that should be reported instead.
     *
     * <p>{@code ISO-8859-1} is reported as {@code UTF-8} and {@code Shift_JIS} as {@code MS932};
     * every other name is returned unchanged.
     * NOTE(review): presumably ISO-8859-1 is remapped because plain-ASCII samples are detected as
     * ISO-8859-1, and MS932 is preferred as the Windows variant of Shift_JIS; confirm the intent.
     *
     * @param before charset name as reported by the detector
     * @return the charset name to report
     */
    private static String convertPredefined(final String before) {
        switch (before) {
            case "ISO-8859-1":
                return "UTF-8";
            case "Shift_JIS":
                return "MS932";
            default:
                return before;
        }
    }
}

