/*
 * Decompiled with CFR 0.152.
 */
package water.parser;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Locale;
import water.DKV;
import water.H2O;
import water.Iced;
import water.Key;
import water.MRTask;
import water.api.ParseSetupV3;
import water.exceptions.H2OIllegalArgumentException;
import water.fvec.ByteVec;
import water.fvec.FileVec;
import water.fvec.Frame;
import water.fvec.UploadFileVec;
import water.fvec.Vec;
import water.parser.BufferedString;
import water.parser.DefaultParserProviders;
import water.parser.ParseDataset;
import water.parser.ParseTime;
import water.parser.ParseUUID;
import water.parser.ParseWriter;
import water.parser.Parser;
import water.parser.ParserInfo;
import water.parser.ParserProvider;
import water.parser.ParserService;
import water.parser.PreviewParseWriter;
import water.parser.ZipUtil;
import water.util.ArrayUtils;
import water.util.Log;

public class ParseSetup
extends Iced {
    /** Separator value meaning "not specified"; the ParseSetupV3 constructor maps a zero separator to it. */
    public static final byte GUESS_SEP = -1;
    /** Header state returned by GuessSetupTsk.unifyCheckHeader when neither file reports a header. */
    public static final int NO_HEADER = -1;
    /** Header state that is still undecided; GuessSetupTsk.unifyCheckHeader rejects it when merging files. */
    public static final int GUESS_HEADER = 0;
    /** Header state that wins in GuessSetupTsk.unifyCheckHeader as soon as either file reports it. */
    public static final int HAS_HEADER = 1;
    /** Column count value meaning "not specified" (presumably: let the parser guess - confirm). */
    public static final int GUESS_COL_CNT = -1;
    /** Parser descriptor used to look up the ParserProvider in ParserService. */
    ParserInfo _parse_type;
    /** Column separator byte, or GUESS_SEP. */
    byte _separator;
    /** Single-quote flag handed through to the parser provider (exact semantics live in the parsers). */
    boolean _single_quotes;
    /** One of NO_HEADER, GUESS_HEADER or HAS_HEADER. */
    int _check_header;
    /** Number of columns, or GUESS_COL_CNT. */
    int _number_columns;
    /** Column names; may be null. */
    String[] _column_names;
    /** Per-column type codes; each code is used as an index into Vec.TYPE_STR. */
    byte[] _column_types;
    /** Per-column domains (presumably categorical levels - confirm). */
    String[][] _domains;
    /** Per-column NA strings; guessed in GuessSetupTsk.postGlobal when the user supplied none. */
    String[][] _na_strings;
    /** Preview rows; GuessSetupTsk.mergeSetups grows this to at most 10 rows across files. */
    String[][] _data;
    /** Errors collected while guessing the setup; GuessSetupTsk.mergeSetups caps the merged list at 20. */
    public ParseWriter.ParseErr[] _errs;
    /** Chunk size in bytes; defaults to 0x400000 (4 MiB). */
    public int _chunk_size = 0x400000;
    /** Column previews used by GuessSetupTsk.postGlobal to guess column types and NA strings; may be null. */
    PreviewParseWriter _column_previews = null;

    /**
     * Copy constructor. Copies every setting of {@code ps} except its errors, which
     * start out as an empty array. Array fields are shared with {@code ps}, not cloned.
     *
     * @param ps setup to copy from
     */
    public ParseSetup(ParseSetup ps) {
        this(
            ps._parse_type,
            ps._separator,
            ps._single_quotes,
            ps._check_header,
            ps._number_columns,
            ps._column_names,
            ps._column_types,
            ps._domains,
            ps._na_strings,
            ps._data,
            new ParseWriter.ParseErr[0],
            ps._chunk_size);
    }

    /**
     * Builds a setup for the SVMLight parser: unspecified separator, no single quotes,
     * no header, one column of type code 3, and no names, domains, NA strings or preview data.
     *
     * @return a fresh SVMLight setup
     */
    public static ParseSetup makeSVMLightSetup() {
        byte[] singleColumnType = new byte[]{3};
        ParseWriter.ParseErr[] noErrors = new ParseWriter.ParseErr[0];
        return new ParseSetup(
            DefaultParserProviders.SVMLight_INFO,
            GUESS_SEP,
            false,
            NO_HEADER,
            1,
            null,
            singleColumnType,
            null,
            null,
            null,
            noErrors);
    }

    /**
     * Full constructor; stores every argument as given (arrays are not copied).
     *
     * @param parse_type   parser descriptor
     * @param sep          column separator, or {@link #GUESS_SEP}
     * @param singleQuotes single-quote flag
     * @param checkHeader  one of {@link #NO_HEADER}, {@link #GUESS_HEADER}, {@link #HAS_HEADER}
     * @param ncols        number of columns, or {@link #GUESS_COL_CNT}
     * @param columnNames  column names, may be null
     * @param ctypes       column type codes, may be null
     * @param domains      per-column domains, may be null
     * @param naStrings    per-column NA strings, may be null
     * @param data         preview rows, may be null
     * @param errs         errors collected so far
     * @param chunkSize    chunk size in bytes
     */
    public ParseSetup(ParserInfo parse_type, byte sep, boolean singleQuotes, int checkHeader, int ncols, String[] columnNames, byte[] ctypes, String[][] domains, String[][] naStrings, String[][] data, ParseWriter.ParseErr[] errs, int chunkSize) {
        _parse_type = parse_type;
        _separator = sep;
        _single_quotes = singleQuotes;
        _check_header = checkHeader;
        _number_columns = ncols;
        _column_names = columnNames;
        _column_types = ctypes;
        _domains = domains;
        _na_strings = naStrings;
        _data = data;
        _chunk_size = chunkSize;
        _errs = errs;
    }

    /**
     * Builds a setup from the REST schema object.
     * A null {@code parse_type} becomes the GUESS parser, a zero separator becomes
     * {@link #GUESS_SEP}, the column count is left as {@link #GUESS_COL_CNT}, and
     * column type names are converted with {@link #strToColumnTypes(String[])}.
     *
     * @param ps schema carrying the user-supplied parse options
     */
    public ParseSetup(ParseSetupV3 ps) {
        this(
            ps.parse_type != null
                ? ParserService.INSTANCE.getByName(ps.parse_type).info()
                : DefaultParserProviders.GUESS_INFO,
            ps.separator != 0 ? (byte)ps.separator : GUESS_SEP,
            ps.single_quotes,
            ps.check_header,
            GUESS_COL_CNT,
            ps.column_names,
            ParseSetup.strToColumnTypes(ps.column_types),
            null,
            ps.na_strings,
            null,
            new ParseWriter.ParseErr[0],
            ps.chunk_size);
    }

    /**
     * Convenience constructor: same as the full constructor with an empty error
     * array and the default chunk size of 0x400000 bytes.
     */
    public ParseSetup(ParserInfo parseType, byte sep, boolean singleQuotes, int checkHeader, int ncols, String[] columnNames, byte[] ctypes, String[][] domains, String[][] naStrings, String[][] data) {
        this(
            parseType, sep, singleQuotes, checkHeader, ncols,
            columnNames, ctypes, domains, naStrings, data,
            new ParseWriter.ParseErr[0],
            0x400000);
    }

    /**
     * Convenience constructor: same as the full constructor with the default
     * chunk size of 0x400000 bytes.
     */
    public ParseSetup(ParserInfo parseType, byte sep, boolean singleQuotes, int checkHeader, int ncols, String[] columnNames, byte[] ctypes, String[][] domains, String[][] naStrings, String[][] data, ParseWriter.ParseErr[] errs) {
        this(
            parseType, sep, singleQuotes, checkHeader, ncols,
            columnNames, ctypes, domains, naStrings, data,
            errs,
            0x400000);
    }

    /**
     * Convenience constructor for a setup that only carries preview data and errors:
     * column names, types, domains and NA strings are null, and the chunk size is
     * the default 0x400000 bytes.
     */
    public ParseSetup(ParserInfo parseType, byte sep, boolean singleQuotes, int checkHeader, int ncols, String[][] data, ParseWriter.ParseErr[] errs) {
        this(
            parseType, sep, singleQuotes, checkHeader, ncols,
            null, null, null, null,
            data,
            errs,
            0x400000);
    }

    /**
     * No-argument constructor. Leaves every field at its declared default, so
     * {@code _parse_type} and all array fields are null until set.
     */
    public ParseSetup() {
    }

    /** @return the column names array itself (not a copy); may be null */
    public String[] getColumnNames() {
        return _column_names;
    }

    /** @return the preview rows array itself (not a copy); may be null */
    public String[][] getData() {
        return _data;
    }

    /**
     * Maps each column type code to its name by indexing {@code Vec.TYPE_STR}.
     *
     * @return one type name per column, in column order
     */
    public String[] getColumnTypeStrings() {
        String[] typeNames = new String[_column_types.length];
        int col = 0;
        for (byte typeCode : _column_types) {
            typeNames[col++] = Vec.TYPE_STR[typeCode];
        }
        return typeNames;
    }

    /** @return the column type codes array itself (not a copy); may be null */
    public byte[] getColumnTypes() {
        return _column_types;
    }

    /**
     * Converts user-supplied column type names into the byte type codes stored in
     * {@code _column_types} (the same codes that index {@code Vec.TYPE_STR}).
     *
     * <p>Matching is case-insensitive and locale-independent. Accepted names:
     * <ul>
     *   <li>{@code unknown} -> 0</li>
     *   <li>{@code uuid} -> 1</li>
     *   <li>{@code string} -> 2</li>
     *   <li>{@code float}, {@code real}, {@code double}, {@code int}, {@code numeric} -> 3</li>
     *   <li>{@code categorical}, {@code factor}, {@code enum} -> 4</li>
     *   <li>{@code time} -> 5</li>
     * </ul>
     *
     * @param strs type names, one per column; may be null
     * @return type codes in the same order, or null when {@code strs} is null
     * @throws H2OIllegalArgumentException if any entry is null or not an accepted name
     */
    public static byte[] strToColumnTypes(String[] strs) {
        if (strs == null) {
            return null;
        }
        byte[] types = new byte[strs.length];
        for (int i = 0; i < types.length; ++i) {
            String requested = strs[i];
            if (requested == null) {
                // Report a missing entry the same way as an unrecognised one instead of an NPE.
                throw new H2OIllegalArgumentException("Provided column type " + requested + " is unknown.  Cannot proceed with parse due to invalid argument.");
            }
            // Locale.ROOT keeps the match stable under locales with special casing
            // rules (e.g. Turkish, where "INT".toLowerCase() is not "int").
            switch (requested.toLowerCase(Locale.ROOT)) {
                case "unknown":
                    types[i] = 0;
                    break;
                case "uuid":
                    types[i] = 1;
                    break;
                case "string":
                    types[i] = 2;
                    break;
                case "float":
                case "real":
                case "double":
                case "int":
                case "numeric":
                    types[i] = 3;
                    break;
                case "categorical":
                case "factor":
                case "enum":
                    types[i] = 4;
                    break;
                case "time":
                    types[i] = 5;
                    break;
                default:
                    throw new H2OIllegalArgumentException("Provided column type " + requested + " is unknown.  Cannot proceed with parse due to invalid argument.");
            }
        }
        return types;
    }

    /**
     * Creates the parser for this setup by asking the provider registered for
     * {@code _parse_type}.
     *
     * @param jobKey key passed through to {@code ParserProvider.createParser}
     * @return the parser built by the provider
     * @throws H2OIllegalArgumentException if no provider is registered for the parse type
     */
    protected Parser parser(Key jobKey) {
        ParserProvider provider = ParserService.INSTANCE.getByInfo(_parse_type);
        if (provider == null) {
            throw new H2OIllegalArgumentException("Unknown file type.  Parse cannot be completed.", "Attempted to invoke a parser for ParseType:" + _parse_type + ", which doesn't exist.");
        }
        return provider.createParser(this, jobKey);
    }

    /**
     * Asks the provider registered for this setup's {@code _parse_type} to build the
     * final setup from the input keys and the demanded setup.
     *
     * @param inputKeys     keys of the inputs to parse
     * @param demandedSetup setup requested by the caller
     * @return the setup produced by {@code ParserProvider.createParserSetup}
     * @throws H2OIllegalArgumentException if no provider is registered for the parse type
     */
    public final ParseSetup getFinalSetup(Key[] inputKeys, ParseSetup demandedSetup) {
        ParserProvider provider = ParserService.INSTANCE.getByInfo(_parse_type);
        if (provider == null) {
            throw new H2OIllegalArgumentException("Unknown parser configuration! Configuration=" + this);
        }
        return provider.createParserSetup(inputKeys, demandedSetup);
    }

    /**
     * Finds column names that occur more than once.
     *
     * @return the set of repeated names; empty when there are none or when no
     *         column names are set
     */
    HashSet<String> checkDupColumnNames() {
        HashSet<String> duplicates = new HashSet<String>();
        if (_column_names != null) {
            HashSet<String> seen = new HashSet<String>();
            for (String name : _column_names) {
                // add() returns false when the name was already present.
                if (!seen.add(name)) {
                    duplicates.add(name);
                }
            }
        }
        return duplicates;
    }

    /**
     * Describes this setup by its parse type.
     *
     * @return the parse type's string form, or {@code "null"} when no parse type
     *         is set (for example after the no-argument constructor), so that
     *         building an error message from a setup never throws
     */
    @Override
    public String toString() {
        return String.valueOf(this._parse_type);
    }

    /**
     * Tells whether every token of a line is a plain string, i.e. none of them
     * parses as a double, a time ({@code ParseTime.isTime}) or a UUID
     * ({@code ParseUUID.isUUID}).
     *
     * @param line tokens of one line
     * @return true when no token is numeric, a time or a UUID
     */
    static boolean allStrings(String[] line) {
        BufferedString scratch = new BufferedString();
        for (String token : line) {
            boolean numeric;
            try {
                Double.parseDouble(token);
                numeric = true;
            }
            catch (NumberFormatException e) {
                numeric = false;
            }
            if (numeric) {
                return false;
            }
            scratch.setTo(token);
            if (ParseTime.isTime(scratch)) {
                return false;
            }
            if (ParseUUID.isUUID(scratch)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Header heuristic: the first line is a header when it consists only of
     * strings while the second line does not.
     *
     * @param l1 tokens of the first line
     * @param l2 tokens of the second line
     * @return true when {@code l1} is all strings and {@code l2} is not
     */
    static boolean hasHeader(String[] l1, String[] l2) {
        if (!ParseSetup.allStrings(l1)) {
            return false;
        }
        return !ParseSetup.allStrings(l2);
    }

    /**
     * Guesses the setup for the given keys starting from a GUESS-type setup with an
     * unspecified separator and column count, no preview data and no errors.
     *
     * @param fkeys       keys of the inputs to inspect
     * @param singleQuote single-quote flag
     * @param checkHeader one of {@link #NO_HEADER}, {@link #GUESS_HEADER}, {@link #HAS_HEADER}
     * @return the guessed setup
     */
    public static ParseSetup guessSetup(Key[] fkeys, boolean singleQuote, int checkHeader) {
        ParseSetup initial = new ParseSetup(
            DefaultParserProviders.GUESS_INFO,
            GUESS_SEP,
            singleQuote,
            checkHeader,
            GUESS_COL_CNT,
            null,
            new ParseWriter.ParseErr[0]);
        return ParseSetup.guessSetup(fkeys, initial);
    }

    /**
     * Runs a {@link GuessSetupTsk} over all keys and then picks the chunk size for
     * the merged setup: 0x400000 bytes when the first key holds a Frame whose first
     * vec is an {@code UploadFileVec}, otherwise whatever
     * {@code FileVec.calcOptimalChunkSize} returns for the accumulated totals.
     *
     * @param fkeys     keys of the inputs to inspect
     * @param userSetup user-supplied settings the guess starts from
     * @return the merged setup produced by the task
     */
    public static ParseSetup guessSetup(Key[] fkeys, ParseSetup userSetup) {
        GuessSetupTsk task = new GuessSetupTsk(userSetup);
        ((GuessSetupTsk)task.doAll(fkeys)).getResult();
        Object first = DKV.getGet(fkeys[0]);
        boolean uploaded = first instanceof Frame && ((Frame)first).vec(0) instanceof UploadFileVec;
        if (uploaded) {
            task._gblSetup._chunk_size = 0x400000;
        } else {
            task._gblSetup._chunk_size = FileVec.calcOptimalChunkSize(task._totalParseSize, task._gblSetup._number_columns, task._maxLineLength, Runtime.getRuntime().availableProcessors(), H2O.getCloudSize(), false, true);
        }
        return task._gblSetup;
    }

    /**
     * Guesses the setup from raw bytes using the parse type, separator, quote flag,
     * header flag, column names and column types of {@code userSetup}. The column
     * count, domains and NA strings of {@code userSetup} are not passed on.
     *
     * @param bits      leading bytes of the input
     * @param userSetup user-supplied settings the guess starts from
     * @return the guessed setup
     */
    public static ParseSetup guessSetup(byte[] bits, ParseSetup userSetup) {
        return ParseSetup.guessSetup(
            bits,
            userSetup._parse_type,
            userSetup._separator,
            GUESS_COL_CNT,
            userSetup._single_quotes,
            userSetup._check_header,
            userSetup._column_names,
            userSetup._column_types,
            null,
            null);
    }

    /**
     * Delegates the guess to the provider registered for {@code parserType}.
     *
     * @return the setup produced by {@code ParserProvider.guessSetup}
     * @throws ParseDataset.H2OParseException if no provider is registered for the type
     */
    public static ParseSetup guessSetup(byte[] bits, ParserInfo parserType, byte sep, int ncols, boolean singleQuotes, int checkHeader, String[] columnNames, byte[] columnTypes, String[][] domains, String[][] naStrings) {
        ParserProvider provider = ParserService.INSTANCE.getByInfo(parserType);
        if (provider == null) {
            throw new ParseDataset.H2OParseException("Cannot determine file type.");
        }
        return provider.guessSetup(bits, sep, ncols, singleQuotes, checkHeader, columnNames, columnTypes, domains, naStrings);
    }

    /**
     * Derives an unused key name ending in {@code .hex} from a file name.
     *
     * <p>Steps: drop everything up to the last {@code File.separatorChar} (when it is
     * not the first character), repeatedly strip a trailing extension while the name
     * ends in zip, gz, csv, xls, txt, svm or arff, prefix {@code X} when the name is
     * empty or does not start with a Java identifier start character, replace every
     * later non-identifier character with {@code _}, then append {@code .hex}. If a
     * value already exists in the DKV under that name, a counter (1, 2, ...) is
     * inserted before {@code .hex} until a free name is found.
     *
     * @param n file name or path; must not be null
     * @return a key name for which {@code DKV.get} currently returns null
     */
    public static String createHexName(String n) {
        int sep = n.lastIndexOf(File.separatorChar);
        if (sep > 0) {
            n = n.substring(sep + 1);
        }
        int dot = n.lastIndexOf('.');
        while (dot > 0 && (n.endsWith("zip") || n.endsWith("gz") || n.endsWith("csv") || n.endsWith("xls") || n.endsWith("txt") || n.endsWith("svm") || n.endsWith("arff"))) {
            n = n.substring(0, dot);
            dot = n.lastIndexOf('.');
        }
        // An empty name (empty input, or a path ending in the separator) used to
        // fail on charAt(0); it now simply becomes "X".
        if (n.isEmpty() || !Character.isJavaIdentifierStart(n.charAt(0))) {
            n = "X" + n;
        }
        char[] cs = n.toCharArray();
        // Index 0 is known to be a valid identifier start at this point.
        for (int i = 1; i < cs.length; ++i) {
            if (!Character.isJavaIdentifierPart(cs[i])) {
                cs[i] = '_';
            }
        }
        n = new String(cs);
        int suffix = 0;
        String res = n + ".hex";
        Key k = Key.make(res);
        while (DKV.get(k) != null) {
            res = n + ++suffix + ".hex";
            k = Key.make(res);
        }
        return res;
    }

    /**
     * Rejects input that starts with a UTF-16 byte order mark (FF FE or FE FF).
     *
     * @param bits leading bytes of the input
     * @throws ParseDataset.H2OParseException if the first two bytes are a UTF-16 BOM
     */
    private static final void checkEncoding(byte[] bits) {
        if (bits.length < 2) {
            return;
        }
        final byte ff = (byte)0xFF;
        final byte fe = (byte)0xFE;
        boolean bomFfFe = bits[0] == ff && bits[1] == fe;
        boolean bomFeFf = bits[0] == fe && bits[1] == ff;
        if (bomFfFe || bomFeFf) {
            throw new ParseDataset.H2OParseException("UTF16 encoding detected, but is not supported.");
        }
    }

    /**
     * Returns the length, in chars, of the longest line in the given bytes. The
     * bytes are decoded with the platform default charset and split into lines by
     * {@code BufferedReader.readLine}.
     *
     * @param bytes leading bytes of the input
     * @return the longest line length, or -1 when fewer than two bytes are given
     *         or reading fails
     */
    private static final long maxLineLength(byte[] bytes) {
        if (bytes.length < 2) {
            return -1L;
        }
        BufferedReader reader = new BufferedReader(new StringReader(new String(bytes)));
        long longest = 0L;
        try {
            for (String row = reader.readLine(); row != null; row = reader.readLine()) {
                long len = row.length();
                if (len > longest) {
                    longest = len;
                }
            }
        }
        catch (IOException e) {
            return -1L;
        }
        return longest;
    }

    /** @return the parser descriptor of this setup; may be null */
    public ParserInfo getParseType() {
        return _parse_type;
    }

    /**
     * Sets the parser descriptor.
     *
     * @return this setup, for chaining
     */
    public ParseSetup setParseType(ParserInfo parse_type) {
        _parse_type = parse_type;
        return this;
    }

    /**
     * Sets the column separator byte.
     *
     * @return this setup, for chaining
     */
    public ParseSetup setSeparator(byte separator) {
        _separator = separator;
        return this;
    }

    /**
     * Sets the single-quote flag.
     *
     * @return this setup, for chaining
     */
    public ParseSetup setSingleQuotes(boolean single_quotes) {
        _single_quotes = single_quotes;
        return this;
    }

    /**
     * Sets the header state (one of {@link #NO_HEADER}, {@link #GUESS_HEADER}, {@link #HAS_HEADER}).
     *
     * @return this setup, for chaining
     */
    public ParseSetup setCheckHeader(int check_header) {
        _check_header = check_header;
        return this;
    }

    /**
     * Sets the number of columns.
     *
     * @return this setup, for chaining
     */
    public ParseSetup setNumberColumns(int number_columns) {
        _number_columns = number_columns;
        return this;
    }

    /**
     * Sets the column names; the array is stored as given, not copied.
     *
     * @return this setup, for chaining
     */
    public ParseSetup setColumnNames(String[] column_names) {
        _column_names = column_names;
        return this;
    }

    /**
     * Sets the column type codes; the array is stored as given, not copied.
     *
     * @return this setup, for chaining
     */
    public ParseSetup setColumnTypes(byte[] column_types) {
        _column_types = column_types;
        return this;
    }

    /**
     * Sets the per-column domains; the array is stored as given, not copied.
     *
     * @return this setup, for chaining
     */
    public ParseSetup setDomains(String[][] domains) {
        _domains = domains;
        return this;
    }

    /**
     * Sets the per-column NA strings; the array is stored as given, not copied.
     *
     * @return this setup, for chaining
     */
    public ParseSetup setNAStrings(String[][] na_strings) {
        _na_strings = na_strings;
        return this;
    }

    /**
     * Sets the chunk size in bytes.
     *
     * @return this setup, for chaining
     */
    public ParseSetup setChunkSize(int chunk_size) {
        _chunk_size = chunk_size;
        return this;
    }

    public static class GuessSetupTsk
    extends MRTask<GuessSetupTsk> {
        /** User-supplied settings that every per-file guess starts from. */
        final ParseSetup _userSetup;
        /** True until map() sees a key with at least one byte of data, or reduce() adopts a non-empty task. */
        boolean _empty = true;
        /** Setup guessed by map() for one file, then merged across files by reduce(). */
        public ParseSetup _gblSetup;
        /** Accumulated input size in bytes; map() scales compressed inputs by their decompression ratio. */
        public long _totalParseSize;
        /** Longest line length reported by ParseSetup.maxLineLength over the inspected data. */
        public long _maxLineLength;
        /** String form of the key handled by map(); copied into parse errors. */
        String _file;

        /**
         * @param userSetup user-supplied settings the guess starts from
         */
        public GuessSetupTsk(ParseSetup userSetup) {
            _userSetup = userSetup;
        }

        /**
         * Guesses the setup for one input key.
         *
         * <p>Loads the value under {@code key}, takes its first unzipped bytes and,
         * when there are any, adds the (decompression-scaled) size to the total,
         * rejects UTF-16 input, records the longest line and stores the guessed
         * setup in {@code _gblSetup}. Errors reported by the guess are re-based to
         * chunk 0 and tagged with this key's name.
         *
         * @param key key of a ByteVec, or of a Frame whose first vec is a ByteVec
         * @throws H2OIllegalArgumentException if nothing is stored under the key or
         *         {@code ParseSetup.maxLineLength} returns -1
         * @throws ParseDataset.H2OParseException if the guess fails; the key is
         *         appended to the message
         */
        @Override
        public void map(Key key) {
            _file = key.toString();
            Object stored = DKV.getGet(key);
            if (stored == null) {
                throw new H2OIllegalArgumentException("Missing data", "Did not find any data under key " + key);
            }
            ByteVec vec;
            if (stored instanceof ByteVec) {
                vec = (ByteVec)stored;
            } else {
                vec = (ByteVec)((Frame)stored).vecs()[0];
            }
            byte[] head = ZipUtil.getFirstUnzippedBytes(vec);
            if (head.length <= 0) {
                // Nothing to inspect; the task stays marked as empty.
                return;
            }
            _empty = false;
            float ratio = ZipUtil.decompressionRatio(vec);
            if ((double)ratio > 1.0) {
                _totalParseSize = (long)((float)_totalParseSize + (float)vec.length() * ratio);
            } else {
                _totalParseSize += vec.length();
            }
            ParseSetup.checkEncoding(head);
            _maxLineLength = ParseSetup.maxLineLength(head);
            if (_maxLineLength == -1L) {
                throw new H2OIllegalArgumentException("The first 4MB of the data don't contain any line breaks. Cannot parse.");
            }
            try {
                _gblSetup = ParseSetup.guessSetup(head, _userSetup);
                for (ParseWriter.ParseErr err : _gblSetup._errs) {
                    // Fold the chunk index into the byte offset so the error is relative to chunk 0.
                    err._byteOffset += (long)(err._cidx * Parser.StreamData.bufSz);
                    err._cidx = 0;
                    err._file = _file;
                }
            }
            catch (ParseDataset.H2OParseException pse) {
                throw pse.resetMsg(pse.getMessage() + " for " + key);
            }
        }

        /**
         * Folds another task's result into this one.
         *
         * <p>Empty tasks are ignored. If this task has no setup yet, it adopts the
         * other task's setup and file name; otherwise the two setups are merged
         * with {@code mergeSetups}. In both cases the other task's parse size and
         * longest line are accumulated, so the totals used afterwards for sizing
         * chunks cover every non-empty file, whatever the reduction order.
         *
         * @param other task to fold in
         */
        @Override
        public void reduce(GuessSetupTsk other) {
            if (other._empty) {
                return;
            }
            if (this._gblSetup == null) {
                this._empty = false;
                this._gblSetup = other._gblSetup;
                assert (this._gblSetup != null);
                // Keep the file name in step with the setup it describes.
                this._file = other._file;
            } else {
                this._gblSetup = this.mergeSetups(this._gblSetup, other._gblSetup, this._file, other._file);
            }
            // Previously skipped when adopting other's setup, which dropped its totals.
            this._totalParseSize += other._totalParseSize;
            this._maxLineLength = Math.max(this._maxLineLength, other._maxLineLength);
        }

        /**
         * Finalises the merged setup. When column previews exist and the parse type
         * is not ARFF, column types are guessed from the previews and NA strings
         * are taken from the user setup if given, otherwise guessed as well. Every
         * collected error is then logged as a warning.
         */
        @Override
        public void postGlobal() {
            ParseSetup merged = _gblSetup;
            if (merged._column_previews != null && !merged._parse_type.equals(DefaultParserProviders.ARFF_INFO)) {
                merged._column_types = merged._column_previews.guessTypes();
                if (_userSetup._na_strings == null) {
                    merged._na_strings = merged._column_previews.guessNAStrings(merged._column_types);
                } else {
                    merged._na_strings = _userSetup._na_strings;
                }
            }
            for (ParseWriter.ParseErr problem : merged._errs) {
                Log.warn("ParseSetup: " + problem.toString());
            }
        }

        /**
         * Merges the setup guessed for one file into the setup guessed for another.
         * {@code setupA} is updated in place and returned.
         *
         * <p>Header state, separator and column names are unified by the
         * {@code unify*} helpers. Parse types must be equal, or one ARFF and one
         * CSV (the result is then ARFF). Preview rows of {@code setupB}, except its
         * first row, are appended until 10 rows are held; errors are concatenated
         * and capped at 20.
         *
         * @param setupA setup to merge into; when null, {@code setupB} is returned
         * @param setupB setup to merge from
         * @param fileA  name of the file behind {@code setupA}, for error reporting
         * @param fileB  name of the file behind {@code setupB}, for error reporting
         * @return the merged setup
         * @throws ParseDataset.H2OParseException if the setups are incompatible
         */
        private ParseSetup mergeSetups(ParseSetup setupA, ParseSetup setupB, String fileA, String fileB) {
            if (setupA == null) {
                return setupB;
            }
            ParseSetup mergedSetup = setupA;
            mergedSetup._check_header = GuessSetupTsk.unifyCheckHeader(setupA._check_header, setupB._check_header);
            mergedSetup._separator = GuessSetupTsk.unifyColumnSeparators(setupA._separator, setupB._separator);
            mergedSetup._column_names = GuessSetupTsk.unifyColumnNames(setupA._column_names, setupB._column_names);

            ParserInfo typeA = setupA._parse_type;
            ParserInfo typeB = setupB._parse_type;
            if (typeA.equals(DefaultParserProviders.ARFF_INFO) && typeB.equals(DefaultParserProviders.CSV_INFO)) {
                // ARFF + CSV: parse type and column types of A already are the ones to keep.
            } else if (typeA.equals(DefaultParserProviders.CSV_INFO) && typeB.equals(DefaultParserProviders.ARFF_INFO)) {
                mergedSetup._parse_type = DefaultParserProviders.ARFF_INFO;
                mergedSetup._column_types = setupB._column_types;
            } else if (typeA.equals(typeB)) {
                mergedSetup._column_previews = PreviewParseWriter.unifyColumnPreviews(setupA._column_previews, setupB._column_previews);
            } else {
                throw new ParseDataset.H2OParseException("File type mismatch. Cannot parse files of type " + typeA + " and " + typeB + " as one dataset.");
            }

            mergedSetup._number_columns = mergedSetup._parse_type.equals(DefaultParserProviders.CSV_INFO)
                ? Math.max(setupA._number_columns, setupB._number_columns)
                : this.unifyColumnCount(setupA._number_columns, setupB._number_columns, mergedSetup, fileA, fileB);

            // Top up the preview with B's rows, skipping B's first row (presumably its
            // header line - confirm). The length check on B keeps the copy length
            // non-negative when B has no preview rows at all.
            if (mergedSetup._data.length < 10 && setupB._data.length > 1) {
                int have = mergedSetup._data.length;
                int want = Math.min(10, have + setupB._data.length - 1);
                mergedSetup._data = (String[][])Arrays.copyOf(mergedSetup._data, want);
                System.arraycopy(setupB._data, 1, mergedSetup._data, have, want - have);
            }

            // setupA._errs already includes anything unifyColumnCount appended above.
            mergedSetup._errs = ArrayUtils.append(setupA._errs, setupB._errs);
            if (mergedSetup._errs.length > 20) {
                mergedSetup._errs = Arrays.copyOf(mergedSetup._errs, 20);
            }
            return mergedSetup;
        }

        /**
         * Combines the header states of two files.
         *
         * @return {@code HAS_HEADER} when either file has a header, else {@code NO_HEADER}
         * @throws ParseDataset.H2OParseException if either state is still {@code GUESS_HEADER}
         */
        private static int unifyCheckHeader(int chkHdrA, int chkHdrB) {
            boolean undecided = chkHdrA == ParseSetup.GUESS_HEADER || chkHdrB == ParseSetup.GUESS_HEADER;
            if (undecided) {
                throw new ParseDataset.H2OParseException("Unable to determine header on a file. Not expected.");
            }
            boolean anyHeader = chkHdrA == ParseSetup.HAS_HEADER || chkHdrB == ParseSetup.HAS_HEADER;
            return anyHeader ? ParseSetup.HAS_HEADER : ParseSetup.NO_HEADER;
        }

        /**
         * Combines the separators of two files. Equal separators pass through, and an
         * unspecified separator ({@code GUESS_SEP}) yields to the other one.
         *
         * @return the agreed separator
         * @throws ParseDataset.H2OParseException if both are specified and differ
         */
        private static byte unifyColumnSeparators(byte sepA, byte sepB) {
            if (sepA == sepB || sepB == ParseSetup.GUESS_SEP) {
                return sepA;
            }
            if (sepA == ParseSetup.GUESS_SEP) {
                return sepB;
            }
            throw new ParseDataset.H2OParseException("Column separator mismatch. One file seems to use \"" + (char)sepA + "\" and the other uses \"" + (char)sepB + "\".");
        }

        /**
         * Combines the column counts of two files. Equal counts pass through and a
         * zero count yields to the other one. Otherwise an "Incompatible number of
         * columns" error naming both files is appended to {@code mergedSetup._errs}
         * and the larger count is used.
         *
         * @return the agreed column count
         */
        private int unifyColumnCount(int cntA, int cntB, ParseSetup mergedSetup, String fileA, String fileB) {
            if (cntA == cntB || cntB == 0) {
                return cntA;
            }
            if (cntA == 0) {
                return cntB;
            }
            ParseWriter.ParseErr mismatch = new ParseWriter.ParseErr();
            mismatch._err = "Incompatible number of columns, " + cntA + " != " + cntB;
            mismatch._file = fileA + ", " + fileB;
            mergedSetup._errs = ArrayUtils.append(mergedSetup._errs, mismatch);
            return cntA > cntB ? cntA : cntB;
        }

        /**
         * Combines the column names of two files. When only one side has names, those
         * are used. Otherwise every name in {@code namesA} must have an equal name at
         * the same position in {@code namesB}; extra trailing names in {@code namesB}
         * are not checked.
         *
         * @return the agreed column names (possibly null when both inputs are null)
         * @throws ParseDataset.H2OParseException if {@code namesB} is shorter than
         *         {@code namesA} or any compared name differs
         */
        private static String[] unifyColumnNames(String[] namesA, String[] namesB) {
            if (namesA == null) {
                return namesB;
            }
            if (namesB == null) {
                return namesA;
            }
            for (int i = 0; i < namesA.length; ++i) {
                // Strict bound: with "<=" a shorter namesB was indexed one past its end
                // and failed with ArrayIndexOutOfBoundsException instead of the parse error.
                if (i < namesB.length && namesA[i].equals(namesB[i])) continue;
                throw new ParseDataset.H2OParseException("Column names do not match between files.");
            }
            return namesA;
        }
    }
}

