/*
 * Decompiled with CFR 0.152.
 */
package ai.libs.jaicore.ml.core.dataset.serialization;

import ai.libs.jaicore.basic.OptionsParser;
import ai.libs.jaicore.basic.kvstore.KVStore;
import ai.libs.jaicore.ml.core.dataset.AInstance;
import ai.libs.jaicore.ml.core.dataset.Dataset;
import ai.libs.jaicore.ml.core.dataset.DenseInstance;
import ai.libs.jaicore.ml.core.dataset.SparseInstance;
import ai.libs.jaicore.ml.core.dataset.schema.LabeledInstanceSchema;
import ai.libs.jaicore.ml.core.dataset.schema.attribute.IntBasedCategoricalAttribute;
import ai.libs.jaicore.ml.core.dataset.schema.attribute.NumericAttribute;
import ai.libs.jaicore.ml.core.dataset.schema.attribute.StringAttribute;
import ai.libs.jaicore.ml.core.dataset.serialization.arff.EArffAttributeType;
import ai.libs.jaicore.ml.core.dataset.serialization.arff.EArffItem;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Objects;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import org.api4.java.ai.ml.core.dataset.descriptor.IDatasetDescriptor;
import org.api4.java.ai.ml.core.dataset.descriptor.IFileDatasetDescriptor;
import org.api4.java.ai.ml.core.dataset.schema.attribute.IAttribute;
import org.api4.java.ai.ml.core.dataset.schema.attribute.ICategoricalAttribute;
import org.api4.java.ai.ml.core.dataset.schema.attribute.INumericAttribute;
import org.api4.java.ai.ml.core.dataset.serialization.DatasetDeserializationFailedException;
import org.api4.java.ai.ml.core.dataset.serialization.IDatasetDeserializer;
import org.api4.java.ai.ml.core.dataset.serialization.UnsupportedAttributeTypeException;
import org.api4.java.ai.ml.core.dataset.supervised.ILabeledDataset;
import org.api4.java.ai.ml.core.dataset.supervised.ILabeledInstance;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class ArffDatasetAdapter
implements IDatasetDeserializer<ILabeledDataset<ILabeledInstance>> {
    /** Logger for the informational messages and warnings emitted by this adapter. */
    private static final Logger LOGGER = LoggerFactory.getLogger(ArffDatasetAdapter.class);
    /** Meta-data key under which the relation name parsed from the relation declaration is stored. */
    public static final String K_RELATION_NAME = "relationName";
    /** Meta-data key under which the index of the class (target) attribute is stored. */
    public static final String K_CLASS_INDEX = "classIndex";
    /** Name of the option that is looked up in the relation's option string to obtain the class index. */
    private static final String F_CLASS_INDEX = "C";
    /** Separates the relation name from the option string inside a quoted relation description. */
    private static final String SEPARATOR_RELATIONNAME = ":";
    /** Separates the parts of an attribute declaration, and the index from the value of a serialized sparse entry. */
    private static final String SEPARATOR_ATTRIBUTE_DESCRIPTION = " ";
    /** Separates the values of an instance line; also used to split the value list of a nominal attribute. */
    private static final String SEPARATOR_DENSE_INSTANCE_VALUES = ",";
    /** Default parsing mode handed to the reader: {@code true} to treat instance lines as sparse. */
    private final boolean sparseMode;
    /** Descriptor used by the parameterless {@code deserializeDataset()}; {@code null} unless passed to the constructor. */
    private IDatasetDescriptor datasetDescriptor = null;

    /**
     * Creates an adapter that is bound to a dataset descriptor.
     *
     * @param sparseMode whether instance lines are treated as sparse by default
     * @param datasetDescriptor the descriptor read by the parameterless {@code deserializeDataset()}
     */
    public ArffDatasetAdapter(boolean sparseMode, IDatasetDescriptor datasetDescriptor) {
        this.sparseMode = sparseMode;
        this.datasetDescriptor = datasetDescriptor;
    }

    /**
     * Creates an adapter without a pre-configured dataset descriptor.
     *
     * @param sparseMode whether instance lines are treated as sparse by default
     */
    public ArffDatasetAdapter(boolean sparseMode) {
        this.sparseMode = sparseMode;
    }

    /**
     * Creates an adapter with sparse mode disabled and without a pre-configured dataset descriptor.
     */
    public ArffDatasetAdapter() {
        this(false);
    }

    /**
     * Scans the ARFF file behind the given descriptor for an attribute declaration with the given name.
     * <p>
     * Lines are read in order; only lines starting (case-insensitively) with the attribute tag are parsed. The
     * first parsed attribute whose name equals {@code nameOfAttribute} is returned.
     *
     * @param datasetFile descriptor providing the ARFF file to scan
     * @param nameOfAttribute the exact name of the attribute to look for
     * @return the first attribute declared with that name
     * @throws DatasetDeserializationFailedException wrapping any exception raised while reading or parsing,
     *         including the {@link NoSuchElementException} raised when the file ends without a match
     */
    public IAttribute getAttributeWithName(IFileDatasetDescriptor datasetFile, String nameOfAttribute) throws DatasetDeserializationFailedException {
        try (BufferedReader reader = Files.newBufferedReader(datasetFile.getDatasetDescription().toPath())) {
            for (String current = reader.readLine(); current != null; current = reader.readLine()) {
                boolean isAttributeLine = current.toLowerCase().startsWith(EArffItem.ATTRIBUTE.getValue().toLowerCase());
                if (!isAttributeLine) {
                    continue;
                }
                IAttribute candidate = ArffDatasetAdapter.parseAttribute(current);
                if (candidate.getName().equals(nameOfAttribute)) {
                    return candidate;
                }
            }
            // End of file reached without any matching declaration.
            throw new NoSuchElementException("No attribute with name " + nameOfAttribute + " found.");
        }
        catch (Exception e) {
            throw new DatasetDeserializationFailedException((Throwable)e);
        }
    }

    /**
     * Reads the dataset from the ARFF file behind the given descriptor, using the attribute with the given name
     * as class (target) attribute.
     * <p>
     * The file is first scanned for attribute declarations to determine the position of the class attribute, i.e.
     * the number of attribute declarations preceding it. The dataset is then read with that index.
     *
     * @param datasetFile descriptor providing the ARFF file; must not be {@code null}
     * @param nameOfClassAttribute exact name of the attribute to use as label
     * @return the deserialized dataset
     * @throws NullPointerException if {@code datasetFile} is {@code null}
     * @throws DatasetDeserializationFailedException if the file cannot be read or parsed, or if it declares no
     *         attribute with the given name
     */
    public ILabeledDataset<ILabeledInstance> deserializeDataset(IFileDatasetDescriptor datasetFile, String nameOfClassAttribute) throws DatasetDeserializationFailedException {
        Objects.requireNonNull(datasetFile, "No dataset has been configured.");
        int classIndex = 0;
        boolean classAttributeFound = false;
        try (BufferedReader br = Files.newBufferedReader(datasetFile.getDatasetDescription().toPath())) {
            String attributeTag = EArffItem.ATTRIBUTE.getValue().toLowerCase();
            String line;
            while (!classAttributeFound && (line = br.readLine()) != null) {
                if (!line.toLowerCase().startsWith(attributeTag)) {
                    continue;
                }
                if (ArffDatasetAdapter.parseAttribute(line).getName().equals(nameOfClassAttribute)) {
                    classAttributeFound = true;
                } else {
                    ++classIndex;
                }
            }
        }
        catch (Exception e) {
            throw new DatasetDeserializationFailedException((Throwable)e);
        }
        // Without this check the total number of attributes would be used as (out-of-range) class index.
        if (!classAttributeFound) {
            throw new DatasetDeserializationFailedException("No attribute with name " + nameOfClassAttribute + " found.");
        }
        LOGGER.info("Successfully identified class attribute index {} for attribute with name {}", (Object)classIndex, (Object)nameOfClassAttribute);
        return this.deserializeDataset(datasetFile, classIndex);
    }

    /**
     * Reads the dataset from the ARFF file behind the given descriptor using this adapter's sparse mode.
     *
     * @param datasetDescriptor descriptor providing the ARFF file; must not be {@code null}
     * @param columnWithClassIndex index of the class attribute; a negative value leaves the choice to the reader
     * @return the deserialized dataset
     * @throws NullPointerException if {@code datasetDescriptor} is {@code null}
     * @throws DatasetDeserializationFailedException if reading the file fails
     */
    public ILabeledDataset<ILabeledInstance> deserializeDataset(IFileDatasetDescriptor datasetDescriptor, int columnWithClassIndex) throws DatasetDeserializationFailedException {
        Objects.requireNonNull(datasetDescriptor, "No dataset has been configured.");
        File arffFile = datasetDescriptor.getDatasetDescription();
        return readDataset(this.sparseMode, arffFile, columnWithClassIndex);
    }

    /**
     * Reads the dataset described by the given descriptor without forcing a class index.
     * <p>
     * Only file-based descriptors are supported. The class index {@code -1} is passed on, so the class attribute
     * is determined by the reader.
     *
     * @param datasetDescriptor the descriptor of the dataset; must be an {@link IFileDatasetDescriptor}
     * @return the deserialized dataset
     * @throws NullPointerException if {@code datasetDescriptor} is {@code null}, e.g. because this adapter was
     *         created without a descriptor
     * @throws DatasetDeserializationFailedException if the descriptor is not file-based or reading fails
     * @throws InterruptedException declared for interface compatibility; not raised by the visible code
     */
    public ILabeledDataset<ILabeledInstance> deserializeDataset(IDatasetDescriptor datasetDescriptor) throws DatasetDeserializationFailedException, InterruptedException {
        // Fail with a meaningful message instead of an anonymous NPE from getClass() below.
        Objects.requireNonNull(datasetDescriptor, "No dataset has been configured.");
        if (!(datasetDescriptor instanceof IFileDatasetDescriptor)) {
            throw new DatasetDeserializationFailedException("Cannot handle dataset descriptor of type " + datasetDescriptor.getClass().getName());
        }
        return this.deserializeDataset((IFileDatasetDescriptor)datasetDescriptor, -1);
    }

    /**
     * Reads the dataset described by the descriptor stored in this adapter.
     * <p>
     * Delegates to {@code deserializeDataset(IDatasetDescriptor)} with the {@code datasetDescriptor} field, which
     * is {@code null} when the adapter was created without a descriptor.
     *
     * @return the deserialized dataset
     * @throws InterruptedException propagated from the delegate
     * @throws DatasetDeserializationFailedException propagated from the delegate
     */
    public ILabeledDataset<ILabeledInstance> deserializeDataset() throws InterruptedException, DatasetDeserializationFailedException {
        return this.deserializeDataset(this.datasetDescriptor);
    }

    /**
     * Extracts the relation meta data from a relation declaration line.
     * <p>
     * The text following the relation tag is trimmed. If it is not enclosed in single quotes it is stored as the
     * relation name as is. Otherwise the text between the first and the last single quote of the line is split at
     * {@code ":"}: the first part (trimmed) becomes the relation name and, if present, the second part is handed
     * to an {@link OptionsParser} whose value for option {@code "C"} is stored as class index.
     *
     * @param line the relation declaration line, starting with the relation tag
     * @return a store holding {@link #K_RELATION_NAME} and possibly {@link #K_CLASS_INDEX}
     */
    protected static KVStore parseRelation(String line) {
        KVStore relationInfo = new KVStore();
        String description = line.substring(EArffItem.RELATION.getValue().length()).trim();
        boolean quoted = description.startsWith("'") && description.endsWith("'");
        if (!quoted) {
            relationInfo.put(K_RELATION_NAME, description);
            return relationInfo;
        }

        int openingQuote = line.indexOf('\'');
        int closingQuote = line.lastIndexOf('\'');
        String[] nameAndOptions = line.substring(openingQuote + 1, closingQuote).split(SEPARATOR_RELATIONNAME);
        relationInfo.put(K_RELATION_NAME, nameAndOptions[0].trim());
        if (nameAndOptions.length > 1) {
            OptionsParser options = new OptionsParser(nameAndOptions[1]);
            relationInfo.put(K_CLASS_INDEX, options.get(F_CLASS_INDEX));
        }
        return relationInfo;
    }

    /**
     * Parses a single attribute declaration line of an ARFF header.
     * <p>
     * Tabs are treated as spaces. The attribute name ends at the first space unless it starts with a single or
     * double quote that is not closed within that first token; in that case the name extends up to and including
     * the next matching quote, so quoted names may contain spaces. Enclosing quotes are removed from the name.
     * A type of the form <code>{a,b,c}</code> yields a categorical attribute whose labels are the comma-separated
     * entries, trimmed and stripped of enclosing quotes. Any other type is resolved through
     * {@code EArffAttributeType.valueOf} on its upper-case form.
     *
     * @param line the complete declaration line, starting with the attribute tag
     * @return a numeric, string or categorical attribute carrying the parsed name
     * @throws UnsupportedAttributeTypeException if the type is not a constant of {@code EArffAttributeType} or is
     *         a constant this method does not handle
     * @throws IllegalArgumentException if the declaration has no type or a quoted name is not terminated
     * @throws IllegalStateException if the type is the nominal constant itself, i.e. no value list is given
     */
    protected static IAttribute parseAttribute(String line) throws UnsupportedAttributeTypeException {
        String definition = line.replaceAll("\\t", SEPARATOR_ATTRIBUTE_DESCRIPTION).substring(EArffItem.ATTRIBUTE.getValue().length() + 1).trim();
        int firstSeparator = definition.indexOf(SEPARATOR_ATTRIBUTE_DESCRIPTION);
        if (firstSeparator < 0) {
            throw new IllegalArgumentException("The attribute declaration does not specify a type. (line: " + line + ")");
        }

        // The definition is trimmed, so the first token is never empty.
        String name = definition.substring(0, firstSeparator);
        char quote = name.charAt(0);
        boolean opensQuote = quote == '\'' || quote == '"';
        boolean closesQuote = name.length() > 1 && name.charAt(name.length() - 1) == quote;
        if (opensQuote && !closesQuote) {
            // The quoted name contains spaces: extend it up to (and including) the closing quote.
            int closingQuote = definition.indexOf(quote, name.length());
            if (closingQuote < 0) {
                throw new IllegalArgumentException("The quoted attribute name is not terminated. (line: " + line + ")");
            }
            name = definition.substring(0, closingQuote + 1);
        }
        if (name.length() >= definition.length()) {
            throw new IllegalArgumentException("The attribute declaration does not specify a type. (line: " + line + ")");
        }
        String type = definition.substring(name.length() + 1).trim();
        name = ArffDatasetAdapter.stripEnclosingQuotes(name.trim());

        EArffAttributeType attType;
        String[] values = null;
        if (type.startsWith("{") && type.endsWith("}")) {
            values = type.substring(1, type.length() - 1).split(SEPARATOR_DENSE_INSTANCE_VALUES);
            attType = EArffAttributeType.NOMINAL;
        } else {
            try {
                attType = EArffAttributeType.valueOf(type.toUpperCase());
            }
            catch (IllegalArgumentException e) {
                throw new UnsupportedAttributeTypeException("The attribute type " + type.toUpperCase() + " is not supported in the EArffAttributeType ENUM. (line: " + line + ")");
            }
        }

        switch (attType) {
            case NUMERIC:
            case REAL:
            case INTEGER:
                return new NumericAttribute(name);
            case STRING:
                return new StringAttribute(name);
            case NOMINAL:
                if (values == null) {
                    throw new IllegalStateException("Identified a nominal attribute but it seems to have no values.");
                }
                return new IntBasedCategoricalAttribute(name, Arrays.stream(values).map(String::trim).map(ArffDatasetAdapter::stripEnclosingQuotes).collect(Collectors.toList()));
            default:
                throw new UnsupportedAttributeTypeException("Can not deal with attribute type " + type);
        }
    }

    /** Removes one pair of enclosing single or double quotes from the given text, if present. */
    private static String stripEnclosingQuotes(String text) {
        boolean singleQuoted = text.startsWith("'") && text.endsWith("'");
        boolean doubleQuoted = text.startsWith("\"") && text.endsWith("\"");
        if (text.length() > 1 && (singleQuoted || doubleQuoted)) {
            return text.substring(1, text.length() - 1);
        }
        return text;
    }

    /**
     * Parses one data line of an ARFF file.
     * <p>
     * The line is trimmed first. A line enclosed in curly braces, or a line with fewer comma-separated entries
     * than there are attributes, is parsed as a sparse instance regardless of {@code sparseData}.
     * <ul>
     * <li>Dense result: a two-element {@link List} holding the {@code Object[]} of all non-target values
     * (in attribute order) followed by the target value.</li>
     * <li>Sparse result: a {@link Map} from attribute index to the deserialized value for every entry of the
     * line, including the entry of the target attribute if the line contains one.</li>
     * </ul>
     *
     * @param sparseData whether the line is to be parsed as a sparse instance by default
     * @param attributes all attributes of the file in declaration order, including the target attribute
     * @param targetIndex index of the target attribute within {@code attributes}
     * @param line the data line to parse
     * @return a {@link List} for a dense instance or a {@link Map} for a sparse instance, as described above
     * @throws IllegalArgumentException if the line is a comment, a dense line does not have exactly one value
     *         per attribute, the target index is out of range for a dense line, or a sparse entry is malformed
     */
    protected static Object parseInstance(boolean sparseData, List<IAttribute> attributes, int targetIndex, String line) {
        // Work on the trimmed line so that the brace detection and the brace removal refer to the same text.
        String curLine = line.trim();
        if (curLine.startsWith("%")) {
            throw new IllegalArgumentException("Cannot create object for commented line!");
        }
        boolean sparseMode = sparseData;
        if (curLine.startsWith("{") && curLine.endsWith("}")) {
            curLine = curLine.substring(1, curLine.length() - 1);
            sparseMode = true;
            if (curLine.trim().isEmpty()) {
                return new HashMap<Integer, Object>();
            }
        }
        String[] lineSplit = curLine.split(SEPARATOR_DENSE_INSTANCE_VALUES);
        if (lineSplit.length < attributes.size()) {
            sparseMode = true;
        }

        if (!sparseMode) {
            if (lineSplit.length != attributes.size()) {
                throw new IllegalArgumentException("Cannot parse instance as this is not a sparse instance but the number of columns does not match the number of attributes defined. Expected values: " + attributes.size() + ". Actual number of values: " + lineSplit.length + ". Values: " + Arrays.toString(lineSplit));
            }
            if (targetIndex < 0 || targetIndex >= lineSplit.length) {
                throw new IllegalArgumentException("The target index " + targetIndex + " is out of range for an instance with " + lineSplit.length + " values.");
            }
            Object[] parsedDenseInstance = new Object[lineSplit.length - 1];
            Object target = null;
            int cI = 0;
            for (int i = 0; i < lineSplit.length; ++i) {
                Object value = attributes.get(i).deserializeAttributeValue(lineSplit[i]);
                if (i == targetIndex) {
                    target = value;
                } else {
                    parsedDenseInstance[cI++] = value;
                }
            }
            return Arrays.asList(parsedDenseInstance, target);
        }

        Map<Integer, Object> parsedSparseInstance = new HashMap<>();
        for (String rawEntry : lineSplit) {
            // Entries are commonly written as "{0 1, 3 2}", i.e. with whitespace after the comma.
            String entry = rawEntry.trim();
            int indexOfFirstSpace = entry.indexOf(' ');
            if (indexOfFirstSpace < 0) {
                throw new IllegalArgumentException("Cannot parse sparse entry '" + entry + "': expected '<index> <value>'.");
            }
            int indexOfAttribute = Integer.parseInt(entry.substring(0, indexOfFirstSpace));
            String attributeValue = entry.substring(indexOfFirstSpace + 1);
            parsedSparseInstance.put(indexOfAttribute, attributes.get(indexOfAttribute).deserializeAttributeValue(attributeValue));
        }
        return parsedSparseInstance;
    }

    /**
     * Creates an empty dataset whose schema is derived from the relation meta data and the parsed attributes.
     * <p>
     * The attribute at the class index stored under {@link #K_CLASS_INDEX} becomes the label attribute; all
     * remaining attributes keep their relative order. The given list itself is not modified.
     *
     * @param relationMetaData meta data holding the relation name and a non-negative class index
     * @param attributes all attributes in declaration order, including the class attribute
     * @return a new, empty dataset with the derived schema
     * @throws IllegalArgumentException if the meta data holds no class index or a negative one
     */
    protected static ILabeledDataset<ILabeledInstance> createDataset(KVStore relationMetaData, List<IAttribute> attributes) {
        boolean hasValidClassIndex = relationMetaData.containsKey(K_CLASS_INDEX) && relationMetaData.getAsInt(K_CLASS_INDEX) >= 0;
        if (!hasValidClassIndex) {
            throw new IllegalArgumentException("No (valid) class index given!");
        }
        // A primitive index makes sure the positional List.remove(int) is used.
        int classIndex = relationMetaData.getAsInt(K_CLASS_INDEX);
        List<IAttribute> featureAttributes = new ArrayList<>(attributes);
        IAttribute labelAttribute = featureAttributes.remove(classIndex);
        String relationName = relationMetaData.getAsString(K_RELATION_NAME);
        return new Dataset(new LabeledInstanceSchema(relationName, featureAttributes, labelAttribute));
    }

    /**
     * Reads a dataset from the given ARFF file with sparse mode disabled and without forcing a class index.
     *
     * @param datasetFile the ARFF file to read
     * @return the result of {@code readDataset(false, datasetFile)}
     * @throws DatasetDeserializationFailedException propagated from the delegate
     */
    public static ILabeledDataset<ILabeledInstance> readDataset(File datasetFile) throws DatasetDeserializationFailedException {
        return ArffDatasetAdapter.readDataset(false, datasetFile);
    }

    /**
     * Reads a dataset from the given ARFF file without forcing a class index.
     *
     * @param sparseMode whether instance lines are treated as sparse by default
     * @param datasetFile the ARFF file to read
     * @return the result of {@code readDataset(sparseMode, datasetFile, -1)}
     * @throws DatasetDeserializationFailedException propagated from the delegate
     */
    public static ILabeledDataset<ILabeledInstance> readDataset(boolean sparseMode, File datasetFile) throws DatasetDeserializationFailedException {
        return ArffDatasetAdapter.readDataset(sparseMode, datasetFile, -1);
    }

    /**
     * Reads a dataset from the given ARFF file.
     * <p>
     * Header lines are matched case-insensitively by their leading tag. The relation line provides the meta data,
     * each attribute line adds one attribute, and the data line switches to instance parsing. On the data line
     * the class index is fixed: {@code columnWithClassIndex} is used if it is non-negative; otherwise the class
     * index of the relation meta data is used if it is non-negative; otherwise a warning is logged and the last
     * attribute is used. In the data section, blank lines and lines starting with {@code %} are skipped.
     * <p>
     * A sparse instance that carries no entry for the class index gets the label {@code 0}.
     * NOTE(review): sparse entries keep the attribute indices as written in the file, also for attributes that
     * follow the class attribute; confirm that this matches what {@code SparseInstance} expects.
     *
     * @param sparseMode whether instance lines are treated as sparse by default
     * @param datasetFile the ARFF file to read
     * @param columnWithClassIndex index of the class attribute, or a negative value to take it from the file
     * @return the dataset, or {@code null} if the file contains no data declaration line
     * @throws DatasetDeserializationFailedException wrapping any exception raised while opening, reading or
     *         parsing the file
     */
    public static ILabeledDataset<ILabeledInstance> readDataset(boolean sparseMode, File datasetFile, int columnWithClassIndex) throws DatasetDeserializationFailedException {
        try (BufferedReader br = Files.newBufferedReader(datasetFile.toPath())) {
            String relationTag = EArffItem.RELATION.getValue().toLowerCase();
            String attributeTag = EArffItem.ATTRIBUTE.getValue().toLowerCase();
            String dataTag = EArffItem.DATA.getValue().toLowerCase();

            ILabeledDataset<ILabeledInstance> dataset = null;
            KVStore relationMetaData = new KVStore();
            List<IAttribute> attributes = new ArrayList<>();
            int classIndex = -1;
            long lineCounter = 0L;
            String line;
            while ((line = br.readLine()) != null) {
                // Count every physical line so that error messages point to the offending line.
                ++lineCounter;

                if (dataset != null) {
                    // Data section: the dataset exists as soon as the data declaration has been processed.
                    line = line.trim();
                    if (line.isEmpty() || line.startsWith("%")) {
                        continue;
                    }
                    Object parsedInstance = ArffDatasetAdapter.parseInstance(sparseMode, attributes, classIndex, line);
                    AInstance newI = ArffDatasetAdapter.toInstance(parsedInstance, classIndex, dataset.getNumAttributes(), line);
                    if (newI.getNumAttributes() != dataset.getNumAttributes()) {
                        throw new IllegalStateException("Instance has " + newI.getNumAttributes() + " attributes, but the dataset defines " + dataset.getNumAttributes() + " attributes.");
                    }
                    dataset.add(newI);
                    continue;
                }

                String lowerCaseLine = line.toLowerCase();
                if (lowerCaseLine.startsWith(relationTag)) {
                    relationMetaData = ArffDatasetAdapter.parseRelation(line);
                } else if (lowerCaseLine.startsWith(attributeTag)) {
                    attributes.add(ArffDatasetAdapter.parseAttribute(line));
                } else if (lowerCaseLine.startsWith(dataTag)) {
                    if (!lowerCaseLine.trim().equals(dataTag)) {
                        throw new IllegalArgumentException("Error while parsing arff-file on line " + lineCounter + ": There is more in this line than just the data declaration " + EArffItem.DATA.getValue() + ", which is not supported");
                    }
                    if (columnWithClassIndex >= 0) {
                        // An explicitly requested class index overrides the file's meta data, even if the
                        // file has no relation line.
                        relationMetaData.put(K_CLASS_INDEX, columnWithClassIndex);
                    } else if (!relationMetaData.containsKey(K_CLASS_INDEX) || relationMetaData.getAsInt(K_CLASS_INDEX) < 0) {
                        LOGGER.warn("Invalid class index in the dataset's meta data ({}): Assuming last column to be the target attribute!", relationMetaData.get(K_CLASS_INDEX));
                        relationMetaData.put(K_CLASS_INDEX, attributes.size() - 1);
                    }
                    dataset = ArffDatasetAdapter.createDataset(relationMetaData, attributes);
                    classIndex = relationMetaData.getAsInt(K_CLASS_INDEX);
                }
            }
            return dataset;
        }
        catch (Exception e) {
            throw new DatasetDeserializationFailedException("Could not deserialize dataset from ARFF file.", (Throwable)e);
        }
    }

    /** Converts the result of {@code parseInstance} into a dense or sparse instance object. */
    private static AInstance toInstance(Object parsedInstance, int classIndex, int numAttributes, String line) {
        if (parsedInstance instanceof List) {
            List<?> valuesAndLabel = (List<?>)parsedInstance;
            return new DenseInstance((Object[])valuesAndLabel.get(0), valuesAndLabel.get(1));
        }
        if (parsedInstance instanceof Map) {
            @SuppressWarnings("unchecked") // parseInstance only produces maps from attribute index to value
            Map<Integer, Object> sparseValues = (Map<Integer, Object>)parsedInstance;
            // The label is kept as Object so that non-integer labels do not cause a ClassCastException.
            Object label = sparseValues.containsKey(classIndex) ? sparseValues.remove(classIndex) : Integer.valueOf(0);
            if (label == null) {
                throw new IllegalArgumentException("Cannot identify label for instance " + line);
            }
            return new SparseInstance(numAttributes, sparseValues, label);
        }
        throw new IllegalStateException("Severe Error: The format of the parsed instance is not as expected.");
    }

    /**
     * Writes the given dataset to an ARFF file: first the header (relation and attribute declarations), then two
     * line breaks, then the data section.
     * <p>
     * The file is written through {@link Files#newBufferedWriter}, i.e. with the same UTF-8 encoding that the
     * reading methods of this class use via {@link Files#newBufferedReader}, instead of the platform's default
     * charset. An existing file is replaced.
     *
     * @param arffOutputFile the file to write to
     * @param data the dataset to serialize
     * @throws IOException if the file cannot be opened or written
     */
    public static void serializeDataset(File arffOutputFile, ILabeledDataset<? extends ILabeledInstance> data) throws IOException {
        try (BufferedWriter bw = Files.newBufferedWriter(arffOutputFile.toPath())) {
            ArffDatasetAdapter.serializeMetaData(bw, data);
            bw.write("\n\n");
            ArffDatasetAdapter.serializeData(bw, data);
        }
    }

    /**
     * Writes the data declaration followed by one line per instance.
     * <p>
     * Dense instances are written as comma-separated attribute values followed by the label. All other instances
     * are cast to {@link SparseInstance} and written as <code>{index value,...}</code>, where the label is
     * appended as an entry whose index is the number of (non-label) attributes of the dataset.
     *
     * @param bw the writer to append to
     * @param data the dataset whose instances are written
     * @throws IOException if writing fails
     */
    private static void serializeData(BufferedWriter bw, ILabeledDataset<? extends ILabeledInstance> data) throws IOException {
        bw.write(EArffItem.DATA.getValue() + "\n");
        for (ILabeledInstance instance : data) {
            if (instance instanceof DenseInstance) {
                Object[] atts = instance.getAttributes();
                String values = IntStream.range(0, atts.length)
                        .mapToObj(x -> ArffDatasetAdapter.serializeAttributeValue(data.getInstanceSchema().getAttribute(x), atts[x]))
                        .collect(Collectors.joining(SEPARATOR_DENSE_INSTANCE_VALUES));
                bw.write(values);
                bw.write(SEPARATOR_DENSE_INSTANCE_VALUES);
                bw.write(ArffDatasetAdapter.serializeAttributeValue(data.getInstanceSchema().getLabelAttribute(), instance.getLabel()));
                bw.write("\n");
                continue;
            }

            String entries = ((SparseInstance)instance).getAttributeMap().entrySet().stream()
                    .map(x -> x.getKey() + SEPARATOR_ATTRIBUTE_DESCRIPTION + ArffDatasetAdapter.serializeAttributeValue(data.getInstanceSchema().getAttribute(x.getKey()), x.getValue()))
                    .collect(Collectors.joining(SEPARATOR_DENSE_INSTANCE_VALUES));
            bw.write("{");
            bw.write(entries);
            // The label entry is always written, so it needs a separator exactly when entries precede it.
            if (!entries.isEmpty()) {
                bw.write(SEPARATOR_DENSE_INSTANCE_VALUES);
            }
            // Writer.write(int) would emit a single character; the index must be written as decimal text.
            bw.write(String.valueOf(data.getNumAttributes()));
            bw.write(SEPARATOR_ATTRIBUTE_DESCRIPTION);
            bw.write(ArffDatasetAdapter.serializeAttributeValue(data.getInstanceSchema().getLabelAttribute(), instance.getLabel()));
            bw.write("}\n");
        }
    }

    /**
     * Serializes a single value with the given attribute and wraps the result in single quotes if the attribute
     * is categorical.
     *
     * @param att the attribute the value belongs to
     * @param value the value to serialize
     * @return the textual form of the value as it is written to the data section
     */
    private static String serializeAttributeValue(IAttribute att, Object value) {
        String serialized = att.serializeAttributeValue(value);
        return att instanceof ICategoricalAttribute ? "'" + serialized + "'" : serialized;
    }

    /**
     * Writes the ARFF header: the relation declaration, an empty line, one declaration line per attribute of the
     * instance schema and finally the declaration of the label attribute (without trailing line break).
     *
     * @param bw the writer to append to
     * @param data the dataset whose relation name and schema are written
     * @throws IOException if writing fails
     */
    private static void serializeMetaData(BufferedWriter bw, ILabeledDataset<? extends ILabeledInstance> data) throws IOException {
        StringBuilder header = new StringBuilder();
        header.append(EArffItem.RELATION.getValue())
                .append(SEPARATOR_ATTRIBUTE_DESCRIPTION)
                .append(data.getRelationName())
                .append("\n\n");
        data.getInstanceSchema().getAttributeList()
                .forEach(att -> header.append(ArffDatasetAdapter.serializeAttribute(att)).append("\n"));
        // The label attribute is declared last, matching the position it is given in the data section.
        header.append(ArffDatasetAdapter.serializeAttribute(data.getInstanceSchema().getLabelAttribute()));
        bw.write(header.toString());
    }

    /**
     * Builds the declaration line of an attribute: the attribute tag, the single-quoted name and the type.
     * <p>
     * Categorical attributes are declared with their single-quoted labels in curly braces, numeric attributes
     * with the numeric type name and string attributes with the string type name, so that attributes produced by
     * {@code parseAttribute} can be written back.
     * NOTE(review): attributes of any other kind are still written without a type token, which
     * {@code parseAttribute} cannot read; decide whether this should fail instead.
     *
     * @param att the attribute to declare
     * @return the declaration line without line break
     */
    private static String serializeAttribute(IAttribute att) {
        StringBuilder sb = new StringBuilder();
        sb.append(EArffItem.ATTRIBUTE.getValue() + " '" + att.getName() + "' ");
        if (att instanceof ICategoricalAttribute) {
            sb.append("{'" + ((ICategoricalAttribute)att).getLabels().stream().collect(Collectors.joining("','")) + "'}");
        } else if (att instanceof INumericAttribute) {
            sb.append(EArffAttributeType.NUMERIC.getName());
        } else if (att instanceof StringAttribute) {
            sb.append(EArffAttributeType.STRING.getName());
        }
        return sb.toString();
    }
}

