/*
 * Decompiled with CFR 0.152.
 */
package org.tribuo.data.text;

import com.oracle.labs.mlrg.olcut.config.ConfigurationManager;
import com.oracle.labs.mlrg.olcut.config.Option;
import com.oracle.labs.mlrg.olcut.config.Options;
import com.oracle.labs.mlrg.olcut.util.LabsLogFormatter;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.Random;
import java.util.logging.Formatter;
import java.util.logging.Handler;
import java.util.logging.Level;
import java.util.logging.Logger;

public class SplitTextData {
    private static final Logger logger = Logger.getLogger(SplitTextData.class.getName());

    public static void main(String[] args) throws IOException {
        int i;
        for (Handler h : Logger.getLogger("").getHandlers()) {
            h.setLevel(Level.ALL);
            h.setFormatter((Formatter)new LabsLogFormatter());
            try {
                h.setEncoding("utf-8");
            }
            catch (UnsupportedEncodingException | SecurityException ex) {
                logger.severe("Error setting output encoding");
            }
        }
        TrainTestSplitOptions options = new TrainTestSplitOptions();
        ConfigurationManager cm = new ConfigurationManager(args, (Options)options);
        if (options.inputPath == null || options.trainPath == null || options.validationPath == null || (double)options.splitFraction < 0.0 || (double)options.splitFraction > 1.0) {
            System.out.println("Incorrect arguments");
            System.out.println(cm.usage());
            return;
        }
        int n = 0;
        int validCounter = 0;
        int invalidCounter = 0;
        BufferedReader input = new BufferedReader(new InputStreamReader((InputStream)new FileInputStream(options.inputPath.toFile()), StandardCharsets.UTF_8));
        PrintWriter trainOutput = new PrintWriter(new OutputStreamWriter((OutputStream)new BufferedOutputStream(new FileOutputStream(options.trainPath.toFile())), StandardCharsets.UTF_8));
        PrintWriter testOutput = new PrintWriter(new OutputStreamWriter((OutputStream)new BufferedOutputStream(new FileOutputStream(options.validationPath.toFile())), StandardCharsets.UTF_8));
        ArrayList<Line> lines = new ArrayList<Line>();
        while (input.ready()) {
            ++n;
            String line = input.readLine().trim();
            if (line.isEmpty()) {
                ++invalidCounter;
                continue;
            }
            String[] fields = line.split("##");
            if (fields.length != 2) {
                ++invalidCounter;
                logger.warning(String.format("Bad line in %s at %d: %s", options.inputPath, n, line.substring(Math.min(50, line.length()))));
                continue;
            }
            String label = fields[0].trim().toUpperCase();
            lines.add(new Line(label, fields[1]));
            ++validCounter;
        }
        input.close();
        logger.info("Found " + validCounter + " valid examples, " + invalidCounter + " invalid examples out of " + n + " lines.");
        int numTraining = Math.round(options.splitFraction * (float)validCounter);
        int numTesting = validCounter - numTraining;
        logger.info("Outputting " + numTraining + " training examples, and " + numTesting + " testing examples, with a " + options.splitFraction + " split.");
        Collections.shuffle(lines, new Random(options.seed));
        for (i = 0; i < numTraining; ++i) {
            trainOutput.println(lines.get(i));
        }
        for (i = numTraining; i < validCounter; ++i) {
            testOutput.println(lines.get(i));
        }
        trainOutput.close();
        testOutput.close();
    }

    public static class TrainTestSplitOptions
    implements Options {
        @Option(charName=115, longName="split-fraction", usage="Split fraction.")
        public float splitFraction;
        @Option(charName=105, longName="input-file", usage="Input data file in standard text format.")
        public Path inputPath;
        @Option(charName=116, longName="training-output-file", usage="Output training data file.")
        public Path trainPath;
        @Option(charName=118, longName="validation-output-file", usage="Output validation data file.")
        public Path validationPath;
        @Option(charName=114, longName="rng-seed", usage="Seed for the RNG.")
        public long seed = 1L;

        public String getOptionsDescription() {
            return "Splits a standard text format dataset in two.";
        }
    }

    private static class Line {
        public final String label;
        public final String text;

        Line(String label, String text) {
            this.label = label;
            this.text = text;
        }

        public String toString() {
            return this.label + "##" + this.text;
        }
    }
}

