/*
 * Decompiled with CFR 0.152.
 */
package org.apache.hudi.utilities;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeFormatterBuilder;
import java.time.format.DateTimeParseException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;
import javax.annotation.Nullable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.client.common.HoodieSparkEngineContext;
import org.apache.hudi.com.beust.jcommander.JCommander;
import org.apache.hudi.com.beust.jcommander.Parameter;
import org.apache.hudi.com.codahale.metrics.Histogram;
import org.apache.hudi.com.codahale.metrics.Snapshot;
import org.apache.hudi.com.codahale.metrics.UniformReservoir;
import org.apache.hudi.common.config.HoodieMetadataConfig;
import org.apache.hudi.common.config.SerializableConfiguration;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.engine.HoodieLocalEngineContext;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.HoodieBaseFile;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.view.FileSystemViewManager;
import org.apache.hudi.common.table.view.HoodieTableFileSystemView;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.exception.TableNotFoundException;
import org.apache.hudi.metadata.HoodieTableMetadata;
import org.apache.hudi.utilities.IdentitySplitter;
import org.apache.hudi.utilities.UtilHelpers;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class TableSizeStats
implements Serializable {
    /** Logger shared by every method of this class. */
    private static final Logger LOG = LoggerFactory.getLogger(TableSizeStats.class);
    /** Formatter built from two optional patterns, {@code yyyy/M/d} and {@code yyyy-M-d}; used for CLI dates and partition names. */
    private static final DateTimeFormatter DATE_FORMATTER = new DateTimeFormatterBuilder().appendOptional(DateTimeFormatter.ofPattern("yyyy/M/d")).appendOptional(DateTimeFormatter.ofPattern("yyyy-M-d")).toFormatter();
    /** Size unit suffixes in ascending order; {@code getFileSizeUnit} moves one entry up for every division by 1024. */
    private static final String[] FILE_SIZE_UNITS = new String[]{"B", "KB", "MB", "GB", "TB"};
    /** Spark context supplying the Hadoop configuration and the Hudi engine context; declared transient. */
    private transient JavaSparkContext jsc;
    /** Parsed command-line options. */
    private Config cfg;
    /** Properties resolved in the constructor from {@code --hoodie-conf} and, when given, the {@code --props-path} file; no other use is visible in this file. */
    private TypedProperties props;

    /**
     * Creates the tool and resolves its properties.
     *
     * <p>When {@code --props-path} is set the properties are loaded through
     * {@code readConfigFromFileSystem}; otherwise they are built from the
     * {@code --hoodie-conf} entries alone.
     *
     * @param jsc Spark context used to reach the Hadoop configuration
     * @param cfg parsed command-line options
     */
    public TableSizeStats(JavaSparkContext jsc, Config cfg) {
        this.jsc = jsc;
        this.cfg = cfg;
        if (cfg.propsFilePath != null) {
            this.props = readConfigFromFileSystem(jsc, cfg);
        } else {
            this.props = UtilHelpers.buildProperties(cfg.configs);
        }
    }

    /**
     * Loads properties from the file named by {@code cfg.propsFilePath},
     * passing {@code cfg.configs} along to {@code UtilHelpers.readConfig}.
     *
     * @param jsc Spark context whose Hadoop configuration is used to read the file
     * @param cfg options providing the properties path and the {@code --hoodie-conf} entries
     * @return the properties returned by {@code getProps(true)}
     */
    private TypedProperties readConfigFromFileSystem(JavaSparkContext jsc, Config cfg) {
        Configuration hadoopConf = jsc.hadoopConfiguration();
        Path propsLocation = new Path(cfg.propsFilePath);
        // NOTE(review): the meaning of the `true` flag is defined by getProps elsewhere - confirm before changing.
        return UtilHelpers.readConfig(hadoopConf, propsLocation, cfg.configs).getProps(true);
    }

    /**
     * Command-line entry point.
     *
     * <p>Parses the arguments into a {@link Config}. When {@code --help} is set or no
     * arguments are given, prints the usage text and exits with status 1. Otherwise a
     * Spark context named {@code Table-Size-Stats} is created with
     * {@code spark.executor.memory} taken from {@code --spark-memory}, and the tool runs.
     * Failures are logged rather than rethrown, and the Spark context is always stopped.
     *
     * @param args2 raw command-line arguments
     */
    public static void main(String[] args2) {
        Config cfg = new Config();
        JCommander commander = new JCommander(cfg, null, args2);
        boolean usageRequested = cfg.help || args2.length == 0;
        if (usageRequested) {
            commander.usage();
            System.exit(1);
        }

        SparkConf conf = UtilHelpers.buildSparkConf("Table-Size-Stats", cfg.sparkMaster);
        conf.set("spark.executor.memory", cfg.sparkMemory);
        JavaSparkContext jsc = new JavaSparkContext(conf);
        try {
            new TableSizeStats(jsc, cfg).run();
        } catch (TableNotFoundException e) {
            String notFound = String.format("The Hudi data table is not found: [%s].", cfg.basePath);
            LOG.warn(notFound, e);
        } catch (Throwable throwable) {
            // Top-level boundary: log anything else and fall through to the cleanup below.
            LOG.error("Failed to get table size stats for " + cfg, throwable);
        } finally {
            jsc.stop();
        }
    }

    /**
     * Logs size statistics for every configured table.
     *
     * <p>When {@code --props-path} is set, each line returned by {@code getFilePaths}
     * is treated as a table base path; otherwise {@code --base-path} is used and must
     * be non-null.
     *
     * @throws TableNotFoundException propagated unchanged so callers (such as
     *         {@code main}) can tell a missing table from other failures
     * @throws HoodieException wrapping any other failure
     */
    public void run() {
        try {
            LOG.info(this.cfg.toString());
            LOG.info(" ****** Fetching table size stats ******");
            LocalDate[] dateInterval = TableSizeStats.getUserSpecifiedDateInterval(this.cfg);
            if (this.cfg.propsFilePath != null) {
                List<String> tablePaths = TableSizeStats.getFilePaths(this.cfg.propsFilePath, this.jsc.hadoopConfiguration());
                for (String tablePath : tablePaths) {
                    this.logTableStats(tablePath, dateInterval);
                }
            } else {
                if (this.cfg.basePath == null) {
                    throw new HoodieIOException("Base path needs to be set.");
                }
                this.logTableStats(this.cfg.basePath, dateInterval);
            }
        } catch (TableNotFoundException e) {
            // Rethrow as-is: wrapping it would hide it from the dedicated catch in main().
            throw e;
        } catch (Exception e) {
            // Name whichever input was actually used; basePath is null when --props-path drives the run.
            String target = this.cfg.propsFilePath != null
                ? "tables listed in " + this.cfg.propsFilePath
                : String.valueOf(this.cfg.basePath);
            throw new HoodieException("Unable to fetch table size stats for " + target, e);
        }
    }

    /**
     * Collects the sizes of the latest base files of one table and logs them.
     *
     * <p>Partitions are listed through {@code HoodieTableMetadata}. With a date interval,
     * only partitions whose parsed date lies in {@code [start, end)} are scanned. For each
     * scanned partition, the latest base files come from an in-memory file-system view
     * built with the metadata table disabled. Per-partition statistics are logged when
     * {@code --enable-partition-stats} is set and the partition name is non-blank. Table
     * statistics are logged when {@code --enable-table-stats} is set; otherwise only the
     * total size is logged.
     *
     * @param basePath     base path of the table
     * @param dateInterval {@code {start, end}} (either may be null), or null for no filtering
     * @throws IOException     declared for partition listing
     * @throws HoodieException if a date interval is given but the first partition name is not a date
     */
    private void logTableStats(String basePath, LocalDate[] dateInterval) throws IOException {
        LOG.warn("Processing table " + basePath);
        HoodieMetadataConfig metadataConfig = HoodieMetadataConfig.newBuilder()
            .enable(TableSizeStats.isMetadataEnabled(basePath, this.jsc))
            .build();
        HoodieSparkEngineContext engineContext = new HoodieSparkEngineContext(this.jsc);
        HoodieTableMetadata tableMetadata = HoodieTableMetadata.create(engineContext, metadataConfig, basePath);
        SerializableConfiguration serializableConfiguration = new SerializableConfiguration(this.jsc.hadoopConfiguration());
        List<String> allPartitions = tableMetadata.getAllPartitionPaths();

        // Probe the first partition only when one exists; an empty table simply yields empty stats.
        if (dateInterval != null && !allPartitions.isEmpty() && TableSizeStats.getPartitionDate(allPartitions.get(0)) == null) {
            throw new HoodieException("Cannot apply --start-date, --end-date, or --num-days when partition does not contain date. Interval: " + Arrays.toString(dateInterval) + ", Partition Name: " + allPartitions.get(0));
        }
        LocalDate startDate = dateInterval != null ? dateInterval[0] : null;
        LocalDate endDate = dateInterval != null ? dateInterval[1] : null;

        // Loop-invariant: the meta client and listing config do not depend on the partition.
        HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
            .setBasePath(basePath)
            .setConf(serializableConfiguration.get())
            .build();
        HoodieMetadataConfig fileListingConfig = HoodieMetadataConfig.newBuilder().enable(false).build();

        // NOTE(review): the reservoir is capped at 1,000,000 entries; presumably tables with more
        // base files are sampled, which would make "Total size" an underestimate - confirm.
        Histogram tableHistogram = new Histogram(new UniformReservoir(1000000));
        for (String partition : allPartitions) {
            LocalDate partitionDate = dateInterval != null ? TableSizeStats.getPartitionDate(partition) : null;
            if (!TableSizeStats.isWithinInterval(partitionDate, startDate, endDate)) {
                continue;
            }
            HoodieTableFileSystemView fileSystemView = FileSystemViewManager.createInMemoryFileSystemView(
                new HoodieLocalEngineContext(serializableConfiguration.get()), metaClient, fileListingConfig);
            List<HoodieBaseFile> baseFiles = fileSystemView.getLatestBaseFiles(partition).collect(Collectors.toList());
            Histogram partitionHistogram = this.cfg.partitionStats && !partition.trim().isEmpty()
                ? new Histogram(new UniformReservoir(1000000))
                : null;
            for (HoodieBaseFile baseFile : baseFiles) {
                long fileSize = baseFile.getFileSize();
                if (partitionHistogram != null) {
                    partitionHistogram.update(fileSize);
                }
                tableHistogram.update(fileSize);
            }
            if (partitionHistogram != null) {
                TableSizeStats.logStats("Partition stats [name: " + partition + (partitionDate != null ? ", has date: yes" : "") + "]", partitionHistogram);
            }
        }

        if (this.cfg.tableStats) {
            TableSizeStats.logStats("Table stats [path: " + basePath + "]", tableHistogram);
        } else {
            LOG.info("Total size: {}", TableSizeStats.getFileSizeUnit(Arrays.stream(tableHistogram.getSnapshot().getValues()).sum()));
        }
    }

    /**
     * Tells whether a partition should be scanned: start is inclusive, end is exclusive,
     * and a null bound is open.
     *
     * <p>A null {@code partitionDate} always passes. That covers the no-interval case, but
     * it also lets through partitions whose name could not be parsed while an interval is
     * active. NOTE(review): confirm the latter is intended.
     */
    private static boolean isWithinInterval(LocalDate partitionDate, LocalDate startDate, LocalDate endDate) {
        if (partitionDate == null) {
            return true;
        }
        boolean onOrAfterStart = startDate == null || !partitionDate.isBefore(startDate);
        boolean beforeEnd = endDate == null || partitionDate.isBefore(endDate);
        return onOrAfterStart && beforeEnd;
    }

    /**
     * Reports whether the table config at {@code basePath} lists a metadata
     * partition named {@code files}.
     *
     * @param basePath base path of the table
     * @param jsc      Spark context supplying the Hadoop configuration
     * @return true when {@code files} is among the table's metadata partitions
     */
    private static boolean isMetadataEnabled(String basePath, JavaSparkContext jsc) {
        Set<String> metadataPartitions = HoodieTableMetaClient.builder()
            .setBasePath(basePath)
            .setConf(jsc.hadoopConfiguration())
            .build()
            .getTableConfig()
            .getMetadataPartitions();
        // An empty set contains nothing, so a separate emptiness check adds nothing.
        return metadataPartitions.contains("files");
    }

    /**
     * Reads the table base paths listed one per line in the file at {@code propsPath}.
     *
     * <p>The file is decoded as UTF-8. Each line is trimmed, and blank lines are skipped
     * so that trailing newlines or stray whitespace do not turn into bogus table paths.
     *
     * @param propsPath  location of the listing file
     * @param hadoopConf Hadoop configuration; a fresh {@code Configuration} is used when null
     * @return the non-blank, trimmed lines in file order
     * @throws HoodieIOException if the file cannot be opened or read
     */
    private static List<String> getFilePaths(String propsPath, Configuration hadoopConf) {
        List<String> filePaths = new ArrayList<>();
        FileSystem fs = FSUtils.getFs(propsPath, Option.ofNullable(hadoopConf).orElseGet(Configuration::new));
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(fs.open(new Path(propsPath)), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String tablePath = line.trim();
                if (!tablePath.isEmpty()) {
                    filePaths.add(tablePath);
                }
            }
        } catch (IOException ioe) {
            LOG.error("Error reading in properties from dfs from file." + propsPath);
            throw new HoodieIOException("Cannot read properties from dfs from file " + propsPath, ioe);
        }
        return filePaths;
    }

    /**
     * Derives the date interval requested on the command line.
     *
     * <ul>
     *   <li>{@code --end-date} sets the end of the interval.</li>
     *   <li>{@code --start-date} sets the start and takes precedence over {@code --num-days}.</li>
     *   <li>Without {@code --start-date}, a positive {@code --num-days} sets the end to today
     *       and the start to that many days earlier, replacing any {@code --end-date}.</li>
     * </ul>
     *
     * @param cfg parsed command-line options
     * @return {@code {start, end}} where either element may be null, or null when neither is set
     * @throws HoodieException if a date cannot be parsed, {@code --num-days} is negative,
     *         or the start is not strictly before the end
     */
    private static LocalDate[] getUserSpecifiedDateInterval(Config cfg) {
        LocalDate endDate = null;
        if (cfg.endDate != null) {
            try {
                endDate = LocalDate.parse(cfg.endDate, DATE_FORMATTER);
            } catch (DateTimeParseException dtpe) {
                throw new HoodieException("Unable to parse --end-date. ", dtpe);
            }
            LOG.info("Setting ending date to {}. ", endDate);
        } else {
            LOG.info("End date is not specified: {}.", endDate);
        }

        LocalDate startDate = null;
        if (cfg.startDate != null) {
            // Report a malformed start date the same way as a malformed end date.
            try {
                startDate = LocalDate.parse(cfg.startDate, DATE_FORMATTER);
            } catch (DateTimeParseException dtpe) {
                throw new HoodieException("Unable to parse --start-date. ", dtpe);
            }
            LOG.info("Setting starting date to {}.", startDate);
        } else if (cfg.numDays == 0L) {
            LOG.info("Start date not specified: {}.", startDate);
        } else if (cfg.numDays > 0L) {
            // NOTE(review): this discards a user-supplied --end-date - confirm that is intended.
            endDate = LocalDate.now();
            startDate = endDate.minusDays(cfg.numDays);
            LOG.info("Setting starting date to {} ({} - {} days). ", startDate, endDate, cfg.numDays);
        } else {
            throw new HoodieException("--num-days must specify a positive value.");
        }

        if (startDate != null && endDate != null && !startDate.isBefore(endDate)) {
            throw new HoodieException("Starting date must be before ending date. Start Date: " + startDate + ", End Date: " + endDate);
        }
        if (startDate == null && endDate == null) {
            return null;
        }
        return new LocalDate[]{startDate, endDate};
    }

    /**
     * Extracts a date from a partition name.
     *
     * <p>If the name contains {@code =} and splits into exactly two parts on it, the
     * trimmed second part is parsed; otherwise the whole name is parsed.
     *
     * @param partition partition name
     * @return the parsed date, or null (after logging an error) when parsing fails
     */
    @Nullable
    private static LocalDate getPartitionDate(String partition) {
        String candidate = partition;
        if (partition.contains("=")) {
            String[] keyAndValue = partition.split("=");
            if (keyAndValue.length == 2) {
                candidate = keyAndValue[1].trim();
            }
        }
        try {
            return LocalDate.parse(candidate, DATE_FORMATTER);
        } catch (DateTimeParseException dtpe) {
            LOG.error("Partition name {} must conform to date format if --start-date, --end-date, or --num-days are specified. ", partition, dtpe);
            return null;
        }
    }

    /**
     * Formats a byte count with two decimals and the largest fitting unit from
     * {@code FILE_SIZE_UNITS}.
     *
     * <p>The value is divided by 1024 while it exceeds 1024 and a larger unit remains.
     * Values beyond the last unit stay expressed in that unit (for example
     * {@code 2048.00 TB}) instead of indexing past the end of the unit table.
     *
     * @param size size in bytes
     * @return the formatted size, e.g. {@code 1.50 MB}
     */
    private static String getFileSizeUnit(double size) {
        int unitIndex = 0;
        int lastUnitIndex = FILE_SIZE_UNITS.length - 1;
        // Stop at the last unit: the previous bound let the index run one past the array.
        while (size > 1024.0 && unitIndex < lastUnitIndex) {
            size /= 1024.0;
            unitIndex++;
        }
        return String.format("%.2f %s", size, FILE_SIZE_UNITS[unitIndex]);
    }

    /**
     * Logs a header followed by the file count, total, minimum, maximum, mean,
     * median and the 50th/90th/95th/99th percentile sizes taken from the
     * histogram's snapshot.
     *
     * @param header    line logged before the statistics
     * @param histogram histogram of file sizes in bytes
     */
    private static void logStats(String header, Histogram histogram) {
        LOG.info(header);
        Snapshot stats = histogram.getSnapshot();
        double totalBytes = Arrays.stream(stats.getValues()).sum();

        LOG.info("Number of files: {}", stats.size());
        LOG.info("Total size: {}", getFileSizeUnit(totalBytes));
        LOG.info("Minimum file size: {}", getFileSizeUnit(stats.getMin()));
        LOG.info("Maximum file size: {}", getFileSizeUnit(stats.getMax()));
        LOG.info("Average file size: {}", getFileSizeUnit(stats.getMean()));
        LOG.info("Median file size: {}", getFileSizeUnit(stats.getMedian()));

        // Percentile lines share one shape, so drive them from parallel tables.
        String[] percentileFormats = {"P50 file size: {}", "P90 file size: {}", "P95 file size: {}", "P99 file size: {}"};
        double[] quantiles = {0.5, 0.9, 0.95, 0.99};
        for (int i = 0; i < quantiles.length; i++) {
            LOG.info(percentileFormats[i], getFileSizeUnit(stats.getValue(quantiles[i])));
        }
    }

    /**
     * Command-line options of {@code TableSizeStats}, populated by JCommander.
     *
     * <p>{@code equals} and {@code hashCode} cover every option except {@code help},
     * which {@code toString} also leaves out.
     */
    public static class Config
    implements Serializable {
        @Parameter(names={"--base-path", "-bp"}, description="Base path for the table", required=false)
        public String basePath = null;
        @Parameter(names={"--num-days", "-nd"}, description="Consider files modified within this many days.", required=false)
        public long numDays = 0L;
        @Parameter(names={"--start-date", "-sd"}, description="Consider files modified on or after this date.", required=false)
        public String startDate = null;
        @Parameter(names={"--end-date", "-ed"}, description="Consider files modified before this date.", required=false)
        public String endDate = null;
        @Parameter(names={"--enable-table-stats", "-fs"}, description="Show file-level stats.", required=false)
        public boolean tableStats = false;
        @Parameter(names={"--enable-partition-stats", "-ps"}, description="Show partition-level stats.", required=false)
        public boolean partitionStats = false;
        @Parameter(names={"--props-path", "-pp"}, description="Properties file containing base paths one per line", required=false)
        public String propsFilePath = null;
        @Parameter(names={"--parallelism", "-pl"}, description="Parallelism for valuation", required=false)
        public int parallelism = 200;
        @Parameter(names={"--spark-master", "-ms"}, description="Spark master", required=false)
        public String sparkMaster = null;
        @Parameter(names={"--spark-memory", "-sm"}, description="spark memory to use", required=false)
        public String sparkMemory = "1g";
        @Parameter(names={"--hoodie-conf"}, description="Any configuration that can be set in the properties file (using the CLI parameter \"--props\") can also be passed command line using this parameter. This can be repeated", splitter=IdentitySplitter.class)
        public List<String> configs = new ArrayList<String>();
        @Parameter(names={"--help", "-h"}, help=true)
        public Boolean help = false;

        /** Renders every option except {@code help}, one per line. */
        @Override
        public String toString() {
            return "TableSizeStats {\n   --base-path " + this.basePath
                + ", \n   --num-days " + this.numDays
                + ", \n   --start-date " + this.startDate
                + ", \n   --end-date " + this.endDate
                + ", \n   --enable-table-stats " + this.tableStats
                + ", \n   --enable-partition-stats " + this.partitionStats
                + ", \n   --parallelism " + this.parallelism
                + ", \n   --spark-master " + this.sparkMaster
                + ", \n   --spark-memory " + this.sparkMemory
                + ", \n   --props " + this.propsFilePath
                + ", \n   --hoodie-conf " + this.configs + "\n}";
        }

        /**
         * Compares all options except {@code help}. Every reference field is compared
         * null-safely, because most of them, including {@code basePath}, default to null.
         */
        @Override
        public boolean equals(Object o) {
            if (this == o) {
                return true;
            }
            if (o == null || this.getClass() != o.getClass()) {
                return false;
            }
            Config other = (Config) o;
            return Objects.equals(this.basePath, other.basePath)
                && this.numDays == other.numDays
                && Objects.equals(this.startDate, other.startDate)
                && Objects.equals(this.endDate, other.endDate)
                && this.tableStats == other.tableStats
                && this.partitionStats == other.partitionStats
                && this.parallelism == other.parallelism
                && Objects.equals(this.sparkMaster, other.sparkMaster)
                && Objects.equals(this.sparkMemory, other.sparkMemory)
                && Objects.equals(this.propsFilePath, other.propsFilePath)
                && Objects.equals(this.configs, other.configs);
        }

        /**
         * Hashes exactly the fields compared by {@code equals}; {@code help} is left out
         * so that equal configs always produce equal hash codes.
         */
        @Override
        public int hashCode() {
            return Objects.hash(this.basePath, this.numDays, this.startDate, this.endDate, this.tableStats, this.partitionStats, this.parallelism, this.sparkMaster, this.sparkMemory, this.propsFilePath, this.configs);
        }
    }
}

