/*
 * Decompiled with CFR 0.152.
 */
package net.maizegenetics.analysis.data;

import com.google.common.collect.Range;
import java.awt.Frame;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import java.util.function.IntFunction;
import java.util.stream.Stream;
import javax.swing.ImageIcon;
import net.maizegenetics.analysis.data.FileLoadPlugin;
import net.maizegenetics.analysis.numericaltransform.ImputationPlugin;
import net.maizegenetics.analysis.numericaltransform.NumericalGenotypePlugin;
import net.maizegenetics.dna.snp.GenotypeTable;
import net.maizegenetics.dna.snp.score.ReferenceProbability;
import net.maizegenetics.matrixalgebra.Matrix.DoubleMatrix;
import net.maizegenetics.matrixalgebra.Matrix.DoubleMatrixFactory;
import net.maizegenetics.phenotype.NumericAttribute;
import net.maizegenetics.phenotype.Phenotype;
import net.maizegenetics.phenotype.PhenotypeAttribute;
import net.maizegenetics.phenotype.PhenotypeBuilder;
import net.maizegenetics.phenotype.TaxaAttribute;
import net.maizegenetics.plugindef.AbstractPlugin;
import net.maizegenetics.plugindef.DataSet;
import net.maizegenetics.plugindef.Datum;
import net.maizegenetics.plugindef.Plugin;
import net.maizegenetics.plugindef.PluginParameter;
import net.maizegenetics.stats.PCA.PrinComp;
import net.maizegenetics.util.OpenBitSet;
import net.maizegenetics.util.SimpleTableReport;
import org.apache.log4j.Logger;

public class PrincipalComponentsPlugin
extends AbstractPlugin {
    private static final Logger myLogger = Logger.getLogger(PrincipalComponentsPlugin.class);
    private PluginParameter<Boolean> useCovariance = new PluginParameter.Builder<Boolean>("covariance", true, Boolean.class).description("If the box is checked, then the analysis will do an eigenvalue decomposition of the covariance matrix. If the box is unchecked, it will use a correlation matrix. Using the covariance matrix is recommended for genotypes while the correlation matrix is often used for phenotypes.").guiName("covariance (alternative = correlation)").build();
    private PluginParameter<PCA_LIMIT> limitBy = new PluginParameter.Builder<PCA_LIMIT>("limitBy", PCA_LIMIT.number_of_components, PCA_LIMIT.class).description("This parameter determines the type of value that will be used to limit the number of principal components (axes) returned. The possible choices are number_of_components, min_eigenvalue, and total_variance.").guiName("limit number of components by").build();
    private PluginParameter<Integer> numberOfComponents = new PluginParameter.Builder<Integer>("ncomponents", 5, Integer.class).description("The analysis will return this many principal components up to the number of taxa.").guiName("number of components").dependentOnParameter(this.limitBy, (Object)PCA_LIMIT.number_of_components).build();
    private PluginParameter<Double> minEigenval = new PluginParameter.Builder<Double>("minEigenval", 0.0, Double.class).description("All principal components with an eigenvalue greater than or equal to this value will be returned.").guiName("minimum eigenvalue").dependentOnParameter(this.limitBy, (Object)PCA_LIMIT.min_eigenvalue).build();
    private PluginParameter<Double> totalVar = new PluginParameter.Builder<Double>("totalVar", 0.5, Double.class).description("The first principal components that together explain this proportion of the total variance will be returned.").range((Range<Comparable<Double>>)Range.closed((Comparable)Double.valueOf(0.0), (Comparable)Double.valueOf(1.0))).guiName("total variance").dependentOnParameter(this.limitBy, (Object)PCA_LIMIT.total_variance).build();
    private PluginParameter<Boolean> reportEigenvalues = new PluginParameter.Builder<Boolean>("reportEigenvalues", true, Boolean.class).description("Returns a list of eigenvalues sorted high to low.").guiName("Return Eigenvalues").build();
    private PluginParameter<Boolean> reportEigenvectors = new PluginParameter.Builder<Boolean>("reportEigenvectors", true, Boolean.class).description("Returns the eigenvectors calculated from a Singular Value Decomposition of the data. The resulting table can be quite large if the number of variants and taxa are big.").guiName("Return Eigenvectors").build();

    public PrincipalComponentsPlugin(Frame parentFrame, boolean isInteractive) {
        super(parentFrame, isInteractive);
    }

    @Override
    public DataSet processData(DataSet input) {
        ArrayList<Datum> myResults = new ArrayList<Datum>();
        List<Datum> myData = input.getDataOfType(new Class[]{Phenotype.class, GenotypeTable.class});
        for (Datum aDatum : myData) {
            if (aDatum.getData() instanceof Phenotype) {
                Phenotype myPhenotype = (Phenotype)aDatum.getData();
                if (this.areAnyPhenotypesMissing(myPhenotype.dataAttributeStream())) {
                    StringBuilder msgBuilder = new StringBuilder();
                    msgBuilder.append("There are missing values in ").append(aDatum.getName()).append(". PCA will not be run.");
                    throw new IllegalArgumentException(msgBuilder.toString());
                }
                List<PhenotypeAttribute> dataAttributes = myPhenotype.attributeListOfType(Phenotype.ATTRIBUTE_TYPE.data);
                int nAttributes = dataAttributes.size();
                int nobs = myPhenotype.numberOfObservations();
                DoubleMatrix dataMatrix = DoubleMatrixFactory.DEFAULT.make(nobs, nAttributes);
                int colCount = 0;
                for (PhenotypeAttribute attr : dataAttributes) {
                    float[] colData = ((NumericAttribute)attr).floatValues();
                    for (int i = 0; i < nobs; ++i) {
                        dataMatrix.set(i, colCount, colData[i]);
                    }
                    ++colCount;
                }
                PrinComp.PC_TYPE pctype = this.useCovariance.value() != false ? PrinComp.PC_TYPE.cov : PrinComp.PC_TYPE.corr;
                PrinComp pca = new PrinComp(dataMatrix, pctype);
                myResults.addAll(this.addResultsToDatumList(pca, myPhenotype.taxaAttribute(), dataAttributes, aDatum.getName()));
                continue;
            }
            GenotypeTable myGenotype = (GenotypeTable)aDatum.getData();
            if (!myGenotype.hasReferenceProbablity()) {
                myGenotype = NumericalGenotypePlugin.setAlternateMinorAllelesToMinor(myGenotype);
                DataSet myDataset = new DataSet(new Datum("name", myGenotype, "comment"), (Plugin)this);
                ImputationPlugin myImputer = new ImputationPlugin(null, false);
                myImputer.by_mean(true);
                DataSet imputedDataset = myImputer.performFunction(myDataset);
                myGenotype = (GenotypeTable)imputedDataset.getData(0).getData();
            } else if (this.areAnyGenotypesMissingInReferenceProbability(myGenotype)) {
                StringBuilder msgBuilder = new StringBuilder();
                msgBuilder.append("There are missing values in ").append(aDatum.getName()).append(". PCA will not be run.");
                throw new IllegalArgumentException(msgBuilder.toString());
            }
            int ntaxa = myGenotype.numberOfTaxa();
            int nsites = myGenotype.numberOfSites();
            DoubleMatrix dataMatrix = DoubleMatrixFactory.DEFAULT.make(ntaxa, nsites);
            for (int t = 0; t < ntaxa; ++t) {
                for (int s = 0; s < nsites; ++s) {
                    dataMatrix.set(t, s, myGenotype.referenceProbability(t, s));
                }
            }
            PrinComp.PC_TYPE pctype = this.useCovariance.value() != false ? PrinComp.PC_TYPE.cov : PrinComp.PC_TYPE.corr;
            PrinComp pca = new PrinComp(dataMatrix, pctype);
            myResults.addAll(this.addResultsToDatumList(pca, myGenotype, aDatum.getName()));
        }
        return new DataSet(myResults, (Plugin)this);
    }

    private boolean areAnyPhenotypesMissing(Stream<NumericAttribute> attributes) {
        Optional<NumericAttribute> na = attributes.filter(a -> a.missing().cardinality() > 0L).findAny();
        return na.isPresent();
    }

    private boolean areAnyGenotypesMissingInReferenceProbability(GenotypeTable myGenotype) {
        int ntaxa = myGenotype.numberOfTaxa();
        int nsites = myGenotype.numberOfSites();
        ReferenceProbability refprob = myGenotype.referenceProbability();
        for (int s = 0; s < nsites; ++s) {
            for (int t = 0; t < ntaxa; ++t) {
                if (!Float.isNaN(refprob.value(t, s))) continue;
                return true;
            }
        }
        return false;
    }

    private List<Datum> addResultsToDatumList(PrinComp pca, TaxaAttribute myTaxa, List<PhenotypeAttribute> data, String datasetName) {
        int numberOfPCs;
        ArrayList<Datum> results = new ArrayList<Datum>();
        double[] eigenvalues = pca.getEigenValues();
        int nvalues = eigenvalues.length;
        double[] cumulativeEigenvalues = Arrays.copyOf(eigenvalues, nvalues);
        for (int i = 1; i < nvalues; ++i) {
            int n = i;
            cumulativeEigenvalues[n] = cumulativeEigenvalues[n] + cumulativeEigenvalues[i - 1];
        }
        if (this.limitBy.value() == PCA_LIMIT.number_of_components) {
            numberOfPCs = Math.min(this.numberOfComponents.value(), nvalues);
        } else if (this.limitBy.value() == PCA_LIMIT.total_variance) {
            double limit = this.totalVar.value() * cumulativeEigenvalues[nvalues - 1];
            int ndx = Arrays.binarySearch(cumulativeEigenvalues, limit);
            numberOfPCs = ndx < -1 ? -ndx : ndx + 1;
            numberOfPCs = Math.min(numberOfPCs, nvalues);
        } else {
            int ndx = Arrays.binarySearch(eigenvalues, this.minEigenval.value());
            numberOfPCs = ndx < -1 ? -ndx : ndx + 1;
            numberOfPCs = Math.min(numberOfPCs, nvalues);
        }
        DoubleMatrix pcs = pca.getPrincipalComponents();
        ArrayList<PhenotypeAttribute> attributes = new ArrayList<PhenotypeAttribute>();
        ArrayList<Phenotype.ATTRIBUTE_TYPE> types = new ArrayList<Phenotype.ATTRIBUTE_TYPE>();
        attributes.add(myTaxa);
        types.add(Phenotype.ATTRIBUTE_TYPE.taxa);
        int ntaxa = myTaxa.size();
        for (int i = 0; i < numberOfPCs; ++i) {
            String pcname = "PC" + (i + 1);
            float[] pcvalue = new float[ntaxa];
            for (int t = 0; t < ntaxa; ++t) {
                pcvalue[t] = (float)pcs.get(t, i);
            }
            NumericAttribute na = new NumericAttribute(pcname, pcvalue, new OpenBitSet(ntaxa));
            attributes.add(na);
            types.add(Phenotype.ATTRIBUTE_TYPE.covariate);
        }
        Phenotype pcPhenotype = new PhenotypeBuilder().fromAttributeList(attributes, types).build().get(0);
        StringBuilder nameBuilder = new StringBuilder();
        nameBuilder.append("PC_").append(datasetName);
        StringBuilder commentBuilder = new StringBuilder("\nPrincipalComponents stored as covariates.\n");
        commentBuilder.append("calculated from ").append(datasetName);
        results.add(new Datum(nameBuilder.toString(), pcPhenotype, commentBuilder.toString()));
        if (this.reportEigenvalues.value().booleanValue()) {
            String name = "Proportion of Variance Explained";
            Object[] columnNames = new String[]{"PC", "eigenvalue", "proportion of total", "cumulative proportion"};
            int nEigenvalues = eigenvalues.length;
            Object[][] tableData = new Object[nEigenvalues][4];
            double sumvalues = cumulativeEigenvalues[nEigenvalues - 1];
            for (int i = 0; i < nEigenvalues; ++i) {
                tableData[i][0] = String.format("%d", i + 1);
                tableData[i][1] = new Double(eigenvalues[i]);
                tableData[i][2] = new Double(eigenvalues[i] / sumvalues);
                tableData[i][3] = new Double(cumulativeEigenvalues[i] / sumvalues);
            }
            nameBuilder = new StringBuilder();
            nameBuilder.append("Eigenvalues_").append(datasetName);
            commentBuilder = new StringBuilder("\nEigenvalues and proportion of variance explained by PCs.\n");
            commentBuilder.append("calculated from ").append(datasetName);
            SimpleTableReport str = new SimpleTableReport(name, columnNames, tableData);
            results.add(new Datum(nameBuilder.toString(), str, commentBuilder.toString()));
        }
        DoubleMatrix eigenvectors = pca.getEigenVectors();
        if (this.reportEigenvectors.value().booleanValue()) {
            String name = "Eigenvectors";
            int ncol = numberOfPCs + 1;
            int nrows = data.size();
            Object[] columnNames = new String[ncol];
            columnNames[0] = "Trait";
            for (int c = 1; c < ncol; ++c) {
                columnNames[c] = String.format("Eigenvector%d", c);
            }
            Object[][] tableData = new Object[nrows][ncol];
            for (int r = 0; r < nrows; ++r) {
                tableData[r][0] = data.get(r).name();
                for (int c = 1; c < ncol; ++c) {
                    tableData[r][c] = new Double(eigenvectors.get(r, c - 1));
                }
            }
            nameBuilder = new StringBuilder();
            nameBuilder.append("Eigenvectors_").append(datasetName);
            commentBuilder = new StringBuilder("\nEigenvectors for requested PCs.\n");
            commentBuilder.append("calculated from ").append(datasetName);
            SimpleTableReport str = new SimpleTableReport(name, columnNames, tableData);
            results.add(new Datum(nameBuilder.toString(), str, commentBuilder.toString()));
        }
        return results;
    }

    private List<Datum> addResultsToDatumList(PrinComp pca, GenotypeTable myGenotype, String datasetName) {
        int numberOfPCs;
        ArrayList<Datum> results = new ArrayList<Datum>();
        double[] eigenvalues = pca.getEigenValues();
        int nvalues = eigenvalues.length;
        double[] cumulativeEigenvalues = Arrays.copyOf(eigenvalues, nvalues);
        for (int i = 1; i < nvalues; ++i) {
            int n = i;
            cumulativeEigenvalues[n] = cumulativeEigenvalues[n] + cumulativeEigenvalues[i - 1];
        }
        if (this.limitBy.value() == PCA_LIMIT.number_of_components) {
            numberOfPCs = Math.min(this.numberOfComponents.value(), nvalues);
        } else if (this.limitBy.value() == PCA_LIMIT.total_variance) {
            double limit = this.totalVar.value() * cumulativeEigenvalues[nvalues - 1];
            int ndx = Arrays.binarySearch(cumulativeEigenvalues, limit);
            numberOfPCs = ndx < -1 ? -ndx : ndx + 1;
            numberOfPCs = Math.min(numberOfPCs, nvalues);
        } else {
            int ndx = Arrays.binarySearch(eigenvalues, this.minEigenval.value());
            numberOfPCs = ndx < -1 ? -ndx : ndx + 1;
            numberOfPCs = Math.min(numberOfPCs, nvalues);
        }
        DoubleMatrix pcs = pca.getPrincipalComponents();
        ArrayList<PhenotypeAttribute> attributes = new ArrayList<PhenotypeAttribute>();
        ArrayList<Phenotype.ATTRIBUTE_TYPE> types = new ArrayList<Phenotype.ATTRIBUTE_TYPE>();
        attributes.add(new TaxaAttribute(myGenotype.taxa()));
        types.add(Phenotype.ATTRIBUTE_TYPE.taxa);
        int ntaxa = myGenotype.numberOfTaxa();
        for (int i = 0; i < numberOfPCs; ++i) {
            String pcname = "PC" + (i + 1);
            float[] pcvalue = new float[ntaxa];
            for (int t = 0; t < ntaxa; ++t) {
                pcvalue[t] = (float)pcs.get(t, i);
            }
            NumericAttribute na = new NumericAttribute(pcname, pcvalue, new OpenBitSet(ntaxa));
            attributes.add(na);
            types.add(Phenotype.ATTRIBUTE_TYPE.covariate);
        }
        Phenotype pcPhenotype = new PhenotypeBuilder().fromAttributeList(attributes, types).build().get(0);
        StringBuilder nameBuilder = new StringBuilder();
        nameBuilder.append("PC_").append(datasetName);
        StringBuilder commentBuilder = new StringBuilder("\nPrincipalComponents stored as covariates.\n");
        commentBuilder.append("calculated from ").append(datasetName);
        results.add(new Datum(nameBuilder.toString(), pcPhenotype, commentBuilder.toString()));
        if (this.reportEigenvalues.value().booleanValue()) {
            String name = "Proportion of Variance Explained";
            Object[] columnNames = new String[]{"PC", "eigenvalue", "proportion of total", "cumulative proportion"};
            int nEigenvalues = eigenvalues.length;
            Object[][] tableData = new Object[nEigenvalues][4];
            double sumvalues = cumulativeEigenvalues[nEigenvalues - 1];
            for (int i = 0; i < nEigenvalues; ++i) {
                tableData[i][0] = String.format("%d", i);
                tableData[i][1] = new Double(eigenvalues[i]);
                tableData[i][2] = new Double(eigenvalues[i] / sumvalues);
                tableData[i][3] = new Double(cumulativeEigenvalues[i] / sumvalues);
            }
            nameBuilder = new StringBuilder();
            nameBuilder.append("Eigenvalues_").append(datasetName);
            commentBuilder = new StringBuilder("\nEigenvalues and proportion of variance explained by PCs.\n");
            commentBuilder.append("calculated from ").append(datasetName);
            SimpleTableReport str = new SimpleTableReport(name, columnNames, tableData);
            results.add(new Datum(nameBuilder.toString(), str, commentBuilder.toString()));
        }
        DoubleMatrix eigenvectors = pca.getEigenVectors();
        if (this.reportEigenvectors.value().booleanValue()) {
            String name = "Eigenvectors";
            int ncol = numberOfPCs + 1;
            int nrows = myGenotype.numberOfSites();
            Object[] columnNames = new String[ncol];
            columnNames[0] = "Trait";
            for (int c = 1; c < ncol; ++c) {
                columnNames[c] = String.format("Eigenvector%d", c);
            }
            Object[][] tableData = new Object[nrows][ncol];
            for (int r = 0; r < nrows; ++r) {
                tableData[r][0] = myGenotype.positions().siteName(r);
                for (int c = 1; c < ncol; ++c) {
                    tableData[r][c] = new Double(eigenvectors.get(r, c - 1));
                }
            }
            nameBuilder = new StringBuilder();
            nameBuilder.append("Eigenvectors_").append(datasetName);
            commentBuilder = new StringBuilder("\nEigenvectors for requested PCs.\n");
            commentBuilder.append("calculated from ").append(datasetName);
            SimpleTableReport str = new SimpleTableReport(name, columnNames, tableData);
            results.add(new Datum(nameBuilder.toString(), str, commentBuilder.toString()));
        }
        return results;
    }

    @Override
    public String pluginDescription() {
        return "This plugin performs principal components analysis and returns the requested number of PC axes (components), and, optionally, the eigenvalues and eigenvectors. It can take as input either phenotype data or ReferenceProbability from a GenotypeTable.";
    }

    @Override
    public ImageIcon getIcon() {
        URL imageURL = FileLoadPlugin.class.getResource("/net/maizegenetics/analysis/images/pca.gif");
        if (imageURL == null) {
            return null;
        }
        return new ImageIcon(imageURL);
    }

    @Override
    public String getButtonName() {
        return "PCA";
    }

    @Override
    public String getToolTipText() {
        return "Performs principal components analysis";
    }

    public Phenotype runPlugin(DataSet input) {
        return (Phenotype)this.performFunction(input).getData(0).getData();
    }

    public Boolean covariance() {
        return this.useCovariance.value();
    }

    public PrincipalComponentsPlugin covariance(Boolean value) {
        this.useCovariance = new PluginParameter<Boolean>(this.useCovariance, value);
        return this;
    }

    public PCA_LIMIT limitNumberOfComponentsBy() {
        return this.limitBy.value();
    }

    public PrincipalComponentsPlugin limitNumberOfComponentsBy(PCA_LIMIT value) {
        this.limitBy = new PluginParameter<PCA_LIMIT>(this.limitBy, value);
        return this;
    }

    public Integer numberOfComponents() {
        return this.numberOfComponents.value();
    }

    public PrincipalComponentsPlugin numberOfComponents(Integer value) {
        this.numberOfComponents = new PluginParameter<Integer>(this.numberOfComponents, value);
        return this;
    }

    public Double minimumEigenvalue() {
        return this.minEigenval.value();
    }

    public PrincipalComponentsPlugin minimumEigenvalue(Double value) {
        this.minEigenval = new PluginParameter<Double>(this.minEigenval, value);
        return this;
    }

    public Double totalVariance() {
        return this.totalVar.value();
    }

    public PrincipalComponentsPlugin totalVariance(Double value) {
        this.totalVar = new PluginParameter<Double>(this.totalVar, value);
        return this;
    }

    public Boolean returnEigenvalues() {
        return this.reportEigenvalues.value();
    }

    public PrincipalComponentsPlugin returnEigenvalues(Boolean value) {
        this.reportEigenvalues = new PluginParameter<Boolean>(this.reportEigenvalues, value);
        return this;
    }

    public Boolean returnEigenvectors() {
        return this.reportEigenvectors.value();
    }

    public PrincipalComponentsPlugin returnEigenvectors(Boolean value) {
        this.reportEigenvectors = new PluginParameter<Boolean>(this.reportEigenvectors, value);
        return this;
    }

    public static enum PCA_LIMIT {
        number_of_components,
        min_eigenvalue,
        total_variance;

    }
}

