001package javax.visrec.util;
002
003import javax.visrec.ml.data.Normalizer;
004import javax.visrec.ml.data.DataSet;
005
006/**
007 * Utility methods that provides common operations on data sets.
008 * Will probably be deprecated or figure out how to expose implementation specific operations
009 * 
010 * @author Zoran Sevarac
011 */
012@Deprecated
013public class DataSets {
014    
015    private DataSets() { } 
016    
017    // method to get basic statistics - summary() mead median, mi , max std, qt
018    
019    // scale values 
020    // maybe just provide DataSet.normalize(new MaxNormalizer) , and dataSet injects itself into normalizer
021    // or even better norm= new MaxNormalizer(dataSet); norm.normalize(); also separate construction from analysis
022
023    public static <T extends DataSet<?>> void normalize(T dataSet, Normalizer<T> norm) {
024        norm.normalize(dataSet);
025    }
026
027    // how about moving thes estatic methods to coresponding interface?
028//    public static <T extends DataSet<?>> void normalizeMax(DataSet<E> dataSet) {
029//        Normalizer norm = new MaxNormalizer(dataSet); // perform analysys of data set (find max values)
030//        return norm.normalize(dataSet, false); // perfrom normalization and return as new data set
031//    }
032    
033//    public static <E> DataSet<E> normalizeMinMax(DataSet<E> dataSet) {
034//        Normalizer norm = new MinMaxNormalizer(dataSet); // perform analysys of data set (find max values)
035//        return norm.normalize(dataSet, false); // perfrom normalization and return as new data set
036//    }    
037
038//    public static <E> DataSet<E> normalizeRange(DataSet<E> dataSet, float low, float high) {
039//        Normalizer norm = new MinMaxNormalizer(dataSet); // perform analysys of data set (find max values)
040//        return norm.normalize(dataSet, false); // perfrom normalization and return as new data set
041//    }    
042    
043
044    // how to specify which columns to normalize? do we need to? just normalize all
045    // how will this method know about how to normalize specific type of elemeents? eg. User? or  this assumes only numeric values
046        
047    
048    // retrun data set whith ddesired statistical properties
049    // zero mean, one std
050    public static <T extends DataSet<?>> void standardize(T dataSet) { // apply to all numer columns
051        // how will this method know about how to normalize specific type of elemeents?
052        throw new UnsupportedOperationException("not implemented");
053    }
054    
055    // this shoul ddefinitely be utility method
056    public static <E> DataSet<E> removeDuplicates() {
057        throw new UnsupportedOperationException("not implemented");
058    }
059
060    //transform()       - maybe can transorm into datas set whsle elements ar eof another type
061        
062//      statisticsSummary ()    -       mean std freq  by cols, maybe better go put it in dat aset class?
063        // max, min, mean, std
064    // returns true if data set is balanced (only needed for classification problems, not to go in Dataset interface)     
065    public static boolean isBalanced(DataSet<?> dataSet) {  //use generic method to infer type of data set elements
066        throw new UnsupportedOperationException("not implemented");
067    }
068
069    // summary - return basic statistics for each column in dat aset min, max, mean , mode, std, 1q, 3q
070    
071    /**
072        addNoise with some noice generator
073        balance(BalanceStrtegy)
074        
075        dimensionalityreuction    
076    */
077    
078}