package javax.visrec.util;

import java.util.Objects;

import javax.visrec.ml.data.DataSet;
import javax.visrec.ml.data.Normalizer;

/**
 * Utility methods that provide common operations on data sets.
 *
 * <p>Only {@link #normalize(DataSet, Normalizer)} is implemented; every other public method
 * in this class currently throws {@link UnsupportedOperationException}.
 *
 * <p>Will probably be deprecated, or we need to figure out how to expose
 * implementation-specific operations.
 *
 * @author Zoran Sevarac
 * @deprecated the API of this class is unsettled and most of its operations are
 *     unimplemented stubs; no replacement is defined in this file.
 */
@Deprecated
public final class DataSets {

    /** Not instantiable: this class only hosts static utility methods. */
    private DataSets() { }

    // TODO: method to get basic statistics - summary(): mean, median, min, max, std, quartiles

    // Scale values.
    // Maybe just provide DataSet.normalize(new MaxNormalizer), and the data set injects itself
    // into the normalizer; or even better: norm = new MaxNormalizer(dataSet); norm.normalize();
    // Also separate construction from analysis.

    /**
     * Normalizes the given data set by delegating to the supplied normalizer.
     *
     * @param <T> type of the data set
     * @param dataSet data set handed to {@code norm} as-is (it is not checked for
     *     {@code null} here)
     * @param norm normalizer to apply to {@code dataSet}
     * @throws NullPointerException if {@code norm} is {@code null}
     */
    public static <T extends DataSet<?>> void normalize(T dataSet, Normalizer<T> norm) {
        // Fail with a named message instead of an anonymous NPE on the call below.
        Objects.requireNonNull(norm, "norm");
        norm.normalize(dataSet);
    }

    // How about moving these static methods to the corresponding interface?
//    public static <T extends DataSet<?>> void normalizeMax(DataSet<E> dataSet) {
//        Normalizer norm = new MaxNormalizer(dataSet); // perform analysis of data set (find max values)
//        return norm.normalize(dataSet, false); // perform normalization and return as new data set
//    }

//    public static <E> DataSet<E> normalizeMinMax(DataSet<E> dataSet) {
//        Normalizer norm = new MinMaxNormalizer(dataSet); // perform analysis of data set (find max values)
//        return norm.normalize(dataSet, false); // perform normalization and return as new data set
//    }

//    public static <E> DataSet<E> normalizeRange(DataSet<E> dataSet, float low, float high) {
//        Normalizer norm = new MinMaxNormalizer(dataSet); // perform analysis of data set (find max values)
//        return norm.normalize(dataSet, false); // perform normalization and return as new data set
//    }

    // How to specify which columns to normalize? Do we need to? Just normalize all.
    // How will this method know how to normalize a specific type of element, e.g. User?
    // Or does this assume only numeric values?

    /**
     * Placeholder for standardization: the intent is to give the data set the desired
     * statistical properties (zero mean, standard deviation of one), applied to all numeric
     * columns. Not implemented yet.
     *
     * @param <T> type of the data set
     * @param dataSet data set to standardize (currently unused)
     * @throws UnsupportedOperationException always
     */
    public static <T extends DataSet<?>> void standardize(T dataSet) {
        // Open question: how will this method know how to normalize a specific type of element?
        throw new UnsupportedOperationException("not implemented");
    }

    /**
     * Placeholder for duplicate removal; this should definitely be a utility method.
     * Not implemented yet.
     *
     * <p>NOTE(review): the method takes no data set argument, so there is nothing to operate
     * on; the signature presumably needs a parameter once this is implemented.
     *
     * @param <E> type of the data set elements
     * @return never returns normally
     * @throws UnsupportedOperationException always
     */
    public static <E> DataSet<E> removeDuplicates() {
        throw new UnsupportedOperationException("not implemented");
    }

    // TODO: transform() - maybe transform into a data set whose elements are of another type

    // TODO: statisticsSummary() - mean, std, frequency by columns (max, min, mean, std);
    // maybe better to put it in the data set class?

    /**
     * Placeholder for a balance check: the intent is to return {@code true} if the data set is
     * balanced. Only needed for classification problems, so it should not go into the
     * {@code DataSet} interface. Not implemented yet.
     *
     * @param dataSet data set to inspect (currently unused)
     * @return never returns normally
     * @throws UnsupportedOperationException always
     */
    public static boolean isBalanced(DataSet<?> dataSet) {
        // Idea: use a generic method to infer the type of the data set elements.
        throw new UnsupportedOperationException("not implemented");
    }

    // TODO: summary - return basic statistics for each column in the data set:
    // min, max, mean, mode, std, 1st quartile, 3rd quartile

    /*
     * Further ideas:
     *   - addNoise with some noise generator
     *   - balance(BalanceStrategy)
     *   - dimensionality reduction
     */

}