001/* ====================================================== 002 * JFreeChart : a chart library for the Java(tm) platform 003 * ====================================================== 004 * 005 * (C) Copyright 2000-present, by David Gilbert and Contributors. 006 * 007 * Project Info: https://www.jfree.org/jfreechart/index.html 008 * 009 * This library is free software; you can redistribute it and/or modify it 010 * under the terms of the GNU Lesser General Public License as published by 011 * the Free Software Foundation; either version 2.1 of the License, or 012 * (at your option) any later version. 013 * 014 * This library is distributed in the hope that it will be useful, but 015 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 016 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 017 * License for more details. 018 * 019 * You should have received a copy of the GNU Lesser General Public 020 * License along with this library; if not, write to the Free Software 021 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, 022 * USA. 023 * 024 * [Oracle and Java are registered trademarks of Oracle and/or its affiliates. 025 * Other names may be trademarks of their respective owners.] 026 * 027 * --------------- 028 * Regression.java 029 * --------------- 030 * (C) Copyright 2002-present, by David Gilbert. 031 * 032 * Original Author: David Gilbert; 033 * Contributor(s): Peter Kolb (patch 2795746); 034 * 035 */ 036 037package org.jfree.data.statistics; 038 039import org.jfree.chart.util.Args; 040import org.jfree.data.xy.XYDataset; 041 042/** 043 * A utility class for fitting regression curves to data. 044 */ 045public class Regression { 046 047 private Regression() { 048 // no requirement to instantiate 049 } 050 051 /** 052 * Returns the parameters 'a' and 'b' for an equation y = a + bx, fitted to 053 * the data using ordinary least squares regression. The result is 054 * returned as a double[], where result[0] --> a, and result[1] --> b. 055 * 056 * @param data the data. 057 * 058 * @return The parameters. 059 */ 060 public static double[] getOLSRegression(double[][] data) { 061 062 int n = data.length; 063 if (n < 2) { 064 throw new IllegalArgumentException("Not enough data."); 065 } 066 067 double sumX = 0; 068 double sumY = 0; 069 double sumXX = 0; 070 double sumXY = 0; 071 for (int i = 0; i < n; i++) { 072 double x = data[i][0]; 073 double y = data[i][1]; 074 sumX += x; 075 sumY += y; 076 double xx = x * x; 077 sumXX += xx; 078 double xy = x * y; 079 sumXY += xy; 080 } 081 double sxx = sumXX - (sumX * sumX) / n; 082 double sxy = sumXY - (sumX * sumY) / n; 083 double xbar = sumX / n; 084 double ybar = sumY / n; 085 086 double[] result = new double[2]; 087 result[1] = sxy / sxx; 088 result[0] = ybar - result[1] * xbar; 089 090 return result; 091 092 } 093 094 /** 095 * Returns the parameters 'a' and 'b' for an equation y = a + bx, fitted to 096 * the data using ordinary least squares regression. The result is returned 097 * as a double[], where result[0] --> a, and result[1] --> b. 098 * 099 * @param data the data. 100 * @param series the series (zero-based index). 101 * 102 * @return The parameters. 103 */ 104 public static double[] getOLSRegression(XYDataset data, int series) { 105 106 int n = data.getItemCount(series); 107 if (n < 2) { 108 throw new IllegalArgumentException("Not enough data."); 109 } 110 111 double sumX = 0; 112 double sumY = 0; 113 double sumXX = 0; 114 double sumXY = 0; 115 for (int i = 0; i < n; i++) { 116 double x = data.getXValue(series, i); 117 double y = data.getYValue(series, i); 118 sumX += x; 119 sumY += y; 120 double xx = x * x; 121 sumXX += xx; 122 double xy = x * y; 123 sumXY += xy; 124 } 125 double sxx = sumXX - (sumX * sumX) / n; 126 double sxy = sumXY - (sumX * sumY) / n; 127 double xbar = sumX / n; 128 double ybar = sumY / n; 129 130 double[] result = new double[2]; 131 result[1] = sxy / sxx; 132 result[0] = ybar - result[1] * xbar; 133 134 return result; 135 136 } 137 138 /** 139 * Returns the parameters 'a' and 'b' for an equation y = ax^b, fitted to 140 * the data using a power regression equation. The result is returned as 141 * an array, where double[0] --> a, and double[1] --> b. 142 * 143 * @param data the data. 144 * 145 * @return The parameters. 146 */ 147 public static double[] getPowerRegression(double[][] data) { 148 149 int n = data.length; 150 if (n < 2) { 151 throw new IllegalArgumentException("Not enough data."); 152 } 153 154 double sumX = 0; 155 double sumY = 0; 156 double sumXX = 0; 157 double sumXY = 0; 158 for (int i = 0; i < n; i++) { 159 double x = Math.log(data[i][0]); 160 double y = Math.log(data[i][1]); 161 sumX += x; 162 sumY += y; 163 double xx = x * x; 164 sumXX += xx; 165 double xy = x * y; 166 sumXY += xy; 167 } 168 double sxx = sumXX - (sumX * sumX) / n; 169 double sxy = sumXY - (sumX * sumY) / n; 170 double xbar = sumX / n; 171 double ybar = sumY / n; 172 173 double[] result = new double[2]; 174 result[1] = sxy / sxx; 175 result[0] = Math.pow(Math.exp(1.0), ybar - result[1] * xbar); 176 177 return result; 178 179 } 180 181 /** 182 * Returns the parameters 'a' and 'b' for an equation y = ax^b, fitted to 183 * the data using a power regression equation. The result is returned as 184 * an array, where double[0] --> a, and double[1] --> b. 185 * 186 * @param data the data. 187 * @param series the series to fit the regression line against. 188 * 189 * @return The parameters. 190 */ 191 public static double[] getPowerRegression(XYDataset data, int series) { 192 193 int n = data.getItemCount(series); 194 if (n < 2) { 195 throw new IllegalArgumentException("Not enough data."); 196 } 197 198 double sumX = 0; 199 double sumY = 0; 200 double sumXX = 0; 201 double sumXY = 0; 202 for (int i = 0; i < n; i++) { 203 double x = Math.log(data.getXValue(series, i)); 204 double y = Math.log(data.getYValue(series, i)); 205 sumX += x; 206 sumY += y; 207 double xx = x * x; 208 sumXX += xx; 209 double xy = x * y; 210 sumXY += xy; 211 } 212 double sxx = sumXX - (sumX * sumX) / n; 213 double sxy = sumXY - (sumX * sumY) / n; 214 double xbar = sumX / n; 215 double ybar = sumY / n; 216 217 double[] result = new double[2]; 218 result[1] = sxy / sxx; 219 result[0] = Math.pow(Math.exp(1.0), ybar - result[1] * xbar); 220 221 return result; 222 223 } 224 225 /** 226 * Returns the parameters 'a0', 'a1', 'a2', ..., 'an' for a polynomial 227 * function of order n, y = a0 + a1 * x + a2 * x^2 + ... + an * x^n, 228 * fitted to the data using a polynomial regression equation. 229 * The result is returned as an array with a length of n + 2, 230 * where double[0] --> a0, double[1] --> a1, .., double[n] --> an. 231 * and double[n + 1] is the correlation coefficient R2 232 * Reference: J. D. Faires, R. L. Burden, Numerische Methoden (german 233 * edition), pp. 243ff and 327ff. 234 * 235 * @param dataset the dataset ({@code null} not permitted). 236 * @param series the series to fit the regression line against (the series 237 * must have at least order + 1 non-NaN items). 238 * @param order the order of the function (> 0). 239 * 240 * @return The parameters. 241 */ 242 public static double[] getPolynomialRegression(XYDataset dataset, 243 int series, int order) { 244 Args.nullNotPermitted(dataset, "dataset"); 245 int itemCount = dataset.getItemCount(series); 246 if (itemCount < order + 1) { 247 throw new IllegalArgumentException("Not enough data."); 248 } 249 int validItems = 0; 250 double[][] data = new double[2][itemCount]; 251 for(int item = 0; item < itemCount; item++){ 252 double x = dataset.getXValue(series, item); 253 double y = dataset.getYValue(series, item); 254 if (!Double.isNaN(x) && !Double.isNaN(y)){ 255 data[0][validItems] = x; 256 data[1][validItems] = y; 257 validItems++; 258 } 259 } 260 if (validItems < order + 1) { 261 throw new IllegalArgumentException("Not enough data."); 262 } 263 int equations = order + 1; 264 int coefficients = order + 2; 265 double[] result = new double[equations + 1]; 266 double[][] matrix = new double[equations][coefficients]; 267 double sumX = 0.0; 268 double sumY = 0.0; 269 270 for(int item = 0; item < validItems; item++){ 271 sumX += data[0][item]; 272 sumY += data[1][item]; 273 for(int eq = 0; eq < equations; eq++){ 274 for(int coe = 0; coe < coefficients - 1; coe++){ 275 matrix[eq][coe] += Math.pow(data[0][item],eq + coe); 276 } 277 matrix[eq][coefficients - 1] += data[1][item] 278 * Math.pow(data[0][item],eq); 279 } 280 } 281 double[][] subMatrix = calculateSubMatrix(matrix); 282 for (int eq = 1; eq < equations; eq++) { 283 matrix[eq][0] = 0; 284 if (coefficients - 1 >= 0) System.arraycopy(subMatrix[eq - 1], 0, matrix[eq], 1, coefficients - 1); 285 } 286 for (int eq = equations - 1; eq > -1; eq--) { 287 double value = matrix[eq][coefficients - 1]; 288 for (int coe = eq; coe < coefficients -1; coe++) { 289 value -= matrix[eq][coe] * result[coe]; 290 } 291 result[eq] = value / matrix[eq][eq]; 292 } 293 double meanY = sumY / validItems; 294 double yObsSquare = 0.0; 295 double yRegSquare = 0.0; 296 for (int item = 0; item < validItems; item++) { 297 double yCalc = 0; 298 for (int eq = 0; eq < equations; eq++) { 299 yCalc += result[eq] * Math.pow(data[0][item],eq); 300 } 301 yRegSquare += Math.pow(yCalc - meanY, 2); 302 yObsSquare += Math.pow(data[1][item] - meanY, 2); 303 } 304 double rSquare = yRegSquare / yObsSquare; 305 result[equations] = rSquare; 306 return result; 307 } 308 309 /** 310 * Returns a matrix with the following features: (1) the number of rows 311 * and columns is 1 less than that of the original matrix; (2)the matrix 312 * is triangular, i.e. all elements a (row, column) with column > row are 313 * zero. This method is used for calculating a polynomial regression. 314 * 315 * @param matrix the start matrix. 316 * 317 * @return The new matrix. 318 */ 319 private static double[][] calculateSubMatrix(double[][] matrix){ 320 int equations = matrix.length; 321 int coefficients = matrix[0].length; 322 double[][] result = new double[equations - 1][coefficients - 1]; 323 for (int eq = 1; eq < equations; eq++) { 324 double factor = matrix[0][0] / matrix[eq][0]; 325 for (int coe = 1; coe < coefficients; coe++) { 326 result[eq - 1][coe -1] = matrix[0][coe] - matrix[eq][coe] 327 * factor; 328 } 329 } 330 if (equations == 1) { 331 return result; 332 } 333 // check for zero pivot element 334 if (result[0][0] == 0) { 335 boolean found = false; 336 for (int i = 0; i < result.length; i ++) { 337 if (result[i][0] != 0) { 338 found = true; 339 double[] temp = result[0]; 340 System.arraycopy(result[i], 0, result[0], 0, 341 result[i].length); 342 System.arraycopy(temp, 0, result[i], 0, temp.length); 343 break; 344 } 345 } 346 if (!found) { 347 //System.out.println("Equation has no solution!"); 348 return new double[equations - 1][coefficients - 1]; 349 } 350 } 351 double[][] subMatrix = calculateSubMatrix(result); 352 for (int eq = 1; eq < equations - 1; eq++) { 353 result[eq][0] = 0; 354 if (coefficients - 1 - 1 >= 0) System.arraycopy(subMatrix[eq - 1], 0, result[eq], 1, coefficients - 1 - 1); 355 } 356 return result; 357 } 358 359}