001/* ======================================================
002 * JFreeChart : a chart library for the Java(tm) platform
003 * ======================================================
004 *
005 * (C) Copyright 2000-present, by David Gilbert and Contributors.
006 *
007 * Project Info:  https://www.jfree.org/jfreechart/index.html
008 *
009 * This library is free software; you can redistribute it and/or modify it
010 * under the terms of the GNU Lesser General Public License as published by
011 * the Free Software Foundation; either version 2.1 of the License, or
012 * (at your option) any later version.
013 *
014 * This library is distributed in the hope that it will be useful, but
015 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
016 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
017 * License for more details.
018 *
019 * You should have received a copy of the GNU Lesser General Public
020 * License along with this library; if not, write to the Free Software
021 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301,
022 * USA.
023 *
024 * [Oracle and Java are registered trademarks of Oracle and/or its affiliates. 
025 * Other names may be trademarks of their respective owners.]
026 *
027 * ---------------
028 * Regression.java
029 * ---------------
030 * (C) Copyright 2002-present, by David Gilbert.
031 *
032 * Original Author:  David Gilbert;
033 * Contributor(s):   Peter Kolb (patch 2795746);
034 *
035 */
036
037package org.jfree.data.statistics;
038
039import org.jfree.chart.util.Args;
040import org.jfree.data.xy.XYDataset;
041
042/**
043 * A utility class for fitting regression curves to data.
044 */
045public class Regression {
046
/**
 * Hidden constructor.  The class only exposes static methods, so there
 * is no reason to create an instance of it.
 */
private Regression() {
    super();
}
050
051    /**
052     * Returns the parameters 'a' and 'b' for an equation y = a + bx, fitted to
053     * the data using ordinary least squares regression.  The result is
054     * returned as a double[], where result[0] --> a, and result[1] --> b.
055     *
056     * @param data  the data.
057     *
058     * @return The parameters.
059     */
060    public static double[] getOLSRegression(double[][] data) {
061
062        int n = data.length;
063        if (n < 2) {
064            throw new IllegalArgumentException("Not enough data.");
065        }
066
067        double sumX = 0;
068        double sumY = 0;
069        double sumXX = 0;
070        double sumXY = 0;
071        for (int i = 0; i < n; i++) {
072            double x = data[i][0];
073            double y = data[i][1];
074            sumX += x;
075            sumY += y;
076            double xx = x * x;
077            sumXX += xx;
078            double xy = x * y;
079            sumXY += xy;
080        }
081        double sxx = sumXX - (sumX * sumX) / n;
082        double sxy = sumXY - (sumX * sumY) / n;
083        double xbar = sumX / n;
084        double ybar = sumY / n;
085
086        double[] result = new double[2];
087        result[1] = sxy / sxx;
088        result[0] = ybar - result[1] * xbar;
089
090        return result;
091
092    }
093
094    /**
095     * Returns the parameters 'a' and 'b' for an equation y = a + bx, fitted to
096     * the data using ordinary least squares regression. The result is returned
097     * as a double[], where result[0] --&gt; a, and result[1] --&gt; b.
098     *
099     * @param data  the data.
100     * @param series  the series (zero-based index).
101     *
102     * @return The parameters.
103     */
104    public static double[] getOLSRegression(XYDataset data, int series) {
105
106        int n = data.getItemCount(series);
107        if (n < 2) {
108            throw new IllegalArgumentException("Not enough data.");
109        }
110
111        double sumX = 0;
112        double sumY = 0;
113        double sumXX = 0;
114        double sumXY = 0;
115        for (int i = 0; i < n; i++) {
116            double x = data.getXValue(series, i);
117            double y = data.getYValue(series, i);
118            sumX += x;
119            sumY += y;
120            double xx = x * x;
121            sumXX += xx;
122            double xy = x * y;
123            sumXY += xy;
124        }
125        double sxx = sumXX - (sumX * sumX) / n;
126        double sxy = sumXY - (sumX * sumY) / n;
127        double xbar = sumX / n;
128        double ybar = sumY / n;
129
130        double[] result = new double[2];
131        result[1] = sxy / sxx;
132        result[0] = ybar - result[1] * xbar;
133
134        return result;
135
136    }
137
138    /**
139     * Returns the parameters 'a' and 'b' for an equation y = ax^b, fitted to
140     * the data using a power regression equation.  The result is returned as
141     * an array, where double[0] --&gt; a, and double[1] --&gt; b.
142     *
143     * @param data  the data.
144     *
145     * @return The parameters.
146     */
147    public static double[] getPowerRegression(double[][] data) {
148
149        int n = data.length;
150        if (n < 2) {
151            throw new IllegalArgumentException("Not enough data.");
152        }
153
154        double sumX = 0;
155        double sumY = 0;
156        double sumXX = 0;
157        double sumXY = 0;
158        for (int i = 0; i < n; i++) {
159            double x = Math.log(data[i][0]);
160            double y = Math.log(data[i][1]);
161            sumX += x;
162            sumY += y;
163            double xx = x * x;
164            sumXX += xx;
165            double xy = x * y;
166            sumXY += xy;
167        }
168        double sxx = sumXX - (sumX * sumX) / n;
169        double sxy = sumXY - (sumX * sumY) / n;
170        double xbar = sumX / n;
171        double ybar = sumY / n;
172
173        double[] result = new double[2];
174        result[1] = sxy / sxx;
175        result[0] = Math.pow(Math.exp(1.0), ybar - result[1] * xbar);
176
177        return result;
178
179    }
180
181    /**
182     * Returns the parameters 'a' and 'b' for an equation y = ax^b, fitted to
183     * the data using a power regression equation.  The result is returned as
184     * an array, where double[0] --&gt; a, and double[1] --&gt; b.
185     *
186     * @param data  the data.
187     * @param series  the series to fit the regression line against.
188     *
189     * @return The parameters.
190     */
191    public static double[] getPowerRegression(XYDataset data, int series) {
192
193        int n = data.getItemCount(series);
194        if (n < 2) {
195            throw new IllegalArgumentException("Not enough data.");
196        }
197
198        double sumX = 0;
199        double sumY = 0;
200        double sumXX = 0;
201        double sumXY = 0;
202        for (int i = 0; i < n; i++) {
203            double x = Math.log(data.getXValue(series, i));
204            double y = Math.log(data.getYValue(series, i));
205            sumX += x;
206            sumY += y;
207            double xx = x * x;
208            sumXX += xx;
209            double xy = x * y;
210            sumXY += xy;
211        }
212        double sxx = sumXX - (sumX * sumX) / n;
213        double sxy = sumXY - (sumX * sumY) / n;
214        double xbar = sumX / n;
215        double ybar = sumY / n;
216
217        double[] result = new double[2];
218        result[1] = sxy / sxx;
219        result[0] = Math.pow(Math.exp(1.0), ybar - result[1] * xbar);
220
221        return result;
222
223    }
224
225    /**
226     * Returns the parameters 'a0', 'a1', 'a2', ..., 'an' for a polynomial 
227     * function of order n, y = a0 + a1 * x + a2 * x^2 + ... + an * x^n,
228     * fitted to the data using a polynomial regression equation.
229     * The result is returned as an array with a length of n + 2,
230     * where double[0] --&gt; a0, double[1] --&gt; a1, .., double[n] --&gt; an.
231     * and double[n + 1] is the correlation coefficient R2
232     * Reference: J. D. Faires, R. L. Burden, Numerische Methoden (german
233     * edition), pp. 243ff and 327ff.
234     *
235     * @param dataset  the dataset ({@code null} not permitted).
236     * @param series  the series to fit the regression line against (the series
237     *         must have at least order + 1 non-NaN items).
238     * @param order  the order of the function (&gt; 0).
239     *
240     * @return The parameters.
241     */
242    public static double[] getPolynomialRegression(XYDataset dataset, 
243            int series, int order) {
244        Args.nullNotPermitted(dataset, "dataset");
245        int itemCount = dataset.getItemCount(series);
246        if (itemCount < order + 1) {
247            throw new IllegalArgumentException("Not enough data.");
248        }
249        int validItems = 0;
250        double[][] data = new double[2][itemCount];
251        for(int item = 0; item < itemCount; item++){
252            double x = dataset.getXValue(series, item);
253            double y = dataset.getYValue(series, item);
254            if (!Double.isNaN(x) && !Double.isNaN(y)){
255                data[0][validItems] = x;
256                data[1][validItems] = y;
257                validItems++;
258            }
259        }
260        if (validItems < order + 1) {
261            throw new IllegalArgumentException("Not enough data.");
262        }
263        int equations = order + 1;
264        int coefficients = order + 2;
265        double[] result = new double[equations + 1];
266        double[][] matrix = new double[equations][coefficients];
267        double sumX = 0.0;
268        double sumY = 0.0;
269
270        for(int item = 0; item < validItems; item++){
271            sumX += data[0][item];
272            sumY += data[1][item];
273            for(int eq = 0; eq < equations; eq++){
274                for(int coe = 0; coe < coefficients - 1; coe++){
275                    matrix[eq][coe] += Math.pow(data[0][item],eq + coe);
276                }
277                matrix[eq][coefficients - 1] += data[1][item]
278                        * Math.pow(data[0][item],eq);
279            }
280        }
281        double[][] subMatrix = calculateSubMatrix(matrix);
282        for (int eq = 1; eq < equations; eq++) {
283            matrix[eq][0] = 0;
284            if (coefficients - 1 >= 0) System.arraycopy(subMatrix[eq - 1], 0, matrix[eq], 1, coefficients - 1);
285        }
286        for (int eq = equations - 1; eq > -1; eq--) {
287            double value = matrix[eq][coefficients - 1];
288            for (int coe = eq; coe < coefficients -1; coe++) {
289                value -= matrix[eq][coe] * result[coe];
290            }
291            result[eq] = value / matrix[eq][eq];
292        }
293        double meanY = sumY / validItems;
294        double yObsSquare = 0.0;
295        double yRegSquare = 0.0;
296        for (int item = 0; item < validItems; item++) {
297            double yCalc = 0;
298            for (int eq = 0; eq < equations; eq++) {
299                yCalc += result[eq] * Math.pow(data[0][item],eq);
300            }
301            yRegSquare += Math.pow(yCalc - meanY, 2);
302            yObsSquare += Math.pow(data[1][item] - meanY, 2);
303        }
304        double rSquare = yRegSquare / yObsSquare;
305        result[equations] = rSquare;
306        return result;
307    }
308
309    /**
310     * Returns a matrix with the following features: (1) the number of rows
311     * and columns is 1 less than that of the original matrix; (2)the matrix
312     * is triangular, i.e. all elements a (row, column) with column &gt; row are
313     * zero.  This method is used for calculating a polynomial regression.
314     * 
315     * @param matrix  the start matrix.
316     *
317     * @return The new matrix.
318     */
319    private static double[][] calculateSubMatrix(double[][] matrix){
320        int equations = matrix.length;
321        int coefficients = matrix[0].length;
322        double[][] result = new double[equations - 1][coefficients - 1];
323        for (int eq = 1; eq < equations; eq++) {
324            double factor = matrix[0][0] / matrix[eq][0];
325            for (int coe = 1; coe < coefficients; coe++) {
326                result[eq - 1][coe -1] = matrix[0][coe] - matrix[eq][coe]
327                        * factor;
328            }
329        }
330        if (equations == 1) {
331            return result;
332        }
333        // check for zero pivot element
334        if (result[0][0] == 0) {
335            boolean found = false;
336            for (int i = 0; i < result.length; i ++) {
337                if (result[i][0] != 0) {
338                    found = true;
339                    double[] temp = result[0];
340                    System.arraycopy(result[i], 0, result[0], 0, 
341                            result[i].length);
342                    System.arraycopy(temp, 0, result[i], 0, temp.length);
343                    break;
344                }
345            }
346            if (!found) {
347                //System.out.println("Equation has no solution!");
348                return new double[equations - 1][coefficients - 1];
349            }
350        }
351        double[][] subMatrix = calculateSubMatrix(result);
352        for (int eq = 1; eq < equations -  1; eq++) {
353            result[eq][0] = 0;
354            if (coefficients - 1 - 1 >= 0) System.arraycopy(subMatrix[eq - 1], 0, result[eq], 1, coefficients - 1 - 1);
355        }
356        return result;
357    }
358
359}