001/* =========================================================== 002 * JFreeChart : a free chart library for the Java(tm) platform 003 * =========================================================== 004 * 005 * (C) Copyright 2000-2014, by Object Refinery Limited and Contributors. 006 * 007 * Project Info: http://www.jfree.org/jfreechart/index.html 008 * 009 * This library is free software; you can redistribute it and/or modify it 010 * under the terms of the GNU Lesser General Public License as published by 011 * the Free Software Foundation; either version 2.1 of the License, or 012 * (at your option) any later version. 013 * 014 * This library is distributed in the hope that it will be useful, but 015 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 016 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 017 * License for more details. 018 * 019 * You should have received a copy of the GNU Lesser General Public 020 * License along with this library; if not, write to the Free Software 021 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, 022 * USA. 023 * 024 * [Oracle and Java are registered trademarks of Oracle and/or its affiliates. 025 * Other names may be trademarks of their respective owners.] 026 * 027 * --------------- 028 * Regression.java 029 * --------------- 030 * (C) Copyright 2002-2014, by Object Refinery Limited. 031 * 032 * Original Author: David Gilbert (for Object Refinery Limited); 033 * Contributor(s): Peter Kolb (patch 2795746); 034 * 035 * Changes 036 * ------- 037 * 30-Sep-2002 : Version 1 (DG); 038 * 18-Aug-2003 : Added 'abstract' (DG); 039 * 15-Jul-2004 : Switched getX() with getXValue() and getY() with 040 * getYValue() (DG); 041 * 29-May-2009 : Added support for polynomial regression, see patch 2795746 042 * by Peter Kolb (DG); 043 * 03-Jul-2013 : Use ParamChecks (DG); 044 * 045 */ 046 047package org.jfree.data.statistics; 048 049import org.jfree.chart.util.ParamChecks; 050import org.jfree.data.xy.XYDataset; 051 052/** 053 * A utility class for fitting regression curves to data. 054 */ 055public abstract class Regression { 056 057 /** 058 * Returns the parameters 'a' and 'b' for an equation y = a + bx, fitted to 059 * the data using ordinary least squares regression. The result is 060 * returned as a double[], where result[0] --> a, and result[1] --> b. 061 * 062 * @param data the data. 063 * 064 * @return The parameters. 065 */ 066 public static double[] getOLSRegression(double[][] data) { 067 068 int n = data.length; 069 if (n < 2) { 070 throw new IllegalArgumentException("Not enough data."); 071 } 072 073 double sumX = 0; 074 double sumY = 0; 075 double sumXX = 0; 076 double sumXY = 0; 077 for (int i = 0; i < n; i++) { 078 double x = data[i][0]; 079 double y = data[i][1]; 080 sumX += x; 081 sumY += y; 082 double xx = x * x; 083 sumXX += xx; 084 double xy = x * y; 085 sumXY += xy; 086 } 087 double sxx = sumXX - (sumX * sumX) / n; 088 double sxy = sumXY - (sumX * sumY) / n; 089 double xbar = sumX / n; 090 double ybar = sumY / n; 091 092 double[] result = new double[2]; 093 result[1] = sxy / sxx; 094 result[0] = ybar - result[1] * xbar; 095 096 return result; 097 098 } 099 100 /** 101 * Returns the parameters 'a' and 'b' for an equation y = a + bx, fitted to 102 * the data using ordinary least squares regression. The result is returned 103 * as a double[], where result[0] --> a, and result[1] --> b. 104 * 105 * @param data the data. 106 * @param series the series (zero-based index). 107 * 108 * @return The parameters. 109 */ 110 public static double[] getOLSRegression(XYDataset data, int series) { 111 112 int n = data.getItemCount(series); 113 if (n < 2) { 114 throw new IllegalArgumentException("Not enough data."); 115 } 116 117 double sumX = 0; 118 double sumY = 0; 119 double sumXX = 0; 120 double sumXY = 0; 121 for (int i = 0; i < n; i++) { 122 double x = data.getXValue(series, i); 123 double y = data.getYValue(series, i); 124 sumX += x; 125 sumY += y; 126 double xx = x * x; 127 sumXX += xx; 128 double xy = x * y; 129 sumXY += xy; 130 } 131 double sxx = sumXX - (sumX * sumX) / n; 132 double sxy = sumXY - (sumX * sumY) / n; 133 double xbar = sumX / n; 134 double ybar = sumY / n; 135 136 double[] result = new double[2]; 137 result[1] = sxy / sxx; 138 result[0] = ybar - result[1] * xbar; 139 140 return result; 141 142 } 143 144 /** 145 * Returns the parameters 'a' and 'b' for an equation y = ax^b, fitted to 146 * the data using a power regression equation. The result is returned as 147 * an array, where double[0] --> a, and double[1] --> b. 148 * 149 * @param data the data. 150 * 151 * @return The parameters. 152 */ 153 public static double[] getPowerRegression(double[][] data) { 154 155 int n = data.length; 156 if (n < 2) { 157 throw new IllegalArgumentException("Not enough data."); 158 } 159 160 double sumX = 0; 161 double sumY = 0; 162 double sumXX = 0; 163 double sumXY = 0; 164 for (int i = 0; i < n; i++) { 165 double x = Math.log(data[i][0]); 166 double y = Math.log(data[i][1]); 167 sumX += x; 168 sumY += y; 169 double xx = x * x; 170 sumXX += xx; 171 double xy = x * y; 172 sumXY += xy; 173 } 174 double sxx = sumXX - (sumX * sumX) / n; 175 double sxy = sumXY - (sumX * sumY) / n; 176 double xbar = sumX / n; 177 double ybar = sumY / n; 178 179 double[] result = new double[2]; 180 result[1] = sxy / sxx; 181 result[0] = Math.pow(Math.exp(1.0), ybar - result[1] * xbar); 182 183 return result; 184 185 } 186 187 /** 188 * Returns the parameters 'a' and 'b' for an equation y = ax^b, fitted to 189 * the data using a power regression equation. The result is returned as 190 * an array, where double[0] --> a, and double[1] --> b. 191 * 192 * @param data the data. 193 * @param series the series to fit the regression line against. 194 * 195 * @return The parameters. 196 */ 197 public static double[] getPowerRegression(XYDataset data, int series) { 198 199 int n = data.getItemCount(series); 200 if (n < 2) { 201 throw new IllegalArgumentException("Not enough data."); 202 } 203 204 double sumX = 0; 205 double sumY = 0; 206 double sumXX = 0; 207 double sumXY = 0; 208 for (int i = 0; i < n; i++) { 209 double x = Math.log(data.getXValue(series, i)); 210 double y = Math.log(data.getYValue(series, i)); 211 sumX += x; 212 sumY += y; 213 double xx = x * x; 214 sumXX += xx; 215 double xy = x * y; 216 sumXY += xy; 217 } 218 double sxx = sumXX - (sumX * sumX) / n; 219 double sxy = sumXY - (sumX * sumY) / n; 220 double xbar = sumX / n; 221 double ybar = sumY / n; 222 223 double[] result = new double[2]; 224 result[1] = sxy / sxx; 225 result[0] = Math.pow(Math.exp(1.0), ybar - result[1] * xbar); 226 227 return result; 228 229 } 230 231 /** 232 * Returns the parameters 'a0', 'a1', 'a2', ..., 'an' for a polynomial 233 * function of order n, y = a0 + a1 * x + a2 * x^2 + ... + an * x^n, 234 * fitted to the data using a polynomial regression equation. 235 * The result is returned as an array with a length of n + 2, 236 * where double[0] --> a0, double[1] --> a1, .., double[n] --> an. 237 * and double[n + 1] is the correlation coefficient R2 238 * Reference: J. D. Faires, R. L. Burden, Numerische Methoden (german 239 * edition), pp. 243ff and 327ff. 240 * 241 * @param dataset the dataset (<code>null</code> not permitted). 242 * @param series the series to fit the regression line against (the series 243 * must have at least order + 1 non-NaN items). 244 * @param order the order of the function (> 0). 245 * 246 * @return The parameters. 247 * 248 * @since 1.0.14 249 */ 250 public static double[] getPolynomialRegression(XYDataset dataset, 251 int series, int order) { 252 ParamChecks.nullNotPermitted(dataset, "dataset"); 253 int itemCount = dataset.getItemCount(series); 254 if (itemCount < order + 1) { 255 throw new IllegalArgumentException("Not enough data."); 256 } 257 int validItems = 0; 258 double[][] data = new double[2][itemCount]; 259 for(int item = 0; item < itemCount; item++){ 260 double x = dataset.getXValue(series, item); 261 double y = dataset.getYValue(series, item); 262 if (!Double.isNaN(x) && !Double.isNaN(y)){ 263 data[0][validItems] = x; 264 data[1][validItems] = y; 265 validItems++; 266 } 267 } 268 if (validItems < order + 1) { 269 throw new IllegalArgumentException("Not enough data."); 270 } 271 int equations = order + 1; 272 int coefficients = order + 2; 273 double[] result = new double[equations + 1]; 274 double[][] matrix = new double[equations][coefficients]; 275 double sumX = 0.0; 276 double sumY = 0.0; 277 278 for(int item = 0; item < validItems; item++){ 279 sumX += data[0][item]; 280 sumY += data[1][item]; 281 for(int eq = 0; eq < equations; eq++){ 282 for(int coe = 0; coe < coefficients - 1; coe++){ 283 matrix[eq][coe] += Math.pow(data[0][item],eq + coe); 284 } 285 matrix[eq][coefficients - 1] += data[1][item] 286 * Math.pow(data[0][item],eq); 287 } 288 } 289 double[][] subMatrix = calculateSubMatrix(matrix); 290 for (int eq = 1; eq < equations; eq++) { 291 matrix[eq][0] = 0; 292 for (int coe = 1; coe < coefficients; coe++) { 293 matrix[eq][coe] = subMatrix[eq - 1][coe - 1]; 294 } 295 } 296 for (int eq = equations - 1; eq > -1; eq--) { 297 double value = matrix[eq][coefficients - 1]; 298 for (int coe = eq; coe < coefficients -1; coe++) { 299 value -= matrix[eq][coe] * result[coe]; 300 } 301 result[eq] = value / matrix[eq][eq]; 302 } 303 double meanY = sumY / validItems; 304 double yObsSquare = 0.0; 305 double yRegSquare = 0.0; 306 for (int item = 0; item < validItems; item++) { 307 double yCalc = 0; 308 for (int eq = 0; eq < equations; eq++) { 309 yCalc += result[eq] * Math.pow(data[0][item],eq); 310 } 311 yRegSquare += Math.pow(yCalc - meanY, 2); 312 yObsSquare += Math.pow(data[1][item] - meanY, 2); 313 } 314 double rSquare = yRegSquare / yObsSquare; 315 result[equations] = rSquare; 316 return result; 317 } 318 319 /** 320 * Returns a matrix with the following features: (1) the number of rows 321 * and columns is 1 less than that of the original matrix; (2)the matrix 322 * is triangular, i.e. all elements a (row, column) with column > row are 323 * zero. This method is used for calculating a polynomial regression. 324 * 325 * @param matrix the start matrix. 326 * 327 * @return The new matrix. 328 */ 329 private static double[][] calculateSubMatrix(double[][] matrix){ 330 int equations = matrix.length; 331 int coefficients = matrix[0].length; 332 double[][] result = new double[equations - 1][coefficients - 1]; 333 for (int eq = 1; eq < equations; eq++) { 334 double factor = matrix[0][0] / matrix[eq][0]; 335 for (int coe = 1; coe < coefficients; coe++) { 336 result[eq - 1][coe -1] = matrix[0][coe] - matrix[eq][coe] 337 * factor; 338 } 339 } 340 if (equations == 1) { 341 return result; 342 } 343 // check for zero pivot element 344 if (result[0][0] == 0) { 345 boolean found = false; 346 for (int i = 0; i < result.length; i ++) { 347 if (result[i][0] != 0) { 348 found = true; 349 double[] temp = result[0]; 350 System.arraycopy(result[i], 0, result[0], 0, 351 result[i].length); 352 System.arraycopy(temp, 0, result[i], 0, temp.length); 353 break; 354 } 355 } 356 if (!found) { 357 //System.out.println("Equation has no solution!"); 358 return new double[equations - 1][coefficients - 1]; 359 } 360 } 361 double[][] subMatrix = calculateSubMatrix(result); 362 for (int eq = 1; eq < equations - 1; eq++) { 363 result[eq][0] = 0; 364 for (int coe = 1; coe < coefficients - 1; coe++) { 365 result[eq][coe] = subMatrix[eq - 1][coe - 1]; 366 } 367 } 368 return result; 369 } 370 371}