001/* ===========================================================
002 * JFreeChart : a free chart library for the Java(tm) platform
003 * ===========================================================
004 *
005 * (C) Copyright 2000-2014, by Object Refinery Limited and Contributors.
006 *
007 * Project Info:  http://www.jfree.org/jfreechart/index.html
008 *
009 * This library is free software; you can redistribute it and/or modify it
010 * under the terms of the GNU Lesser General Public License as published by
011 * the Free Software Foundation; either version 2.1 of the License, or
012 * (at your option) any later version.
013 *
014 * This library is distributed in the hope that it will be useful, but
015 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
016 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
017 * License for more details.
018 *
019 * You should have received a copy of the GNU Lesser General Public
020 * License along with this library; if not, write to the Free Software
021 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301,
022 * USA.
023 *
024 * [Oracle and Java are registered trademarks of Oracle and/or its affiliates. 
025 * Other names may be trademarks of their respective owners.]
026 *
027 * ---------------
028 * Regression.java
029 * ---------------
030 * (C) Copyright 2002-2014, by Object Refinery Limited.
031 *
032 * Original Author:  David Gilbert (for Object Refinery Limited);
033 * Contributor(s):   Peter Kolb (patch 2795746);
034 *
035 * Changes
036 * -------
037 * 30-Sep-2002 : Version 1 (DG);
038 * 18-Aug-2003 : Added 'abstract' (DG);
039 * 15-Jul-2004 : Switched getX() with getXValue() and getY() with
040 *               getYValue() (DG);
041 * 29-May-2009 : Added support for polynomial regression, see patch 2795746
042 *               by Peter Kolb (DG);
043 * 03-Jul-2013 : Use ParamChecks (DG);
044 *
045 */
046
047package org.jfree.data.statistics;
048
049import org.jfree.chart.util.ParamChecks;
050import org.jfree.data.xy.XYDataset;
051
052/**
053 * A utility class for fitting regression curves to data.
054 */
055public abstract class Regression {
056
057    /**
058     * Returns the parameters 'a' and 'b' for an equation y = a + bx, fitted to
059     * the data using ordinary least squares regression.  The result is
060     * returned as a double[], where result[0] --> a, and result[1] --> b.
061     *
062     * @param data  the data.
063     *
064     * @return The parameters.
065     */
066    public static double[] getOLSRegression(double[][] data) {
067
068        int n = data.length;
069        if (n < 2) {
070            throw new IllegalArgumentException("Not enough data.");
071        }
072
073        double sumX = 0;
074        double sumY = 0;
075        double sumXX = 0;
076        double sumXY = 0;
077        for (int i = 0; i < n; i++) {
078            double x = data[i][0];
079            double y = data[i][1];
080            sumX += x;
081            sumY += y;
082            double xx = x * x;
083            sumXX += xx;
084            double xy = x * y;
085            sumXY += xy;
086        }
087        double sxx = sumXX - (sumX * sumX) / n;
088        double sxy = sumXY - (sumX * sumY) / n;
089        double xbar = sumX / n;
090        double ybar = sumY / n;
091
092        double[] result = new double[2];
093        result[1] = sxy / sxx;
094        result[0] = ybar - result[1] * xbar;
095
096        return result;
097
098    }
099
100    /**
101     * Returns the parameters 'a' and 'b' for an equation y = a + bx, fitted to
102     * the data using ordinary least squares regression. The result is returned
103     * as a double[], where result[0] --&gt; a, and result[1] --&gt; b.
104     *
105     * @param data  the data.
106     * @param series  the series (zero-based index).
107     *
108     * @return The parameters.
109     */
110    public static double[] getOLSRegression(XYDataset data, int series) {
111
112        int n = data.getItemCount(series);
113        if (n < 2) {
114            throw new IllegalArgumentException("Not enough data.");
115        }
116
117        double sumX = 0;
118        double sumY = 0;
119        double sumXX = 0;
120        double sumXY = 0;
121        for (int i = 0; i < n; i++) {
122            double x = data.getXValue(series, i);
123            double y = data.getYValue(series, i);
124            sumX += x;
125            sumY += y;
126            double xx = x * x;
127            sumXX += xx;
128            double xy = x * y;
129            sumXY += xy;
130        }
131        double sxx = sumXX - (sumX * sumX) / n;
132        double sxy = sumXY - (sumX * sumY) / n;
133        double xbar = sumX / n;
134        double ybar = sumY / n;
135
136        double[] result = new double[2];
137        result[1] = sxy / sxx;
138        result[0] = ybar - result[1] * xbar;
139
140        return result;
141
142    }
143
144    /**
145     * Returns the parameters 'a' and 'b' for an equation y = ax^b, fitted to
146     * the data using a power regression equation.  The result is returned as
147     * an array, where double[0] --&gt; a, and double[1] --&gt; b.
148     *
149     * @param data  the data.
150     *
151     * @return The parameters.
152     */
153    public static double[] getPowerRegression(double[][] data) {
154
155        int n = data.length;
156        if (n < 2) {
157            throw new IllegalArgumentException("Not enough data.");
158        }
159
160        double sumX = 0;
161        double sumY = 0;
162        double sumXX = 0;
163        double sumXY = 0;
164        for (int i = 0; i < n; i++) {
165            double x = Math.log(data[i][0]);
166            double y = Math.log(data[i][1]);
167            sumX += x;
168            sumY += y;
169            double xx = x * x;
170            sumXX += xx;
171            double xy = x * y;
172            sumXY += xy;
173        }
174        double sxx = sumXX - (sumX * sumX) / n;
175        double sxy = sumXY - (sumX * sumY) / n;
176        double xbar = sumX / n;
177        double ybar = sumY / n;
178
179        double[] result = new double[2];
180        result[1] = sxy / sxx;
181        result[0] = Math.pow(Math.exp(1.0), ybar - result[1] * xbar);
182
183        return result;
184
185    }
186
187    /**
188     * Returns the parameters 'a' and 'b' for an equation y = ax^b, fitted to
189     * the data using a power regression equation.  The result is returned as
190     * an array, where double[0] --&gt; a, and double[1] --&gt; b.
191     *
192     * @param data  the data.
193     * @param series  the series to fit the regression line against.
194     *
195     * @return The parameters.
196     */
197    public static double[] getPowerRegression(XYDataset data, int series) {
198
199        int n = data.getItemCount(series);
200        if (n < 2) {
201            throw new IllegalArgumentException("Not enough data.");
202        }
203
204        double sumX = 0;
205        double sumY = 0;
206        double sumXX = 0;
207        double sumXY = 0;
208        for (int i = 0; i < n; i++) {
209            double x = Math.log(data.getXValue(series, i));
210            double y = Math.log(data.getYValue(series, i));
211            sumX += x;
212            sumY += y;
213            double xx = x * x;
214            sumXX += xx;
215            double xy = x * y;
216            sumXY += xy;
217        }
218        double sxx = sumXX - (sumX * sumX) / n;
219        double sxy = sumXY - (sumX * sumY) / n;
220        double xbar = sumX / n;
221        double ybar = sumY / n;
222
223        double[] result = new double[2];
224        result[1] = sxy / sxx;
225        result[0] = Math.pow(Math.exp(1.0), ybar - result[1] * xbar);
226
227        return result;
228
229    }
230
231    /**
232     * Returns the parameters 'a0', 'a1', 'a2', ..., 'an' for a polynomial 
233     * function of order n, y = a0 + a1 * x + a2 * x^2 + ... + an * x^n,
234     * fitted to the data using a polynomial regression equation.
235     * The result is returned as an array with a length of n + 2,
236     * where double[0] --&gt; a0, double[1] --&gt; a1, .., double[n] --&gt; an.
237     * and double[n + 1] is the correlation coefficient R2
238     * Reference: J. D. Faires, R. L. Burden, Numerische Methoden (german
239     * edition), pp. 243ff and 327ff.
240     *
241     * @param dataset  the dataset (<code>null</code> not permitted).
242     * @param series  the series to fit the regression line against (the series
243     *         must have at least order + 1 non-NaN items).
244     * @param order  the order of the function (&gt; 0).
245     *
246     * @return The parameters.
247     *
248     * @since 1.0.14
249     */
250    public static double[] getPolynomialRegression(XYDataset dataset, 
251            int series, int order) {
252        ParamChecks.nullNotPermitted(dataset, "dataset");
253        int itemCount = dataset.getItemCount(series);
254        if (itemCount < order + 1) {
255            throw new IllegalArgumentException("Not enough data.");
256        }
257        int validItems = 0;
258        double[][] data = new double[2][itemCount];
259        for(int item = 0; item < itemCount; item++){
260            double x = dataset.getXValue(series, item);
261            double y = dataset.getYValue(series, item);
262            if (!Double.isNaN(x) && !Double.isNaN(y)){
263                data[0][validItems] = x;
264                data[1][validItems] = y;
265                validItems++;
266            }
267        }
268        if (validItems < order + 1) {
269            throw new IllegalArgumentException("Not enough data.");
270        }
271        int equations = order + 1;
272        int coefficients = order + 2;
273        double[] result = new double[equations + 1];
274        double[][] matrix = new double[equations][coefficients];
275        double sumX = 0.0;
276        double sumY = 0.0;
277
278        for(int item = 0; item < validItems; item++){
279            sumX += data[0][item];
280            sumY += data[1][item];
281            for(int eq = 0; eq < equations; eq++){
282                for(int coe = 0; coe < coefficients - 1; coe++){
283                    matrix[eq][coe] += Math.pow(data[0][item],eq + coe);
284                }
285                matrix[eq][coefficients - 1] += data[1][item]
286                        * Math.pow(data[0][item],eq);
287            }
288        }
289        double[][] subMatrix = calculateSubMatrix(matrix);
290        for (int eq = 1; eq < equations; eq++) {
291            matrix[eq][0] = 0;
292            for (int coe = 1; coe < coefficients; coe++) {
293                matrix[eq][coe] = subMatrix[eq - 1][coe - 1];
294            }
295        }
296        for (int eq = equations - 1; eq > -1; eq--) {
297            double value = matrix[eq][coefficients - 1];
298            for (int coe = eq; coe < coefficients -1; coe++) {
299                value -= matrix[eq][coe] * result[coe];
300            }
301            result[eq] = value / matrix[eq][eq];
302        }
303        double meanY = sumY / validItems;
304        double yObsSquare = 0.0;
305        double yRegSquare = 0.0;
306        for (int item = 0; item < validItems; item++) {
307            double yCalc = 0;
308            for (int eq = 0; eq < equations; eq++) {
309                yCalc += result[eq] * Math.pow(data[0][item],eq);
310            }
311            yRegSquare += Math.pow(yCalc - meanY, 2);
312            yObsSquare += Math.pow(data[1][item] - meanY, 2);
313        }
314        double rSquare = yRegSquare / yObsSquare;
315        result[equations] = rSquare;
316        return result;
317    }
318
319    /**
320     * Returns a matrix with the following features: (1) the number of rows
321     * and columns is 1 less than that of the original matrix; (2)the matrix
322     * is triangular, i.e. all elements a (row, column) with column &gt; row are
323     * zero.  This method is used for calculating a polynomial regression.
324     * 
325     * @param matrix  the start matrix.
326     *
327     * @return The new matrix.
328     */
329    private static double[][] calculateSubMatrix(double[][] matrix){
330        int equations = matrix.length;
331        int coefficients = matrix[0].length;
332        double[][] result = new double[equations - 1][coefficients - 1];
333        for (int eq = 1; eq < equations; eq++) {
334            double factor = matrix[0][0] / matrix[eq][0];
335            for (int coe = 1; coe < coefficients; coe++) {
336                result[eq - 1][coe -1] = matrix[0][coe] - matrix[eq][coe]
337                        * factor;
338            }
339        }
340        if (equations == 1) {
341            return result;
342        }
343        // check for zero pivot element
344        if (result[0][0] == 0) {
345            boolean found = false;
346            for (int i = 0; i < result.length; i ++) {
347                if (result[i][0] != 0) {
348                    found = true;
349                    double[] temp = result[0];
350                    System.arraycopy(result[i], 0, result[0], 0, 
351                            result[i].length);
352                    System.arraycopy(temp, 0, result[i], 0, temp.length);
353                    break;
354                }
355            }
356            if (!found) {
357                //System.out.println("Equation has no solution!");
358                return new double[equations - 1][coefficients - 1];
359            }
360        }
361        double[][] subMatrix = calculateSubMatrix(result);
362        for (int eq = 1; eq < equations -  1; eq++) {
363            result[eq][0] = 0;
364            for (int coe = 1; coe < coefficients - 1; coe++) {
365                result[eq][coe] = subMatrix[eq - 1][coe - 1];
366            }
367        }
368        return result;
369    }
370
371}