001 /* ===========================================================
002 * JFreeChart : a free chart library for the Java(tm) platform
003 * ===========================================================
004 *
005 * (C) Copyright 2000-2006, by Object Refinery Limited and Contributors.
006 *
007 * Project Info: http://www.jfree.org/jfreechart/index.html
008 *
009 * This library is free software; you can redistribute it and/or modify it
010 * under the terms of the GNU Lesser General Public License as published by
011 * the Free Software Foundation; either version 2.1 of the License, or
012 * (at your option) any later version.
013 *
014 * This library is distributed in the hope that it will be useful, but
015 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
016 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
017 * License for more details.
018 *
019 * You should have received a copy of the GNU Lesser General Public
020 * License along with this library; if not, write to the Free Software
021 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
022 * USA.
023 *
024 * [Java is a trademark or registered trademark of Sun Microsystems, Inc.
025 * in the United States and other countries.]
026 *
027 * ---------------
028 * Statistics.java
029 * ---------------
030 * (C) Copyright 2000-2006, by Matthew Wright and Contributors.
031 *
032 * Original Author: Matthew Wright;
033 * Contributor(s): David Gilbert (for Object Refinery Limited);
034 *
035 * $Id: Statistics.java,v 1.5.2.2 2006/11/16 11:19:47 mungady Exp $
036 *
037 * Changes (from 08-Nov-2001)
038 * --------------------------
039 * 08-Nov-2001 : Added standard header and tidied Javadoc comments (DG);
040 * Moved from JFreeChart to package com.jrefinery.data.* in
041 * JCommon class library (DG);
042 * 24-Jun-2002 : Removed unnecessary local variable (DG);
043 * 07-Oct-2002 : Fixed errors reported by Checkstyle (DG);
044 * 26-May-2004 : Moved calculateMean() method from BoxAndWhiskerCalculator (DG);
045 * 02-Jun-2004 : Fixed bug in calculateMedian() method (DG);
046 * 11-Jan-2005 : Removed deprecated code in preparation for the 1.0.0
047 * release (DG);
048 *
049 */
050
051 package org.jfree.data.statistics;
052
053 import java.util.ArrayList;
054 import java.util.Collection;
055 import java.util.Collections;
056 import java.util.Iterator;
057 import java.util.List;
058
/**
 * A utility class that provides some common statistical functions (mean,
 * median, standard deviation, linear regression, correlation and moving
 * average).  All methods are static.
 */
public abstract class Statistics {

    /**
     * Returns the mean of an array of numbers.  This is equivalent to calling
     * <code>calculateMean(values, true)</code>.
     *
     * @param values  the values (<code>null</code> not permitted).
     *
     * @return The mean.
     *
     * @throws IllegalArgumentException if <code>values</code> is
     *         <code>null</code>.
     */
    public static double calculateMean(Number[] values) {
        return calculateMean(values, true);
    }

    /**
     * Returns the mean of an array of numbers.  If no item contributes to
     * the calculation (for example, the array is empty) the result is
     * {@link Double#NaN}.
     *
     * @param values  the values (<code>null</code> not permitted).
     * @param includeNullAndNaN  a flag that controls whether or not
     *     <code>null</code> and <code>Double.NaN</code> values are included
     *     in the calculation (if either is present in the array, the result is
     *     {@link Double#NaN}).
     *
     * @return The mean.
     *
     * @throws IllegalArgumentException if <code>values</code> is
     *         <code>null</code>.
     *
     * @since 1.0.3
     */
    public static double calculateMean(Number[] values,
            boolean includeNullAndNaN) {

        if (values == null) {
            throw new IllegalArgumentException("Null 'values' argument.");
        }
        double sum = 0.0;
        int counter = 0;
        for (int i = 0; i < values.length; i++) {
            // treat nulls the same as NaNs
            double current = (values[i] != null)
                    ? values[i].doubleValue() : Double.NaN;
            // a NaN that is included here poisons the sum, which is what
            // makes the final result NaN
            if (includeNullAndNaN || !Double.isNaN(current)) {
                sum = sum + current;
                counter++;
            }
        }
        // 0.0 / 0 evaluates to NaN when nothing was counted
        return sum / counter;
    }

    /**
     * Returns the mean of a collection of <code>Number</code> objects.  This
     * is equivalent to calling <code>calculateMean(values, true)</code>.
     *
     * @param values  the values (<code>null</code> not permitted).
     *
     * @return The mean.
     *
     * @throws IllegalArgumentException if <code>values</code> is
     *         <code>null</code>.
     */
    public static double calculateMean(Collection values) {
        return calculateMean(values, true);
    }

    /**
     * Returns the mean of a collection of <code>Number</code> objects.
     * Items that are not instances of <code>Number</code> are ignored.  If
     * no item contributes to the calculation (for example, the collection
     * is empty) the result is {@link Double#NaN}.
     *
     * @param values  the values (<code>null</code> not permitted).
     * @param includeNullAndNaN  a flag that controls whether or not
     *     <code>null</code> and <code>Double.NaN</code> values are included
     *     in the calculation (if either is present in the collection, the
     *     result is {@link Double#NaN}).
     *
     * @return The mean.
     *
     * @throws IllegalArgumentException if <code>values</code> is
     *         <code>null</code>.
     *
     * @since 1.0.3
     */
    public static double calculateMean(Collection values,
            boolean includeNullAndNaN) {

        if (values == null) {
            throw new IllegalArgumentException("Null 'values' argument.");
        }
        int count = 0;
        double total = 0.0;
        Iterator iterator = values.iterator();
        while (iterator.hasNext()) {
            Object object = iterator.next();
            if (object == null) {
                if (includeNullAndNaN) {
                    return Double.NaN;
                }
            }
            else if (object instanceof Number) {
                double value = ((Number) object).doubleValue();
                if (Double.isNaN(value)) {
                    if (includeNullAndNaN) {
                        return Double.NaN;
                    }
                }
                else {
                    total = total + value;
                    count = count + 1;
                }
            }
        }
        // 0.0 / 0 evaluates to NaN when nothing was counted
        return total / count;
    }

    /**
     * Calculates the median for a list of values (<code>Number</code> objects).
     * The list of values will be copied, and the copy sorted, before
     * calculating the median.  To avoid this step (if your list of values
     * is already sorted), use the {@link #calculateMedian(List, boolean)}
     * method.
     *
     * @param values  the values (<code>null</code> permitted).
     *
     * @return The median, or {@link Double#NaN} if the list is
     *         <code>null</code> or empty.
     */
    public static double calculateMedian(List values) {
        return calculateMedian(values, true);
    }

    /**
     * Calculates the median for a list of values (<code>Number</code> objects).
     * If <code>copyAndSort</code> is <code>false</code>, the list is assumed
     * to be presorted in ascending order by value.  The supplied list itself
     * is never modified.
     *
     * @param values  the values (<code>null</code> permitted).
     * @param copyAndSort  a flag that controls whether the list of values is
     *                     copied and sorted.
     *
     * @return The median, or {@link Double#NaN} if the list is
     *         <code>null</code> or empty.
     */
    public static double calculateMedian(List values, boolean copyAndSort) {

        if (values == null) {
            return Double.NaN;
        }
        List sorted = values;
        if (copyAndSort) {
            // sort a private copy so the caller's list keeps its order
            sorted = new ArrayList(values);
            Collections.sort(sorted);
        }
        return medianOfSortedRange(sorted, 0, sorted.size());
    }

    /**
     * Calculates the median for a sublist within a list of values
     * (<code>Number</code> objects).  The sublist is copied and the copy is
     * sorted before the median is calculated.
     *
     * @param values  the values, in any order (<code>null</code> not
     *                permitted).
     * @param start  the start index (inclusive).
     * @param end  the end index (inclusive).
     *
     * @return The median, or {@link Double#NaN} if <code>end</code> is less
     *         than <code>start</code>.
     *
     * @throws IllegalArgumentException if <code>values</code> is
     *         <code>null</code>.
     */
    public static double calculateMedian(List values, int start, int end) {
        return calculateMedian(values, start, end, true);
    }

    /**
     * Calculates the median for a sublist within a list of values
     * (<code>Number</code> objects).  If <code>copyAndSort</code> is
     * <code>true</code>, the items in the range are copied and the copy is
     * sorted; otherwise the items in the range are assumed to be presorted
     * in ascending order.  The supplied list itself is never modified.
     *
     * @param values  the values (<code>null</code> not permitted).
     * @param start  the start index (inclusive).
     * @param end  the end index (inclusive).
     * @param copyAndSort  a flag that controls whether the items in the
     *                     range are copied and sorted.
     *
     * @return The median, or {@link Double#NaN} if <code>end</code> is less
     *         than <code>start</code>.
     *
     * @throws IllegalArgumentException if <code>values</code> is
     *         <code>null</code>.
     */
    public static double calculateMedian(List values, int start, int end,
            boolean copyAndSort) {

        if (values == null) {
            throw new IllegalArgumentException("Null 'values' argument.");
        }
        // An empty or inverted range has no median.  Checking this up front
        // keeps both code paths consistent and avoids asking ArrayList for
        // a negative initial capacity.
        if (end < start) {
            return Double.NaN;
        }
        int count = end - start + 1;
        if (copyAndSort) {
            List working = new ArrayList(count);
            for (int i = start; i <= end; i++) {
                working.add(values.get(i));
            }
            Collections.sort(working);
            return medianOfSortedRange(working, 0, count);
        }
        return medianOfSortedRange(values, start, count);

    }

    /**
     * Returns the median of <code>count</code> consecutive items of a list
     * that is already sorted in ascending order, beginning at index
     * <code>start</code>.
     *
     * @param sorted  the sorted list of <code>Number</code> objects.
     * @param start  the index of the first item in the range.
     * @param count  the number of items in the range.
     *
     * @return The median, or {@link Double#NaN} if <code>count</code> is
     *         zero or negative.
     */
    private static double medianOfSortedRange(List sorted, int start,
            int count) {
        if (count <= 0) {
            return Double.NaN;
        }
        int middle = start + count / 2;
        if (count % 2 == 1) {
            // odd number of items: the single middle item is the median
            return ((Number) sorted.get(middle)).doubleValue();
        }
        // even number of items: average the two items around the centre
        Number lower = (Number) sorted.get(middle - 1);
        Number upper = (Number) sorted.get(middle);
        return (lower.doubleValue() + upper.doubleValue()) / 2.0;
    }

    /**
     * Returns the standard deviation of a set of numbers, computed with
     * <code>n - 1</code> in the denominator.  For an array containing a
     * single item the result is {@link Double#NaN} (zero divided by zero).
     * A <code>null</code> item in the array causes a
     * <code>NullPointerException</code>.
     *
     * @param data  the data (<code>null</code> or zero length array not
     *     permitted).
     *
     * @return The standard deviation of a set of numbers.
     *
     * @throws IllegalArgumentException if <code>data</code> is
     *         <code>null</code> or has zero length.
     */
    public static double getStdDev(Number[] data) {
        if (data == null) {
            throw new IllegalArgumentException("Null 'data' array.");
        }
        if (data.length == 0) {
            throw new IllegalArgumentException("Zero length 'data' array.");
        }
        double avg = calculateMean(data);
        double sum = 0.0;

        for (int counter = 0; counter < data.length; counter++) {
            double diff = data[counter].doubleValue() - avg;
            sum = sum + diff * diff;
        }
        return Math.sqrt(sum / (data.length - 1));
    }

    /**
     * Fits a straight line to a set of (x, y) data, returning the slope and
     * intercept.
     *
     * @param xData  the x-data (<code>null</code> not permitted).
     * @param yData  the y-data (<code>null</code> not permitted).
     *
     * @return A double array with the intercept in [0] and the slope in [1].
     *
     * @throws IllegalArgumentException if either array is <code>null</code>
     *         or the arrays have different lengths.
     */
    public static double[] getLinearFit(Number[] xData, Number[] yData) {

        if (xData == null) {
            throw new IllegalArgumentException("Null 'xData' argument.");
        }
        if (yData == null) {
            throw new IllegalArgumentException("Null 'yData' argument.");
        }
        if (xData.length != yData.length) {
            throw new IllegalArgumentException(
                "Statistics.getLinearFit(): array lengths must be equal.");
        }

        double[] result = new double[2];
        // slope
        result[1] = getSlope(xData, yData);
        // intercept: a = ybar - b * xbar
        result[0] = calculateMean(yData) - result[1] * calculateMean(xData);

        return result;

    }

    /**
     * Finds the slope of a regression line using least squares.  A
     * <code>null</code> item in either array causes a
     * <code>NullPointerException</code>.
     *
     * @param xData  the x-values (<code>null</code> not permitted).
     * @param yData  the y-values (<code>null</code> not permitted).
     *
     * @return The slope.
     *
     * @throws IllegalArgumentException if either array is <code>null</code>
     *         or the arrays have different lengths.
     */
    public static double getSlope(Number[] xData, Number[] yData) {

        if (xData == null) {
            throw new IllegalArgumentException("Null 'xData' argument.");
        }
        if (yData == null) {
            throw new IllegalArgumentException("Null 'yData' argument.");
        }
        if (xData.length != yData.length) {
            throw new IllegalArgumentException("Array lengths must be equal.");
        }

        // ********* stat function for linear slope ********
        // y = a + bx
        // a = ybar - b * xbar
        //     sum(x * y) - (sum(x) * sum(y)) / n
        // b = ------------------------------------
        //     sum(x^2) - (sum(x)^2) / n
        // *************************************************

        // sum of x, x^2, x * y, y
        double sx = 0.0;
        double sxx = 0.0;
        double sxy = 0.0;
        double sy = 0.0;
        int n = xData.length;
        for (int i = 0; i < n; i++) {
            double x = xData[i].doubleValue();
            double y = yData[i].doubleValue();
            sx = sx + x;
            sxx = sxx + x * x;
            sxy = sxy + y * x;
            sy = sy + y;
        }
        return (sxy - (sx * sy) / n) / (sxx - (sx * sx) / n);

    }

    /**
     * Calculates the correlation between two datasets.  Both arrays should
     * contain the same number of items.  Null values are treated as zero.
     * <P>
     * Information about the correlation calculation was obtained from:
     *
     * http://trochim.human.cornell.edu/kb/statcorr.htm
     *
     * @param data1  the first dataset (<code>null</code> not permitted).
     * @param data2  the second dataset (<code>null</code> not permitted).
     *
     * @return The correlation.
     *
     * @throws IllegalArgumentException if either array is <code>null</code>
     *         or the arrays have different lengths.
     */
    public static double getCorrelation(Number[] data1, Number[] data2) {
        if (data1 == null) {
            throw new IllegalArgumentException("Null 'data1' argument.");
        }
        if (data2 == null) {
            throw new IllegalArgumentException("Null 'data2' argument.");
        }
        if (data1.length != data2.length) {
            throw new IllegalArgumentException(
                "'data1' and 'data2' arrays must have same length."
            );
        }
        int n = data1.length;
        double sumX = 0.0;
        double sumY = 0.0;
        double sumX2 = 0.0;
        double sumY2 = 0.0;
        double sumXY = 0.0;
        for (int i = 0; i < n; i++) {
            // null items contribute zero to every sum
            double x = 0.0;
            if (data1[i] != null) {
                x = data1[i].doubleValue();
            }
            double y = 0.0;
            if (data2[i] != null) {
                y = data2[i].doubleValue();
            }
            sumX = sumX + x;
            sumY = sumY + y;
            sumXY = sumXY + (x * y);
            sumX2 = sumX2 + (x * x);
            sumY2 = sumY2 + (y * y);
        }
        return (n * sumXY - sumX * sumY) / Math.pow((n * sumX2 - sumX * sumX)
                * (n * sumY2 - sumY * sumY), 0.5);
    }

    /**
     * Returns a data set for a moving average on the data set passed in.
     * Item <code>i</code> of the result holds the x-value
     * <code>xData[i + period]</code> together with the average of the
     * <code>period</code> y-values <code>yData[i]</code> to
     * <code>yData[i + period - 1]</code>.  A period of zero yields
     * {@link Double#NaN} averages.
     *
     * @param xData  an array of the x data (<code>null</code> not permitted).
     * @param yData  an array of the y data (<code>null</code> not permitted).
     * @param period  the number of data points to average (must not be
     *                negative or greater than the length of the data).
     *
     * @return A double[][] with <code>xData.length - period</code> items in
     *         the first dimension, and two doubles for x and y in the second
     *         dimension.
     *
     * @throws IllegalArgumentException if either array is <code>null</code>,
     *         the arrays have different lengths, or <code>period</code> is
     *         negative or longer than the data.
     */
    public static double[][] getMovingAverage(Number[] xData,
                                              Number[] yData,
                                              int period) {

        // check arguments...
        if (xData == null) {
            throw new IllegalArgumentException("Null 'xData' argument.");
        }
        if (yData == null) {
            throw new IllegalArgumentException("Null 'yData' argument.");
        }
        if (xData.length != yData.length) {
            throw new IllegalArgumentException("Array lengths must be equal.");
        }

        if (period > xData.length) {
            throw new IllegalArgumentException(
                "Period can't be longer than dataset."
            );
        }
        // a negative period would index before the start of xData
        if (period < 0) {
            throw new IllegalArgumentException("Period can't be negative.");
        }

        double[][] result = new double[xData.length - period][2];
        for (int i = 0; i < result.length; i++) {
            result[i][0] = xData[i + period].doubleValue();
            // holds the moving average sum
            double sum = 0.0;
            for (int j = 0; j < period; j++) {
                sum += yData[i + j].doubleValue();
            }
            result[i][1] = sum / period;
        }
        return result;

    }

}