001/* 002 * Copyright (C) 2012 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.math; 016 017import static com.google.common.base.Preconditions.checkState; 018import static com.google.common.math.DoubleUtils.ensureNonNegative; 019import static com.google.common.primitives.Doubles.isFinite; 020import static java.lang.Double.NaN; 021import static java.lang.Double.isNaN; 022 023import com.google.common.annotations.Beta; 024import com.google.common.annotations.GwtIncompatible; 025import java.util.Iterator; 026 027/** 028 * A mutable object which accumulates double values and tracks some basic statistics over all the 029 * values added so far. The values may be added singly or in groups. This class is not thread safe. 030 * 031 * @author Pete Gillin 032 * @author Kevin Bourrillion 033 * @since 20.0 034 */ 035@Beta 036@GwtIncompatible 037@ElementTypesAreNonnullByDefault 038public final class StatsAccumulator { 039 040 // These fields must satisfy the requirements of Stats' constructor as well as those of the stat 041 // methods of this class. 042 private long count = 0; 043 private double mean = 0.0; // any finite value will do, we only use it to multiply by zero for sum 044 private double sumOfSquaresOfDeltas = 0.0; 045 private double min = NaN; // any value will do 046 private double max = NaN; // any value will do 047 048 /** Adds the given value to the dataset. */ 049 public void add(double value) { 050 if (count == 0) { 051 count = 1; 052 mean = value; 053 min = value; 054 max = value; 055 if (!isFinite(value)) { 056 sumOfSquaresOfDeltas = NaN; 057 } 058 } else { 059 count++; 060 if (isFinite(value) && isFinite(mean)) { 061 // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15) and (16) 062 double delta = value - mean; 063 mean += delta / count; 064 sumOfSquaresOfDeltas += delta * (value - mean); 065 } else { 066 mean = calculateNewMeanNonFinite(mean, value); 067 sumOfSquaresOfDeltas = NaN; 068 } 069 min = Math.min(min, value); 070 max = Math.max(max, value); 071 } 072 } 073 074 /** 075 * Adds the given values to the dataset. 076 * 077 * @param values a series of values, which will be converted to {@code double} values (this may 078 * cause loss of precision) 079 */ 080 public void addAll(Iterable<? extends Number> values) { 081 for (Number value : values) { 082 add(value.doubleValue()); 083 } 084 } 085 086 /** 087 * Adds the given values to the dataset. 088 * 089 * @param values a series of values, which will be converted to {@code double} values (this may 090 * cause loss of precision) 091 */ 092 public void addAll(Iterator<? extends Number> values) { 093 while (values.hasNext()) { 094 add(values.next().doubleValue()); 095 } 096 } 097 098 /** 099 * Adds the given values to the dataset. 100 * 101 * @param values a series of values 102 */ 103 public void addAll(double... values) { 104 for (double value : values) { 105 add(value); 106 } 107 } 108 109 /** 110 * Adds the given values to the dataset. 111 * 112 * @param values a series of values 113 */ 114 public void addAll(int... values) { 115 for (int value : values) { 116 add(value); 117 } 118 } 119 120 /** 121 * Adds the given values to the dataset. 122 * 123 * @param values a series of values, which will be converted to {@code double} values (this may 124 * cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15)) 125 */ 126 public void addAll(long... values) { 127 for (long value : values) { 128 add(value); 129 } 130 } 131 132 /** 133 * Adds the given statistics to the dataset, as if the individual values used to compute the 134 * statistics had been added directly. 135 */ 136 public void addAll(Stats values) { 137 if (values.count() == 0) { 138 return; 139 } 140 merge(values.count(), values.mean(), values.sumOfSquaresOfDeltas(), values.min(), values.max()); 141 } 142 143 /** 144 * Adds the given statistics to the dataset, as if the individual values used to compute the 145 * statistics had been added directly. 146 * 147 * @since 28.2 148 */ 149 public void addAll(StatsAccumulator values) { 150 if (values.count() == 0) { 151 return; 152 } 153 merge(values.count(), values.mean(), values.sumOfSquaresOfDeltas(), values.min(), values.max()); 154 } 155 156 private void merge( 157 long otherCount, 158 double otherMean, 159 double otherSumOfSquaresOfDeltas, 160 double otherMin, 161 double otherMax) { 162 if (count == 0) { 163 count = otherCount; 164 mean = otherMean; 165 sumOfSquaresOfDeltas = otherSumOfSquaresOfDeltas; 166 min = otherMin; 167 max = otherMax; 168 } else { 169 count += otherCount; 170 if (isFinite(mean) && isFinite(otherMean)) { 171 // This is a generalized version of the calculation in add(double) above. 172 double delta = otherMean - mean; 173 mean += delta * otherCount / count; 174 sumOfSquaresOfDeltas += otherSumOfSquaresOfDeltas + delta * (otherMean - mean) * otherCount; 175 } else { 176 mean = calculateNewMeanNonFinite(mean, otherMean); 177 sumOfSquaresOfDeltas = NaN; 178 } 179 min = Math.min(min, otherMin); 180 max = Math.max(max, otherMax); 181 } 182 } 183 184 /** Returns an immutable snapshot of the current statistics. */ 185 public Stats snapshot() { 186 return new Stats(count, mean, sumOfSquaresOfDeltas, min, max); 187 } 188 189 /** Returns the number of values. */ 190 public long count() { 191 return count; 192 } 193 194 /** 195 * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the 196 * values. The count must be non-zero. 197 * 198 * <p>If these values are a sample drawn from a population, this is also an unbiased estimator of 199 * the arithmetic mean of the population. 200 * 201 * <h3>Non-finite values</h3> 202 * 203 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 204 * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the 205 * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values 206 * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}. 207 * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or {@link 208 * Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}. 209 * 210 * @throws IllegalStateException if the dataset is empty 211 */ 212 public double mean() { 213 checkState(count != 0); 214 return mean; 215 } 216 217 /** 218 * Returns the sum of the values. 219 * 220 * <h3>Non-finite values</h3> 221 * 222 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 223 * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the 224 * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values 225 * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}. 226 * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or {@link 227 * Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}. 228 */ 229 public final double sum() { 230 return mean * count; 231 } 232 233 /** 234 * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Population_variance">population 235 * variance</a> of the values. The count must be non-zero. 236 * 237 * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value. It 238 * is not guaranteed to return zero when the dataset consists of the same value multiple times, 239 * due to numerical errors. However, it is guaranteed never to return a negative result. 240 * 241 * <h3>Non-finite values</h3> 242 * 243 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 244 * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 245 * 246 * @throws IllegalStateException if the dataset is empty 247 */ 248 public final double populationVariance() { 249 checkState(count != 0); 250 if (isNaN(sumOfSquaresOfDeltas)) { 251 return NaN; 252 } 253 if (count == 1) { 254 return 0.0; 255 } 256 return ensureNonNegative(sumOfSquaresOfDeltas) / count; 257 } 258 259 /** 260 * Returns the <a 261 * href="http://en.wikipedia.org/wiki/Standard_deviation#Definition_of_population_values"> 262 * population standard deviation</a> of the values. The count must be non-zero. 263 * 264 * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value. It 265 * is not guaranteed to return zero when the dataset consists of the same value multiple times, 266 * due to numerical errors. However, it is guaranteed never to return a negative result. 267 * 268 * <h3>Non-finite values</h3> 269 * 270 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 271 * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 272 * 273 * @throws IllegalStateException if the dataset is empty 274 */ 275 public final double populationStandardDeviation() { 276 return Math.sqrt(populationVariance()); 277 } 278 279 /** 280 * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Sample_variance">unbiased sample 281 * variance</a> of the values. If this dataset is a sample drawn from a population, this is an 282 * unbiased estimator of the population variance of the population. The count must be greater than 283 * one. 284 * 285 * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple 286 * times, due to numerical errors. However, it is guaranteed never to return a negative result. 287 * 288 * <h3>Non-finite values</h3> 289 * 290 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 291 * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 292 * 293 * @throws IllegalStateException if the dataset is empty or contains a single value 294 */ 295 public final double sampleVariance() { 296 checkState(count > 1); 297 if (isNaN(sumOfSquaresOfDeltas)) { 298 return NaN; 299 } 300 return ensureNonNegative(sumOfSquaresOfDeltas) / (count - 1); 301 } 302 303 /** 304 * Returns the <a 305 * href="http://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation"> 306 * corrected sample standard deviation</a> of the values. If this dataset is a sample drawn from a 307 * population, this is an estimator of the population standard deviation of the population which 308 * is less biased than {@link #populationStandardDeviation()} (the unbiased estimator depends on 309 * the distribution). The count must be greater than one. 310 * 311 * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple 312 * times, due to numerical errors. However, it is guaranteed never to return a negative result. 313 * 314 * <h3>Non-finite values</h3> 315 * 316 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 317 * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 318 * 319 * @throws IllegalStateException if the dataset is empty or contains a single value 320 */ 321 public final double sampleStandardDeviation() { 322 return Math.sqrt(sampleVariance()); 323 } 324 325 /** 326 * Returns the lowest value in the dataset. The count must be non-zero. 327 * 328 * <h3>Non-finite values</h3> 329 * 330 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 331 * contains {@link Double#NEGATIVE_INFINITY} and not {@link Double#NaN} then the result is {@link 332 * Double#NEGATIVE_INFINITY}. If it contains {@link Double#POSITIVE_INFINITY} and finite values 333 * only then the result is the lowest finite value. If it contains {@link 334 * Double#POSITIVE_INFINITY} only then the result is {@link Double#POSITIVE_INFINITY}. 335 * 336 * @throws IllegalStateException if the dataset is empty 337 */ 338 public double min() { 339 checkState(count != 0); 340 return min; 341 } 342 343 /** 344 * Returns the highest value in the dataset. The count must be non-zero. 345 * 346 * <h3>Non-finite values</h3> 347 * 348 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 349 * contains {@link Double#POSITIVE_INFINITY} and not {@link Double#NaN} then the result is {@link 350 * Double#POSITIVE_INFINITY}. If it contains {@link Double#NEGATIVE_INFINITY} and finite values 351 * only then the result is the highest finite value. If it contains {@link 352 * Double#NEGATIVE_INFINITY} only then the result is {@link Double#NEGATIVE_INFINITY}. 353 * 354 * @throws IllegalStateException if the dataset is empty 355 */ 356 public double max() { 357 checkState(count != 0); 358 return max; 359 } 360 361 double sumOfSquaresOfDeltas() { 362 return sumOfSquaresOfDeltas; 363 } 364 365 /** 366 * Calculates the new value for the accumulated mean when a value is added, in the case where at 367 * least one of the previous mean and the value is non-finite. 368 */ 369 static double calculateNewMeanNonFinite(double previousMean, double value) { 370 /* 371 * Desired behaviour is to match the results of applying the naive mean formula. In particular, 372 * the update formula can subtract infinities in cases where the naive formula would add them. 373 * 374 * Consequently: 375 * 1. If the previous mean is finite and the new value is non-finite then the new mean is that 376 * value (whether it is NaN or infinity). 377 * 2. If the new value is finite and the previous mean is non-finite then the mean is unchanged 378 * (whether it is NaN or infinity). 379 * 3. If both the previous mean and the new value are non-finite and... 380 * 3a. ...either or both is NaN (so mean != value) then the new mean is NaN. 381 * 3b. ...they are both the same infinities (so mean == value) then the mean is unchanged. 382 * 3c. ...they are different infinities (so mean != value) then the new mean is NaN. 383 */ 384 if (isFinite(previousMean)) { 385 // This is case 1. 386 return value; 387 } else if (isFinite(value) || previousMean == value) { 388 // This is case 2. or 3b. 389 return previousMean; 390 } else { 391 // This is case 3a. or 3c. 392 return NaN; 393 } 394 } 395}