#!/usr/bin/perl # Copyright 2010, by David A. Burton # Cary, NC USA # +1 919-481-0149 # Email: http://www.burtonsys.com/email/ # Permission is hereby granted to use this function for any purpose, # with or without modification, provided only that this copyright & # permission notice is retained. # You can use a "require" to pull this into another Perl program, like this: # # @INC = ('.','..'); # require "composite_sd.pl"; # # It defines four functions: # # &composite_SD is a function to calculate the overall standard deviation of # a collection of sample groups, given only the group standard deviations, # means, and sample counts. # # &sample_SD is a simple function to calculate the standard deviation of a # collection of samples. # # &sum and &avg (to calculate summation and simple average, respectively) # are also defined. # # These functions are compatible both with 32-bit Perl 4.036, and with Perl 5. if (!defined $debugmode) { $debugmode=0; # set it >0 for debug prints } # Input is an array of samples; result is the summation. sub sum { local( $sum ) = 0; foreach $i (@_) { # note that $i is implicitly local to the loop $sum += $i; } return $sum; } # Input is an array of samples; result is the mean. sub avg { if (-1 == $#_) { die "ERR: \&avg( empty list )\n"; } return &sum(@_) / (1+$#_) } # input is an array of samples; result is the standard deviation sub sample_SD { local( @samples ) = @_; local( $mean ) = &avg( @samples ); local( $sum_of_squared_deviations ) = 0; if ($#samples <= 0) { return 0; } else { foreach $i (@samples) { $sum_of_squared_deviations += (($i - $mean) * ($i - $mean)); } return sqrt( $sum_of_squared_deviations / $#samples ); } } # Calculate combined standard deviation via ANOVA (ANalysis Of VAriance) # See: http://www.burtonsys.com/climate/composite_standard_deviations.html # Inputs are: # $G, the number of groups # @means, the array of $G group means # @SDs, the array of $G group standard deviations # $ncount or @ncounts -- number of samples in each group (can be scalar # if all groups have same number of samples) # Result is the overall standard deviation. sub composite_SD { local( $G ) = shift @_; local( @means ) = splice( @_, 0, $G ); local( @SDs ) = splice( @_, 0, $G ); local( @ncounts ); local( $i ); if (0 == $#_) { for ($i=($G-1); $i >= 0; $i--) { $ncounts[$i] = $_[0]; } } else { @ncounts = @_; } if ( ($G != ($#means + 1)) || ($G != ($#SDs + 1)) || ($G != ($#ncounts + 1)) ) { die "ERR: sub composite_SD called w/ wrong number of parameters [$G,$#means,$#SDs,$#ncounts]\n"; } # Okay, we have the parameters, now. # calculate total number of samples, N, and grand mean, GM local($N) = &sum(@ncounts); # total number of samples if ($N <= 1) { print "Warning: only $N samples, SD is incalculable\n"; return -1; } local($GM) = 0; for ($i=0; $i<$G; $i++) { $GM += ($means[$i] * $ncounts[$i]); } $GM /= $N; # grand mean # calculate Error Sum of Squares local($ESS) = 0; for ($i=0; $i<$G; $i++) { $ESS += (($SDs[$i])**2) * ($ncounts[$i] - 1); } # calculate Total Group Sum of Squares local($TGSS) = 0; for ($i=0; $i<$G; $i++) { $TGSS += (($means[$i]-$GM)**2) * $ncounts[$i]; } # calculate standard deviation as square root of grand variance local( $result ) = sqrt( ($ESS+$TGSS)/($N-1) ); if ($debugmode > 0) { printf "dbg composite_SD: N = $N, GM = %4.3f, ESS = %5.1f, TGSS = %5.1f, SD = %4.3f\n", $GM, $ESS, $TGSS, $result; } return $result; } #composite_SD # return true value to 'require' 1;
#!/usr/bin/python3 ''' composite_sd.py defines three functions: composite_SD is a function to calculate the overall standard deviation of a collection of sample groups, given only the group standard deviations, means, and sample counts. sample_SD is a simple function to calculate the standard deviation of a collection of samples. avg (to calculate the simple average) is also defined. These functions are compatible with Python 2.6, Python 2.7, and Python 3.*. Copyright 2010, 2013, by David A. Burton Cary, NC USA +1 919-481-0149 Email: http://www.burtonsys.com/email/ Permission is hereby granted to use this function for any purpose, with or without modification, provided only that this copyright & permission notice is retained. Note: if what you really want is a "running" calculation of variance and/or standard deviation, see this article about the Welford Method: http://www.johndcook.com/standard_deviation.html (That's from the American statistician John Cook, not the Australian climate activist John Cook.) ''' from __future__ import print_function, division # requires python 2.6 or later (2.7 or later preferred) import math __all__ = ['iterable', 'avg', 'sample_SD', 'composite_SD'] def iterable(obj): '''True iff obj is iterable: a list, tuple, or string.''' return hasattr(obj, '__contains__') def avg(samples): if len(samples) >= 1: return sum(samples) / len(samples) return float('nan') def sample_SD(samples): '''input is an array of samples; result is the standard deviation''' mean = avg(samples) sum_of_squared_deviations = 0; sd = 0 if len(samples) >= 2: for datum in samples: sum_of_squared_deviations += ((datum - mean) * (datum - mean)); sd = math.sqrt(sum_of_squared_deviations / (len(samples)-1) ); return sd def composite_SD(means, SDs, ncounts): '''Calculate combined standard deviation via ANOVA (ANalysis Of VAriance) See: http://www.burtonsys.com/climate/composite_standard_deviations.html Inputs are: means, the array of group means SDs, the array of group standard deviations ncounts, number of samples in each group (can be scalar if all groups have same number of samples) Result is the overall standard deviation. ''' G = len(means) # number of groups if G != len(SDs): raise Exception('inconsistent list lengths') if not iterable(ncounts): ncounts = [ncounts] * G # convert scalar ncounts to array elif G != len(ncounts): raise Exception('wrong ncounts list length') # calculate total number of samples, N, and grand mean, GM N = sum(ncounts) # total number of samples if N <= 1: raise Exception("Warning: only " + str(N) + " samples, SD is incalculable") GM = 0.0 for i in range(G): GM += means[i] * ncounts[i] GM /= N # grand mean # calculate Error Sum of Squares ESS = 0.0 for i in range(G): ESS += ((SDs[i])**2) * (ncounts[i] - 1) # calculate Total Group Sum of Squares TGSS = 0.0 for i in range(G): TGSS += ((means[i]-GM)**2) * ncounts[i] # calculate standard deviation as square root of grand variance result = math.sqrt((ESS+TGSS)/(N-1)) return result # samples = range(10) # print('avg=', avg(samples)) # sd = sample_SD(samples) # print('sd=', sd) # pt1 = [0,1,2] # pt2 = [3,4] # pt3 = [5,6,7,8,9] # means = [avg(pt1), avg(pt2), avg(pt3)] # SDs = [sample_SD(pt1), sample_SD(pt2), sample_SD(pt3)] # ncounts = [len(pt1), len(pt2), len(pt3)] # sd2 = composite_SD(means, SDs, ncounts) # print('sd2=', sd2) # # samples = range(9) # print('avg=', avg(samples)) # sd = sample_SD(samples) # print('sd=', sd) # pt1 = [0,1,2] # pt2 = [3,4,5] # pt3 = [6,7,8] # means = [avg(pt1), avg(pt2), avg(pt3)] # SDs = [sample_SD(pt1), sample_SD(pt2), sample_SD(pt3)] # ncounts = 3 # sd2 = composite_SD(means, SDs, ncounts) # print('sd2=', sd2)
Date: Mon, Jun 22, 2015 at 1:05 PM
Subject: Composite standard deviations - PHP Code
Hi Dave,
I converted the python code which is attached into PHP code. I tested and it gives a good composite SD number. You are free to copy or do as you wish as you provided the original python source.
Here is the function which I have added as part of a statistics class.
Cheers, Eric
Eric Jennings, Sr. Systems Analyst
Hampton Affiliates
//--------------------------------------------------------------------------------------------- // This function returns a composite standard deviation. // Calculate combined standard deviation via ANOVA (ANalysis Of VAriance) // See: http://www.burtonsys.com/climate/composite_standard_deviations.html // Inputs are: // Means, the array of group means // SDs, the array of group standard deviations // nCounts, number of samples in each group (can be scalar // if all groups have same number of samples) // Return: Result is the overall standard deviation. // // Copyright 2010, 2013, by David A. Burton; Copyright 2015, by Eric Jennings. // Contact by email: http://www.burtonsys.com/email/ // Permission is hereby granted to use this function for any purpose, // with or without modification, provided only that this copyright and // permission notice is retained. // // Note: if what you really want is a "running" calculation of variance and/or // standard deviation, see this article about the Welford Method: // http://www.johndcook.com/standard_deviation.html // public function composite_SD( $Means, $SDs, $nCounts ){ // number of groups $G = count( $Means ); if( $G != count($SDs)){ $this->Msg .= 'inconsistent list lengths'; return 0; } if( $G != count($nCounts)){ $this->Msg .= 'wrong nCounts list length'; return 0; } // calculate total number of samples, N, and grand mean, GM $N = array_sum($nCounts); // total number of samples if( $N <= 1 ){ $this->Msg .= "Warning: only $N samples, SD is incalculable"; } $GM = 0.0; for( $i = 0; $i < $G; $i++ ){ $GM += $Means[$i] * $nCounts[$i]; } $GM /= $N; // grand mean // calculate Error Sum of Squares $ESS = 0.0; for( $i = 0; $i < $G; $i++){ $ESS += (pow($SDs[$i], 2)) * ($nCounts[$i] - 1); } // calculate Total Group Sum of Squares $TGSS = 0.0; for( $i = 0; $i < $G; $i++ ){ $TGSS += (pow($Means[$i]-$GM, 2)) * $nCounts[$i]; } // calculate standard deviation as square root of grand variance $result = sqrt(($ESS+$TGSS)/($N-1)); return $result; }