# Copyright (C) 2004 Paul Harrison
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

""" MML estimators. """

from numarray import *
from numarray import random_array, linear_algebra, arrayprint

import sys

euler_constant = 0.577215664901532860606512090082402431042


def dimension_term(D):
    """ Approximation of the dimension-dependent terms in MML87.

        D : integer
            number of dimensions

        Error is of order 0.3 nit.
    """
    return -0.5*D*log(2*pi) + 0.5*log(D*pi) - euler_constant


def neglog_gaussian_likelihood(N, sigma, sum_of_squares, quantum=1.0):
    """ Return the total negative-log-likelihood of some data sampled from a
        Gaussian source.

        N              : float   number of items
        sigma          : float   standard deviation of the source
        sum_of_squares : float   sum of squares of the data
        quantum        : float   measurement accuracy

        Note: This likelihood is a slight approximation, as the Gaussian
        distribution is not discrete.
    """
    return N*(-log(quantum)+0.5*log(2*pi)+log(sigma)) + sum_of_squares/(2*sigma*sigma)


def individual_neglog_gaussian_likelihoods(sigma, squares, quantum=1.0):
    """ Return the negative-log-likelihood of each datum sampled from a
        Gaussian source.

        sigma   : float              standard deviation of the source
        squares : N-array of float   square of each datum
        quantum : float              measurement accuracy

        Note: This likelihood is a slight approximation, as the Gaussian
        distribution is not discrete.
    """
    return squares/(2*sigma*sigma) + (-log(quantum)+0.5*log(2*pi)+log(sigma))


def neglog_multivariate_gaussian_likelihood(N, K, concentration,
                                            total_data_outerproduct, quantum):
    """ Multivariate equivalent of neglog_gaussian_likelihood

        N                       : float   number of items
        K                       : float   size of data vectors
        concentration           : KxK array of floats
                                  inverse of the covariance matrix
        total_data_outerproduct : KxK array of floats
                                  total outerproduct of the data
        quantum                 : float   measurement accuracy (volume)
    """
    # Note: sum(ravel(total_data_outerproduct*concentration)) equals the
    #       total over all data of dot(dot(datum,concentration),datum)
    return ( N*( - log(quantum)
                 - 0.5*log(linear_algebra.determinant(concentration))
                 + 0.5*K*log(2*pi) )
             + 0.5 * sum(ravel(total_data_outerproduct * concentration)) )


def individual_neglog_multivariate_gaussian_likelihoods(
        K, concentration, data_outerproduct, quantum):
    """ Multivariate equivalent of individual_neglog_gaussian_likelihoods

        K                 : float   size of data vectors
        concentration     : KxK array of floats
                            inverse of the covariance matrix
        data_outerproduct : NxKxK array of floats
                            outerproduct of each datum
        quantum           : float   measurement accuracy (volume)
    """
    return ( 0.5*sum(reshape(data_outerproduct * concentration, (-1,K*K)),1)
             + ( - log(quantum)
                 - 0.5*log(linear_algebra.determinant(concentration))
                 + 0.5*K*log(2*pi) ) )


class Estimate:
    """ Abstract base class for MML87 estimators.

        Fields:

        data : The input data.
        weighting : array of floats
            If the estimate is being used as part of a mixture model, the
            level of assignment of each data point to the class this
            estimate is for.  Otherwise, an array of ones.

        data_size : Total of "weighting", i.e. normally just len(data).

        violates_prior : boolean
            Do any model parameters exceed bounds on the given prior?

        Sub-classes should define:

        def dimensions(self)    : number of dimensions in the model
        def neglog_prior(self)  : negative log of the prior density
        def neglog_fisher(self) : negative log of the determinant of the
                                  Fisher matrix
        def neglog_data(self)   : negative log likelihood of each datum
                                  (see documentation)
    """

    parameters = [ ]   # Names of parameters in the estimate

    def __init__(self, data, weighting=None):
        self.data = data

        if weighting is not None:
            self.weighting = weighting
        else:
            self.weighting = ones(len(data), Float)

        self.data_size = sum(self.weighting)

        self.has_prior = True
        self.violates_prior = False

    def __repr__(self):
        """ Display the estimate's model, length, and whether it violates
            the prior.
        """
        result = self.__class__.__name__ + ":"

        if self.has_prior:
            result += " %.2f nits" % self.length()

        if self.violates_prior:
            result += "\n*** Prior bounds violated! ***"

        for item in self.parameters:
            result += "\n%15s=" % item
            data = getattr(self,item)
            if isinstance(data,float):
                result += "%.2f" % data
            elif type(data) != type(array([])):
                result += repr(data)
            elif data.shape[0] > 100:
                result += "< a large array of numbers >"
            else:
                result += arrayprint.array2string(data, precision=2, suppress_small=True) \
                              .replace("\n", "\n"+" "*16)

        return result

    def total_neglog_data(self):
        return sum(self.neglog_data() * self.weighting)

    def length(self):
        """ Calculate the message length. """
        assert self.has_prior
        return self.neglog_prior() \
               - 0.5*self.neglog_fisher() \
               + self.total_neglog_data() \
               + dimension_term(self.dimensions())

    def test(self):
        """ Simple numerical self-consistency test:

            Is there any single parameter that, if altered slightly, reduces
            the length?  If so, the estimate is not consistent with the
            length calculation.

            Also tests that total_neglog_data and neglog_data are consistent.

            Limitation: Does not detect the length calculation being out by
            a constant.
        """
        base_length = self.length()
        any_bad = False

        print "Testing"

        discrepancy = abs(self.total_neglog_data() - Estimate.total_neglog_data(self))
        if discrepancy > 1e-6:
            print "Inconsistency between total_neglog_data and neglog_data:", discrepancy
            any_bad = True

        def get(position):
            if shape == ():
                return getattr(self,parameter)
            else:
                return data[position]

        def set(position, value):
            if shape == ():
                setattr(self,parameter,value)
            else:
                data[position] = value

        originals = { }
        for parameter in self.parameters:
            data = getattr(self,parameter)
            if isinstance(data,float):
                originals[parameter] = data
            elif type(data) == type(array([])):
                originals[parameter] = data.copy()

        for parameter in originals:
            data = getattr(self,parameter)
            if isinstance(data,float):
                shape = ()
            elif type(data) != type(array([])):
                continue
            else:
                shape = data.shape

            positions = [ () ]
            for item in shape:
                new_positions = [ ]
                for i in xrange(item):
                    new_positions.extend([ j+(i,) for j in positions ])
                positions = new_positions

            for position in positions:
                for factor in [ 0.5, 0.9, 0.99, 1.01, 1.1, 2.0 ]:
                    data = getattr(self,parameter)
                    original = get(position)
                    set(position, original * factor)
                    self._test_prep()
                    l2 = self.length()

                    for parameter1, value in originals.items():
                        if type(value) == type(array([])):
                            value = value.copy()
                        setattr(self,parameter1,value)

                    if l2-base_length < -1e-6:
                        print "Length decreased: ", parameter, position, factor, l2-base_length
                        any_bad = True

        if any_bad:
            print "-- FAILED!"
        else:
            print "-- OK"

        return not any_bad

    def _test_prep(self):
        pass
        # ^ This can be used to update any intermediate results if parameters
        #   change, allowing test() to properly test an estimator


class Gaussian_estimate(Estimate):
    """ Gaussian distribution estimate.

        (Identical to the classical estimate, but gives a message length.)

        data : array of float
            The data.

        Required only if an explicit length is needed:

        mean_range : (float, float)
            The minimum and maximum possible values for the mean.
        quantum : float
            The measurement accuracy.  Also, the minimum possible sigma.
        max_sigma : float
            The maximum possible sigma.
    """

    parameters = [ 'mean', 'sigma' ]

    def __init__(self, data, mean_range=None, quantum=None, max_sigma=None,
                 weighting=None):
        Estimate.__init__(self, data, weighting)

        self.mean = sum(self.data*self.weighting) / self.data_size

        deviation = data - self.mean
        self.squared_deviation = deviation*deviation
        self.sum_squared_deviation = sum(self.squared_deviation*self.weighting)

        self.sigma = (self.sum_squared_deviation / (self.data_size-1.0)) ** 0.5

        if mean_range is None or quantum is None or max_sigma is None:
            self.has_prior = False
        else:
            self.has_prior = True
            self.quantum = quantum
            self.mean_range = mean_range
            self.max_sigma = max_sigma

            self.violates_prior = \
                self.mean < self.mean_range[0] or \
                self.mean > self.mean_range[1] or \
                self.sigma < self.quantum or \
                self.sigma > self.max_sigma

    def dimensions(self):
        # mean, sigma
        return 2

    def neglog_prior(self):
        R_mean = self.mean_range[1] - self.mean_range[0]
        R_sigma = log(self.max_sigma) - log(self.quantum)
        return log(R_mean) + log(R_sigma) + log(self.sigma)

    def neglog_fisher(self):
        return -log(2) - 2*log(self.data_size) + 4*log(self.sigma)

    def total_neglog_data(self):
        return neglog_gaussian_likelihood(self.data_size, self.sigma,
                                          self.sum_squared_deviation, self.quantum)

    def neglog_data(self):
        return individual_neglog_gaussian_likelihoods(
            self.sigma, self.squared_deviation, self.quantum)


class Discrete_estimate(Estimate):
    """ Discrete estimate.

        Given a list of items, estimate the likelihood of each type of item.
        Based on the multinomial estimate described in the book; however,
        the order of items is also encoded in the message.

        data : array of integers
            Type of each item (0 <= item < n_types).
        n_types : integer
            Number of types of item.

        Optional:

        alpha : n_types-array of float
            Conjugate prior, as though alpha[i] of each type i had already
            been observed.  alpha[i] = 1.0 is a uniform prior, and the
            default.
    """

    parameters = [ "probability" ]

    def __init__(self, data, n_types, alpha=None, weighting=None):
        Estimate.__init__(self, data, weighting)

        self.n_types = n_types

        self.conjugate_prior = (alpha is not None)
        if self.conjugate_prior:
            self.alpha = alpha
        else:
            self.alpha = ones(n_types, Float)

        sum_alpha = sum(self.alpha)

        self.counts = zeros(n_types, Float)
        for i in xrange(len(data)):
            self.counts[data[i]] += self.weighting[i]

        self.probability = ( (self.counts+self.alpha-0.5)
                             / (self.data_size+sum_alpha-0.5*n_types) )

    def _test_prep(self):
        # Used by Estimate.test()
        self.probability /= sum(self.probability)

    def dimensions(self):
        # self.probability must add to 1.0, so the model-space has n-1 dimensions.
        return self.n_types-1

    def neglog_prior(self):
        # The uniform-prior branch gives the same value as the conjugate-prior
        # formula with alpha[i] = 1.0; branching just avoids depending on
        # scipy unless it is actually needed.
        if self.conjugate_prior:
            from scipy.special import gammaln
            return ( -sum(log(self.probability) * (self.alpha-1.0))
                     -gammaln(sum(self.alpha))
                     +sum(gammaln(self.alpha)) )
        else:
            return -sum(log(arange(1,self.n_types)))

    def neglog_fisher(self):
        return ( -(self.n_types-1.0) * log(self.data_size)
                 +sum(log(self.probability)) )

    def total_neglog_data(self):
        return -sum(self.counts * log(self.probability))

    def neglog_data(self):
        return take(-log(self.probability), self.data)


class Multivariate_gaussian_estimate(Estimate):
    """ Multivariate Gaussian distribution estimate, with a conjugate prior.

        data : NxK-array of floats
            The data to be encoded.

        m : float, > K, default K+1.0
            Number of "samples" in the prior for the covariance.
        V : KxK-array of floats, default quantum**(2/K)*identity(K)*m
            Total covariance matrix of the "samples", defining the prior on
            the covariance.  Note that this is the total, sum x_i*x_j,
            not (sum x_i*x_j) / (m-1).
        m1 : float, > 0.0, default 1.0
            Number of "samples" in the prior for the mean.
        u0 : K-array of floats
            Mean of the "samples" defining the prior on the mean.

        Required if an explicit length is needed:

        quantum : float
            *Volume* of the measurement accuracy of the data.
        Note: This class requires SciPy (for the gammaln function)
    """

    parameters = [ "mean", "covariance" ]

    def __init__(self, data, quantum=None, m=None, V=None, m1=1.0, u0=None,
                 weighting=None):
        assert quantum is not None or (V is not None and u0 is not None)

        Estimate.__init__(self, data, weighting)

        self.K = data.shape[1]

        if m is None: m = self.K + 1.0
        if V is None: V = identity(self.K) * (quantum**(2.0/self.K)) * m
        if u0 is None: u0 = zeros(self.K, Float)

        self.m = m
        self.V = V
        self.m1 = m1
        self.u0 = u0

        weighted_data = data * self.weighting[:,NewAxis]

        self.mean = (m1*u0 + sum(weighted_data)) / (m1 + self.data_size)

        deviation = data - self.mean
        u0_deviation = u0 - self.mean

        self.data_outerproduct = deviation[:,NewAxis,:] * deviation[:,:,NewAxis]
        self.total_data_outerproduct = sum(self.weighting[:,NewAxis,NewAxis]
                                           * self.data_outerproduct)

        total_covariance = m1*outerproduct(u0_deviation, u0_deviation) \
                           + V + self.total_data_outerproduct
        self.covariance = total_covariance / (self.data_size+m+self.K-2)

        if quantum is None:
            self.has_prior = False
        else:
            self.quantum = quantum
            self.det_V = linear_algebra.determinant(self.V)

        self.concentration = linear_algebra.inverse(self.covariance)
        self.det_concentration = linear_algebra.determinant(self.concentration)

    def _test_prep(self):
        # Used by Estimate.test()
        self.concentration = linear_algebra.inverse(self.covariance)
        self.det_concentration = linear_algebra.determinant(self.concentration)

        deviation = self.data - self.mean
        self.data_outerproduct = deviation[:,NewAxis,:] * deviation[:,:,NewAxis]
        self.total_data_outerproduct = sum(self.weighting[:,NewAxis,NewAxis]
                                           * self.data_outerproduct)

    def dimensions(self):
        # K parameters for the mean,
        # K*(K+1)/2 for the covariance (covariance matrix is symmetric)
        return self.K + self.K*(self.K+1)/2

    def neglog_prior(self):
        from scipy.special import gammaln

        h0_prior = log(self.det_concentration)

        log_gamma_sum = 0.0
        for i in xrange(1,self.K+1):
            log_gamma_sum += gammaln( (self.m-i)/2.0 )

        # (Wishart distribution)
        concentration_prior = (
              log(pi) * self.K*(self.K-1)/4.0
            - log(2) * self.K*(self.m-1)/2.0
            + log_gamma_sum
            - log(self.det_V) * (self.m-self.K-2)/2.0
            - log(self.det_concentration) * (self.m-1)/2.0
            + 0.5 * trace(dot(self.concentration, self.V)) )

        u0_deviation = self.u0 - self.mean
        mean_prior = neglog_multivariate_gaussian_likelihood(
            1.0, self.K, self.m1*self.concentration,
            outerproduct(u0_deviation, u0_deviation), 1.0 )
        # Was... (oops)
        #mean_prior = neglog_multivariate_gaussian_likelihood(
        #    1.0, self.K, self.m1*self.concentration,
        #    outerproduct(self.u0, self.u0), 1.0 )

        return h0_prior + concentration_prior + mean_prior

    def neglog_fisher(self):
        return - (self.K + self.K*(self.K+1)/2) * log(self.data_size) \
               + self.K*log(2.0) + self.K*log(self.det_concentration)

    def total_neglog_data(self):
        return neglog_multivariate_gaussian_likelihood(
            self.data_size, self.K, self.concentration,
            self.total_data_outerproduct, self.quantum)

    def neglog_data(self):
        return individual_neglog_multivariate_gaussian_likelihoods(
            self.K, self.concentration, self.data_outerproduct, self.quantum)


class Regression_estimate(Estimate):
    """ Regression with a Gaussian prior for the model weights.

        data : N-array of floats
            The data to be encoded.
        given_data : NxK-array of floats
            Some data known by both sender and receiver, which may be used to
            help transmit the data efficiently.  Each column should be
            normalized to have a standard deviation of one and a mean of zero.
            If a constant term is required (ie if the data has non-zero mean),
            include a column consisting entirely of ones.

        m : float
            The expected ratio of the variance of the model weights to the
            variance of the residual.  m=1 would be a neutral choice.

        Required only if an explicit length is needed:

        quantum : float
            The measurement accuracy of the data.
        max_sigma : float
            The maximum expected standard deviation of the residual.  A safe
            choice for this would be the standard deviation of the data.
    """

    parameters = [ "sigma", "model" ]

    def __init__(self, data, given_data, m=1.0, quantum=None, max_sigma=None,
                 weighting=None):
        Estimate.__init__(self, data, weighting)

        self.m = m
        self.given_data = given_data
        self.model_size = given_data.shape[1]

        transposed_weighted_given_data = transpose(given_data) * self.weighting
        self.left_hand_side = dot(transposed_weighted_given_data, given_data) + \
                              m * identity(self.model_size)
        right_hand_side = dot(transposed_weighted_given_data, data)

        self.model = \
            linear_algebra.solve_linear_equations(self.left_hand_side, right_hand_side)

        residual = data - dot(given_data, self.model)
        self.squared_residual = residual*residual
        self.total_squared_residual = sum(self.squared_residual*self.weighting)

        model_squared = dot(self.model, self.model)
        self.sigma = ((self.total_squared_residual+m*model_squared)
                      / self.data_size) ** 0.5

        if quantum is None or max_sigma is None:
            self.has_prior = False
        else:
            self.quantum = quantum
            self.max_sigma = max_sigma
            self.violates_prior = self.sigma < self.quantum or \
                                  self.sigma > self.max_sigma

    def _test_prep(self):
        # Used by Estimate.test()
        residual = self.data - dot(self.given_data, self.model)
        self.squared_residual = residual*residual
        self.total_squared_residual = sum(self.squared_residual*self.weighting)

    def dimensions(self):
        return self.model_size + 1

    def neglog_prior(self):
        R_sigma = log(self.max_sigma) - log(self.quantum)
        model_squared = dot(self.model, self.model)
        return ( log(R_sigma) + log(self.sigma)
                 + neglog_gaussian_likelihood(self.model_size,
                                              self.sigma / (self.m**0.5),
                                              model_squared, 1.0) )

    def neglog_fisher(self):
        return ( - log(2*(self.model_size+self.data_size))
                 - log(linear_algebra.determinant(self.left_hand_side))
                 + (self.model_size*2+2)*log(self.sigma) )

    def total_neglog_data(self):
        return neglog_gaussian_likelihood(self.data_size, self.sigma,
                                          self.total_squared_residual, self.quantum)

    def neglog_data(self):
        return individual_neglog_gaussian_likelihoods(
            self.sigma, self.squared_residual, self.quantum)


class Hidden_factor_estimate(Estimate):
    """ Model of multivariate data as having a single hidden factor, as
        described in the paper "Single Factor Analysis by MML Estimation" by
        C.S. Wallace and P.R. Freeman.

        data : NxK-array of floats
            The data to be encoded.

        Required only if an explicit length is needed:

        mean_range : Kx2-array of Float
            Anticipated minimum and maximum values for the mean.
        quantum : K-array of Float
            Measurement accuracy.
        max_sigma : K-array of Float
            The maximum anticipated standard deviation of the residual after
            removal of the mean and hidden factor.  A safe choice for this
            would be the standard deviation of the data.

        Note: Length includes 1 bit to describe whether there is a hidden
        factor or not.
    """

    parameters = [ "mean", "sigma", "has_factor", "factor_loads", "factor_scores" ]

    accuracy = 1e-4   # Allowable relative error in factor_loads

    def __init__(self, data, mean_range=None, quantum=None, max_sigma=None,
                 weighting=None):
        Estimate.__init__(self, data, weighting)

        self.K = data.shape[1]

        weighted_data = transpose(transpose(data)*self.weighting)
        self.mean = sum(weighted_data) / self.data_size

        deviation = data - self.mean

        data_outerproduct = sum( self.weighting[:,NewAxis,NewAxis]
                                 * deviation[:,:,NewAxis]
                                 * deviation[:,NewAxis,:] )

        sum_squares = diagonal(data_outerproduct)
        root_sum_squares = sqrt(sum_squares)
        correlation = data_outerproduct / outerproduct(root_sum_squares,root_sum_squares)

        # NOTE: inefficient, we only need the largest eigenvalue/eigenvector pair
        eigenvalues, eigenvectors = linear_algebra.eigenvectors(correlation)
        largest = argmax(eigenvalues)
        b = eigenvectors[largest]
        b = b * sqrt(eigenvalues[largest] / dot(b,b))

        def length(b):
            # (omits constant terms)
            bb = b * b
            b2 = sum(bb)
            N_1 = self.data_size - 1.0
            sigma = sqrt(sum_squares / ((bb+1.0)*(self.data_size-1.0)))
            v_scale = (1.0-self.K/((self.data_size-1.0)*b2)) / (1+b2)

            #w = deviation / sigma
            #v = dot(w, b) * v_scale
            #v2 = dot(v,v)
            #residual = w - b[NewAxis,:]*v[:,NewAxis]
            # ... however we can calculate what we need using just the
            # covariance matrix:
            Y = data_outerproduct / outerproduct(sigma,sigma)
            Ybb = sum(sum(Y * outerproduct(b,b)))
            wbv = Ybb * v_scale
            v2 = wbv * v_scale
            residual2 = (b2+1.0)*N_1 - 2*wbv + b2*v2

            return N_1*sum(log(sigma)) \
                   + 0.5*self.K*log(max(1e-30, self.data_size*v2)) \
                   + 0.5*(N_1+self.K)*log(1+b2) \
                   + 0.5*v2 \
                   + 0.5*residual2

        from scipy.optimize import fmin
        b = fmin(length, b, xtol=self.accuracy, ftol=1e-10, disp=0)

        b2 = dot(b,b)
        if b2 * (1-self.accuracy*2)**2 * (self.data_size-1.0) <= self.K:
            b[:] = 0.0
            b2 = 0.0

        self.sigma = sqrt(sum_squares / ((b*b+1.0)*(self.data_size-1.0)))

        # Alternative calculation method: the iteration described in Wallace & Freeman
        #while 1:
        #    bb = b * b
        #    b2 = sum(bb)
        #    if b2 * (self.data_size-1.0) < self.K:
        #        b[:] = 0.0
        #        b2 = 0.0
        #
        #    self.sigma = sqrt(sum_squares / ((bb+1.0)*(self.data_size-1.0)))
        #
        #    if b2 == 0.0: break
        #
        #    Y = data_outerproduct / outerproduct(self.sigma,self.sigma)
        #
        #    old_b = b
        #
        #    b = ( dot(Y,b)
        #          * ( (1.0-self.K/((self.data_size-1.0)*b2))
        #              / ((self.data_size-1.0)*(1.0+b2)) ) )
        #
        #    difference = b-old_b
        #    if dot(difference,difference) < self.sufficiently_close**2:
        #        break

        # Irrelevant, but nicer
        if sum(b) < 0.0:
            b *= -1.0

        self.factor_loads = b * self.sigma
        self.has_factor = (b2 > 0.0)

        if self.has_factor:
            self.factor_scores = dot(deviation/self.sigma,b) \
                * ( (1.0-self.K/((self.data_size-1.0)*b2)) / (1+b2) )
        else:
            self.parameters = self.parameters[:4]
            self.factor_scores = zeros(self.data.shape[0], Float)

        if quantum is None or max_sigma is None or mean_range is None:
            self.has_prior = False
        else:
            self.quantum = quantum
            self.max_sigma = max_sigma
            self.mean_range = mean_range

            for i in xrange(self.K):
                if not mean_range[i,0] < self.mean[i] < mean_range[i,1] or \
                   not quantum[i] < self.sigma[i] < max_sigma[i]:
                    self.violates_prior = True

    def dimensions(self):
        if not self.has_factor:
            # mean, sigma
            return self.K * 2
        # mean, sigma, factor_loads, factor_scores
        return self.K * 3 + self.data_size

    def neglog_prior(self):
        neglog_hasfactor_prior = -log(0.5)

        neglog_mean_prior = sum(log(self.mean_range[:,1] - self.mean_range[:,0]))

        neglog_sigma_prior = ( sum(log(log(self.max_sigma)-log(self.quantum)))
                               + sum(log(self.sigma)) )

        if not self.has_factor:
            return ( neglog_hasfactor_prior
                     + neglog_mean_prior
                     + neglog_sigma_prior )
        def B(K):
            if K == 1:
                return pi/2
            elif K == 2:
                return pi
            else:
                return 2.0*pi*B(K-2)/(K-1.0)

        b = self.factor_loads / self.sigma
        neglog_factor_loading_prior = ( log(B(self.K))
                                        + (self.K-1)/2.0 * log(1+dot(b,b)) )

        sum_square_factor_scores = sum(self.factor_scores*self.factor_scores*self.weighting)
        neglog_factor_score_prior = neglog_gaussian_likelihood(
            self.data_size, 1.0, sum_square_factor_scores, 1.0)

        return ( neglog_hasfactor_prior
                 + neglog_mean_prior
                 + neglog_sigma_prior
                 + neglog_factor_loading_prior
                 + neglog_factor_score_prior )

    def neglog_fisher(self):
        if not self.has_factor:
            return self.K*(-log(2)-2*log(self.data_size)) + 4*sum(log(self.sigma))

        weighted_scores = self.weighting * self.factor_scores
        sum_square_factor_scores = dot(weighted_scores,self.factor_scores)
        sum_factor_scores = sum(weighted_scores)

        b = self.factor_loads / self.sigma

        return ( -self.K * log(2.0*self.data_size)
                 -self.K * log(self.data_size*sum_square_factor_scores
                               - sum_factor_scores)
                 -(self.data_size-2.0) * log(1 + dot(b,b))
                 +6.0*sum(log(self.sigma)) )

    def neglog_data(self):
        result = zeros(len(self.data), Float)
        for i in xrange(self.K):
            column = ( self.data[:,i]
                       - self.mean[i]
                       - self.factor_loads[i]*self.factor_scores )
            result += individual_neglog_gaussian_likelihoods(
                self.sigma[i], column*column, self.quantum[i] )
        return result


if __name__ == "__main__":
    # Run tests on the various estimators if not "import"ed.

    if 1:
        data = random_array.normal(10.0, 1.0, 100)

        model = Gaussian_estimate(data, (-100.0,100.0), 0.1, 0.5)
        print model
        model.test()
        print

        model = Gaussian_estimate(data, (-100.0,100.0), 0.1, 1000.0)
        print model
        model.test()
        print

    if 1:
        data = random_array.random_integers(4,0, 100)

        model = Discrete_estimate(data, 5)
        print model
        model.test()
        print

        model = Discrete_estimate(data, 5, array([0.25,1.0,1.0,1.0,10.0]))
        print model
        model.test()
        print

    if 1:
        data = random_array.normal(5.0, 1.0, (10000, 5))
        u0 = zeros(5, Float)
        V = identity(5, Float)

        model = Multivariate_gaussian_estimate(data, 1.0, 6.0, V, 1.0, u0)
        print model
        model.test()
        print

    if 1:
        given_data = random_array.normal(0.0, 1.0, (100,2))
        data = given_data[:,0] + given_data[:,1] + random_array.normal(0.0, 1.0, 100)

        model = Regression_estimate(data, given_data, 1.0, 0.1, 10.0)
        print model
        model.test()
        print

    if 1:
        mean_range = array([[-10.0, 10.0]]*5)
        quantum = array([0.01]*5)
        max_sigma = array([10.0]*5)

        data = random_array.normal(5.0, 0.1, (1000, 5))

        model = Hidden_factor_estimate(data, mean_range, quantum, max_sigma)
        print model

        total = 0.0
        for i in xrange(5):
            temp = Gaussian_estimate(data[:,i], (-10.0,10.0), 0.01, 10.0)
            total += temp.length()
        print "As 5 independent Gaussians would yield length %.2f" % total

        model.test()
        print

        data += outerproduct(random_array.normal(0.0, 1.0, 1000),
                             [1.0,2.0,0.0,0.0,-0.5])

        model = Hidden_factor_estimate(data, mean_range, quantum, max_sigma)
        print model

        total = 0.0
        for i in xrange(5):
            temp = Gaussian_estimate(data[:,i], (-10.0,10.0), 0.01, 10.0)
            total += temp.length()
        print "As 5 independent Gaussians would yield length %.2f" % total

        model.test()
        print
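    if 1:
        # A minimal sketch of mixture-style use of the "weighting" argument
        # described in the Estimate docstring: two Gaussian_estimates share
        # bimodal data using a crude hard assignment around an assumed split
        # point of 5.0 (a real mixture model would iterate soft
        # responsibilities instead).  The prior bounds below are assumptions
        # chosen only for illustration.
        data = concatenate((random_array.normal(0.0, 1.0, 100),
                            random_array.normal(10.0, 1.0, 100)))
        weight_high = greater(data, 5.0).astype(Float)
        weight_low = 1.0 - weight_high

        low_component = Gaussian_estimate(data, (-100.0,100.0), 0.1, 1000.0,
                                          weighting=weight_low)
        high_component = Gaussian_estimate(data, (-100.0,100.0), 0.1, 1000.0,
                                           weighting=weight_high)
        print low_component
        print high_component
        print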