# Source code for lmpy.statistics.running_stats

"""Class for keeping track of running statistics to save memory.

Note:
    * Mean and standard deviation computations based on
        https://www.johndcook.com/blog/standard_deviation/
"""
# .............................................................................
from copy import deepcopy
import numpy as np

from lmpy import Matrix


# .............................................................................
def compare_absolute_values(observed, test_data):
    """Test whether the test data exceed the observed data in magnitude.

    Args:
        observed (:obj:`Numpy array`): A numpy array of observed values.
        test_data (:obj:`Numpy array`): A numpy array of random values.

    Returns:
        bool or array of bool: True wherever the absolute value of the test
            data is strictly greater than the absolute value of the observed
            data.  The comparison is element-wise for array inputs.
    """
    # Signs are discarded on both sides so only magnitudes are compared.
    test_magnitude = np.abs(test_data)
    observed_magnitude = np.abs(observed)
    return test_magnitude > observed_magnitude
# .............................................................................
def compare_signed_values(observed, test_data):
    """Test whether the test data exceed the observed data, keeping signs.

    Args:
        observed (:obj:`Numpy array`): A numpy array of observed values.
        test_data (:obj:`Numpy array`): A numpy array of random values.

    Returns:
        bool or array of bool: True wherever the test data is strictly greater
            than the observed data.  The comparison is element-wise for array
            inputs.
    """
    # Signs are retained, so a large negative test value does not count.
    exceeds_observed = test_data > observed
    return exceeds_observed
# .............................................................................
class RunningStats(object):
    """Keep track of running statistics to reduce required memory.

    Pushed values are folded one at a time into a running mean and a running
    sum of squared deviations (Welford's method), so the individual values
    never need to be stored.  When an observed value is provided, the number
    of pushed values for which ``compare_fn`` returns True is tallied as well
    so that p-values can be produced.
    """

    # .....................................
    def __init__(self, observed=None, compare_fn=compare_absolute_values):
        """Construct a RunningStats instance.

        Args:
            observed (numeric): The observed value to be used when computing
                an F-statistic.  It can be a single value or an array-type.
            compare_fn (:obj:method): A function used to compare pushed values
                to the observed statistic value.  It is called as
                ``compare_fn(observed, value)``.
        """
        # Number of values pushed so far (kept as a float).
        self.count = 0.0
        self.compare_fn = compare_fn
        self.observed = observed
        # Tally of pushed values for which compare_fn was True.  None is the
        # sentinel for "no observed value"; p_values relies on it.
        if observed is None:
            self.f_counts = None
        else:
            try:
                self.f_counts = Matrix(np.zeros(observed.shape))
            except Exception:
                # Best effort: observed values without a usable ``shape``
                # (e.g. plain Python numbers) fall back to a scalar tally.
                self.f_counts = 0.0
        # Running mean and running sum of squared deviations from the mean.
        self.mean = 0.0
        self.s_k = 0.0

    # .....................................
    def push(self, val):
        """Add a test value to the running totals.

        Args:
            val (Matrix, Numpy array, or numeric): A value to use for the
                running statistics.  A list is treated as a sequence of values
                that are pushed in order; an empty list is a no-op.
        """
        values = val if isinstance(val, list) else [val]
        if not values:
            # Nothing to add; this also avoids indexing an empty list below.
            return

        if self.count == 0 and isinstance(values[0], Matrix):
            # First value is a Matrix: switch the accumulators from scalars to
            # matrices of matching shape.
            shape = values[0].shape
            self.mean = Matrix(np.zeros(shape))
            self.s_k = Matrix(np.zeros(shape))
            # Only wrap the tally when there is one.  Wrapping None would
            # replace the "no observed value" sentinel and defeat the check
            # performed by p_values.
            if self.f_counts is not None:
                self.f_counts = Matrix(self.f_counts)

        for v in values:
            self.count += 1.0
            # No copy is needed: the update below rebinds self.mean to a new
            # object and never modifies the previous mean in place.
            mean_k_1 = self.mean
            self.mean = mean_k_1 + ((v - mean_k_1) / self.count)
            self.s_k = self.s_k + (v - mean_k_1) * (v - self.mean)
            if self.observed is not None:
                self.f_counts += self.compare_fn(self.observed, v)

    # .....................................
    @property
    def standard_deviation(self):
        """Retrieve the standard deviation of the test values.

        Returns:
            float or array-type: The square root of ``variance``.
        """
        return np.sqrt(self.variance)

    # .....................................
    @property
    def variance(self):
        """Retrieve the variance of the test values.

        Returns:
            float or array-type: The sample variance (divisor ``count - 1``)
                of the pushed values, or 0.0 when fewer than two values have
                been pushed.
        """
        if self.count > 1:
            return self.s_k / (self.count - 1)
        return 0.0

    # .....................................
    @property
    def p_values(self):
        """Retrieve p-values from the test values greater than the f-statistic.

        Note:
            The tally is divided by the number of pushed values, so this is
            only meaningful after at least one value has been pushed.

        Returns:
            Matrix: Computed p-values.

        Raises:
            Exception: Raised if there are no observed values.
        """
        if self.f_counts is None:
            raise Exception(
                'P-values cannot be computed without observed values')
        return self.f_counts / float(self.count)
# .............................................................................
# Public names exported by ``from <module> import *``.
__all__ = [
    'RunningStats',
    'compare_absolute_values',
    'compare_signed_values',
]