Source code for turbustat.statistics.tsallis.tsallis

# Licensed under an MIT open source license - see LICENSE


import numpy as np
from scipy.stats import nanmean, nanstd, chisquare
from scipy.optimize import curve_fit


[docs]class Tsallis(object): """ The Tsallis Distribution (see Tofflemire et al., 2011) Parameters ---------- img : numpy.ndarray 2D image. lags : numpy.ndarray or list Lags to calculate at. num_bins : int, optional Number of bins to use in the histograms. periodic : bool, optional Sets whether the boundaries are periodic. """ def __init__(self, img, lags=None, num_bins=500, periodic=False): ''' Parameters ---------- periodic : bool, optional Use for simulations with periodic boundaries. ''' self.img = img self.num_bins = num_bins self.periodic = periodic if lags is None: self.lags = [1, 2, 4, 8, 16, 32, 64] else: self.lags = lags self.tsallis_arrays = np.empty( (len(self.lags), img.shape[0], img.shape[1])) self.tsallis_distrib = np.empty((len(self.lags), 2, num_bins)) self.tsallis_fits = np.empty((len(self.lags), 7))
[docs] def make_tsallis(self): ''' Calculate the Tsallis distribution at each lag. We standardize each distribution such that it has a mean of zero and variance of one. ''' for i, lag in enumerate(self.lags): if self.periodic: pad_img = self.img else: pad_img = np.pad(self.img, lag, padwithzeros) rolls = np.roll(pad_img, lag, axis=0) +\ np.roll(pad_img, (-1) * lag, axis=0) +\ np.roll(pad_img, lag, axis=1) +\ np.roll(pad_img, (-1) * lag, axis=1) # Remove the padding if self.periodic: clip_resulting = (rolls / 4.) - pad_img else: clip_resulting = (rolls[lag:-lag, lag:-lag] / 4.) -\ pad_img[lag:-lag, lag:-lag] # Normalize the data data = normalize(clip_resulting) # Ignore nans for the histogram hist, bin_edges = np.histogram(data[~np.isnan(data)], bins=self.num_bins) bin_centres = (bin_edges[:-1] + bin_edges[1:]) / 2 normlog_hist = np.log10(hist / np.sum(hist, dtype="float")) # Keep results self.tsallis_arrays[i, :] = data self.tsallis_distrib[i, 0, :] = bin_centres self.tsallis_distrib[i, 1, :] = normlog_hist return self
[docs] def fit_tsallis(self, sigma_clip=2): ''' Fit the Tsallis distributions. Parameters ---------- sigma_clip : float Sets the sigma value to clip data at. ''' for i, dist in enumerate(self.tsallis_distrib): clipped = clip_to_sigma(dist[0], dist[1], sigma=sigma_clip) params, pcov = curve_fit(tsallis_function, clipped[0], clipped[1], p0=(-np.max(clipped[1]), 1., 2.), maxfev=100 * len(dist[0])) fitted_vals = tsallis_function(clipped[0], *params) self.tsallis_fits[i, :3] = params self.tsallis_fits[i, 3:6] = np.diag(pcov) self.tsallis_fits[i, 6] = chisquare( np.exp(fitted_vals), f_exp=np.exp(clipped[1]), ddof=3)[0]
[docs] def run(self, verbose=False, sigma_clip=2): ''' Run all steps. Parameters ---------- verbose : bool, optional Enables plotting. sigma_clip : float Sets the sigma value to clip data at. Passed to :func:`fit_tsallis`. ''' self.make_tsallis() self.fit_tsallis(sigma_clip=sigma_clip) if verbose: import matplotlib.pyplot as p num = len(self.lags) i = 1 for dist, arr, params in zip(self.tsallis_distrib, self.tsallis_arrays, self.tsallis_fits): p.subplot(num, 2, i) # This doesn't plot the last image p.imshow(arr, origin="lower", interpolation="nearest") p.colorbar() p.subplot(num, 2, i + 1) p.plot(dist[0], tsallis_function(dist[0], *params), "r") p.plot(dist[0], dist[1], 'rD', label="".join( ["Tsallis Distribution with Lag ", str(self.lags[i / 2])])) p.legend(loc="upper left", prop={"size": 10}) i += 2 p.show() return self
[docs]class Tsallis_Distance(object): ''' Distance Metric for the Tsallis Distribution. Parameters ---------- array1 : numpy.ndarray 2D image. array2 : numpy.ndarray 2D image. lags : numpy.ndarray or list Lags to calculate at. num_bins : int, optional Number of bins to use in the histograms. fiducial_model : Tsallis Computed Tsallis object. use to avoid recomputing. periodic : bool, optional Sets whether the boundaries are periodic. ''' def __init__(self, array1, array2, lags=None, num_bins=500, fiducial_model=None, periodic=False): super(Tsallis_Distance, self).__init__() self.array1 = array1 self.array2 = array2 if fiducial_model is not None: self.tsallis1 = fiducial_model else: self.tsallis1 = Tsallis( array1, lags=lags, num_bins=num_bins, periodic=periodic).run(verbose=False) self.tsallis2 = Tsallis( array2, lags=lags, num_bins=num_bins, periodic=periodic).run(verbose=False) self.distance = None
[docs] def distance_metric(self, verbose=False): ''' We do not consider the parameter a in the distance metric. Since we are fitting to a PDF, a is related to the number of data points and is therefore not a true measure of the differences between the data sets. The distance is computed by summing the squared difference of the parameters, normalized by the sums of the squares, for each lag. The total distance the sum between the two parameters. Parameters ---------- verbose : bool, optional Enables plotting. ''' w1 = self.tsallis1.tsallis_fits[:, 1] w2 = self.tsallis2.tsallis_fits[:, 1] q1 = self.tsallis1.tsallis_fits[:, 2] q2 = self.tsallis2.tsallis_fits[:, 2] # diff_a = (a1-a2)**2. diff_w = (w1 - w2) ** 2. / (w1 ** 2. + w2 ** 2.) diff_q = (q1 - q2) ** 2. / (q1 ** 2. + q2 ** 2.) self.distance = np.sum(diff_w + diff_q) if verbose: import matplotlib.pyplot as p lags = self.tsallis1.lags p.plot(lags, diff_w, "rD", label="Difference of w") p.plot(lags, diff_q, "gD", label="Difference of q") p.legend() p.xscale('log', basex=2) p.grid(True) p.show() return self
def tsallis_function(x, *p): ''' Tsallis distribution function as given in Tofflemire Implemented in log form Parameters ---------- x : numpy.ndarray or list x-data params : list Contains the three parameter values. ''' loga, wsquare, q = p return (-1 / (q - 1)) * (np.log10(1 + (q - 1) * (x ** 2. / wsquare)) + loga) def clip_to_sigma(x, y, sigma=2): ''' Clip to values between -2 and 2 sigma. Parameters ---------- x : numpy.ndarray x-data y : numpy.ndarray y-data ''' clip_mask = np.zeros(x.shape) for i, val in enumerate(x): if val < sigma or val > -sigma: clip_mask[i] = 1 clip_x = x[np.where(clip_mask == 1)] clip_y = y[np.where(clip_mask == 1)] return clip_x[np.isfinite(clip_y)], clip_y[np.isfinite(clip_y)] def normalize(data): ''' Standardize the data. Parameters ---------- data : numpy.ndarray Data to standardize. ''' av_val = nanmean(data, axis=None) st_dev = nanstd(data, axis=None) return (data - av_val) / st_dev def padwithzeros(vector, pad_width, iaxis, kwargs): vector[:pad_width[0]] = 0 vector[-pad_width[1]:] = 0 return vector