Source code for turbustat.statistics.tsallis.tsallis

# Licensed under an MIT open source license - see LICENSE


import numpy as np
from scipy.stats import chisquare
from scipy.optimize import curve_fit

from ..stats_utils import standardize
from ..base_statistic import BaseStatisticMixIn
from ...io import common_types, twod_types, input_data


[docs]class Tsallis(BaseStatisticMixIn):
    """
    The Tsallis Distribution (see Tofflemire et al., 2011)

    Parameters
    ----------
    img : %(dtypes)s
        2D image.
    lags : numpy.ndarray or list
        Lags to calculate at.
    num_bins : int, optional
        Number of bins to use in the histograms.
    periodic : bool, optional
        Sets whether the boundaries are periodic.
    """

    __doc__ %= {"dtypes": " or ".join(common_types + twod_types)}

    def __init__(self, img, lags=None, num_bins=500, periodic=False):
        '''
        Parameters
        ----------

        periodic : bool, optional
                   Use for simulations with periodic boundaries.
        '''

        self.need_header_flag = False
        self.header = None

        self.data = input_data(img, no_header=True)
        self.num_bins = num_bins
        self.periodic = periodic

        if lags is None:
            self.lags = [1, 2, 4, 8, 16, 32, 64]
        else:
            self.lags = lags

        self.tsallis_arrays = np.empty(
            (len(self.lags), self.data.shape[0], self.data.shape[1]))
        self.tsallis_distrib = np.empty((len(self.lags), 2, num_bins))
        self.tsallis_fits = np.empty((len(self.lags), 7))

[docs]    def make_tsallis(self):
        '''
        Calculate the Tsallis distribution at each lag.
        We standardize each distribution such that it has a mean of zero and
        variance of one.
        '''

        for i, lag in enumerate(self.lags):
            if self.periodic:
                pad_img = self.data
            else:
                pad_img = np.pad(self.data, lag, padwithzeros)
            rolls = np.roll(pad_img, lag, axis=0) +\
                np.roll(pad_img, (-1) * lag, axis=0) +\
                np.roll(pad_img, lag, axis=1) +\
                np.roll(pad_img, (-1) * lag, axis=1)

            #  Remove the padding
            if self.periodic:
                clip_resulting = (rolls / 4.) - pad_img
            else:
                clip_resulting = (rolls[lag:-lag, lag:-lag] / 4.) -\
                    pad_img[lag:-lag, lag:-lag]
            # Normalize the data
            data = standardize(clip_resulting)

            # Ignore nans for the histogram
            hist, bin_edges = np.histogram(data[~np.isnan(data)],
                                           bins=self.num_bins)
            bin_centres = (bin_edges[:-1] + bin_edges[1:]) / 2
            normlog_hist = np.log10(hist / np.sum(hist, dtype="float"))

            # Keep results
            self.tsallis_arrays[i, :] = data
            self.tsallis_distrib[i, 0, :] = bin_centres
            self.tsallis_distrib[i, 1, :] = normlog_hist

[docs]    def fit_tsallis(self, sigma_clip=2):
        '''
        Fit the Tsallis distributions.

        Parameters
        ----------
        sigma_clip : float
            Sets the sigma value to clip data at.
        '''

        for i, dist in enumerate(self.tsallis_distrib):
            clipped = clip_to_sigma(dist[0], dist[1], sigma=sigma_clip)
            params, pcov = curve_fit(tsallis_function, clipped[0], clipped[1],
                                     p0=(-np.max(clipped[1]), 1., 2.),
                                     maxfev=100 * len(dist[0]))
            fitted_vals = tsallis_function(clipped[0], *params)
            self.tsallis_fits[i, :3] = params
            self.tsallis_fits[i, 3:6] = np.diag(pcov)
            self.tsallis_fits[i, 6] = chisquare(
                np.exp(fitted_vals), f_exp=np.exp(clipped[1]), ddof=3)[0]

[docs]    def run(self, verbose=False, sigma_clip=2, save_name=None):
        '''
        Run all steps.

        Parameters
        ----------
        verbose : bool, optional
            Enables plotting.
        sigma_clip : float
            Sets the sigma value to clip data at.
            Passed to :func:`fit_tsallis`.
        save_name : str,optional
            Save the figure when a file name is given.
        '''

        self.make_tsallis()
        self.fit_tsallis(sigma_clip=sigma_clip)

        if verbose:
            import matplotlib.pyplot as p
            num = len(self.lags)

            i = 1
            for dist, arr, params in zip(self.tsallis_distrib,
                                         self.tsallis_arrays,
                                         self.tsallis_fits):

                p.subplot(num, 2, i)
                # This doesn't plot the last image
                p.imshow(arr, origin="lower", interpolation="nearest")
                p.colorbar()
                p.subplot(num, 2, i + 1)
                p.plot(dist[0], tsallis_function(dist[0], *params), "r")
                p.plot(dist[0], dist[1], 'rD', label="".join(
                    ["Tsallis Distribution with Lag ", str(self.lags[i / 2])]))
                p.legend(loc="best")

                i += 2

            if save_name is not None:
                p.savefig(save_name)
                p.close()
            else:
                p.show()

        return self


[docs]class Tsallis_Distance(object):

    '''
    Distance Metric for the Tsallis Distribution.

    Parameters
    ----------
    array1 : %(dtypes)s
        2D datas.
    array2 : %(dtypes)s
        2D datas.
    lags : numpy.ndarray or list
        Lags to calculate at.
    num_bins : int, optional
        Number of bins to use in the histograms.
    fiducial_model : Tsallis
        Computed Tsallis object. use to avoid recomputing.
    periodic : bool, optional
        Sets whether the boundaries are periodic.
    '''

    __doc__ %= {"dtypes": " or ".join(common_types + twod_types)}

    def __init__(self, array1, array2, lags=None, num_bins=500,
                 fiducial_model=None, periodic=False):
        super(Tsallis_Distance, self).__init__()

        if fiducial_model is not None:
            self.tsallis1 = fiducial_model
        else:
            self.tsallis1 = Tsallis(
                array1, lags=lags, num_bins=num_bins,
                periodic=periodic).run(verbose=False)

        self.tsallis2 = Tsallis(
            array2, lags=lags, num_bins=num_bins,
            periodic=periodic).run(verbose=False)

        self.distance = None

[docs]    def distance_metric(self, verbose=False, save_name=None):
        '''

        We do not consider the parameter a in the distance metric. Since we
        are fitting to a PDF, a is related to the number of data points and
        is therefore not a true measure of the differences between the data
        sets. The distance is computed by summing the squared difference of
        the parameters, normalized by the sums of the squares, for each lag.
        The total distance the sum between the two parameters.

        Parameters
        ----------
        verbose : bool, optional
            Enables plotting.
        save_name : str,optional
            Save the figure when a file name is given.
        '''

        w1 = self.tsallis1.tsallis_fits[:, 1]
        w2 = self.tsallis2.tsallis_fits[:, 1]

        q1 = self.tsallis1.tsallis_fits[:, 2]
        q2 = self.tsallis2.tsallis_fits[:, 2]

        # diff_a = (a1-a2)**2.
        diff_w = (w1 - w2) ** 2. / (w1 ** 2. + w2 ** 2.)
        diff_q = (q1 - q2) ** 2. / (q1 ** 2. + q2 ** 2.)

        self.distance = np.sum(diff_w + diff_q)

        if verbose:
            import matplotlib.pyplot as p
            lags = self.tsallis1.lags
            p.plot(lags, diff_w, "rD", label="Difference of w")
            p.plot(lags, diff_q, "go", label="Difference of q")
            p.legend()
            p.xscale('log', basex=2)
            p.ylabel("Normalized Difference")
            p.xlabel("Lags (pixels)")
            p.grid(True)

            if save_name is not None:
                p.savefig(save_name)
                p.close()
            else:
                p.show()
        return self


def tsallis_function(x, *p):
    '''
    Tsallis distribution function as given in Tofflemire
    Implemented in log form

    Parameters
    ----------
    x : numpy.ndarray or list
        x-data
    params : list
        Contains the three parameter values.
    '''
    loga, wsquare, q = p
    return (-1 / (q - 1)) * (np.log10(1 + (q - 1) *
                                      (x ** 2. / wsquare)) + loga)


def clip_to_sigma(x, y, sigma=2):
    '''
    Clip to values between -2 and 2 sigma.

    Parameters
    ----------
    x : numpy.ndarray
        x-data
    y : numpy.ndarray
        y-data
    '''
    clip_mask = np.zeros(x.shape)
    for i, val in enumerate(x):
        if val < sigma or val > -sigma:
            clip_mask[i] = 1
    clip_x = x[np.where(clip_mask == 1)]
    clip_y = y[np.where(clip_mask == 1)]

    return clip_x[np.isfinite(clip_y)], clip_y[np.isfinite(clip_y)]


def padwithzeros(vector, pad_width, iaxis, kwargs):
    vector[:pad_width[0]] = 0
    vector[-pad_width[1]:] = 0
    return vector