Source code for turbustat.statistics.genus.genus

# Licensed under an MIT open source license - see LICENSE

import numpy as np
import scipy.ndimage as nd
from scipy.stats import scoreatpercentile, nanmean, nanstd
from scipy.interpolate import InterpolatedUnivariateSpline
from astropy.convolution import Gaussian2DKernel, convolve_fft
from operator import itemgetter
from itertools import groupby

try:
    from scipy.fftpack import fft2
except ImportError:
    from numpy.fft import fft2


[docs]class Genus(object): """ Genus Statistics based off of Chepurnov et al. (2008). Parameters ---------- img - numpy.ndarray 2D image. lowdens_thresh : float, optional Lower threshold of the data to use. highdens_thresh : float, optional Upper threshold of the data to use. numpts : int, optional Number of thresholds to calculate statistic at. smoothing_radii : list, optional Kernel radii to smooth data to. save_name : str, optional Object or region name. Used when plotting. """ def __init__(self, img, lowdens_thresh=0, highdens_thresh=100, numpts=100, smoothing_radii=None, save_name=None): super(Genus, self).__init__() self.img = img if save_name is None: self.save_name = "Untitled" else: self.save_name = save_name self.nanflag = False if np.isnan(self.img).any(): self.nanflag = True self.lowdens_thresh = scoreatpercentile(img[~np.isnan(img)], lowdens_thresh) self.highdens_thresh = scoreatpercentile(img[~np.isnan(img)], highdens_thresh) self.thresholds = np.linspace( self.lowdens_thresh, self.highdens_thresh, numpts) if smoothing_radii is not None: assert isinstance(smoothing_radii, list) self.smoothing_radii = smoothing_radii else: self.smoothing_radii = np.linspace(1.0, 0.1 * min(img.shape), 5) self.genus_stats = np.empty([numpts, len(self.smoothing_radii)]) self.fft_images = [] self.smoothed_images = []
[docs] def make_smooth_arrays(self): ''' Smooth data using a Gaussian kernel. ''' for i, width in enumerate(self.smoothing_radii): kernel = Gaussian2DKernel( width, x_size=self.img.shape[0], y_size=self.img.shape[1]) if self.nanflag: self.smoothed_images.append( convolve_fft(self.img, kernel, normalize_kernel=True, interpolate_nan=True)) else: self.smoothed_images.append(convolve_fft(self.img, kernel)) return self # def clean_fft(self): # for j, image in enumerate(self.smoothed_images): # self.fft_images.append(fft2(image)) # return self
[docs] def make_genus_curve(self): ''' Create the genus curve. ''' self.genus_stats = compute_genus(self.smoothed_images, self.thresholds) return self
[docs] def run(self, verbose=False): ''' Run the whole statistic. Parameters ---------- verbose : bool, optional Enables plotting. ''' self.make_smooth_arrays() # self.clean_fft() self.make_genus_curve() if verbose: import matplotlib.pyplot as p num = len(self.smoothing_radii) for i in range(1, num + 1): p.subplot(num / 2, 2, i) p.title( "".join(["Smooth Size: ", str(self.smoothing_radii[i - 1])])) p.plot(self.thresholds, self.genus_stats[i - 1], "bD") p.grid(True) p.show() return self
def compute_genus(images, thresholds): ''' Computes the Genus Statistic. Parameters ---------- image : list of numpy.ndarray OR a single numpy.ndarray Images(s) to compute the Genus of. thresholds : list or numpy.ndarray Thresholds to calculate the statistic at. Returns ------- genus_stats : array The calculated statistic. ''' if not isinstance(images, list): images = [images] genus_stats = np.empty((len(images), len(thresholds))) for j, image in enumerate(images): for i, thresh in enumerate(thresholds): high_density = remove_small_objects( image > thresh, min_size=4, connectivity=1) low_density = remove_small_objects( image < thresh, min_size=4, connectivity=1) high_density_labels, high_density_num = nd.label( high_density, np.ones((3, 3))) # eight-connectivity low_density_labels, low_density_num = nd.label( low_density, np.ones((3, 3))) # eight-connectivity genus_stats[j, i] = high_density_num - low_density_num # genus_stats[j,:] = clip_genus(genus_stats[j,:]) return genus_stats def clip_genus(genus_curve, length_threshold=5): ''' Clip out uninteresting regions in the genus curve (large regions with value of 0). Parameters ---------- genus_curve : array Computed genus curve. length_threshold : int, optional Minimum length to warrant clipping. Returns ------- genus_curve : numpy.ndarray Clipped Genus Curve. ''' zeros = np.where(genus_curve == 0) continuous_sections = [] for _, g in groupby(enumerate(zeros[0]), lambda (i, x): i - x): continuous_sections.append(map(itemgetter(1), g)) try: max_cont_section = max(continuous_sections, key=len) except ValueError: max_cont_section = [] if len(max_cont_section) >= length_threshold: genus_curve[max_cont_section] = np.NaN return genus_curve
[docs]class GenusDistance(object): """ Distance Metric for the Genus Statistic. Parameters ---------- img1 - numpy.ndarray 2D image. img2 - numpy.ndarray 2D image. smoothing_radii : list, optional Kernel radii to smooth data to. fiducial_model : Genus Computed Genus object. Use to avoid recomputing. """ def __init__(self, img1, img2, smoothing_radii=None, fiducial_model=None): super(GenusDistance, self).__init__() # Standardize the intensity values in the images img1 = normalize(img1) img2 = normalize(img2) if fiducial_model is not None: self.genus1 = fiducial_model else: self.genus1 = Genus( img1, smoothing_radii=smoothing_radii, lowdens_thresh=20).run() self.genus2 = Genus( img2, smoothing_radii=smoothing_radii, lowdens_thresh=20).run() self.distance = None
[docs] def distance_metric(self, verbose=False): ''' Data is centered and normalized (via normalize). The distance is the difference between cubic splines of the curves. Parameters ---------- verbose : bool, optional Enables plotting. ''' # 2 times the average number between the two num_pts = \ int((len(self.genus1.thresholds) + len(self.genus2.thresholds))/2) # Get the min and the max of the thresholds min_pt = max(np.min(self.genus1.thresholds), np.min(self.genus2.thresholds)) max_pt = min(np.max(self.genus1.thresholds), np.max(self.genus2.thresholds)) points = np.linspace(min_pt, max_pt, 2*num_pts) interp1 = \ InterpolatedUnivariateSpline(self.genus1.thresholds, self.genus1.genus_stats[0, :], k=3) interp2 = \ InterpolatedUnivariateSpline(self.genus2.thresholds, self.genus2.genus_stats[0, :], k=3) self.distance = np.nansum(np.abs(interp1(points) - interp2(points))) / len(points) if verbose: import matplotlib.pyplot as p p.plot(self.genus1.thresholds, self.genus1.genus_stats[0, :], "bD", label=self.genus1.save_name) p.plot(self.genus2.thresholds, self.genus2.genus_stats[0, :], "gD", label=self.genus2.save_name) p.plot(points, interp1(points), "b") p.plot(points, interp2(points), "g") p.grid(True) p.legend(loc="upper right") p.show() return self
def normalize(data): av_val = nanmean(data, axis=None) st_dev = nanstd(data, axis=None) return (data - av_val) / st_dev def remove_small_objects(arr, min_size, connectivity=8): ''' Remove objects less than the given size. Function is based on skimage.morphology.remove_small_objects Parameters ---------- arr : numpy.ndarray Binary array containing the mask. min_size : int Smallest allowed size. connectivity : int, optional Connectivity of the neighborhood. ''' struct = nd.generate_binary_structure(arr.ndim, connectivity) labels, num = nd.label(arr, struct) sizes = nd.sum(arr, labels, range(1, num + 1)) for i, size in enumerate(sizes): if size >= min_size: continue posns = np.where(labels == i + 1) arr[posns] = 0 return arr