# Licensed under an MIT open source license - see LICENSE
import numpy as np
import scipy.ndimage as nd
from scipy.stats import scoreatpercentile, nanmean, nanstd
from scipy.interpolate import InterpolatedUnivariateSpline
from astropy.convolution import Gaussian2DKernel, convolve_fft
from operator import itemgetter
from itertools import groupby
try:
from scipy.fftpack import fft2
except ImportError:
from numpy.fft import fft2
[docs]class Genus(object):
"""
Genus Statistics based off of Chepurnov et al. (2008).
Parameters
----------
img - numpy.ndarray
2D image.
lowdens_thresh : float, optional
Lower threshold of the data to use.
highdens_thresh : float, optional
Upper threshold of the data to use.
numpts : int, optional
Number of thresholds to calculate statistic at.
smoothing_radii : list, optional
Kernel radii to smooth data to.
save_name : str, optional
Object or region name. Used when plotting.
"""
def __init__(self, img, lowdens_thresh=0, highdens_thresh=100, numpts=100,
smoothing_radii=None, save_name=None):
super(Genus, self).__init__()
self.img = img
if save_name is None:
self.save_name = "Untitled"
else:
self.save_name = save_name
self.nanflag = False
if np.isnan(self.img).any():
self.nanflag = True
self.lowdens_thresh = scoreatpercentile(img[~np.isnan(img)],
lowdens_thresh)
self.highdens_thresh = scoreatpercentile(img[~np.isnan(img)],
highdens_thresh)
self.thresholds = np.linspace(
self.lowdens_thresh, self.highdens_thresh, numpts)
if smoothing_radii is not None:
assert isinstance(smoothing_radii, list)
self.smoothing_radii = smoothing_radii
else:
self.smoothing_radii = np.linspace(1.0, 0.1 * min(img.shape), 5)
self.genus_stats = np.empty([numpts, len(self.smoothing_radii)])
self.fft_images = []
self.smoothed_images = []
[docs] def make_smooth_arrays(self):
'''
Smooth data using a Gaussian kernel.
'''
for i, width in enumerate(self.smoothing_radii):
kernel = Gaussian2DKernel(
width, x_size=self.img.shape[0], y_size=self.img.shape[1])
if self.nanflag:
self.smoothed_images.append(
convolve_fft(self.img, kernel,
normalize_kernel=True,
interpolate_nan=True))
else:
self.smoothed_images.append(convolve_fft(self.img, kernel))
return self
# def clean_fft(self):
# for j, image in enumerate(self.smoothed_images):
# self.fft_images.append(fft2(image))
# return self
[docs] def make_genus_curve(self):
'''
Create the genus curve.
'''
self.genus_stats = compute_genus(self.smoothed_images, self.thresholds)
return self
[docs] def run(self, verbose=False):
'''
Run the whole statistic.
Parameters
----------
verbose : bool, optional
Enables plotting.
'''
self.make_smooth_arrays()
# self.clean_fft()
self.make_genus_curve()
if verbose:
import matplotlib.pyplot as p
num = len(self.smoothing_radii)
for i in range(1, num + 1):
p.subplot(num / 2, 2, i)
p.title(
"".join(["Smooth Size: ",
str(self.smoothing_radii[i - 1])]))
p.plot(self.thresholds, self.genus_stats[i - 1], "bD")
p.grid(True)
p.show()
return self
def compute_genus(images, thresholds):
'''
Computes the Genus Statistic.
Parameters
----------
image : list of numpy.ndarray OR a single numpy.ndarray
Images(s) to compute the Genus of.
thresholds : list or numpy.ndarray
Thresholds to calculate the statistic at.
Returns
-------
genus_stats : array
The calculated statistic.
'''
if not isinstance(images, list):
images = [images]
genus_stats = np.empty((len(images), len(thresholds)))
for j, image in enumerate(images):
for i, thresh in enumerate(thresholds):
high_density = remove_small_objects(
image > thresh, min_size=4, connectivity=1)
low_density = remove_small_objects(
image < thresh, min_size=4, connectivity=1)
high_density_labels, high_density_num = nd.label(
high_density, np.ones((3, 3))) # eight-connectivity
low_density_labels, low_density_num = nd.label(
low_density, np.ones((3, 3))) # eight-connectivity
genus_stats[j, i] = high_density_num - low_density_num
# genus_stats[j,:] = clip_genus(genus_stats[j,:])
return genus_stats
def clip_genus(genus_curve, length_threshold=5):
'''
Clip out uninteresting regions in the genus curve
(large regions with value of 0).
Parameters
----------
genus_curve : array
Computed genus curve.
length_threshold : int, optional
Minimum length to warrant clipping.
Returns
-------
genus_curve : numpy.ndarray
Clipped Genus Curve.
'''
zeros = np.where(genus_curve == 0)
continuous_sections = []
for _, g in groupby(enumerate(zeros[0]), lambda (i, x): i - x):
continuous_sections.append(map(itemgetter(1), g))
try:
max_cont_section = max(continuous_sections, key=len)
except ValueError:
max_cont_section = []
if len(max_cont_section) >= length_threshold:
genus_curve[max_cont_section] = np.NaN
return genus_curve
[docs]class GenusDistance(object):
"""
Distance Metric for the Genus Statistic.
Parameters
----------
img1 - numpy.ndarray
2D image.
img2 - numpy.ndarray
2D image.
smoothing_radii : list, optional
Kernel radii to smooth data to.
fiducial_model : Genus
Computed Genus object. Use to avoid recomputing.
"""
def __init__(self, img1, img2, smoothing_radii=None, fiducial_model=None):
super(GenusDistance, self).__init__()
# Standardize the intensity values in the images
img1 = normalize(img1)
img2 = normalize(img2)
if fiducial_model is not None:
self.genus1 = fiducial_model
else:
self.genus1 = Genus(
img1, smoothing_radii=smoothing_radii, lowdens_thresh=20).run()
self.genus2 = Genus(
img2, smoothing_radii=smoothing_radii, lowdens_thresh=20).run()
self.distance = None
[docs] def distance_metric(self, verbose=False):
'''
Data is centered and normalized (via normalize).
The distance is the difference between cubic splines of the curves.
Parameters
----------
verbose : bool, optional
Enables plotting.
'''
# 2 times the average number between the two
num_pts = \
int((len(self.genus1.thresholds) + len(self.genus2.thresholds))/2)
# Get the min and the max of the thresholds
min_pt = max(np.min(self.genus1.thresholds),
np.min(self.genus2.thresholds))
max_pt = min(np.max(self.genus1.thresholds),
np.max(self.genus2.thresholds))
points = np.linspace(min_pt, max_pt, 2*num_pts)
interp1 = \
InterpolatedUnivariateSpline(self.genus1.thresholds,
self.genus1.genus_stats[0, :], k=3)
interp2 = \
InterpolatedUnivariateSpline(self.genus2.thresholds,
self.genus2.genus_stats[0, :], k=3)
self.distance = np.nansum(np.abs(interp1(points) -
interp2(points))) / len(points)
if verbose:
import matplotlib.pyplot as p
p.plot(self.genus1.thresholds,
self.genus1.genus_stats[0, :], "bD",
label=self.genus1.save_name)
p.plot(self.genus2.thresholds,
self.genus2.genus_stats[0, :], "gD",
label=self.genus2.save_name)
p.plot(points, interp1(points), "b")
p.plot(points, interp2(points), "g")
p.grid(True)
p.legend(loc="upper right")
p.show()
return self
def normalize(data):
av_val = nanmean(data, axis=None)
st_dev = nanstd(data, axis=None)
return (data - av_val) / st_dev
def remove_small_objects(arr, min_size, connectivity=8):
'''
Remove objects less than the given size.
Function is based on skimage.morphology.remove_small_objects
Parameters
----------
arr : numpy.ndarray
Binary array containing the mask.
min_size : int
Smallest allowed size.
connectivity : int, optional
Connectivity of the neighborhood.
'''
struct = nd.generate_binary_structure(arr.ndim, connectivity)
labels, num = nd.label(arr, struct)
sizes = nd.sum(arr, labels, range(1, num + 1))
for i, size in enumerate(sizes):
if size >= min_size:
continue
posns = np.where(labels == i + 1)
arr[posns] = 0
return arr