# Source code for turbustat.statistics.pca.pca

# Licensed under an MIT open source license - see LICENSE


import numpy as np
import astropy.units as u
from warnings import warn

from ..base_statistic import BaseStatisticMixIn
from ...io import common_types, threed_types, input_data, find_beam_width

# PCA utilities
from ..threeD_to_twoD import var_cov_cube
from .width_estimate import WidthEstimate1D, WidthEstimate2D

# Fitting utilities
from ..fitting_utils import bayes_linear, leastsq_linear


class PCA(BaseStatisticMixIn):
    '''
    Implementation of Principal Component Analysis (Heyer & Brunt, 2002)

    Parameters
    ----------
    cube : %(dtypes)s
        Data cube.
    n_eigs : int
        Deprecated. Input using `~PCA.compute_pca` or `~PCA.run`.
    distance : `~astropy.units.Quantity`, optional
        Distance to object in physical units. The output spatial widths will
        be converted to the units given here.

    Examples
    --------
    >>> from turbustat.statistics import PCA
    >>> from spectral_cube import SpectralCube
    >>> import astropy.units as u
    >>> cube = SpectralCube.read("adv.fits")  # doctest: +SKIP
    >>> pca = PCA(cube, distance=250 * u.pc)  # doctest: +SKIP
    >>> pca.run(verbose=True)  # doctest: +SKIP
    '''

    __doc__ %= {"dtypes": " or ".join(common_types + threed_types)}

    def __init__(self, cube, n_eigs=None, distance=None):
        super(PCA, self).__init__()

        self.data, self.header = input_data(cube)

        # PCA requires a velocity spectral axis; fail early otherwise.
        _enforce_velocity_axis(self)

        self.spectral_shape = self.data.shape[0]

        if n_eigs is not None:
            # Fixed: a DeprecationWarning should be issued via `warn`, not
            # raised -- raising turned the deprecated keyword into a hard
            # error instead of a warning.
            warn("Input of n_eigs is deprecated. Use "
                 "inputs in `compute_pca` or `run`.", DeprecationWarning)

        if distance is not None:
            self.distance = distance

    @property
    def n_eigs(self):
        '''
        Number of eigenvalues kept from the decomposition.
        '''
        return self._n_eigs

    @n_eigs.setter
    def n_eigs(self, value):
        if value <= 0:
            raise ValueError("n_eigs must be > 0.")

        self._n_eigs = value
[docs] def compute_pca(self, mean_sub=False, n_eigs='auto', min_eigval=None, eigen_cut_method='value'): ''' Create the covariance matrix and its eigenvalues. If `mean_sub` is disabled, the first eigenvalue is dominated by the mean of the data, not the variance. Parameters ---------- mean_sub : bool, optional When enabled, subtracts the means of the channels before calculating the covariance. By default, this is disabled to match the Heyer & Brunt method. n_eigs : {int, 'auto'}, optional Number of eigenvalues to compute. The default setting is 'auto', which requires `min_eigval` to be set. Otherwise, the number of eigenvalues used can be set using an int. Setting to -1 will use all of the eigenvalues. min_eigval : float, optional The cut-off value to determine the number of important eigenvalues. When `eigen_cut_method` is `proportional`, min_eigval is the total proportion of variance described up to the Nth eigenvalue. When `eigen_cut_method` is `value`, min_eigval is the minimum variance described by that eigenvalue. eigen_cut_method : {'proportion', 'value'}, optional Set whether `min_eigval` is the proportion of variance determined up to the Nth eigenvalue (`proportion`) or the minimum value of variance (`value`). 
''' self.cov_matrix = var_cov_cube(self.data, mean_sub=mean_sub) all_eigsvals, eigvecs = np.linalg.eig(self.cov_matrix) all_eigsvals = np.real_if_close(all_eigsvals) eigvecs = eigvecs[:, np.argsort(all_eigsvals)[::-1]] all_eigsvals = np.sort(all_eigsvals)[::-1] # Sort by maximum if n_eigs == 'auto': if min_eigval is None: raise ValueError("min_eigval must be given when using " "n_eigs='auto'.") self.n_eigs = set_n_eigs(all_eigsvals, min_eigval, method=eigen_cut_method) elif n_eigs == -1: self.n_eigs = self.spectral_shape elif n_eigs < -1 or n_eigs > self.spectral_shape or n_eigs == 0: raise Warning("n_eigs must be less than the number of velocity" " channels ({}) or -1 for" " all".format(self.spectral_shape)) else: self.n_eigs = n_eigs if mean_sub: self._total_variance = np.sum(all_eigsvals) self._var_prop = np.sum(all_eigsvals[:self.n_eigs]) / \ self.total_variance else: self._total_variance = np.sum(all_eigsvals[1:]) self._var_prop = np.sum(all_eigsvals[1:self.n_eigs]) / \ self.total_variance self._eigvals = all_eigsvals self._eigvecs = eigvecs self._mean_sub = mean_sub
    @property
    def var_proportion(self):
        '''
        Proportion of variance described by the first `~PCA.n_eigs`
        eigenvalues.
        '''
        # Set by `compute_pca`; when the channel means were not subtracted,
        # the first eigenvalue is excluded from this proportion.
        return self._var_prop

    @property
    def total_variance(self):
        '''
        Total variance of all eigenvalues.
        '''
        return self._total_variance

    @property
    def eigvals(self):
        '''
        All eigenvalues, sorted from largest to smallest.
        '''
        return self._eigvals

    @property
    def eigvecs(self):
        '''
        All eigenvectors, with columns ordered to match `~PCA.eigvals`.
        '''
        return self._eigvecs
[docs] def eigimages(self, n_eigs=None): ''' Create eigenimages up to the n_eigs. Parameters ---------- n_eigs : None or int The number of eigenimages to create. When n_eigs is negative, the last -n_eig eigenimages are created. If None is given, the number in `~PCA.n_eigs` will be returned. Returns ------- eigimgs : `~numpy.ndarray` 3D array, where the first dimension if the number of eigenvalues. ''' if n_eigs is None: n_eigs = self.n_eigs if n_eigs > 0: iterat = xrange(n_eigs) elif n_eigs < 0: iterat = xrange(n_eigs, 0, 1) for ct, idx in enumerate(iterat): eigimg = np.zeros(self.data.shape[1:], dtype=float) for channel in range(self.data.shape[0]): if self._mean_sub: mean_value = np.nanmean(self.data[channel]) eigimg += np.nan_to_num((self.data[channel] - mean_value) * np.real_if_close( self.eigvecs[channel, idx])) else: eigimg += np.nan_to_num(self.data[channel] * np.real_if_close( self.eigvecs[channel, idx])) if ct == 0: eigimgs = eigimg else: eigimgs = np.dstack((eigimgs, eigimg)) return eigimgs.swapaxes(0, 2)
[docs] def autocorr_images(self, n_eigs=None): ''' Create the autocorrelation of the eigenimages. Parameters ---------- n_eigs : None or int The number of autocorrelation images to create. When n_eigs is negative, the last -n_eig autocorrelation images are created. If None is given, the number in `~PCA.n_eigs` will be returned. Returns ------- acors : np.ndarray 3D array, where the first dimension if the number of eigenvalues. ''' if n_eigs is None: n_eigs = self.n_eigs # Calculate the eigenimages eigimgs = self.eigimages(n_eigs=n_eigs) for idx, image in enumerate(eigimgs): fftx = np.fft.fft2(image) fftxs = np.conjugate(fftx) acor = np.fft.ifft2((fftx - fftx.mean()) * (fftxs - fftxs.mean())) acor = np.fft.fftshift(acor) if idx == 0: acors = acor.real else: acors = np.dstack((acors, acor.real)) return acors.swapaxes(0, 2)
[docs] def autocorr_spec(self, n_eigs=None): ''' Create the autocorrelation spectra of the eigenvectors. Parameters ---------- n_eigs : None or int The number of autocorrelation vectors to create. When n_eigs is negative, the last -n_eig autocorrelation vectors are created. If None is given, the number in `~PCA.n_eigs` will be returned. Returns ------- acors : np.ndarray 2D array, where the first dimension if the number of eigenvalues. ''' if n_eigs is None: n_eigs = self.n_eigs for idx in range(n_eigs): fftx = np.fft.fft(self.eigvecs[:, idx]) fftxs = np.conjugate(fftx) acor = np.fft.ifft((fftx - fftx.mean()) * (fftxs - fftxs.mean())) if idx == 0: acors = acor.real else: acors = np.dstack((acors, acor.real)) return acors.swapaxes(0, 1).squeeze()
[docs] def noise_ACF(self, n_eigs=-10): ''' Create the noise autocorrelation function based off of the eigenvalues beyond `PCA.n_eigs`. ''' if n_eigs is None: n_eigs = self.n_eigs acors = self.autocorr_images(n_eigs=n_eigs) noise_ACF = np.nansum(acors, axis=0) / float(n_eigs) return noise_ACF
[docs] def find_spatial_widths(self, method='contour', brunt_beamcorrect=True, beam_fwhm=None, physical_scales=True, distance=None): ''' Derive the spatial widths using the autocorrelation of the eigenimages. Parameters ---------- methods : {'contour', 'fit', 'interpolate', 'xinterpolate'}, optional Spatial fitting method to use. The default method is 'contour' (fits an ellipse to the 1/e contour about the peak; this is the method used by the Heyer & Brunt works). See `~turbustat.statistics.pca.WidthEstimate2D` for a description of all methods. brunt_beamcorrect : bool, optional Apply the beam correction described in Chris Brunt's `thesis <http://search.proquest.com/docview/304529913>`_. A beam will be searched for in the given header (looking for "BMAJ"). If this fails, the value must be given in `beam_fwhm` with angular units. beam_fwhm : None of `~astropy.units.Quantity`, optional When beam correction is enabled, the FWHM beam size can be given here. physical_scales : bool, optional Attempts to convert spatial pixel scales into physical sizes. A warning is raised if no distance is provided. distance : `~astropy.units.Quantity`, optional Distance to object in physical units. The output spatial widths will be converted to the units given here. ''' # Try reading beam width from the header is it is not given. 
if brunt_beamcorrect: if beam_fwhm is None: beam_fwhm = find_beam_width(self.header) acors = self.autocorr_images(n_eigs=self.n_eigs) noise_ACF = self.noise_ACF() self._spatial_width, self._spatial_width_error = \ WidthEstimate2D(acors, noise_ACF=noise_ACF, method=method, brunt_beamcorrect=brunt_beamcorrect, beam_fwhm=beam_fwhm, spatial_cdelt=self.header['CDELT2'] * u.deg) self._spatial_width = self._spatial_width * u.pix self._spatial_width_error = self._spatial_width_error * u.pix # If distance is given, convert to physical scale if distance is not None: self.distance = distance if physical_scales: if not hasattr(self, "_distance"): warn("Distance must be given to use physical units") else: self._spatial_width = self.to_physical(self._spatial_width) self._spatial_width_error = \ self.to_physical(self._spatial_width_error)
    @property
    def spatial_width(self):
        '''
        Spatial widths for the first `~PCA.n_eigs` components.
        '''
        # Set by `find_spatial_widths`; in pixel units unless a distance was
        # available for conversion to physical scales.
        return self._spatial_width

    @property
    def spatial_width_error(self):
        '''
        The 1-sigma error bounds on the spatial widths for the first
        `~PCA.n_eigs` components.
        '''
        return self._spatial_width_error
[docs] def find_spectral_widths(self, method='walk-down', physical_units=True): ''' Derive the spectral widths using the autocorrelation of the eigenvectors. Parameters ---------- method : {"walk-down", "fit", "interpolate"}, optional Spectral fitting method to use. The default method is 'walk-down' (starting at the peak, continue until reaching 1/e of the peak; this is the method used by the Heyer & Brunt works). See `~turbustat.statistics.pca.WidthEstimate1D` for a description of all methods. physical_units : bool, optional Enable conversion into spectral units, as defined within the header. ''' acorr_spec = self.autocorr_spec(n_eigs=self.n_eigs) self._spectral_width, self._spectral_width_error = \ WidthEstimate1D(acorr_spec, method=method) self._spectral_width = self._spectral_width * u.pix self._spectral_width_error = self._spectral_width_error * u.pix if physical_units: self._spectral_width = self._spectral_width.value * \ self.spectral_size self._spectral_width_error = self._spectral_width_error.value * \ self.spectral_size
    @property
    def spectral_width(self):
        '''
        Spectral widths for the first `~PCA.n_eigs` components.
        '''
        # Set by `find_spectral_widths`; in pixel units unless converted
        # using the header's spectral scale.
        return self._spectral_width

    @property
    def spectral_width_error(self):
        '''
        The error bounds on the spectral widths for the first
        `~PCA.n_eigs` components.
        '''
        return self._spectral_width_error
[docs] def fit_plaw(self, fit_method='odr', verbose=False, **kwargs): ''' Fit the size-linewidth relation. This is done through Orthogonal Distance Regression (via scipy), or through MCMC (requires installing `emcee <http://dan.iel.fm/emcee/current/>`_). Parameters ---------- fit_method : {'odr', 'bayes'}, optional Set the type of fitting to perform. Options are 'odr' (orthogonal distance regression) or 'bayes' (MCMC). Note that 'bayes' requires the emcee package to be installed. verbose : bool, optional Prints out additional information about the fitting and plots the solution. **kwargs Passed to `~turbustat.statistics.fitting_utils.bayes_linear` when fit_method is bayes. ''' if fit_method != 'odr' and fit_method != 'bayes': raise TypeError("fit_method must be 'odr' or 'bayes'.") # Hang on to this. We can't propagate the asymmetric errors when using # MCMC in sonic_length, and the laziest way is just to keep the samples # around, and estimate CIs directly. self._fit_method = fit_method # Only keep the width estimations that worked are_finite = np.isfinite(self.spectral_width) * \ np.isfinite(self.spatial_width) * \ np.isfinite(self.spatial_width_error) * \ np.isfinite(self.spectral_width_error) # Check to make sure there are enough points to fit to (minimum 2). num_pts = self.spectral_width.size - (~are_finite).sum() if num_pts < 2: raise Warning("Less then 2 valid points. Cannot fit model.") elif num_pts < 5: warn("There are less than 5 points to fit to. 
The fit will not be" " well constrained and results should be closely examined.") y = np.log10(self.spectral_width[are_finite].value) x = np.log10(self.spatial_width[are_finite].value) y_err = 0.434 * self.spectral_width_error[are_finite].value / \ self.spectral_width[are_finite].value x_err = 0.434 * self.spatial_width_error[are_finite].value / \ self.spatial_width[are_finite].value if fit_method == 'odr': params, errors = leastsq_linear(x, y, x_err, y_err, verbose=verbose) # Turn into +/- range values, as would be returned by the MCMC fit errors = np.vstack([params - errors, params + errors]).T else: params, errors, samps = bayes_linear(x, y, x_err, y_err, verbose=verbose, return_samples=True, **kwargs) self._samps = samps self._index = params[0] self._index_error_range = errors[0] # Take the intercept out of log scale self._intercept = 10 ** params[1] * self._spectral_width.unit self._intercept_error_range = 10 ** errors[1] * \ self._spectral_width.unit
    @property
    def index(self):
        '''
        Power-law index.
        '''
        return self._index

    @property
    def index_error_range(self):
        '''
        One-sigma error bounds on the index.
        '''
        return self._index_error_range

    @property
    def gamma(self):
        '''
        Slope of the size-linewidth relation with correction from Chris
        Brunt's `thesis <http://search.proquest.com/docview/304529913>`_.
        '''
        # Empirical broken-linear calibration applied to the fitted index.
        return float(brunt_index_correct(self.index))

    @property
    def gamma_error_range(self):
        '''
        One-sigma error bounds on gamma.
        '''
        # Apply the same calibration to each bound of the index range.
        return np.array([brunt_index_correct(val) for val in
                         self.index_error_range])

    @property
    def intercept(self):
        '''
        Intercept from the fits, converted out of the log value.
        '''
        return self._intercept

    @property
    def intercept_error_range(self):
        '''
        One-sigma error bounds on the intercept.
        '''
        return self._intercept_error_range
[docs] def model(self, x): ''' Model with the fit parameters from `~PCA.fit_plaw` ''' return self.index * x + np.log10(self.intercept)
    def sonic_length(self, T_k=10 * u.K, mu=1.36, use_gamma=True):
        '''
        Estimate of the sonic length based on a given temperature. Uses the
        intercept from the fit.

        Based on sonic.pro used in the Heyer & Brunt PCA implementation.

        Because error from the MCMC fit need not be symmetric, the MCMC
        samples are needed to provide the correct CIs for the sonic length.

        Parameters
        ----------
        T_k : `~astropy.units.Quantity`, optional
            Temperature given in units convertible to Kelvin.
        mu : float, optional
            Factor to multiply by m_H to account for He and metals.
        use_gamma : bool, optional
            Toggle whether to use `~PCA.gamma` or `~PCA.index`.

        Returns
        -------
        lambd : `~astropy.units.Quantity`
            Value of the sonic length. If distance was provided, this will
            be in the units given in the distance. Otherwise, the result
            will be in pixel units.
        lambd_error_range : `~astropy.units.Quantity`
            The 1-sigma bounds on the sonic length. The units will match
            lambd.
        '''
        import astropy.constants as const

        try:
            T_k = T_k.to(u.K)
        except u.UnitConversionError:
            raise u.UnitConversionError("Cannot convert T_k to Kelvin.")

        # Sound speed in m/s
        c_s = np.sqrt(const.k_B.decompose() * T_k / (mu * const.m_p))
        # Convert to pixel units, if spectral_width not in physical units
        c_s = c_s.to(self.spectral_width.unit,
                     equivalencies=self.spectral_equiv)

        if use_gamma:
            index = self.gamma
            index_error_range = self.gamma_error_range
        else:
            index = self.index
            index_error_range = self.index_error_range

        # Invert the fitted power law: the sonic length is the scale where
        # the size-linewidth relation reaches the sound speed.
        lambd = np.power(c_s / self.intercept, 1. / index)

        if self._fit_method == 'odr':
            # Added in quadrature and simplified
            index_err = np.abs(index - index_error_range[0])
            intercept_err = 0.434 * np.abs(self.intercept -
                                           self.intercept_error_range[0]) / \
                self.intercept

            term1 = np.log10(c_s / self.intercept) * \
                (index_err / index)
            term2 = intercept_err / self.intercept

            lambd_error = (lambd / index) * \
                np.sqrt(term1.value**2 + term2.value**2)

            lambd_error_range = np.array([lambd - lambd_error,
                                          lambd + lambd_error])
        else:
            # Don't propagate asymmetric errors in quadrature! Instead,
            # calculate CI directly from the samples.
            # NOTE(review): [15, 85] is a ~70% interval, close to but not
            # exactly 1-sigma (68%) -- confirm intent.
            percentiles = [15, 85]

            slopes = self._samps[0]
            intercepts = self._samps[1]

            if use_gamma:
                slopes = np.array([brunt_index_correct(slope)
                                   for slope in slopes])

            # NOTE(review): `slopes` is computed (and gamma-corrected)
            # above but never used -- the scalar best-fit `index` is the
            # exponent for every sample. This looks like a bug; confirm
            # whether the exponent should be the per-sample 1. / slopes.
            all_lambds = np.power(c_s / 10 ** intercepts, 1. / index)

            lambd_error_range = np.percentile(all_lambds, percentiles)

        lambd = lambd * self.spatial_width.unit
        lambd_error_range = lambd_error_range * self.spatial_width.unit

        return lambd, lambd_error_range
    def run(self, verbose=False, save_name=None, mean_sub=False,
            decomp_only=False, n_eigs='auto', min_eigval=None,
            eigen_cut_method='value', spatial_method='contour',
            spectral_method='walk-down', fit_method='odr', beam_fwhm=None,
            brunt_beamcorrect=True):
        '''
        Run the decomposition and fitting in one step.

        Parameters
        ----------
        verbose : bool, optional
            Enables plotting of the results.
        save_name : str,optional
            Save the figure when a file name is given.
        mean_sub : bool, optional
            See `~PCA.compute_pca`
        decomp_only : bool, optional
            Only run the PCA decomposition, not the entire procedure to
            derive the size-linewidth relation. This should be enabled when
            using PCA_Distance.
        n_eigs : {"auto", int}, optional
            See `~PCA.compute_pca`
        min_eigval : float, optional
            See `~PCA.compute_pca`
        eigen_cut_method : {'proportion', 'value'}, optional
            See `~PCA.compute_pca`
        spatial_method : str, optional
            See `~PCA.fit_spatial_widths`.
        spectral_method : str, optional
            See `~PCA.fit_spectral_widths`.
        fit_method : str, optional
            See `~PCA.fit_plaw`.
        beam_fwhm : None of `~astropy.units.Quantity`, optional
            See `~PCA.fit_spatial_widths`.
        brunt_beamcorrect : bool, optional
            See `~PCA.fit_spatial_widths`.
        '''
        self.compute_pca(mean_sub=mean_sub, n_eigs=n_eigs,
                         min_eigval=min_eigval,
                         eigen_cut_method=eigen_cut_method)

        # Run rest of the analysis
        if not decomp_only:
            self.find_spatial_widths(method=spatial_method,
                                     beam_fwhm=beam_fwhm,
                                     brunt_beamcorrect=brunt_beamcorrect)
            self.find_spectral_widths(method=spectral_method)
            self.fit_plaw(fit_method=fit_method)

        if verbose:
            # Imported lazily so plotting is only required when verbose.
            import matplotlib.pyplot as p

            print('Proportion of Variance kept: %s' % (self.var_proportion))
            print("Index: {0:.2f} ({1:.2f}, {2:.2f})"
                  .format(self.index, *self.index_error_range))
            print("Gamma: {0:.2f} ({1:.2f}, {2:.2f})"
                  .format(self.gamma, *self.gamma_error_range))

            # Compute sonic length assuming 10 K
            T_k = 10. * u.K
            sl, sl_range = self.sonic_length(T_k=T_k)
            print("Sonic length: {0:.3e} ({4:.3e}, {5:.3e}) {1} at {2} {3}"
                  .format(sl.value, sl.unit.to_string(), T_k.value,
                          T_k.unit.to_string(), *sl_range.value))

            # Covariance matrix (top left), eigenvalue spectrum (bottom
            # left), and the size-linewidth relation with the fit (right).
            p.subplot(221)
            p.imshow(self.cov_matrix, origin="lower",
                     interpolation="nearest")
            p.colorbar()
            p.subplot(223)
            p.bar(np.arange(1, self.n_eigs + 1),
                  self.eigvals[:self.n_eigs], 0.5, color='r')
            p.xlim([0, self.n_eigs + 1])
            p.xlabel('Eigenvalues')
            p.ylabel('Variance')
            p.subplot(122)
            p.errorbar(np.log10(self.spatial_width.value),
                       np.log10(self.spectral_width.value),
                       xerr=0.434 * self.spatial_width_error /
                       self.spatial_width,
                       yerr=0.434 * self.spectral_width_error /
                       self.spectral_width,
                       fmt='o', color='k')
            p.ylabel("log Linewidth / "
                     "{}".format(self.spectral_width.unit.to_string()))
            p.xlabel("log Spatial Length / "
                     "{}".format(self.spatial_width.unit.to_string()))
            xvals = \
                np.linspace(np.log10(np.nanmin(self.spatial_width).value),
                            np.log10(np.nanmax(self.spatial_width).value),
                            self.spatial_width.size * 10)
            p.plot(xvals, self.index * xvals +
                   np.log10(self.intercept.value), 'r-')
            # Some very large error bars makes it difficult to see the model
            x_range = \
                np.ptp(np.log10(self.spatial_width.value)
                       [np.isfinite(self.spatial_width)])
            y_range = \
                np.ptp(np.log10(self.spectral_width.value)
                       [np.isfinite(self.spectral_width)])
            # NOTE(review): y_range pads the x-limits and x_range pads the
            # y-limits below -- these look swapped; confirm intent.
            p.xlim([np.log10(np.nanmin(self.spatial_width.value)) -
                    y_range / 4,
                    np.log10(np.nanmax(self.spatial_width.value)) +
                    y_range / 4])
            p.ylim([np.log10(np.nanmin(self.spectral_width.value)) -
                    x_range / 4,
                    np.log10(np.nanmax(self.spectral_width.value)) +
                    x_range / 4])

            p.tight_layout()

            if save_name is not None:
                p.savefig(save_name)
                p.close()
            else:
                p.show()

        return self
class PCA_Distance(object):
    '''
    Compare two data cubes based on the eigenvalues of the PCA
    decomposition. The distance is the Euclidean distance between the
    eigenvalues.

    Parameters
    ----------
    cube1 : %(dtypes)s
        Data cube.
    cube2 : %(dtypes)s
        Data cube.
    n_eigs : int
        Number of eigenvalues to compute.
    fiducial_model : PCA
        Computed PCA object. Use to avoid recomputing.
    mean_sub : bool, optional
        Subtracts the mean before computing the covariance matrix. Not
        subtracting the mean is done in the original Heyer & Brunt works.
    '''

    __doc__ %= {"dtypes": " or ".join(common_types + threed_types)}

    def __init__(self, cube1, cube2, n_eigs=50, fiducial_model=None,
                 mean_sub=True):
        super(PCA_Distance, self).__init__()

        # A fixed eigenvalue count is required so the two decompositions are
        # directly comparable.
        if n_eigs == 'auto':
            raise ValueError("'auto' n_eigs mode is disabled for distance "
                             "computation. The metric requires having the same"
                             " number of eigenvalues to compare.")

        # Re-use a pre-computed decomposition for the first cube if given.
        if fiducial_model is None:
            self.pca1 = PCA(cube1)
            self.pca1.run(mean_sub=mean_sub, n_eigs=n_eigs, decomp_only=True)
        else:
            self.pca1 = fiducial_model

        self.pca2 = PCA(cube2)
        self.pca2.run(mean_sub=mean_sub, n_eigs=n_eigs, decomp_only=True)

        self._mean_sub = mean_sub
        self._n_eigs = n_eigs
[docs] def distance_metric(self, verbose=False, label1="Cube 1", label2="Cube 2", save_name=None): ''' Computes the distance between the cubes. Parameters ---------- verbose : bool, optional Enables plotting. label1 : str, optional Object or region name for cube1 label2 : str, optional Object or region name for cube2 save_name : str, optional Save the figure when a file name is given. ''' # The eigenvalues need to be normalized before being compared. If # mean_sub is False, the first eigenvalue is not used. if self._mean_sub: slicer = slice(0, self._n_eigs) else: slicer = slice(1, self._n_eigs) eigvals1 = self.pca1.eigvals[slicer] / \ np.sum(self.pca1.eigvals[slicer]) eigvals2 = self.pca2.eigvals[slicer] / \ np.sum(self.pca2.eigvals[slicer]) self.distance = np.linalg.norm(eigvals1 - eigvals2) if verbose: import matplotlib.pyplot as p print "Proportions of total variance: 1 - %0.3f, 2 - %0.3f" % \ (self.pca1.var_proportion, self.pca2.var_proportion) p.subplot(2, 2, 1) p.imshow( self.pca1.cov_matrix, origin="lower", interpolation="nearest", vmin=np.median(self.pca1.cov_matrix)) p.colorbar() p.title(label1) p.subplot(2, 2, 3) p.bar(np.arange(1, len(eigvals1) + 1), eigvals1, 0.5, color='r') p.xlim([0, self.pca1.n_eigs + 1]) p.xlabel('Eigenvalues') p.ylabel("Proportion of Variance") p.subplot(2, 2, 2) p.imshow( self.pca2.cov_matrix, origin="lower", interpolation="nearest", vmin=np.median(self.pca2.cov_matrix)) p.colorbar() p.title(label2) p.subplot(2, 2, 4) p.bar(np.arange(1, len(eigvals2) + 1), eigvals2, 0.5, color='r') p.xlim([0, self.pca2.n_eigs + 1]) p.xlabel('Eigenvalues') p.tight_layout() if save_name is not None: p.savefig(save_name) p.close() else: p.show() return self
def brunt_index_correct(value):
    '''
    Apply empirical corrections from Heyer & Brunt

    These values are based off a broken linear fit in Section 3.3.1 from
    Chris Brunt's thesis. These are based off calibrating against uniform
    density field's with different indices.

    Parameters
    ----------
    value : float
        Raw fitted power-law index.

    Returns
    -------
    float
        The corrected index (gamma).
    '''
    # Broken-linear calibration with the break at 0.67.
    if value < 0.67:
        return (value - 0.32) / 0.59
    else:
        return (value - 0.03) / 1.07


def set_n_eigs(eigenvalues, min_eigval, method='value'):
    '''
    Based on a minimum eigenvalue, find the number of components to
    consider. The cut-off may be the proportion of variance
    (method='proportion') or a minimum value for the variance
    (method='value')

    Parameters
    ----------
    eigenvalues : `~numpy.ndarray`
        Array of eigenvalues, assumed to be sorted from largest to smallest.
    min_eigval : float
        Value to determine the cut-off for important eigenvalues.
    method : {"value", "proportion"}, optional
        If `value`, `min_eigval` is the smallest eigenvalue to consider
        important. If `proportion`, `min_eigval` is the proportion of
        variance at which to cut at (i.e., 0.99 for 99%).

    Returns
    -------
    int
        The number of eigenvalues satisfying the given criteria, clamped to
        a minimum of 1.

    Raises
    ------
    ValueError
        If `method` is not 'value' or 'proportion'.
    '''
    if method == "value":
        above = np.where(eigenvalues >= min_eigval)[0]
    elif method == "proportion":
        cumulative = np.cumsum(eigenvalues / eigenvalues.sum())
        above = np.where(cumulative <= min_eigval)[0]
    else:
        raise ValueError("method must be 'value' or 'proportion'.")

    # Fixed: never return 0. The PCA.n_eigs setter rejects values <= 0, so
    # a cut-off stricter than the first component previously triggered a
    # confusing downstream ValueError; always keep at least one component.
    return max(above.size, 1)


def _enforce_velocity_axis(pca_obj):
    '''
    Enforce spectral_size be in velocity units.

    Raises a `Warning` (kept as-is for backwards compatibility with callers
    catching it, though an Exception subclass would normally be preferred)
    when the spectral axis is not in velocity units.
    '''
    if not pca_obj.spectral_size.unit.is_equivalent(u.m / u.s):
        raise Warning("PCA requires the spectral axis to be in velocity units."
                      " If using a spectral cube, perform this conversion with"
                      " 'cube_vel = cube.with_spectral_unit(u.m / u.s, "
                      "rest_value=113 * u.GHz)', changing to the appropriate"
                      " rest frequency and desired velocity unit.")