# Licensed under an MIT open source license - see LICENSE
import numpy as np
from ..threeD_to_twoD import var_cov_cube
[docs]class PCA(object):
'''
Implementation of Principal Component Analysis (Heyer & Brunt, 2002)
Parameters
----------
cube : numpy.ndarray
Data cube.
n_eigs : int
Number of eigenvalues to compute.
'''
def __init__(self, cube, n_eigs=50):
super(PCA, self).__init__()
self.cube = cube
self.n_eigs = n_eigs
# Remove NaNs
self.cube[np.isnan(self.cube)] = 0
self.n_velchan = self.cube.shape[0]
self.eigvals = None
[docs] def compute_pca(self, mean_sub=False, normalize=True):
'''
Create the covariance matrix and its eigenvalues.
Parameters
----------
normalize : bool, optional
Normalize the set of eigenvalues by the 0th component.
'''
self.cov_matrix = var_cov_cube(self.cube, mean_sub=mean_sub)
all_eigsvals, eigvecs = np.linalg.eig(self.cov_matrix)
all_eigsvals = np.sort(all_eigsvals)[::-1] # Sort by maximum
self._var_prop = np.sum(all_eigsvals[:self.n_eigs]) / \
np.sum(all_eigsvals)
if normalize:
self.eigvals = all_eigsvals[:self.n_eigs] / all_eigsvals[0]
else:
self.eigvals = all_eigsvals[:self.n_eigs]
return self
@property
def var_proportion(self):
return self._var_prop
[docs] def run(self, verbose=False, normalize=True):
'''
Run method. Needed to maintain package standards.
Parameters
----------
verbose : bool, optional
Enables plotting.
normalize : bool, optional
See ```compute_pca```.
'''
self.compute_pca(normalize=normalize)
if verbose:
import matplotlib.pyplot as p
print 'Proportion of Variance kept: %s' % (self.var_proportion)
p.subplot(121)
p.imshow(self.cov_matrix, origin="lower", interpolation="nearest")
p.colorbar()
p.subplot(122)
p.bar(np.arange(1, self.n_eigs + 1), self.eigvals, 0.5, color='r')
p.xlim([0, self.n_eigs + 1])
p.xlabel('Eigenvalues')
p.ylabel('Variance')
p.show()
[docs]class PCA_Distance(object):
'''
Compare two data cubes based on the eigenvalues of the PCA decomposition.
The distance is the Euclidean distance between the eigenvalues.
Parameters
----------
cube1 : numpy.ndarray
Data cube.
cube2 : numpy.ndarray
Data cube.
n_eigs : int
Number of eigenvalues to compute.
fiducial_model : PCA
Computed PCA object. Use to avoid recomputing.
normalize : bool, optional
Sets whether to normalize the eigenvalues by the 0th eigenvalue.
'''
def __init__(self, cube1, cube2, n_eigs=50, fiducial_model=None,
normalize=True):
super(PCA_Distance, self).__init__()
self.cube1 = cube1
self.cube2 = cube2
if fiducial_model is not None:
self.pca1 = fiducial_model
else:
self.pca1 = PCA(self.cube1, n_eigs=n_eigs)
self.pca1.run(normalize=normalize)
self.pca2 = PCA(self.cube2, n_eigs=n_eigs)
self.pca2.run(normalize=normalize)
self.distance = None
[docs] def distance_metric(self, verbose=False):
'''
Computes the distance between the cubes.
Parameters
----------
verbose : bool, optional
Enables plotting.
'''
difference = np.abs((self.pca1.eigvals - self.pca2.eigvals) ** 2.)
self.distance = np.sqrt(np.sum(difference))
if verbose:
import matplotlib.pyplot as p
print "Proportions of total variance: 1 - %0.3f, 2 - %0.3f" % \
(self.pca1.var_proportion, self.pca2.var_proportion)
p.subplot(2, 2, 1)
p.imshow(
self.pca1.cov_matrix, origin="lower", interpolation="nearest")
p.colorbar()
p.title("PCA1")
p.subplot(2, 2, 3)
p.bar(np.arange(1, self.pca1.n_eigs + 1), self.pca1.eigvals, 0.5, color='r')
p.xlim([0, self.pca1.n_eigs + 1])
p.xlabel('Eigenvalues')
p.ylabel('Variance')
p.subplot(2, 2, 2)
p.imshow(
self.pca2.cov_matrix, origin="lower", interpolation="nearest")
p.colorbar()
p.title("PCA2")
p.subplot(2, 2, 4)
p.bar(np.arange(1, self.pca2.n_eigs + 1), self.pca2.eigvals, 0.5, color='r')
p.xlim([0, self.pca2.n_eigs + 1])
p.xlabel('Eigenvalues')
p.tight_layout()
p.show()
return self