Source code for turbustat.statistics.wrapping_function

'''
Wrapper to run all of the statistics between two data sets.
For large-scale comparisons, this is the function that should be called.
'''

from turbustat.statistics import Wavelet_Distance, \
    MVC_distance, \
    PSpec_Distance, \
    BiSpectrum_Distance, \
    GenusDistance, \
    DeltaVariance_Distance, \
    VCA_Distance, \
    VCS_Distance, \
    Tsallis_Distance, \
    StatMomentsDistance, \
    PCA_Distance, \
    SCF_Distance, \
    Cramer_Distance, \
    DendroDistance, \
    PDF_Distance


def stats_wrapper(dataset1, dataset2, fiducial_models=None,
[docs] statistics=None, multicore=False, vca_break=None, vcs_break=None, dendro_params=None, dendro_saves=[None, None], cleanup=True): ''' Function to run all of the statistics on two datasets. Each statistic is run with set inputs. This function needs to be altered to change the inputs. Parameters ---------- dataset1 : dict Contains the cube and all of its property arrays. dataset2 : dict See dataset1 fiducial_models : dict, optional Models for dataset1. Avoids recomputing when comparing many sets to dataset1. statistics : list, optional List of all of the statistics to use. If None, all are run. multicore : bool, optional If the wrapper is being used in parallel, this disables returning model values for dataset1. dendro_params : dict or list, optional Provides parameters to use when computing the initial dendrogram. If different parameters are required for each dataset, the the input should be a list containing the two dictionaries. cleanup : bool, optional Delete distance classes after running. ''' if statistics is None: # Run them all statistics = ["Wavelet", "MVC", "PSpec", "Bispectrum", "DeltaVariance", "Genus", "VCS", "VCA", "VCS_Density", "VCS_Velocity", "VCS_Break", "Tsallis", "PCA", "SCF", "Cramer", "Skewness", "Kurtosis", "SCF", "PCA", "Dendrogram_Hist", "Dendrogram_Num", "PDF_Hellinger", "PDF_KS"] distances = {} # Calculate the fiducial case and return it for later use if fiducial_models is None: fiducial_models = {} if any("Wavelet" in s for s in statistics): wavelet_distance = \ Wavelet_Distance(dataset1["integrated_intensity"], dataset2["integrated_intensity"]).distance_metric() distances["Wavelet"] = wavelet_distance.distance if not multicore: fiducial_models["Wavelet"] = wavelet_distance.wt1 if cleanup: del wavelet_distance if any("MVC" in s for s in statistics): mvc_distance = MVC_distance(dataset1, dataset2).distance_metric() distances["MVC"] = mvc_distance.distance if not multicore: fiducial_models["MVC"] = mvc_distance.mvc1 if cleanup: del mvc_distance if any("PSpec" in s for s in statistics): pspec_distance = \ PSpec_Distance(dataset1["integrated_intensity"], dataset2["integrated_intensity"], weights1=dataset1["integrated_intensity_error"][0]**2., weights2=dataset2["integrated_intensity_error"][0]**2.).distance_metric() distances["PSpec"] = pspec_distance.distance if not multicore: fiducial_models["PSpec"] = pspec_distance.pspec1 if cleanup: del pspec_distance if any("Bispectrum" in s for s in statistics): bispec_distance = \ BiSpectrum_Distance(dataset1["integrated_intensity"], dataset2["integrated_intensity"]).distance_metric() distances["Bispectrum"] = bispec_distance.distance if not multicore: fiducial_models["Bispectrum"] = bispec_distance.bispec1 if cleanup: del bispec_distance if any("DeltaVariance" in s for s in statistics): delvar_distance = \ DeltaVariance_Distance(dataset1["integrated_intensity"], dataset2["integrated_intensity"], weights1=dataset1["integrated_intensity_error"][0], weights2=dataset2["integrated_intensity_error"][0]).distance_metric() distances["DeltaVariance"] = delvar_distance.distance if not multicore: fiducial_models["DeltaVariance"] = delvar_distance.delvar1 if cleanup: del delvar_distance if any("Genus" in s for s in statistics): genus_distance = \ GenusDistance(dataset1["integrated_intensity"][0], dataset2["integrated_intensity"][0]).distance_metric() distances["Genus"] = genus_distance.distance if not multicore: fiducial_models["Genus"] = genus_distance.genus1 if cleanup: del genus_distance if any("VCS" in s for s in statistics): vcs_distance = VCS_Distance(dataset1["cube"], dataset2["cube"], breaks=vcs_break).distance_metric() distances["VCS"] = vcs_distance.distance distances["VCS_Density"] = vcs_distance.density_distance distances["VCS_Velocity"] = vcs_distance.velocity_distance distances["VCS_Break"] = vcs_distance.break_distance if not multicore: fiducial_models["VCS"] = vcs_distance.vcs1 if cleanup: del vcs_distance if any("VCA" in s for s in statistics): vca_distance = VCA_Distance(dataset1["cube"], dataset2["cube"], breaks=vca_break).distance_metric() distances["VCA"] = vca_distance.distance if not multicore: fiducial_models["VCA"] = vca_distance.vca1 if cleanup: del vca_distance if any("Tsallis" in s for s in statistics): tsallis_distance = \ Tsallis_Distance(dataset1["integrated_intensity"][0], dataset2["integrated_intensity"][0]).distance_metric() distances["Tsallis"] = tsallis_distance.distance if not multicore: fiducial_models["Tsallis"] = tsallis_distance.tsallis1 if cleanup: del tsallis_distance if any("Skewness" in s for s in statistics) or\ any("Kurtosis" in s for s in statistics): moment_distance = \ StatMomentsDistance(dataset1["integrated_intensity"][0], dataset2["integrated_intensity"][0], 5).distance_metric() distances["Skewness"] = moment_distance.skewness_distance distances["Kurtosis"] = moment_distance.kurtosis_distance if not multicore: fiducial_models["stat_moments"] = moment_distance.moments1 if cleanup: del moment_distance if any("PCA" in s for s in statistics): pca_distance = \ PCA_Distance(dataset1["cube"][0], dataset2["cube"][0]).distance_metric() distances["PCA"] = pca_distance.distance if not multicore: fiducial_models["PCA"] = pca_distance.pca1 if cleanup: del pca_distance if any("SCF" in s for s in statistics): scf_distance = \ SCF_Distance(dataset1["cube"][0], dataset2["cube"][0]).distance_metric() distances["SCF"] = scf_distance.distance if not multicore: fiducial_models["SCF"] = scf_distance.scf1 if cleanup: del scf_distance if any("Cramer" in s for s in statistics): cramer_distance = \ Cramer_Distance(dataset1["cube"][0], dataset2["cube"][0]).distance_metric() distances["Cramer"] = cramer_distance.distance if cleanup: del cramer_distance if any("Dendrogram_Hist" in s for s in statistics) or \ any("Dendrogram_Num" in s for s in statistics): if dendro_saves[0] is None: input1 = dataset1["cube"][0] elif isinstance(dendro_saves[0], str): input1 = dendro_saves[0] else: raise UserWarning("dendro_saves must be the filename of the" " saved file.") if dendro_saves[1] is None: input2 = dataset2["cube"][0] elif isinstance(dendro_saves[1], str): input2 = dendro_saves[1] else: raise UserWarning("dendro_saves must be the filename of the" " saved file.") dendro_distance = DendroDistance(input1, input2, dendro_params=dendro_params) dendro_distance.distance_metric() distances["Dendrogram_Hist"] = dendro_distance.histogram_distance distances["Dendrogram_Num"] = dendro_distance.num_distance if not multicore: fiducial_models["Dendrogram"] = dendro_distance.dendro1 if cleanup: del dendro_distance if any("PDF_Hellinger" in s for s in statistics) or \ any("PDF_KS" in s for s in statistics) or \ any("PDF_AD" in s for s in statistics): pdf_distance = \ PDF_Distance(dataset1["integrated_intensity"][0], dataset2["integrated_intensity"][0], min_val1=0.05, min_val2=0.05, weights1=dataset1["integrated_intensity_error"][0] ** -2., weights2=dataset2["integrated_intensity_error"][0] ** -2.) pdf_distance.distance_metric() distances["PDF_Hellinger"] = pdf_distance.hellinger_distance distances["PDF_KS"] = pdf_distance.ks_distance distances["PDF_AD"] = pdf_distance.ad_distance if not multicore: fiducial_models["PDF"] = pdf_distance.PDF1 if cleanup: del pdf_distance if multicore: return distances else: return distances, fiducial_models else: if any("Wavelet" in s for s in statistics): wavelet_distance = \ Wavelet_Distance(dataset1["integrated_intensity"], dataset2["integrated_intensity"], fiducial_model=fiducial_models["Wavelet"]).distance_metric() distances["Wavelet"] = wavelet_distance.distance if cleanup: del wavelet_distance if any("MVC" in s for s in statistics): mvc_distance = \ MVC_distance(dataset1, dataset2, fiducial_model=fiducial_models["MVC"]).distance_metric() distances["MVC"] = mvc_distance.distance if cleanup: del mvc_distance if any("PSpec" in s for s in statistics): pspec_distance = \ PSpec_Distance(dataset1["integrated_intensity"], dataset2["integrated_intensity"], weight1=dataset1["integrated_intensity_error"][0]**2., weight2=dataset2["integrated_intensity_error"][0]**2., fiducial_model=fiducial_models["PSpec"]).distance_metric() distances["PSpec"] = pspec_distance.distance if cleanup: del pspec_distance if any("Bispectrum" in s for s in statistics): bispec_distance = \ BiSpectrum_Distance(dataset1["integrated_intensity"], dataset2["integrated_intensity"], fiducial_model=fiducial_models["Bispectrum"]).distance_metric() distances["Bispectrum"] = bispec_distance.distance if cleanup: del bispec_distance if any("DeltaVariance" in s for s in statistics): delvar_distance = \ DeltaVariance_Distance(dataset1["integrated_intensity"], dataset2["integrated_intensity"], weights1=dataset1["integrated_intensity_error"][0], weights2=dataset2["integrated_intensity_error"][0], fiducial_model=fiducial_models["DeltaVariance"]).distance_metric() distances["DeltaVariance"] = delvar_distance.distance if cleanup: del delvar_distance if any("Genus" in s for s in statistics): genus_distance = \ GenusDistance(dataset1["integrated_intensity"][0], dataset2["integrated_intensity"][0], fiducial_model=fiducial_models["Genus"]).distance_metric() distances["Genus"] = genus_distance.distance if cleanup: del genus_distance if any("VCS" in s for s in statistics): vcs_distance = \ VCS_Distance(dataset1["cube"], dataset2["cube"], fiducial_model=fiducial_models["VCS"], breaks=vcs_break).distance_metric() distances["VCS_Density"] = vcs_distance.density_distance distances["VCS_Velocity"] = vcs_distance.velocity_distance distances["VCS_Break"] = vcs_distance.break_distance distances["VCS"] = vcs_distance.distance if cleanup: del vcs_distance if any("VCA" in s for s in statistics): vca_distance = \ VCA_Distance(dataset1["cube"], dataset2["cube"], fiducial_model=fiducial_models["VCA"], breaks=vca_break).distance_metric() distances["VCA"] = vca_distance.distance if cleanup: del vca_distance if any("Tsallis" in s for s in statistics): tsallis_distance= \ Tsallis_Distance(dataset1["integrated_intensity"][0], dataset2["integrated_intensity"][0], fiducial_model=fiducial_models["Tsallis"]).distance_metric() distances["Tsallis"] = tsallis_distance.distance if cleanup: del tsallis_distance if any("Skewness" in s for s in statistics) or any("Kurtosis" in s for s in statistics): moment_distance = \ StatMomentsDistance(dataset1["integrated_intensity"][0], dataset2["integrated_intensity"][0], 5, fiducial_model=fiducial_models["stat_moments"]).distance_metric() distances["Skewness"] = moment_distance.skewness_distance distances["Kurtosis"] = moment_distance.kurtosis_distance if cleanup: del moment_distance if any("PCA" in s for s in statistics): pca_distance = \ PCA_Distance(dataset1["cube"][0], dataset2["cube"][0], fiducial_model=fiducial_models["PCA"]).distance_metric() distances["PCA"] = pca_distance.distance if cleanup: del pca_distance if any("SCF" in s for s in statistics): scf_distance = \ SCF_Distance(dataset1["cube"][0], dataset2["cube"][0], fiducial_model=fiducial_models["SCF"]).distance_metric() distances["SCF"] = scf_distance.distance if cleanup: del scf_distance if any("Cramer" in s for s in statistics): cramer_distance = \ Cramer_Distance(dataset1["cube"][0], dataset2["cube"][0]).distance_metric() distances["Cramer"] = cramer_distance.distance if cleanup: del cramer_distance if any("Dendrogram_Hist" in s for s in statistics) or \ any("Dendrogram_Num" in s for s in statistics): if dendro_saves[0] is None: input1 = dataset1["cube"][0] elif isinstance(dendro_saves[0], str): input1 = dendro_saves[0] else: raise UserWarning("dendro_saves must be the filename of the" " saved file.") if dendro_saves[0] is None: input2 = dataset2["cube"][0] elif isinstance(dendro_saves[0], str): input2 = dendro_saves[1] else: raise UserWarning("dendro_saves must be the filename of the" " saved file.") dendro_distance = \ DendroDistance(input1, input2, fiducial_model=fiducial_models["Dendrogram"], dendro_params=dendro_params) dendro_distance.distance_metric() distances["Dendrogram_Hist"] = dendro_distance.histogram_distance distances["Dendrogram_Num"] = dendro_distance.num_distance if cleanup: del dendro_distance if any("PDF_Hellinger" in s for s in statistics) or \ any("PDF_KS" in s for s in statistics) or \ any("PDF_AD" in s for s in statistics): pdf_distance = \ PDF_Distance(dataset1["integrated_intensity"][0], dataset2["integrated_intensity"][0], min_val1=0.05, min_val2=0.05, weights1=dataset1["integrated_intensity_error"][0] ** -2., weights2=dataset2["integrated_intensity_error"][0] ** -2.) pdf_distance.distance_metric() distances["PDF_Hellinger"] = pdf_distance.hellinger_distance distances["PDF_KS"] = pdf_distance.ks_distance distances["PDF_AD"] = pdf_distance.ad_distance if cleanup: del pdf_distance return distances