Source code for swamp.clustering.spectral

from swamp.clustering.clustering import Clustering
from sklearn.cluster import SpectralClustering
from scipy.stats import randint, expon


class Spectral(Clustering):
    """Spectral clustering built on :py:obj:`sklearn.cluster.SpectralClustering`

    The estimator is always created with ``affinity='precomputed'``, so the matrix handed to it is used
    directly as the affinity matrix.

    :example:

    >>> from swamp.clustering import Spectral
    >>> import joblib
    >>> dist_mtx = joblib.load('<dist_mtx.pckl>')
    >>> dist_mtx = dist_mtx.fillna(0)
    >>> my_clst = Spectral(dist_mtx)
    >>> my_clst.grid_search()

    """

    @property
    def _algorithm_name(self):
        """Identifier of this clustering algorithm (spectral)"""

        return "spectral"

    @property
    def _hyper_params(self):
        """Search space of the clustering hyper-parameters

        Maps each hyper-parameter name to either a list of candidate values or a frozen
        :py:mod:`scipy.stats` distribution. New distribution objects are created on every access.
        """

        search_space = dict(
            n_clusters=randint(200, 900),
            eigen_solver=[None, "arpack", "lobpcg"],
            assign_labels=["kmeans", "discretize"],
            # NOTE(review): n_neighbors and gamma are presumably unused by the estimator when
            # affinity='precomputed' -- confirm against the scikit-learn documentation.
            n_neighbors=randint(2, 10),
            # NOTE(review): the positional argument of expon is ``loc`` (shift), not ``scale``;
            # confirm that a distribution shifted to start at 0.1 is what was intended.
            gamma=expon(0.1),
        )
        return search_space

    def _clustering(self, **kwargs):
        """Create a single-job estimator for a given set of hyper-parameters

        :param kwargs: hyper-parameters forwarded unchanged to :py:obj:`sklearn.cluster.SpectralClustering`
        :returns: an unfitted estimator configured with ``affinity='precomputed'`` and ``n_jobs=1``
        """

        estimator = SpectralClustering(affinity='precomputed', n_jobs=1, **kwargs)
        return estimator

    def cluster(self):
        """Cluster the similarity matrix using the :py:attr:`~swamp.clustering.Clustering.best_params`

        The fitted labels are stored in the ``labels`` attribute of this instance.

        :raises ValueError: the attribute :py:attr:`~swamp.clustering.Clustering.similarity_mtx` is None
        """

        # The header is logged before validation, so it is emitted even when the check below fails
        self.logger.info(self.clustering_header)

        if self.similarity_mtx is None:
            raise ValueError('Need to load a distance matrix before clustering!')

        # Unlike _clustering(), the final fit runs with the instance's own thread count
        model = SpectralClustering(n_jobs=self.nthreads, affinity='precomputed', **self.best_params)
        model.fit(self.similarity_mtx)
        self.labels = model.labels_