Title: | Network-Based Clustering |
---|---|
Description: | Network-based clustering using a Bayesian network mixture model with optional covariate adjustment. |
Authors: | Fritz Bayer [aut, cre, cph], Jack Kuipers [ctb] |
Maintainer: | Fritz Bayer <[email protected]> |
License: | GPL-3 |
Version: | 1.2.0 |
Built: | 2025-03-12 05:51:54 UTC |
Source: | https://github.com/cbg-ethz/clustnet |
best AIC search
bestAICsearch( binaryMatrix, minK = 2, maxK = 5, chiVec = c(0.001, 0.5, 1, 2, 3), startseed = 100, nIterations = 50, AICrange = 100, plot_heatmap = TRUE )
bestAICsearch( binaryMatrix, minK = 2, maxK = 5, chiVec = c(0.001, 0.5, 1, 2, 3), startseed = 100, nIterations = 50, AICrange = 100, plot_heatmap = TRUE )
binaryMatrix |
Data to be clustered |
minK |
Min number of clusters |
maxK |
Max number of clusters |
chiVec |
Vector of chi values |
startseed |
Seed |
nIterations |
Number of iterations |
AICrange |
AIC range |
plot_heatmap |
TRUE if plotting directly |
list of AIC scores
Create 2d dimensionality reduction of sample fit to Bayesian network clusters
density_plot(cluster_results, var_selection = NULL, colourys = NULL)
density_plot(cluster_results, var_selection = NULL, colourys = NULL)
cluster_results |
Cluster results from function get_clusters |
var_selection |
Selected variables to consider, e.g. c(1:5) for first five only |
colourys |
A vector specifying the colors of each cluster (optional) |
A density plot of class recordedplot.
# Simulate data sampled_data <- sampleData(n_vars = 15, n_samples = c(200,200,200))$sampled_data # Learn clusters cluster_results <- get_clusters(sampled_data) # Load additional packages to create a 2d dimensionality reduction library(car) library(ks) library(ggplot2) library(graphics) library(stats) # Plot a 2d dimensionality reduction density_plot(cluster_results)
# Simulate data sampled_data <- sampleData(n_vars = 15, n_samples = c(200,200,200))$sampled_data # Learn clusters cluster_results <- get_clusters(sampled_data) # Load additional packages to create a 2d dimensionality reduction library(car) library(ks) library(ggplot2) library(graphics) library(stats) # Plot a 2d dimensionality reduction density_plot(cluster_results)
Classification based on clustering
get_classification(cluster_results, data_classify)
get_classification(cluster_results, data_classify)
cluster_results |
Output from get_clusters() |
data_classify |
Data that should be classified; colnames need to match the ones of cluster_results$data; missing cols are allowed |
a list containing the classification as "clustermembership" and the probabilities of belonging to the clusters as "allrelativeprobabs"
# choose data sampled_data <- sampleData(n_vars = 15, n_samples = c(300,300,300))$sampled_data # learn clusters cluster_results <- get_clusters(sampled_data) # classify the data based on the learned clusters classification_results <- get_classification(cluster_results, sampled_data)
# choose data sampled_data <- sampleData(n_vars = 15, n_samples = c(300,300,300))$sampled_data # learn clusters cluster_results <- get_clusters(sampled_data) # classify the data based on the learned clusters classification_results <- get_classification(cluster_results, sampled_data)
Network-based clustering
get_clusters( myData, k_clust = 3, n_bg = 0, quick = TRUE, EMseeds = 1, edgepmat = NULL, blacklist = NULL, bdepar = list(chi = 0.5, edgepf = 8), newallrelativeprobabs = NULL )
get_clusters( myData, k_clust = 3, n_bg = 0, quick = TRUE, EMseeds = 1, edgepmat = NULL, blacklist = NULL, bdepar = list(chi = 0.5, edgepf = 8), newallrelativeprobabs = NULL )
myData |
Data to be clustered, must be either binary (with levels "0"/"1") or categorical (with levels "0"/"1"/"2"/...) |
k_clust |
Number of clusters |
n_bg |
Number of covariates to be adjusted for; the covariates must be placed in the last columns of the myData matrix |
quick |
if TRUE, then the runtime is quick but accuracy is lower |
EMseeds |
Seeds |
edgepmat |
Matrix of penalized edges in the search space |
blacklist |
Matrix of forbidden edges in the search space |
bdepar |
Hyperparameters for structure learning (BDE score) |
newallrelativeprobabs |
relative probability of cluster assignment of each sample |
a list containing the clusterMemberships and "assignprogress"
# choose data sampled_data <- sampleData(n_vars = 15, n_samples = c(300,300,300))$sampled_data # learn clusters cluster_results <- get_clusters(sampled_data) # visualize the networks library(ggplot2) library(ggraph) library(igraph) library(ggpubr) plot_clusters(cluster_results)
# choose data sampled_data <- sampleData(n_vars = 15, n_samples = c(300,300,300))$sampled_data # learn clusters cluster_results <- get_clusters(sampled_data) # visualize the networks library(ggplot2) library(ggraph) library(igraph) library(ggpubr) plot_clusters(cluster_results)
Categorical version of Bernoulli mixture model (binary clustering function BBMMclusterEM)
get_clusters_bernoulli( binaryMatrix, chi = 0.5, k_clust = 5, startseed = 100, nIterations = 10, verbose = FALSE )
get_clusters_bernoulli( binaryMatrix, chi = 0.5, k_clust = 5, startseed = 100, nIterations = 10, verbose = FALSE )
binaryMatrix |
Data to be clustered |
chi |
hyperparameter chi |
k_clust |
Number of clusters |
startseed |
Start seed |
nIterations |
number of iterations |
verbose |
set TRUE to display progress |
a list containing the clusterMemberships
DAG visualization
nice_DAG_plot( my_DAG, print_direct = TRUE, node_size = NULL, CPDAG = TRUE, node_colours = "#fdae61", directed = TRUE )
nice_DAG_plot( my_DAG, print_direct = TRUE, node_size = NULL, CPDAG = TRUE, node_colours = "#fdae61", directed = TRUE )
my_DAG |
DAG |
print_direct |
print DAG if TRUE |
node_size |
node size vector |
CPDAG |
if TRUE, then plot CPDAG instead of DAG |
node_colours |
node colours |
directed |
TRUE if edges should be directed |
A plot of the DAG of class c("gg", "ggplot").
Plot clusters
plot_clusters( cluster_results, node_colours = "#fdae61", scale_entropy = FALSE, directed = TRUE )
plot_clusters( cluster_results, node_colours = "#fdae61", scale_entropy = FALSE, directed = TRUE )
cluster_results |
Cluster results |
node_colours |
node colours |
scale_entropy |
if TRUE, an entropy measure will be used to determine the size of the nodes |
directed |
TRUE if edges should be directed |
A summary plot of all cluster networks of class c("gg", "ggplot", "ggarrange").
# Simulate data sampled_data <- sampleData(n_vars = 15, n_bg = 0)$sampled_data # learn clusters cluster_results <- get_clusters(sampled_data) # Load additional packages to visualize the networks library(ggplot2) library(ggraph) library(igraph) library(ggpubr) # Visualize networks plot_clusters(cluster_results)
# Simulate data sampled_data <- sampleData(n_vars = 15, n_bg = 0)$sampled_data # learn clusters cluster_results <- get_clusters(sampled_data) # Load additional packages to visualize the networks library(ggplot2) library(ggraph) library(igraph) library(ggpubr) # Visualize networks plot_clusters(cluster_results)
Sample binary data from different Bayes nets
sampleData( k_clust = 3, n_vars = 20, n_bg = 0, n_samples = NULL, bgedges = "different", equal_cpt_bg = TRUE )
sampleData( k_clust = 3, n_vars = 20, n_bg = 0, n_samples = NULL, bgedges = "different", equal_cpt_bg = TRUE )
k_clust |
Number of clusters |
n_vars |
Number of variables |
n_bg |
number of conditioned covariates |
n_samples |
number of samples |
bgedges |
type of background edges |
equal_cpt_bg |
specify if conditional probability table of the background edges is constant across clusters |
sampled binary data
# sample data simulation_data <- sampleData(k_clust = 3, n_vars = 15, n_samples = c(200,200,200)) sampled_data <- simulation_data$sampled_data head(sampled_data)
# sample data simulation_data <- sampleData(k_clust = 3, n_vars = 15, n_samples = c(200,200,200)) sampled_data <- simulation_data$sampled_data head(sampled_data)