# ?kmeans
# ?pam
# library() stops with an error if the package is missing, whereas require()
# only returns FALSE and lets the script fail later.
library(graphics)
# a 2-dimensional example: two Gaussian clouds of 50 points each,
# centred at (0, 0) and (1, 1), stacked into a 100 x 2 matrix
x <- rbind(
  matrix(rnorm(100, sd = 0.3), ncol = 2),
  matrix(rnorm(100, mean = 1, sd = 0.3), ncol = 2)
)
colnames(x) <- c("x", "y")
# Fit k-means with 3 centres; the outer parentheses print the fitted object.
(cl <- kmeans(x, 3))
## K-means clustering with 3 clusters of sizes 28, 24, 48
##
## Cluster means:
## x y
## 1 1.30778873 0.9638207
## 2 0.63247885 0.9965951
## 3 -0.07017531 0.0580808
##
## Clustering vector:
## [1] 3 3 3 3 3 3 3 3 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [36] 3 3 3 2 3 3 3 3 3 3 2 3 3 3 3 1 2 1 1 2 2 1 1 2 1 1 2 1 1 2 1 1 2 1 1
## [71] 1 2 1 2 1 2 1 1 2 1 1 2 3 1 1 2 2 1 2 2 1 2 2 2 2 1 2 1 1 1
##
## Within cluster sum of squares by cluster:
## [1] 3.891288 2.611649 8.014188
## (between_SS / total_SS = 79.3 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss"
## [5] "tot.withinss" "betweenss" "size" "iter"
## [9] "ifault"
# Scatter plot of the points, coloured by their assigned cluster id.
plot(x, col = cl$cluster)
# Mark each centre with the colour of its own cluster. The previous
# `col = 1:2` was recycled over the 3 centres, so the third centre was drawn
# in the colour of cluster 1.
points(cl$centers, col = seq_len(nrow(cl$centers)), pch = 8, cex = 2)
# Total within-cluster sum of squares as stored on the fit ...
cl$tot.withinss
## [1] 14.51713
# ... which equals the sum of the per-cluster values.
sum(cl$withinss)
## [1] 14.51713
library(cluster)
## generate 25 objects, divided into 2 clusters.
# 10 points scattered around (0, 0) followed by 15 points around (5, 5)
x <- rbind(
  cbind(rnorm(10, 0, 0.5), rnorm(10, 0, 0.5)),
  cbind(rnorm(15, 5, 0.5), rnorm(15, 5, 0.5))
)
# Partition around 3 medoids (one more than the number of generated groups).
pamx <- pam(x, 3)
# Print the fit: medoids, clustering vector and objective values.
pamx
## Medoids:
## ID
## [1,] 2 0.263570 -0.1544024
## [2,] 14 4.401965 4.4589694
## [3,] 24 5.673318 4.6480571
## Clustering vector:
## [1] 1 1 1 1 1 1 1 1 1 1 2 3 3 2 3 3 2 2 2 3 3 3 3 3 3
## Objective function:
## build swap
## 0.5209973 0.4754930
##
## Available components:
## [1] "medoids" "id.med" "clustering" "objective" "isolation"
## [6] "clusinfo" "silinfo" "diss" "call" "data"
# Print the extended summary of the pam fit (per-cluster information,
# silhouette table and dissimilarity summary, as shown below).
summary(pamx)
## Medoids:
## ID
## [1,] 2 0.263570 -0.1544024
## [2,] 14 4.401965 4.4589694
## [3,] 24 5.673318 4.6480571
## Clustering vector:
## [1] 1 1 1 1 1 1 1 1 1 1 2 3 3 2 3 3 2 2 2 3 3 3 3 3 3
## Objective function:
## build swap
## 0.5209973 0.4754930
##
## Numerical information per cluster:
## size max_diss av_diss diameter separation
## [1,] 10 0.7174081 0.4360287 1.372669 5.2191930
## [2,] 5 0.9907098 0.4953644 1.472623 0.4564432
## [3,] 10 0.8412981 0.5050216 1.299617 0.4564432
##
## Isolated clusters:
## L-clusters: character(0)
## L*-clusters: [1] 1
##
## Silhouette plot information:
## cluster neighbor sil_width
## 2 1 2 0.92472190
## 7 1 2 0.92148835
## 8 1 2 0.92049326
## 4 1 2 0.91496693
## 9 1 2 0.91386021
## 1 1 2 0.91025454
## 3 1 2 0.90897281
## 5 1 2 0.90168967
## 6 1 2 0.87215871
## 10 1 2 0.84092273
## 14 2 3 0.49215618
## 18 2 3 0.34442638
## 11 2 3 0.33838970
## 17 2 3 0.33056555
## 19 2 3 0.31442705
## 23 3 2 0.57148584
## 22 3 2 0.57108403
## 24 3 2 0.57106767
## 12 3 2 0.47685778
## 25 3 2 0.44079073
## 15 3 2 0.37763967
## 16 3 2 0.29161290
## 20 3 2 0.26450843
## 21 3 2 0.24039360
## 13 3 2 -0.03918781
## Average silhouette width per cluster:
## [1] 0.9029529 0.3639930 0.3766253
## Average silhouette width of total data set:
## [1] 0.5846299
##
## 300 dissimilarities, summarized :
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.008409 0.809900 3.591800 3.942100 7.032100 8.509000
## Metric : euclidean
## Number of objects : 25
##
## Available components:
## [1] "medoids" "id.med" "clustering" "objective" "isolation"
## [6] "clusinfo" "silinfo" "diss" "call" "data"
# Plot the fitted pam object.
plot(pamx)
# Average silhouette width over the whole data set.
pamx$silinfo$avg.width
## [1] 0.5846299
# Average silhouette width of each cluster.
pamx$silinfo$clus.avg.widths
## [1] 0.9029529 0.3639930 0.3766253
# head(x)
# dim(x)
K-中心点算法与K-均值算法在原理上十分接近,主要区别在于在选取每个类别的中心点时,K-中心点算法在类别内选取到其余样本距离之和最小的样本为中心。
在R中,K-中心点算法由cluster软件包实现,主要函数为pam()。
# Load the iris CSV and drop its first column (presumably a row index).
mdata <- read.csv("http://data.galaxystatistics.com/blog_data/regression/iris.csv")
Iris <- mdata[, -1]
# Set the variable names: the label first, then the four measurements.
names(Iris) <- c("label", "v1", "v2", "v3", "v4")
# Keep the labels as a character vector.
var <- as.character(Iris$label)
# install.packages("cluster")
library(cluster)
# Cluster into 3 groups with the label column removed; the parentheses print
# the result.
(kc <- pam(Iris[, -1], 3))
## Medoids:
## ID v1 v2 v3 v4
## [1,] 8 5.0 3.4 1.5 0.2
## [2,] 79 6.0 2.9 4.5 1.5
## [3,] 113 6.8 3.0 5.5 2.1
## Clustering vector:
## [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [36] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [71] 2 2 2 2 2 2 2 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 2 3 3 3
## [106] 3 2 3 3 3 3 3 3 2 2 3 3 3 3 2 3 2 3 2 3 3 2 2 3 3 3 3 3 2 3 3 3 3 2 3
## [141] 3 3 2 3 3 3 2 3 3 2
## Objective function:
## build swap
## 0.6709391 0.6542077
##
## Available components:
## [1] "medoids" "id.med" "clustering" "objective" "isolation"
## [6] "clusinfo" "silinfo" "diss" "call" "data"
# Show only the cluster assignment. The label is column 1, so it is column 1
# that must be dropped: the previous `Iris[,-2]` removed feature v1 instead and
# fed the class label into pam(), leaking the answer into the clustering.
# NOTE: the vector printed below was recorded from that earlier call; re-run
# to refresh it.
(kc <- pam(Iris[, -1], 3, cluster.only = TRUE))
## [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [36] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [71] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3
## [106] 3 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [141] 3 3 3 3 3 3 3 3 3 3
Medoids:该项指明聚类完成时各类别的中心点分别是哪几个样本点,以及它们的变量取值。 Objective function:该项给出了build和swap两个过程中目标函数的值。其中,build过程用于在未指定初始中心点的情况下寻找较优的初始中心点;而swap过程则在初始中心点的基础上不断尝试替换中心点,使目标函数达到局部最优的类别划分。
pam算法即书本上所述的k-中心点算法,是对k-均值算法的改进,削弱了对离群值的敏感度:每个类的中心选用的是某一个具体的样本点,而不是k-均值中的几何中心(均值)。但其运算量较大,只适合少量数据的分析。
可以先计算出数据的距离矩阵,后续直接从矩阵中读取距离,而不必每次重新计算。
#' K-medoids clustering by greedy medoid/non-medoid swaps (PAM swap phase)
#'
#' Starting from `k` randomly drawn medoids, each iteration evaluates every
#' (medoid, non-medoid) exchange, applies the single exchange that lowers the
#' total distance of all rows to their nearest medoid the most, and stops when
#' no exchange lowers it any further.
#'
#' NOTE: this definition masks `cluster::pam()` for the rest of the session;
#' call `cluster::pam()` explicitly when the package version is wanted.
#'
#' @param data Data frame (or matrix) whose columns are all numeric features.
#'   Any number of columns is accepted.
#' @param k Number of clusters, a whole number between 1 and `nrow(data)`.
#' @param seed Value passed to `set.seed()` before the initial medoids are
#'   drawn (default 1024). This resets the session RNG; pass `NULL` to leave
#'   the RNG untouched.
#' @param plot_dir Directory that receives one `<iteration>.png` scatter plot
#'   (first two feature columns, coloured by cluster) per iteration. Plots are
#'   skipped with a warning when the directory does not exist, ggplot2 is not
#'   installed, or there are fewer than two feature columns; `NULL` disables
#'   plotting silently.
#' @param verbose If `TRUE`, print the iteration counter at each iteration.
#' @return `data` as a data frame with an added character column `flg` that
#'   holds, for every row, the row index of the medoid it is assigned to.
pam <- function(data, k, seed = 1024,
                plot_dir = "C:/Users/abdata/Pictures/kmeans_pic/",
                verbose = TRUE) {
  data <- as.data.frame(data)
  n <- nrow(data)
  if (n < 1L || ncol(data) < 1L) {
    stop("`data` must have at least one row and one column.", call. = FALSE)
  }
  if (!all(vapply(data, is.numeric, logical(1)))) {
    stop("All columns of `data` must be numeric.", call. = FALSE)
  }
  if (!is.numeric(k) || length(k) != 1L || is.na(k) ||
        k != round(k) || k < 1 || k > n) {
    stop("`k` must be a whole number between 1 and nrow(data).", call. = FALSE)
  }
  k <- as.integer(k)

  if (!is.null(seed)) {
    set.seed(seed)
  }

  # Pairwise Euclidean distances, computed once and indexed by row position
  # (dimnames dropped so custom row names cannot affect the lookups).
  dist_mat <- as.matrix(stats::dist(data))
  dimnames(dist_mat) <- NULL
  if (anyNA(dist_mat)) {
    stop("Distances contain NA; remove or impute missing values first.",
         call. = FALSE)
  }

  # Distance from every row to its closest medoid in `med`.
  nearest_dist <- function(med) {
    if (length(med) == 0L) {
      return(rep(Inf, n))
    }
    apply(dist_mat[, med, drop = FALSE], 1, min)
  }

  # Medoid row index assigned to every row; a medoid always labels itself,
  # even when a duplicate point ties at distance zero.
  assign_rows <- function(med) {
    lab <- med[apply(dist_mat[, med, drop = FALSE], 1, which.min)]
    lab[med] <- med
    lab
  }

  # Decide once whether per-iteration plots can be written at all.
  plot_ready <- FALSE
  out_dir <- NULL
  if (!is.null(plot_dir)) {
    # Trailing separators break dir.exists() on Windows.
    out_dir <- sub("[/\\\\]+$", "", plot_dir)
    plot_ready <- dir.exists(out_dir) && ncol(data) >= 2L &&
      requireNamespace("ggplot2", quietly = TRUE)
    if (!plot_ready) {
      warning("pam(): per-iteration plots skipped (missing directory, ",
              "missing ggplot2, or fewer than two feature columns).",
              call. = FALSE)
    }
  }

  # Write the scatter plot of the first two features for one iteration.
  save_plot <- function(labels, iter) {
    plot_df <- data.frame(
      x = data[[1L]],
      y = data[[2L]],
      flg = factor(as.character(labels))
    )
    p <- ggplot2::ggplot(plot_df, ggplot2::aes(x = x, y = y, color = flg)) +
      ggplot2::geom_point() +
      ggplot2::labs(x = names(data)[1L], y = names(data)[2L])
    grDevices::png(filename = file.path(out_dir, paste0(iter, ".png")))
    on.exit(grDevices::dev.off(), add = TRUE)
    print(p)
  }

  # Same draw as sample(1:n, k), but safe for n == 1.
  medoids <- sample.int(n, k)
  current_cost <- sum(nearest_dist(medoids))
  iter <- 1

  repeat {
    if (verbose) {
      print(iter)
    }

    candidates <- setdiff(seq_len(n), medoids)
    best_cost <- current_cost
    best_pos <- NA_integer_
    best_new <- NA_integer_

    if (length(candidates) > 0L) {
      for (pos in seq_along(medoids)) {
        # Closest distance to the medoids that stay if medoids[pos] leaves.
        base <- nearest_dist(medoids[-pos])
        # Total cost for every candidate replacing medoids[pos]: each row is
        # served by the closer of (remaining medoids, candidate). `base` has
        # one entry per row, so it recycles down each candidate column.
        swap_cost <- colSums(pmin(dist_mat[, candidates, drop = FALSE], base))
        pick <- which.min(swap_cost)
        if (swap_cost[pick] < best_cost) {
          best_cost <- swap_cost[pick]
          best_pos <- pos
          best_new <- candidates[pick]
        }
      }
    }

    # Require a real decrease (relative tolerance) so rounding noise cannot
    # keep the loop alive.
    improved <- !is.na(best_pos) && best_cost < current_cost * (1 - 1e-12)
    if (improved) {
      medoids[best_pos] <- best_new
      current_cost <- unname(best_cost)
    }

    # Re-assign every row to its nearest medoid instead of merely renaming
    # the old clusters.
    labels <- assign_rows(medoids)

    if (plot_ready) {
      save_plot(labels, iter)
    }
    if (!improved) {
      break
    }
    iter <- iter + 1
  }

  data$flg <- as.character(labels)
  data
}
library(ggplot2)
# Reload the iris CSV and keep everything except its first two columns.
mdata <- read.csv("http://data.galaxystatistics.com/blog_data/regression/iris.csv")
data <- mdata[, -(1:2)]
k <- 3
# `pam` here is the function defined above in this file (it masks
# cluster::pam); run it on the remaining columns with k clusters.
pam(data, k)
用iris数据集测试,假设分为3个簇:中心点有3个,非中心点有147个,每一对(中心点,非中心点)的替换都要对全部150个点重新计算代价。每轮迭代的比较次数总计为:3×147×150 。
原理解析:
EM聚类将数据集看作一个含有隐变量的概率模型,并以实现模型最优化,即获取与数据本身性质最契合的聚类方式为目的,通过“反复估计”模型参数找到最优解,同时给出相应的最优类别数k。而“反复估计”的过程即是EM算法的精华所在,这一过程由E-step(Expectation)和M-step(Maximization)两个步骤交替进行来实现。
# Reload the iris CSV and drop its first column (presumably a row index).
mdata <- read.csv("http://data.galaxystatistics.com/blog_data/regression/iris.csv")
Iris <- mdata[, -1]
library(mclust)
# Fit with Mclust() on everything but the first remaining column.
fit_em <- Mclust(Iris[, -1])
# Print every component of the fitted object.
fit_em[seq_along(fit_em)]
## $call
## Mclust(data = Iris[, -1])
##
## $data
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## [1,] 5.1 3.5 1.4 0.2
## [2,] 4.9 3.0 1.4 0.2
## [3,] 4.7 3.2 1.3 0.2
## [4,] 4.6 3.1 1.5 0.2
## [5,] 5.0 3.6 1.4 0.2
## [6,] 5.4 3.9 1.7 0.4
## [7,] 4.6 3.4 1.4 0.3
## [8,] 5.0 3.4 1.5 0.2
## [9,] 4.4 2.9 1.4 0.2
## [10,] 4.9 3.1 1.5 0.1
## [11,] 5.4 3.7 1.5 0.2
## [12,] 4.8 3.4 1.6 0.2
## [13,] 4.8 3.0 1.4 0.1
## [14,] 4.3 3.0 1.1 0.1
## [15,] 5.8 4.0 1.2 0.2
## [16,] 5.7 4.4 1.5 0.4
## [17,] 5.4 3.9 1.3 0.4
## [18,] 5.1 3.5 1.4 0.3
## [19,] 5.7 3.8 1.7 0.3
## [20,] 5.1 3.8 1.5 0.3
## [21,] 5.4 3.4 1.7 0.2
## [22,] 5.1 3.7 1.5 0.4
## [23,] 4.6 3.6 1.0 0.2
## [24,] 5.1 3.3 1.7 0.5
## [25,] 4.8 3.4 1.9 0.2
## [26,] 5.0 3.0 1.6 0.2
## [27,] 5.0 3.4 1.6 0.4
## [28,] 5.2 3.5 1.5 0.2
## [29,] 5.2 3.4 1.4 0.2
## [30,] 4.7 3.2 1.6 0.2
## [31,] 4.8 3.1 1.6 0.2
## [32,] 5.4 3.4 1.5 0.4
## [33,] 5.2 4.1 1.5 0.1
## [34,] 5.5 4.2 1.4 0.2
## [35,] 4.9 3.1 1.5 0.2
## [36,] 5.0 3.2 1.2 0.2
## [37,] 5.5 3.5 1.3 0.2
## [38,] 4.9 3.6 1.4 0.1
## [39,] 4.4 3.0 1.3 0.2
## [40,] 5.1 3.4 1.5 0.2
## [41,] 5.0 3.5 1.3 0.3
## [42,] 4.5 2.3 1.3 0.3
## [43,] 4.4 3.2 1.3 0.2
## [44,] 5.0 3.5 1.6 0.6
## [45,] 5.1 3.8 1.9 0.4
## [46,] 4.8 3.0 1.4 0.3
## [47,] 5.1 3.8 1.6 0.2
## [48,] 4.6 3.2 1.4 0.2
## [49,] 5.3 3.7 1.5 0.2
## [50,] 5.0 3.3 1.4 0.2
## [51,] 7.0 3.2 4.7 1.4
## [52,] 6.4 3.2 4.5 1.5
## [53,] 6.9 3.1 4.9 1.5
## [54,] 5.5 2.3 4.0 1.3
## [55,] 6.5 2.8 4.6 1.5
## [56,] 5.7 2.8 4.5 1.3
## [57,] 6.3 3.3 4.7 1.6
## [58,] 4.9 2.4 3.3 1.0
## [59,] 6.6 2.9 4.6 1.3
## [60,] 5.2 2.7 3.9 1.4
## [61,] 5.0 2.0 3.5 1.0
## [62,] 5.9 3.0 4.2 1.5
## [63,] 6.0 2.2 4.0 1.0
## [64,] 6.1 2.9 4.7 1.4
## [65,] 5.6 2.9 3.6 1.3
## [66,] 6.7 3.1 4.4 1.4
## [67,] 5.6 3.0 4.5 1.5
## [68,] 5.8 2.7 4.1 1.0
## [69,] 6.2 2.2 4.5 1.5
## [70,] 5.6 2.5 3.9 1.1
## [71,] 5.9 3.2 4.8 1.8
## [72,] 6.1 2.8 4.0 1.3
## [73,] 6.3 2.5 4.9 1.5
## [74,] 6.1 2.8 4.7 1.2
## [75,] 6.4 2.9 4.3 1.3
## [76,] 6.6 3.0 4.4 1.4
## [77,] 6.8 2.8 4.8 1.4
## [78,] 6.7 3.0 5.0 1.7
## [79,] 6.0 2.9 4.5 1.5
## [80,] 5.7 2.6 3.5 1.0
## [81,] 5.5 2.4 3.8 1.1
## [82,] 5.5 2.4 3.7 1.0
## [83,] 5.8 2.7 3.9 1.2
## [84,] 6.0 2.7 5.1 1.6
## [85,] 5.4 3.0 4.5 1.5
## [86,] 6.0 3.4 4.5 1.6
## [87,] 6.7 3.1 4.7 1.5
## [88,] 6.3 2.3 4.4 1.3
## [89,] 5.6 3.0 4.1 1.3
## [90,] 5.5 2.5 4.0 1.3
## [91,] 5.5 2.6 4.4 1.2
## [92,] 6.1 3.0 4.6 1.4
## [93,] 5.8 2.6 4.0 1.2
## [94,] 5.0 2.3 3.3 1.0
## [95,] 5.6 2.7 4.2 1.3
## [96,] 5.7 3.0 4.2 1.2
## [97,] 5.7 2.9 4.2 1.3
## [98,] 6.2 2.9 4.3 1.3
## [99,] 5.1 2.5 3.0 1.1
## [100,] 5.7 2.8 4.1 1.3
## [101,] 6.3 3.3 6.0 2.5
## [102,] 5.8 2.7 5.1 1.9
## [103,] 7.1 3.0 5.9 2.1
## [104,] 6.3 2.9 5.6 1.8
## [105,] 6.5 3.0 5.8 2.2
## [106,] 7.6 3.0 6.6 2.1
## [107,] 4.9 2.5 4.5 1.7
## [108,] 7.3 2.9 6.3 1.8
## [109,] 6.7 2.5 5.8 1.8
## [110,] 7.2 3.6 6.1 2.5
## [111,] 6.5 3.2 5.1 2.0
## [112,] 6.4 2.7 5.3 1.9
## [113,] 6.8 3.0 5.5 2.1
## [114,] 5.7 2.5 5.0 2.0
## [115,] 5.8 2.8 5.1 2.4
## [116,] 6.4 3.2 5.3 2.3
## [117,] 6.5 3.0 5.5 1.8
## [118,] 7.7 3.8 6.7 2.2
## [119,] 7.7 2.6 6.9 2.3
## [120,] 6.0 2.2 5.0 1.5
## [121,] 6.9 3.2 5.7 2.3
## [122,] 5.6 2.8 4.9 2.0
## [123,] 7.7 2.8 6.7 2.0
## [124,] 6.3 2.7 4.9 1.8
## [125,] 6.7 3.3 5.7 2.1
## [126,] 7.2 3.2 6.0 1.8
## [127,] 6.2 2.8 4.8 1.8
## [128,] 6.1 3.0 4.9 1.8
## [129,] 6.4 2.8 5.6 2.1
## [130,] 7.2 3.0 5.8 1.6
## [131,] 7.4 2.8 6.1 1.9
## [132,] 7.9 3.8 6.4 2.0
## [133,] 6.4 2.8 5.6 2.2
## [134,] 6.3 2.8 5.1 1.5
## [135,] 6.1 2.6 5.6 1.4
## [136,] 7.7 3.0 6.1 2.3
## [137,] 6.3 3.4 5.6 2.4
## [138,] 6.4 3.1 5.5 1.8
## [139,] 6.0 3.0 4.8 1.8
## [140,] 6.9 3.1 5.4 2.1
## [141,] 6.7 3.1 5.6 2.4
## [142,] 6.9 3.1 5.1 2.3
## [143,] 5.8 2.7 5.1 1.9
## [144,] 6.8 3.2 5.9 2.3
## [145,] 6.7 3.3 5.7 2.5
## [146,] 6.7 3.0 5.2 2.3
## [147,] 6.3 2.5 5.0 1.9
## [148,] 6.5 3.0 5.2 2.0
## [149,] 6.2 3.4 5.4 2.3
## [150,] 5.9 3.0 5.1 1.8
##
## $modelName
## [1] "VEV"
##
## $n
## [1] 150
##
## $d
## [1] 4
##
## $G
## [1] 2
##
## $BIC
## Bayesian Information Criterion (BIC):
## EII VII EEI VEI EVI VVI
## 1 -1804.0854 -1804.0854 -1522.1202 -1522.1202 -1522.1202 -1522.1202
## 2 -1123.4115 -1012.2352 -1042.9680 -956.2823 -1007.3082 -857.5515
## 3 -878.7651 -853.8145 -813.0506 -779.1565 -797.8356 -744.6356
## 4 -784.3102 -783.8267 -735.4820 -716.5253 -732.4576 -705.0688
## 5 -734.3865 -746.9931 -694.3922 -703.0523 -695.6736 -700.9100
## 6 -715.7148 -705.7813 -693.8005 -675.5832 -722.1517 -696.9024
## 7 -712.1014 -708.7210 -671.6757 -666.8672 -704.1649 -703.9925
## 8 -686.0967 -707.2610 -661.0846 -657.2447 -703.6602 -702.1138
## 9 -694.5242 -700.0220 -678.5986 -671.8247 -737.3109 -727.6346
## EEE EVE VEE VVE EEV VEV EVV
## 1 -829.9782 -829.9782 -829.9782 -829.9782 -829.9782 -829.9782 -829.9782
## 2 -688.0972 -657.2263 -656.3270 -605.1931 -644.5997 -561.7285 -658.3306
## 3 -632.9658 -618.6003 -605.3982 -589.5227 -610.0853 -562.5514 -621.5200
## 4 -591.4097 -648.3787 -611.9268 -618.6018 -646.0011 -603.9266 -663.1150
## 5 -604.9299 -612.7253 -609.6644 -627.9771 -621.6906 -635.2087 -666.4540
## 6 -621.8177 -655.4745 -609.3411 -645.2745 -669.7188 -681.3062 -729.2513
## 7 -617.6212 -664.2666 -616.0728 -669.3395 -711.3150 -715.2100 -768.6957
## 8 -622.4221 -687.8967 -627.0606 -703.5972 -750.1897 -724.1750 -798.7173
## 9 -638.2076 -719.4489 -640.7214 -731.3868 -799.6408 -810.1318 -854.4504
## VVV
## 1 -829.9782
## 2 -574.0178
## 3 -580.8399
## 4 -628.9650
## 5 -683.8206
## 6 -711.5726
## 7 -728.5508
## 8 -801.7295
## 9 -835.9095
## attr(,"criterion")
## [1] "BIC"
##
## Top 3 models based on the BIC criterion:
## VEV,2 VEV,3 VVV,2
## -561.7285 -562.5514 -574.0178
##
## $bic
## [1] -561.7285
##
## $loglik
## [1] -215.726
##
## $df
## [1] 26
##
## $hypvol
## [1] NA
##
## $parameters
## $parameters$pro
## [1] 0.333332 0.666668
##
## $parameters$mean
## [,1] [,2]
## Sepal.Length 5.0060021 6.261996
## Sepal.Width 3.4280046 2.871999
## Petal.Length 1.4620006 4.905993
## Petal.Width 0.2459998 1.675997
##
## $parameters$variance
## $parameters$variance$modelName
## [1] "VEV"
##
## $parameters$variance$d
## [1] 4
##
## $parameters$variance$G
## [1] 2
##
## $parameters$variance$sigma
## , , 1
##
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Sepal.Length 0.15065097 0.13080108 0.020844624 0.013091029
## Sepal.Width 0.13080108 0.17604544 0.016032479 0.012214539
## Petal.Length 0.02084462 0.01603248 0.028082603 0.006015675
## Petal.Width 0.01309103 0.01221454 0.006015675 0.010423651
##
## , , 2
##
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Sepal.Length 0.4000437 0.10865439 0.3994013 0.14368238
## Sepal.Width 0.1086544 0.10928074 0.1238902 0.07284378
## Petal.Length 0.3994013 0.12389025 0.6109012 0.25738947
## Petal.Width 0.1436824 0.07284378 0.2573895 0.16808166
##
##
## $parameters$variance$scale
## [1] 0.03772382 0.13307644
##
## $parameters$variance$shape
## [1] 7.9106903 0.9228736 0.6299552 0.2174371
##
## $parameters$variance$orientation
## , , 1
##
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Sepal.Length -0.66908521 0.5978667 -0.4399764 -0.03607235
## Sepal.Width -0.73414150 -0.6206715 0.2746283 -0.01955806
## Petal.Length -0.09654303 0.4900796 0.8324347 -0.23990386
## Petal.Width -0.06356640 0.1309367 0.1950705 0.96992907
##
## , , 2
##
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Sepal.Length 0.5565151 0.75863415 -0.006116625 -0.3387149
## Sepal.Width 0.1865006 0.02937334 -0.901899603 0.3884995
## Petal.Length 0.7428956 -0.33350447 0.344100714 0.4674137
## Petal.Width 0.3218922 -0.55891520 -0.261025652 -0.7182373
##
##
##
## $parameters$Vinv
## NULL
##
##
## $z
## [,1] [,2]
## [1,] 1.000000e+00 2.513157e-11
## [2,] 9.999999e-01 5.556411e-08
## [3,] 1.000000e+00 3.635438e-09
## [4,] 9.999999e-01 8.611811e-08
## [5,] 1.000000e+00 8.504494e-12
## [6,] 1.000000e+00 1.400364e-12
## [7,] 1.000000e+00 2.971650e-09
## [8,] 1.000000e+00 4.052951e-10
## [9,] 9.999993e-01 6.585295e-07
## [10,] 9.999999e-01 7.275844e-08
## [11,] 1.000000e+00 1.189551e-12
## [12,] 1.000000e+00 3.285857e-09
## [13,] 9.999999e-01 1.034956e-07
## [14,] 9.999998e-01 1.791880e-07
## [15,] 1.000000e+00 2.623115e-16
## [16,] 1.000000e+00 1.664927e-18
## [17,] 1.000000e+00 2.856982e-14
## [18,] 1.000000e+00 3.517450e-11
## [19,] 1.000000e+00 2.111071e-12
## [20,] 1.000000e+00 9.844806e-13
## [21,] 1.000000e+00 6.254257e-09
## [22,] 1.000000e+00 1.579143e-11
## [23,] 1.000000e+00 1.159088e-10
## [24,] 9.999995e-01 4.953746e-07
## [25,] 9.999989e-01 1.061678e-06
## [26,] 9.999994e-01 5.953849e-07
## [27,] 1.000000e+00 6.015441e-09
## [28,] 1.000000e+00 5.309697e-11
## [29,] 1.000000e+00 1.021487e-10
## [30,] 9.999999e-01 6.263544e-08
## [31,] 9.999998e-01 1.604430e-07
## [32,] 1.000000e+00 6.134170e-10
## [33,] 1.000000e+00 5.533936e-15
## [34,] 1.000000e+00 3.164980e-17
## [35,] 1.000000e+00 3.683124e-08
## [36,] 1.000000e+00 1.058309e-09
## [37,] 1.000000e+00 9.055077e-12
## [38,] 1.000000e+00 2.144895e-11
## [39,] 9.999999e-01 1.445697e-07
## [40,] 1.000000e+00 3.176616e-10
## [41,] 1.000000e+00 3.224099e-11
## [42,] 9.997974e-01 2.025599e-04
## [43,] 1.000000e+00 2.424785e-08
## [44,] 9.999997e-01 3.077476e-07
## [45,] 1.000000e+00 2.213430e-09
## [46,] 9.999999e-01 9.248426e-08
## [47,] 1.000000e+00 2.393892e-12
## [48,] 1.000000e+00 1.123846e-08
## [49,] 1.000000e+00 1.458938e-12
## [50,] 1.000000e+00 6.608479e-10
## [51,] 5.013980e-97 1.000000e+00
## [52,] 1.064140e-88 1.000000e+00
## [53,] 2.002770e-110 1.000000e+00
## [54,] 2.021080e-68 1.000000e+00
## [55,] 5.251133e-98 1.000000e+00
## [56,] 5.873868e-85 1.000000e+00
## [57,] 4.044792e-100 1.000000e+00
## [58,] 3.693071e-36 1.000000e+00
## [59,] 1.711080e-91 1.000000e+00
## [60,] 2.496554e-65 1.000000e+00
## [61,] 1.718720e-44 1.000000e+00
## [62,] 1.255525e-77 1.000000e+00
## [63,] 1.580958e-64 1.000000e+00
## [64,] 6.715566e-97 1.000000e+00
## [65,] 3.284335e-50 1.000000e+00
## [66,] 2.488900e-83 1.000000e+00
## [67,] 5.063960e-90 1.000000e+00
## [68,] 2.135003e-62 1.000000e+00
## [69,] 9.314071e-99 1.000000e+00
## [70,] 5.368295e-58 1.000000e+00
## [71,] 1.593204e-114 1.000000e+00
## [72,] 1.544294e-65 1.000000e+00
## [73,] 1.093600e-114 1.000000e+00
## [74,] 1.254395e-92 1.000000e+00
## [75,] 2.218343e-77 1.000000e+00
## [76,] 4.064993e-84 1.000000e+00
## [77,] 5.126120e-106 1.000000e+00
## [78,] 8.081674e-123 1.000000e+00
## [79,] 5.062634e-91 1.000000e+00
## [80,] 8.744350e-42 1.000000e+00
## [81,] 5.435104e-55 1.000000e+00
## [82,] 2.182435e-49 1.000000e+00
## [83,] 3.129452e-59 1.000000e+00
## [84,] 3.734366e-125 1.000000e+00
## [85,] 2.775916e-90 1.000000e+00
## [86,] 2.764258e-90 1.000000e+00
## [87,] 7.510216e-100 1.000000e+00
## [88,] 5.994859e-88 1.000000e+00
## [89,] 8.912424e-67 1.000000e+00
## [90,] 7.231915e-67 1.000000e+00
## [91,] 3.262293e-79 1.000000e+00
## [92,] 2.747464e-91 1.000000e+00
## [93,] 8.120655e-64 1.000000e+00
## [94,] 8.983566e-37 1.000000e+00
## [95,] 4.355435e-73 1.000000e+00
## [96,] 7.803994e-68 1.000000e+00
## [97,] 1.572729e-71 1.000000e+00
## [98,] 1.267403e-76 1.000000e+00
## [99,] 1.592687e-30 1.000000e+00
## [100,] 1.810258e-68 1.000000e+00
## [101,] 2.738286e-219 1.000000e+00
## [102,] 3.179616e-138 1.000000e+00
## [103,] 6.128882e-193 1.000000e+00
## [104,] 2.393780e-159 1.000000e+00
## [105,] 1.229250e-190 1.000000e+00
## [106,] 1.049686e-242 1.000000e+00
## [107,] 4.074788e-103 1.000000e+00
## [108,] 1.165357e-208 1.000000e+00
## [109,] 1.375133e-177 1.000000e+00
## [110,] 1.922156e-221 1.000000e+00
## [111,] 5.753548e-139 1.000000e+00
## [112,] 1.661381e-149 1.000000e+00
## [113,] 2.522208e-168 1.000000e+00
## [114,] 3.502854e-140 1.000000e+00
## [115,] 3.351619e-168 1.000000e+00
## [116,] 6.720935e-167 1.000000e+00
## [117,] 5.175423e-153 1.000000e+00
## [118,] 1.008243e-242 1.000000e+00
## [119,] 7.666929e-282 1.000000e+00
## [120,] 2.119341e-121 1.000000e+00
## [121,] 5.006692e-189 1.000000e+00
## [122,] 2.454989e-133 1.000000e+00
## [123,] 9.319211e-250 1.000000e+00
## [124,] 3.324569e-124 1.000000e+00
## [125,] 4.943694e-176 1.000000e+00
## [126,] 1.245396e-183 1.000000e+00
## [127,] 3.204137e-118 1.000000e+00
## [128,] 6.332258e-121 1.000000e+00
## [129,] 2.764661e-175 1.000000e+00
## [130,] 4.206098e-167 1.000000e+00
## [131,] 2.582071e-201 1.000000e+00
## [132,] 4.979503e-213 1.000000e+00
## [133,] 5.636644e-181 1.000000e+00
## [134,] 1.534474e-121 1.000000e+00
## [135,] 6.800929e-148 1.000000e+00
## [136,] 1.586579e-219 1.000000e+00
## [137,] 4.923523e-188 1.000000e+00
## [138,] 1.132452e-151 1.000000e+00
## [139,] 3.529325e-116 1.000000e+00
## [140,] 7.067660e-162 1.000000e+00
## [141,] 1.221577e-190 1.000000e+00
## [142,] 5.086004e-158 1.000000e+00
## [143,] 3.179616e-138 1.000000e+00
## [144,] 9.761390e-201 1.000000e+00
## [145,] 5.526102e-201 1.000000e+00
## [146,] 7.089295e-164 1.000000e+00
## [147,] 7.438380e-136 1.000000e+00
## [148,] 3.252696e-146 1.000000e+00
## [149,] 1.122191e-170 1.000000e+00
## [150,] 8.593596e-131 1.000000e+00
##
## $classification
## [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [36] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [71] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [106] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [141] 2 2 2 2 2 2 2 2 2 2
##
## $uncertainty
## [1] 2.513167e-11 5.556411e-08 3.635438e-09 8.611811e-08 8.504530e-12
## [6] 1.400435e-12 2.971650e-09 4.052951e-10 6.585295e-07 7.275844e-08
## [11] 1.189493e-12 3.285857e-09 1.034956e-07 1.791880e-07 2.220446e-16
## [16] 0.000000e+00 2.864375e-14 3.517453e-11 2.110978e-12 9.845458e-13
## [21] 6.254257e-09 1.579137e-11 1.159088e-10 4.953746e-07 1.061678e-06
## [26] 5.953849e-07 6.015441e-09 5.309686e-11 1.021487e-10 6.263544e-08
## [31] 1.604430e-07 6.134171e-10 5.551115e-15 0.000000e+00 3.683124e-08
## [36] 1.058309e-09 9.054979e-12 2.144884e-11 1.445697e-07 3.176617e-10
## [41] 3.224110e-11 2.025599e-04 2.424785e-08 3.077476e-07 2.213430e-09
## [46] 9.248426e-08 2.393863e-12 1.123846e-08 1.458833e-12 6.608480e-10
## [51] 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## [56] 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## [61] 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## [66] 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## [71] 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## [76] 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## [81] 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## [86] 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## [91] 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## [96] 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## [101] 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## [106] 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## [111] 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## [116] 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## [121] 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## [126] 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## [131] 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## [136] 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## [141] 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## [146] 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
# Print the summary of the Mclust fit (selected model, log-likelihood, BIC
# and clustering table, as shown below).
summary(fit_em)
## ----------------------------------------------------
## Gaussian finite mixture model fitted by EM algorithm
## ----------------------------------------------------
##
## Mclust VEV (ellipsoidal, equal shape) model with 2 components:
##
## log.likelihood n df BIC ICL
## -215.726 150 26 -561.7285 -561.7289
##
## Clustering table:
## 1 2
## 50 100
# NOTE: this overwrites `fit_em`, which held the Mclust() fit until here.
fit_em <- mclustBIC(Iris[,-1]) # the second clustering function
# Print the BIC table.
fit_em
## Bayesian Information Criterion (BIC):
## EII VII EEI VEI EVI VVI
## 1 -1804.0854 -1804.0854 -1522.1202 -1522.1202 -1522.1202 -1522.1202
## 2 -1123.4115 -1012.2352 -1042.9680 -956.2823 -1007.3082 -857.5515
## 3 -878.7651 -853.8145 -813.0506 -779.1565 -797.8356 -744.6356
## 4 -784.3102 -783.8267 -735.4820 -716.5253 -732.4576 -705.0688
## 5 -734.3865 -746.9931 -694.3922 -703.0523 -695.6736 -700.9100
## 6 -715.7148 -705.7813 -693.8005 -675.5832 -722.1517 -696.9024
## 7 -712.1014 -708.7210 -671.6757 -666.8672 -704.1649 -703.9925
## 8 -686.0967 -707.2610 -661.0846 -657.2447 -703.6602 -702.1138
## 9 -694.5242 -700.0220 -678.5986 -671.8247 -737.3109 -727.6346
## EEE EVE VEE VVE EEV VEV EVV
## 1 -829.9782 -829.9782 -829.9782 -829.9782 -829.9782 -829.9782 -829.9782
## 2 -688.0972 -657.2263 -656.3270 -605.1931 -644.5997 -561.7285 -658.3306
## 3 -632.9658 -618.6003 -605.3982 -589.5227 -610.0853 -562.5514 -621.5200
## 4 -591.4097 -648.3787 -611.9268 -618.6018 -646.0011 -603.9266 -663.1150
## 5 -604.9299 -612.7253 -609.6644 -627.9771 -621.6906 -635.2087 -666.4540
## 6 -621.8177 -655.4745 -609.3411 -645.2745 -669.7188 -681.3062 -729.2513
## 7 -617.6212 -664.2666 -616.0728 -669.3395 -711.3150 -715.2100 -768.6957
## 8 -622.4221 -687.8967 -627.0606 -703.5972 -750.1897 -724.1750 -798.7173
## 9 -638.2076 -719.4489 -640.7214 -731.3868 -799.6408 -810.1318 -854.4504
## VVV
## 1 -829.9782
## 2 -574.0178
## 3 -580.8399
## 4 -628.9650
## 5 -683.8206
## 6 -711.5726
## 7 -728.5508
## 8 -801.7295
## 9 -835.9095
## attr(,"criterion")
## [1] "BIC"
##
## Top 3 models based on the BIC criterion:
## VEV,2 VEV,3 VVV,2
## -561.7285 -562.5514 -574.0178
# Plot the mclustBIC result.
plot(fit_em)
# Summarise the BIC object against the same data used to compute it; the
# printed result lists the best BIC values and a classification table.
BICsum <- summary(fit_em,data=Iris[,-1])
BICsum
## Best BIC values:
## VEV,2 VEV,3 VVV,2
## BIC -561.7285 -562.5514380 -574.01783
## BIC diff 0.0000 -0.8229759 -12.28937
##
## Classification table for model (VEV,2):
## 1 2
## 50 100
# ?Mclust
# ?mclustBIC