我会尽力回答这个问题。如果您想用二维散点图来可视化聚类结果,可以参考下面的示例。您也可以将其推广到三维;而对于更高维的数据集,或许可以使用成对散点图(pairs plot)。
请注意,我没有原样使用您的代码,但仍然使用了 iris(鸢尾花)数据集。这样做是为了避免对行索引进行硬编码。
希望这能在某种程度上有所帮助。
library(plyr)
library(ggplot2)
# Fix the RNG seed so the random train/test split is reproducible
set.seed(123)

# Create training and testing data sets:
# draw 100 distinct row numbers for training; the remaining rows are the
# test set. Logical masks are used so no row index is hard-coded.
idx <- sample(seq_len(nrow(iris)), size = 100)
train.idx <- seq_len(nrow(iris)) %in% idx
test.idx <- !train.idx
train <- iris[train.idx, 1:4]
test <- iris[test.idx, 1:4]

# Get labels (column 5 of iris) for the training rows
labels <- iris[train.idx, 5]

# Do knn
# knn() lives in the 'class' package, which this script never attaches, so
# the call is namespace-qualified; a bare knn() fails with
# "could not find function". k is left at knn's default.
fit <- class::knn(train, test, labels)
fit

# Create a dataframe to simplify charting:
# the four test measurements plus the predicted class for each test row
plot.df <- data.frame(test, predicted = fit)

# Use ggplot
# 2-D plots example only
# Sepal.Length vs Sepal.Width

# First use Convex hull to determine boundary points of each cluster
plot.df1 <- data.frame(
  x = plot.df$Sepal.Length,
  y = plot.df$Sepal.Width,
  predicted = plot.df$predicted
)

# Return only the rows of `df` that lie on the convex hull of its (x, y)
# points; chull() yields the row indices of the hull vertices.
find_hull <- function(df) {
  df[chull(df$x, df$y), ]
}

# Apply find_hull separately to each predicted class
boundary <- ddply(plot.df1, .variables = "predicted", .fun = find_hull)

# Points for every test observation, with a semi-transparent hull polygon
# per predicted class drawn from the boundary rows
ggplot(
  plot.df,
  aes(Sepal.Length, Sepal.Width, color = predicted, fill = predicted)
) +
  geom_point(size = 5) +
  geom_polygon(data = boundary, aes(x, y), alpha = 0.5)