commit 03f07f0f3b10204c231226339d64b2558475685b
parent fc9e838cfc3da77b71757927b32c2f68c90b5929
Author: Dominik Schmidt <das1993@hotmail.com>
Date: Sat, 30 Jun 2018 16:36:28 +0000
Upgrade kmeans to kmeans++
Diffstat:
1 file changed, 21 insertions(+), 4 deletions(-)
diff --git a/src/laundryclustery.d b/src/laundryclustery.d
@@ -78,14 +78,31 @@ void addToClosest(Cluster[] clusters, Point p){
closest(clusters,p)[0].add(p);
}
-import std.random;
/**
- * The standard k-means algorithm
+ * The k-means++ algorithm
*/
void kmeans(Point[] points, ref Cluster[] clusters,uint maxiter=100){
- foreach(v;enumerate(points.randomSample(clusters.length))){
- clusters[v[0]].mean=v[1];
+ import std.random;
+
+ //K-means++ initialization
+ clusters[0].mean=points.choice();
+ ulong[] distances=new ulong[points.length];
+ for(uint i=1; i<clusters.length; i++){
+ auto initialized=clusters[0..i];
+ ulong sum=0;
+ points
+ .map!(a=>cast(ulong)closest(initialized,a)[1]^^2)
+ .tee!(a=>sum+=a)
+ .copy(distances);
+ auto rand=uniform(0,sum);
+ foreach(ii,dist;enumerate(distances)){
+ if(rand<dist){
+ clusters[i].mean=points[ii];
+ break;
+ }
+ rand-=dist;
+ }
}
void reset(){