LaundrySorcery

Log | Files | Refs

commit 03f07f0f3b10204c231226339d64b2558475685b
parent fc9e838cfc3da77b71757927b32c2f68c90b5929
Author: Dominik Schmidt <das1993@hotmail.com>
Date:   Sat, 30 Jun 2018 16:36:28 +0000

Upgrade kmeans to kmeans++

Diffstat:
src/laundryclustery.d | 25+++++++++++++++++++++----
1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/src/laundryclustery.d b/src/laundryclustery.d @@ -78,14 +78,31 @@ void addToClosest(Cluster[] clusters, Point p){ closest(clusters,p)[0].add(p); } -import std.random; /** - * The standard k-means algorithm + * The k-means++ algorithm */ void kmeans(Point[] points, ref Cluster[] clusters,uint maxiter=100){ - foreach(v;enumerate(points.randomSample(clusters.length))){ - clusters[v[0]].mean=v[1]; + import std.random; + + //K-means++ initialization + clusters[0].mean=points.choice(); + ulong[] distances=new ulong[points.length]; + for(uint i=1; i<clusters.length; i++){ + auto initialized=clusters[0..i]; + ulong sum=0; + points + .map!(a=>cast(ulong)closest(initialized,a)[1]^^2) + .tee!(a=>sum+=a) + .copy(distances); + auto rand=uniform(0,sum); + foreach(ii,dist;enumerate(distances)){ + if(rand<dist){ + clusters[i].mean=points[ii]; + break; + } + rand-=dist; + } } void reset(){