commit 6259e4757ec602642ab41aecedee22c0b8d2e956
parent 829a403b01ca3e3409b4fd5950f388ca15dbab32
Author: Dominik Schmidt <das1993@hotmail.com>
Date: Sat, 30 Jun 2018 15:50:52 +0000
Improve the clustering model for cases where the variance is too high.
We have a relative tolerance and an absolute one.
The relative tolerance states that the standard deviation should not be larger than 10% of the mean.
The absolute tolerance states that the standard deviation should not be larger than 30 minutes.
Diffstat:
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/src/laundryclustery.d b/src/laundryclustery.d
@@ -109,12 +109,16 @@ void kmeans(Point[] points, ref Cluster[] clusters,uint maxiter=100){
* std-deviation/mean ratio is below cutoff.
*/
-Cluster[] autokmeans(Point[] points, float cutoff=0.5, uint maxclusters=10){
+Cluster[] autokmeans(Point[] points, float reltol=0.1, float abstol=30*60, uint maxclusters=10){
Cluster[] res;
foreach(nc; iota(1,maxclusters+1)){
res=new Cluster[nc];
kmeans(points, res);
- if(res.map!(a=>sqrt(a.variance)/a.mean).fold!"max(a,b)"(0.0)<cutoff){
+ auto tol=res
+ .map!((a){auto stdev=sqrt(a.variance); return tuple(stdev/a.mean, stdev);})
+ .map!(a=>tuple(a[0]<reltol,a[1]<abstol))
+ .fold!((a,b)=>a = a && b[0] && b[1])(true);
+ if(tol){
return res;
}
}
@@ -151,7 +155,7 @@ int main(string[] args){
.byRecord!(uint, uint)("%s %s")
.map!(a=>a[1]-a[0])
.filter!(a=>a>10*60)
- .filter!(a=>a<5*5*60)
+ .filter!(a=>a<5*60*60)
.array;
auto res=points.autokmeans();