LaundrySorcery

Log | Files | Refs

commit 6259e4757ec602642ab41aecedee22c0b8d2e956
parent 829a403b01ca3e3409b4fd5950f388ca15dbab32
Author: Dominik Schmidt <das1993@hotmail.com>
Date:   Sat, 30 Jun 2018 15:50:52 +0000

Improve the clustering model where the variance is too high.

The model now checks two tolerances, a relative one and an absolute one; every cluster must satisfy both.
The relative tolerance requires that a cluster's standard deviation be no larger than 10% of its mean.
The absolute tolerance requires that a cluster's standard deviation be no larger than 30 minutes.

Diffstat:
src/laundryclustery.d | 10+++++++---
1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/src/laundryclustery.d b/src/laundryclustery.d @@ -109,12 +109,16 @@ void kmeans(Point[] points, ref Cluster[] clusters,uint maxiter=100){ * std-deviation/mean ratio is below cutoff. */ -Cluster[] autokmeans(Point[] points, float cutoff=0.5, uint maxclusters=10){ +Cluster[] autokmeans(Point[] points, float reltol=0.1, float abstol=30*60, uint maxclusters=10){ Cluster[] res; foreach(nc; iota(1,maxclusters+1)){ res=new Cluster[nc]; kmeans(points, res); - if(res.map!(a=>sqrt(a.variance)/a.mean).fold!"max(a,b)"(0.0)<cutoff){ + auto tol=res + .map!((a){auto stdev=sqrt(a.variance); return tuple(stdev/a.mean, stdev);}) + .map!(a=>tuple(a[0]<reltol,a[1]<abstol)) + .fold!((a,b)=>a = a && b[0] && b[1])(true); + if(tol){ return res; } } @@ -151,7 +155,7 @@ int main(string[] args){ .byRecord!(uint, uint)("%s %s") .map!(a=>a[1]-a[0]) .filter!(a=>a>10*60) - .filter!(a=>a<5*5*60) + .filter!(a=>a<5*60*60) .array; auto res=points.autokmeans();