Improve the clustering model where the variance is too high.

commit 6259e4757ec602642ab41aecedee22c0b8d2e956
parent 829a403b01ca3e3409b4fd5950f388ca15dbab32
Author: Dominik Schmidt <das1993@hotmail.com>
Date:   Sat, 30 Jun 2018 15:50:52 +0000

Improve the clustering model where the variance is too high.

A clustering is now accepted only if every cluster satisfies both a relative and an absolute tolerance.
The relative tolerance states that the standard deviation should not be larger than 10% of the mean.
The absolute tolerance states that the standard deviation should not be larger than 30 minutes.

Diffstat:
src/laundryclustery.d  | 10 +++++++---

1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/src/laundryclustery.d b/src/laundryclustery.d
@@ -109,12 +109,16 @@ void kmeans(Point[] points, ref Cluster[] clusters,uint maxiter=100){
  * std-deviation/mean ratio is below cutoff.
  */
 
-Cluster[] autokmeans(Point[] points, float cutoff=0.5, uint maxclusters=10){
+Cluster[] autokmeans(Point[] points, float reltol=0.1, float abstol=30*60, uint maxclusters=10){
 	Cluster[] res;
 	foreach(nc; iota(1,maxclusters+1)){
 		res=new Cluster[nc];
 		kmeans(points, res);
-		if(res.map!(a=>sqrt(a.variance)/a.mean).fold!"max(a,b)"(0.0)<cutoff){
+		auto tol=res
+			.map!((a){auto stdev=sqrt(a.variance); return tuple(stdev/a.mean, stdev);})
+			.map!(a=>tuple(a[0]<reltol,a[1]<abstol))
+			.fold!((a,b)=>a = a && b[0] && b[1])(true);
+		if(tol){
 			return res;
 		}
 	}
@@ -151,7 +155,7 @@ int main(string[] args){
 		.byRecord!(uint, uint)("%s %s")
 		.map!(a=>a[1]-a[0])
 		.filter!(a=>a>10*60)
-		.filter!(a=>a<5*5*60)
+		.filter!(a=>a<5*60*60)
 		.array;
 		
 	auto res=points.autokmeans();

	LaundrySorcery
	Log \| Files \| Refs

LaundrySorcery