LaundrySorcery

commit 5db2b899d2de829212b7910c5325587ab58f22b3
parent 307e631efbd4afcc6b418406d9f9ef2e0657505d
Author: Dominik Schmidt <das1993@hotmail.com>
Date:   Fri, 29 Jun 2018 00:23:03 +0000

Rename clustering.d to laundryclustery.d

Diffstat:
 Makefile              |    2 +-
 src/clustering.d      |  162 -------------------------------------------------------------------------------
 src/laundryclustery.d |  162 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 163 insertions(+), 163 deletions(-)

diff --git a/Makefile b/Makefile
@@ -11,7 +11,7 @@ DFLAGS?=-O -release
 .PHONY:all database.rrd plot.png install
 
-src/clustering: src/clustering.d
+src/laundryclustery: src/laundryclustery.d
 	$(DMD) $(DFLAGS) $^ -of=$@
 
 src/laundrysorcery: src/laundrysorcery.c
diff --git a/src/clustering.d b/src/clustering.d
@@ -1,162 +0,0 @@
-import std.stdio;
-import std.range;
-import std.algorithm;
-import std.typecons;
-import std.math;
-import std.traits;
-
-alias Point=uint;
-
-/**
- * Returns mean and variance of the points given in the range
- */
-auto gaussian(T=double, R)(R r) if(isForwardRange!R && isNumeric!(ElementType!R)){
-	auto p=r
-		.fold!((a,b){return tuple!(T,T,size_t)(a[0]+b, a[1]+b^^2,a[2]+1);})(tuple!(T,T,size_t)(0,0,0));
-	return tuple!(T,"mean",T,"variance")(p[0]/p[2],p[1]/p[2]-(p[0]/p[2])^^2);
-}
-unittest{
-	auto res=gaussian([3,4,5]);
-	assert(res.mean==4.0);
-	assert(res.variance==2.0);
-}
-
-//struct Cluster(T) if(hasLength!T && isForwardRange!T && is(ElementType!T == Point)){
-//	T points;
-struct Cluster{
-	Point[] points;
-	double mean;
-	double variance;
-
-	void add(Point p){
-		points~=p;
-	}
-	void reset(){
-		points.length=0;
-	}
-
-	void calculate(){
-		auto g=gaussian(points);
-		mean=g[0];
-		variance=g[1];
-	}
-
-	float calculate_delta(){
-		auto m=mean,v=variance;
-		calculate();
-		return (m-mean)^^2+(v-variance)^^2;
-	}
-}
-
-/**
- * Returns a pointer to the cluster that is closest to point p
- */
-auto closest(Cluster[] clusters, Point p)
-in{
-	assert(clusters.length>0);
-}
-out(res){
-	assert(res!=null);
-}
-do{
-	Cluster *res;
-	float mindist=float.max;
-	foreach(ref c; clusters){
-		auto dist=abs(p-c.mean);
-		if(dist<mindist){
-			res=&c;
-			mindist=dist;
-		}
-	}
-	return res;
-}
-
-/**
- * Adds the point p to the cluster whose mean is closest to it
- */
-void addToClosest(Cluster[] clusters, Point p){
-	closest(clusters,p).add(p);
-}
-
-import std.random;
-
-/**
- * The standard k-means algorithm
- */
-void kmeans(Point[] points, ref Cluster[] clusters,uint maxiter=100){
-	foreach(v;enumerate(points.randomSample(clusters.length))){
-		clusters[v[0]].mean=v[1];
-	}
-
-	void reset(){
-		foreach(ref c; clusters){
-			c.reset();
-		}
-	}
-
-	foreach(iteration; iota(0,maxiter)){
-		reset();
-		points.each!(a=>addToClosest(clusters,a));
-		auto s=clusters.map!((ref a)=>a.calculate_delta()).sum;
-		if(s<1e-6){
-			return;
-		}
-	}
-}
-
-/**
- * This does k-means with increasing cluster sizes until the maximal
- * std-deviation/mean ratio is below cutoff.
- */
-
-Cluster[] autokmeans(Point[] points, float cutoff=0.5, uint maxclusters=10){
-	Cluster[] res;
-	foreach(nc; iota(1,maxclusters+1)){
-		res=new Cluster[nc];
-		kmeans(points, res);
-		if(res.map!(a=>sqrt(a.variance)/a.mean).fold!"max(a,b)"(0.0)<cutoff){
-			return res;
-		}
-	}
-	return res;
-}
-unittest{
-	Cluster c;
-	c.points=[5,2];
-	c.calculate();
-	assert(c.mean==3.5);
-	assert(c.variance==2.25);
-}
-
-import std.file;
-
-int main(string[] args){
-	File f;
-	if(args.length!=2){
-		stderr.writeln("Usage: ", args[0], " </path/to/log/file>");
-		return 1;
-	}
-	if(args[1]=="-"){
-		f=stdin;
-	}
-	else if(!exists(args[1])){
-		stderr.writeln(args[1], " does not exist");
-		return 1;
-	}
-	else{
-		f.open(args[1]);
-	}
-
-	auto points=f
-		.byRecord!(uint, uint)("%s %s")
-		.map!(a=>a[1]-a[0])
-		.filter!(a=>a>10*60)
-		.array;
-
-	auto res=points.autokmeans();
-	res.sort!"a.mean<b.mean";
-	res
-		.each!(a=>writeln(a.points.length, "\t", a.mean, "\t", a.variance));
-
-	return 0;
-}
diff --git a/src/laundryclustery.d b/src/laundryclustery.d
@@ -0,0 +1,162 @@
+import std.stdio;
+import std.range;
+import std.algorithm;
+import std.typecons;
+import std.math;
+import std.traits;
+
+alias Point=uint;
+
+/**
+ * Returns mean and variance of the points given in the range
+ */
+auto gaussian(T=double, R)(R r) if(isForwardRange!R && isNumeric!(ElementType!R)){
+	auto p=r
+		.fold!((a,b){return tuple!(T,T,size_t)(a[0]+b, a[1]+b^^2,a[2]+1);})(tuple!(T,T,size_t)(0,0,0));
+	return tuple!(T,"mean",T,"variance")(p[0]/p[2],p[1]/p[2]-(p[0]/p[2])^^2);
+}
+unittest{
+	auto res=gaussian([3,4,5]);
+	assert(res.mean==4.0);
+	assert(res.variance==2.0);
+}
+
+//struct Cluster(T) if(hasLength!T && isForwardRange!T && is(ElementType!T == Point)){
+//	T points;
+struct Cluster{
+	Point[] points;
+	double mean;
+	double variance;
+
+	void add(Point p){
+		points~=p;
+	}
+	void reset(){
+		points.length=0;
+	}
+
+	void calculate(){
+		auto g=gaussian(points);
+		mean=g[0];
+		variance=g[1];
+	}
+
+	float calculate_delta(){
+		auto m=mean,v=variance;
+		calculate();
+		return (m-mean)^^2+(v-variance)^^2;
+	}
+}
+
+/**
+ * Returns a pointer to the cluster that is closest to point p
+ */
+auto closest(Cluster[] clusters, Point p)
+in{
+	assert(clusters.length>0);
+}
+out(res){
+	assert(res!=null);
+}
+do{
+	Cluster *res;
+	float mindist=float.max;
+	foreach(ref c; clusters){
+		auto dist=abs(p-c.mean);
+		if(dist<mindist){
+			res=&c;
+			mindist=dist;
+		}
+	}
+	return res;
+}
+
+/**
+ * Adds the point p to the cluster whose mean is closest to it
+ */
+void addToClosest(Cluster[] clusters, Point p){
+	closest(clusters,p).add(p);
+}
+
+import std.random;
+
+/**
+ * The standard k-means algorithm
+ */
+void kmeans(Point[] points, ref Cluster[] clusters,uint maxiter=100){
+	foreach(v;enumerate(points.randomSample(clusters.length))){
+		clusters[v[0]].mean=v[1];
+	}
+
+	void reset(){
+		foreach(ref c; clusters){
+			c.reset();
+		}
+	}
+
+	foreach(iteration; iota(0,maxiter)){
+		reset();
+		points.each!(a=>addToClosest(clusters,a));
+		auto s=clusters.map!((ref a)=>a.calculate_delta()).sum;
+		if(s<1e-6){
+			return;
+		}
+	}
+}
+
+/**
+ * This does k-means with increasing cluster sizes until the maximal
+ * std-deviation/mean ratio is below cutoff.
+ */
+
+Cluster[] autokmeans(Point[] points, float cutoff=0.5, uint maxclusters=10){
+	Cluster[] res;
+	foreach(nc; iota(1,maxclusters+1)){
+		res=new Cluster[nc];
+		kmeans(points, res);
+		if(res.map!(a=>sqrt(a.variance)/a.mean).fold!"max(a,b)"(0.0)<cutoff){
+			return res;
+		}
+	}
+	return res;
+}
+unittest{
+	Cluster c;
+	c.points=[5,2];
+	c.calculate();
+	assert(c.mean==3.5);
+	assert(c.variance==2.25);
+}
+
+import std.file;
+
+int main(string[] args){
+	File f;
+	if(args.length!=2){
+		stderr.writeln("Usage: ", args[0], " </path/to/log/file>");
+		return 1;
+	}
+	if(args[1]=="-"){
+		f=stdin;
+	}
+	else if(!exists(args[1])){
+		stderr.writeln(args[1], " does not exist");
+		return 1;
+	}
+	else{
+		f.open(args[1]);
+	}
+
+	auto points=f
+		.byRecord!(uint, uint)("%s %s")
+		.map!(a=>a[1]-a[0])
+		.filter!(a=>a>10*60)
+		.array;
+
+	auto res=points.autokmeans();
+	res.sort!"a.mean<b.mean";
+	res
+		.each!(a=>writeln(a.points.length, "\t", a.mean, "\t", a.variance));
+
+	return 0;
+}
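For context, not part of the commit: a minimal sketch of how the clustering entry point in the renamed module is driven. It mirrors main() above, but the duration values and the demo() helper are hypothetical, and it assumes laundryclustery.d is compiled in as a library (i.e. without its main) so that Point, Cluster and autokmeans are available.

import std.stdio;
import std.algorithm : each, sort;

void demo(){
	// Hypothetical laundry-cycle durations in seconds; the real tool derives
	// them from "start end" timestamp pairs and drops spans under 10 minutes.
	Point[] durations=[2400, 2450, 2500, 3600, 3650];

	// autokmeans() grows the cluster count until every cluster's
	// stddev/mean ratio falls below the default cutoff of 0.5.
	auto clusters=durations.autokmeans();
	clusters.sort!"a.mean<b.mean";

	// Same report format as main(): size, mean and variance per cluster.
	clusters.each!(c=>writeln(c.points.length, "\t", c.mean, "\t", c.variance));
}

After this commit the corresponding binary is built through the src/laundryclustery Makefile target rather than src/clustering, as shown in the Makefile hunk above.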