commit 5db2b899d2de829212b7910c5325587ab58f22b3
parent 307e631efbd4afcc6b418406d9f9ef2e0657505d
Author: Dominik Schmidt <das1993@hotmail.com>
Date: Fri, 29 Jun 2018 00:23:03 +0000
Rename clustering.d to laundryclustery.d
Diffstat:
3 files changed, 163 insertions(+), 163 deletions(-)
diff --git a/Makefile b/Makefile
@@ -11,7 +11,7 @@ DFLAGS?=-O -release
.PHONY:all database.rrd plot.png install
-src/clustering: src/clustering.d
+src/laundryclustery: src/laundryclustery.d
$(DMD) $(DFLAGS) $^ -of=$@
src/laundrysorcery: src/laundrysorcery.c
diff --git a/src/clustering.d b/src/clustering.d
@@ -1,162 +0,0 @@
-import std.stdio;
-import std.range;
-import std.algorithm;
-import std.typecons;
-import std.math;
-import std.traits;
-
-alias Point=uint;
-
-/**
- * Returns mean and variance of the points given in the range
- */
-auto gaussian(T=double, R)(R r) if(isForwardRange!R && isNumeric!(ElementType!R)){
- auto p=r
- .fold!((a,b){return tuple!(T,T,size_t)(a[0]+b, a[1]+b^^2,a[2]+1);})(tuple!(T,T,size_t)(0,0,0));
- return tuple!(T,"mean",T,"variance")(p[0]/p[2],p[1]/p[2]-(p[0]/p[2])^^2);
-}
-unittest{
- auto res=gaussian([3,4,5]);
- assert(res.mean==4.0);
- assert(res.variance==2.0);
-}
-
-//struct Cluster(T) if(hasLength!T && isForwardRange!T && is(ElementType!T == Point)){
-// T points;
-struct Cluster{
- Point[] points;
- double mean;
- double variance;
-
- void add(Point p){
- points~=p;
- }
- void reset(){
- points.length=0;
- }
-
- void calculate(){
- auto g=gaussian(points);
- mean=g[0];
- variance=g[1];
- }
-
- float calculate_delta(){
- auto m=mean,v=variance;
- calculate();
- return (m-mean)^^2+(v-variance)^^2;
- }
-}
-
-/**
- * Returns a pointer to the cluster that is closest to point p
- */
-auto closest(Cluster[] clusters, Point p)
-in{
- assert(clusters.length>0);
-}
-out(res){
- assert(res!=null);
-}
-do{
- Cluster *res;
- float mindist=float.max;
- foreach(ref c; clusters){
- auto dist=abs(p-c.mean);
- if(dist<mindist){
- res=&c;
- mindist=dist;
- }
- }
- return res;
-}
-
-/**
- * Adds the point p to the cluster whose mean is closest to it
- */
-void addToClosest(Cluster[] clusters, Point p){
- closest(clusters,p).add(p);
-}
-
-import std.random;
-
-/**
- * The standard k-means algorithm
- */
-void kmeans(Point[] points, ref Cluster[] clusters,uint maxiter=100){
- foreach(v;enumerate(points.randomSample(clusters.length))){
- clusters[v[0]].mean=v[1];
- }
-
- void reset(){
- foreach(ref c; clusters){
- c.reset();
- }
- }
-
- foreach(iteration; iota(0,maxiter)){
- reset();
- points.each!(a=>addToClosest(clusters,a));
- auto s=clusters.map!((ref a)=>a.calculate_delta()).sum;
- if(s<1e-6){
- return;
- }
- }
-}
-
-/**
- * This does k-means with increasing cluster sizes until the maximal
- * std-deviation/mean ratio is below cutoff.
- */
-
-Cluster[] autokmeans(Point[] points, float cutoff=0.5, uint maxclusters=10){
- Cluster[] res;
- foreach(nc; iota(1,maxclusters+1)){
- res=new Cluster[nc];
- kmeans(points, res);
- if(res.map!(a=>sqrt(a.variance)/a.mean).fold!"max(a,b)"(0.0)<cutoff){
- return res;
- }
- }
- return res;
-}
-unittest{
- Cluster c;
- c.points=[5,2];
- c.calculate();
- assert(c.mean==3.5);
- assert(c.variance==2.25);
-}
-
-import std.file;
-
-int main(string[] args){
- File f;
- if(args.length!=2){
- stderr.writeln("Usage: ", args[0], " </path/to/log/file>");
- return 1;
- }
- if(args[1]=="-"){
- f=stdin;
- }
- else if(!exists(args[1])){
- stderr.writeln(args[1], " does not exist");
- return 1;
- }
- else{
- f.open(args[1]);
- }
-
- auto points=f
- .byRecord!(uint, uint)("%s %s")
- .map!(a=>a[1]-a[0])
- .filter!(a=>a>10*60)
- .array;
-
- auto res=points.autokmeans();
- res.sort!"a.mean<b.mean";
- res
- .each!(a=>writeln(a.points.length, "\t", a.mean, "\t", a.variance));
-
- return 0;
-}
diff --git a/src/laundryclustery.d b/src/laundryclustery.d
@@ -0,0 +1,162 @@
+import std.stdio;
+import std.range;
+import std.algorithm;
+import std.typecons;
+import std.math;
+import std.traits;
+
+// A Point is one scalar sample; main() builds the point list from uint differences.
+alias Point=uint;
+
+/**
+ * Returns the mean and the population variance (E[x^2] - E[x]^2) of the
+ * values in the range.
+ *
+ * Params:
+ *   T = type used for accumulation and for the result (double by default)
+ *   r = forward range of numeric values
+ *
+ * Returns: a tuple with the named fields "mean" and "variance". For an empty
+ * range and a floating-point T both fields are NaN (0/0).
+ */
+auto gaussian(T=double, R)(R r) if(isForwardRange!R && isNumeric!(ElementType!R)){
+    T sum=0;
+    T sumSq=0;
+    size_t count=0;
+    foreach(e; r){
+        // Convert to T before squaring: squaring in the element type wraps
+        // silently for integers (a uint above 65535 overflows).
+        T x=cast(T)e;
+        sum+=x;
+        sumSq+=x*x;
+        ++count;
+    }
+    T mean=sum/count;
+    T spread=sumSq/count-mean*mean;
+    // A variance is never negative; rounding in the subtraction above can
+    // still produce a tiny negative value, which would turn into NaN under
+    // sqrt. NaN itself (empty range) is left untouched by this comparison.
+    if(spread<0){
+        spread=0;
+    }
+    return tuple!(T,"mean",T,"variance")(mean,spread);
+}
+unittest{
+    // {3,4,5}: mean 4, squared deviations 1+0+1, divided by n=3 gives 2/3.
+    // Compared with a tolerance because the value is not exactly representable.
+    auto res=gaussian([3,4,5]);
+    assert(res.mean==4.0);
+    assert(abs(res.variance-2.0/3)<1e-9);
+}
+
+//struct Cluster(T) if(hasLength!T && isForwardRange!T && is(ElementType!T == Point)){
+//	T points;
+/**
+ * A group of points together with the mean and variance that were last
+ * computed for it.
+ */
+struct Cluster{
+    Point[] points;
+    double mean;
+    double variance;
+
+    /// Appends p to the points; mean and variance are not touched.
+    void add(Point p){
+        points~=p;
+    }
+
+    /// Empties the point list; mean and variance keep their last values.
+    void reset(){
+        points.length=0;
+    }
+
+    /// Recomputes mean and variance from the current points.
+    void calculate(){
+        auto stats=gaussian(points);
+        mean=stats.mean;
+        variance=stats.variance;
+    }
+
+    /**
+     * Recomputes the statistics and returns the squared change of the mean
+     * plus the squared change of the variance.
+     */
+    float calculate_delta(){
+        immutable oldMean=mean;
+        immutable oldVariance=variance;
+        calculate();
+        immutable meanShift=oldMean-mean;
+        immutable varianceShift=oldVariance-variance;
+        return meanShift^^2+varianceShift^^2;
+    }
+}
+
+/**
+ * Returns a pointer to the cluster whose mean is closest to point p.
+ *
+ * On a tie the earliest cluster in the slice wins. A cluster whose mean is
+ * NaN never compares as closer and is therefore never selected.
+ */
+auto closest(Cluster[] clusters, Point p)
+in{
+    assert(clusters.length>0);
+}
+out(res){
+    assert(res!=null);
+}
+do{
+    Cluster *res;
+    // Keep the running minimum in double, the type of the computed distance.
+    // Storing it in a float rounds it, and a rounded-up minimum lets a later,
+    // slightly farther cluster win the comparison.
+    double mindist=double.infinity;
+    foreach(ref c; clusters){
+        immutable dist=abs(p-c.mean);
+        if(dist<mindist){
+            res=&c;
+            mindist=dist;
+        }
+    }
+    return res;
+}
+
+/**
+ * Looks up the cluster whose mean is nearest to p and appends p to it.
+ */
+void addToClosest(Cluster[] clusters, Point p){
+    auto target=closest(clusters,p);
+    target.add(p);
+}
+
+import std.random;
+
+/**
+ * The standard k-means algorithm.
+ *
+ * Seeds the cluster means with randomly sampled points, then repeatedly
+ * assigns every point to its closest cluster and recomputes the cluster
+ * statistics. Stops when the summed change reported by calculate_delta()
+ * drops below 1e-6 or after maxiter iterations.
+ *
+ * Params:
+ *   points   = the points to partition
+ *   clusters = the clusters to fill; their length is the k of k-means
+ *   maxiter  = upper bound on the number of iterations
+ *
+ * With no points or no clusters the clusters are only emptied. With fewer
+ * points than clusters only the first points.length clusters receive a seed.
+ */
+void kmeans(Point[] points, ref Cluster[] clusters,uint maxiter=100){
+    void reset(){
+        foreach(ref c; clusters){
+            c.reset();
+        }
+    }
+
+    if(points.length==0 || clusters.length==0){
+        reset();
+        return;
+    }
+
+    // randomSample cannot deliver more elements than the input holds, so
+    // never ask for more seeds than there are points.
+    immutable seeds=min(points.length, clusters.length);
+    foreach(v; enumerate(points.randomSample(seeds))){
+        clusters[v[0]].mean=v[1];
+    }
+
+    foreach(iteration; iota(0,maxiter)){
+        reset();
+        points.each!(a=>addToClosest(clusters,a));
+        auto s=clusters.map!((ref a)=>a.calculate_delta()).sum;
+        if(s<1e-6){
+            return;
+        }
+    }
+}
+
+/**
+ * Runs k-means with an increasing number of clusters until the largest
+ * std-deviation/mean ratio among the clusters is below cutoff.
+ *
+ * Params:
+ *   points      = the points to partition
+ *   cutoff      = accepted upper bound for sqrt(variance)/mean
+ *   maxclusters = largest number of clusters to try
+ *
+ * Returns: the clusters of the first run that satisfies the cutoff, otherwise
+ * those of the last run. The result is empty when points is empty or
+ * maxclusters is 0.
+ */
+Cluster[] autokmeans(Point[] points, float cutoff=0.5, uint maxclusters=10){
+    Cluster[] res;
+    // More clusters than points cannot be seeded, so stop at points.length.
+    immutable size_t limit=min(cast(size_t)maxclusters, points.length);
+    foreach(nc; 1..limit+1){
+        res=new Cluster[nc];
+        kmeans(points, res);
+        // NaN ratios (clusters without points) never win the max and are ignored.
+        immutable worst=res.map!(a=>sqrt(a.variance)/a.mean).fold!"max(a,b)"(0.0);
+        if(worst<cutoff){
+            return res;
+        }
+    }
+    return res;
+}
+unittest{
+    // Two points: mean (5+2)/2, variance (25+4)/2 - 3.5^^2.
+    Cluster pair;
+    pair.points=[5,2];
+    pair.calculate();
+    assert(pair.mean==3.5);
+    assert(pair.variance==2.25);
+}
+
+import std.file;
+
+/**
+ * Reads records of two unsigned integers per line from the file named on the
+ * command line ("-" selects stdin), clusters the differences between the
+ * second and the first value and prints one line per cluster: number of
+ * points, mean and variance, separated by tabs and ordered by mean.
+ *
+ * Returns: 0 on success, 1 on a usage error or when the file cannot be read.
+ */
+int main(string[] args){
+    // Differences at or below this value are dropped before clustering.
+    // NOTE(review): presumably seconds, i.e. ten minutes; confirm against the log format.
+    enum uint minDifference=10*60;
+
+    if(args.length!=2){
+        stderr.writeln("Usage: ", args[0], " </path/to/log/file>");
+        return 1;
+    }
+
+    File f;
+    if(args[1]=="-"){
+        f=stdin;
+    }
+    else if(!exists(args[1])){
+        stderr.writeln(args[1], " does not exist");
+        return 1;
+    }
+    else{
+        // The path can exist and still not be openable (permissions, directory).
+        try{
+            f.open(args[1]);
+        }
+        catch(Exception e){
+            stderr.writeln(args[1], ": ", e.msg);
+            return 1;
+        }
+    }
+
+    auto points=f
+        .byRecord!(uint, uint)("%s %s")
+        // Skip records whose second value is smaller than the first: the
+        // unsigned subtraction would wrap to a huge value and pass the
+        // threshold below.
+        .filter!(a=>a[1]>=a[0])
+        .map!(a=>a[1]-a[0])
+        .filter!(a=>a>minDifference)
+        .array;
+
+    auto res=points.autokmeans();
+    res.sort!"a.mean<b.mean";
+    res
+        .each!(a=>writeln(a.points.length, "\t", a.mean, "\t", a.variance));
+
+    return 0;
+}