LaundrySorcery

commit 5db2b899d2de829212b7910c5325587ab58f22b3
parent 307e631efbd4afcc6b418406d9f9ef2e0657505d
Author: Dominik Schmidt <das1993@hotmail.com>
Date:   Fri, 29 Jun 2018 00:23:03 +0000

Rename clustering.d to laundryclustery.d

Diffstat:
 Makefile              |    2 +-
 src/clustering.d      |  162 -------------------------------------------------------------------------------
 src/laundryclustery.d |  162 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 163 insertions(+), 163 deletions(-)

diff --git a/Makefile b/Makefile
@@ -11,7 +11,7 @@ DFLAGS?=-O -release
 .PHONY:all database.rrd plot.png install
 
-src/clustering: src/clustering.d
+src/laundryclustery: src/laundryclustery.d
 	$(DMD) $(DFLAGS) $^ -of=$@
 
 src/laundrysorcery: src/laundrysorcery.c
diff --git a/src/clustering.d b/src/clustering.d
@@ -1,162 +0,0 @@
-import std.stdio;
-import std.range;
-import std.algorithm;
-import std.typecons;
-import std.math;
-import std.traits;
-
-alias Point=uint;
-
-/**
- * Returns mean and variance of the points given in the range
- */
-auto gaussian(T=double, R)(R r) if(isForwardRange!R && isNumeric!(ElementType!R)){
-	auto p=r
-		.fold!((a,b){return tuple!(T,T,size_t)(a[0]+b, a[1]+b^^2,a[2]+1);})(tuple!(T,T,size_t)(0,0,0));
-	return tuple!(T,"mean",T,"variance")(p[0]/p[2],p[1]/p[2]-(p[0]/p[2])^^2);
-}
-unittest{
-	auto res=gaussian([3,4,5]);
-	assert(res.mean==4.0);
-	assert(res.variance==2.0);
-}
-
-//struct Cluster(T) if(hasLength!T && isForwardRange!T && is(ElementType!T == Point)){
-//	T points;
-struct Cluster{
-	Point[] points;
-	double mean;
-	double variance;
-
-	void add(Point p){
-		points~=p;
-	}
-	void reset(){
-		points.length=0;
-	}
-
-	void calculate(){
-		auto g=gaussian(points);
-		mean=g[0];
-		variance=g[1];
-	}
-
-	float calculate_delta(){
-		auto m=mean,v=variance;
-		calculate();
-		return (m-mean)^^2+(v-variance)^^2;
-	}
-}
-
-/**
- * Returns a pointer to the cluster that is closest to point p
- */
-auto closest(Cluster[] clusters, Point p)
-in{
-	assert(clusters.length>0);
-}
-out(res){
-	assert(res!=null);
-}
-do{
-	Cluster *res;
-	float mindist=float.max;
-	foreach(ref c; clusters){
-		auto dist=abs(p-c.mean);
-		if(dist<mindist){
-			res=&c;
-			mindist=dist;
-		}
-	}
-	return res;
-}
-
-/**
- * Adds the point p to the cluster whose mean is closest to it
- */
-void addToClosest(Cluster[] clusters, Point p){
-	closest(clusters,p).add(p);
-}
-
-import std.random;
-
-/**
- * The standard k-means algorithm
- */
-void kmeans(Point[] points, ref Cluster[] clusters,uint maxiter=100){
-	foreach(v;enumerate(points.randomSample(clusters.length))){
-		clusters[v[0]].mean=v[1];
-	}
-
-	void reset(){
-		foreach(ref c; clusters){
-			c.reset();
-		}
-	}
-
-	foreach(iteration; iota(0,maxiter)){
-		reset();
-		points.each!(a=>addToClosest(clusters,a));
-		auto s=clusters.map!((ref a)=>a.calculate_delta()).sum;
-		if(s<1e-6){
-			return;
-		}
-	}
-}
-
-/**
- * This does k-means with increasing cluster sizes until the maximal
- * std-deviation/mean ratio is below cutoff.
- */
-
-Cluster[] autokmeans(Point[] points, float cutoff=0.5, uint maxclusters=10){
-	Cluster[] res;
-	foreach(nc; iota(1,maxclusters+1)){
-		res=new Cluster[nc];
-		kmeans(points, res);
-		if(res.map!(a=>sqrt(a.variance)/a.mean).fold!"max(a,b)"(0.0)<cutoff){
-			return res;
-		}
-	}
-	return res;
-}
-unittest{
-	Cluster c;
-	c.points=[5,2];
-	c.calculate();
-	assert(c.mean==3.5);
-	assert(c.variance==2.25);
-}
-
-import std.file;
-
-int main(string[] args){
-	File f;
-	if(args.length!=2){
-		stderr.writeln("Usage: ", args[0], " </path/to/log/file>");
-		return 1;
-	}
-	if(args[1]=="-"){
-		f=stdin;
-	}
-	else if(!exists(args[1])){
-		stderr.writeln(args[1], " does not exist");
-		return 1;
-	}
-	else{
-		f.open(args[1]);
-	}
-
-	auto points=f
-		.byRecord!(uint, uint)("%s %s")
-		.map!(a=>a[1]-a[0])
-		.filter!(a=>a>10*60)
-		.array;
-
-	auto res=points.autokmeans();
-	res.sort!"a.mean<b.mean";
-	res
-		.each!(a=>writeln(a.points.length, "\t", a.mean, "\t", a.variance));
-
-	return 0;
-}
diff --git a/src/laundryclustery.d b/src/laundryclustery.d
@@ -0,0 +1,162 @@
+import std.stdio;
+import std.range;
+import std.algorithm;
+import std.typecons;
+import std.math;
+import std.traits;
+
+alias Point=uint;
+
+/**
+ * Returns mean and variance of the points given in the range
+ */
+auto gaussian(T=double, R)(R r) if(isForwardRange!R && isNumeric!(ElementType!R)){
+	auto p=r
+		.fold!((a,b){return tuple!(T,T,size_t)(a[0]+b, a[1]+b^^2,a[2]+1);})(tuple!(T,T,size_t)(0,0,0));
+	return tuple!(T,"mean",T,"variance")(p[0]/p[2],p[1]/p[2]-(p[0]/p[2])^^2);
+}
+unittest{
+	auto res=gaussian([3,4,5]);
+	assert(res.mean==4.0);
+	assert(res.variance==2.0);
+}
+
+//struct Cluster(T) if(hasLength!T && isForwardRange!T && is(ElementType!T == Point)){
+//	T points;
+struct Cluster{
+	Point[] points;
+	double mean;
+	double variance;
+
+	void add(Point p){
+		points~=p;
+	}
+	void reset(){
+		points.length=0;
+	}
+
+	void calculate(){
+		auto g=gaussian(points);
+		mean=g[0];
+		variance=g[1];
+	}
+
+	float calculate_delta(){
+		auto m=mean,v=variance;
+		calculate();
+		return (m-mean)^^2+(v-variance)^^2;
+	}
+}
+
+/**
+ * Returns a pointer to the cluster that is closest to point p
+ */
+auto closest(Cluster[] clusters, Point p)
+in{
+	assert(clusters.length>0);
+}
+out(res){
+	assert(res!=null);
+}
+do{
+	Cluster *res;
+	float mindist=float.max;
+	foreach(ref c; clusters){
+		auto dist=abs(p-c.mean);
+		if(dist<mindist){
+			res=&c;
+			mindist=dist;
+		}
+	}
+	return res;
+}
+
+/**
+ * Adds the point p to the cluster whose mean is closest to it
+ */
+void addToClosest(Cluster[] clusters, Point p){
+	closest(clusters,p).add(p);
+}
+
+import std.random;
+
+/**
+ * The standard k-means algorithm
+ */
+void kmeans(Point[] points, ref Cluster[] clusters,uint maxiter=100){
+	foreach(v;enumerate(points.randomSample(clusters.length))){
+		clusters[v[0]].mean=v[1];
+	}
+
+	void reset(){
+		foreach(ref c; clusters){
+			c.reset();
+		}
+	}
+
+	foreach(iteration; iota(0,maxiter)){
+		reset();
+		points.each!(a=>addToClosest(clusters,a));
+		auto s=clusters.map!((ref a)=>a.calculate_delta()).sum;
+		if(s<1e-6){
+			return;
+		}
+	}
+}
+
+/**
+ * This does k-means with increasing cluster sizes until the maximal
+ * std-deviation/mean ratio is below cutoff.
+ */
+
+Cluster[] autokmeans(Point[] points, float cutoff=0.5, uint maxclusters=10){
+	Cluster[] res;
+	foreach(nc; iota(1,maxclusters+1)){
+		res=new Cluster[nc];
+		kmeans(points, res);
+		if(res.map!(a=>sqrt(a.variance)/a.mean).fold!"max(a,b)"(0.0)<cutoff){
+			return res;
+		}
+	}
+	return res;
+}
+unittest{
+	Cluster c;
+	c.points=[5,2];
+	c.calculate();
+	assert(c.mean==3.5);
+	assert(c.variance==2.25);
+}
+
+import std.file;
+
+int main(string[] args){
+	File f;
+	if(args.length!=2){
+		stderr.writeln("Usage: ", args[0], " </path/to/log/file>");
+		return 1;
+	}
+	if(args[1]=="-"){
+		f=stdin;
+	}
+	else if(!exists(args[1])){
+		stderr.writeln(args[1], " does not exist");
+		return 1;
+	}
+	else{
+		f.open(args[1]);
+	}
+
+	auto points=f
+		.byRecord!(uint, uint)("%s %s")
+		.map!(a=>a[1]-a[0])
+		.filter!(a=>a>10*60)
+		.array;
+
+	auto res=points.autokmeans();
+	res.sort!"a.mean<b.mean";
+	res
+		.each!(a=>writeln(a.points.length, "\t", a.mean, "\t", a.variance));
+
+	return 0;
+}
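For context, not part of the commit: a minimal sketch of how the clustering entry point in the renamed module is driven. It mirrors main() above, but the duration values and the demo() helper are hypothetical, and it assumes laundryclustery.d is compiled in as a library (i.e. without its main) so that Point, Cluster and autokmeans are available.

import std.stdio;
import std.algorithm : each, sort;

void demo(){
	// Hypothetical laundry-cycle durations in seconds; the real tool derives
	// them from "start end" timestamp pairs and drops spans under 10 minutes.
	Point[] durations=[2400, 2450, 2500, 3600, 3650];

	// autokmeans() grows the cluster count until every cluster's
	// stddev/mean ratio falls below the default cutoff of 0.5.
	auto clusters=durations.autokmeans();
	clusters.sort!"a.mean<b.mean";

	// Same report format as main(): size, mean and variance per cluster.
	clusters.each!(c=>writeln(c.points.length, "\t", c.mean, "\t", c.variance));
}

After this commit the corresponding binary is built through the src/laundryclustery Makefile target rather than src/clustering, as shown in the Makefile hunk above.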