rss.d (7167B)
module drss.rss;

import std.net.curl;
import std.stdio;
import std.container.slist;
import std.datetime;
import std.algorithm;
import std.string;
import std.range;
static import std.conv;
import kxml.xml;

/**
 * A basic Entry-type.
 *
 * If it is sufficient, you don't have to define your own...
 */
struct Entry{
    ///Content-Type of the entry.
    enum Type{
        HTML,
        Plain
    }
    ///Maps every Type to the string toXML writes into the `type` attribute.
    static immutable string[] TypeTable=[Type.HTML:"html", Type.Plain:"text"];
    ///The Content-type of this entry.
    Type type;
    ///The ID of the entry
    string id;
    ///The raw content to be displayed either as HTML or Plain-text (Depending on type)
    string content;
    ///The publishdate of the entry
    SysTime date;

    ///Compares according to the publishdate
    int opCmp(inout ref Entry b) const{
        return date.opCmp(b.date);
    }
    ///ditto
    int opCmp(inout Entry b) const{
        return date.opCmp(b.date);
    }

    /**
     * Generates an atom-xmlnode for the entry.
     *
     * Returns:
     *  an `entry` node with the children `id`, `published` (the date as
     *  ISO extended string) and `content` (carrying a `type` attribute
     *  taken from TypeTable).
     */
    XmlNode toXML(){
        XmlNode e=new XmlNode("entry");
        e.addChild(new XmlNode("id").addCData(id));
        e.addChild(new XmlNode("published").addCData(date.toISOExtString()));
        e.addChild(new XmlNode("content").setAttribute("type",TypeTable[type]).addCData(content));
        return e;
    }
}

/**
 * The main RSS-Class.
 *
 * Its template-parameter is to specify the entry-type, which is of
 * Type drss.rss.Entry by default.
 *
 * The checks in this class use `id` and the comparison operators of T,
 * so a custom entry-type has to provide both.
 */
abstract class DRSS(T=Entry){
    ///Value of the last received `date` header, sent back as If-Modified-Since.
    private string lastUpdated;
    ///The stored entries; addEntry inserts at the front.
    SList!(T) entries;
    ///Number of entries added since the last update() or discardNews().
    uint new_entries=0;
    /**
     * Determines whether the date set by parse is reliable or not.
     *
     * This has an impact on adding it to the entry-list entries.
     *
     */
    enum DateReliable{
        ///Always iterate through the entries and check if the ID exists
        NO,
        ///Only iterate through the entries and check if the ID exists, if the new entry is newer than the newest old one
        SEMI, //If entry is newer, check the id additionally.
        ///Never iterate through the entries to check if the ID exists
        YES
    }
    ///How far checkIfNewEntry trusts the dates of the entries.
    DateReliable date_reliability=DateReliable.SEMI;
    ///At what point shall the entries be discarded.
    uint max_entries=100;

    ///Number of elements currently stored in entries.
    protected uint entry_count=0;
    ///The request performed by fetch().
    protected HTTP http;
    ///Body of the most recently fetched document.
    protected ubyte[] buffer;

    /**
     * Parse a raw document.
     *
     * This function should extract the Posts from document and add
     * them using addEntry
     * ---
     * override void parse(string document){
     *  foreach(Entry e; splitDocumentIntoEntries(document)){
     *      this.addEntry(e);
     *  }
     * }
     * ---
     *
     * Note that with DateReliable.SEMI and DateReliable.YES an entry is
     * only accepted if it is newer than the current front of entries,
     * and accepted entries become the new front. Entries therefore have
     * to be added oldest first to be accepted.
     */
    abstract void parse(string document);

    /**
     * Initialize the Feed by url
     * Params:
     *  url = The url to fetch the raw-data from
     */
    this(string url){
        auto h=HTTP();
        h.url=url;
        this(h);
    }
    /**
     * Initialize the feed by a custom HTTP-request
     *
     * This allows for things like:
     * ---
     * this(){
     *  auto h=HTTP();
     *  h.url="http://herp.derp/rss.xml";
     *  h.setUserAgent("MyCustomUserAgent 1.0");
     *  super(h);
     * }
     * ---
     * Params:
     *  http = The http-request to execute when updating
     */
    this(HTTP http){
        this.http=http;
        this.http.onReceive = &addContent;
        this.http.onReceiveHeader = &parseHeader;
    }

    /**
     * Check if the entry newent is new.
     * Operation depends on the value of date_reliability.
     * $(SEE_ALSO enum DateReliable)
     *
     * An empty entry-list accepts every entry.
     *
     * Params:
     *  newent = the entry to check
     * Returns:
     *  false if the entry is already in entries, true otherwise
     * Throws:
     *  Exception if date_reliability holds no known value
     */
    private bool checkIfNewEntry(in ref T newent){
        if(entries.empty){
            return true;
        }
        switch(date_reliability){
            case DateReliable.NO:
                return !canFind!("a.id==b.id")(entries[], newent);
            case DateReliable.YES:
                return (newent>entries.front);
            case DateReliable.SEMI:
                if(newent>entries.front){
                    return !canFind!("a.id==b.id")(entries[], newent);
                }
                //Not newer than the front: treated as already known.
                break;
            default:
                throw new Exception("Unknonwn reliability");
        }
        return false;
    }

    /**
     * Add an entry to the list.
     *
     * The entry is inserted at the front of entries if checkIfNewEntry
     * accepts it; new_entries and entry_count are incremented then.
     *
     * Params:
     *  newent = The entry to add
     * Returns:
     *  true if the entry was added, false otherwise.
     *
     */
    bool addEntry(T newent){
        if(!checkIfNewEntry(newent)){
            return false;
        }
        entries.insertFront(newent);
        new_entries++;
        entry_count++;
        return true;
    }

    /**
     * Updates the whole stream.
     *
     * $(OL
     *  $(LI Calls `fetch();`, if document is null)
     *  $(LI Passes the result to `parse();`)
     *  $(LI Discards the old Entries with `removeOldEntries();`, if there
     *       are more than max_entries)
     * )
     *
     * Params:
     *  document = The document to parse or null, to fetch it.
     * Returns:
     *  true if the feed has new entries. false is also returned without
     *  parsing anything, if fetch() returned false.
     *
     */
    final bool update(ubyte[] document=null){
        if(document is null){
            if(!fetch()){
                return false;
            }
            document=buffer;
        }
        new_entries=0;
        parse(cast(string)document);
        if(entry_count>max_entries){
            removeOldEntries();
        }
        return (new_entries>0);
    }

    ///Return the total count of entries
    @property size_t length(){
        return entry_count;
    }

    /**
     * Removes every entry older than the max_entries newest.
     *
     * Skips the first max_entries elements of entries and removes the
     * remainder. It is only called when entry_count exceeds max_entries,
     * which is what the out-contract relies on.
     */
    private void removeOldEntries()
    out{
        size_t real_length=entries[].walkLength;
        assert(real_length==max_entries);
        assert(real_length==entry_count);
    }
    body{
        auto r=entries[];
        auto i=max_entries;
        while(i-->0 && !r.empty){
            r.popFront();
        }
        if(!r.empty){
            entries.linearRemove(r);
            entry_count=max_entries;
        }
    }

    ///Returns a range with the new entries since the last call to update()
    @property auto news(){
        return entries[].take(new_entries);
    }

    ///Sets the new entries since the last call to update to 0.
    public void discardNews(){
        new_entries=0;
    }

    /**
     * Header-callback of the HTTP-request.
     *
     * Remembers the value of the `date` header (compared in lower case)
     * in lastUpdated; every other header is ignored.
     */
    private void parseHeader(in char[] key, in char[] value){
        if(key=="date"){
            //idup: value is only borrowed, lastUpdated needs its own immutable copy.
            lastUpdated=value.idup;
        }
    }

    /**
     * Data-callback of the HTTP-request.
     *
     * Appends the received chunk to buffer.
     *
     * Returns:
     *  the number of bytes consumed, which is always data.length
     */
    private auto addContent(ubyte[] data){
        buffer~=data;
        return data.length;
    }

    /**
     * Fetches the document and saves it internally.
     *
     * If a `date` header was seen on an earlier request, its value is
     * sent as If-Modified-Since.
     *
     * Returns:
     *  true if upstream reports to have new entries (status 200),
     *  false otherwise (status 304)
     * Throws:
     *  Exception on every other status code
     */
    public bool fetch(){
        debug(RSS) writeln("Updateing");
        if(lastUpdated.length!=0){
            debug(RSS) writeln("Adding if-not-modified-since");
            //NOTE(review): this adds the header on every call; presumably
            //std.net.curl keeps the ones added earlier, so duplicates may
            //pile up. Not cleared here, since that would also drop headers
            //set by the caller - confirm and handle if needed.
            http.addRequestHeader("If-Modified-Since",lastUpdated);
        }
        //Start every request with a fresh buffer: addContent only appends,
        //so the previous document would otherwise be parsed again in front
        //of the new one. A new array is used instead of truncating, so
        //strings handed to parse() earlier are left untouched.
        buffer=null;
        debug(RSS) writeln("Performing HTTP request");
        http.perform();
        if(http.statusLine.code==200){
            debug(RSS) writeln("Have news, parsing and returning xml");
            return true;
        }
        else if(http.statusLine.code==304){
            debug(RSS) writeln("Have no news, returning null");
            return false;
        }
        else{
            throw new Exception("Error in HTTP-Request: statuscode "~std.conv.to!string(http.statusLine.code));
        }
    }
}

///
unittest{
    //Feed without network access: every update() yields exactly one new entry.
    class TeRSSt : DRSS!(){
        private uint counter;
        this(){
            super(HTTP());
            date_reliability=DateReliable.YES;
            max_entries=10;
        }
        override void parse(string document){
            string cnt=std.conv.to!string(counter);
            SysTime st=SysTime(DateTime(1986,1,counter+1));
            //Keep the call outside of assert, so it has not to rely on
            //asserts being compiled in.
            bool added=addEntry(Entry(Entry.Type.Plain, "ID"~cnt, "This is id number "~cnt, st));
            assert(added);
            counter++;
        }
        override bool fetch(){
            return true;
        }
    }
    TeRSSt t=new TeRSSt();
    foreach(i; 0..30){
        assert(t.update());
        assert(t.news.walkLength==1);
        assert(t.news.front.id=="ID"~std.conv.to!string(i));
        assert(t.length==min(t.max_entries,i+1));
    }
}