DRSS

git clone git://xatko.vsos.ethz.ch/DRSS.git
Log | Files | Refs | Submodules

rss.d (7167B)


      1 module drss.rss;
      2 import std.net.curl;
      3 import std.stdio;
      4 import std.container.slist;
      5 import std.datetime;
      6 import std.algorithm;
      7 import std.string;
      8 import std.range;
      9 static import std.conv;
     10 import kxml.xml;
     11 
     12 /**
     13  * A basic Entry-type.
     14  * 
     15  * If it is sufficient, you don't have to define your own...
     16  */
     struct Entry{
     	///Kinds of content an entry can carry.
     	enum Type{
     		HTML,
     		Plain
     	}
     	///Lookup from Type to the string toXML() writes into the "type" attribute.
     	static immutable string[] TypeTable=[Type.HTML:"html", Type.Plain:"text"];
     	///How content has to be interpreted.
     	Type type;
     	///Identifier of the entry.
     	string id;
     	///Raw body of the entry; HTML or plain text, depending on type.
     	string content;
     	///Point in time at which the entry was published.
     	SysTime date;

     	///Orders entries by publish date only; all other fields are ignored.
     	int opCmp(inout ref Entry b) const{
     		immutable order=date.opCmp(b.date);
     		return order;
     	}
     	///ditto
     	int opCmp(inout Entry b) const{
     		immutable order=date.opCmp(b.date);
     		return order;
     	}

     	/**
     	 * Builds an "entry" XmlNode for this entry.
     	 *
     	 * The node gets three children, in this order: "id", "published"
     	 * (the date in ISO extended format) and "content" (carrying a "type"
     	 * attribute taken from TypeTable). Each value is added via addCData.
     	 *
     	 * Returns:
     	 * 	the newly created node
     	 */
     	XmlNode toXML(){
     		XmlNode entryNode=new XmlNode("entry");

     		auto idNode=new XmlNode("id").addCData(id);
     		entryNode.addChild(idNode);

     		immutable published=date.toISOExtString();
     		auto publishedNode=new XmlNode("published").addCData(published);
     		entryNode.addChild(publishedNode);

     		immutable typeName=TypeTable[type];
     		auto contentNode=new XmlNode("content").setAttribute("type",typeName).addCData(content);
     		entryNode.addChild(contentNode);

     		return entryNode;
     	}
     }
     50 
     51 /**
     52  * The main RSS-Class.
     53  * 
     54  * Its template-parameter is to specify the entry-type, which is of 
     55  * Type drss.rss.Entry by default.
     56  * 
     57  */
     abstract class DRSS(T=Entry){
     	///Value of the last "date" response header seen; sent back as If-Modified-Since by fetch().
     	private string lastUpdated;
     	///All stored entries; addEntry() puts new entries at the front.
     	SList!(T) entries;
     	///Number of entries added by the most recent call to update().
     	uint new_entries=0;
     	/**
     	 * Determines whether the date set by parse is reliable or not.
     	 * 
     	 * This has an impact on how an entry is checked before it is added
     	 * to the entry-list entries.
     	 * 
     	 */
     	enum DateReliable{
     		///Always iterate through the entries and check if the ID exists
     		NO,
     		///Only iterate through the entries and check if the ID exists, if the new entry is newer than the newest old one
     		SEMI,
     		///Never iterate through the entries to check if the ID exists
     		YES
     	}
     	///The strategy used to decide whether an entry is new.
     	DateReliable date_reliability=DateReliable.SEMI;
     	///At what point shall the entries be discarded.
     	uint max_entries=100;
     	
     	///Number of elements currently stored in entries.
     	protected uint entry_count=0;
     	///The request executed by fetch().
     	protected HTTP http;
     	///Body of the document received by the most recent fetch().
     	protected ubyte[] buffer;
     	
     	/**
     	 * Parse a raw document.
     	 * 
     	 * This function should extract the Posts from document and add
     	 * them using addEntry
     	 * ---
     	 * override void parse(string document){
     	 * 	foreach(Entry e; splitDocumentIntoEntries(document)){
     	 * 		this.addEntry(e);
     	 * 	}
     	 * }
     	 * ---
     	 * 
     	 */
     	abstract void parse(string document);

     	/**
     	 * Initialize the Feed by url
     	 * Params:
     	 * 	url = The url to fetch the raw-data from
     	 */
     	this(string url){
     		auto h=HTTP();
     		h.url=url;
     		this(h);
     	}
     	/**
     	 * Initialize the feed by a custom HTTP-request
     	 * 
     	 * This allows for things like:
     	 * ---
     	 * this(){
     	 * 	auto h=HTTP();
     	 * 	h.url="http://herp.derp/rss.xml";
     	 * 	h.setUserAgent("MyCustomUserAgent 1.0");
     	 * 	super(h);
     	 * }
     	 * ---
     	 * Params:
     	 * 	http = The http-request to execute when updating
     	 */
     	this(HTTP http){
     		this.http=http;
     		this.http.onReceive = &addContent;
     		this.http.onReceiveHeader = &parseHeader;
     	}
     	
     	/**
     	 * Check if the entry newent is new.
     	 * Operation depends on the value of date_reliability.
     	 * $(SEE_ALSO enum DateReliable)
     	 * 
     	 * Params:
     	 * 	newent = the entry to check
     	 * Returns:
     	 * 	true if the entry is considered new, false otherwise
     	 * Throws:
     	 * 	Exception if date_reliability holds a value outside DateReliable
     	 */
     	private bool checkIfNewEntry(in ref T newent){
     		//An empty list cannot contain the entry.
     		if(entries.empty){
     			return true;
     		}
     		switch(date_reliability){
     			case DateReliable.NO:
     				//Dates are ignored, only the IDs decide.
     				return !canFind!("a.id==b.id")(entries[], newent);
     			case DateReliable.YES:
     				//Only the comparison against the front entry decides.
     				return (newent>entries.front);
     			case DateReliable.SEMI:
     				//The ID scan is only paid for if the date check passes.
     				return (newent>entries.front)
     					&& !canFind!("a.id==b.id")(entries[], newent);
     			default:
     				throw new Exception("Unknonwn reliability"); 
     		}
     	}
     	
     	/**
     	 * Add an entry to the list.
     	 * 
     	 * The entry is put at the front of entries if checkIfNewEntry
     	 * accepts it; new_entries and entry_count are incremented then.
     	 * 
     	 * Params:
     	 * 	newent = The entry to add
     	 * Returns:
     	 * 	true if the entry was added, false otherwise.
     	 * 
     	 */
     	bool addEntry(T newent){
     		if(!checkIfNewEntry(newent)){
     			return false;
     		}
     		entries.insertFront(newent);
     		new_entries++;
     		entry_count++;
     		return true;
     	}
     	
     	/**
     	 * Updates the whole stream.
     	 * 
     	 * $(OL
     	 * 	$(LI Calls `fetch();`, if document is null)
     	 * 	$(LI Passes the result to `parse();`)
     	 * 	$(LI Discards the old Entries with `removeOldEntries();`,
     	 * 	if there are more than max_entries)
     	 * )
     	 * 
     	 * Params:
     	 * 	document = The document to parse or null, to fetch it.
     	 * Returns:
     	 * 	true if the feed has new entries.
     	 * 
     	 */
     	final bool update(ubyte[] document=null){
     		//Reset first, so that news is empty as well when fetch() reports
     		//no change (or throws) instead of still showing the previous batch.
     		new_entries=0;
     		if(document is null){
     			if(!fetch()){
     				return false;
     			}
     			document=buffer;
     		}
     		parse(cast(string)document);
     		if(entry_count>max_entries){
     			removeOldEntries();
     		}
     		return (new_entries>0);
     	}
     	
     	///Return the total count of entries
     	@property size_t length(){
     		return entry_count;
     	}
     	
     	/**
     	 * Removes every entry behind the first max_entries ones.
     	 * 
     	 * The out-contract expects exactly max_entries entries to remain, so
     	 * this must only be called while more than max_entries are stored.
     	 */
     	private void removeOldEntries()
     	out{
     		size_t real_length=entries[].walkLength;
     		assert(real_length==max_entries);
     		assert(real_length==entry_count);
     	}
     	body{
     		//Everything behind the first max_entries elements is surplus.
     		auto surplus=entries[].drop(max_entries);
     		if(!surplus.empty){
     			entries.linearRemove(surplus);
     			entry_count=max_entries;
     		}
     	}
     	
     	///Returns a range with the new entries since the last call to update()
     	@property auto news(){
     		return entries[].take(new_entries);
     	}

     	///Sets the new entries since the last call to update to 0.
     	public void discardNews(){
     		new_entries=0;
     	}
     	
     	///Header callback of http: remembers the value of the "date" header.
     	private void parseHeader(in char[] key, in char[] value){
     		if(key=="date"){
     			//Copy the value, as it is stored beyond this call.
     			lastUpdated=value.idup;
     		}
     	}
     	
     	///Data callback of http: appends the received chunk to buffer.
     	private auto addContent(ubyte[] data){
     		buffer~=data;
     		return data.length;
     	}
     	/**
     	 * Fetches the document and saves it internally.
     	 * 
     	 * The internal buffer is emptied before the request is performed, so
     	 * afterwards it only holds the body of this very response.
     	 * 
     	 * Returns:
     	 * 	true if upstream reports to have new entries (status 200),
     	 * 	false otherwise (status 304)
     	 * Throws:
     	 * 	Exception on every other status code
     	 */
     	public bool fetch(){
     		debug(RSS) writeln("Updateing");
     		if(lastUpdated.length!=0){
     			debug(RSS) writeln("Adding if-not-modified-since");
     			//NOTE(review): this adds one more header on every call; the
     			//headers are not cleared here, because that would presumably
     			//drop headers set by the creator of http as well - confirm.
     			http.addRequestHeader("If-Modified-Since",lastUpdated);
     		}
     		//addContent only ever appends. Start with a fresh array, so that
     		//the previous document does not end up in front of the new one
     		//and memory handed to parse() earlier is never written to again.
     		buffer=null;
     		debug(RSS) writeln("Performing HTTP request");
     		http.perform();
     		immutable status=http.statusLine.code;
     		if(status==200){
     			debug(RSS) writeln("Have news, parsing and returning xml");
     			return true;
     		}
     		if(status==304){
     			debug(RSS) writeln("Have no news, returning null");
     			return false;
     		}
     		throw new Exception("Error in HTTP-Request: statuscode "~std.conv.to!string(status));
     	}
     }
    288 
    289 ///
    unittest{
    	//Imported for the whole unittest: to!string is needed by the nested
    	//class as well as by the assertions at the end.
    	import std.conv : to;

    	//A feed that needs no network: every update "fetches" successfully
    	//and parse() produces exactly one entry, each one day newer than
    	//the one before.
    	class TeRSSt : DRSS!(){
    		private uint counter;
    		this(){
    			date_reliability=DateReliable.YES;
    			max_entries=10;
    			super(HTTP());
    		}
    		override void parse(string document){
    			string cnt=to!string(counter);
    			SysTime st=SysTime(DateTime(1986,1,counter+1));
    			//The call is kept out of the assert, so that the entry is
    			//added no matter whether asserts are compiled in.
    			immutable added=addEntry(Entry(Entry.Type.Plain, "ID"~cnt, "This is id number "~cnt, st));
    			assert(added);
    			counter++;
    		}
    		override bool fetch(){
    			return true;
    		}
    	}
    	TeRSSt t=new TeRSSt();
    	foreach(i; 0..30){
    		//Every update yields exactly one new entry with a running ID...
    		assert(t.update());
    		assert(t.news.walkLength==1);
    		assert(t.news.front.id=="ID"~to!string(i));
    		//...and the list never grows beyond max_entries.
    		assert(t.length==min(t.max_entries,i+1));
    	}
    }