Fb2RSS

A Facebook to RSS conversion tool
git clone git://xatko.vsos.ethz.ch/Fb2RSS.git
Log | Files | Refs | Submodules

commit d5ae15efbb9d05f815f16b5b98609b0062004e5e
parent 82d33a75ed2fac35f4229e2c39282cdd5468f0f5
Author: Dominik Schmidt <das1993@hotmail.com>
Date:   Wed,  1 Jul 2015 00:19:50 +0200

Documentation - Lots of it.

Diffstat:
Fb2RSS.d | 115+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 110 insertions(+), 5 deletions(-)

diff --git a/Fb2RSS.d b/Fb2RSS.d @@ -1,3 +1,21 @@ +/** + * @file Fb2RSS.d + * + * @author Dominik Schmidt + * + * @brief Fb2RSS is a translator from the HTML structure generated by Facebook to + * an atom feed. + * + * The page is formatted like this: + * - The relevant data is inside `<code></code>` blocks + * - Inside these blocks is further HTML-Data, which is commented out. + * - The posting and metadata is inside a `<div></div>`, which has the date-time attribute set. + * - The actual text to the post is inside another `<div></div>`, with class="_5pbx userContent" + * - The link to the Post is inside the href of `<a></a>` with class="_5pcq" + * + * + */ + import std.net.curl; import std.stdio; import std.string; @@ -6,32 +24,62 @@ import std.range; import std.file; import kxml.xml; +/** + * Manages all the relevant tasks of + * - Fetching + * - Parsing + * - Formatting and Outputting + */ class FBStream : RandomFiniteAssignable!(Post){ + ///Holds all the retrieved posts Post posts[]; + ///Holds the feed url string url; + ///Holds the url, where we get the data from. Can either be an URL or a filename. private string fetch_url; + ///The title of the feed string title; - XmlNode dataNodes[]; - string document; + ///The generated data Nodes, which hold all relevant data. + XmlNode dataNodes[]; + ///The plaintext string holding the whole file + string document; + /** + * The useragent to use for requesting the page with facebook. 
+ * Facebook does check this, and if it doesn't know it, it displays an + * "Update your Browser"-Message + */ string userAgent="Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20110504 Firefox/7.0.1"; - XmlNode root; + ///The root node + XmlNode root; + + /** + * @brief Functions for the Range-Interface + * + * Mostly, they only wrap around the #posts array + */ @property FBStream save(){ FBStream str=this.clone(); str.posts=this.posts.save; return str; } + + ///@copydoc save @property void front(Post newVal){ posts~=newVal; } + ///@copydoc save @property void back(Post newVal){ posts=[newVal]~posts; } + ///@copydoc save void opIndexAssign(Post val, size_t index){ posts[index]=val; } + ///@copydoc save Post opIndex(size_t i){ return posts[i]; } + ///@copydoc save Post moveAt(size_t i){ return posts.moveAt(i); } @@ -46,23 +94,29 @@ class FBStream : RandomFiniteAssignable!(Post){ str.root=this.root; return str; } + ///@copydoc save @property size_t length(){ return posts.length; } + ///@copydoc save FBStream opSlice(size_t a, size_t b){ FBStream str=this.clone(); str.posts=this.posts[a..b]; return str; } + ///@copydoc save @property Post back(){ return posts.back(); } + ///@copydoc save Post moveBack(){ return posts.moveBack(); } + ///@copydoc save void popBack(){ posts.popBack(); } + ///@copydoc save int opApply(int delegate(Post) func){ int result=0; foreach(ref Post p; posts){ @@ -71,6 +125,7 @@ class FBStream : RandomFiniteAssignable!(Post){ } return result; } + ///@copydoc save int opApply(int delegate(size_t,Post) func){ int result=0; foreach(size_t c,ref Post p; posts){ @@ -79,22 +134,33 @@ class FBStream : RandomFiniteAssignable!(Post){ } return result; } + ///@copydoc save @property bool empty(){ return posts.empty; } + ///@copydoc save void popFront(){ posts.popFront(); } + ///@copydoc save Post moveFront(){ return posts.moveFront(); } + ///@copydoc save @property Post front(){ return posts.front; } + + /** + * @param fetch_url Fetch the Data from this source 
+ */ this(string fetch_url){ this.fetch_url=fetch_url; } + /** + * Fetch the data from #fetch_url, and save it in #document + */ public void fetch(){ if(exists(fetch_url) && isFile(fetch_url)){ document=cast(string)read(fetch_url); @@ -107,6 +173,9 @@ class FBStream : RandomFiniteAssignable!(Post){ h.perform(); } } + /** + * Parses #document. Afterwards, #posts, #root, #dataNodes will be filled. + */ public void parse(){ XmlNode[] arr; root=readDocument(document); @@ -118,6 +187,11 @@ class FBStream : RandomFiniteAssignable!(Post){ XmlNode[] nodes=root.parseXPath(`//code`); generatePosts(nodes); } + + /** + * Generates #posts + * @param nodes The `<code></code>` nodes, where the data can be found. + */ private void generatePosts(XmlNode[] nodes){ foreach(ref XmlNode node; nodes){ XmlNode subTree=readDocument((cast(XmlComment)(node.getChildren()[0]))._comment); @@ -129,6 +203,11 @@ class FBStream : RandomFiniteAssignable!(Post){ } } } + + /** + * Gets the information from the data-div and appends it to #posts + * @param match The data-div node + */ private void appendPost(XmlNode match){ XmlNode[] usercontent=match.parseXPath(`//div[@class="_5pbx userContent"]`); if(usercontent.length==0){ @@ -143,6 +222,11 @@ class FBStream : RandomFiniteAssignable!(Post){ posts~=Post(usercontent[0],t,href[0].getAttribute("href")); } + /** + * Generates an XML-Document which validates as an Atom-Feed corresponding + * to the Facebook page found in #fetch_url, or the document in #document. 
+ * @return The root-node of the Atom-Feed + */ private XmlNode generateRSS(){ XmlNode rss = new XmlNode("feed"); rss.setAttribute("xmlns","http://www.w3.org/2005/Atom"); @@ -161,6 +245,10 @@ class FBStream : RandomFiniteAssignable!(Post){ return rss; } + /** + * Writes a valid Atom-Feed xmlfile to the file specified + * @param into The file to write the feed to + */ public void writeRSS(File into){ XmlNode rss=generateRSS(); into.writeln(`<?xml version="1.0" encoding="UTF-8" standalone="yes"?>`); @@ -169,11 +257,16 @@ class FBStream : RandomFiniteAssignable!(Post){ } struct Post{ + ///The userdata `<div></div>` XmlNode content; + ///The modification date SysTime time; + ///The Post-href string href; + ///The count of characters, until the title gets cut off. static ushort title_cutoff=80; + ///@return The title of the posting @property string title(){ string cont=content.getChildren()[0].getCData(); if(cont.length>title_cutoff){ @@ -182,25 +275,36 @@ struct Post{ } return cont; } - + ///@return The link to the post. @property string link() const{ return "https://facebook.com"~href; } + /** + * @return A unique id to the post + * @bug It should be something sensible here, not just the link. + * Optimally, it should be the same as the facebookfeed read. + */ @property string id() const{ return link(); } + /// @return The Atom-valid datestring @property string ISOTime() const{ return time.toISOExtString(); } + /// @return A UCData-Object describing the content of the post. @property UCData getUCContent(){ UCData uc=new UCData(); uc.setCData(content.toString()); return uc; } - + + /** + * Compares the object with b by comparing the dates + * @return -1 if b is bigger, 1 if b is smaller, 0 if they're equal + */ int opCmp(ref Post b) const{ if(time<b.time){ return -1; @@ -212,6 +316,7 @@ struct Post{ return 0; } } + bool opEquals(ref Post b) const{ return (opCmp(b)==0); }