commit d5ae15efbb9d05f815f16b5b98609b0062004e5e
parent 82d33a75ed2fac35f4229e2c39282cdd5468f0f5
Author: Dominik Schmidt <das1993@hotmail.com>
Date: Wed, 1 Jul 2015 00:19:50 +0200
Documentation - Lots of it.
Diffstat:
Fb2RSS.d | | | 115 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---- |
1 file changed, 110 insertions(+), 5 deletions(-)
diff --git a/Fb2RSS.d b/Fb2RSS.d
@@ -1,3 +1,21 @@
+/**
+ * @file Fb2RSS.d
+ *
+ * @author Dominik Schmidt
+ *
+ * @brief Fb2RSS translates the HTML structure generated by Facebook into
+ * an Atom feed.
+ *
+ * The page is formatted like this:
+ * - The relevant data is inside `<code></code>` blocks
+ * - Inside these blocks is further HTML-Data, which is commented out.
+ * - The posting and metadata is inside a `<div></div>`, which has the date-time attribute set.
+ * - The actual text of the post is inside another `<div></div>`, with class="_5pbx userContent"
+ * - The link to the Post is inside the href of `<a></a>` with class="_5pcq"
+ *
+ *
+ */
+
import std.net.curl;
import std.stdio;
import std.string;
@@ -6,32 +24,62 @@ import std.range;
import std.file;
import kxml.xml;
+/**
+ * Manages all the relevant tasks of
+ * - Fetching
+ * - Parsing
+ * - Formatting and Outputting
+ */
class FBStream : RandomFiniteAssignable!(Post){
+ ///Holds all the retrieved posts
Post posts[];
+ ///Holds the feed url
string url;
+ ///Holds the location the data is fetched from. Can be either a URL or a filename.
private string fetch_url;
+ ///The title of the feed
string title;
- XmlNode dataNodes[];
- string document;
+ ///The generated data Nodes, which hold all relevant data.
+ XmlNode dataNodes[];
+ ///The plaintext string holding the whole file
+ string document;
+ /**
+ * The user agent to send when requesting the page from Facebook.
+ * Facebook checks it and, if it does not recognise it, displays an
+ * "Update your Browser" message.
+ */
string userAgent="Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20110504 Firefox/7.0.1";
- XmlNode root;
+ ///The root node
+ XmlNode root;
+
+ /**
+ * @brief Functions for the Range-Interface
+ *
+ * Mostly, they only wrap around the #posts array
+ */
@property FBStream save(){
    // Start from clone(), then give the copy its own saved view of the posts array.
    FBStream copy=this.clone();
    copy.posts=posts.save;
    return copy;
}
+
+ ///@copydoc save
@property void front(Post newVal){
// Appends newVal to the end of posts; it does not overwrite posts[0].
// NOTE(review): after this call the front getter still returns the previous
// first element (unless posts was empty) — confirm that appending is the
// intended meaning of assigning to front.
posts~=newVal;
}
+ ///@copydoc save
@property void back(Post newVal){
// Prepends newVal by building a new array [newVal]~posts; it does not
// overwrite the last element.
// NOTE(review): after this call the back getter still returns the previous
// last element (unless posts was empty) — confirm that prepending is the
// intended meaning of assigning to back.
posts=[newVal]~posts;
}
+ ///@copydoc save
void opIndexAssign(Post val, size_t index){
    // Overwrite the existing entry at position index in posts with val.
    posts[index] = val;
}
+ ///@copydoc save
Post opIndex(size_t i){
    // Hand back the entry stored at position i of posts.
    Post picked=posts[i];
    return picked;
}
+ ///@copydoc save
Post moveAt(size_t i){
    // Delegates to the range primitive moveAt on the posts array.
    Post moved=posts.moveAt(i);
    return moved;
}
@@ -46,23 +94,29 @@ class FBStream : RandomFiniteAssignable!(Post){
str.root=this.root;
return str;
}
+ ///@copydoc save
@property size_t length(){
    // The stream has exactly as many elements as the posts array.
    const size_t count=posts.length;
    return count;
}
+ ///@copydoc save
FBStream opSlice(size_t a, size_t b){
    // Build a clone of this stream whose posts are restricted to the slice a..b.
    FBStream window=this.clone();
    window.posts=posts[a..b];
    return window;
}
+ ///@copydoc save
@property Post back(){
    // Last element of the posts array.
    Post last=posts.back();
    return last;
}
+ ///@copydoc save
Post moveBack(){
    // Delegates to the range primitive moveBack on the posts array.
    Post moved=posts.moveBack();
    return moved;
}
+ ///@copydoc save
void popBack(){
    // Drop the last element from the posts array.
    posts.popBack();
}
+ ///@copydoc save
int opApply(int delegate(Post) func){
int result=0;
foreach(ref Post p; posts){
@@ -71,6 +125,7 @@ class FBStream : RandomFiniteAssignable!(Post){
}
return result;
}
+ ///@copydoc save
int opApply(int delegate(size_t,Post) func){
int result=0;
foreach(size_t c,ref Post p; posts){
@@ -79,22 +134,33 @@ class FBStream : RandomFiniteAssignable!(Post){
}
return result;
}
+ ///@copydoc save
@property bool empty(){
    // The stream is empty exactly when the posts array holds no elements.
    return posts.length==0;
}
+ ///@copydoc save
void popFront(){
    // Drop the first element from the posts array.
    posts.popFront();
}
+ ///@copydoc save
Post moveFront(){
    // Delegates to the range primitive moveFront on the posts array.
    Post moved=posts.moveFront();
    return moved;
}
+ ///@copydoc save
@property Post front(){
    // First element of the posts array.
    Post first=posts.front;
    return first;
}
+
+ /**
+ * @param fetch_url Fetch the Data from this source
+ */
this(string fetch_url){
    // Only remembers the source; nothing is fetched or parsed here.
    this.fetch_url = fetch_url;
}
+ /**
+ * Fetch the data from #fetch_url, and save it in #document
+ */
public void fetch(){
if(exists(fetch_url) && isFile(fetch_url)){
document=cast(string)read(fetch_url);
@@ -107,6 +173,9 @@ class FBStream : RandomFiniteAssignable!(Post){
h.perform();
}
}
+ /**
+ * Parses #document. Afterwards #posts, #root, #dataNodes will be filled.
+ */
public void parse(){
XmlNode[] arr;
root=readDocument(document);
@@ -118,6 +187,11 @@ class FBStream : RandomFiniteAssignable!(Post){
XmlNode[] nodes=root.parseXPath(`//code`);
generatePosts(nodes);
}
+
+ /**
+ * Generates #posts
+ * @param nodes The `<code></code>` nodes, where the data can be found.
+ */
private void generatePosts(XmlNode[] nodes){
foreach(ref XmlNode node; nodes){
XmlNode subTree=readDocument((cast(XmlComment)(node.getChildren()[0]))._comment);
@@ -129,6 +203,11 @@ class FBStream : RandomFiniteAssignable!(Post){
}
}
}
+
+ /**
+ * Gets the information from the data-div and appends it to #posts
+ * @param match The data-div node
+ */
private void appendPost(XmlNode match){
XmlNode[] usercontent=match.parseXPath(`//div[@class="_5pbx userContent"]`);
if(usercontent.length==0){
@@ -143,6 +222,11 @@ class FBStream : RandomFiniteAssignable!(Post){
posts~=Post(usercontent[0],t,href[0].getAttribute("href"));
}
+ /**
+ * Generates an XML-Document which validates as an Atom-Feed corresponding
+ * to the Facebookpage found in #fetch_url, or the document in #document.
+ * @return The root-node of the Atom-Feed
+ */
private XmlNode generateRSS(){
XmlNode rss = new XmlNode("feed");
rss.setAttribute("xmlns","http://www.w3.org/2005/Atom");
@@ -161,6 +245,10 @@ class FBStream : RandomFiniteAssignable!(Post){
return rss;
}
+ /**
+ * Writes a valid Atom-Feed xmlfile to the file specified
+ * @param into The file to write the feed to
+ */
public void writeRSS(File into){
XmlNode rss=generateRSS();
into.writeln(`<?xml version="1.0" encoding="UTF-8" standalone="yes"?>`);
@@ -169,11 +257,16 @@ class FBStream : RandomFiniteAssignable!(Post){
}
struct Post{
+ ///The userdata `<div></div>`
XmlNode content;
+ ///The modification date
SysTime time;
+ ///The Post-href
string href;
+ ///The maximum number of characters of the title before it gets cut off.
static ushort title_cutoff=80;
+ ///@return The title of the posting
@property string title(){
string cont=content.getChildren()[0].getCData();
if(cont.length>title_cutoff){
@@ -182,25 +275,36 @@ struct Post{
}
return cont;
}
-
+ ///@return The link to the post.
@property string link() const{
    // href is appended verbatim to the fixed Facebook origin.
    string origin="https://facebook.com";
    return origin~href;
}
+ /**
+ * @return A unique id for the post
+ * @bug It should be something sensible here, not just the link.
+ * Optimally, it should be the same as the facebookfeed read.
+ */
@property string id() const{
    // The id is currently just the post's link.
    return this.link;
}
+ /// @return The Atom-valid datestring
@property string ISOTime() const{
    // Render the post time in ISO extended format.
    string stamp=time.toISOExtString();
    return stamp;
}
+ /// @return A UCData object describing the content of the post.
@property UCData getUCContent(){
    // Serialize the content node and store the result as the CData of a fresh UCData.
    auto serialized=content.toString();
    UCData wrapped=new UCData();
    wrapped.setCData(serialized);
    return wrapped;
}
-
+
+ /**
+ * Compares the object with b by comparing the dates
+ * @return -1 if b is bigger, 1 if b is smaller, 0 if they're equal
+ */
int opCmp(ref Post b) const{
if(time<b.time){
return -1;
@@ -212,6 +316,7 @@ struct Post{
return 0;
}
}
+
bool opEquals(ref Post b) const{
    // Two posts are equal exactly when opCmp reports no ordering difference.
    const int order=opCmp(b);
    return order==0;
}