commit 26bf5e68f8f4cad7a0a964b3d628c0df44e349d5
parent 70fd2b248f7181b480275f153018a5a190816b6e
Author: Dominik Schmidt <das1993@hotmail.com>
Date: Tue, 24 Apr 2018 12:59:01 +0000
Normalize the classnames with regexp before parsing the tree
This is a hack, since kxml does not support regex matching (or globbing) on attributes,
and Facebook keeps slightly changing the format of the class names.
Diffstat:
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/fbstream.d b/fbstream.d
@@ -136,6 +136,14 @@ class FBStream : DRSS!(Post){
auto script_end=ctRegex!"</script>";
document=document.replaceAll(script_start, "<!--").replaceAll(script_end, "-->");
+ /*
+ * Now, since the exact class names of facebook always vary, we
+ * normalize them to a common denominator
+ */
+ auto userContent_normalize=ctRegex!`class="[^"]*(userContentWrapper|userContent)[^"]*"`;
+ document=document
+ .replaceAll(userContent_normalize, `class="$1"`);
+
//Add important End-Tags
document~="</body></HTML>";
@@ -148,7 +156,7 @@ class FBStream : DRSS!(Post){
headers[1][1]=arr[0].getCData().idup;
headers[0][1]=url;
- XmlNode[] nodes=root.parseXPath(`//div[@class="_5pcr userContentWrapper"]`);
+ XmlNode[] nodes=root.parseXPath(`//div[@class="userContentWrapper"]`);
assert(nodes.length>0, "No data nodes found!");
foreach(node; nodes.retro){
appendPost(node);
@@ -160,7 +168,7 @@ class FBStream : DRSS!(Post){
* Params: match = The data-div node
*/
private void appendPost(XmlNode match){
- XmlNode[] usercontent=match.parseXPath(`//div[@class="_5pbx userContent"]`);
+ XmlNode[] usercontent=match.parseXPath(`//div[@class="userContent"]`);
if(usercontent.length==0){
return;
}