Fb2RSS

A Facebook to RSS conversion tool
git clone git://xatko.vsos.ethz.ch/Fb2RSS.git
Log | Files | Refs | Submodules

commit 26bf5e68f8f4cad7a0a964b3d628c0df44e349d5
parent 70fd2b248f7181b480275f153018a5a190816b6e
Author: Dominik Schmidt <das1993@hotmail.com>
Date:   Tue, 24 Apr 2018 12:59:01 +0000

Normalize the classnames with regexp before parsing the tree

This is a hack: kxml does not support regex matching (or globbing) on attributes,
and Facebook keeps making slight changes to the format of its class names.

Diffstat:
fbstream.d | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/fbstream.d b/fbstream.d
@@ -136,6 +136,14 @@ class FBStream : DRSS!(Post){
         auto script_end=ctRegex!"</script>";
         document=document.replaceAll(script_start, "<!--").replaceAll(script_end, "-->");
 
+        /*
+         * Now, since the exact class names of facebook always vary, we
+         * normalize them to a common denominatory
+         */
+        auto userContent_normalize=ctRegex!`class="[^"]*(userContentWrapper|userContent)[^"]*"`;
+        document=document
+            .replaceAll(userContent_normalize, `class="$1"`);
+
         //Add important End-Tags
         document~="</body></HTML>";
 
@@ -148,7 +156,7 @@ class FBStream : DRSS!(Post){
         headers[1][1]=arr[0].getCData().idup;
         headers[0][1]=url;
 
-        XmlNode[] nodes=root.parseXPath(`//div[@class="_5pcr userContentWrapper"]`);
+        XmlNode[] nodes=root.parseXPath(`//div[@class="userContentWrapper"]`);
         assert(nodes.length>0, "No data nodes found!");
         foreach(node; nodes.retro){
             appendPost(node);
@@ -160,7 +168,7 @@ class FBStream : DRSS!(Post){
      * Params: match = The data-div node
      */
     private void appendPost(XmlNode match){
-        XmlNode[] usercontent=match.parseXPath(`//div[@class="_5pbx userContent"]`);
+        XmlNode[] usercontent=match.parseXPath(`//div[@class="userContent"]`);
         if(usercontent.length==0){
             return;
         }