commit 53b909191ac05f4bcc69746c185aca93aa41444b
parent 16b892d26bfa0edf70cf45c30385520f6a9e2475
Author: Dominik Schmidt <das1993@hotmail.com>
Date: Sat, 13 May 2017 11:09:49 +0200
Correct the faulty XML delivered by Facebook prior to parsing
Diffstat:
2 files changed, 15 insertions(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
@@ -14,7 +14,7 @@ standardpaths/libstandardpaths.a: standardpaths/source/standardpaths.o
$(DMD) $(IOPTS) -c $< -of$@
.PHONY:
DRSS/drss.a:
- cd DRSS/; make DMD="$(DMD)" OPTS="$(OPTS) -version=FORGIVING" drss.a
+ cd DRSS/; make DMD="$(DMD)" drss.a
clean:
rm -f *.o standardpaths/source/standardpaths.o
cd DRSS/; make clean
diff --git a/fbstream.d b/fbstream.d
@@ -125,6 +125,20 @@ class FBStream : DRSS!(Post){
override public void parse(string document){
XmlNode[] arr;
XmlNode root;
+
+ //Make the HTML valid for the parser
+ import std.regex;
+ /*
+ * Scripts aren't properly commented out, so just replace them with comments.
+ * We don't need them anyway.
+ */
+ auto script_start=ctRegex!"<script[^>]*>";
+ auto script_end=ctRegex!"</script>";
+ document=document.replaceAll(script_start, "<!--").replaceAll(script_end, "-->");
+
+ //Add important End-Tags
+ document~="</body></HTML>";
+
root=readDocument(document);
if(!captchaSolved(document)){
throw new CaptchaException("Captcha has not been solved yet. "