commit 9804aec4177eb906990bcf69b0a0e05204a48356
parent 03f9bf88d25ca86215fa40ec222d76b76f11c3ca
Author: Dominik Schmidt <das1993@hotmail.com>
Date: Sat, 23 Jan 2016 00:02:32 +0100
Add captcha solving support.
Facebook thought it would be a nifty "security" feature to add a
"security check" captcha to their pages.
Whilst this has nothing to do with "security", it effectively renders
my attempts at fetching the data useless.
So here's what to do:
1) Add cookie storage to the request
2) Solve a captcha once
3) ???
4) Non-Profit
Which is what was implemented in this commit.
You'll have to run the ./captcha binary once with the page URL as its argument
and type in the string shown in the image; after that it should continue to work normally.
For compatibility reasons I included the standardpaths library, which handles
paths quite nicely.
Diffstat:
5 files changed, 141 insertions(+), 3 deletions(-)
diff --git a/.gitmodules b/.gitmodules
@@ -1,3 +1,6 @@
[submodule "DRSS"]
path = DRSS
url = https://github.com/Doeme/DRSS.git
+[submodule "standardpaths"]
+ path = standardpaths
+ url = https://github.com/MyLittleRobo/standardpaths.git
diff --git a/Makefile b/Makefile
@@ -1,9 +1,13 @@
DMD?=ldmd2
OPTS?=-release -O
-IOPTS=$(OPTS) -IDRSS/ -IDRSS/kxml/source/
+IOPTS=$(OPTS) -IDRSS/ -IDRSS/kxml/source/ -Istandardpaths/source/
-Fb2RSS: fbstream.o Fb2RSS.o DRSS/drss.a
+Fb2RSS: fbstream.o Fb2RSS.o DRSS/drss.a standardpaths/libstandardpaths.a
$(DMD) $(IOPTS) $^ -of$@
+captcha: captcha.o fbstream.o DRSS/drss.a standardpaths/libstandardpaths.a
+ $(DMD) $(IOPTS) $^ -of$@
+standardpaths/libstandardpaths.a: standardpaths/source/standardpaths.o
+ $(DMD) $(IOPTS) -lib $^ -of$@
%.o: %.d
$(DMD) $(IOPTS) -c $< -of$@
.PHONY:
diff --git a/captcha.d b/captcha.d
@@ -0,0 +1,87 @@
+import fbstream;
+import std.net.curl;
+import std.stdio;
+import std.file;
+import std.path;
+import std.regex;
+import std.format;
+import std.range;
+import std.algorithm.searching;
+
+
+/**
+ * Fetches the page given on the command line and, if it is still locked
+ * behind a captcha, lets the user solve it interactively. The HTTP handle
+ * uses the cookie jar at getCookiePath(), which is the same jar FBStream
+ * configures for its own requests.
+ *
+ * Params:
+ *  args = Program arguments; args[1] is the URL of the page to unlock.
+ *
+ * Returns: 0 if the captcha is solved, 1 otherwise.
+ */
+int main(string[] args){
+	import std.string : chomp;
+	import std.uri : encodeComponent;
+
+	// Without a URL there is nothing to fetch; fail with a hint instead of
+	// indexing past the end of args.
+	if(args.length<2){
+		stderr.writeln("Usage: captcha <page-url>");
+		return 1;
+	}
+
+	auto h=HTTP();
+	char[] buf;
+
+	h.url=args[1];
+	h.setUserAgent(FBStream.userAgent);
+	h.setCookieJar(getCookiePath());
+	// Every response body is accumulated in buf; buf is reset before each
+	// subsequent request.
+	h.onReceive = (ubyte[] data){
+		buf~=cast(char[])data;
+		return data.length;
+	};
+	h.perform();
+
+	if(FBStream.captchaSolved(buf)){
+		writeln("Captcha already solved :)");
+		return 0;
+	}
+
+	auto url_regex=ctRegex!(".*(https://www.facebook.com/captcha/tfbimage.php[^\"]+).*");
+	auto datr_regex=ctRegex!(".*\"_js_datr\",\"([^\"]+)\".*");
+	auto captcha_regex=ctRegex!(".*name=\"captcha_persist_data\" value=\"([^\"]+)\".*");
+
+	auto url_match=matchFirst(buf, url_regex);
+	auto datr_match=matchFirst(buf, datr_regex);
+	auto captcha_match=matchFirst(buf, captcha_regex);
+
+	// The page mentions a captcha but does not contain the expected form
+	// pieces; bail out instead of indexing into an empty match.
+	if(url_match.empty || datr_match.empty || captcha_match.empty){
+		stderr.writeln("Could not find the captcha form in the fetched page.");
+		return 1;
+	}
+
+	// Copy the captured values so they do not depend on the page buffer,
+	// which is reused for the following responses.
+	const url=url_match[1].idup;
+	const captcha_hash=captcha_match[1].idup;
+
+	h.setCookie("_js_datr="~datr_match[1]);
+
+	buf=null;
+	h.url=url;
+	h.perform();
+
+	string file=buildPath(tempDir(),"fb2rss_captcha.png");
+	// The image is binary data: open in binary mode and write the bytes raw.
+	auto f=File(file, "wb");
+	scope(exit){
+		f.close();
+		remove(file);
+	}
+	f.rawWrite(buf);
+	f.close();
+	writeln("The captcha has been written to "~file);
+	writeln("Please enter the text below:");
+	char[] captcha;
+	readln(captcha);
+	// Drop the line terminator ("\n" or "\r\n"); safe on EOF, where the
+	// line is empty.
+	captcha=chomp(captcha);
+	if(captcha.length==0){
+		stderr.writeln("No captcha text entered.");
+		return 1;
+	}
+
+	buf=null;
+	h.url=args[1];
+	h.method=HTTP.Method.post;
+	// Percent-encode both values so characters such as '&', '=' or '+'
+	// cannot break the form body.
+	h.setPostData(
+		format(
+			"captcha_persist_data=%s&captcha_response=%s&captcha_submit=1",
+			encodeComponent(captcha_hash),
+			encodeComponent(captcha)
+		),
+		"application/x-www-form-urlencoded"
+	);
+	h.perform();
+
+	if(!FBStream.captchaSolved(buf)){
+		writeln("Sorry, didn't work :C");
+		writeln("Please, try again!");
+		return 1;
+	}
+	writeln("Success");
+	return 0;
+}
diff --git a/fbstream.d b/fbstream.d
@@ -44,6 +44,23 @@ import drss.render;
import kxml.xml;
import std.typecons;
+
+/**
+ * Builds the path of the cookie jar file.
+ *
+ * The jar is named "Fb2RSS_cookiejar.txt" and placed in the directory
+ * reported by standardpaths for StandardPath.config.
+ *
+ * Returns: The path of the cookie jar file.
+ */
+string getCookiePath(){
+	import std.file : exists, mkdirRecurse, FileException;
+	import std.path : buildPath;
+	import standardpaths;
+	string base=writablePath(StandardPath.config);
+	// NOTE(review): assumes writablePath only computes the path and does not
+	// create the directory - confirm against the pinned standardpaths
+	// revision. Without the directory the jar cannot be written.
+	if(base.length && !exists(base)){
+		try{
+			mkdirRecurse(base);
+		}
+		catch(FileException){
+			// Best effort: still hand back the path, as before.
+		}
+	}
+	return buildPath(base, "Fb2RSS_cookiejar.txt");
+}
+
+/**
+ * Exception type thrown when a fetched page is still locked behind a captcha.
+ */
+class CaptchaException : Exception{
+	/// Passes all arguments through unchanged to the Exception constructor.
+	this(string msg, string file=__FILE__, size_t line=__LINE__, Throwable next=null){
+		super(msg, file, line, next);
+	}
+
+	/// Yields just the message text.
+	override string toString(){
+		return this.msg;
+	}
+}
+
/**
* Manages all the relevant tasks of
* $(UL
@@ -82,10 +99,24 @@ class FBStream : DRSS!(Post){
date_reliability=DateReliable.YES;
url=fetch_url;
+ h.setCookieJar(getCookiePath());
+
super(h);
}
/**
+ * Returns whether the page in buf is already unlocked.
+ *
+ * Params:
+ * buf = The chararray of the page.
+ * Returns: True if the page is unlocked, false otherwise
+ */
+	static bool captchaSolved(in char[] buf){
+		import std.algorithm.searching : canFind;
+		// A page counts as locked as soon as the text "captcha" occurs in it.
+		const bool locked = buf.canFind("captcha");
+		return !locked;
+	}
+
+ /**
* Parses the document.
*
* Params:
@@ -93,7 +124,19 @@ class FBStream : DRSS!(Post){
*/
override public void parse(string document){
XmlNode[] arr;
- XmlNode root=readDocument(document);
+ XmlNode root;
+ try{
+ root=readDocument(document);
+ }
+ catch(Exception e){
+ if(!captchaSolved(document)){
+ throw new CaptchaException("Captcha has not been solved yet. "
+ "Please run the ./captcha utility");
+ }
+ else{
+ throw e;
+ }
+ }
arr=root.parseXPath(`//title`);
headers[1][1]=arr[0].getCData().idup;
headers[0][1]=url;
diff --git a/standardpaths b/standardpaths
@@ -0,0 +1 @@
+Subproject commit 4bc270dfdca83e5e6fe8d0558efd40cca28b57ca