Fb2RSS

A Facebook to RSS conversion tool
git clone git://xatko.vsos.ethz.ch/Fb2RSS.git
Log | Files | Refs | Submodules

commit 9804aec4177eb906990bcf69b0a0e05204a48356
parent 03f9bf88d25ca86215fa40ec222d76b76f11c3ca
Author: Dominik Schmidt <das1993@hotmail.com>
Date:   Sat, 23 Jan 2016 00:02:32 +0100

Add captcha solving support.

Facebook thought it a nifty "security" feature to add a
"security check" captcha to their pages.

Whilst this has nothing to do with "security", it effectively renders
my attempts at fetching the data useless.

So here's what to do:
1) Add cookie storage to the request
2) Solve a captcha once
3) ???
4) Non-Profit

Which is what was implemented in this commit.
You'll have to run the ./captcha binary once, passing the page URL as its
argument, and type in the text shown in the captcha image it saves; after
that, fetching should continue to work normally.

For compatibility reasons I included the standardpaths library, which handles
paths quite nicely.

Diffstat:
.gitmodules | 3+++
Makefile | 8++++++--
captcha.d | 87+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
fbstream.d | 45++++++++++++++++++++++++++++++++++++++++++++-
standardpaths | 1+
5 files changed, 141 insertions(+), 3 deletions(-)

diff --git a/.gitmodules b/.gitmodules @@ -1,3 +1,6 @@ [submodule "DRSS"] path = DRSS url = https://github.com/Doeme/DRSS.git +[submodule "standardpaths"] + path = standardpaths + url = https://github.com/MyLittleRobo/standardpaths.git diff --git a/Makefile b/Makefile @@ -1,9 +1,13 @@ DMD?=ldmd2 OPTS?=-release -O -IOPTS=$(OPTS) -IDRSS/ -IDRSS/kxml/source/ +IOPTS=$(OPTS) -IDRSS/ -IDRSS/kxml/source/ -Istandardpaths/source/ -Fb2RSS: fbstream.o Fb2RSS.o DRSS/drss.a +Fb2RSS: fbstream.o Fb2RSS.o DRSS/drss.a standardpaths/libstandardpaths.a $(DMD) $(IOPTS) $^ -of$@ +captcha: captcha.o fbstream.o DRSS/drss.a standardpaths/libstandardpaths.a + $(DMD) $(IOPTS) $^ -of$@ +standardpaths/libstandardpaths.a: standardpaths/source/standardpaths.o + $(DMD) $(IOPTS) -lib $^ -of$@ %.o: %.d $(DMD) $(IOPTS) -c $< -of$@ .PHONY: diff --git a/captcha.d b/captcha.d @@ -0,0 +1,87 @@ +import fbstream; +import std.net.curl; +import std.stdio; +import std.file; +import std.path; +import std.regex; +import std.format; +import std.range; +import std.algorithm.searching; + + +/** + * Tries to fetch the captcha and set the cookies + * + * Returns: 0 if the captcha is solved, 1 otherwise. 
+ */ +int main(string[] args){ + auto h=HTTP(); + char[] buf; + + h.url=args[1]; + h.setUserAgent(FBStream.userAgent); + h.setCookieJar(getCookiePath()); + h.onReceive = (ubyte[] data){ + buf~=cast(char[])data; + return data.length; + }; + h.perform(); + + if(FBStream.captchaSolved(buf)){ + writeln("Captcha already solved :)"); + return 0; + } + + auto url_regex=ctRegex!(".*(https://www.facebook.com/captcha/tfbimage.php[^\"]+).*"); + auto url=matchFirst(buf, url_regex)[1]; + auto datr_regex=ctRegex!(".*\"_js_datr\",\"([^\"]+)\".*"); + auto datr=matchFirst(buf, datr_regex); + + h.setCookie("_js_datr="~datr[1]); + + + auto captcha_regex=ctRegex!(".*name=\"captcha_persist_data\" value=\"([^\"]+)\".*"); + auto captcha_hash=matchFirst(buf, captcha_regex)[1]; + + buf=null; + h.url=url; + h.perform(); + + File f; + string file=buildPath(tempDir(),"fb2rss_captcha.png"); + f.open(file, "w+"); + scope(exit){ + f.close(); + remove(file); + } + f.write(buf); + f.close(); + writeln("The captcha has been written to "~file); + writeln("Please enter the text below:"); + char[] captcha; + readln(captcha); + captcha=captcha[0..$-1]; //Exclude '\n' + + buf=null; + h.url=args[1]; + h.method=HTTP.Method.post; + h.setPostData( + format( + "captcha_persist_data=%s&captcha_response=%s&captcha_submit=1", + captcha_hash, + captcha + ), + "application/x-www-form-urlencoded" + ); + h.perform(); + + if(FBStream.captchaSolved(buf)){ + writeln("Success"); + } + else{ + writeln("Sorry, didn't work :C"); + writeln("Please, try again!"); + return 1; + } + return 0; +} diff --git a/fbstream.d b/fbstream.d @@ -44,6 +44,23 @@ import drss.render; import kxml.xml; import std.typecons; + +string getCookiePath(){ + import std.path; + import standardpaths; + string base=writablePath(StandardPath.config); + return buildPath(base, "Fb2RSS_cookiejar.txt"); +} + +class CaptchaException : Exception{ + this(string msg, string file=__FILE__, size_t line=__LINE__, Throwable next=null){ + super(msg,file,line,next); 
+ } + override string toString(){ + return msg; + } +} + /** * Manages all the relevant tasks of * $(UL @@ -82,10 +99,24 @@ class FBStream : DRSS!(Post){ date_reliability=DateReliable.YES; url=fetch_url; + h.setCookieJar(getCookiePath()); + super(h); } /** + * Returns wether the page in buf is already unlocked. + * + * Params: + * buf = The chararray of the page. + * Returns: True if the page is unlocked, false otherwise + */ + static bool captchaSolved(in char[] buf){ + import std.algorithm.searching : canFind; + return !canFind(buf, "captcha"); + } + + /** * Parses the document. * * Params: @@ -93,7 +124,19 @@ class FBStream : DRSS!(Post){ */ override public void parse(string document){ XmlNode[] arr; - XmlNode root=readDocument(document); + XmlNode root; + try{ + root=readDocument(document); + } + catch(Exception e){ + if(!captchaSolved(document)){ + throw new CaptchaException("Captcha has not been solved yet. " + "Please run the ./captcha utility"); + } + else{ + throw e; + } + } arr=root.parseXPath(`//title`); headers[1][1]=arr[0].getCData().idup; headers[0][1]=url; diff --git a/standardpaths b/standardpaths @@ -0,0 +1 @@ +Subproject commit 4bc270dfdca83e5e6fe8d0558efd40cca28b57ca