commit 78976dd5f0b050c15d58166d1a05be7356676e82
parent f59d0cc200111fce54a690f04bfe2dfd0c44073b
Author: Dominik Schmidt <dominik@schm1dt.ch>
Date: Sun, 13 Sep 2020 15:59:48 +0200
Adapt to upstream changes
Diffstat:
4 files changed, 90 insertions(+), 45 deletions(-)
diff --git a/Fb2RSS.d b/Fb2RSS.d
@@ -1,8 +1,20 @@
import std.stdio;
import fbstream;
+import std.algorithm: endsWith;
+import std.regex;
+import std.format;
void main(string[] args){
- FBStream str=new FBStream(args[1]);
+ //args[0] is the program name; the page URL has to follow it.
+ //Without this check args[1] would fail with a bare RangeError.
+ if(args.length<2){
+ throw new Exception("Usage: Fb2RSS <url>");
+ }
+ string url=args[1];
+ //Captures: 1 = scheme, 2 = host, 3 = everything after the first '/'.
+ auto reg = ctRegex!`([^:]+)://([^/]+)/(.*)`;
+
+ auto m = url.matchFirst(reg);
+ if(!m){
+ throw new Exception("Not an url");
+ }
+ //Keep only the path (capture 3) and re-root it on m.facebook.com.
+ url = format("https://m.facebook.com/%s",m[3]);
+
+ FBStream str=new FBStream(url);
str.update();
str.writeRSS(stdout);
}
diff --git a/Makefile b/Makefile
@@ -1,5 +1,5 @@
DMD?=dmd
-OPTS?=-release -O
+OPTS?=-g
IOPTS=$(OPTS) -IDRSS/ -IDRSS/kxml/source/ -Istandardpaths/source/
all: Fb2RSS captcha
diff --git a/captcha.d b/captcha.d
@@ -6,6 +6,7 @@ import std.path;
import std.regex;
import std.format;
import std.range;
+import std.process;
import std.algorithm.searching;
@@ -60,8 +61,11 @@ int main(string[] args){
f.close();
writeln("The captcha has been written to "~file);
writeln("Please enter the text below:");
+ auto pid = spawnProcess(["/usr/bin/pqiv", "-i", file]);
+
char[] captcha;
readln(captcha);
+ kill(pid);
captcha=captcha[0..$-1]; //Exclude '\n'
buf=null;
diff --git a/fbstream.d b/fbstream.d
@@ -43,8 +43,9 @@ import drss.rss;
import drss.render;
import kxml.xml;
import std.typecons;
+import std.conv;
+import std.json;
-
string getCookiePath(){
import std.path;
import standardpaths;
@@ -61,6 +62,36 @@ class CaptchaException : Exception{
}
}
+/**
+ * Depth-first search of a JSON tree for the first object member named id.
+ * Params: tree = The value to search
+ * id = The member name to look for
+ * Returns: The member's value, or JSONValue.init (type null_) if none is found.
+ * Bugs: A member holding JSON null cannot be told apart from a missing one.
+ */
+JSONValue search(JSONValue tree, string id){
+ with(JSONType)
+ switch(tree.type){
+ case object:
+ auto o = tree.object;
+ //`in` yields a pointer to the value (or null): one lookup instead of two.
+ if(auto hit = id in o){
+ return *hit;
+ }
+ foreach(v; o.byValue){
+ auto nv= search(v, id);
+ if(nv.type != null_){
+ return nv;
+ }
+ }
+ break;
+ case array:
+ foreach(v; tree.array){
+ auto nv= search(v, id);
+ if(nv.type != null_){
+ return nv;
+ }
+ }
+ break;
+ default:
+ //Scalars have no members to descend into.
+ break;
+ }
+ return JSONValue.init;
+}
+
/**
* Manages all the relevant tasks of
* $(UL
@@ -82,7 +113,7 @@ class FBStream : DRSS!(Post){
* Facebook does check this, and if it doesn't know it, it displays an
* "Update your Browser"-Message
*/
- static string userAgent="Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.04";
+ static string userAgent="curl/7.72.0";
///The RSS-Header to append.
static string rss_header=`<?xml version="1.0" encoding="UTF-8" standalone="yes"?>`;
@@ -99,7 +130,7 @@ class FBStream : DRSS!(Post){
date_reliability=DateReliable.YES;
url=fetch_url;
- h.setCookieJar(getCookiePath());
+ //h.setCookieJar(getCookiePath());
super(h);
}
@@ -126,28 +157,8 @@ class FBStream : DRSS!(Post){
XmlNode[] arr;
XmlNode root;
- //Make the HTML valid for the parser
- import std.regex;
- /*
- * Scripts aren't properly commented, so just replace them with comments
- * We don't need them anyways
- */
- auto script_start=ctRegex!"<script[^>]*>";
- auto script_end=ctRegex!"</script>";
- document=document.replaceAll(script_start, "<!--").replaceAll(script_end, "-->");
-
- /*
- * Now, since the exact class names of facebook always vary, we
- * normalize them to a common denominatory
- */
- auto userContent_normalize=ctRegex!`class="[^"]*(userContentWrapper|userContent)[^"]*"`;
- document=document
- .replaceAll(userContent_normalize, `class="$1"`);
-
- //Add important End-Tags
- document~="</body></HTML>";
-
root=readDocument(document);
+
if(!captchaSolved(document)){
throw new CaptchaException("Captcha has not been solved yet. "
~"Please run the ./captcha utility");
@@ -156,8 +167,9 @@ class FBStream : DRSS!(Post){
headers[1][1]=arr[0].getCData().idup;
headers[0][1]=url;
- XmlNode[] nodes=root.parseXPath(`//div[@class="userContentWrapper"]`);
+ XmlNode[] nodes=root.parseXPath(`//div[@id="recent"]`);
assert(nodes.length>0, "No data nodes found!");
+ nodes = nodes[0].getChildren()[0].getChildren()[0].getChildren();
foreach(node; nodes.retro){
appendPost(node);
}
@@ -168,21 +180,29 @@ class FBStream : DRSS!(Post){
* Params: match = The data-div node
*/
private void appendPost(XmlNode match){
- XmlNode[] usercontent=match.parseXPath(`//div[@class="userContent"]`);
- if(usercontent.length==0){
- return;
+ XmlNode[] styled;
+ try{
+ styled=match.parseXPath(`//div[@style]`);
}
- XmlNode[] translatediv=usercontent[0].parseXPath(`/div[@class="_43f9"]`);
- if(translatediv.length>0){
- usercontent[0].removeChild(translatediv[0]);
+ catch(Exception e){
+ return;
}
+ /*
+ * Indexing an empty array raises RangeError, which is an Error and is
+ * therefore not caught by catch(Exception); test the length instead.
+ */
+ if(styled.length==0){
+ return;
+ }
+ XmlNode usercontent=styled[0];
SysTime t=getPostTimestamp(match);
- XmlNode[] href=match.parseXPath(`//a[@class="_5pcq"]`);
+ XmlNode[] href=match.parseXPath(`//a`);
string hrefs;
if(href.length!=0){
- hrefs=href[0].getAttribute("href");
+ //The last anchor inside the post is the one used as its link.
+ hrefs=href[$-1].getAttribute("href");
+ /*
+ import std.regex;
+ auto re = ctRegex!"[^?]+";
+ auto m = hrefs.matchFirst(re);
+ if(m){
+ hrefs = m[0];
+ }
+ */
}
- addEntry(Post(usercontent[0],t,hrefs));
+ assert(hrefs.length>0);
+ addEntry(Post(usercontent,t,hrefs));
}
/**
@@ -190,10 +210,11 @@ class FBStream : DRSS!(Post){
*
*/
private SysTime getPostTimestamp(XmlNode post){
- XmlNode[] matches=post.parseXPath(`//abbr[@data-utime]`);
- assert(matches.length>0, "No date-utime node found in post");
- string time=matches[0].getAttribute("data-utime");
- return SysTime(unixTimeToStdTime(to!ulong(time)));
+ import std.json;
+ //The data-ft attribute is parsed as JSON; search() looks for
+ //publish_time at any nesting depth.
+ auto json = parseJSON(post.getAttribute("data-ft"));
+ auto m = search(json, "publish_time");
+ //search() yields a null_ value when the key is absent. Report that
+ //explicitly instead of the generic "JSONValue is not an integer".
+ if(m.type != JSONType.integer){
+ throw new JSONException("No integer publish_time found in data-ft of post");
+ }
+ return SysTime(unixTimeToStdTime(m.integer));
}
/**
@@ -235,17 +256,25 @@ struct Post{
///The count of characters, until the title gets cut off.
static ushort title_cutoff=80;
+ ///Returns the character data of n and of all its descendants as one string.
+ static string plaintext(XmlNode n){
+ auto sink = Appender!string();
+ plaintext(n, sink);
+ return sink.data;
+ }
+ ///Appends the character data of n to app, then recurses into each child in order.
+ static void plaintext(XmlNode n, ref Appender!string app){
+ app.put(n.getCData());
+ foreach(child; n.getChildren()){
+ plaintext(child, app);
+ }
+ }
+
/**
* Return: The title of the posting
* Bugs: title_cutoff is reached with fewer characters when there are
* a lot of multibyte characters in the string.
*/
@property string title(){
- auto children=content.getChildren();
- if(children.length==0){
- return "";
- }
- string cont=children[0].getCData();
+ string cont=plaintext(content);
if(cont.length>title_cutoff){
cont=cont[0..toUTFindex(cont,title_cutoff)];
cont~="...";