summary refs log tree commit diff stats
path: root/lib
diff options
context:
space:
mode:
author Robin Appelman <robin@icewind.nl> 2017-01-04 11:10:19 +0100
committer Bernhard Posselt <BernhardPosselt@users.noreply.github.com> 2017-01-04 11:10:19 +0100
commit bc27596f70cb170203655a55c0f53ee55d8d6316 (patch)
tree 2ae44ee8f5ff0e77f5f8ff0a886db905be55d081 /lib
parent 04f66c9710faf9438adcc69028eed082c98a5178 (diff)
Add postprocessor for getting lwn subscriber articles (#72)
Diffstat (limited to 'lib')
-rw-r--r-- lib/Fetcher/FeedFetcher.php 8
-rw-r--r-- lib/PostProcessor/LWNProcessor.php 102
2 files changed, 109 insertions, 1 deletions
diff --git a/lib/Fetcher/FeedFetcher.php b/lib/Fetcher/FeedFetcher.php
index 194a927f9..587e64af2 100644
--- a/lib/Fetcher/FeedFetcher.php
+++ b/lib/Fetcher/FeedFetcher.php
@@ -15,6 +15,8 @@ namespace OCA\News\Fetcher;
use Exception;
+use OCA\News\PostProcessor\LWNProcessor;
+use OCP\Http\Client\IClientService;
use PicoFeed\Parser\MalFormedXmlException;
use PicoFeed\Reader\Reader;
use PicoFeed\Parser\Parser;
@@ -42,15 +44,18 @@ class FeedFetcher implements IFeedFetcher {
private $reader;
private $l10n;
private $time;
+ private $clientService;
public function __construct(Reader $reader,
PicoFeedFaviconFactory $faviconFactory,
IL10N $l10n,
- Time $time) {
+ Time $time,
+ IClientService $clientService) {
$this->faviconFactory = $faviconFactory;
$this->reader = $reader;
$this->time = $time;
$this->l10n = $l10n;
+ $this->clientService = $clientService;
}
@@ -108,6 +113,7 @@ class FeedFetcher implements IFeedFetcher {
if ($fullTextEnabled) {
$parser->enableContentGrabber();
+ $parser->getItemPostProcessor()->register(new LWNProcessor($basicAuthUser, $basicAuthPassword, $this->clientService));
}
$parsedFeed = $parser->execute();
diff --git a/lib/PostProcessor/LWNProcessor.php b/lib/PostProcessor/LWNProcessor.php
new file mode 100644
index 000000000..f931bb41c
--- /dev/null
+++ b/lib/PostProcessor/LWNProcessor.php
@@ -0,0 +1,102 @@
+<?php
+/**
+ * Nextcloud - News
+ *
+ * This file is licensed under the Affero General Public License version 3 or
+ * later. See the COPYING file.
+ *
+ * @author Robin Appelman <robin@icewind.nl>
+ */
+
+namespace OCA\News\PostProcessor;
+
+use GuzzleHttp\Cookie\CookieJar;
+use OCP\Http\Client\IClientService;
+use PicoFeed\Parser\Feed;
+use PicoFeed\Parser\Item;
+use PicoFeed\Processor\ItemProcessorInterface;
+use PicoFeed\Scraper\RuleParser;
+
+/**
+ * Item post-processor that replaces the content of lwn.net feed items with
+ * the article text fetched from the site after logging in with the supplied
+ * credentials.
+ *
+ * Exceptions thrown by the HTTP client are not caught in this class; they
+ * propagate to whoever runs the post-processor.
+ */
+class LWNProcessor implements ItemProcessorInterface {
+    /** @var mixed account name sent as the `Username` field of the login form */
+    private $user;
+
+    /** @var mixed password sent as the `Password` field of the login form */
+    private $password;
+
+    /** @var IClientService factory used to create one HTTP client per request */
+    private $clientService;
+
+    /** @var CookieJar cookies shared between the login request and article requests */
+    private $cookieJar;
+
+    /**
+     * @param mixed $user account name; a falsy value disables the login
+     * @param mixed $password account password; a falsy value disables the login
+     * @param IClientService $clientService factory for HTTP clients
+     */
+    public function __construct($user, $password, IClientService $clientService) {
+        $this->user = $user;
+        $this->password = $password;
+        $this->clientService = $clientService;
+        $this->cookieJar = new CookieJar();
+    }
+
+    /**
+     * Log in to lwn.net, filling the shared cookie jar.
+     *
+     * A jar that already holds at least one cookie is treated as an existing
+     * session and no request is sent.
+     *
+     * @return bool true when the jar holds a cookie (before or after the
+     *              login request), false when credentials are missing or the
+     *              login response was not a 200 that set a cookie
+     */
+    private function login() {
+        if ($this->cookieJar->count() > 0) {
+            return true;
+        }
+        if (!$this->user || !$this->password) {
+            return false;
+        }
+
+        $client = $this->clientService->newClient();
+        $response = $client->post('https://lwn.net/login', [
+            'cookies' => $this->cookieJar,
+            'body' => [
+                'Username' => $this->user,
+                'Password' => $this->password,
+                'target' => '/'
+            ]
+        ]);
+        return ($response->getStatusCode() === 200 && $this->cookieJar->count() > 0);
+    }
+
+    /**
+     * Fetch an article page with the shared cookies and extract its text.
+     *
+     * @param string $url article URL to download
+     * @return mixed whatever RuleParser::execute() yields for the
+     *               `ArticleText` div (with the `FeatureByline` div stripped),
+     *               with site-relative links made absolute
+     */
+    private function getBody($url) {
+        $client = $this->clientService->newClient();
+        $response = $client->get($url, [
+            'cookies' => $this->cookieJar
+        ]);
+        $parser = new RuleParser($response->getBody(), [
+            'body' => [
+                '//div[@class="ArticleText"]',
+            ],
+            'strip' => [
+                '//div[@class="FeatureByline"]'
+            ]
+        ]);
+        $articleBody = $parser->execute();
+        // Make site-relative links absolute. The negative lookahead leaves
+        // protocol-relative links (href="//host/...") untouched; prefixing
+        // those with the lwn.net origin would break them.
+        return preg_replace('%href="/(?!/)%', 'href="https://lwn.net/', $articleBody);
+    }
+
+    /**
+     * Tell whether a URL points at lwn.net (optionally with a `www.` prefix).
+     *
+     * The host must appear at the start of the URL, after an optional
+     * `http://`, `https://` or `//` prefix, and must be followed by the end
+     * of the string or by `/`, `:`, `?` or `#`. This keeps hosts such as
+     * `lwn.net.example.com` or URLs that only mention lwn.net in their path
+     * or query from matching.
+     *
+     * @param string $url item URL
+     * @return bool
+     */
+    private function canHandle($url) {
+        $regex = '%^(?:https?://|//)?(?:www\.)?lwn\.net(?=[/:?#]|$)%i';
+
+        return (bool)preg_match($regex, $url);
+    }
+
+    /**
+     * Execute Item Processor
+     *
+     * For lwn.net items: drops the `/rss` path segment from the item URL and,
+     * when the login succeeded and the scrape produced something, replaces
+     * the item content with the scraped article.
+     *
+     * @access public
+     * @param Feed $feed
+     * @param Item $item
+     * @return bool always false; the previous implementation returned
+     *              nothing (null), which is equally falsy.
+     *              NOTE(review): confirm the caller only tests truthiness.
+     */
+    public function execute(Feed $feed, Item $item) {
+        if (!$this->canHandle($item->getUrl())) {
+            return false;
+        }
+
+        $loggedIn = $this->login();
+
+        // Remove "/rss" only when it is a whole path segment, so that e.g.
+        // "/rss-feeds" is not mangled.
+        $item->setUrl(preg_replace('%/rss(?=[/?#]|$)%', '', $item->getUrl()));
+        if ($loggedIn) {
+            $body = $this->getBody($item->getUrl());
+            // Keep the content the feed delivered when nothing was scraped.
+            if (!empty($body)) {
+                $item->setContent($body);
+            }
+        }
+
+        return false;
+    }
+}