diff options
author | Robin Appelman <robin@icewind.nl> | 2017-01-04 11:10:19 +0100 |
---|---|---|
committer | Bernhard Posselt <BernhardPosselt@users.noreply.github.com> | 2017-01-04 11:10:19 +0100 |
commit | bc27596f70cb170203655a55c0f53ee55d8d6316 (patch) | |
tree | 2ae44ee8f5ff0e77f5f8ff0a886db905be55d081 /lib/PostProcessor | |
parent | 04f66c9710faf9438adcc69028eed082c98a5178 (diff) |
Add postprocessor for getting lwn subscriber articles (#72)
Diffstat (limited to 'lib/PostProcessor')
-rw-r--r-- | lib/PostProcessor/LWNProcessor.php | 102 |
1 files changed, 102 insertions, 0 deletions
<?php
/**
 * Nextcloud - News
 *
 * This file is licensed under the Affero General Public License version 3 or
 * later. See the COPYING file.
 *
 * @author Robin Appelman <robin@icewind.nl>
 */

namespace OCA\News\PostProcessor;

use GuzzleHttp\Cookie\CookieJar;
use OCP\Http\Client\IClientService;
use PicoFeed\Parser\Feed;
use PicoFeed\Parser\Item;
use PicoFeed\Processor\ItemProcessorInterface;
use PicoFeed\Scraper\RuleParser;

/**
 * Item processor for lwn.net feed items.
 *
 * For items whose URL points at lwn.net it removes the trailing "/rss"
 * segment from the item URL and, when a login with the configured credentials
 * succeeds, replaces the item content with the article text scraped from the
 * site.
 */
class LWNProcessor implements ItemProcessorInterface {
    private $user;

    private $password;

    private $clientService;

    /** Cookie jar shared by the login request and all article requests. */
    private $cookieJar;

    /**
     * @param string $user lwn.net account name; a falsy value disables login
     * @param string $password lwn.net password; a falsy value disables login
     * @param IClientService $clientService factory for the HTTP clients used
     *        for the login and article requests
     */
    public function __construct($user, $password, IClientService $clientService) {
        $this->user = $user;
        $this->password = $password;
        $this->clientService = $clientService;
        $this->cookieJar = new CookieJar();
    }

    /**
     * Log in to lwn.net unless the cookie jar already holds cookies.
     *
     * Any cookie in the jar is taken as proof of an earlier successful login,
     * so the login request is sent at most once per successful login.
     *
     * Exceptions raised by the HTTP client are not caught here.
     *
     * @return bool true if cookies are available for subsequent requests
     */
    private function login() {
        if ($this->cookieJar->count() > 0) {
            return true;
        }
        if (!$this->user || !$this->password) {
            return false;
        }

        $client = $this->clientService->newClient();
        // NOTE(review): an array 'body' is presumably sent as form fields by
        // the HTTP client in use — confirm when upgrading the client library.
        $response = $client->post('https://lwn.net/login', [
            'cookies' => $this->cookieJar,
            'body' => [
                'Username' => $this->user,
                'Password' => $this->password,
                'target' => '/'
            ]
        ]);

        return ($response->getStatusCode() === 200 && $this->cookieJar->count() > 0);
    }

    /**
     * Fetch an article page with the stored cookies and extract its text.
     *
     * The content of div.ArticleText is kept, div.FeatureByline is stripped,
     * and root-relative href/src attributes are rewritten to absolute
     * https://lwn.net/ URLs.
     *
     * Exceptions raised by the HTTP client are not caught here.
     *
     * @param string $url article URL
     * @return string extracted article markup
     */
    private function getBody($url) {
        $client = $this->clientService->newClient();
        $response = $client->get($url, [
            'cookies' => $this->cookieJar
        ]);
        $parser = new RuleParser($response->getBody(), [
            'body' => [
                '//div[@class="ArticleText"]',
            ],
            'strip' => [
                '//div[@class="FeatureByline"]'
            ]
        ]);
        $articleBody = $parser->execute();

        // Make root-relative links and resources absolute. The (?!/) guard
        // leaves protocol-relative URLs ("//host/path") untouched; a plain
        // string replace would turn them into "https://lwn.net//host/path".
        $absolute = preg_replace(
            '%\b(href|src)="/(?!/)%',
            '$1="https://lwn.net/',
            $articleBody
        );

        // preg_replace() returns null on a PCRE error; fall back to the
        // unmodified markup rather than losing the article.
        return $absolute === null ? $articleBody : $absolute;
    }

    /**
     * Check whether a URL points at lwn.net.
     *
     * Accepts an optional http(s) scheme or protocol-relative prefix and an
     * optional "www." prefix. The host has to be exactly lwn.net: the pattern
     * is anchored and the dots are escaped so that look-alike hosts
     * ("notlwn.net", "lwn.net.example.com") and URLs that only mention
     * lwn.net in their path or query are rejected.
     *
     * @param string $url
     * @return bool
     */
    private function canHandle($url) {
        $regex = '%^(?:https?://|//)?(?:www\.)?lwn\.net(?:[:/?#]|$)%i';

        return preg_match($regex, (string)$url) === 1;
    }

    /**
     * Remove the "/rss" path segment that ends the path of a feed item URL.
     *
     * Only a complete trailing segment is removed (optionally followed by a
     * slash, query string or fragment), so paths that merely contain "/rss"
     * such as "/rssfeeds" are left alone.
     *
     * @param string $url
     * @return string
     */
    private function stripRssSuffix($url) {
        $stripped = preg_replace('%/rss(?=/?(?:[?#]|$))%', '', (string)$url, 1);

        return $stripped === null ? $url : $stripped;
    }

    /**
     * Execute Item Processor
     *
     * Items that do not point at lwn.net are left untouched. For lwn.net
     * items the URL is rewritten to the article page and, if the login
     * succeeded, the content is replaced by the scraped article text. Content
     * supplied by the feed is kept when scraping yields nothing.
     *
     * @access public
     * @param Feed $feed
     * @param Item $item
     * @return bool always false
     */
    public function execute(Feed $feed, Item $item) {
        if (!$this->canHandle($item->getUrl())) {
            return false;
        }

        $loggedIn = $this->login();

        $item->setUrl($this->stripRssSuffix($item->getUrl()));

        if ($loggedIn) {
            $body = $this->getBody($item->getUrl());
            // An empty result means the article container was not found;
            // do not overwrite the feed-provided content with nothing.
            if ($body !== null && $body !== '') {
                $item->setContent($body);
            }
        }

        // The original returned nothing (a falsy null); return an explicit
        // false to honour the documented bool return type.
        // NOTE(review): presumably a truthy value would stop later item
        // processors from running — confirm against PicoFeed before changing.
        return false;
    }
}