summaryrefslogtreecommitdiffstats
path: root/lib/PostProcessor/LWNProcessor.php
blob: f931bb41c6f94c4a2f38041b551f70ed225c6caf (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
<?php
/**
 * Nextcloud - News
 *
 * This file is licensed under the Affero General Public License version 3 or
 * later. See the COPYING file.
 *
 * @author Robin Appelman <robin@icewind.nl>
 */

namespace OCA\News\PostProcessor;

use GuzzleHttp\Cookie\CookieJar;
use OCP\Http\Client\IClientService;
use PicoFeed\Parser\Feed;
use PicoFeed\Parser\Item;
use PicoFeed\Processor\ItemProcessorInterface;
use PicoFeed\Scraper\RuleParser;

/**
 * Item post-processor for LWN.net feeds.
 *
 * For every feed item whose URL points at lwn.net it strips the "/rss" path
 * segment from the item URL and, when a login with the configured credentials
 * succeeds, replaces the item content with the article text scraped from the
 * article page.
 */
class LWNProcessor implements ItemProcessorInterface {
    /** Base URL used to turn root-relative links in scraped articles into absolute ones. */
    const BASE_URL = 'https://lwn.net';

    /** @var mixed account name sent as the "Username" login field */
    private $user;

    /** @var mixed password sent as the "Password" login field */
    private $password;

    /** @var IClientService factory for the HTTP clients used for login and scraping */
    private $clientService;

    /** @var CookieJar cookies received on login; sent again with every article request */
    private $cookieJar;

    /**
     * @param mixed          $user          account name; a falsy value disables the login
     * @param mixed          $password      account password; a falsy value disables the login
     * @param IClientService $clientService factory for HTTP clients
     */
    public function __construct($user, $password, IClientService $clientService) {
        $this->user = $user;
        $this->password = $password;
        $this->clientService = $clientService;
        $this->cookieJar = new CookieJar();
    }

    /**
     * Make sure the cookie jar holds cookies from a login request.
     *
     * A jar that already contains cookies is treated as "logged in" and no
     * request is sent. Without both user and password no login is attempted.
     *
     * @return bool true when the jar contains cookies, false otherwise
     */
    private function login() {
        if ($this->cookieJar->count() > 0) {
            return true;
        }
        if (!$this->user || !$this->password) {
            return false;
        }

        $client = $this->clientService->newClient();
        $response = $client->post('https://lwn.net/login', [
            'cookies' => $this->cookieJar,
            'body' => [
                'Username' => $this->user,
                'Password' => $this->password,
                'target' => '/',
            ],
        ]);

        // Success requires both a 200 response and at least one cookie.
        return $response->getStatusCode() === 200 && $this->cookieJar->count() > 0;
    }

    /**
     * Download an article page and extract its text.
     *
     * The request carries the cookies from the cookie jar. The content of
     * div.ArticleText is extracted, div.FeatureByline is stripped from it.
     *
     * @param  string $url article URL
     * @return string extracted markup with root-relative href/src made absolute
     */
    private function getBody($url) {
        $client = $this->clientService->newClient();
        $response = $client->get($url, [
            'cookies' => $this->cookieJar,
        ]);
        $parser = new RuleParser($response->getBody(), [
            'body' => [
                '//div[@class="ArticleText"]',
            ],
            'strip' => [
                '//div[@class="FeatureByline"]',
            ],
        ]);
        $articleBody = $parser->execute();

        // Make root-relative links and resources absolute. The (?!/) lookahead
        // keeps protocol-relative URLs ("//host/path") untouched; prefixing
        // them with the base URL would break them.
        $absolute = preg_replace(
            '%\b(href|src)="/(?!/)%',
            '$1="' . self::BASE_URL . '/',
            $articleBody
        );

        // preg_replace() yields null on a PCRE error; keep the unmodified text then.
        return $absolute === null ? $articleBody : $absolute;
    }

    /**
     * Check whether the host of the given URL is lwn.net or one of its subdomains.
     *
     * The scheme is optional ("https://", "http://", "//" or none). The
     * pattern is anchored and the dots are escaped so that a URL which merely
     * contains "lwn.net" somewhere (in its path, query, or as part of another
     * host name) is rejected; otherwise the session cookies would be sent to
     * a foreign host when the article is fetched.
     *
     * @param  string $url
     * @return bool
     */
    private function canHandle($url) {
        $regex = '%^(?:https?://|//)?(?:[a-z0-9-]+\.)*lwn\.net(?::\d+)?(?:[/?#]|$)%i';

        return preg_match($regex, (string)$url) === 1;
    }

    /**
     * Execute Item Processor
     *
     * @access public
     * @param  Feed $feed
     * @param  Item $item
     * @return bool always false; the original implementation returned nothing
     *              (a falsy value), so callers testing the result see no change
     */
    public function execute(Feed $feed, Item $item) {
        if (!$this->canHandle($item->getUrl())) {
            return false;
        }

        $loggedIn = $this->login();

        // Drop "/rss" only where it is a complete path segment, so that paths
        // such as "/rssfoo" stay intact.
        $cleanUrl = preg_replace('%/rss(?=[/?#]|$)%', '', $item->getUrl());
        if ($cleanUrl !== null) {
            $item->setUrl($cleanUrl);
        }

        if ($loggedIn) {
            $body = $this->getBody($item->getUrl());
            // Keep the content delivered by the feed when nothing could be
            // extracted from the article page.
            if (is_string($body) && $body !== '') {
                $item->setContent($body);
            }
        }

        return false;
    }
}