From a9eb72911b6f022da645dc08cf8c0f4b1702d1e1 Mon Sep 17 00:00:00 2001
From: Bernhard Posselt
Date: Wed, 28 Aug 2013 17:26:38 +0200
Subject: add enhancers for articles, fix #14
---
CHANGELOG | 5 +
businesslayer/feedbusinesslayer.php | 9 +-
dependencyinjection/dicontainer.php | 37 ++++-
tests/unit/businesslayer/FeedBusinessLayerTest.php | 163 +++----------------
.../articleenhancer/ArticleEnhancerTest.php | 178 +++++++++++++++++++++
.../articleenhancer/DefaultEnhancerTest.php | 54 +++++++
.../unit/utility/articleenhancer/EnhancerTest.php | 109 +++++++++++++
utility/articleenhancer/articleenhancer.php | 112 +++++++++++++
.../cyanideandhappinessenhancer.php | 46 ++++++
utility/articleenhancer/defaultenhancer.php | 49 ++++++
utility/articleenhancer/enhancer.php | 52 ++++++
utility/simplepiefilefactory.php | 35 ++++
12 files changed, 707 insertions(+), 142 deletions(-)
create mode 100644 tests/unit/utility/articleenhancer/ArticleEnhancerTest.php
create mode 100644 tests/unit/utility/articleenhancer/DefaultEnhancerTest.php
create mode 100644 tests/unit/utility/articleenhancer/EnhancerTest.php
create mode 100644 utility/articleenhancer/articleenhancer.php
create mode 100644 utility/articleenhancer/cyanideandhappinessenhancer.php
create mode 100644 utility/articleenhancer/defaultenhancer.php
create mode 100644 utility/articleenhancer/enhancer.php
create mode 100644 utility/simplepiefilefactory.php
diff --git a/CHANGELOG b/CHANGELOG
index aeaa24667..25fb1b237 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,8 @@
+owncloud-news (1.207)
+* Add possibility to hook up article enhancers which fetch article content directly from the web page
+* Add article enhancer for explosm.net to directly fetch comics
+
+
owncloud-news (1.206)
* Also handle URLErrors in updater script that are thrown when the domain of a feed is not found
diff --git a/businesslayer/feedbusinesslayer.php b/businesslayer/feedbusinesslayer.php
index 8cd4fc663..764cb07c6 100644
--- a/businesslayer/feedbusinesslayer.php
+++ b/businesslayer/feedbusinesslayer.php
@@ -36,6 +36,8 @@ use \OCA\News\Utility\Fetcher;
use \OCA\News\Utility\FetcherException;
use \OCA\News\Utility\ImportParser;
+use \OCA\News\Utility\ArticleEnhancer\Enhancer;
+
class FeedBusinessLayer extends BusinessLayer {
private $feedFetcher;
@@ -44,12 +46,14 @@ class FeedBusinessLayer extends BusinessLayer {
private $timeFactory;
private $importParser;
private $autoPurgeMinimumInterval;
+ private $enhancer;
public function __construct(FeedMapper $feedMapper, Fetcher $feedFetcher,
ItemMapper $itemMapper, API $api,
TimeFactory $timeFactory,
ImportParser $importParser,
- $autoPurgeMinimumInterval){
+ $autoPurgeMinimumInterval,
+ Enhancer $enhancer){
parent::__construct($feedMapper);
$this->feedFetcher = $feedFetcher;
$this->itemMapper = $itemMapper;
@@ -57,6 +61,7 @@ class FeedBusinessLayer extends BusinessLayer {
$this->timeFactory = $timeFactory;
$this->importParser = $importParser;
$this->autoPurgeMinimumInterval = $autoPurgeMinimumInterval;
+ $this->enhancer = $enhancer;
}
/**
@@ -118,6 +123,7 @@ class FeedBusinessLayer extends BusinessLayer {
continue;
} catch(DoesNotExistException $ex){
$unreadCount += 1;
+ $item = $this->enhancer->enhance($item);
$this->itemMapper->insert($item);
}
}
@@ -183,6 +189,7 @@ class FeedBusinessLayer extends BusinessLayer {
try {
$this->itemMapper->findByGuidHash($item->getGuidHash(), $feedId, $userId);
} catch(DoesNotExistException $ex){
+ $item = $this->enhancer->enhance($item);
$this->itemMapper->insert($item);
}
}
diff --git a/dependencyinjection/dicontainer.php b/dependencyinjection/dicontainer.php
index 48206eea5..39f5a7be4 100644
--- a/dependencyinjection/dicontainer.php
+++ b/dependencyinjection/dicontainer.php
@@ -54,6 +54,11 @@ use \OCA\News\Utility\TwitterFetcher;
use \OCA\News\Utility\OPMLExporter;
use \OCA\News\Utility\ImportParser;
use \OCA\News\Utility\Updater;
+use \OCA\News\Utility\SimplePieFileFactory;
+
+use \OCA\News\Utility\ArticleEnhancer\Enhancer;
+use \OCA\News\Utility\ArticleEnhancer\DefaultEnhancer;
+use \OCA\News\Utility\ArticleEnhancer\CyanideAndHappinessEnhancer;
require_once __DIR__ . '/../3rdparty/htmlpurifier/library/HTMLPurifier.auto.php';
@@ -167,7 +172,8 @@ class DIContainer extends BaseContainer {
$c['API'],
$c['TimeFactory'],
$c['ImportParser'],
- $c['autoPurgeMinimumInterval']);
+ $c['autoPurgeMinimumInterval'],
+ $c['Enhancer']);
});
$this['ItemBusinessLayer'] = $this->share(function($c){
@@ -223,6 +229,30 @@ class DIContainer extends BaseContainer {
/**
* Utility
*/
+ $this['Enhancer'] = $this->share(function($c){
+ $enhancer = new Enhancer();
+
+ // register enhancers in order
+ // the most generic enhancer should be the last one
+ $enhancer->registerEnhancer($c['CyanideAndHappinessEnhancer']);
+ $enhancer->registerEnhancer($c['DefaultEnhancer']);
+
+ return $enhancer;
+ });
+
+ $this['DefaultEnhancer'] = $this->share(function($c){
+ return new DefaultEnhancer();
+ });
+
+ $this['CyanideAndHappinessEnhancer'] = $this->share(function($c){
+ return new CyanideAndHappinessEnhancer(
+ $c['SimplePieFileFactory'],
+ $c['HTMLPurifier'],
+ $c['feedFetcherTimeout']
+ );
+ });
+
+
$this['Fetcher'] = $this->share(function($c){
$fetcher = new Fetcher();
@@ -250,6 +280,7 @@ class DIContainer extends BaseContainer {
return new TwitterFetcher($c['FeedFetcher']);
});
+
$this['ImportParser'] = $this->share(function($c){
return new ImportParser($c['TimeFactory'], $c['HTMLPurifier']);
});
@@ -268,6 +299,10 @@ class DIContainer extends BaseContainer {
$c['ItemBusinessLayer']);
});
+ $this['SimplePieFileFactory'] = $this->share(function($c){
+ return new SimplePieFileFactory();
+ });
+
}
}
diff --git a/tests/unit/businesslayer/FeedBusinessLayerTest.php b/tests/unit/businesslayer/FeedBusinessLayerTest.php
index 0a923cace..7a4cf24e6 100644
--- a/tests/unit/businesslayer/FeedBusinessLayerTest.php
+++ b/tests/unit/businesslayer/FeedBusinessLayerTest.php
@@ -48,6 +48,7 @@ class FeedBusinessLayerTest extends \OCA\AppFramework\Utility\TestUtility {
private $time;
private $importParser;
private $autoPurgeMinimumInterval;
+ private $enhancer;
protected function setUp(){
$this->api = $this->getAPIMock();
@@ -72,9 +73,13 @@ class FeedBusinessLayerTest extends \OCA\AppFramework\Utility\TestUtility {
$this->importParser = $this->getMockBuilder('\OCA\News\Utility\ImportParser')
->disableOriginalConstructor()
->getMock();
+ $this->enhancer = $this->getMockBuilder('\OCA\News\Utility\ArticleEnhancer\Enhancer')
+ ->disableOriginalConstructor()
+ ->getMock();
$this->feedBusinessLayer = new FeedBusinessLayer($this->feedMapper,
$this->fetcher, $this->itemMapper, $this->api,
- $timeFactory, $this->importParser, $this->autoPurgeMinimumInterval);
+ $timeFactory, $this->importParser, $this->autoPurgeMinimumInterval,
+ $this->enhancer);
$this->user = 'jack';
$response = 'hi';
}
@@ -146,6 +151,10 @@ class FeedBusinessLayerTest extends \OCA\AppFramework\Utility\TestUtility {
$this->equalTo($item2->getFeedId()),
$this->equalTo($this->user))
->will($this->throwException($ex));
+ $this->enhancer->expects($this->at(0))
+ ->method('enhance')
+ ->with($this->equalTo($return[1][1]))
+ ->will($this->returnValue($return[1][1]));
$this->itemMapper->expects($this->at(1))
->method('insert')
->with($this->equalTo($return[1][1]));
@@ -156,6 +165,10 @@ class FeedBusinessLayerTest extends \OCA\AppFramework\Utility\TestUtility {
$this->equalTo($item1->getFeedId()),
$this->equalTo($this->user))
->will($this->throwException($ex));
+ $this->enhancer->expects($this->at(1))
+ ->method('enhance')
+ ->with($this->equalTo($return[1][0]))
+ ->will($this->returnValue($return[1][0]));
$this->itemMapper->expects($this->at(3))
->method('insert')
->with($this->equalTo($return[1][0]));
@@ -201,6 +214,10 @@ class FeedBusinessLayerTest extends \OCA\AppFramework\Utility\TestUtility {
$this->equalTo($item2->getFeedId()),
$this->equalTo($this->user))
->will($this->throwException($ex));
+ $this->enhancer->expects($this->at(0))
+ ->method('enhance')
+ ->with($this->equalTo($return[1][1]))
+ ->will($this->returnValue($return[1][1]));
$this->itemMapper->expects($this->at(1))
->method('insert')
->with($this->equalTo($return[1][1]));
@@ -249,6 +266,10 @@ class FeedBusinessLayerTest extends \OCA\AppFramework\Utility\TestUtility {
$this->equalTo($items[0]->getFeedId()),
$this->equalTo($this->user))
->will($this->throwException($ex));
+ $this->enhancer->expects($this->at(0))
+ ->method('enhance')
+ ->with($this->equalTo($items[0]))
+ ->will($this->returnValue($items[0]));
$this->itemMapper->expects($this->once())
->method('insert')
->with($this->equalTo($items[0]));
@@ -263,145 +284,7 @@ class FeedBusinessLayerTest extends \OCA\AppFramework\Utility\TestUtility {
$this->assertEquals($return, $feed);
}
-
- public function testUpdateUpdatesEntryNotWhenPubDateSame(){
- $feed = new Feed();
- $feed->setId(3);
- $feed->getUrl('test');
-
- $item = new Item();
- $item->setGuidHash(md5('hi'));
- $item->setPubDate(3333);
- $items = array(
- $item
- );
-
- $fetchReturn = array($feed, $items);
-
- $this->feedMapper->expects($this->at(0))
- ->method('find')
- ->with($this->equalTo($feed->getId()),
- $this->equalTo($this->user))
- ->will($this->returnValue($feed));
- $this->fetcher->expects($this->once())
- ->method('fetch')
- ->will($this->returnValue($fetchReturn));
- $this->itemMapper->expects($this->once())
- ->method('findByGuidHash')
- ->with($this->equalTo($item->getGuidHash()),
- $this->equalTo($feed->getId()),
- $this->equalTo($this->user))
- ->will($this->returnValue($item));
- $this->itemMapper->expects($this->never())
- ->method('insert');
- $this->itemMapper->expects($this->never())
- ->method('delete');
-
- $this->feedMapper->expects($this->at(1))
- ->method('find')
- ->with($feed->getId(), $this->user)
- ->will($this->returnValue($feed));
-
- $return = $this->feedBusinessLayer->update($feed->getId(), $this->user);
-
- $this->assertEquals($return, $feed);
- }
-
-
- public function testUpdateUpdatesEntryNotWhenPubDateUnkown(){
- $feed = new Feed();
- $feed->setId(3);
- $feed->getUrl('test');
-
- $item = new Item();
- $item->setGuidHash(md5('hi'));
- $item->setPubDate(false);
- $items = array(
- $item
- );
-
- $item2 = new Item();
- $item2->setPubDate(0);
-
- $fetchReturn = array($feed, $items);
-
- $this->feedMapper->expects($this->at(0))
- ->method('find')
- ->with($this->equalTo($feed->getId()),
- $this->equalTo($this->user))
- ->will($this->returnValue($feed));
- $this->fetcher->expects($this->once())
- ->method('fetch')
- ->will($this->returnValue($fetchReturn));
- $this->itemMapper->expects($this->once())
- ->method('findByGuidHash')
- ->with($this->equalTo($item->getGuidHash()),
- $this->equalTo($feed->getId()),
- $this->equalTo($this->user))
- ->will($this->returnValue($item2));
- $this->itemMapper->expects($this->never())
- ->method('insert');
- $this->itemMapper->expects($this->never())
- ->method('delete');
-
- $this->feedMapper->expects($this->at(1))
- ->method('find')
- ->with($feed->getId(), $this->user)
- ->will($this->returnValue($feed));
-
- $return = $this->feedBusinessLayer->update($feed->getId(), $this->user);
-
- $this->assertEquals($return, $feed);
- }
-
- public function testUpdateUpdatesEntryNotWhenNoPubDate(){
- $feed = new Feed();
- $feed->setId(3);
- $feed->getUrl('test');
-
- $item = new Item();
- $item->setGuidHash(md5('hi'));
- $item->setPubDate(null);
- $items = array(
- $item
- );
-
- $item2 = new Item();
- $item2->setPubDate(null);
-
- $fetchReturn = array($feed, $items);
-
- $this->feedMapper->expects($this->at(0))
- ->method('find')
- ->with($this->equalTo($feed->getId()),
- $this->equalTo($this->user))
- ->will($this->returnValue($feed));
- $this->fetcher->expects($this->once())
- ->method('fetch')
- ->will($this->returnValue($fetchReturn));
- $this->itemMapper->expects($this->once())
- ->method('findByGuidHash')
- ->with($this->equalTo($item->getGuidHash()),
- $this->equalTo($feed->getId()),
- $this->equalTo($this->user))
- ->will($this->returnValue($item2));
- $this->itemMapper->expects($this->never())
- ->method('insert');
- $this->itemMapper->expects($this->never())
- ->method('delete');
-
- $this->feedMapper->expects($this->at(1))
- ->method('find')
- ->with($feed->getId(), $this->user)
- ->will($this->returnValue($feed));
-
- $return = $this->feedBusinessLayer->update($feed->getId(), $this->user);
-
- $this->assertEquals($return, $feed);
- }
-
-
- public function testCreateUpdateFails(){
+ public function testUpdateFails(){
$feed = new Feed();
$feed->setId(3);
$feed->getUrl('test');
diff --git a/tests/unit/utility/articleenhancer/ArticleEnhancerTest.php b/tests/unit/utility/articleenhancer/ArticleEnhancerTest.php
new file mode 100644
index 000000000..8d507c0f8
--- /dev/null
+++ b/tests/unit/utility/articleenhancer/ArticleEnhancerTest.php
@@ -0,0 +1,178 @@
+.
+*
+*/
+
+namespace OCA\News\Utility\ArticleEnhancer;
+
+use \OCA\News\Db\Item;
+
+require_once(__DIR__ . "/../../../classloader.php");
+
+
+/**
+ * Minimal concrete subclass of the abstract ArticleEnhancer so that the
+ * behaviour of the base class can be exercised by the tests in this file.
+ * It forwards every constructor argument to the parent unchanged.
+ */
+class TestEnhancer extends ArticleEnhancer {
+
+    public function __construct(
+        $purifier,
+        $fileFactory,
+        $articleRegex,
+        $articleXPATH,
+        $timeout
+    ){
+        parent::__construct(
+            $purifier,
+            $fileFactory,
+            $articleRegex,
+            $articleXPATH,
+            $timeout
+        );
+    }
+
+}
+
+
+/**
+ * Unit tests for the shared ArticleEnhancer logic, run through the
+ * TestEnhancer subclass with a mocked file factory and purifier.
+ *
+ * NOTE(review): the multi-line fixture strings assigned to $file->body
+ * below contain only whitespace here; the markup they were meant to hold
+ * appears to be missing - confirm against the upstream test file.
+ */
+class ArticleEnhancerTest extends \OCA\AppFramework\Utility\TestUtility {
+
+    private $purifier;
+    private $testEnhancer;
+    private $fileFactory;
+    private $timeout;
+
+    protected function setUp() {
+        // store the timeout on the instance: it is handed to the enhancer
+        // below and compared against the getFile() call in every test. A
+        // local variable would leave $this->timeout null and make those
+        // expectations check for null instead of the configured value
+        $this->timeout = 30;
+        $this->fileFactory = $this->getMockBuilder('\OCA\News\Utility\SimplePieFileFactory')
+            ->disableOriginalConstructor()
+            ->getMock();
+        $this->purifier = $this->getMock('purifier', array('purify'));
+
+        $this->testEnhancer = new TestEnhancer(
+            $this->purifier,
+            $this->fileFactory,
+            '/explosm.net\/comics/',
+            '//*[@id=\'maincontent\']/div[2]/img',
+            $this->timeout
+        );
+    }
+
+
+    /**
+     * An item whose url matches the configured regex is accepted
+     */
+    public function testCanHandle() {
+        $item = new Item();
+        $item->setUrl('http://explosm.net/comics');
+        $this->assertTrue($this->testEnhancer->canHandle($item));
+    }
+
+
+    /**
+     * The page is fetched with the item url and the configured timeout,
+     * and the purified result replaces the item body
+     */
+    public function testDoesModifiyArticlesThatMatch() {
+        $file = new \stdClass;
+        $file->body = '
+
+
+
+ ';
+        $item = new Item();
+        $item->setUrl('https://www.explosm.net/comics/312');
+        $item->setBody('Hello thar');
+
+        $this->fileFactory->expects($this->once())
+            ->method('getFile')
+            ->with($this->equalTo($item->getUrl()),
+                $this->equalTo($this->timeout))
+            ->will($this->returnValue($file));
+        $this->purifier->expects($this->once())
+            ->method('purify')
+            ->with($this->equalTo(''))
+            ->will($this->returnValue(''));
+
+        $result = $this->testEnhancer->enhance($item);
+        $this->assertEquals('', $result->getBody());
+    }
+
+
+    /**
+     * A page on which the xpath matches nothing must not cause an error
+     */
+    public function testModificationHandlesEmptyResults() {
+        $file = new \stdClass;
+        $file->body = '
+
+
+
+
+ ';
+        $item = new Item();
+        $item->setUrl('https://www.explosm.net/comics/312');
+        $item->setBody('Hello thar');
+
+        $this->fileFactory->expects($this->once())
+            ->method('getFile')
+            ->with($this->equalTo($item->getUrl()),
+                $this->equalTo($this->timeout))
+            ->will($this->returnValue($file));
+        $this->purifier->expects($this->once())
+            ->method('purify')
+            ->with($this->equalTo(null))
+            ->will($this->returnValue(null));
+
+        $result = $this->testEnhancer->enhance($item);
+        $this->assertEquals(null, $result->getBody());
+    }
+
+
+    /**
+     * A completely empty response body must not cause an error
+     */
+    public function testModificationDoesNotBreakOnEmptyDom() {
+        $file = new \stdClass;
+        $file->body = '';
+        $item = new Item();
+        $item->setUrl('https://www.explosm.net/comics/312');
+        $item->setBody('Hello thar');
+
+        $this->fileFactory->expects($this->once())
+            ->method('getFile')
+            ->with($this->equalTo($item->getUrl()),
+                $this->equalTo($this->timeout))
+            ->will($this->returnValue($file));
+        $this->purifier->expects($this->once())
+            ->method('purify')
+            ->with($this->equalTo(null))
+            ->will($this->returnValue(null));
+
+        $result = $this->testEnhancer->enhance($item);
+        $this->assertEquals(null, $result->getBody());
+    }
+
+
+    /**
+     * Markup that cannot be parsed cleanly must not cause an error
+     */
+    public function testModificationDoesNotBreakOnBrokenDom() {
+        $file = new \stdClass;
+        $file->body = '
+
+
+
+
+ ';
+        $item = new Item();
+        $item->setUrl('https://www.explosm.net/comics/312');
+        $item->setBody('Hello thar');
+
+        $this->fileFactory->expects($this->once())
+            ->method('getFile')
+            ->with($this->equalTo($item->getUrl()),
+                $this->equalTo($this->timeout))
+            ->will($this->returnValue($file));
+        $this->purifier->expects($this->once())
+            ->method('purify')
+            ->with($this->equalTo(null))
+            ->will($this->returnValue(null));
+
+        $result = $this->testEnhancer->enhance($item);
+        $this->assertEquals(null, $result->getBody());
+    }
+
+
+}
\ No newline at end of file
diff --git a/tests/unit/utility/articleenhancer/DefaultEnhancerTest.php b/tests/unit/utility/articleenhancer/DefaultEnhancerTest.php
new file mode 100644
index 000000000..901428616
--- /dev/null
+++ b/tests/unit/utility/articleenhancer/DefaultEnhancerTest.php
@@ -0,0 +1,54 @@
+.
+*
+*/
+
+namespace OCA\News\Utility\ArticleEnhancer;
+
+use \OCA\News\Db\Item;
+
+require_once(__DIR__ . "/../../../classloader.php");
+
+
+/**
+ * Tests for DefaultEnhancer: it accepts every item and hands it back as is
+ */
+class DefaultEnhancerTest extends \OCA\AppFramework\Utility\TestUtility {
+
+    private $testEnhancer;
+
+    protected function setUp() {
+        $this->testEnhancer = new DefaultEnhancer();
+    }
+
+
+    /**
+     * Even an item without any url is accepted
+     */
+    public function testCanHandle() {
+        $article = new Item();
+        $accepted = $this->testEnhancer->canHandle($article);
+        $this->assertTrue($accepted);
+    }
+
+
+    /**
+     * Enhancing yields an item equal to the one passed in
+     */
+    public function testEnhance() {
+        $article = new Item();
+        $enhanced = $this->testEnhancer->enhance($article);
+        $this->assertEquals($article, $enhanced);
+    }
+
+
+}
\ No newline at end of file
diff --git a/tests/unit/utility/articleenhancer/EnhancerTest.php b/tests/unit/utility/articleenhancer/EnhancerTest.php
new file mode 100644
index 000000000..559722e60
--- /dev/null
+++ b/tests/unit/utility/articleenhancer/EnhancerTest.php
@@ -0,0 +1,109 @@
+.
+*
+*/
+
+namespace OCA\News\Utility\ArticleEnhancer;
+
+use \OCA\News\Db\Item;
+
+require_once(__DIR__ . "/../../../classloader.php");
+
+
+/**
+ * Tests for the Enhancer registry: registered article enhancers are asked
+ * in registration order and the first one that accepts the item is used
+ */
+class EnhancerTest extends \OCA\AppFramework\Utility\TestUtility {
+
+    private $enhancer;
+    private $articleEnhancer;
+    private $articleEnhancer2;
+
+    protected function setUp(){
+        $this->enhancer = new Enhancer();
+        $this->articleEnhancer = $this->mockArticleEnhancer();
+        $this->articleEnhancer2 = $this->mockArticleEnhancer();
+    }
+
+
+    /**
+     * @return a mocked ArticleEnhancer whose constructor is not run
+     */
+    private function mockArticleEnhancer(){
+        return $this->getMockBuilder(
+            '\OCA\News\Utility\ArticleEnhancer\ArticleEnhancer')
+            ->disableOriginalConstructor()
+            ->getMock();
+    }
+
+
+    /**
+     * @param string $url the url of the item
+     * @return Item a fresh item pointing to the given url
+     */
+    private function itemWithUrl($url){
+        $article = new Item();
+        $article->setUrl($url);
+        return $article;
+    }
+
+
+    /**
+     * Expects exactly one canHandle call for the item on the given mock
+     * @param $mock the mocked article enhancer
+     * @param Item $item the item that has to be passed to canHandle
+     * @param bool $answer the value canHandle should return
+     */
+    private function expectCanHandle($mock, $item, $answer){
+        $mock->expects($this->once())
+            ->method('canHandle')
+            ->with($this->equalTo($item))
+            ->will($this->returnValue($answer));
+    }
+
+
+    public function testFetch(){
+        $article = $this->itemWithUrl('hi');
+
+        $this->expectCanHandle($this->articleEnhancer, $article, true);
+        $this->enhancer->registerEnhancer($this->articleEnhancer);
+
+        $this->enhancer->enhance($article);
+    }
+
+
+    public function testMultipleFetchers(){
+        $article = $this->itemWithUrl('hi');
+
+        // the first enhancer declines, so the second one has to be asked
+        $this->expectCanHandle($this->articleEnhancer, $article, false);
+        $this->expectCanHandle($this->articleEnhancer2, $article, true);
+
+        $this->enhancer->registerEnhancer($this->articleEnhancer);
+        $this->enhancer->registerEnhancer($this->articleEnhancer2);
+
+        $this->enhancer->enhance($article);
+    }
+
+
+    public function testMultipleFetchersOnlyOneShouldHandle(){
+        $article = $this->itemWithUrl('hi');
+        $expected = 'zeas';
+
+        $this->expectCanHandle($this->articleEnhancer, $article, true);
+        $this->articleEnhancer->expects($this->once())
+            ->method('enhance')
+            ->with($this->equalTo($article))
+            ->will($this->returnValue($expected));
+        // the first enhancer accepted, the second must not even be asked
+        $this->articleEnhancer2->expects($this->never())
+            ->method('canHandle');
+
+        $this->enhancer->registerEnhancer($this->articleEnhancer);
+        $this->enhancer->registerEnhancer($this->articleEnhancer2);
+
+        $actual = $this->enhancer->enhance($article);
+
+        $this->assertEquals($expected, $actual);
+    }
+
+
+}
\ No newline at end of file
diff --git a/utility/articleenhancer/articleenhancer.php b/utility/articleenhancer/articleenhancer.php
new file mode 100644
index 000000000..d7701d53b
--- /dev/null
+++ b/utility/articleenhancer/articleenhancer.php
@@ -0,0 +1,112 @@
+.
+*
+*/
+
+namespace OCA\News\Utility\ArticleEnhancer;
+
+use \OCA\News\Utility\SimplePieFileFactory;
+
+
+/**
+ * Base class for enhancers that replace the body of an item with markup
+ * taken from the web page the item links to.
+ *
+ * A subclass supplies a regex that selects the article urls it is
+ * responsible for and an xpath expression that selects the wanted part of
+ * the fetched page.
+ */
+abstract class ArticleEnhancer {
+
+
+    private $articleUrlRegex;
+    private $articleXPath;
+    private $purifier;
+    private $fileFactory;
+    private $maximumTimeout;
+
+
+    /**
+     * @param $purifier the purifier object to clean the html which will be
+     * matched
+     * @param SimplePieFileFactory $fileFactory a factory for getting a simple
+     * pie file instance
+     * @param string $articleUrlRegex the regex to match which article should be
+     * handled
+     * @param string $articleXPath the xpath which tells the fetcher with what
+     * body the feed should be replaced
+     * @param int $maximumTimeout maximum timeout in seconds
+     */
+    public function __construct($purifier, SimplePieFileFactory $fileFactory,
+                                $articleUrlRegex, $articleXPath,
+                                $maximumTimeout=10){
+        $this->purifier = $purifier;
+        $this->articleUrlRegex = $articleUrlRegex;
+        $this->articleXPath = $articleXPath;
+        $this->fileFactory = $fileFactory;
+        // has to be the property that enhance() reads, otherwise the
+        // configured timeout never reaches the file factory
+        $this->maximumTimeout = $maximumTimeout;
+    }
+
+
+    /**
+     * @param $item the item that should be checked
+     * @return bool true if the url of the item matches the article url regex
+     */
+    public function canHandle($item){
+        // preg_match returns 1 on a match, 0 on no match and false on error
+        return preg_match($this->articleUrlRegex, $item->getUrl()) === 1;
+    }
+
+
+    /**
+     * Fetches the page behind the url of the item, evaluates the xpath on
+     * it and sets the purified result as the new body of the item
+     * @param $item the item whose body should be replaced
+     * @return the same item with the replaced body
+     */
+    public function enhance($item){
+        $file = $this->fileFactory->getFile($item->getUrl(), $this->maximumTimeout);
+
+        $dom = new \DOMDocument();
+        $html = (string) $file->body;
+        // an empty source is not parsed at all: loadHTML rejects it with an
+        // error that @ cannot silence on newer PHP versions. The document
+        // simply stays empty, so the xpath below matches nothing
+        if($html !== '') {
+            // real world markup is rarely valid, silence the parser warnings
+            @$dom->loadHTML($html);
+        }
+
+        $xpath = new \DOMXPath($dom);
+        $xpathResult = $xpath->evaluate($this->articleXPath);
+
+        // in case it wasn't a text query, assume the result is a list of
+        // nodes and turn it into a string
+        if(!is_string($xpathResult)) {
+            $xpathResult = $this->domToString($xpathResult);
+        }
+
+        $sanitizedResult = $this->purifier->purify($xpathResult);
+        $item->setBody($sanitizedResult);
+
+        return $item;
+    }
+
+
+    /**
+     * Method which turns an xpath result to a string
+     * Assumes that the result matches a single element. If the result
+     * is not a single element, you can customize it by overwriting this
+     * method
+     * @param $xpathResult the result from the xpath query
+     * @return the first matched node as html, or an empty string if the
+     * result is not a node list or did not match anything
+     */
+    protected function domToString($xpathResult) {
+        // evaluate() may also yield false or a scalar, which have no length
+        if($xpathResult instanceof \DOMNodeList && $xpathResult->length > 0) {
+            return $this->toInnerHTML($xpathResult->item(0));
+        }
+
+        return "";
+    }
+
+
+    /**
+     * Serializes a node to html. The node is copied together with all of
+     * its children into a fresh document, so the returned markup contains
+     * the node itself and not only its children
+     * @param $node the dom node that should be serialized
+     * @return string the trimmed html of the node
+     */
+    protected function toInnerHTML($node) {
+        $dom = new \DOMDocument();
+        $dom->appendChild($dom->importNode($node, true));
+        return trim($dom->saveHTML());
+    }
+
+
+}
\ No newline at end of file
diff --git a/utility/articleenhancer/cyanideandhappinessenhancer.php b/utility/articleenhancer/cyanideandhappinessenhancer.php
new file mode 100644
index 000000000..1faee6d5c
--- /dev/null
+++ b/utility/articleenhancer/cyanideandhappinessenhancer.php
@@ -0,0 +1,46 @@
+.
+*
+*/
+
+namespace OCA\News\Utility\ArticleEnhancer;
+
+use \OCA\News\Utility\SimplePieFileFactory;
+
+
+/**
+ * Enhancer for explosm.net comics: replaces the body of an item with the
+ * markup selected by the xpath below from the linked comic page.
+ *
+ * Note that the constructor takes the file factory first and the purifier
+ * second, which is the reverse of the parent constructor.
+ */
+class CyanideAndHappinessEnhancer extends ArticleEnhancer {
+
+
+    /**
+     * @param SimplePieFileFactory $fileFactory a factory for getting a simple
+     * pie file instance
+     * @param $purifier the purifier object to clean the extracted html
+     * @param int $timeout maximum timeout in seconds for fetching the page
+     */
+    public function __construct(SimplePieFileFactory $fileFactory, $purifier,
+                                $timeout){
+        parent::__construct(
+            $purifier,
+            $fileFactory,
+            // match article url; the dot is escaped so that it only matches
+            // a literal dot and not any character
+            '/explosm\.net\/comics/',
+            '//*[@id=\'maincontent\']/div[2]/div', // xpath statement to extract the html from the page
+            $timeout
+        );
+    }
+
+
+}
\ No newline at end of file
diff --git a/utility/articleenhancer/defaultenhancer.php b/utility/articleenhancer/defaultenhancer.php
new file mode 100644
index 000000000..eb3045ceb
--- /dev/null
+++ b/utility/articleenhancer/defaultenhancer.php
@@ -0,0 +1,49 @@
+.
+*
+*/
+
+namespace OCA\News\Utility\ArticleEnhancer;
+
+use \OCA\News\Utility\SimplePieFileFactory;
+
+
+/**
+ * Fallback enhancer: accepts every item and returns it without touching it
+ */
+class DefaultEnhancer extends ArticleEnhancer {
+
+
+    public function __construct(){
+        // the parent insists on a file factory instance; nothing in this
+        // class ever uses it because both methods below are overridden
+        $unusedFactory = new SimplePieFileFactory();
+        parent::__construct(null, $unusedFactory, null, null, null);
+    }
+
+
+    /**
+     * @param $item the item that should be checked
+     * @return bool always true
+     */
+    public function canHandle($item){
+        return true;
+    }
+
+
+    /**
+     * @param $item the item that should be enhanced
+     * @return the item exactly as it was passed in
+     */
+    public function enhance($item){
+        return $item;
+    }
+
+
+}
\ No newline at end of file
diff --git a/utility/articleenhancer/enhancer.php b/utility/articleenhancer/enhancer.php
new file mode 100644
index 000000000..059904f63
--- /dev/null
+++ b/utility/articleenhancer/enhancer.php
@@ -0,0 +1,52 @@
+.
+*
+*/
+
+namespace OCA\News\Utility\ArticleEnhancer;
+
+
+/**
+ * Registry of article enhancers. Enhancers are asked in the order in which
+ * they were registered and the first one that accepts an item handles it.
+ */
+class Enhancer {
+
+    private $enhancers;
+
+    public function __construct(){
+        $this->enhancers = array();
+    }
+
+
+    /**
+     * Appends an enhancer to the end of the list
+     * @param ArticleEnhancer $enhancer the enhancer that should be added
+     */
+    public function registerEnhancer(ArticleEnhancer $enhancer){
+        $this->enhancers[] = $enhancer;
+    }
+
+
+    /**
+     * Hands the item to the first registered enhancer that can handle it
+     * @param $item the item that should be enhanced
+     * @return the result of the responsible enhancer, or the unchanged item
+     * if no registered enhancer can handle it
+     */
+    public function enhance($item){
+        foreach($this->enhancers as $enhancer){
+            if($enhancer->canHandle($item)){
+                return $enhancer->enhance($item);
+            }
+        }
+
+        // nobody accepted the item: hand it back untouched instead of
+        // implicitly returning null, which callers would treat as the item
+        return $item;
+    }
+
+
+}
\ No newline at end of file
diff --git a/utility/simplepiefilefactory.php b/utility/simplepiefilefactory.php
new file mode 100644
index 000000000..13b56dc9e
--- /dev/null
+++ b/utility/simplepiefilefactory.php
@@ -0,0 +1,35 @@
+.
+*
+*/
+
+namespace OCA\News\Utility;
+
+
+/**
+ * Thin factory around \SimplePie_File so that classes which need to fetch
+ * a url can get the file object injected and replaced in tests
+ */
+class SimplePieFileFactory {
+
+    /**
+     * @param string $url the url that is passed to \SimplePie_File
+     * @param int $timeout the timeout that is passed to \SimplePie_File
+     * @return \SimplePie_File a new file instance for the given url
+     */
+    public function getFile($url, $timeout) {
+        $file = new \SimplePie_File($url, $timeout);
+        return $file;
+    }
+
+}
\ No newline at end of file
--
cgit v1.2.3