From a9eb72911b6f022da645dc08cf8c0f4b1702d1e1 Mon Sep 17 00:00:00 2001 From: Bernhard Posselt Date: Wed, 28 Aug 2013 17:26:38 +0200 Subject: add enhancers for articles, fix #14 --- CHANGELOG | 5 + businesslayer/feedbusinesslayer.php | 9 +- dependencyinjection/dicontainer.php | 37 ++++- tests/unit/businesslayer/FeedBusinessLayerTest.php | 163 +++---------------- .../articleenhancer/ArticleEnhancerTest.php | 178 +++++++++++++++++++++ .../articleenhancer/DefaultEnhancerTest.php | 54 +++++++ .../unit/utility/articleenhancer/EnhancerTest.php | 109 +++++++++++++ utility/articleenhancer/articleenhancer.php | 112 +++++++++++++ .../cyanideandhappinessenhancer.php | 46 ++++++ utility/articleenhancer/defaultenhancer.php | 49 ++++++ utility/articleenhancer/enhancer.php | 52 ++++++ utility/simplepiefilefactory.php | 35 ++++ 12 files changed, 707 insertions(+), 142 deletions(-) create mode 100644 tests/unit/utility/articleenhancer/ArticleEnhancerTest.php create mode 100644 tests/unit/utility/articleenhancer/DefaultEnhancerTest.php create mode 100644 tests/unit/utility/articleenhancer/EnhancerTest.php create mode 100644 utility/articleenhancer/articleenhancer.php create mode 100644 utility/articleenhancer/cyanideandhappinessenhancer.php create mode 100644 utility/articleenhancer/defaultenhancer.php create mode 100644 utility/articleenhancer/enhancer.php create mode 100644 utility/simplepiefilefactory.php diff --git a/CHANGELOG b/CHANGELOG index aeaa24667..25fb1b237 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,8 @@ +owncloud-news (1.207) +* Add possibility to hook up article enhancers which fetch article content directly from the web page +* Add article enhancer for explosm.net to directly fetch comics + + owncloud-news (1.206) * Also handle URLErrors in updater script that are thrown when the domain of a feed is not found diff --git a/businesslayer/feedbusinesslayer.php b/businesslayer/feedbusinesslayer.php index 8cd4fc663..764cb07c6 100644 --- a/businesslayer/feedbusinesslayer.php +++ b/businesslayer/feedbusinesslayer.php @@ -36,6 +36,8 @@ use \OCA\News\Utility\Fetcher; use \OCA\News\Utility\FetcherException; use \OCA\News\Utility\ImportParser; +use \OCA\News\Utility\ArticleEnhancer\Enhancer; + class FeedBusinessLayer extends BusinessLayer { private $feedFetcher; @@ -44,12 +46,14 @@ class FeedBusinessLayer extends BusinessLayer { private $timeFactory; private $importParser; private $autoPurgeMinimumInterval; + private $enhancer; public function __construct(FeedMapper $feedMapper, Fetcher $feedFetcher, ItemMapper $itemMapper, API $api, TimeFactory $timeFactory, ImportParser $importParser, - $autoPurgeMinimumInterval){ + $autoPurgeMinimumInterval, + Enhancer $enhancer){ parent::__construct($feedMapper); $this->feedFetcher = $feedFetcher; $this->itemMapper = $itemMapper; @@ -57,6 +61,7 @@ class FeedBusinessLayer extends BusinessLayer { $this->timeFactory = $timeFactory; $this->importParser = $importParser; $this->autoPurgeMinimumInterval = $autoPurgeMinimumInterval; + $this->enhancer = $enhancer; } /** @@ -118,6 +123,7 @@ class FeedBusinessLayer extends BusinessLayer { continue; } catch(DoesNotExistException $ex){ $unreadCount += 1; + $item = $this->enhancer->enhance($item); $this->itemMapper->insert($item); } } @@ -183,6 +189,7 @@ class FeedBusinessLayer extends BusinessLayer { try { $this->itemMapper->findByGuidHash($item->getGuidHash(), $feedId, $userId); } catch(DoesNotExistException $ex){ + $item = $this->enhancer->enhance($item); $this->itemMapper->insert($item); } } diff --git a/dependencyinjection/dicontainer.php b/dependencyinjection/dicontainer.php index 48206eea5..39f5a7be4 100644 --- a/dependencyinjection/dicontainer.php +++ b/dependencyinjection/dicontainer.php @@ -54,6 +54,11 @@ use \OCA\News\Utility\TwitterFetcher; use \OCA\News\Utility\OPMLExporter; use \OCA\News\Utility\ImportParser; use \OCA\News\Utility\Updater; +use \OCA\News\Utility\SimplePieFileFactory; + +use \OCA\News\Utility\ArticleEnhancer\Enhancer; +use \OCA\News\Utility\ArticleEnhancer\DefaultEnhancer; +use \OCA\News\Utility\ArticleEnhancer\CyanideAndHappinessEnhancer; require_once __DIR__ . '/../3rdparty/htmlpurifier/library/HTMLPurifier.auto.php'; @@ -167,7 +172,8 @@ class DIContainer extends BaseContainer { $c['API'], $c['TimeFactory'], $c['ImportParser'], - $c['autoPurgeMinimumInterval']); + $c['autoPurgeMinimumInterval'], + $c['Enhancer']); }); $this['ItemBusinessLayer'] = $this->share(function($c){ @@ -223,6 +229,30 @@ class DIContainer extends BaseContainer { /** * Utility */ + $this['Enhancer'] = $this->share(function($c){ + $enhancer = new Enhancer(); + + // register fetchers in order + // the most generic enhancer should be the last one + $enhancer->registerEnhancer($c['CyanideAndHappinessEnhancer']); + $enhancer->registerEnhancer($c['DefaultEnhancer']); + + return $enhancer; + }); + + $this['DefaultEnhancer'] = $this->share(function($c){ + return new DefaultEnhancer(); + }); + + $this['CyanideAndHappinessEnhancer'] = $this->share(function($c){ + return new CyanideAndHappinessEnhancer( + $c['SimplePieFileFactory'], + $c['HTMLPurifier'], + $c['feedFetcherTimeout'] + ); + }); + + $this['Fetcher'] = $this->share(function($c){ $fetcher = new Fetcher(); @@ -250,6 +280,7 @@ class DIContainer extends BaseContainer { return new TwitterFetcher($c['FeedFetcher']); }); + $this['ImportParser'] = $this->share(function($c){ return new ImportParser($c['TimeFactory'], $c['HTMLPurifier']); }); @@ -268,6 +299,10 @@ class DIContainer extends BaseContainer { $c['ItemBusinessLayer']); }); + $this['SimplePieFileFactory'] = $this->share(function($c){ + return new SimplePieFileFactory(); + }); + } } diff --git a/tests/unit/businesslayer/FeedBusinessLayerTest.php b/tests/unit/businesslayer/FeedBusinessLayerTest.php index 0a923cace..7a4cf24e6 100644 --- a/tests/unit/businesslayer/FeedBusinessLayerTest.php +++ b/tests/unit/businesslayer/FeedBusinessLayerTest.php @@ -48,6 +48,7 @@ class FeedBusinessLayerTest extends \OCA\AppFramework\Utility\TestUtility { private $time; private $importParser; private $autoPurgeMinimumInterval; + private $enhancer; protected function setUp(){ $this->api = $this->getAPIMock(); @@ -72,9 +73,13 @@ class FeedBusinessLayerTest extends \OCA\AppFramework\Utility\TestUtility { $this->importParser = $this->getMockBuilder('\OCA\News\Utility\ImportParser') ->disableOriginalConstructor() ->getMock(); + $this->enhancer = $this->getMockBuilder('\OCA\News\Utility\ArticleEnhancer\Enhancer') + ->disableOriginalConstructor() + ->getMock(); $this->feedBusinessLayer = new FeedBusinessLayer($this->feedMapper, $this->fetcher, $this->itemMapper, $this->api, - $timeFactory, $this->importParser, $this->autoPurgeMinimumInterval); + $timeFactory, $this->importParser, $this->autoPurgeMinimumInterval, + $this->enhancer); $this->user = 'jack'; $response = 'hi'; } @@ -146,6 +151,10 @@ class FeedBusinessLayerTest extends \OCA\AppFramework\Utility\TestUtility { $this->equalTo($item2->getFeedId()), $this->equalTo($this->user)) ->will($this->throwException($ex)); + $this->enhancer->expects($this->at(0)) + ->method('enhance') + ->with($this->equalTo($return[1][1])) + ->will($this->returnValue($return[1][1])); $this->itemMapper->expects($this->at(1)) ->method('insert') ->with($this->equalTo($return[1][1])); @@ -156,6 +165,10 @@ class FeedBusinessLayerTest extends \OCA\AppFramework\Utility\TestUtility { $this->equalTo($item1->getFeedId()), $this->equalTo($this->user)) ->will($this->throwException($ex)); + $this->enhancer->expects($this->at(1)) + ->method('enhance') + ->with($this->equalTo($return[1][0])) + ->will($this->returnValue($return[1][0])); $this->itemMapper->expects($this->at(3)) ->method('insert') ->with($this->equalTo($return[1][0])); @@ -201,6 +214,10 @@ class FeedBusinessLayerTest extends \OCA\AppFramework\Utility\TestUtility { $this->equalTo($item2->getFeedId()), $this->equalTo($this->user)) ->will($this->throwException($ex)); + $this->enhancer->expects($this->at(0)) + ->method('enhance') + ->with($this->equalTo($return[1][1])) + ->will($this->returnValue($return[1][1])); $this->itemMapper->expects($this->at(1)) ->method('insert') ->with($this->equalTo($return[1][1])); @@ -249,6 +266,10 @@ class FeedBusinessLayerTest extends \OCA\AppFramework\Utility\TestUtility { $this->equalTo($items[0]->getFeedId()), $this->equalTo($this->user)) ->will($this->throwException($ex)); + $this->enhancer->expects($this->at(0)) + ->method('enhance') + ->with($this->equalTo($items[0])) + ->will($this->returnValue($items[0])); $this->itemMapper->expects($this->once()) ->method('insert') ->with($this->equalTo($items[0])); @@ -263,145 +284,7 @@ class FeedBusinessLayerTest extends \OCA\AppFramework\Utility\TestUtility { $this->assertEquals($return, $feed); } - - public function testUpdateUpdatesEntryNotWhenPubDateSame(){ - $feed = new Feed(); - $feed->setId(3); - $feed->getUrl('test'); - - $item = new Item(); - $item->setGuidHash(md5('hi')); - $item->setPubDate(3333); - $items = array( - $item - ); - - $fetchReturn = array($feed, $items); - - $this->feedMapper->expects($this->at(0)) - ->method('find') - ->with($this->equalTo($feed->getId()), - $this->equalTo($this->user)) - ->will($this->returnValue($feed)); - $this->fetcher->expects($this->once()) - ->method('fetch') - ->will($this->returnValue($fetchReturn)); - $this->itemMapper->expects($this->once()) - ->method('findByGuidHash') - ->with($this->equalTo($item->getGuidHash()), - $this->equalTo($feed->getId()), - $this->equalTo($this->user)) - ->will($this->returnValue($item)); - $this->itemMapper->expects($this->never()) - ->method('insert'); - $this->itemMapper->expects($this->never()) - ->method('delete'); - - $this->feedMapper->expects($this->at(1)) - ->method('find') - ->with($feed->getId(), $this->user) - ->will($this->returnValue($feed)); - - $return = $this->feedBusinessLayer->update($feed->getId(), $this->user); - - $this->assertEquals($return, $feed); - } - - - public function testUpdateUpdatesEntryNotWhenPubDateUnkown(){ - $feed = new Feed(); - $feed->setId(3); - $feed->getUrl('test'); - - $item = new Item(); - $item->setGuidHash(md5('hi')); - $item->setPubDate(false); - $items = array( - $item - ); - - $item2 = new Item(); - $item2->setPubDate(0); - - $fetchReturn = array($feed, $items); - - $this->feedMapper->expects($this->at(0)) - ->method('find') - ->with($this->equalTo($feed->getId()), - $this->equalTo($this->user)) - ->will($this->returnValue($feed)); - $this->fetcher->expects($this->once()) - ->method('fetch') - ->will($this->returnValue($fetchReturn)); - $this->itemMapper->expects($this->once()) - ->method('findByGuidHash') - ->with($this->equalTo($item->getGuidHash()), - $this->equalTo($feed->getId()), - $this->equalTo($this->user)) - ->will($this->returnValue($item2)); - $this->itemMapper->expects($this->never()) - ->method('insert'); - $this->itemMapper->expects($this->never()) - ->method('delete'); - - $this->feedMapper->expects($this->at(1)) - ->method('find') - ->with($feed->getId(), $this->user) - ->will($this->returnValue($feed)); - - $return = $this->feedBusinessLayer->update($feed->getId(), $this->user); - - $this->assertEquals($return, $feed); - } - - public function testUpdateUpdatesEntryNotWhenNoPubDate(){ - $feed = new Feed(); - $feed->setId(3); - $feed->getUrl('test'); - - $item = new Item(); - $item->setGuidHash(md5('hi')); - $item->setPubDate(null); - $items = array( - $item - ); - - $item2 = new Item(); - $item2->setPubDate(null); - - $fetchReturn = array($feed, $items); - - $this->feedMapper->expects($this->at(0)) - ->method('find') - ->with($this->equalTo($feed->getId()), - $this->equalTo($this->user)) - ->will($this->returnValue($feed)); - $this->fetcher->expects($this->once()) - ->method('fetch') - ->will($this->returnValue($fetchReturn)); - $this->itemMapper->expects($this->once()) - ->method('findByGuidHash') - ->with($this->equalTo($item->getGuidHash()), - $this->equalTo($feed->getId()), - $this->equalTo($this->user)) - ->will($this->returnValue($item2)); - $this->itemMapper->expects($this->never()) - ->method('insert'); - $this->itemMapper->expects($this->never()) - ->method('delete'); - - $this->feedMapper->expects($this->at(1)) - ->method('find') - ->with($feed->getId(), $this->user) - ->will($this->returnValue($feed)); - - $return = $this->feedBusinessLayer->update($feed->getId(), $this->user); - - $this->assertEquals($return, $feed); - } - - - public function testCreateUpdateFails(){ + public function testUpdateFails(){ $feed = new Feed(); $feed->setId(3); $feed->getUrl('test'); diff --git a/tests/unit/utility/articleenhancer/ArticleEnhancerTest.php b/tests/unit/utility/articleenhancer/ArticleEnhancerTest.php new file mode 100644 index 000000000..8d507c0f8 --- /dev/null +++ b/tests/unit/utility/articleenhancer/ArticleEnhancerTest.php @@ -0,0 +1,178 @@ +. +* +*/ + +namespace OCA\News\Utility\ArticleEnhancer; + +use \OCA\News\Db\Item; + +require_once(__DIR__ . "/../../../classloader.php"); + + +class TestEnhancer extends ArticleEnhancer { + public function __construct($purifier, $fileFactory, $articleRegex, + $articleXPATH, $timeout){ + parent::__construct($purifier, $fileFactory, $articleRegex, + $articleXPATH, $timeout); + } +} + + +class ArticleEnhancerTest extends \OCA\AppFramework\Utility\TestUtility { + + private $purifier; + private $testEnhancer; + private $fileFactory; + private $timeout; + + protected function setUp() { + $timeout = 30; + $this->fileFactory = $this->getMockBuilder('\OCA\News\Utility\SimplePieFileFactory') + ->disableOriginalConstructor() + ->getMock(); + $this->purifier = $this->getMock('purifier', array('purify')); + + $this->testEnhancer = new TestEnhancer( + $this->purifier, + $this->fileFactory, + '/explosm.net\/comics/', + '//*[@id=\'maincontent\']/div[2]/img', + $this->timeout + ); + } + + + public function testCanHandle() { + $item = new Item(); + $item->setUrl('http://explosm.net/comics'); + $this->assertTrue($this->testEnhancer->canHandle($item)); + } + + + public function testDoesModifiyArticlesThatMatch() { + $file = new \stdClass; + $file->body = ' + +
+
nooo
+
+
+ + '; + $item = new Item(); + $item->setUrl('https://www.explosm.net/comics/312'); + $item->setBody('Hello thar'); + + $this->fileFactory->expects($this->once()) + ->method('getFile') + ->with($this->equalTo($item->getUrl()), + $this->equalTo($this->timeout)) + ->will($this->returnValue($file)); + $this->purifier->expects($this->once()) + ->method('purify') + ->with($this->equalTo('')) + ->will($this->returnValue('')); + + $result = $this->testEnhancer->enhance($item); + $this->assertEquals('', $result->getBody()); + } + + + public function testModificationHandlesEmptyResults() { + $file = new \stdClass; + $file->body = ' + +
+
+ + '; + $item = new Item(); + $item->setUrl('https://www.explosm.net/comics/312'); + $item->setBody('Hello thar'); + + $this->fileFactory->expects($this->once()) + ->method('getFile') + ->with($this->equalTo($item->getUrl()), + $this->equalTo($this->timeout)) + ->will($this->returnValue($file)); + $this->purifier->expects($this->once()) + ->method('purify') + ->with($this->equalTo(null)) + ->will($this->returnValue(null)); + + $result = $this->testEnhancer->enhance($item); + $this->assertEquals(null, $result->getBody()); + } + + + public function testModificationDoesNotBreakOnEmptyDom() { + $file = new \stdClass; + $file->body = ''; + $item = new Item(); + $item->setUrl('https://www.explosm.net/comics/312'); + $item->setBody('Hello thar'); + + $this->fileFactory->expects($this->once()) + ->method('getFile') + ->with($this->equalTo($item->getUrl()), + $this->equalTo($this->timeout)) + ->will($this->returnValue($file)); + $this->purifier->expects($this->once()) + ->method('purify') + ->with($this->equalTo(null)) + ->will($this->returnValue(null)); + + $result = $this->testEnhancer->enhance($item); + $this->assertEquals(null, $result->getBody()); + } + + + public function testModificationDoesNotBreakOnBrokenDom() { + $file = new \stdClass; + $file->body = '

+ +

+
+ + '; + $item = new Item(); + $item->setUrl('https://www.explosm.net/comics/312'); + $item->setBody('Hello thar'); + + $this->fileFactory->expects($this->once()) + ->method('getFile') + ->with($this->equalTo($item->getUrl()), + $this->equalTo($this->timeout)) + ->will($this->returnValue($file)); + $this->purifier->expects($this->once()) + ->method('purify') + ->with($this->equalTo(null)) + ->will($this->returnValue(null)); + + $result = $this->testEnhancer->enhance($item); + $this->assertEquals(null, $result->getBody()); + } + + +} \ No newline at end of file diff --git a/tests/unit/utility/articleenhancer/DefaultEnhancerTest.php b/tests/unit/utility/articleenhancer/DefaultEnhancerTest.php new file mode 100644 index 000000000..901428616 --- /dev/null +++ b/tests/unit/utility/articleenhancer/DefaultEnhancerTest.php @@ -0,0 +1,54 @@ +. +* +*/ + +namespace OCA\News\Utility\ArticleEnhancer; + +use \OCA\News\Db\Item; + +require_once(__DIR__ . "/../../../classloader.php"); + + +class DefaultEnhancerTest extends \OCA\AppFramework\Utility\TestUtility { + + private $testEnhancer; + + protected function setUp() { + $this->testEnhancer = new DefaultEnhancer(); + } + + + public function testCanHandle() { + $item = new Item(); + $this->assertTrue($this->testEnhancer->canHandle($item)); + } + + + public function testEnhance() { + $item = new Item(); + $this->assertEquals($item, $this->testEnhancer->enhance($item)); + } + + +} \ No newline at end of file diff --git a/tests/unit/utility/articleenhancer/EnhancerTest.php b/tests/unit/utility/articleenhancer/EnhancerTest.php new file mode 100644 index 000000000..559722e60 --- /dev/null +++ b/tests/unit/utility/articleenhancer/EnhancerTest.php @@ -0,0 +1,109 @@ +. +* +*/ + +namespace OCA\News\Utility\ArticleEnhancer; + +use \OCA\News\Db\Item; + +require_once(__DIR__ . "/../../../classloader.php"); + + +class EnhancerTest extends \OCA\AppFramework\Utility\TestUtility { + + private $enhancer; + private $articleEnhancer; + private $articleEnhancer2; + + protected function setUp(){ + $this->enhancer = new Enhancer(); + $this->articleEnhancer = $this->getMockBuilder( + '\OCA\News\Utility\ArticleEnhancer\ArticleEnhancer') + ->disableOriginalConstructor() + ->getMock(); + $this->articleEnhancer2 = $this->getMockBuilder( + '\OCA\News\Utility\ArticleEnhancer\ArticleEnhancer') + ->disableOriginalConstructor() + ->getMock(); + } + + + public function testFetch(){ + $item = new Item(); + $item->setUrl('hi'); + + $this->articleEnhancer->expects($this->once()) + ->method('canHandle') + ->with($this->equalTo($item)) + ->will($this->returnValue(true)); + $this->enhancer->registerEnhancer($this->articleEnhancer); + + $this->enhancer->enhance($item); + } + + + public function testMultipleFetchers(){ + $item = new Item(); + $item->setUrl('hi'); + $this->articleEnhancer->expects($this->once()) + ->method('canHandle') + ->with($this->equalTo($item)) + ->will($this->returnValue(false)); + $this->articleEnhancer2->expects($this->once()) + ->method('canHandle') + ->with($this->equalTo($item)) + ->will($this->returnValue(true)); + + $this->enhancer->registerEnhancer($this->articleEnhancer); + $this->enhancer->registerEnhancer($this->articleEnhancer2); + + $this->enhancer->enhance($item); + } + + + public function testMultipleFetchersOnlyOneShouldHandle(){ + $item = new Item(); + $item->setUrl('hi'); + $return = 'zeas'; + $this->articleEnhancer->expects($this->once()) + ->method('canHandle') + ->with($this->equalTo($item)) + ->will($this->returnValue(true)); + $this->articleEnhancer->expects($this->once()) + ->method('enhance') + ->with($this->equalTo($item)) + ->will($this->returnValue($return)); + $this->articleEnhancer2->expects($this->never()) + ->method('canHandle'); + + $this->enhancer->registerEnhancer($this->articleEnhancer); + $this->enhancer->registerEnhancer($this->articleEnhancer2); + + $result = $this->enhancer->enhance($item); + + $this->assertEquals($return, $result); + } + + +} \ No newline at end of file diff --git a/utility/articleenhancer/articleenhancer.php b/utility/articleenhancer/articleenhancer.php new file mode 100644 index 000000000..d7701d53b --- /dev/null +++ b/utility/articleenhancer/articleenhancer.php @@ -0,0 +1,112 @@ +. +* +*/ + +namespace OCA\News\Utility\ArticleEnhancer; + +use \OCA\News\Utility\SimplePieFileFactory; + + +abstract class ArticleEnhancer { + + + private $feedRegex; + private $articleUrlRegex; + private $articleXPath; + private $purifier; + private $fileFactory; + private $maximumTimeout; + + + /** + * @param $purifier the purifier object to clean the html which will be + * matched + * @param SimplePieFileFactory a factory for getting a simple pie file instance + * @param string $articleUrlRegex the regex to match which article should be + * handled + * @param string $articleXPath the xpath which tells the fetcher with what + * body the feed should be replaced + * @param int $maximumTimeout maximum timeout in seconds + */ + public function __construct($purifier, SimplePieFileFactory $fileFactory, + $articleUrlRegex, $articleXPath, + $maximumTimeout=10){ + $this->purifier = $purifier; + $this->articleUrlRegex = $articleUrlRegex; + $this->articleXPath = $articleXPath; + $this->fileFactory = $fileFactory; + $this->timeout = $maximumTimeout; + } + + + public function canHandle($item){ + return preg_match($this->articleUrlRegex, $item->getUrl()) == true; + } + + + public function enhance($item){ + $file = $this->fileFactory->getFile($item->getUrl(), $this->maximumTimeout); + $dom = new \DOMDocument(); + @$dom->loadHTML($file->body); + $xpath = new \DOMXpath($dom); + $xpathResult = $xpath->evaluate($this->articleXPath); + + // in case it wasnt a text query assume its a single + if(!is_string($xpathResult)) { + $xpathResult = $this->domToString($xpathResult); + } + + $sanitizedResult = $this->purifier->purify($xpathResult); + $item->setBody($sanitizedResult); + + + return $item; + } + + + /** + * Method which turns an xpath result to a string + * Assumes that the result matches a single element. If the result + * is not a single element, you can customize it by overwriting this + * method + * @param $xpathResult the result from the xpath query + * @return the result as a string + */ + protected function domToString($xpathResult) { + if($xpathResult->length > 0) { + return $this->toInnerHTML($xpathResult->item(0)); + } else { + return ""; + } + } + + + protected function toInnerHTML($node) { + $dom = new \DOMDocument(); + $dom->appendChild($dom->importNode($node, true)); + return trim($dom->saveHTML()); + } + + +} \ No newline at end of file diff --git a/utility/articleenhancer/cyanideandhappinessenhancer.php b/utility/articleenhancer/cyanideandhappinessenhancer.php new file mode 100644 index 000000000..1faee6d5c --- /dev/null +++ b/utility/articleenhancer/cyanideandhappinessenhancer.php @@ -0,0 +1,46 @@ +. +* +*/ + +namespace OCA\News\Utility\ArticleEnhancer; + +use \OCA\News\Utility\SimplePieFileFactory; + + +class CyanideAndHappinessEnhancer extends ArticleEnhancer { + + + public function __construct(SimplePieFileFactory $fileFactory, $purifier, + $timeout){ + parent::__construct( + $purifier, + $fileFactory, + '/explosm.net\/comics/', // match article url + '//*[@id=\'maincontent\']/div[2]/div', // xpath statement to extract the html from the page + $timeout + ); + } + + +} \ No newline at end of file diff --git a/utility/articleenhancer/defaultenhancer.php b/utility/articleenhancer/defaultenhancer.php new file mode 100644 index 000000000..eb3045ceb --- /dev/null +++ b/utility/articleenhancer/defaultenhancer.php @@ -0,0 +1,49 @@ +. +* +*/ + +namespace OCA\News\Utility\ArticleEnhancer; + +use \OCA\News\Utility\SimplePieFileFactory; + + +class DefaultEnhancer extends ArticleEnhancer { + + + public function __construct(){ + parent::__construct(null, new SimplePieFileFactory(), null, null, null); + } + + + public function canHandle($item){ + return true; + } + + + public function enhance($item){ + return $item; + } + + +} \ No newline at end of file diff --git a/utility/articleenhancer/enhancer.php b/utility/articleenhancer/enhancer.php new file mode 100644 index 000000000..059904f63 --- /dev/null +++ b/utility/articleenhancer/enhancer.php @@ -0,0 +1,52 @@ +. +* +*/ + +namespace OCA\News\Utility\ArticleEnhancer; + + +class Enhancer { + + private $enhancers; + + public function __construct(){ + $this->enhancers = array(); + } + + + public function registerEnhancer(ArticleEnhancer $enhancer){ + array_push($this->enhancers, $enhancer); + } + + + public function enhance($item){ + foreach($this->enhancers as $enhancer){ + if($enhancer->canHandle($item)){ + return $enhancer->enhance($item); + } + } + } + + +} \ No newline at end of file diff --git a/utility/simplepiefilefactory.php b/utility/simplepiefilefactory.php new file mode 100644 index 000000000..13b56dc9e --- /dev/null +++ b/utility/simplepiefilefactory.php @@ -0,0 +1,35 @@ +. +* +*/ + +namespace OCA\News\Utility; + + +class SimplePieFileFactory { + + public function getFile($url, $timeout) { + return new \SimplePie_File($url, $timeout); + } + +} \ No newline at end of file -- cgit v1.2.3