From c56d433ca432fba641a0b734287cca9431fd6d12 Mon Sep 17 00:00:00 2001
From: Bernhard Posselt
Date: Thu, 26 Sep 2013 20:54:56 +0200
Subject: create a separate config file for regex enhancers
---
dependencyinjection/dicontainer.php | 36 ++-
.../articleenhancer/ArticleEnhancerTest.php | 297 ---------------------
.../articleenhancer/RegexArticleEnhancerTest.php | 49 ++++
.../articleenhancer/XPathArticleEnhancerTest.php | 288 ++++++++++++++++++++
utility/articleenhancer/articleenhancer.php | 186 +------------
utility/articleenhancer/enhancers.json | 40 ---
utility/articleenhancer/regexarticleenhancer.php | 55 ++++
utility/articleenhancer/regexenhancers.json | 7 +
utility/articleenhancer/twogagenhancer.php | 52 ----
utility/articleenhancer/xpatharticleenhancer.php | 214 +++++++++++++++
utility/articleenhancer/xpathenhancers.json | 40 +++
11 files changed, 673 insertions(+), 591 deletions(-)
delete mode 100644 tests/unit/utility/articleenhancer/ArticleEnhancerTest.php
create mode 100644 tests/unit/utility/articleenhancer/RegexArticleEnhancerTest.php
create mode 100644 tests/unit/utility/articleenhancer/XPathArticleEnhancerTest.php
delete mode 100644 utility/articleenhancer/enhancers.json
create mode 100644 utility/articleenhancer/regexarticleenhancer.php
create mode 100644 utility/articleenhancer/regexenhancers.json
delete mode 100644 utility/articleenhancer/twogagenhancer.php
create mode 100644 utility/articleenhancer/xpatharticleenhancer.php
create mode 100644 utility/articleenhancer/xpathenhancers.json
diff --git a/dependencyinjection/dicontainer.php b/dependencyinjection/dicontainer.php
index 9efc9669d..4297e7113 100644
--- a/dependencyinjection/dicontainer.php
+++ b/dependencyinjection/dicontainer.php
@@ -61,8 +61,8 @@ use \OCA\News\Utility\Updater;
use \OCA\News\Utility\SimplePieFileFactory;
use \OCA\News\Utility\ArticleEnhancer\Enhancer;
-use \OCA\News\Utility\ArticleEnhancer\ArticleEnhancer;
-use OCA\News\Utility\ArticleEnhancer\TwoGAGEnhancer;
+use \OCA\News\Utility\ArticleEnhancer\XPathArticleEnhancer;
+use OCA\News\Utility\ArticleEnhancer\RegexArticleEnhancer;
use \OCA\News\Middleware\CORSMiddleware;
@@ -257,16 +257,13 @@ class DIContainer extends BaseContainer {
$this['Enhancer'] = $this->share(function($c){
$enhancer = new Enhancer();
- // register enhancers which need special implementation
- $enhancer->registerEnhancer('twogag.com', $c['TwoGAGEnhancer']);
-
// register simple enhancers from config json file
- $enhancerConfig = file_get_contents(
- __DIR__ . '/../utility/articleenhancer/enhancers.json'
+ $xpathEnhancerConfig = file_get_contents(
+ __DIR__ . '/../utility/articleenhancer/xpathenhancers.json'
);
- //print_r( json_decode($enhancerConfig, true) );
- foreach(json_decode($enhancerConfig, true) as $feed => $config) {
- $articleEnhancer = new ArticleEnhancer(
+
+ foreach(json_decode($xpathEnhancerConfig, true) as $feed => $config) {
+ $articleEnhancer = new XPathArticleEnhancer(
$c['HTMLPurifier'],
$c['SimplePieFileFactory'],
$config,
@@ -275,16 +272,17 @@ class DIContainer extends BaseContainer {
$enhancer->registerEnhancer($feed, $articleEnhancer);
}
- return $enhancer;
- });
-
-
- $this['TwoGAGEnhancer'] = $this->share(function($c){
- return new TwoGAGEnhancer(
- $c['SimplePieFileFactory'],
- $c['HTMLPurifier'],
- $c['feedFetcherTimeout']
+ $regexEnhancerConfig = file_get_contents(
+ __DIR__ . '/../utility/articleenhancer/regexenhancers.json'
);
+ foreach(json_decode($regexEnhancerConfig, true) as $feed => $config) {
+ foreach ($config as $matchArticleUrl => $regex) {
+ $articleEnhancer = new RegexArticleEnhancer($matchArticleUrl, $regex);
+ $enhancer->registerEnhancer($feed, $articleEnhancer);
+ }
+ }
+
+ return $enhancer;
});
$this['Fetcher'] = $this->share(function($c){
diff --git a/tests/unit/utility/articleenhancer/ArticleEnhancerTest.php b/tests/unit/utility/articleenhancer/ArticleEnhancerTest.php
deleted file mode 100644
index f252c4fac..000000000
--- a/tests/unit/utility/articleenhancer/ArticleEnhancerTest.php
+++ /dev/null
@@ -1,297 +0,0 @@
-.
-*
-*/
-
-namespace OCA\News\Utility\ArticleEnhancer;
-
-use \OCA\News\Db\Item;
-
-require_once(__DIR__ . "/../../../classloader.php");
-
-
-class TestEnhancer extends ArticleEnhancer {
- public function __construct($purifier, $fileFactory, $regexXPathPair,
- $timeout){
- parent::__construct($purifier, $fileFactory, $regexXPathPair,
- $timeout);
- }
-}
-
-
-class ArticleEnhancerTest extends \OCA\AppFramework\Utility\TestUtility {
-
- private $purifier;
- private $testEnhancer;
- private $fileFactory;
- private $timeout;
-
- protected function setUp() {
- $timeout = 30;
- $this->fileFactory = $this->getMockBuilder('\OCA\News\Utility\SimplePieFileFactory')
- ->disableOriginalConstructor()
- ->getMock();
- $this->purifier = $this->getMock('purifier', array('purify'));
-
- $this->testEnhancer = new TestEnhancer(
- $this->purifier,
- $this->fileFactory,
- array(
- '/explosm.net\/comics/' => '//*[@id=\'maincontent\']/div[2]/div/span',
- '/explosm.net\/shorts/' => '//*[@id=\'maincontent\']/div/div',
- '/explosm.net\/all/' => '//body/*',
- '/themerepublic.net/' => '//*[@class=\'post hentry\']'
- ),
- $this->timeout
- );
- }
-
-
- public function testDoesNotModifiyNotMatchingResults() {
- $item = new Item();
- $item->setUrl('http://explosm.net');
- $this->assertEquals($item, $this->testEnhancer->enhance($item));
- }
-
-
- public function testDoesModifiyArticlesThatMatch() {
- $file = new \stdClass;
- $file->headers = array("content-type"=>"text/html; charset=utf-8");
- $file->body = '
-
-
-
- ';
- $item = new Item();
- $item->setUrl('https://www.explosm.net/comics/312');
- $item->setBody('Hello thar');
-
- $this->fileFactory->expects($this->once())
- ->method('getFile')
- ->with($this->equalTo($item->getUrl()),
- $this->equalTo($this->timeout))
- ->will($this->returnValue($file));
- $this->purifier->expects($this->once())
- ->method('purify')
- ->with($this->equalTo('hiho'))
- ->will($this->returnValue('hiho'));
-
- $result = $this->testEnhancer->enhance($item);
- $this->assertEquals('hiho', $result->getBody());
- }
-
-
- public function testDoesModifiyAllArticlesThatMatch() {
- $file = new \stdClass;
- $file->headers = array("content-type"=>"text/html; charset=utf-8");
- $file->body = '
-
-
-
- ';
- $item = new Item();
- $item->setUrl('https://www.explosm.net/shorts/312');
- $item->setBody('Hello thar');
-
- $this->fileFactory->expects($this->once())
- ->method('getFile')
- ->with($this->equalTo($item->getUrl()),
- $this->equalTo($this->timeout))
- ->will($this->returnValue($file));
- $this->purifier->expects($this->once())
- ->method('purify')
- ->with($this->equalTo('hiho
rawr
'))
- ->will($this->returnValue('hiho
rawr
'));
-
- $result = $this->testEnhancer->enhance($item);
- $this->assertEquals('hiho
rawr
', $result->getBody());
- }
-
-
- public function testModificationHandlesEmptyResults() {
- $file = new \stdClass;
- $file->headers = array("content-type"=>"text/html; charset=utf-8");
- $file->body = '
-
-
-
-
- ';
- $item = new Item();
- $item->setUrl('https://www.explosm.net/comics/312');
- $item->setBody('Hello thar');
-
- $this->fileFactory->expects($this->once())
- ->method('getFile')
- ->with($this->equalTo($item->getUrl()),
- $this->equalTo($this->timeout))
- ->will($this->returnValue($file));
- $this->purifier->expects($this->once())
- ->method('purify')
- ->with($this->equalTo(null))
- ->will($this->returnValue(null));
-
- $result = $this->testEnhancer->enhance($item);
- $this->assertEquals(null, $result->getBody());
- }
-
-
- public function testModificationDoesNotBreakOnEmptyDom() {
- $file = new \stdClass;
- $file->headers = array("content-type"=>"text/html; charset=utf-8");
- $file->body = '';
- $item = new Item();
- $item->setUrl('https://www.explosm.net/comics/312');
- $item->setBody('Hello thar');
-
- $this->fileFactory->expects($this->once())
- ->method('getFile')
- ->with($this->equalTo($item->getUrl()),
- $this->equalTo($this->timeout))
- ->will($this->returnValue($file));
- $this->purifier->expects($this->once())
- ->method('purify')
- ->with($this->equalTo(null))
- ->will($this->returnValue(null));
-
- $result = $this->testEnhancer->enhance($item);
- $this->assertEquals(null, $result->getBody());
- }
-
-
- public function testModificationDoesNotBreakOnBrokenDom() {
- $file = new \stdClass;
- $file->headers = array("content-type"=>"text/html; charset=utf-8");
- $file->body = '
-
-
-
-
- ';
- $item = new Item();
- $item->setUrl('https://www.explosm.net/comics/312');
- $item->setBody('Hello thar');
-
- $this->fileFactory->expects($this->once())
- ->method('getFile')
- ->with($this->equalTo($item->getUrl()),
- $this->equalTo($this->timeout))
- ->will($this->returnValue($file));
- $this->purifier->expects($this->once())
- ->method('purify')
- ->with($this->equalTo(null))
- ->will($this->returnValue(null));
-
- $result = $this->testEnhancer->enhance($item);
- $this->assertEquals(null, $result->getBody());
- }
-
-
- public function testTransformRelativeUrls() {
- $file = new \stdClass;
- $file->headers = array("content-type"=>"text/html; charset=utf-8");
- $file->body = '
-
- link
- link2
-
-
- ';
- $item = new Item();
- $item->setUrl('https://www.explosm.net/all/312');
- $item->setBody('Hello thar');
-
- $this->fileFactory->expects($this->once())
- ->method('getFile')
- ->with($this->equalTo($item->getUrl()),
- $this->equalTo($this->timeout))
- ->will($this->returnValue($file));
- $this->purifier->expects($this->once())
- ->method('purify')
- ->with($this->equalTo('linklink2'))
- ->will($this->returnValue('linklink2'));
-
- $result = $this->testEnhancer->enhance($item);
- $this->assertEquals('linklink2', $result->getBody());
- }
-
- public function testTransformRelativeUrlSpecials() {
- $file = new \stdClass;
- $file->headers = array("content-type"=>"text/html; charset=utf-8");
- $file->body = '
-
-
-
- ';
- $item = new Item();
- $item->setUrl('https://username:secret@www.explosm.net/all/312');
- $item->setBody('Hello thar');
-
- $this->fileFactory->expects($this->once())
- ->method('getFile')
- ->with($this->equalTo($item->getUrl()),
- $this->equalTo($this->timeout))
- ->will($this->returnValue($file));
- $this->purifier->expects($this->once())
- ->method('purify')
- ->with($this->equalTo(''))
- ->will($this->returnValue(''));
-
- $result = $this->testEnhancer->enhance($item);
- $this->assertEquals('', $result->getBody());
- }
-
- public function testDontTransformAbsoluteUrlsAndMails() {
- $file = new \stdClass;
- $file->headers = array("content-type"=>"text/html; charset=utf-8");
- $file->body = '
-
-
- mail
-
- ';
- $item = new Item();
- $item->setUrl('https://www.explosm.net/all/312');
- $item->setBody('Hello thar');
-
- $this->fileFactory->expects($this->once())
- ->method('getFile')
- ->with($this->equalTo($item->getUrl()),
- $this->equalTo($this->timeout))
- ->will($this->returnValue($file));
- $this->purifier->expects($this->once())
- ->method('purify')
- ->with($this->equalTo('mail'))
- ->will($this->returnValue('mail'));
-
- $result = $this->testEnhancer->enhance($item);
- $this->assertEquals('mail', $result->getBody());
- }
-
-}
\ No newline at end of file
diff --git a/tests/unit/utility/articleenhancer/RegexArticleEnhancerTest.php b/tests/unit/utility/articleenhancer/RegexArticleEnhancerTest.php
new file mode 100644
index 000000000..f775eac91
--- /dev/null
+++ b/tests/unit/utility/articleenhancer/RegexArticleEnhancerTest.php
@@ -0,0 +1,49 @@
+.
+*
+*/
+
+namespace OCA\News\Utility\ArticleEnhancer;
+
+use \OCA\News\Db\Item;
+
+require_once(__DIR__ . "/../../../classloader.php");
+
+
+class RegexArticleEnhancerTest extends \OCA\AppFramework\Utility\TestUtility {
+
+
+	/**
+	 * An enhancer built with an article-url pattern and a
+	 * pattern => replacement map is expected to rewrite the body of an
+	 * item whose url matches ("tes(ts)" becomes "heho" + group 1 + "tests").
+	 */
+	public function testRegexEnhancer() {
+		$replacements = array("%tes(ts)%" => "heho$1tests");
+		$enhancer = new RegexArticleEnhancer('%john.com%', $replacements);
+
+		$article = new Item();
+		$article->setBody('atests is a nice thing');
+		$article->setUrl('http://john.com');
+
+		$enhanced = $enhancer->enhance($article);
+
+		$this->assertEquals('ahehotstests is a nice thing', $enhanced->getBody());
+	}
+
+
+}
\ No newline at end of file
diff --git a/tests/unit/utility/articleenhancer/XPathArticleEnhancerTest.php b/tests/unit/utility/articleenhancer/XPathArticleEnhancerTest.php
new file mode 100644
index 000000000..b9c902147
--- /dev/null
+++ b/tests/unit/utility/articleenhancer/XPathArticleEnhancerTest.php
@@ -0,0 +1,288 @@
+.
+*
+*/
+
+namespace OCA\News\Utility\ArticleEnhancer;
+
+use \OCA\News\Db\Item;
+
+require_once(__DIR__ . "/../../../classloader.php");
+
+
+class XPathArticleEnhancerTest extends \OCA\AppFramework\Utility\TestUtility {
+
+ private $purifier;
+ private $testEnhancer;
+ private $fileFactory;
+ private $timeout;
+
+	/**
+	 * Builds the mocked file factory and purifier and the enhancer under test.
+	 *
+	 * The timeout has to live on the instance: the test methods build their
+	 * getFile() expectations from $this->timeout, and the enhancer receives
+	 * the same value through its constructor. Assigning a local variable
+	 * here would leave both at null and make those expectations vacuous.
+	 */
+	protected function setUp() {
+		$this->timeout = 30;
+		$this->fileFactory = $this->getMockBuilder('\OCA\News\Utility\SimplePieFileFactory')
+			->disableOriginalConstructor()
+			->getMock();
+		$this->purifier = $this->getMock('purifier', array('purify'));
+
+		$this->testEnhancer = new XPathArticleEnhancer(
+			$this->purifier,
+			$this->fileFactory,
+			array(
+				'/explosm.net\/comics/' => '//*[@id=\'maincontent\']/div[2]/div/span',
+				'/explosm.net\/shorts/' => '//*[@id=\'maincontent\']/div/div',
+				'/explosm.net\/all/' => '//body/*',
+				'/themerepublic.net/' => '//*[@class=\'post hentry\']'
+			),
+			$this->timeout
+		);
+	}
+
+
+ public function testDoesNotModifiyNotMatchingResults() {
+ $item = new Item();
+ $item->setUrl('http://explosm.net');
+ $this->assertEquals($item, $this->testEnhancer->enhance($item));
+ }
+
+
+ public function testDoesModifiyArticlesThatMatch() {
+ $file = new \stdClass;
+ $file->headers = array("content-type"=>"text/html; charset=utf-8");
+ $file->body = '
+
+
+
+ ';
+ $item = new Item();
+ $item->setUrl('https://www.explosm.net/comics/312');
+ $item->setBody('Hello thar');
+
+ $this->fileFactory->expects($this->once())
+ ->method('getFile')
+ ->with($this->equalTo($item->getUrl()),
+ $this->equalTo($this->timeout))
+ ->will($this->returnValue($file));
+ $this->purifier->expects($this->once())
+ ->method('purify')
+ ->with($this->equalTo('hiho'))
+ ->will($this->returnValue('hiho'));
+
+ $result = $this->testEnhancer->enhance($item);
+ $this->assertEquals('hiho', $result->getBody());
+ }
+
+
+ public function testDoesModifiyAllArticlesThatMatch() {
+ $file = new \stdClass;
+ $file->headers = array("content-type"=>"text/html; charset=utf-8");
+ $file->body = '
+
+
+
+ ';
+ $item = new Item();
+ $item->setUrl('https://www.explosm.net/shorts/312');
+ $item->setBody('Hello thar');
+
+ $this->fileFactory->expects($this->once())
+ ->method('getFile')
+ ->with($this->equalTo($item->getUrl()),
+ $this->equalTo($this->timeout))
+ ->will($this->returnValue($file));
+ $this->purifier->expects($this->once())
+ ->method('purify')
+ ->with($this->equalTo('hiho
rawr
'))
+ ->will($this->returnValue('hiho
rawr
'));
+
+ $result = $this->testEnhancer->enhance($item);
+ $this->assertEquals('hiho
rawr
', $result->getBody());
+ }
+
+
+ public function testModificationHandlesEmptyResults() {
+ $file = new \stdClass;
+ $file->headers = array("content-type"=>"text/html; charset=utf-8");
+ $file->body = '
+
+
+
+
+ ';
+ $item = new Item();
+ $item->setUrl('https://www.explosm.net/comics/312');
+ $item->setBody('Hello thar');
+
+ $this->fileFactory->expects($this->once())
+ ->method('getFile')
+ ->with($this->equalTo($item->getUrl()),
+ $this->equalTo($this->timeout))
+ ->will($this->returnValue($file));
+ $this->purifier->expects($this->once())
+ ->method('purify')
+ ->with($this->equalTo(null))
+ ->will($this->returnValue(null));
+
+ $result = $this->testEnhancer->enhance($item);
+ $this->assertEquals(null, $result->getBody());
+ }
+
+
+ public function testModificationDoesNotBreakOnEmptyDom() {
+ $file = new \stdClass;
+ $file->headers = array("content-type"=>"text/html; charset=utf-8");
+ $file->body = '';
+ $item = new Item();
+ $item->setUrl('https://www.explosm.net/comics/312');
+ $item->setBody('Hello thar');
+
+ $this->fileFactory->expects($this->once())
+ ->method('getFile')
+ ->with($this->equalTo($item->getUrl()),
+ $this->equalTo($this->timeout))
+ ->will($this->returnValue($file));
+ $this->purifier->expects($this->once())
+ ->method('purify')
+ ->with($this->equalTo(null))
+ ->will($this->returnValue(null));
+
+ $result = $this->testEnhancer->enhance($item);
+ $this->assertEquals(null, $result->getBody());
+ }
+
+
+ public function testModificationDoesNotBreakOnBrokenDom() {
+ $file = new \stdClass;
+ $file->headers = array("content-type"=>"text/html; charset=utf-8");
+ $file->body = '
+
+
+
+