summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- dependencyinjection/dicontainer.php 4
-rw-r--r-- tests/unit/utility/articleenhancer/ArticleEnhancerTest.php 45
-rw-r--r-- utility/articleenhancer/articleenhancer.php 46
-rw-r--r-- utility/articleenhancer/cyanideandhappinessenhancer.php 6
4 files changed, 67 insertions, 34 deletions
diff --git a/dependencyinjection/dicontainer.php b/dependencyinjection/dicontainer.php
index 9d9a085b0..de49236c6 100644
--- a/dependencyinjection/dicontainer.php
+++ b/dependencyinjection/dicontainer.php
@@ -109,7 +109,9 @@ class DIContainer extends BaseContainer {
$config->set('Cache.SerializerPath', $directory);
$config->set('HTML.SafeIframe', true);
$config->set('URI.SafeIframeRegexp',
- '%^http://(www.youtube(?:-nocookie)?.com/embed/|player.vimeo.com/video/)%'); //allow YouTube and Vimeo
+ '%^(?:https?:)?//(' .
+ 'www.youtube(?:-nocookie)?.com/embed/|' .
+ 'player.vimeo.com/video/)%'); //allow YouTube and Vimeo
return new \HTMLPurifier($config);
});
diff --git a/tests/unit/utility/articleenhancer/ArticleEnhancerTest.php b/tests/unit/utility/articleenhancer/ArticleEnhancerTest.php
index c808a0e49..5f82a4752 100644
--- a/tests/unit/utility/articleenhancer/ArticleEnhancerTest.php
+++ b/tests/unit/utility/articleenhancer/ArticleEnhancerTest.php
@@ -31,10 +31,10 @@ require_once(__DIR__ . "/../../../classloader.php");
class TestEnhancer extends ArticleEnhancer {
- public function __construct($purifier, $fileFactory, $articleRegex,
- $articleXPATH, $timeout){
- parent::__construct($purifier, $fileFactory, $articleRegex,
- $articleXPATH, $timeout);
+ public function __construct($purifier, $fileFactory, $regexXPathPair,
+ $timeout){
+ parent::__construct($purifier, $fileFactory, $regexXPathPair,
+ $timeout);
}
}
@@ -56,8 +56,10 @@ class ArticleEnhancerTest extends \OCA\AppFramework\Utility\TestUtility {
$this->testEnhancer = new TestEnhancer(
$this->purifier,
$this->fileFactory,
- '/explosm.net\/comics/',
- '//*[@id=\'maincontent\']/div[2]/img',
+ array(
+ '/explosm.net\/comics/' => '//*[@id=\'maincontent\']/div[2]/div/img',
+ '/explosm.net\/shorts/' => '//*[@id=\'maincontent\']/div[2]/div'
+ ),
$this->timeout
);
}
@@ -76,7 +78,7 @@ class ArticleEnhancerTest extends \OCA\AppFramework\Utility\TestUtility {
<body>
<div id="maincontent">
<div>nooo</div>
- <div><img src="hiho"></div>
+ <div><div><img src="hiho"></div></div>
</div>
</body>
</html>';
@@ -99,6 +101,35 @@ class ArticleEnhancerTest extends \OCA\AppFramework\Utility\TestUtility {
}
+ public function testDoesModifiyAllArticlesThatMatch() {
+ $file = new \stdClass;
+ $file->body = '<html>
+ <body>
+ <div id="maincontent">
+ <div>nooo</div>
+ <div><div>rawr</div></div>
+ </div>
+ </body>
+ </html>';
+ $item = new Item();
+ $item->setUrl('https://www.explosm.net/shorts/312');
+ $item->setBody('Hello thar');
+
+ $this->fileFactory->expects($this->once())
+ ->method('getFile')
+ ->with($this->equalTo($item->getUrl()),
+ $this->equalTo($this->timeout))
+ ->will($this->returnValue($file));
+ $this->purifier->expects($this->once())
+ ->method('purify')
+ ->with($this->equalTo('<div>rawr</div>'))
+ ->will($this->returnValue('<div>rawr</div>'));
+
+ $result = $this->testEnhancer->enhance($item);
+ $this->assertEquals('<div>rawr</div>', $result->getBody());
+ }
+
+
public function testModificationHandlesEmptyResults() {
$file = new \stdClass;
$file->body = '<html>
diff --git a/utility/articleenhancer/articleenhancer.php b/utility/articleenhancer/articleenhancer.php
index 194137e72..7fc67c660 100644
--- a/utility/articleenhancer/articleenhancer.php
+++ b/utility/articleenhancer/articleenhancer.php
@@ -32,8 +32,6 @@ abstract class ArticleEnhancer {
private $feedRegex;
- private $articleUrlRegex;
- private $articleXPath;
private $purifier;
private $fileFactory;
private $maximumTimeout;
@@ -43,38 +41,38 @@ abstract class ArticleEnhancer {
* @param $purifier the purifier object to clean the html which will be
* matched
* @param SimplePieFileFactory a factory for getting a simple pie file instance
- * @param string $articleUrlRegex the regex to match which article should be
- * handled
- * @param string $articleXPath the xpath which tells the fetcher with what
- * body the feed should be replaced
+ * @param array $regexXPathPair an associative array containing regex to
+ * match the url and the xpath that should be used for it to extract the
+ * page
* @param int $maximumTimeout maximum timeout in seconds
*/
public function __construct($purifier, SimplePieFileFactory $fileFactory,
- $articleUrlRegex, $articleXPath,
- $maximumTimeout=10){
+ array $regexXPathPair, $maximumTimeout=10){
$this->purifier = $purifier;
- $this->articleUrlRegex = $articleUrlRegex;
- $this->articleXPath = $articleXPath;
+ $this->regexXPathPair = $regexXPathPair;
$this->fileFactory = $fileFactory;
- $this->timeout = $maximumTimeout;
+ $this->maximumTimeout = $maximumTimeout;
}
public function enhance($item){
- if(preg_match($this->articleUrlRegex, $item->getUrl())) {
- $file = $this->fileFactory->getFile($item->getUrl(), $this->maximumTimeout);
- $dom = new \DOMDocument();
- @$dom->loadHTML($file->body);
- $xpath = new \DOMXpath($dom);
- $xpathResult = $xpath->evaluate($this->articleXPath);
-
- // in case it wasnt a text query assume its a single
- if(!is_string($xpathResult)) {
- $xpathResult = $this->domToString($xpathResult);
+ foreach($this->regexXPathPair as $regex => $search) {
+
+ if(preg_match($regex, $item->getUrl())) {
+ $file = $this->fileFactory->getFile($item->getUrl(), $this->maximumTimeout);
+ $dom = new \DOMDocument();
+ @$dom->loadHTML($file->body);
+ $xpath = new \DOMXpath($dom);
+ $xpathResult = $xpath->evaluate($search);
+
+ // if the XPath query did not evaluate to a string, convert the result to one
+ if(!is_string($xpathResult)) {
+ $xpathResult = $this->domToString($xpathResult);
+ }
+
+ $sanitizedResult = $this->purifier->purify($xpathResult);
+ $item->setBody($sanitizedResult);
}
-
- $sanitizedResult = $this->purifier->purify($xpathResult);
- $item->setBody($sanitizedResult);
}
return $item;
diff --git a/utility/articleenhancer/cyanideandhappinessenhancer.php b/utility/articleenhancer/cyanideandhappinessenhancer.php
index 1faee6d5c..037a3179e 100644
--- a/utility/articleenhancer/cyanideandhappinessenhancer.php
+++ b/utility/articleenhancer/cyanideandhappinessenhancer.php
@@ -36,8 +36,10 @@ class CyanideAndHappinessEnhancer extends ArticleEnhancer {
parent::__construct(
$purifier,
$fileFactory,
- '/explosm.net\/comics/', // match article url
- '//*[@id=\'maincontent\']/div[2]/div', // xpath statement to extract the html from the page
+ array(
+ '/explosm.net\/comics/' => '//*[@id=\'maincontent\']/div[2]/div',
+ '/explosm.net\/show/' => '//*[@id=\'videoPlayer\']/iframe'
+ ),
$timeout
);
}