summary refs log tree commit diff stats
path: root/utility
diff options
context:
space:
mode:
author Bernhard Posselt <nukeawhale@gmail.com> 2013-08-28 17:26:38 +0200
committer Bernhard Posselt <nukeawhale@gmail.com> 2013-08-28 17:26:38 +0200
commit a9eb72911b6f022da645dc08cf8c0f4b1702d1e1 (patch)
tree 4e16f84cd4a8d0214fc5a445f4817bc4a3a93bf0 /utility
parent 998b3c29438b09e50d2c56ed982b48df8038a910 (diff)
add enhancers for articles, fix #14
Diffstat (limited to 'utility')
-rw-r--r-- utility/articleenhancer/articleenhancer.php 112
-rw-r--r-- utility/articleenhancer/cyanideandhappinessenhancer.php 46
-rw-r--r-- utility/articleenhancer/defaultenhancer.php 49
-rw-r--r-- utility/articleenhancer/enhancer.php 52
-rw-r--r-- utility/simplepiefilefactory.php 35
5 files changed, 294 insertions, 0 deletions
diff --git a/utility/articleenhancer/articleenhancer.php b/utility/articleenhancer/articleenhancer.php
new file mode 100644
index 000000000..d7701d53b
--- /dev/null
+++ b/utility/articleenhancer/articleenhancer.php
@@ -0,0 +1,112 @@
+<?php
+
+/**
+* ownCloud - News
+*
+* @author Alessandro Cosentino
+* @author Bernhard Posselt
+* @copyright 2012 Alessandro Cosentino cosenal@gmail.com
+* @copyright 2012 Bernhard Posselt dev@bernhard-posselt.com
+*
+* This library is free software; you can redistribute it and/or
+* modify it under the terms of the GNU AFFERO GENERAL PUBLIC LICENSE
+* License as published by the Free Software Foundation; either
+* version 3 of the License, or any later version.
+*
+* This library is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU AFFERO GENERAL PUBLIC LICENSE for more details.
+*
+* You should have received a copy of the GNU Affero General Public
+* License along with this library. If not, see <http://www.gnu.org/licenses/>.
+*
+*/
+
+namespace OCA\News\Utility\ArticleEnhancer;
+
+use \OCA\News\Utility\SimplePieFileFactory;
+
+
+/**
+ * Base class for enhancers which replace the body of a feed item with
+ * content extracted from the page that the item links to
+ */
+abstract class ArticleEnhancer {
+
+
+    // NOTE(review): $feedRegex is never read or written in this class
+    private $feedRegex;
+    private $articleUrlRegex;
+    private $articleXPath;
+    private $purifier;
+    private $fileFactory;
+    private $maximumTimeout;
+
+
+    /**
+     * @param $purifier the purifier object to clean the html which will be
+     * matched
+     * @param SimplePieFileFactory $fileFactory a factory for getting a simple
+     * pie file instance
+     * @param string $articleUrlRegex the regex to match which article should be
+     * handled
+     * @param string $articleXPath the xpath which tells the fetcher with what
+     * body the feed should be replaced
+     * @param int $maximumTimeout maximum timeout in seconds
+     */
+    public function __construct($purifier, SimplePieFileFactory $fileFactory,
+                                $articleUrlRegex, $articleXPath,
+                                $maximumTimeout=10){
+        $this->purifier = $purifier;
+        $this->articleUrlRegex = $articleUrlRegex;
+        $this->articleXPath = $articleXPath;
+        $this->fileFactory = $fileFactory;
+        // has to be the declared property because enhance() reads
+        // $this->maximumTimeout when it fetches the article
+        $this->maximumTimeout = $maximumTimeout;
+    }
+
+
+    /**
+     * Checks if the url of the item matches the article url regex
+     * @param $item the item which should be checked, has to provide getUrl()
+     * @return bool true if the url matches, otherwise false
+     */
+    public function canHandle($item){
+        // preg_match returns 1 on a match, 0 on no match and false on error
+        return preg_match($this->articleUrlRegex, $item->getUrl()) === 1;
+    }
+
+
+    /**
+     * Fetches the page behind the url of the item, extracts the part which
+     * is matched by the xpath, purifies it and sets it as body of the item.
+     * If nothing could be fetched or extracted the item is left untouched
+     * @param $item the item which should be enhanced, has to provide getUrl()
+     * and setBody()
+     * @return the passed in item
+     */
+    public function enhance($item){
+        $file = $this->fileFactory->getFile($item->getUrl(), $this->maximumTimeout);
+        $body = $file->body;
+
+        // nothing usable was fetched: keep the body which came with the feed
+        // instead of replacing it with an empty string
+        if(!is_string($body) || trim($body) === '') {
+            return $item;
+        }
+
+        $dom = new \DOMDocument();
+
+        // markup in the wild is rarely valid, so collect the parser errors
+        // internally instead of emitting warnings and restore the previous
+        // setting afterwards
+        $previousSetting = libxml_use_internal_errors(true);
+        $dom->loadHTML($body);
+        libxml_clear_errors();
+        libxml_use_internal_errors($previousSetting);
+
+        $xpath = new \DOMXpath($dom);
+        $xpathResult = $xpath->evaluate($this->articleXPath);
+
+        // in case it wasn't a text query assume it's a single element
+        if(!is_string($xpathResult)) {
+            $xpathResult = $this->domToString($xpathResult);
+        }
+
+        // the xpath did not match anything: keep the body from the feed
+        if(!is_string($xpathResult) || trim($xpathResult) === '') {
+            return $item;
+        }
+
+        $sanitizedResult = $this->purifier->purify($xpathResult);
+        $item->setBody($sanitizedResult);
+
+        return $item;
+    }
+
+
+    /**
+     * Method which turns an xpath result to a string
+     * Assumes that the result matches a single element. If the result
+     * is not a single element, you can customize it by overwriting this
+     * method
+     * @param $xpathResult the result from the xpath query
+     * @return the result as a string
+     */
+    protected function domToString($xpathResult) {
+        if($xpathResult->length > 0) {
+            return $this->toInnerHTML($xpathResult->item(0));
+        } else {
+            return "";
+        }
+    }
+
+
+    /**
+     * Serializes a node together with all of its children to html. The tag
+     * of the node itself is part of the output
+     * @param $node the dom node which should be serialized
+     * @return string the trimmed html
+     */
+    protected function toInnerHTML($node) {
+        $dom = new \DOMDocument();
+        // a node from another document has to be imported before it can be
+        // appended
+        $dom->appendChild($dom->importNode($node, true));
+        return trim($dom->saveHTML());
+    }
+
+
+} \ No newline at end of file
diff --git a/utility/articleenhancer/cyanideandhappinessenhancer.php b/utility/articleenhancer/cyanideandhappinessenhancer.php
new file mode 100644
index 000000000..1faee6d5c
--- /dev/null
+++ b/utility/articleenhancer/cyanideandhappinessenhancer.php
@@ -0,0 +1,46 @@
+<?php
+
+/**
+* ownCloud - News
+*
+* @author Alessandro Cosentino
+* @author Bernhard Posselt
+* @copyright 2012 Alessandro Cosentino cosenal@gmail.com
+* @copyright 2012 Bernhard Posselt dev@bernhard-posselt.com
+*
+* This library is free software; you can redistribute it and/or
+* modify it under the terms of the GNU AFFERO GENERAL PUBLIC LICENSE
+* License as published by the Free Software Foundation; either
+* version 3 of the License, or any later version.
+*
+* This library is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU AFFERO GENERAL PUBLIC LICENSE for more details.
+*
+* You should have received a copy of the GNU Affero General Public
+* License along with this library. If not, see <http://www.gnu.org/licenses/>.
+*
+*/
+
+namespace OCA\News\Utility\ArticleEnhancer;
+
+use \OCA\News\Utility\SimplePieFileFactory;
+
+
+/**
+ * Enhancer for articles whose url points to the comics section of
+ * explosm.net
+ */
+class CyanideAndHappinessEnhancer extends ArticleEnhancer {
+
+
+    /**
+     * Note that the order of the first two parameters is swapped compared to
+     * the parent constructor
+     * @param SimplePieFileFactory $fileFactory a factory for getting a simple
+     * pie file instance
+     * @param $purifier the purifier object which is passed to the parent
+     * @param $timeout passed to the parent as maximum timeout
+     */
+    public function __construct(SimplePieFileFactory $fileFactory, $purifier,
+                                $timeout){
+        parent::__construct(
+            $purifier,
+            $fileFactory,
+            // the dot is escaped so that only the literal host name matches
+            '/explosm\.net\/comics/', // match article url
+            '//*[@id=\'maincontent\']/div[2]/div', // xpath statement to extract the html from the page
+            $timeout
+        );
+    }
+
+
+} \ No newline at end of file
diff --git a/utility/articleenhancer/defaultenhancer.php b/utility/articleenhancer/defaultenhancer.php
new file mode 100644
index 000000000..eb3045ceb
--- /dev/null
+++ b/utility/articleenhancer/defaultenhancer.php
@@ -0,0 +1,49 @@
+<?php
+
+/**
+* ownCloud - News
+*
+* @author Alessandro Cosentino
+* @author Bernhard Posselt
+* @copyright 2012 Alessandro Cosentino cosenal@gmail.com
+* @copyright 2012 Bernhard Posselt dev@bernhard-posselt.com
+*
+* This library is free software; you can redistribute it and/or
+* modify it under the terms of the GNU AFFERO GENERAL PUBLIC LICENSE
+* License as published by the Free Software Foundation; either
+* version 3 of the License, or any later version.
+*
+* This library is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU AFFERO GENERAL PUBLIC LICENSE for more details.
+*
+* You should have received a copy of the GNU Affero General Public
+* License along with this library. If not, see <http://www.gnu.org/licenses/>.
+*
+*/
+
+namespace OCA\News\Utility\ArticleEnhancer;
+
+use \OCA\News\Utility\SimplePieFileFactory;
+
+
+/**
+ * Enhancer which accepts every item and hands it back without changing it
+ */
+class DefaultEnhancer extends ArticleEnhancer {
+
+
+    /**
+     * Passes null for the purifier, the url regex, the xpath and the timeout
+     * to the parent, only a file factory instance is created
+     */
+    public function __construct(){
+        $fileFactory = new SimplePieFileFactory();
+        parent::__construct(null, $fileFactory, null, null, null);
+    }
+
+
+    /**
+     * @param $item not inspected
+     * @return bool always true
+     */
+    public function canHandle($item){
+        return true;
+    }
+
+
+    /**
+     * @param $item the item which is handed back
+     * @return the passed in item without any modification
+     */
+    public function enhance($item){
+        return $item;
+    }
+
+
+} \ No newline at end of file
diff --git a/utility/articleenhancer/enhancer.php b/utility/articleenhancer/enhancer.php
new file mode 100644
index 000000000..059904f63
--- /dev/null
+++ b/utility/articleenhancer/enhancer.php
@@ -0,0 +1,52 @@
+<?php
+
+/**
+* ownCloud - News
+*
+* @author Alessandro Cosentino
+* @author Bernhard Posselt
+* @copyright 2012 Alessandro Cosentino cosenal@gmail.com
+* @copyright 2012 Bernhard Posselt dev@bernhard-posselt.com
+*
+* This library is free software; you can redistribute it and/or
+* modify it under the terms of the GNU AFFERO GENERAL PUBLIC LICENSE
+* License as published by the Free Software Foundation; either
+* version 3 of the License, or any later version.
+*
+* This library is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU AFFERO GENERAL PUBLIC LICENSE for more details.
+*
+* You should have received a copy of the GNU Affero General Public
+* License along with this library. If not, see <http://www.gnu.org/licenses/>.
+*
+*/
+
+namespace OCA\News\Utility\ArticleEnhancer;
+
+
+/**
+ * Holds a list of article enhancers and applies the first one which can
+ * handle an item
+ */
+class Enhancer {
+
+    private $enhancers;
+
+    public function __construct(){
+        $this->enhancers = array();
+    }
+
+
+    /**
+     * Appends an enhancer to the end of the list. Enhancers are asked in the
+     * order in which they were registered
+     * @param ArticleEnhancer $enhancer the enhancer which should be added
+     */
+    public function registerEnhancer(ArticleEnhancer $enhancer){
+        $this->enhancers[] = $enhancer;
+    }
+
+
+    /**
+     * Runs the first registered enhancer whose canHandle method returns true
+     * for the item
+     * @param $item the item which should be enhanced
+     * @return the result of the matching enhancer or the unchanged item if
+     * no registered enhancer can handle it
+     */
+    public function enhance($item){
+        foreach($this->enhancers as $enhancer){
+            if($enhancer->canHandle($item)){
+                return $enhancer->enhance($item);
+            }
+        }
+
+        // no enhancer felt responsible: hand back the item instead of null
+        return $item;
+    }
+
+
+} \ No newline at end of file
diff --git a/utility/simplepiefilefactory.php b/utility/simplepiefilefactory.php
new file mode 100644
index 000000000..13b56dc9e
--- /dev/null
+++ b/utility/simplepiefilefactory.php
@@ -0,0 +1,35 @@
+<?php
+
+/**
+* ownCloud - News
+*
+* @author Alessandro Cosentino
+* @author Bernhard Posselt
+* @copyright 2012 Alessandro Cosentino cosenal@gmail.com
+* @copyright 2012 Bernhard Posselt dev@bernhard-posselt.com
+*
+* This library is free software; you can redistribute it and/or
+* modify it under the terms of the GNU AFFERO GENERAL PUBLIC LICENSE
+* License as published by the Free Software Foundation; either
+* version 3 of the License, or any later version.
+*
+* This library is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU AFFERO GENERAL PUBLIC LICENSE for more details.
+*
+* You should have received a copy of the GNU Affero General Public
+* License along with this library. If not, see <http://www.gnu.org/licenses/>.
+*
+*/
+
+namespace OCA\News\Utility;
+
+
+/**
+ * Factory which wraps the creation of SimplePie_File objects
+ */
+class SimplePieFileFactory {
+
+    /**
+     * Creates a new SimplePie_File from the given arguments
+     * @param $url passed as first argument to the SimplePie_File constructor
+     * @param $timeout passed as second argument to the SimplePie_File
+     * constructor
+     * @return \SimplePie_File the newly created instance
+     */
+    public function getFile($url, $timeout) {
+        $file = new \SimplePie_File($url, $timeout);
+        return $file;
+    }
+
+} \ No newline at end of file