From 2f67340e551b12dce8824381c3291bb2137857cb Mon Sep 17 00:00:00 2001 From: Bernhard Posselt Date: Wed, 28 Aug 2013 19:19:28 +0200 Subject: Possible backwards incompatible change by using the link provided by simplepie instead of the user for the url hash. This prevents duplication of the feed when adding a slightly different feed url which points to the same feed and allows a speedup from O(n) to O(1) for article enhancers --- utility/articleenhancer/articleenhancer.php | 32 +++++++++---------- utility/articleenhancer/defaultenhancer.php | 49 ----------------------------- utility/articleenhancer/enhancer.php | 33 +++++++++++++------ utility/feedfetcher.php | 4 +-- 4 files changed, 39 insertions(+), 79 deletions(-) delete mode 100644 utility/articleenhancer/defaultenhancer.php (limited to 'utility') diff --git a/utility/articleenhancer/articleenhancer.php b/utility/articleenhancer/articleenhancer.php index d7701d53b..194137e72 100644 --- a/utility/articleenhancer/articleenhancer.php +++ b/utility/articleenhancer/articleenhancer.php @@ -60,27 +60,23 @@ abstract class ArticleEnhancer { } - public function canHandle($item){ - return preg_match($this->articleUrlRegex, $item->getUrl()) == true; - } - - public function enhance($item){ - $file = $this->fileFactory->getFile($item->getUrl(), $this->maximumTimeout); - $dom = new \DOMDocument(); - @$dom->loadHTML($file->body); - $xpath = new \DOMXpath($dom); - $xpathResult = $xpath->evaluate($this->articleXPath); - - // in case it wasnt a text query assume its a single - if(!is_string($xpathResult)) { - $xpathResult = $this->domToString($xpathResult); + if(preg_match($this->articleUrlRegex, $item->getUrl())) { + $file = $this->fileFactory->getFile($item->getUrl(), $this->maximumTimeout); + $dom = new \DOMDocument(); + @$dom->loadHTML($file->body); + $xpath = new \DOMXpath($dom); + $xpathResult = $xpath->evaluate($this->articleXPath); + + // in case it wasnt a text query assume its a single + if(!is_string($xpathResult)) { + 
$xpathResult = $this->domToString($xpathResult); + } + + $sanitizedResult = $this->purifier->purify($xpathResult); + $item->setBody($sanitizedResult); } - $sanitizedResult = $this->purifier->purify($xpathResult); - $item->setBody($sanitizedResult); - - return $item; } diff --git a/utility/articleenhancer/defaultenhancer.php b/utility/articleenhancer/defaultenhancer.php deleted file mode 100644 index eb3045ceb..000000000 --- a/utility/articleenhancer/defaultenhancer.php +++ /dev/null @@ -1,49 +0,0 @@ -. -* -*/ - -namespace OCA\News\Utility\ArticleEnhancer; - -use \OCA\News\Utility\SimplePieFileFactory; - - -class DefaultEnhancer extends ArticleEnhancer { - - - public function __construct(){ - parent::__construct(null, new SimplePieFileFactory(), null, null, null); - } - - - public function canHandle($item){ - return true; - } - - - public function enhance($item){ - return $item; - } - - -} \ No newline at end of file diff --git a/utility/articleenhancer/enhancer.php b/utility/articleenhancer/enhancer.php index 059904f63..d7d96f6a9 100644 --- a/utility/articleenhancer/enhancer.php +++ b/utility/articleenhancer/enhancer.php @@ -28,23 +28,36 @@ namespace OCA\News\Utility\ArticleEnhancer; class Enhancer { - private $enhancers; + private $enhancers = array(); - public function __construct(){ - $this->enhancers = array(); + public function registerEnhancer($feedUrl, ArticleEnhancer $enhancer){ + $feedUrl = $this->removeTrailingSlash($feedUrl); + + // create hashkeys for all supported protocols for quick access + $this->enhancers[$feedUrl] = $enhancer; + $this->enhancers['https://' . $feedUrl] = $enhancer; + $this->enhancers['http://' . $feedUrl] = $enhancer; + $this->enhancers['https://www.' . $feedUrl] = $enhancer; + $this->enhancers['http://www.' . 
$feedUrl] = $enhancer; } - public function registerEnhancer(ArticleEnhancer $enhancer){ - array_push($this->enhancers, $enhancer); + public function enhance($item, $feedUrl){ + $feedUrl = $this->removeTrailingSlash($feedUrl); + + if(array_key_exists($feedUrl, $this->enhancers)) { + return $this->enhancers[$feedUrl]->enhance($item); + } else { + return $item; + } } - public function enhance($item){ - foreach($this->enhancers as $enhancer){ - if($enhancer->canHandle($item)){ - return $enhancer->enhance($item); - } + private function removeTrailingSlash($url) { + if($url[strlen($url)-1] === '/') { + return substr($url, 0, -1); + } else { + return $url; } } diff --git a/utility/feedfetcher.php b/utility/feedfetcher.php index 10a141e38..8ad800d3c 100644 --- a/utility/feedfetcher.php +++ b/utility/feedfetcher.php @@ -187,8 +187,8 @@ class FeedFetcher implements IFeedFetcher { $feed->setTitle($title); $feed->setUrl($url); - $feed->setLink($simplePieFeed->get_link()); - $feed->setUrlHash(md5($url)); + $feed->setLink($simplePieFeed->get_permalink()); + $feed->setUrlHash(md5($feed->getLink())); $feed->setAdded($this->time->getTime()); if ($getFavicon) { -- cgit v1.2.3