From d6066c382083dedf426abbfd5f2f1df725c68aaf Mon Sep 17 00:00:00 2001 From: Bernhard Posselt Date: Fri, 27 Sep 2013 20:03:00 +0200 Subject: use seperate direcotires for article enhancers and fetchers --- articleenhancer/articleenhancer.php | 33 ++ articleenhancer/enhancer.php | 65 ++++ articleenhancer/regexarticleenhancer.php | 55 +++ articleenhancer/regexenhancers.json | 12 + articleenhancer/xpatharticleenhancer.php | 218 ++++++++++++ articleenhancer/xpathenhancers.json | 57 +++ businesslayer/feedbusinesslayer.php | 7 +- dependencyinjection/dicontainer.php | 21 +- fetcher/feedfetcher.php | 213 +++++++++++ fetcher/fetcher.php | 52 +++ fetcher/fetcherexception.php | 38 ++ fetcher/ifeedfetcher.php | 47 +++ tests/unit/articleenhancer/EnhancerTest.php | 91 +++++ .../articleenhancer/RegexArticleEnhancerTest.php | 49 +++ .../articleenhancer/XPathArticleEnhancerTest.php | 288 +++++++++++++++ tests/unit/businesslayer/FeedBusinessLayerTest.php | 8 +- tests/unit/controller/TwitterFetcherTest.php | 77 ---- tests/unit/fetcher/FeedFetcherTest.php | 392 +++++++++++++++++++++ tests/unit/fetcher/FetcherTest.php | 108 ++++++ tests/unit/utility/FeedFetcherTest.php | 392 --------------------- tests/unit/utility/FetcherTest.php | 108 ------ .../unit/utility/articleenhancer/EnhancerTest.php | 91 ----- .../articleenhancer/RegexArticleEnhancerTest.php | 49 --- .../articleenhancer/XPathArticleEnhancerTest.php | 288 --------------- utility/articleenhancer/articleenhancer.php | 33 -- utility/articleenhancer/enhancer.php | 65 ---- utility/articleenhancer/regexarticleenhancer.php | 55 --- utility/articleenhancer/regexenhancers.json | 12 - utility/articleenhancer/xpatharticleenhancer.php | 218 ------------ utility/articleenhancer/xpathenhancers.json | 57 --- utility/feedfetcher.php | 213 ----------- utility/fetcher.php | 52 --- utility/fetcherexception.php | 38 -- utility/ifeedfetcher.php | 47 --- utility/twitterfetcher.php | 69 ---- 35 files changed, 1733 insertions(+), 1885 deletions(-) create mode 100644 articleenhancer/articleenhancer.php create mode 100644 articleenhancer/enhancer.php create mode 100644 articleenhancer/regexarticleenhancer.php create mode 100644 articleenhancer/regexenhancers.json create mode 100644 articleenhancer/xpatharticleenhancer.php create mode 100644 articleenhancer/xpathenhancers.json create mode 100644 fetcher/feedfetcher.php create mode 100644 fetcher/fetcher.php create mode 100644 fetcher/fetcherexception.php create mode 100644 fetcher/ifeedfetcher.php create mode 100644 tests/unit/articleenhancer/EnhancerTest.php create mode 100644 tests/unit/articleenhancer/RegexArticleEnhancerTest.php create mode 100644 tests/unit/articleenhancer/XPathArticleEnhancerTest.php delete mode 100644 tests/unit/controller/TwitterFetcherTest.php create mode 100644 tests/unit/fetcher/FeedFetcherTest.php create mode 100644 tests/unit/fetcher/FetcherTest.php delete mode 100644 tests/unit/utility/FeedFetcherTest.php delete mode 100644 tests/unit/utility/FetcherTest.php delete mode 100644 tests/unit/utility/articleenhancer/EnhancerTest.php delete mode 100644 tests/unit/utility/articleenhancer/RegexArticleEnhancerTest.php delete mode 100644 tests/unit/utility/articleenhancer/XPathArticleEnhancerTest.php delete mode 100644 utility/articleenhancer/articleenhancer.php delete mode 100644 utility/articleenhancer/enhancer.php delete mode 100644 utility/articleenhancer/regexarticleenhancer.php delete mode 100644 utility/articleenhancer/regexenhancers.json delete mode 100644 utility/articleenhancer/xpatharticleenhancer.php delete mode 100644 utility/articleenhancer/xpathenhancers.json delete mode 100644 utility/feedfetcher.php delete mode 100644 utility/fetcher.php delete mode 100644 utility/fetcherexception.php delete mode 100644 utility/ifeedfetcher.php delete mode 100644 utility/twitterfetcher.php diff --git a/articleenhancer/articleenhancer.php b/articleenhancer/articleenhancer.php new file mode 100644 index 000000000..53b2d6fa8 --- /dev/null +++ b/articleenhancer/articleenhancer.php @@ -0,0 +1,33 @@ +. +* +*/ + +namespace OCA\News\ArticleEnhancer; + +use \OCA\News\Db\Item; + + +interface ArticleEnhancer { + public function enhance(Item $item); +} \ No newline at end of file diff --git a/articleenhancer/enhancer.php b/articleenhancer/enhancer.php new file mode 100644 index 000000000..7981751ea --- /dev/null +++ b/articleenhancer/enhancer.php @@ -0,0 +1,65 @@ +. +* +*/ + +namespace OCA\News\ArticleEnhancer; + + +class Enhancer { + + private $enhancers = array(); + + public function registerEnhancer($feedUrl, ArticleEnhancer $enhancer){ + $feedUrl = $this->removeTrailingSlash($feedUrl); + + // create hashkeys for all supported protocols for quick access + $this->enhancers[$feedUrl] = $enhancer; + $this->enhancers['https://' . $feedUrl] = $enhancer; + $this->enhancers['http://' . $feedUrl] = $enhancer; + $this->enhancers['https://www.' . $feedUrl] = $enhancer; + $this->enhancers['http://www.' . $feedUrl] = $enhancer; + } + + + public function enhance($item, $feedUrl){ + $feedUrl = $this->removeTrailingSlash($feedUrl); + + if(array_key_exists($feedUrl, $this->enhancers)) { + return $this->enhancers[$feedUrl]->enhance($item); + } else { + return $item; + } + } + + + private function removeTrailingSlash($url) { + if($url[strlen($url)-1] === '/') { + return substr($url, 0, -1); + } else { + return $url; + } + } + + +} \ No newline at end of file diff --git a/articleenhancer/regexarticleenhancer.php b/articleenhancer/regexarticleenhancer.php new file mode 100644 index 000000000..0742bc127 --- /dev/null +++ b/articleenhancer/regexarticleenhancer.php @@ -0,0 +1,55 @@ +. + * + */ + +namespace OCA\News\ArticleEnhancer; + +use \OCA\News\Utility\SimplePieFileFactory; +use \OCA\News\Db\Item; + + +class RegexArticleEnhancer implements ArticleEnhancer { + + private $matchArticleUrl; + private $regexPair; + + public function __construct($matchArticleUrl, array $regexPair) { + $this->matchArticleUrl = $matchArticleUrl; + $this->regexPair = $regexPair; + } + + + public function enhance(Item $item) { + if (preg_match($this->matchArticleUrl, $item->getUrl())) { + $body = $item->getBody(); + foreach($this->regexPair as $search => $replaceWith) { + $body = preg_replace($search, $replaceWith, $body); + } + $item->setBody($body); + } + return $item; + } + + +} diff --git a/articleenhancer/regexenhancers.json b/articleenhancer/regexenhancers.json new file mode 100644 index 000000000..95231985d --- /dev/null +++ b/articleenhancer/regexenhancers.json @@ -0,0 +1,12 @@ +{ + "twogag.com": { + "%(?:www.twogag.com/archives)|(feedproxy.google.com/~r/TwoGuysAndGuy)%": { + "%http://www.twogag.com/comics-rss/([^.]+)\\.jpg%": "http://www.twogag.com/comics/$1.jpg" + } + }, + "buttersafe.com": { + "%(?:buttersafe.com)|(feedproxy.google.com/~r/Buttersafe)%": { + "%buttersafe.com/comics/rss/([^.]+)RSS([^.]+)?.jpg%": "buttersafe.com/comics/$1$2.jpg" + } + } +} diff --git a/articleenhancer/xpatharticleenhancer.php b/articleenhancer/xpatharticleenhancer.php new file mode 100644 index 000000000..77a16f04d --- /dev/null +++ b/articleenhancer/xpatharticleenhancer.php @@ -0,0 +1,218 @@ +. +* +*/ + +namespace OCA\News\ArticleEnhancer; + +use \OCA\News\Utility\SimplePieFileFactory; +use \OCA\News\Db\Item; + + +class XPathArticleEnhancer implements ArticleEnhancer { + + + private $feedRegex; + private $purifier; + private $fileFactory; + private $maximumTimeout; + + + /** + * @param $purifier the purifier object to clean the html which will be + * matched + * @param SimplePieFileFactory a factory for getting a simple pie file instance + * @param array $regexXPathPair an associative array containing regex to + * match the url and the xpath that should be used for it to extract the + * page + * @param int $maximumTimeout maximum timeout in seconds, defaults to 10 sec + */ + public function __construct($purifier, SimplePieFileFactory $fileFactory, + array $regexXPathPair, $maximumTimeout=10){ + $this->purifier = $purifier; + $this->regexXPathPair = $regexXPathPair; + $this->fileFactory = $fileFactory; + $this->maximumTimeout = $maximumTimeout; + } + + + public function enhance(Item $item){ + + foreach($this->regexXPathPair as $regex => $search) { + + if(preg_match($regex, $item->getUrl())) { + $file = $this->fileFactory->getFile($item->getUrl(), $this->maximumTimeout); + + // convert encoding by detecting charset from header + $contentType = $file->headers['content-type']; + if( preg_match( '/(?<=charset=)[^;]*/', $contentType, $matches ) ) { + $body = mb_convert_encoding($file->body, 'HTML-ENTITIES', $matches[0]); + } else { + $body = $file->body; + } + + $dom = new \DOMDocument(); + @$dom->loadHTML($body); + + $xpath = new \DOMXpath($dom); + $xpathResult = $xpath->evaluate($search); + + // in case it wasnt a text query assume its a single + if(!is_string($xpathResult)) { + $xpathResult = $this->domToString($xpathResult); + } + + // convert all relative to absolute URLs + $xpathResult = $this->substituteRelativeLinks($xpathResult, $item->getUrl()); + + $sanitizedResult = $this->purifier->purify($xpathResult); + $item->setBody($sanitizedResult); + } + } + + return $item; + } + + + /** + * Method which converts all relative "href" and "src" URLs of + * a HTML snippet with their absolute equivalent + * @param string $xmlString a HTML snippet as string with the relative URLs to be replaced + * @param string $absoluteUrl the approptiate absolute url of the HTML snippet + * @return string the result HTML snippet as a string + */ + protected function substituteRelativeLinks($xmlString, $absoluteUrl) { + $dom = new \DOMDocument(); + $dom->preserveWhiteSpace = false; + + // return, if xml is empty or loading the HTML fails + if( trim($xmlString) == "" || !@$dom->loadHTML($xmlString) ) { + return $xmlString; + } + + // remove removeChild($dom->firstChild); + // remove + $dom->replaceChild($dom->firstChild->firstChild, $dom->firstChild); + + $substitution = array("href", "src"); + + foreach ($substitution as $attribute) { + $xpath = new \DOMXpath($dom); + $xpathResult = $xpath->query( + "//*[@" . $attribute . " " . + "and not(contains(@" . $attribute . ", '://')) " . + "and not(starts-with(@" . $attribute . ", 'mailto:'))]"); + foreach ($xpathResult as $linkNode) { + $urlElement = $linkNode->attributes->getNamedItem($attribute); + $abs = $this->relativeToAbsoluteUrl( $urlElement->nodeValue, $absoluteUrl ); + $urlElement->nodeValue = htmlspecialchars($abs); + } + } + + // save dom to string and remove + $xmlString = substr(trim($dom->saveHTML()), 6, -7); + // domdocument spoils the string with line breaks between the elements. strip them. + $xmlString = str_replace("\n", "", $xmlString); + + return $xmlString; + } + + + /** + * Method which builds a URL by taking a relative URL and its corresponding + * absolute URL + * For examle relative URL "../example/path/file.php?a=1#anchor" and + * absolute URL "https://username:password@www.website.com/subfolder/index.html" + * will result in "https://username:password@www.website.com/example/path/file.php?a=1#anchor" + * @param string $relativeUrl the relative URL + * @param string $absoluteUrl the absolute URL with at least scheme and host + * @return string the resulting absolute URL + */ + protected function relativeToAbsoluteUrl($relativeUrl, $absoluteUrl) { + $abs = parse_url($absoluteUrl); + + $newUrl = $abs["scheme"]."://" + .( (isset($abs["user"])) ? $abs["user"] . ( (isset($abs["pass"])) ? ":".$abs["pass"] : "") . "@" : "" ) + .$abs["host"] + .( (isset($abs["port"])) ? ":".$abs["port"] : "" ); + + if(substr(trim($relativeUrl), 0, 1) == "/") { + // we have a relative url like "/a/path/file" + return $newUrl . $relativeUrl; + } else { + // we have a relative url like "a/path/file", handle "."" and ".." directories + + // the starting point is the absolute path, but with out the last part (we don't need the file name) + $newPath = explode("/", substr($abs["path"], 1) ); + array_pop($newPath); + + $relPath = parse_url($relativeUrl, PHP_URL_PATH); + $relPath = explode("/", $relPath); + + // cross the relative and the absolute path + for($i=0; $itoInnerHTML($node); + } + return $result; + } + + + protected function toInnerHTML($node) { + $dom = new \DOMDocument(); + $dom->appendChild($dom->importNode($node, true)); + return trim($dom->saveHTML($dom->documentElement)); + } + + +} \ No newline at end of file diff --git a/articleenhancer/xpathenhancers.json b/articleenhancer/xpathenhancers.json new file mode 100644 index 000000000..29296c79f --- /dev/null +++ b/articleenhancer/xpathenhancers.json @@ -0,0 +1,57 @@ +{ + "cad-comic.com": { + "%cad-comic.com/cad/\\d+/$%": "//*[@id='content']/img" + }, + "explosm.net": { + "%explosm.net/comics%": "//*[@id='maincontent']/div[2]/div/img", + "%explosm.net/show%": "//*[@id='videoPlayer']/iframe" + }, + "themerepublic.net": { + "%feedproxy.google.com/~r/blogspot/DngUJ%": "//*[@class='post hentry']" + }, + "penny-arcade.com": { + "%feeds.penny-arcade.com/~r/pa-mainsite%": "//*[starts-with(@class, \"post\")]" + }, + "leasticoulddo.com": { + "%feedproxy.google.com/~r/LICD%": "//*[@id='comic-img']/a/img | //*[@id='comic-img']/img" + }, + "escapistmagazine.com/articles/view/comics/critical-miss": { + "%escapistmagazine.com/articles/view/comics/critical-miss%": "//*[@class='body']/span/img" + }, + "escapistmagazine.com/articles/view/comics/namegame": { + "%escapistmagazine.com/articles/view/comics/namegame%": "//*[@class='body']/span/p/img[@height != \"120\"]" + }, + "escapistmagazine.com/articles/view/comics/stolen-pixels": { + "%escapistmagazine.com/articles/view/comics/stolen-pixels%": "//*[@class='body']/span/p[2]/img" + }, + "escapistmagazine.com/articles/view/comics/bumhugparade": { + "%escapistmagazine.com/articles/view/comics/bumhugparade%": "//*[@class='body']/span/p[2]/img" + }, + "escapistmagazine.com/articles/view/comics/escapistradiotheater": { + "%escapistmagazine.com/articles/view/comics/escapistradiotheater%": "//*[@class='body']/span/p[2]/img" + }, + "escapistmagazine.com/articles/view/comics/paused": { + "%escapistmagazine.com/articles/view/comics/paused%": "//*[@class='body']/span/p[2]/img | //*[@class='body']/span/div/img" + }, + "escapistmagazine.com/articles/view/comics/fraughtwithperil": { + "%escapistmagazine.com/articles/view/comics/fraughtwithperil%": "//*[@class='body']" + }, + "trenchescomic.com": { + "%trenchescomic.com/comic%": "//*[@class=\"top\"]/img", + "%trenchescomic.com/tales%": "//*[@class=\"copy\"]" + }, + "lfgcomic.com": { + "%(lfgcomic.com/page)|(feedproxy.google.com/~r/LookingForGroup)%": "//*[@id=\"comic\"]/img | //*[@class=\"content\"]" + }, + "sandraandwoo.com": { + "%sandraandwoo.com%": "//*[@id=\"comic\"]/img | //*[@class=\"post\"]" + }, + "sandraandwoo.com/gaia": { + "%sandraandwoo.com%": "//*[@id=\"comic\"]/img | //*[@class=\"post\"]" + }, + "theoatmeal.com": { + "%theoatmeal.com/blog%": "//*[@class=\"post_body\"]", + "%theoatmeal.com/comics%": "//*[@id=\"comic\"] | //*[@class=\"super_vacum\"] | //*[@class=\"pad\"]" + } +} + diff --git a/businesslayer/feedbusinesslayer.php b/businesslayer/feedbusinesslayer.php index e337e5785..b7ad5987a 100644 --- a/businesslayer/feedbusinesslayer.php +++ b/businesslayer/feedbusinesslayer.php @@ -33,10 +33,9 @@ use \OCA\News\Db\Feed; use \OCA\News\Db\Item; use \OCA\News\Db\FeedMapper; use \OCA\News\Db\ItemMapper; -use \OCA\News\Utility\Fetcher; -use \OCA\News\Utility\FetcherException; - -use \OCA\News\Utility\ArticleEnhancer\Enhancer; +use \OCA\News\Fetcher\Fetcher; +use \OCA\News\Fetcher\FetcherException; +use \OCA\News\ArticleEnhancer\Enhancer; class FeedBusinessLayer extends BusinessLayer { diff --git a/dependencyinjection/dicontainer.php b/dependencyinjection/dicontainer.php index 4297e7113..ce095ccbf 100644 --- a/dependencyinjection/dicontainer.php +++ b/dependencyinjection/dicontainer.php @@ -53,16 +53,16 @@ use \OCA\News\External\FeedAPI; use \OCA\News\External\ItemAPI; use \OCA\News\Utility\Config; -use \OCA\News\Utility\Fetcher; -use \OCA\News\Utility\FeedFetcher; -use \OCA\News\Utility\TwitterFetcher; use \OCA\News\Utility\OPMLExporter; use \OCA\News\Utility\Updater; use \OCA\News\Utility\SimplePieFileFactory; -use \OCA\News\Utility\ArticleEnhancer\Enhancer; -use \OCA\News\Utility\ArticleEnhancer\XPathArticleEnhancer; -use OCA\News\Utility\ArticleEnhancer\RegexArticleEnhancer; +use \OCA\News\Fetcher\Fetcher; +use \OCA\News\Fetcher\FeedFetcher; + +use \OCA\News\ArticleEnhancer\Enhancer; +use \OCA\News\ArticleEnhancer\XPathArticleEnhancer; +use \OCA\News\ArticleEnhancer\RegexArticleEnhancer; use \OCA\News\Middleware\CORSMiddleware; @@ -259,7 +259,7 @@ class DIContainer extends BaseContainer { // register simple enhancers from config json file $xpathEnhancerConfig = file_get_contents( - __DIR__ . '/../utility/articleenhancer/xpathenhancers.json' + __DIR__ . '/../articleenhancer/xpathenhancers.json' ); foreach(json_decode($xpathEnhancerConfig, true) as $feed => $config) { @@ -273,7 +273,7 @@ class DIContainer extends BaseContainer { } $regexEnhancerConfig = file_get_contents( - __DIR__ . '/../utility/articleenhancer/regexenhancers.json' + __DIR__ . '/../articleenhancer/regexenhancers.json' ); foreach(json_decode($regexEnhancerConfig, true) as $feed => $config) { foreach ($config as $matchArticleUrl => $regex) { @@ -290,7 +290,6 @@ class DIContainer extends BaseContainer { // register fetchers in order // the most generic fetcher should be the last one - $fetcher->registerFetcher($c['TwitterFetcher']); // twitter timeline $fetcher->registerFetcher($c['FeedFetcher']); return $fetcher; @@ -308,10 +307,6 @@ class DIContainer extends BaseContainer { $c['HTMLPurifier']); }); - $this['TwitterFetcher'] = $this->share(function($c){ - return new TwitterFetcher($c['FeedFetcher']); - }); - $this['StatusFlag'] = $this->share(function($c){ return new StatusFlag(); }); diff --git a/fetcher/feedfetcher.php b/fetcher/feedfetcher.php new file mode 100644 index 000000000..fdc062d6c --- /dev/null +++ b/fetcher/feedfetcher.php @@ -0,0 +1,213 @@ +. +* +*/ + +namespace OCA\News\Fetcher; + +use \OCA\AppFramework\Core\API; +use \OCA\AppFramework\Utility\FaviconFetcher; +use \OCA\AppFramework\Utility\SimplePieAPIFactory; +use \OCA\AppFramework\Utility\TimeFactory; + +use \OCA\News\Db\Item; +use \OCA\News\Db\Feed; + + +class FeedFetcher implements IFeedFetcher { + + private $api; + private $cacheDirectory; + private $cacheDuration; + private $faviconFetcher; + private $simplePieFactory; + private $fetchTimeout; + private $time; + private $purifier; + + public function __construct(API $api, + SimplePieAPIFactory $simplePieFactory, + FaviconFetcher $faviconFetcher, + TimeFactory $time, + $cacheDirectory, + $cacheDuration, + $fetchTimeout, + $purifier){ + $this->api = $api; + $this->cacheDirectory = $cacheDirectory; + $this->cacheDuration = $cacheDuration; + $this->faviconFetcher = $faviconFetcher; + $this->simplePieFactory = $simplePieFactory; + $this->time = $time; + $this->purifier = $purifier; + $this->fetchTimeout = $fetchTimeout; + } + + + /** + * This fetcher handles all the remaining urls therefore always returns true + */ + public function canHandle($url){ + return true; + } + + + /** + * Fetch a feed from remote + * @param string url remote url of the feed + * @throws FetcherException if simple pie fails + * @return array an array containing the new feed and its items + */ + public function fetch($url, $getFavicon=true) { + $simplePie = $this->simplePieFactory->getCore(); + $simplePie->set_feed_url($url); + $simplePie->enable_cache(true); + $simplePie->set_timeout($this->fetchTimeout); + $simplePie->set_cache_location($this->cacheDirectory); + $simplePie->set_cache_duration($this->cacheDuration); + + if (!$simplePie->init()) { + throw new FetcherException('Could not initialize simple pie on feed with url ' . $url); + } + + + try { + // somehow $simplePie turns into a feed after init + $items = array(); + if ($feedItems = $simplePie->get_items()) { + foreach($feedItems as $feedItem) { + array_push($items, $this->buildItem($feedItem)); + } + } + + $feed = $this->buildFeed($simplePie, $url, $getFavicon); + + return array($feed, $items); + + } catch(\Exception $ex){ + throw new FetcherException($ex->getMessage()); + } + + } + + + private function decodeTwice($string) { + // behold! ' is not converted by PHP thats why we need to do it + // manually (TM) + return str_replace(''', '\'', + html_entity_decode( + html_entity_decode( + $string, ENT_QUOTES, 'UTF-8' + ), + ENT_QUOTES, 'UTF-8' + ) + ); + } + + + protected function buildItem($simplePieItem) { + $item = new Item(); + $item->setStatus(0); + $item->setUnread(); + $item->setUrl($this->decodeTwice($simplePieItem->get_permalink())); + + // unescape content because angularjs helps against XSS + $item->setTitle($this->decodeTwice($simplePieItem->get_title())); + $guid = $simplePieItem->get_id(); + $item->setGuid($guid); + + // links should always open in a new window + $item->setBody( + str_replace( + 'purifier->purify( + $simplePieItem->get_content() + ) + ) + ); + + // pubdate is not required. if not given use the current date + $date = $simplePieItem->get_date('U'); + if(!$date) { + $date = $this->time->getTime(); + } + + $item->setPubDate($date); + + $item->setLastModified($this->time->getTime()); + + $author = $simplePieItem->get_author(); + if ($author !== null) { + $name = $this->decodeTwice($author->get_name()); + if ($name) { + $item->setAuthor($name); + } else { + $item->setAuthor($this->decodeTwice($author->get_email())); + } + } + + // TODO: make it work for video files also + $enclosure = $simplePieItem->get_enclosure(); + if($enclosure !== null) { + $enclosureType = $enclosure->get_type(); + if(stripos($enclosureType, "audio/") !== false) { + $item->setEnclosureMime($enclosureType); + $item->setEnclosureLink($enclosure->get_link()); + } + } + + return $item; + } + + + protected function buildFeed($simplePieFeed, $url, $getFavicon) { + $feed = new Feed(); + + // unescape content because angularjs helps against XSS + $title = $this->decodeTwice($simplePieFeed->get_title(), + ENT_COMPAT, 'UTF-8' ); + + // if there is no title use the url + if(!$title) { + $title = $url; + } + + $feed->setTitle($title); + $feed->setUrl($url); + $feed->setLink($simplePieFeed->get_permalink()); + $feed->setAdded($this->time->getTime()); + + if ($getFavicon) { + // use the favicon from the page first since most feeds use a weird image + $favicon = $this->faviconFetcher->fetch($feed->getLink()); + + if (!$favicon) { + $favicon = $simplePieFeed->get_image_url(); + } + + $feed->setFaviconLink($favicon); + } + + return $feed; + } + +} diff --git a/fetcher/fetcher.php b/fetcher/fetcher.php new file mode 100644 index 000000000..c86db6b64 --- /dev/null +++ b/fetcher/fetcher.php @@ -0,0 +1,52 @@ +. +* +*/ + +namespace OCA\News\Fetcher; + + +class Fetcher { + + private $fetchers; + + public function __construct(){ + $this->fetchers = array(); + } + + + public function registerFetcher(IFeedFetcher $fetcher){ + array_push($this->fetchers, $fetcher); + } + + + public function fetch($url, $getFavicon=true){ + foreach($this->fetchers as $fetcher){ + if($fetcher->canHandle($url)){ + return $fetcher->fetch($url, $getFavicon); + } + } + } + + +} \ No newline at end of file diff --git a/fetcher/fetcherexception.php b/fetcher/fetcherexception.php new file mode 100644 index 000000000..a9082dd30 --- /dev/null +++ b/fetcher/fetcherexception.php @@ -0,0 +1,38 @@ +. +* +*/ + +namespace OCA\News\Fetcher; + +class FetcherException extends \Exception { + + /** + * Constructor + * @param string $msg the error message + */ + public function __construct($msg){ + parent::__construct($msg); + } + +} \ No newline at end of file diff --git a/fetcher/ifeedfetcher.php b/fetcher/ifeedfetcher.php new file mode 100644 index 000000000..5fa7fc678 --- /dev/null +++ b/fetcher/ifeedfetcher.php @@ -0,0 +1,47 @@ +. +* +*/ + +namespace OCA\News\Fetcher; + +interface IFeedFetcher { + + /** + * @param string url the url that the user entered in the add feed dialog + * box + * @throws FetcherException if the fetcher encounters a problem + * @return array with the first element being the feed and the + * second element being an array of items. Those items will be saved into + * into the database + */ + function fetch($url, $getFavicon=true); + + /** + * @param string $url the url that should be fetched + * @return boolean if the fetcher can handle the url. This fetcher will be + * used exclusively to fetch the feed and the items of the page + */ + function canHandle($url); + +} \ No newline at end of file diff --git a/tests/unit/articleenhancer/EnhancerTest.php b/tests/unit/articleenhancer/EnhancerTest.php new file mode 100644 index 000000000..84cbe2a7c --- /dev/null +++ b/tests/unit/articleenhancer/EnhancerTest.php @@ -0,0 +1,91 @@ +. +* +*/ + +namespace OCA\News\ArticleEnhancer; + +use \OCA\News\Db\Item; + +require_once(__DIR__ . "/../../classloader.php"); + + +class EnhancerTest extends \OCA\AppFramework\Utility\TestUtility { + + private $enhancer; + private $articleEnhancer; + private $articleEnhancer2; + + protected function setUp(){ + $this->enhancer = new Enhancer(); + $this->articleEnhancer = $this->getMockBuilder( + '\OCA\News\ArticleEnhancer\ArticleEnhancer') + ->disableOriginalConstructor() + ->getMock(); + $this->enhancer->registerEnhancer('test.com', $this->articleEnhancer); + } + + + public function testEnhanceSetsCorrectHash(){ + $item = new Item(); + $item->setUrl('hi'); + $urls = array( + 'https://test.com', + 'https://www.test.com', + 'https://test.com/', + 'http://test.com', + 'http://test.com/', + 'http://www.test.com' + ); + for ($i=0; $i < count($urls); $i++) { + $url = $urls[$i]; + $this->articleEnhancer->expects($this->at($i)) + ->method('enhance') + ->with($this->equalTo($item)) + ->will($this->returnValue($item)); + } + + for ($i=0; $i < count($urls); $i++) { + $url = $urls[$i]; + $result = $this->enhancer->enhance($item, $url); + $this->assertEquals($item, $result); + } + + } + + + public function testNotMatchShouldJustReturnItem() { + $item = new Item(); + $item->setUrl('hi'); + + $url = 'https://tests.com'; + $this->articleEnhancer->expects($this->never()) + ->method('enhance'); + + $result = $this->enhancer->enhance($item, $url); + $this->assertEquals($item, $result); + + } + + +} \ No newline at end of file diff --git a/tests/unit/articleenhancer/RegexArticleEnhancerTest.php b/tests/unit/articleenhancer/RegexArticleEnhancerTest.php new file mode 100644 index 000000000..2d985edf1 --- /dev/null +++ b/tests/unit/articleenhancer/RegexArticleEnhancerTest.php @@ -0,0 +1,49 @@ +. +* +*/ + +namespace OCA\News\ArticleEnhancer; + +use \OCA\News\Db\Item; + +require_once(__DIR__ . "/../../classloader.php"); + + +class RegexArticleEnhancerTest extends \OCA\AppFramework\Utility\TestUtility { + + + public function testRegexEnhancer() { + $item = new Item(); + $item->setBody('atests is a nice thing'); + $item->setUrl('http://john.com'); + $regex = array("%tes(ts)%" => "heho$1tests"); + + $regexEnhancer = new RegexArticleEnhancer('%john.com%', $regex); + $item = $regexEnhancer->enhance($item); + + $this->assertEquals('ahehotstests is a nice thing', $item->getBody()); + } + + +} \ No newline at end of file diff --git a/tests/unit/articleenhancer/XPathArticleEnhancerTest.php b/tests/unit/articleenhancer/XPathArticleEnhancerTest.php new file mode 100644 index 000000000..a0f8db388 --- /dev/null +++ b/tests/unit/articleenhancer/XPathArticleEnhancerTest.php @@ -0,0 +1,288 @@ +. +* +*/ + +namespace OCA\News\ArticleEnhancer; + +use \OCA\News\Db\Item; + +require_once(__DIR__ . "/../../classloader.php"); + + +class XPathArticleEnhancerTest extends \OCA\AppFramework\Utility\TestUtility { + + private $purifier; + private $testEnhancer; + private $fileFactory; + private $timeout; + + protected function setUp() { + $timeout = 30; + $this->fileFactory = $this->getMockBuilder('\OCA\News\Utility\SimplePieFileFactory') + ->disableOriginalConstructor() + ->getMock(); + $this->purifier = $this->getMock('purifier', array('purify')); + + $this->testEnhancer = new XPathArticleEnhancer( + $this->purifier, + $this->fileFactory, + array( + '/explosm.net\/comics/' => '//*[@id=\'maincontent\']/div[2]/div/span', + '/explosm.net\/shorts/' => '//*[@id=\'maincontent\']/div/div', + '/explosm.net\/all/' => '//body/*', + '/themerepublic.net/' => '//*[@class=\'post hentry\']' + ), + $this->timeout + ); + } + + + public function testDoesNotModifiyNotMatchingResults() { + $item = new Item(); + $item->setUrl('http://explosm.net'); + $this->assertEquals($item, $this->testEnhancer->enhance($item)); + } + + + public function testDoesModifiyArticlesThatMatch() { + $file = new \stdClass; + $file->headers = array("content-type"=>"text/html; charset=utf-8"); + $file->body = ' + +
+
nooo
+
hiho
+
+ + '; + $item = new Item(); + $item->setUrl('https://www.explosm.net/comics/312'); + $item->setBody('Hello thar'); + + $this->fileFactory->expects($this->once()) + ->method('getFile') + ->with($this->equalTo($item->getUrl()), + $this->equalTo($this->timeout)) + ->will($this->returnValue($file)); + $this->purifier->expects($this->once()) + ->method('purify') + ->with($this->equalTo('hiho')) + ->will($this->returnValue('hiho')); + + $result = $this->testEnhancer->enhance($item); + $this->assertEquals('hiho', $result->getBody()); + } + + + public function testDoesModifiyAllArticlesThatMatch() { + $file = new \stdClass; + $file->headers = array("content-type"=>"text/html; charset=utf-8"); + $file->body = ' + +
+
nooo
hiho
+
rawr
+
+ + '; + $item = new Item(); + $item->setUrl('https://www.explosm.net/shorts/312'); + $item->setBody('Hello thar'); + + $this->fileFactory->expects($this->once()) + ->method('getFile') + ->with($this->equalTo($item->getUrl()), + $this->equalTo($this->timeout)) + ->will($this->returnValue($file)); + $this->purifier->expects($this->once()) + ->method('purify') + ->with($this->equalTo('
hiho
rawr
')) + ->will($this->returnValue('
hiho
rawr
')); + + $result = $this->testEnhancer->enhance($item); + $this->assertEquals('
hiho
rawr
', $result->getBody()); + } + + + public function testModificationHandlesEmptyResults() { + $file = new \stdClass; + $file->headers = array("content-type"=>"text/html; charset=utf-8"); + $file->body = ' + +
+
+ + '; + $item = new Item(); + $item->setUrl('https://www.explosm.net/comics/312'); + $item->setBody('Hello thar'); + + $this->fileFactory->expects($this->once()) + ->method('getFile') + ->with($this->equalTo($item->getUrl()), + $this->equalTo($this->timeout)) + ->will($this->returnValue($file)); + $this->purifier->expects($this->once()) + ->method('purify') + ->with($this->equalTo(null)) + ->will($this->returnValue(null)); + + $result = $this->testEnhancer->enhance($item); + $this->assertEquals(null, $result->getBody()); + } + + + public function testModificationDoesNotBreakOnEmptyDom() { + $file = new \stdClass; + $file->headers = array("content-type"=>"text/html; charset=utf-8"); + $file->body = ''; + $item = new Item(); + $item->setUrl('https://www.explosm.net/comics/312'); + $item->setBody('Hello thar'); + + $this->fileFactory->expects($this->once()) + ->method('getFile') + ->with($this->equalTo($item->getUrl()), + $this->equalTo($this->timeout)) + ->will($this->returnValue($file)); + $this->purifier->expects($this->once()) + ->method('purify') + ->with($this->equalTo(null)) + ->will($this->returnValue(null)); + + $result = $this->testEnhancer->enhance($item); + $this->assertEquals(null, $result->getBody()); + } + + + public function testModificationDoesNotBreakOnBrokenDom() { + $file = new \stdClass; + $file->headers = array("content-type"=>"text/html; charset=utf-8"); + $file->body = '

+ +

+
+ + '; + $item = new Item(); + $item->setUrl('https://www.explosm.net/comics/312'); + $item->setBody('Hello thar'); + + $this->fileFactory->expects($this->once()) + ->method('getFile') + ->with($this->equalTo($item->getUrl()), + $this->equalTo($this->timeout)) + ->will($this->returnValue($file)); + $this->purifier->expects($this->once()) + ->method('purify') + ->with($this->equalTo(null)) + ->will($this->returnValue(null)); + + $result = $this->testEnhancer->enhance($item); + $this->assertEquals(null, $result->getBody()); + } + + + public function testTransformRelativeUrls() { + $file = new \stdClass; + $file->headers = array("content-type"=>"text/html; charset=utf-8"); + $file->body = ' + + link + link2 + + + '; + $item = new Item(); + $item->setUrl('https://www.explosm.net/all/312'); + $item->setBody('Hello thar'); + + $this->fileFactory->expects($this->once()) + ->method('getFile') + ->with($this->equalTo($item->getUrl()), + $this->equalTo($this->timeout)) + ->will($this->returnValue($file)); + $this->purifier->expects($this->once()) + ->method('purify') + ->with($this->equalTo('linklink2')) + ->will($this->returnValue('linklink2')); + + $result = $this->testEnhancer->enhance($item); + $this->assertEquals('linklink2', $result->getBody()); + } + + public function testTransformRelativeUrlSpecials() { + $file = new \stdClass; + $file->headers = array("content-type"=>"text/html; charset=utf-8"); + $file->body = ' + + + + '; + $item = new Item(); + $item->setUrl('https://username:secret@www.explosm.net/all/312'); + $item->setBody('Hello thar'); + + $this->fileFactory->expects($this->once()) + ->method('getFile') + ->with($this->equalTo($item->getUrl()), + $this->equalTo($this->timeout)) + ->will($this->returnValue($file)); + $this->purifier->expects($this->once()) + ->method('purify') + ->with($this->equalTo('')) + ->will($this->returnValue('')); + + $result = $this->testEnhancer->enhance($item); + $this->assertEquals('', $result->getBody()); + } + + public function testDontTransformAbsoluteUrlsAndMails() { + $file = new \stdClass; + $file->headers = array("content-type"=>"text/html; charset=utf-8"); + $file->body = ' + + + mail + + '; + $item = new Item(); + $item->setUrl('https://www.explosm.net/all/312'); + $item->setBody('Hello thar'); + + $this->fileFactory->expects($this->once()) + ->method('getFile') + ->with($this->equalTo($item->getUrl()), + $this->equalTo($this->timeout)) + ->will($this->returnValue($file)); + $this->purifier->expects($this->once()) + ->method('purify') + ->with($this->equalTo('mail')) + ->will($this->returnValue('mail')); + + $result = $this->testEnhancer->enhance($item); + $this->assertEquals('mail', $result->getBody()); + } + +} \ No newline at end of file diff --git a/tests/unit/businesslayer/FeedBusinessLayerTest.php b/tests/unit/businesslayer/FeedBusinessLayerTest.php index 550f37dcb..5e379525d 100644 --- a/tests/unit/businesslayer/FeedBusinessLayerTest.php +++ b/tests/unit/businesslayer/FeedBusinessLayerTest.php @@ -32,8 +32,8 @@ use \OCA\AppFramework\Db\DoesNotExistException; use \OCA\News\Db\Feed; use \OCA\News\Db\Item; -use \OCA\News\Utility\Fetcher; -use \OCA\News\Utility\FetcherException; +use \OCA\News\Fetcher\Fetcher; +use \OCA\News\Fetcher\FetcherException; class FeedBusinessLayerTest extends \OCA\AppFramework\Utility\TestUtility { @@ -63,13 +63,13 @@ class FeedBusinessLayerTest extends \OCA\AppFramework\Utility\TestUtility { $this->feedMapper = $this->getMockBuilder('\OCA\News\Db\FeedMapper') ->disableOriginalConstructor() ->getMock(); - $this->fetcher = $this->getMockBuilder('\OCA\News\Utility\Fetcher') + $this->fetcher = $this->getMockBuilder('\OCA\News\Fetcher\Fetcher') ->disableOriginalConstructor() ->getMock(); $this->itemMapper = $this->getMockBuilder('\OCA\News\Db\ItemMapper') ->disableOriginalConstructor() ->getMock(); - $this->enhancer = $this->getMockBuilder('\OCA\News\Utility\ArticleEnhancer\Enhancer') + $this->enhancer = $this->getMockBuilder('\OCA\News\ArticleEnhancer\Enhancer') ->disableOriginalConstructor() ->getMock(); $this->feedBusinessLayer = new FeedBusinessLayer($this->feedMapper, diff --git a/tests/unit/controller/TwitterFetcherTest.php b/tests/unit/controller/TwitterFetcherTest.php deleted file mode 100644 index 9bda485bd..000000000 --- a/tests/unit/controller/TwitterFetcherTest.php +++ /dev/null @@ -1,77 +0,0 @@ -. -* -*/ - -namespace OCA\News\Utility; - -require_once(__DIR__ . "/../../classloader.php"); - - -class TwitterFetcherTest extends \OCA\AppFramework\Utility\TestUtility { - - private $fetcher; - private $twitter; - - protected function setUp(){ - $this->fetcher = $this->getMockBuilder('\OCA\News\Utility\FeedFetcher') - ->disableOriginalConstructor() - ->getMock(); - $this->twitter = new TwitterFetcher($this->fetcher); - } - - - public function testCanHandle(){ - $urls = array( - 'https://twitter.com/GeorgeTakei', - 'https://www.twitter.com/GeorgeTakei', - 'http://twitter.com/GeorgeTakei', - 'http://www.twitter.com/GeorgeTakei', - 'www.twitter.com/GeorgeTakei', - 'twitter.com/GeorgeTakei' - ); - foreach($urls as $url){ - $this->assertTrue($this->twitter->canHandle($url), $url); - } - } - - - public function testCanHandleDoesNotUseApiUrls(){ - $url = 'https://api.twitter.com/1/statuses/user_timeline.rss?screen_name=GeorgeTakei'; - $this->assertFalse($this->twitter->canHandle($url)); - } - - - public function testFetch(){ - $inUrl = 'https://www.twitter.com/GeorgeTakei'; - $outUrl = 'https://api.twitter.com/1/statuses/user_timeline.rss?screen_name=GeorgeTakei'; - $out = 'hi'; - $this->fetcher->expects($this->once()) - ->method('fetch') - ->with($this->equalTo($outUrl)) - ->will($this->returnValue($out)); - - $return = $this->twitter->fetch($inUrl); - $this->assertEquals($out, $return); - } -} \ No newline at end of file diff --git a/tests/unit/fetcher/FeedFetcherTest.php b/tests/unit/fetcher/FeedFetcherTest.php new file mode 100644 index 000000000..466bcc446 --- /dev/null +++ b/tests/unit/fetcher/FeedFetcherTest.php @@ -0,0 +1,392 @@ +. +* +*/ + +namespace OCA\News\Fetcher; + +use \OCA\News\Db\Item; +use \OCA\News\Db\Feed; + +require_once(__DIR__ . "/../../classloader.php"); + + +class FeedFetcherTest extends \OCA\AppFramework\Utility\TestUtility { + + private $fetcher; + private $core; + private $coreFactory; + private $faviconFetcher; + private $url; + private $cacheDirectory; + private $cacheDuration; + private $time; + private $item; + private $purifier; + private $fetchTimeout; + + // items + private $permalink; + private $title; + private $guid; + private $pub; + private $body; + private $author; + private $authorMail; + private $enclosureLink; + + // feed + private $feedTitle; + private $feedLink; + private $feedImage; + private $webFavicon; + + protected function setUp(){ + $this->core = $this->getMockBuilder( + '\SimplePie_Core') + ->disableOriginalConstructor() + ->getMock(); + $this->coreFactory = $this->getMockBuilder( + '\OCA\AppFramework\Utility\SimplePieAPIFactory') + ->disableOriginalConstructor() + ->getMock(); + $this->coreFactory->expects($this->any()) + ->method('getCore') + ->will($this->returnValue($this->core)); + $this->item = $this->getMockBuilder( + '\SimplePie_Item') + ->disableOriginalConstructor() + ->getMock(); + $this->faviconFetcher = $this->getMockBuilder( + '\OCA\AppFramework\Utility\FaviconFetcher') + ->disableOriginalConstructor() + ->getMock(); + $this->purifier = $this->getMock('purifier', array('purify')); + $this->time = 2323; + $timeFactory = $this->getMockBuilder( + '\OCA\AppFramework\Utility\TimeFactory') + ->disableOriginalConstructor() + ->getMock(); + $timeFactory->expects($this->any()) + ->method('getTime') + ->will($this->returnValue($this->time)); + $this->cacheDuration = 100; + $this->cacheDirectory = 'dir/'; + $this->fetchTimeout = 40; + $this->fetcher = new FeedFetcher($this->getAPIMock(), + $this->coreFactory, + $this->faviconFetcher, + $timeFactory, + $this->cacheDirectory, + $this->cacheDuration, + $this->fetchTimeout, + $this->purifier); + $this->url = 'http://tests'; + + $this->permalink = 'http://permalink'; + $this->title = 'my&lt;' title'; + $this->guid = 'hey guid here'; + $this->body = 'let the bodies hit the floor test'; + $this->body2 = 'let the bodies hit the floor test'; + $this->pub = 23111; + $this->author = '<boogieman'; + $this->enclosureLink = 'http://enclosure.you'; + + $this->feedTitle = '<e;its a title'; + $this->feedLink = 'http://goatse'; + $this->feedImage = '/an/image'; + $this->webFavicon = 'http://anon.google.com'; + $this->authorMail = 'doe@joes.com'; + } + + + public function testCanHandle(){ + $url = 'google.de'; + + $this->assertTrue($this->fetcher->canHandle($url)); + } + + + public function testFetchThrowsExceptionWhenInitFailed() { + $this->core->expects($this->once()) + ->method('set_feed_url') + ->with($this->equalTo($this->url)); + $this->core->expects($this->once()) + ->method('enable_cache') + ->with($this->equalTo(true)); + $this->core->expects($this->once()) + ->method('set_timeout') + ->with($this->equalTo($this->fetchTimeout)); + $this->core->expects($this->once()) + ->method('set_cache_location') + ->with($this->equalTo($this->cacheDirectory)); + $this->core->expects($this->once()) + ->method('set_cache_duration') + ->with($this->equalTo($this->cacheDuration)); + $this->setExpectedException('\OCA\News\Fetcher\FetcherException'); + $this->fetcher->fetch($this->url); + } + + + public function testShouldCatchExceptionsAndThrowOwnException() { + $this->core->expects($this->once()) + ->method('init') + ->will($this->returnValue(true)); + $this->core->expects($this->once()) + ->method('get_items') + ->will($this->throwException(new \Exception('oh noes!'))); + $this->setExpectedException('\OCA\News\Fetcher\FetcherException'); + $this->fetcher->fetch($this->url); + } + + + private function expectCore($method, $return) { + $this->core->expects($this->once()) + ->method($method) + ->will($this->returnValue($return)); + } + + private function expectItem($method, $return) { + $this->item->expects($this->once()) + ->method($method) + ->will($this->returnValue($return)); + } + + + private function createItem($author=false, $enclosureType=null, $noPubDate=false) { + $this->purifier->expects($this->once()) + ->method('purify') + ->with($this->equalTo($this->body)) + ->will($this->returnValue($this->body)); + $this->expectItem('get_permalink', $this->permalink); + $this->expectItem('get_title', $this->title); + $this->expectItem('get_id', $this->guid); + $this->expectItem('get_content', $this->body); + + $item = new Item(); + + if($noPubDate) { + $this->expectItem('get_date', 0); + $item->setPubDate($this->time); + } else { + $this->expectItem('get_date', $this->pub); + $item->setPubDate($this->pub); + } + + $item->setStatus(0); + $item->setUnread(); + $item->setUrl($this->permalink); + $item->setTitle('my<\' title'); + $item->setGuid($this->guid); + $item->setGuidHash(md5($this->guid)); + $item->setBody($this->body2); + $item->setLastModified($this->time); + if($author) { + $mock = $this->getMock('author', array('get_name')); + $mock->expects($this->once()) + ->method('get_name') + ->will($this->returnValue($this->author)); + $this->expectItem('get_author', $mock); + $item->setAuthor(html_entity_decode($this->author)); + } else { + $mock = $this->getMock('author', array('get_name', 'get_email')); + $mock->expects($this->any()) + ->method('get_name') + ->will($this->returnValue('')); + $mock->expects($this->any()) + ->method('get_email') + ->will($this->returnValue($this->authorMail)); + + $this->expectItem('get_author', $mock); + $item->setAuthor(html_entity_decode($this->authorMail)); + } + + if($enclosureType === 'audio/ogg') { + $mock = $this->getMock('enclosure', array('get_type', 'get_link')); + $mock->expects($this->any()) + ->method('get_type') + ->will($this->returnValue($enclosureType)); + $this->expectItem('get_enclosure', $this->mock); + $item->setEnclosureMime($enclosureType); + $item->setEnclosureLink($this->enclosureLink); + } + return $item; + } + + + private function createFeed($hasFeedFavicon=false, $hasWebFavicon=false) { + $this->expectCore('get_title', $this->feedTitle); + $this->expectCore('get_permalink', $this->feedLink); + + $feed = new Feed(); + $feed->setTitle(html_entity_decode($this->feedTitle)); + $feed->setUrl($this->url); + $feed->setLink($this->feedLink); + $feed->setAdded($this->time); + + if($hasWebFavicon) { + $this->faviconFetcher->expects($this->once()) + ->method('fetch') + ->with($this->equalTo($this->feedLink)) + ->will($this->returnValue($this->webFavicon)); + $feed->setFaviconLink($this->webFavicon); + } + + if($hasFeedFavicon) { + $this->expectCore('get_image_url', $this->feedImage); + $feed->setFaviconLink($this->feedImage); + } elseif(!$hasWebFavicon) { + $feed->setFaviconLink(null); + $this->expectCore('get_image_url', null); + } + + + return $feed; + } + + + public function testFetchMapItems(){ + $this->core->expects($this->once()) + ->method('init') + ->will($this->returnValue(true)); + $item = $this->createItem(); + $feed = $this->createFeed(); + $this->expectCore('get_items', array($this->item)); + $result = $this->fetcher->fetch($this->url); + + $this->assertEquals(array($feed, array($item)), $result); + } + + + public function testFetchMapItemsNoFeedTitleUsesUrl(){ + $this->expectCore('get_title', ''); + $this->expectCore('get_permalink', $this->feedLink); + + $feed = new Feed(); + $feed->setTitle($this->url); + $feed->setUrl($this->url); + $feed->setLink($this->feedLink); + $feed->setAdded($this->time); + $feed->setFaviconLink(null); + + $this->core->expects($this->once()) + ->method('init') + ->will($this->returnValue(true)); + $item = $this->createItem(); + $this->expectCore('get_items', array($this->item)); + $result = $this->fetcher->fetch($this->url); + + $this->assertEquals(array($feed, array($item)), $result); + } + + public function testFetchMapItemsAuthorExists(){ + $this->core->expects($this->once()) + ->method('init') + ->will($this->returnValue(true)); + $item = $this->createItem(true); + $feed = $this->createFeed(true); + $this->expectCore('get_items', array($this->item)); + $result = $this->fetcher->fetch($this->url); + + $this->assertEquals(array($feed, array($item)), $result); + } + + + public function testFetchMapItemsEnclosureExists(){ + $this->core->expects($this->once()) + ->method('init') + ->will($this->returnValue(true)); + $item = $this->createItem(false, true); + $feed = $this->createFeed(false, true); + $this->expectCore('get_items', array($this->item)); + $result = $this->fetcher->fetch($this->url); + + $this->assertEquals(array($feed, array($item)), $result); + } + + + public function testFetchMapItemsNoPubdate(){ + $this->core->expects($this->once()) + ->method('init') + ->will($this->returnValue(true)); + $item = $this->createItem(false, true, true); + $feed = $this->createFeed(false, true); + $this->expectCore('get_items', array($this->item)); + $result = $this->fetcher->fetch($this->url); + + $this->assertEquals(array($feed, array($item)), $result); + } + + + public function testFetchMapItemsGetFavicon() { + $this->expectCore('get_title', $this->feedTitle); + $this->expectCore('get_permalink', $this->feedLink); + + $feed = new Feed(); + $feed->setTitle(html_entity_decode($this->feedTitle)); + $feed->setUrl($this->url); + $feed->setLink($this->feedLink); + $feed->setAdded($this->time); + $feed->setFaviconLink($this->webFavicon); + + $this->core->expects($this->once()) + ->method('init') + ->will($this->returnValue(true)); + + $this->faviconFetcher->expects($this->once()) + ->method('fetch') + ->will($this->returnValue($this->webFavicon)); + + $item = $this->createItem(false, true); + $this->expectCore('get_items', array($this->item)); + $result = $this->fetcher->fetch($this->url /*, true*/); + + $this->assertEquals(array($feed, array($item)), $result); + } + + public function testFetchMapItemsNoGetFavicon() { + $this->expectCore('get_title', $this->feedTitle); + $this->expectCore('get_permalink', $this->feedLink); + + $feed = new Feed(); + $feed->setTitle(html_entity_decode($this->feedTitle)); + $feed->setUrl($this->url); + $feed->setLink($this->feedLink); + $feed->setAdded($this->time); + + $this->core->expects($this->once()) + ->method('init') + ->will($this->returnValue(true)); + + $this->faviconFetcher->expects($this->never()) + ->method('fetch'); + + $item = $this->createItem(false, true); + $this->expectCore('get_items', array($this->item)); + $result = $this->fetcher->fetch($this->url, false); + + $this->assertEquals(array($feed, array($item)), $result); + } + + +} diff --git a/tests/unit/fetcher/FetcherTest.php b/tests/unit/fetcher/FetcherTest.php new file mode 100644 index 000000000..41f33129c --- /dev/null +++ b/tests/unit/fetcher/FetcherTest.php @@ -0,0 +1,108 @@ +