diff options
author | Bernhard Posselt <dev@bernhard-posselt.com> | 2013-09-27 20:03:00 +0200 |
---|---|---|
committer | Bernhard Posselt <dev@bernhard-posselt.com> | 2013-09-27 20:03:16 +0200 |
commit | d6066c382083dedf426abbfd5f2f1df725c68aaf (patch) | |
tree | 4406388c5a827ebe8ed62234e11e49c35e16f3a5 /fetcher | |
parent | 30080c220a3f5013875d6a52e4dfe634719cbeca (diff) |
use separate directories for article enhancers and fetchers
Diffstat (limited to 'fetcher')
-rw-r--r-- | fetcher/feedfetcher.php | 213 | ||||
-rw-r--r-- | fetcher/fetcher.php | 52 | ||||
-rw-r--r-- | fetcher/fetcherexception.php | 38 | ||||
-rw-r--r-- | fetcher/ifeedfetcher.php | 47 |
4 files changed, 350 insertions, 0 deletions
diff --git a/fetcher/feedfetcher.php b/fetcher/feedfetcher.php
new file mode 100644
index 000000000..fdc062d6c
--- /dev/null
+++ b/fetcher/feedfetcher.php
@@ -0,0 +1,213 @@
<?php

/**
* ownCloud - News
*
* @author Alessandro Cosentino
* @author Bernhard Posselt
* @copyright 2012 Alessandro Cosentino cosenal@gmail.com
* @copyright 2012 Bernhard Posselt dev@bernhard-posselt.com
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU AFFERO GENERAL PUBLIC LICENSE
* License as published by the Free Software Foundation; either
* version 3 of the License, or any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU AFFERO GENERAL PUBLIC LICENSE for more details.
*
* You should have received a copy of the GNU Affero General Public
* License along with this library. If not, see <http://www.gnu.org/licenses/>.
*
*/

namespace OCA\News\Fetcher;

use \OCA\AppFramework\Core\API;
use \OCA\AppFramework\Utility\FaviconFetcher;
use \OCA\AppFramework\Utility\SimplePieAPIFactory;
use \OCA\AppFramework\Utility\TimeFactory;

use \OCA\News\Db\Item;
use \OCA\News\Db\Feed;


/**
 * Catch-all fetcher: lets SimplePie parse the given url and converts the
 * result into a Feed entity plus a list of Item entities.
 */
class FeedFetcher implements IFeedFetcher {

    private $api;
    private $cacheDirectory;
    private $cacheDuration;
    private $faviconFetcher;
    private $simplePieFactory;
    private $fetchTimeout;
    private $time;
    private $purifier;

    /**
     * @param API $api stored on the instance (not used by the methods below)
     * @param SimplePieAPIFactory $simplePieFactory provides the SimplePie
     * instance through getCore()
     * @param FaviconFetcher $faviconFetcher used to look up the favicon of
     * the feed's page
     * @param TimeFactory $time source of the current time via getTime()
     * @param mixed $cacheDirectory value handed to SimplePie's
     * set_cache_location()
     * @param mixed $cacheDuration value handed to SimplePie's
     * set_cache_duration()
     * @param mixed $fetchTimeout value handed to SimplePie's set_timeout()
     * @param object $purifier object exposing a purify($html) method that is
     * applied to every item body
     */
    public function __construct(API $api,
                                SimplePieAPIFactory $simplePieFactory,
                                FaviconFetcher $faviconFetcher,
                                TimeFactory $time,
                                $cacheDirectory,
                                $cacheDuration,
                                $fetchTimeout,
                                $purifier){
        $this->api = $api;
        $this->cacheDirectory = $cacheDirectory;
        $this->cacheDuration = $cacheDuration;
        $this->faviconFetcher = $faviconFetcher;
        $this->simplePieFactory = $simplePieFactory;
        $this->time = $time;
        $this->purifier = $purifier;
        $this->fetchTimeout = $fetchTimeout;
    }


    /**
     * This fetcher handles all the remaining urls therefore always returns true
     * @param string $url the url that should be fetched
     * @return boolean always true
     */
    public function canHandle($url){
        return true;
    }


    /**
     * Fetch a feed from remote
     * @param string $url remote url of the feed
     * @param boolean $getFavicon if true the favicon link of the feed is
     * looked up and set on the returned feed
     * @throws FetcherException if simple pie fails to initialize or if
     * building the feed or one of its items throws
     * @return array an array containing the new feed and its items
     */
    public function fetch($url, $getFavicon=true) {
        $simplePie = $this->simplePieFactory->getCore();
        $simplePie->set_feed_url($url);
        $simplePie->enable_cache(true);
        $simplePie->set_timeout($this->fetchTimeout);
        $simplePie->set_cache_location($this->cacheDirectory);
        $simplePie->set_cache_duration($this->cacheDuration);

        if (!$simplePie->init()) {
            throw new FetcherException('Could not initialize simple pie on feed with url ' . $url);
        }

        try {
            // after init() the same SimplePie object is queried for the
            // feed's items and metadata
            $items = array();
            $feedItems = $simplePie->get_items();
            if ($feedItems) {
                foreach ($feedItems as $feedItem) {
                    $items[] = $this->buildItem($feedItem);
                }
            }

            $feed = $this->buildFeed($simplePie, $url, $getFavicon);

            return array($feed, $items);

        } catch(\Exception $ex){
            // callers only have to deal with one exception type
            throw new FetcherException($ex->getMessage());
        }
    }


    /**
     * Decodes HTML entities two times in a row so that doubly escaped input
     * (e.g. &amp;amp;) ends up as plain text
     * @param string $string the escaped text
     * @return string the decoded text
     */
    private function decodeTwice($string) {
        $decoded = html_entity_decode(
            html_entity_decode((string) $string, ENT_QUOTES, 'UTF-8'),
            ENT_QUOTES, 'UTF-8'
        );

        // behold! &apos; is not converted by html_entity_decode with its
        // default HTML 4.01 doctype, that's why we need to do it manually
        return str_replace('&apos;', '\'', $decoded);
    }


    /**
     * Converts one SimplePie item into an Item entity
     * @param object $simplePieItem the item as returned by SimplePie's
     * get_items()
     * @return Item the unread item with url, title, guid, body, dates and,
     * if available, author and audio enclosure set
     */
    protected function buildItem($simplePieItem) {
        $item = new Item();
        $item->setStatus(0);
        $item->setUnread();
        $item->setUrl($this->decodeTwice($simplePieItem->get_permalink()));

        // unescape content because angularjs helps against XSS
        $item->setTitle($this->decodeTwice($simplePieItem->get_title()));
        $guid = $simplePieItem->get_id();
        $item->setGuid($guid);

        // links should always open in a new window. Only real anchor tags
        // are touched: the lookahead requires whitespace or '>' after "<a"
        // so that tags like <abbr> or <address> stay intact
        $body = $this->purifier->purify($simplePieItem->get_content());
        $item->setBody(
            preg_replace('/<a(?=[\s>])/', '<a target="_blank"', $body)
        );

        // pubdate is not required. if not given use the current date
        $date = $simplePieItem->get_date('U');
        if(!$date) {
            $date = $this->time->getTime();
        }

        $item->setPubDate($date);

        $item->setLastModified($this->time->getTime());

        // prefer the author's name and fall back to the email address
        $author = $simplePieItem->get_author();
        if ($author !== null) {
            $name = $this->decodeTwice($author->get_name());
            if ($name) {
                $item->setAuthor($name);
            } else {
                $item->setAuthor($this->decodeTwice($author->get_email()));
            }
        }

        // TODO: make it work for video files also
        $enclosure = $simplePieItem->get_enclosure();
        if($enclosure !== null) {
            $enclosureType = $enclosure->get_type();
            if(stripos($enclosureType, "audio/") !== false) {
                $item->setEnclosureMime($enclosureType);
                $item->setEnclosureLink($enclosure->get_link());
            }
        }

        return $item;
    }


    /**
     * Converts the initialized SimplePie object into a Feed entity
     * @param object $simplePieFeed the SimplePie object after init()
     * @param string $url the url the feed was fetched from
     * @param boolean $getFavicon if true the favicon link is looked up
     * @return Feed the feed with title, url, link, added date and
     * optionally the favicon link set
     */
    protected function buildFeed($simplePieFeed, $url, $getFavicon) {
        $feed = new Feed();

        // unescape content because angularjs helps against XSS
        $title = $this->decodeTwice($simplePieFeed->get_title());

        // if there is no title use the url
        if(!$title) {
            $title = $url;
        }

        $feed->setTitle($title);
        $feed->setUrl($url);
        $feed->setLink($simplePieFeed->get_permalink());
        $feed->setAdded($this->time->getTime());

        if ($getFavicon) {
            // use the favicon from the page first since most feeds use a weird image
            $favicon = $this->faviconFetcher->fetch($feed->getLink());

            if (!$favicon) {
                $favicon = $simplePieFeed->get_image_url();
            }

            $feed->setFaviconLink($favicon);
        }

        return $feed;
    }

}
diff --git a/fetcher/fetcher.php b/fetcher/fetcher.php
new file mode 100644
index 000000000..c86db6b64
--- /dev/null
+++ b/fetcher/fetcher.php
@@ -0,0 +1,52 @@
<?php

/**
* ownCloud - News
*
* @author Alessandro Cosentino
* @author Bernhard Posselt
* @copyright 2012 Alessandro Cosentino cosenal@gmail.com
* @copyright 2012 Bernhard Posselt dev@bernhard-posselt.com
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU AFFERO GENERAL PUBLIC LICENSE
* License as published by the Free Software Foundation; either
* version 3 of the License, or any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU AFFERO GENERAL PUBLIC LICENSE for more details.
*
* You should have received a copy of the GNU Affero General Public
* License along with this library. If not, see <http://www.gnu.org/licenses/>.
*
*/

namespace OCA\News\Fetcher;


/**
 * Dispatches a url to the first registered fetcher that claims to be able
 * to handle it
 */
class Fetcher {

    private $fetchers;

    public function __construct(){
        $this->fetchers = array();
    }


    /**
     * Appends a fetcher to the list. Fetchers are asked in the order in
     * which they were registered
     * @param IFeedFetcher $fetcher the fetcher to add
     */
    public function registerFetcher(IFeedFetcher $fetcher){
        $this->fetchers[] = $fetcher;
    }


    /**
     * Fetches the url with the first registered fetcher whose canHandle()
     * returns true
     * @param string $url the url of the feed
     * @param boolean $getFavicon passed through to the chosen fetcher
     * @throws FetcherException if no registered fetcher can handle the url
     * or if the chosen fetcher fails
     * @return array whatever the chosen fetcher's fetch() returns
     */
    public function fetch($url, $getFavicon=true){
        foreach($this->fetchers as $fetcher){
            if($fetcher->canHandle($url)){
                return $fetcher->fetch($url, $getFavicon);
            }
        }

        // falling off the end used to return null silently, which callers
        // expecting a feed/items pair cannot tell apart from a result
        throw new FetcherException(
            'No registered fetcher can handle the url ' . $url
        );
    }


}
\ No newline at end of file
diff --git a/fetcher/fetcherexception.php b/fetcher/fetcherexception.php
new file mode 100644
index 000000000..a9082dd30
--- /dev/null
+++ b/fetcher/fetcherexception.php
@@ -0,0 +1,38 @@
<?php

/**
* ownCloud - News
*
* @author Alessandro Cosentino
* @author Bernhard Posselt
* @copyright 2012 Alessandro Cosentino cosenal@gmail.com
* @copyright 2012 Bernhard Posselt dev@bernhard-posselt.com
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU AFFERO GENERAL PUBLIC LICENSE
* License as published by the Free Software Foundation; either
* version 3 of the License, or any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU AFFERO GENERAL PUBLIC LICENSE for more details.
*
* You should have received a copy of the GNU Affero General Public
* License along with this library. If not, see <http://www.gnu.org/licenses/>.
*
*/

namespace OCA\News\Fetcher;

/**
 * Exception type for failures that happen while fetching a feed
 */
class FetcherException extends \Exception {

    /**
     * Constructor
     * @param string $msg the error message
     * @param \Exception|null $previous optional exception that caused this
     * one; it is made available through getPrevious()
     */
    public function __construct($msg, $previous=null){
        // the exception code is not used, so the default of 0 is passed on
        parent::__construct($msg, 0, $previous);
    }

}
\ No newline at end of file
diff --git a/fetcher/ifeedfetcher.php b/fetcher/ifeedfetcher.php
new file mode 100644
index 000000000..5fa7fc678
--- /dev/null
+++ b/fetcher/ifeedfetcher.php
@@ -0,0 +1,47 @@
<?php

/**
* ownCloud - News
*
* @author Alessandro Cosentino
* @author Bernhard Posselt
* @copyright 2012 Alessandro Cosentino cosenal@gmail.com
* @copyright 2012 Bernhard Posselt dev@bernhard-posselt.com
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU AFFERO GENERAL PUBLIC LICENSE
* License as published by the Free Software Foundation; either
* version 3 of the License, or any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU AFFERO GENERAL PUBLIC LICENSE for more details.
*
* You should have received a copy of the GNU Affero General Public
* License along with this library. If not, see <http://www.gnu.org/licenses/>.
*
*/

namespace OCA\News\Fetcher;

/**
 * Contract for classes that turn a url into a feed and its items
 */
interface IFeedFetcher {

    /**
     * Fetches the feed behind a url
     * @param string $url the url that the user entered in the add feed
     * dialog box
     * @param boolean $getFavicon whether the favicon of the feed should be
     * fetched as well, defaults to true
     * @throws FetcherException if the fetcher encounters a problem
     * @return array with the first element being the feed and the
     * second element being an array of items. Those items will be saved
     * into the database
     */
    public function fetch($url, $getFavicon=true);

    /**
     * Tells whether this fetcher is responsible for a url
     * @param string $url the url that should be fetched
     * @return boolean if the fetcher can handle the url. This fetcher will be
     * used exclusively to fetch the feed and the items of the page
     */
    public function canHandle($url);

}
\ No newline at end of file |