summary | refs | log | tree | commit | diff | stats
path: root/fetcher
diff options
context:
space:
mode:
author: Bernhard Posselt <dev@bernhard-posselt.com> 2016-07-23 21:24:54 +0200
committer: Bernhard Posselt <dev@bernhard-posselt.com> 2016-07-23 21:24:54 +0200
commit: 004fcbbcc7609ca83807f2e38967ef54f469bf72 (patch)
tree: 49eb99b4ea92b2045793fc567f719b31ec7f9042 /fetcher
parent: 60abc0ed4438c9b6fda245b0dc33cb483bc2aeaf (diff)
Move to new directory structure
Diffstat (limited to 'fetcher')
-rw-r--r-- fetcher/feedfetcher.php 302
-rw-r--r-- fetcher/fetcher.php 69
-rw-r--r-- fetcher/fetcherexception.php 26
-rw-r--r-- fetcher/ifeedfetcher.php 48
-rw-r--r-- fetcher/youtubefetcher.php 82
5 files changed, 0 insertions, 527 deletions
diff --git a/fetcher/feedfetcher.php b/fetcher/feedfetcher.php
deleted file mode 100644
index beffe9051..000000000
--- a/fetcher/feedfetcher.php
+++ /dev/null
@@ -1,302 +0,0 @@
-<?php
-/**
- * ownCloud - News
- *
- * This file is licensed under the Affero General Public License version 3 or
- * later. See the COPYING file.
- *
- * @author Alessandro Cosentino <cosenal@gmail.com>
- * @author Bernhard Posselt <dev@bernhard-posselt.com>
- * @copyright Alessandro Cosentino 2012
- * @copyright Bernhard Posselt 2012, 2014
- */
-
-namespace OCA\News\Fetcher;
-
-use Exception;
-
-use PicoFeed\Parser\MalFormedXmlException;
-use PicoFeed\Reader\Reader;
-use PicoFeed\Parser\Parser;
-use PicoFeed\Reader\SubscriptionNotFoundException;
-use PicoFeed\Reader\UnsupportedFeedFormatException;
-use PicoFeed\Client\InvalidCertificateException;
-use PicoFeed\Client\InvalidUrlException;
-use PicoFeed\Client\MaxRedirectException;
-use PicoFeed\Client\MaxSizeException;
-use PicoFeed\Client\TimeoutException;
-use PicoFeed\Client\ForbiddenException;
-use PicoFeed\Client\UnauthorizedException;
-
-use OCP\IL10N;
-
-use OCA\News\Db\Item;
-use OCA\News\Db\Feed;
-use OCA\News\Utility\PicoFeedFaviconFactory;
-use OCA\News\Utility\PicoFeedReaderFactory;
-use OCA\News\Utility\Time;
-
-/**
- * Catch-all feed fetcher: downloads a feed through the PicoFeed reader and
- * converts the parsed result into Feed and Item entities.
- */
-class FeedFetcher implements IFeedFetcher {
-
-    private $faviconFactory;
-    private $reader;
-    private $l10n;
-    private $time;
-
-    /**
-     * @param Reader $reader discovers the feed and creates its parser
-     * @param PicoFeedFaviconFactory $faviconFactory builds the favicon finder
-     * @param IL10N $l10n translates the error messages
-     * @param Time $time provides the timestamp stored as the feed's added date
-     */
-    public function __construct(Reader $reader,
-                                PicoFeedFaviconFactory $faviconFactory,
-                                IL10N $l10n,
-                                Time $time) {
-        $this->faviconFactory = $faviconFactory;
-        $this->reader = $reader;
-        $this->time = $time;
-        $this->l10n = $l10n;
-    }
-
-
-    /**
-     * This fetcher handles all the remaining urls therefore always returns true
-     */
-    public function canHandle($url) {
-        return true;
-    }
-
-
-    /**
-     * Fetch a feed from remote
-     * @param string $url remote url of the feed
-     * @param boolean $getFavicon if the favicon should also be fetched,
-     * defaults to true
-     * @param string $lastModified a last modified value from an http header,
-     * defaults to null. It is handed to the reader together with the etag;
-     * if the reader reports the feed as unmodified no results are fetched
-     * @param string $etag an etag from an http header, defaults to null
-     * @param bool $fullTextEnabled if true tells the fetcher to enhance the
-     * articles by fetching custom enhanced content
-     * @param string $basicAuthUser if given, basic auth is set for this feed
-     * @param string $basicAuthPassword if given, basic auth is set for this
-     * feed. Ignored if user is null, empty or consists only of whitespace
-     * @throws FetcherException if it fails
-     * @return array an array containing the new feed and its items, first
-     * element being the Feed and second element being an array of Items.
-     * Both elements are null if the feed was not modified
-     */
-    public function fetch($url, $getFavicon = true, $lastModified = null,
-                          $etag = null, $fullTextEnabled = false,
-                          $basicAuthUser = null, $basicAuthPassword = null) {
-        try {
-            if ($basicAuthUser !== null && trim($basicAuthUser) !== '') {
-                $resource = $this->reader->discover($url, $lastModified, $etag,
-                                                    $basicAuthUser,
-                                                    $basicAuthPassword);
-            } else {
-                $resource = $this->reader->discover($url, $lastModified, $etag);
-            }
-
-            if (!$resource->isModified()) {
-                return [null, null];
-            }
-
-            $location = $resource->getUrl();
-            $etag = $resource->getEtag();
-            $content = $resource->getContent();
-            $encoding = $resource->getEncoding();
-            $lastModified = $resource->getLastModified();
-
-            $parser = $this->reader->getParser($location, $content, $encoding);
-
-            if ($fullTextEnabled) {
-                $parser->enableContentGrabber();
-            }
-
-            $parsedFeed = $parser->execute();
-
-            $feed = $this->buildFeed(
-                $parsedFeed, $url, $getFavicon, $lastModified, $etag, $location
-            );
-
-            $items = [];
-            foreach ($parsedFeed->getItems() as $item) {
-                $items[] = $this->buildItem($item, $parsedFeed);
-            }
-
-            return [$feed, $items];
-
-        } catch (Exception $ex) {
-            // handleError() always throws, so nothing is returned from here
-            $this->handleError($ex, $url);
-        }
-
-    }
-
-
-    /**
-     * Converts any exception raised while fetching into a FetcherException
-     * carrying a translated, user readable message. Exceptions that are not
-     * explicitly mapped keep their original message
-     * @param Exception $ex the exception that was caught
-     * @param string $url the url that was being fetched
-     * @throws FetcherException always
-     */
-    private function handleError(Exception $ex, $url) {
-        $msg = $ex->getMessage();
-
-        if ($ex instanceof MalFormedXmlException) {
-            $msg = $this->l10n->t('Feed contains invalid XML');
-        } else if ($ex instanceof SubscriptionNotFoundException) {
-            // the url is passed as a parameter instead of being concatenated
-            // so that the translatable text stays constant
-            $msg = $this->l10n->t('Feed not found: either the website ' .
-                'does not provide a feed or blocks access. To rule out ' .
-                'blocking, try to download the feed on your server\'s ' .
-                'command line using curl: curl %s', [$url]);
-        } else if ($ex instanceof UnsupportedFeedFormatException) {
-            $msg = $this->l10n->t('Detected feed format is not supported');
-        } else if ($ex instanceof InvalidCertificateException) {
-            $msg = $this->buildCurlSslErrorMessage($ex->getCode());
-        } else if ($ex instanceof InvalidUrlException) {
-            $msg = $this->l10n->t('Website not found');
-        } else if ($ex instanceof MaxRedirectException) {
-            $msg = $this->l10n->t('More redirects than allowed, aborting');
-        } else if ($ex instanceof MaxSizeException) {
-            $msg = $this->l10n->t('Bigger than maximum allowed size');
-        } else if ($ex instanceof TimeoutException) {
-            $msg = $this->l10n->t('Request timed out');
-        } else if ($ex instanceof UnauthorizedException) {
-            $msg = $this->l10n->t('Required credentials for feed were ' .
-                'either missing or incorrect');
-        } else if ($ex instanceof ForbiddenException) {
-            $msg = $this->l10n->t('Forbidden to access feed');
-        }
-
-        throw new FetcherException($msg);
-    }
-
-    /**
-     * Maps the code of an InvalidCertificateException (a curl error number)
-     * to a translated explanation
-     * @param int $errorCode the exception code
-     * @return string the translated message, a generic one for unknown codes
-     */
-    private function buildCurlSslErrorMessage($errorCode) {
-        switch ($errorCode) {
-            case 35: // CURLE_SSL_CONNECT_ERROR
-                return $this->l10n->t(
-                    'Certificate error: A problem occurred ' .
-                    'somewhere in the SSL/TLS handshake. Could be ' .
-                    'certificates (file formats, paths, permissions), ' .
-                    'passwords, and others.'
-                );
-            case 51: // CURLE_PEER_FAILED_VERIFICATION
-                return $this->l10n->t(
-                    'Certificate error: The remote server\'s SSL ' .
-                    'certificate or SSH md5 fingerprint was deemed not OK.'
-                );
-            case 58: // CURLE_SSL_CERTPROBLEM
-                return $this->l10n->t(
-                    'Certificate error: Problem with the local client ' .
-                    'certificate.'
-                );
-            case 59: // CURLE_SSL_CIPHER
-                return $this->l10n->t(
-                    'Certificate error: Couldn\'t use specified cipher.'
-                );
-            case 60: // CURLE_SSL_CACERT
-                return $this->l10n->t(
-                    'Certificate error: Peer certificate cannot be ' .
-                    'authenticated with known CA certificates.'
-                );
-            case 64: // CURLE_USE_SSL_FAILED
-                return $this->l10n->t(
-                    'Certificate error: Requested FTP SSL level failed.'
-                );
-            case 66: // CURLE_SSL_ENGINE_INITFAILED
-                return $this->l10n->t(
-                    'Certificate error: Initiating the SSL Engine failed.'
-                );
-            case 77: // CURLE_SSL_CACERT_BADFILE
-                return $this->l10n->t(
-                    'Certificate error: Problem with reading the SSL CA ' .
-                    'cert (path? access rights?)'
-                );
-            case 83: // CURLE_SSL_ISSUER_ERROR
-                return $this->l10n->t(
-                    'Certificate error: Issuer check failed'
-                );
-            default:
-                return $this->l10n->t('Unknown SSL certificate error!');
-        }
-    }
-
-    /**
-     * Decodes HTML entities two times in a row so that doubly escaped input
-     * (e.g. &amp;amp;) ends up as the plain character
-     * @param string $string the text to decode
-     * @return string the decoded text
-     */
-    private function decodeTwice($string) {
-        return html_entity_decode(
-            html_entity_decode(
-                $string, ENT_QUOTES | ENT_HTML5, 'UTF-8'
-            ),
-            ENT_QUOTES | ENT_HTML5, 'UTF-8'
-        );
-    }
-
-
-    /**
-     * Decides whether an item is written right to left. The language of the
-     * item wins, the language of the feed is only used if the item has none
-     * @param object $parsedItem the item as returned by the parser
-     * @param object $parsedFeed the feed as returned by the parser
-     * @return mixed the result of Parser::isLanguageRTL()
-     */
-    protected function determineRtl($parsedItem, $parsedFeed) {
-        $itemLang = $parsedItem->getLanguage();
-        $feedLang = $parsedFeed->getLanguage();
-
-        return Parser::isLanguageRTL($itemLang ? $itemLang : $feedLang);
-    }
-
-
-    /**
-     * Creates an unread Item entity from a parsed feed item
-     * @param object $parsedItem the item as returned by the parser
-     * @param object $parsedFeed the feed the item belongs to, used as
-     * fallback for the language
-     * @return Item the populated item
-     */
-    protected function buildItem($parsedItem, $parsedFeed) {
-        $item = new Item();
-        $item->setUnread();
-        $item->setUrl($parsedItem->getUrl());
-        $item->setGuid($parsedItem->getId());
-        $item->setGuidHash($item->getGuid());
-        $item->setPubDate($parsedItem->getDate()->getTimestamp());
-        $item->setRtl($this->determineRtl($parsedItem, $parsedFeed));
-
-        // unescape content because angularjs helps against XSS
-        $item->setTitle($this->decodeTwice($parsedItem->getTitle()));
-        $item->setAuthor($this->decodeTwice($parsedItem->getAuthor()));
-
-        // purification is done in the service layer
-        $body = $parsedItem->getContent();
-        $bodyEncoding = mb_detect_encoding($body);
-        if ($bodyEncoding === false) {
-            // detection can fail; false is not a valid source encoding for
-            // mb_convert_encoding, so fall back to UTF-8
-            $bodyEncoding = 'UTF-8';
-        }
-        $body = mb_convert_encoding($body, 'HTML-ENTITIES', $bodyEncoding);
-        $item->setBody($body);
-
-        // only audio and video enclosures are stored on the item
-        $enclosureUrl = $parsedItem->getEnclosureUrl();
-        if ($enclosureUrl) {
-            $enclosureType = $parsedItem->getEnclosureType();
-            if (stripos($enclosureType, 'audio/') !== false ||
-                stripos($enclosureType, 'video/') !== false
-            ) {
-                $item->setEnclosureMime($enclosureType);
-                $item->setEnclosureLink($enclosureUrl);
-            }
-        }
-
-        $item->generateSearchIndex();
-
-        return $item;
-    }
-
-
-    /**
-     * Creates a Feed entity from a parsed feed
-     * @param object $parsedFeed the feed as returned by the parser
-     * @param string $url the url used to add the feed
-     * @param boolean $getFavicon if true the favicon is looked up as well
-     * @param string $modified the last modified value of the response
-     * @param string $etag the etag of the response
-     * @param string $location the url where the feed was found, also used
-     * as link if the feed does not provide a site url
-     * @return Feed the populated feed
-     */
-    protected function buildFeed($parsedFeed, $url, $getFavicon, $modified,
-                                 $etag, $location) {
-        $feed = new Feed();
-
-        $link = $parsedFeed->getSiteUrl();
-
-        if (!$link) {
-            $link = $location;
-        }
-
-        // unescape content because angularjs helps against XSS
-        $title = strip_tags($this->decodeTwice($parsedFeed->getTitle()));
-        $feed->setTitle($title);
-        $feed->setUrl($url);  // the url used to add the feed
-        $feed->setLocation($location);  // the url where the feed was found
-        $feed->setLink($link);  // <link> attribute in the feed
-        $feed->setHttpLastModified($modified);
-        $feed->setHttpEtag($etag);
-        $feed->setAdded($this->time->getTime());
-
-        if ($getFavicon) {
-            $faviconFetcher = $this->faviconFactory->build();
-            $favicon = $faviconFetcher->find($feed->getLink());
-            $feed->setFaviconLink($favicon);
-        }
-
-        return $feed;
-    }
-
-}
diff --git a/fetcher/fetcher.php b/fetcher/fetcher.php
deleted file mode 100644
index 43c9e7a3f..000000000
--- a/fetcher/fetcher.php
+++ /dev/null
@@ -1,69 +0,0 @@
-<?php
-/**
- * ownCloud - News
- *
- * This file is licensed under the Affero General Public License version 3 or
- * later. See the COPYING file.
- *
- * @author Alessandro Cosentino <cosenal@gmail.com>
- * @author Bernhard Posselt <dev@bernhard-posselt.com>
- * @copyright Alessandro Cosentino 2012
- * @copyright Bernhard Posselt 2012, 2014
- */
-
-namespace OCA\News\Fetcher;
-
-
-/**
- * Dispatches a fetch request to the first registered fetcher that declares
- * itself able to handle the url.
- */
-class Fetcher {
-
-    private $fetchers;
-
-    public function __construct(){
-        $this->fetchers = [];
-    }
-
-
-    /**
-     * Appends a fetcher to the list of candidates. Candidates are asked in
-     * the order in which they were registered
-     * @param IFeedFetcher $fetcher the fetcher
-     */
-    public function registerFetcher(IFeedFetcher $fetcher){
-        $this->fetchers[] = $fetcher;
-    }
-
-    /**
-     * Fetch a feed from remote using the first fetcher that can handle it
-     * @param string $url remote url of the feed
-     * @param boolean $getFavicon if the favicon should also be fetched,
-     * defaults to true
-     * @param string $lastModified a last modified value from an http header,
-     * defaults to null
-     * @param string $etag an etag from an http header, defaults to null
-     * @param bool $fullTextEnabled if true tells the fetcher to enhance the
-     * articles by fetching custom enhanced content
-     * @param string $basicAuthUser if given, basic auth is set for this feed
-     * @param string $basicAuthPassword if given, basic auth is set for this
-     * feed
-     * @throws FetcherException if the selected fetcher fails
-     * @return array whatever the selected fetcher returns: first element
-     * being the Feed and second element being an array of Items. If no
-     * registered fetcher can handle the url, [null, []] is returned
-     */
-    public function fetch($url, $getFavicon=true, $lastModified=null,
-                          $etag=null, $fullTextEnabled=false,
-                          $basicAuthUser=null, $basicAuthPassword=null) {
-        foreach ($this->fetchers as $candidate) {
-            if (!$candidate->canHandle($url)) {
-                continue;
-            }
-
-            // the first match wins, later fetchers are never consulted
-            return $candidate->fetch(
-                $url, $getFavicon, $lastModified, $etag,
-                $fullTextEnabled, $basicAuthUser, $basicAuthPassword
-            );
-        }
-
-        return [null, []];
-    }
-
-
-}
diff --git a/fetcher/fetcherexception.php b/fetcher/fetcherexception.php
deleted file mode 100644
index 27dd42f39..000000000
--- a/fetcher/fetcherexception.php
+++ /dev/null
@@ -1,26 +0,0 @@
-<?php
-/**
- * ownCloud - News
- *
- * This file is licensed under the Affero General Public License version 3 or
- * later. See the COPYING file.
- *
- * @author Alessandro Cosentino <cosenal@gmail.com>
- * @author Bernhard Posselt <dev@bernhard-posselt.com>
- * @copyright Alessandro Cosentino 2012
- * @copyright Bernhard Posselt 2012, 2014
- */
-
-namespace OCA\News\Fetcher;
-
-/**
- * Thrown by fetchers when a feed can not be fetched or parsed.
- */
-class FetcherException extends \Exception {
-
-    /**
-     * Constructor
-     * @param string $msg the error message
-     * @param int $code optional error code, defaults to 0
-     * @param \Exception|null $previous optional exception that caused this
-     * one, kept so that the original failure is not lost
-     */
-    public function __construct($msg, $code = 0, $previous = null){
-        parent::__construct($msg, $code, $previous);
-    }
-
-} \ No newline at end of file
diff --git a/fetcher/ifeedfetcher.php b/fetcher/ifeedfetcher.php
deleted file mode 100644
index e854d83fe..000000000
--- a/fetcher/ifeedfetcher.php
+++ /dev/null
@@ -1,48 +0,0 @@
-<?php
-/**
- * ownCloud - News
- *
- * This file is licensed under the Affero General Public License version 3 or
- * later. See the COPYING file.
- *
- * @author Alessandro Cosentino <cosenal@gmail.com>
- * @author Bernhard Posselt <dev@bernhard-posselt.com>
- * @copyright Alessandro Cosentino 2012
- * @copyright Bernhard Posselt 2012, 2014
- */
-
-namespace OCA\News\Fetcher;
-
-/**
- * Contract for classes that can download a feed and turn it into entities.
- */
-interface IFeedFetcher {
-
-    /**
-     * Fetch a feed from remote
-     * @param string $url remote url of the feed
-     * @param boolean $getFavicon if the favicon should also be fetched,
-     * defaults to true
-     * @param string $lastModified a last modified value from an http header,
-     * defaults to null
-     * @param string $etag an etag from an http header, defaults to null
-     * @param bool $fullTextEnabled if true tells the fetcher to enhance the
-     * articles by fetching custom enhanced content
-     * @param string $basicAuthUser if given, basic auth is set for this feed
-     * @param string $basicAuthPassword if given, basic auth is set for this
-     * feed. Ignored if user is null or an empty string
-     * @throws FetcherException if the fetcher encounters a problem
-     * @return array an array containing the new feed and its items, first
-     * element being the Feed and second element being an array of Items
-     */
-    public function fetch(
-        $url,
-        $getFavicon=true,
-        $lastModified=null,
-        $etag=null,
-        $fullTextEnabled=false,
-        $basicAuthUser=null,
-        $basicAuthPassword=null
-    );
-
-    /**
-     * @param string $url the url that should be fetched
-     * @return boolean if the fetcher can handle the url. This fetcher will be
-     * used exclusively to fetch the feed and the items of the page
-     */
-    public function canHandle($url);
-
-}
diff --git a/fetcher/youtubefetcher.php b/fetcher/youtubefetcher.php
deleted file mode 100644
index 3752ba197..000000000
--- a/fetcher/youtubefetcher.php
+++ /dev/null
@@ -1,82 +0,0 @@
-<?php
-/**
- * ownCloud - News
- *
- * This file is licensed under the Affero General Public License version 3 or
- * later. See the COPYING file.
- *
- * @author Bernhard Posselt <dev@bernhard-posselt.com>
- * @copyright Bernhard Posselt 2012, 2014
- */
-
-namespace OCA\News\Fetcher;
-
-
-/**
- * Fetcher for YouTube playlist urls: rewrites the url to the playlist feed
- * and delegates the actual work to the FeedFetcher.
- */
-class YoutubeFetcher implements IFeedFetcher {
-
-    private $feedFetcher;
-
-    /**
-     * @param FeedFetcher $feedFetcher the fetcher that downloads the
-     * rewritten url
-     */
-    public function __construct(FeedFetcher $feedFetcher){
-        $this->feedFetcher = $feedFetcher;
-    }
-
-
-    /**
-     * Turns a youtube url that carries a list= parameter into the url of the
-     * playlist feed
-     * @param string $url the url entered by the user
-     * @return string the feed url of the playlist, or the unchanged input if
-     * it is not a youtube playlist url
-     */
-    private function buildUrl($url) {
-        // the dots are escaped so that they only match a literal "."
-        $baseRegex = '%(?:https?://|//)?(?:www\.)?youtube\.com';
-        $playRegex = $baseRegex . '.*?list=([^&]*)%';
-
-        if (preg_match($playRegex, $url, $matches)) {
-            $id = $matches[1];
-            // NOTE(review): verify that this gdata endpoint is still served
-            return 'http://gdata.youtube.com/feeds/api/playlists/' . $id;
-        } else {
-            return $url;
-        }
-    }
-
-
-    /**
-     * This fetcher only handles urls that buildUrl() rewrites, i.e. youtube
-     * urls containing a playlist id
-     * @param string $url the url that should be fetched
-     * @return boolean true if the url is a youtube playlist url
-     */
-    public function canHandle($url){
-        return $this->buildUrl($url) !== $url;
-    }
-
-
-    /**
-     * Fetch a feed from remote
-     * @param string $url remote url of the feed
-     * @param boolean $getFavicon if the favicon should also be fetched,
-     * defaults to true
-     * @param string $lastModified a last modified value from an http header,
-     * defaults to null
-     * @param string $etag an etag from an http header, defaults to null
-     * @param bool $fullTextEnabled if true tells the fetcher to enhance the
-     * articles by fetching custom enhanced content
-     * @param string $basicAuthUser if given, basic auth is set for this feed
-     * @param string $basicAuthPassword if given, basic auth is set for this
-     * feed
-     * @throws FetcherException if it fails
-     * @return array the result of the wrapped FeedFetcher: first element
-     * being the Feed and second element being an array of Items
-     */
-    public function fetch($url, $getFavicon=true, $lastModified=null,
-                          $etag=null, $fullTextEnabled=false,
-                          $basicAuthUser=null, $basicAuthPassword=null) {
-        $transformedUrl = $this->buildUrl($url);
-
-        $result = $this->feedFetcher->fetch(
-            $transformedUrl, $getFavicon, $lastModified, $etag,
-            $fullTextEnabled, $basicAuthUser, $basicAuthPassword
-        );
-
-        // reset feed url so we know the correct added url for the feed. The
-        // feed is null when the wrapped fetcher found the feed unmodified,
-        // in which case there is nothing to reset
-        if (isset($result[0])) {
-            $result[0]->setUrl($url);
-        }
-
-        return $result;
-    }
-
-
-}