From a3246a927de542e1b3ab403359bfd3c08705b6a7 Mon Sep 17 00:00:00 2001 From: Sean Molenaar Date: Wed, 30 Jan 2019 20:36:40 +0100 Subject: Parser: Switch to feedIO for parsing instead of picoFeed --- lib/AppInfo/Application.php | 113 ++++++---- lib/Config/Config.php | 4 +- lib/Config/FetcherConfig.php | 118 +++++++++++ lib/Config/LegacyGuzzleClient.php | 65 ++++++ lib/Config/LegacyGuzzleResponse.php | 86 ++++++++ lib/Db/Item.php | 14 ++ lib/Fetcher/FeedFetcher.php | 377 ++++++++++++--------------------- lib/Fetcher/Fetcher.php | 45 ++-- lib/Fetcher/IFeedFetcher.php | 17 +- lib/Fetcher/YoutubeFetcher.php | 25 +-- lib/PostProcessor/LWNProcessor.php | 117 ---------- lib/Service/FeedService.php | 47 ++-- lib/Utility/PicoFeedClientFactory.php | 42 ---- lib/Utility/PicoFeedFaviconFactory.php | 40 ---- lib/Utility/PsrLogger.php | 97 +++++++++ 15 files changed, 644 insertions(+), 563 deletions(-) create mode 100644 lib/Config/FetcherConfig.php create mode 100644 lib/Config/LegacyGuzzleClient.php create mode 100644 lib/Config/LegacyGuzzleResponse.php delete mode 100644 lib/PostProcessor/LWNProcessor.php delete mode 100644 lib/Utility/PicoFeedClientFactory.php delete mode 100644 lib/Utility/PicoFeedFaviconFactory.php create mode 100644 lib/Utility/PsrLogger.php (limited to 'lib') diff --git a/lib/AppInfo/Application.php b/lib/AppInfo/Application.php index d88bbbaec..b2773a224 100644 --- a/lib/AppInfo/Application.php +++ b/lib/AppInfo/Application.php @@ -13,27 +13,46 @@ namespace OCA\News\AppInfo; +use Closure; +use FeedIo\FeedIo; use HTMLPurifier; use HTMLPurifier_Config; + +use OCA\News\Config\FetcherConfig; +use OCA\News\Utility\PsrLogger; +use OCP\BackgroundJob\IJobList; + +use OCP\IContainer; +use OCP\INavigationManager; +use OCP\IURLGenerator; +use OCP\IConfig; +use OCP\AppFramework\App; +use OCP\Files\IRootFolder; +use OCP\Files\Node; + + +use OCA\News\Config\AppConfig; use OCA\News\Config\Config; -use OCA\News\Db\ItemMapper; use OCA\News\Db\MapperFactory; +use OCA\News\Db\ItemMapper; use OCA\News\Fetcher\FeedFetcher; use OCA\News\Fetcher\Fetcher; use OCA\News\Fetcher\YoutubeFetcher; use OCA\News\Utility\ProxyConfigParser; -use OCP\AppFramework\App; -use OCP\Files\IRootFolder; -use OCP\Files\Node; -use OCP\IConfig; -use OCP\IContainer; -use OCP\ILogger; -use PicoFeed\Config\Config as PicoFeedConfig; -use PicoFeed\Reader\Reader as PicoFeedReader; +/** + * Class Application + * + * @package OCA\News\AppInfo + */ class Application extends App { + /** + * Application constructor. + * + * @param array $urlParams Parameters + */ public function __construct(array $urlParams = []) { parent::__construct('news', $urlParams); @@ -57,6 +76,21 @@ class Application extends App return $c->query(MapperFactory::class)->build(); }); + + /** + * App config parser. + */ + $container->registerService(AppConfig::class, function (IContainer $c) { + $config = new AppConfig( + $c->query(INavigationManager::class), + $c->query(IURLGenerator::class), + $c->query(IJobList::class) + ); + + $config->loadConfig($c->query('info')); + return $config; + }); + /** * Core */ @@ -79,10 +113,21 @@ class Application extends App } }); + /** + * Logger base + */ + $container->registerService(PsrLogger::class, function (IContainer $c) { + return new PsrLogger( + $c->query('ServerContainer')->getLogger(), + $c->query('AppName') + ); + }); + + $container->registerService(Config::class, function (IContainer $c): Config { $config = new Config( $c->query('ConfigView'), - $c->query(ILogger::class), + $c->query(PsrLogger::class), $c->query('LoggerParameters') ); $config->read($c->query('configFile'), true); @@ -115,55 +160,33 @@ class Application extends App /** * Fetchers */ - $container->registerService(PicoFeedConfig::class, function (IContainer $c): PicoFeedConfig { + $container->registerService(FetcherConfig::class, function (IContainer $c) { // FIXME: move this into a separate class for testing? $config = $c->query(Config::class); - $proxy = $c->query(ProxyConfigParser::class); - - $userAgent = 'NextCloud-News/1.0'; - - $pico = new PicoFeedConfig(); - $pico->setClientUserAgent($userAgent) - ->setClientTimeout($config->getFeedFetcherTimeout()) - ->setMaxRedirections($config->getMaxRedirects()) - ->setMaxBodySize($config->getMaxSize()) - ->setParserHashAlgo('md5'); - - // proxy settings - $proxySettings = $proxy->parse(); - $host = $proxySettings['host']; - $port = $proxySettings['port']; - $user = $proxySettings['user']; - $password = $proxySettings['password']; - - if ($host) { - $pico->setProxyHostname($host); - - if ($port) { - $pico->setProxyPort($port); - } - } + $proxy = $c->query(ProxyConfigParser::class); - if ($user) { - $pico->setProxyUsername($user) - ->setProxyPassword($password); - } + $fConfig = new FetcherConfig(); + $fConfig->setClientTimeout($config->getFeedFetcherTimeout()); + $fConfig->setProxy($proxy); - return $pico; + return $fConfig; }); - $container->registerService(PicoFeedReader::class, function (IContainer $c): PicoFeedReader { - return new PicoFeedReader($c->query(PicoFeedConfig::class)); + $container->registerService(FeedIo::class, function (IContainer $c) { + $config = $c->query(FetcherConfig::class); + return new FeedIo($config->getClient(), $c->query(PsrLogger::class)); }); - $container->registerService(Fetcher::class, function (IContainer $c): Fetcher { + /** + * @noinspection PhpParamsInspection + */ + $container->registerService(Fetcher::class, function (IContainer $c) { $fetcher = new Fetcher(); // register fetchers in order, the most generic fetcher should be // the last one $fetcher->registerFetcher($c->query(YoutubeFetcher::class)); $fetcher->registerFetcher($c->query(FeedFetcher::class)); - return $fetcher; }); } diff --git a/lib/Config/Config.php b/lib/Config/Config.php index 7c5cee74a..dea1f5814 100644 --- a/lib/Config/Config.php +++ b/lib/Config/Config.php @@ -13,7 +13,7 @@ namespace OCA\News\Config; -use OCP\ILogger; +use OCA\News\Utility\PsrLogger; use OCP\Files\Folder; class Config @@ -35,7 +35,7 @@ class Config public function __construct( Folder $fileSystem, - ILogger $logger, + PsrLogger $logger, $LoggerParameters ) { $this->fileSystem = $fileSystem; diff --git a/lib/Config/FetcherConfig.php b/lib/Config/FetcherConfig.php new file mode 100644 index 000000000..55603c47c --- /dev/null +++ b/lib/Config/FetcherConfig.php @@ -0,0 +1,118 @@ + + * @author Bernhard Posselt + * @copyright 2012 Alessandro Cosentino + * @copyright 2012-2014 Bernhard Posselt + */ + +namespace OCA\News\Config; + +use FeedIo\Adapter\ClientInterface; +use \GuzzleHttp\Client; +use \FeedIo\Adapter\Guzzle\Client as FeedIoClient; + +/** + * Class FetcherConfig + * + * @package OCA\News\Config + */ +class FetcherConfig +{ + protected $client_timeout; + protected $proxy; + + /** + * Configure a guzzle client + * + * @return ClientInterface Legacy client to guzzle. + */ + public function getClient() + { + if (!class_exists('GuzzleHttp\Collection')) { + $config = [ + 'timeout' => $this->getClientTimeout(), + ]; + + if (!empty($this->proxy)) { + $config['proxy'] = $this->proxy; + } + + $guzzle = new Client(); + $client = new FeedIoClient($guzzle); + + return $client; + } + + $config = [ + 'request.options' => [ + 'timeout' => $this->getClientTimeout(), + ], + ]; + + if (!empty($this->proxy)) { + $config['request.options']['proxy'] = $this->proxy; + } + + $guzzle = new Client($config); + return new LegacyGuzzleClient($guzzle); + } + + /** + * Set a timeout for the client + * + * @param int $timeout The timeout + * + * @return self + */ + public function setClientTimeout($timeout) + { + $this->client_timeout = $timeout; + + return $this; + } + + /** + * Get the client timeout. + * + * @return mixed + */ + public function getClientTimeout() + { + return $this->client_timeout; + } + + /** + * Set the proxy + * + * @param \OCA\News\Utility\ProxyConfigParser $proxy The proxy to set. + * + * @return self + */ + public function setProxy($proxy) + { + // proxy settings + $proxySettings = $proxy->parse(); + $host = $proxySettings['host']; + $port = $proxySettings['port']; + $user = $proxySettings['user']; + $password = $proxySettings['password']; + + $proxy_string = 'https://'; + if (!empty($user)) { + $proxy_string .= $user . ':' . $password . '@'; + } + $proxy_string .= $host; + if (!empty($port)) { + $proxy_string .= ':' . $port; + } + $this->proxy = $proxy_string; + + return $this; + } +} diff --git a/lib/Config/LegacyGuzzleClient.php b/lib/Config/LegacyGuzzleClient.php new file mode 100644 index 000000000..bc1364c30 --- /dev/null +++ b/lib/Config/LegacyGuzzleClient.php @@ -0,0 +1,65 @@ + + * @copyright 2018 Sean Molenaar + */ + +namespace OCA\News\Config; + +use FeedIo\Adapter\ClientInterface as FeedIoClientInterface; +use FeedIo\Adapter\NotFoundException; +use FeedIo\Adapter\ServerErrorException; +use Guzzle\Service\ClientInterface; +use GuzzleHttp\Exception\BadResponseException; + +/** + * Guzzle dependent HTTP client + */ +class LegacyGuzzleClient implements FeedIoClientInterface +{ + /** + * @var ClientInterface + */ + protected $guzzleClient; + + /** + * @param ClientInterface $guzzleClient + */ + public function __construct(ClientInterface $guzzleClient) + { + $this->guzzleClient = $guzzleClient; + } + + /** + * @param string $url + * @param \DateTime $modifiedSince + * @throws \FeedIo\Adapter\NotFoundException + * @throws \FeedIo\Adapter\ServerErrorException + * @return \FeedIo\Adapter\ResponseInterface + */ + public function getResponse($url, \DateTime $modifiedSince) + { + try { + $options = [ + 'headers' => [ + 'User-Agent' => 'NextCloud-News/1.0', + 'If-Modified-Since' => $modifiedSince->format(\DateTime::RFC2822) + ] + ]; + + return new LegacyGuzzleResponse($this->guzzleClient->get($url, $options)); + } catch (BadResponseException $e) { + switch ((int) $e->getResponse()->getStatusCode()) { + case 404: + throw new NotFoundException($e->getMessage()); + default: + throw new ServerErrorException($e->getMessage()); + } + } + } +} diff --git a/lib/Config/LegacyGuzzleResponse.php b/lib/Config/LegacyGuzzleResponse.php new file mode 100644 index 000000000..d9f6102ee --- /dev/null +++ b/lib/Config/LegacyGuzzleResponse.php @@ -0,0 +1,86 @@ + + * @copyright 2018 Sean Molenaar + */ + +namespace OCA\News\Config; + +use FeedIo\Adapter\ResponseInterface; +use GuzzleHttp\Message\ResponseInterface as GuzzleResponseInterface; + +/** + * Guzzle dependent HTTP Response + */ +class LegacyGuzzleResponse implements ResponseInterface +{ + const HTTP_LAST_MODIFIED = 'Last-Modified'; + + /** + * @var \GuzzleHttp\Message\ResponseInterface + */ + protected $response; + + /** + * @param \GuzzleHttp\Message\ResponseInterface + */ + public function __construct(GuzzleResponseInterface $psrResponse) + { + $this->response = $psrResponse; + } + + /** + * @return boolean + */ + public function isModified() + { + return $this->response->getStatusCode() !== 304 && $this->response->getBody()->getSize() > 0; + } + + /** + * @return \Psr\Http\Message\StreamInterface + */ + public function getBody() + { + return $this->response->getBody(); + } + + /** + * @return \DateTime|null + */ + public function getLastModified() + { + if ($this->response->hasHeader(static::HTTP_LAST_MODIFIED)) { + $lastModified = \DateTime::createFromFormat( + \DateTime::RFC2822, + $this->getHeader(static::HTTP_LAST_MODIFIED) + ); + + return false === $lastModified ? null : $lastModified; + } + + return; + } + + /** + * @return array + */ + public function getHeaders() + { + return $this->response->getHeaders(); + } + + /** + * @param string $name + * @return string[] + */ + public function getHeader($name) + { + return $this->response->getHeader($name); + } +} diff --git a/lib/Db/Item.php b/lib/Db/Item.php index 1a8d284a2..3a17dd2cb 100644 --- a/lib/Db/Item.php +++ b/lib/Db/Item.php @@ -491,4 +491,18 @@ class Item extends Entity implements IAPI, \JsonSerializable $this->getEnclosureLink() ); } + + /** + * Check if a given mimetype is supported + * + * @param string $mime mimetype to check + * + * @return boolean + */ + public function isSupportedMime($mime) + { + return ( + stripos($mime, 'audio/') !== false || + stripos($mime, 'video/') !== false); + } } diff --git a/lib/Fetcher/FeedFetcher.php b/lib/Fetcher/FeedFetcher.php index 65a4b5526..ae338ca09 100644 --- a/lib/Fetcher/FeedFetcher.php +++ b/lib/Fetcher/FeedFetcher.php @@ -13,29 +13,17 @@ namespace OCA\News\Fetcher; -use Exception; - -use OCA\News\PostProcessor\LWNProcessor; +use DateTime; +use Favicon\Favicon; +use FeedIo\Feed\ItemInterface; +use FeedIo\FeedInterface; +use FeedIo\FeedIo; use OCP\Http\Client\IClientService; -use PicoFeed\Parser\MalFormedXmlException; -use PicoFeed\Reader\Reader; -use PicoFeed\Parser\Parser; -use PicoFeed\Reader\SubscriptionNotFoundException; -use PicoFeed\Reader\UnsupportedFeedFormatException; -use PicoFeed\Client\InvalidCertificateException; -use PicoFeed\Client\InvalidUrlException; -use PicoFeed\Client\MaxRedirectException; -use PicoFeed\Client\MaxSizeException; -use PicoFeed\Client\TimeoutException; -use PicoFeed\Client\ForbiddenException; -use PicoFeed\Client\UnauthorizedException; use OCP\IL10N; use OCA\News\Db\Item; use OCA\News\Db\Feed; -use OCA\News\Utility\PicoFeedFaviconFactory; -use OCA\News\Utility\PicoFeedReaderFactory; use OCA\News\Utility\Time; class FeedFetcher implements IFeedFetcher @@ -48,22 +36,26 @@ class FeedFetcher implements IFeedFetcher private $clientService; public function __construct( - Reader $reader, - PicoFeedFaviconFactory $faviconFactory, + FeedIo $fetcher, + Favicon $favicon, IL10N $l10n, Time $time, IClientService $clientService ) { - $this->faviconFactory = $faviconFactory; - $this->reader = $reader; - $this->time = $time; - $this->l10n = $l10n; - $this->clientService = $clientService; + $this->faviconFactory = $favicon; + $this->reader = $fetcher; + $this->time = $time; + $this->l10n = $l10n; + $this->clientService = $clientService; } /** - * This fetcher handles all the remaining urls therefore always returns true + * This fetcher handles all the remaining urls therefore always returns true. + * + * @param string $url The URL to check + * + * @return bool */ public function canHandle($url) { @@ -74,176 +66,55 @@ class FeedFetcher implements IFeedFetcher /** * Fetch a feed from remote * - * @param string $url remote url of the feed - * @param boolean $getFavicon if the favicon should also be fetched, defaults to true - * @param string $lastModified a last modified value from an http header defaults to false. - * If lastModified matches the http header from the feed no results are fetched - * @param string $etag an etag from an http header. - * If lastModified matches the http header from the feed no results are fetched - * @param bool $fullTextEnabled if true tells the fetcher to enhance the articles by fetching more content - * @param string $basicAuthUser if given, basic auth is set for this feed - * @param string $basicAuthPassword if given, basic auth is set for this feed. Ignored if user is empty + * @param string $url Remote url of the feed + * @param boolean $getFavicon If the favicon should also be fetched, + * defaults to true + * @param string $lastModified A last modified value from an http header + * defaults to false. If lastModified matches + * the header from the feed no results are fetched + * @param string $user If given, basic auth is set for this feed + * @param string $password If given, basic auth is set for this feed. + * Ignored if user is null or an empty string. * - * @throws FetcherException if it fails * @return array an array containing the new feed and its items, first * element being the Feed and second element being an array of Items */ - public function fetch( - $url, - $getFavicon = true, - $lastModified = null, - $etag = null, - $fullTextEnabled = false, - $basicAuthUser = null, - $basicAuthPassword = null - ) { - try { - if ($basicAuthUser !== null && trim($basicAuthUser) !== '') { - $resource = $this->reader->discover( - $url, - $lastModified, - $etag, - $basicAuthUser, - $basicAuthPassword - ); - } else { - $resource = $this->reader->discover($url, $lastModified, $etag); - } - - if (!$resource->isModified()) { - return [null, null]; - } - - $location = $resource->getUrl(); - $etag = $resource->getEtag(); - $content = $resource->getContent(); - $encoding = $resource->getEncoding(); - $lastModified = $resource->getLastModified(); - - $parser = $this->reader->getParser($location, $content, $encoding); - - if ($fullTextEnabled) { - $parser->enableContentGrabber(); - $parser->getItemPostProcessor()->register( - new LWNProcessor( - $basicAuthUser, - $basicAuthPassword, - $this->clientService - ) - ); - } - - $parsedFeed = $parser->execute(); - - $feed = $this->buildFeed( - $parsedFeed, - $url, - $getFavicon, - $lastModified, - $etag, - $location - ); - - $items = []; - foreach ($parsedFeed->getItems() as $item) { - $items[] = $this->buildItem($item, $parsedFeed); - } - - return [$feed, $items]; - } catch (Exception $ex) { - $this->handleError($ex, $url); - } - } - - - private function handleError(Exception $ex, $url) + public function fetch($url, $getFavicon = true, $lastModified = null, $user = null, $password = null) { - $msg = $ex->getMessage(); + if ($user !== null && trim($user) !== '') { + $url = explode('://', $url); + $url = $url[0] . '://' . $user . ':' . $password . '@' . $url[1]; + } + $resource = $this->reader->readSince($url, new DateTime($lastModified)); - if ($ex instanceof MalFormedXmlException) { - $msg = $this->l10n->t('Feed contains invalid XML'); - } elseif ($ex instanceof SubscriptionNotFoundException) { - $msg = $this->l10n->t( - 'Feed not found: Either the website ' . - 'does not provide a feed or blocks access. To rule out ' . - 'blocking, try to download the feed on your server\'s ' . - 'command line using curl: curl ' . $url - ); - } elseif ($ex instanceof UnsupportedFeedFormatException) { - $msg = $this->l10n->t('Detected feed format is not supported'); - } elseif ($ex instanceof InvalidCertificateException) { - $msg = $this->buildCurlSslErrorMessage($ex->getCode()); - } elseif ($ex instanceof InvalidUrlException) { - $msg = $this->l10n->t('Website not found'); - } elseif ($ex instanceof MaxRedirectException) { - $msg = $this->l10n->t('More redirects than allowed, aborting'); - } elseif ($ex instanceof MaxSizeException) { - $msg = $this->l10n->t('Bigger than maximum allowed size'); - } elseif ($ex instanceof TimeoutException) { - $msg = $this->l10n->t('Request timed out'); - } elseif ($ex instanceof UnauthorizedException) { - $msg = $this->l10n->t( - 'Required credentials for feed were ' . - 'either missing or incorrect' - ); - } elseif ($ex instanceof ForbiddenException) { - $msg = $this->l10n->t('Forbidden to access feed'); + if (!$resource->getResponse()->isModified()) { + return [null, null]; } - throw new FetcherException($msg); - } + $location = $resource->getUrl(); + $parsedFeed = $resource->getFeed(); + $feed = $this->buildFeed( + $parsedFeed, + $url, + $getFavicon, + $location + ); - private function buildCurlSslErrorMessage($errorCode) - { - switch ($errorCode) { - case 35: // CURLE_SSL_CONNECT_ERROR - return $this->l10n->t( - 'Certificate error: A problem occurred ' . - 'somewhere in the SSL/TLS handshake. Could be ' . - 'certificates (file formats, paths, permissions), ' . - 'passwords, and others.' - ); - case 51: // CURLE_PEER_FAILED_VERIFICATION - return $this->l10n->t( - 'Certificate error: The remote server\'s SSL ' . - 'certificate or SSH md5 fingerprint was deemed not OK.' - ); - case 58: // CURLE_SSL_CERTPROBLEM - return $this->l10n->t( - 'Certificate error: Problem with the local client ' . - 'certificate.' - ); - case 59: // CURLE_SSL_CIPHER - return $this->l10n->t( - 'Certificate error: Couldn\'t use specified cipher.' - ); - case 60: // CURLE_SSL_CACERT - return $this->l10n->t( - 'Certificate error: Peer certificate cannot be ' . - 'authenticated with known CA certificates.' - ); - case 64: // CURLE_USE_SSL_FAILED - return $this->l10n->t( - 'Certificate error: Requested FTP SSL level failed.' - ); - case 66: // CURLE_SSL_ENGINE_INITFAILED - return $this->l10n->t( - 'Certificate error: Initiating the SSL engine failed.' - ); - case 77: // CURLE_SSL_CACERT_BADFILE - return $this->l10n->t( - 'Certificate error: Problem with reading the SSL CA ' . - 'cert (path? access rights?)' - ); - case 83: // CURLE_SSL_ISSUER_ERROR - return $this->l10n->t( - 'Certificate error: Issuer check failed' - ); - default: - return $this->l10n->t('Unknown SSL certificate error!'); + $items = []; + foreach ($parsedFeed as $item) { + $items[] = $this->buildItem($item, $parsedFeed); } + + return [$feed, $items]; } + /** + * Decode the string twice + * + * @param string $string String to decode + * + * @return string + */ private function decodeTwice($string) { return html_entity_decode( @@ -257,37 +128,73 @@ class FeedFetcher implements IFeedFetcher ); } - - protected function determineRtl($parsedItem, $parsedFeed) + /** + * Check if a feed is RTL or not + * + * @param FeedInterface $parsedFeed The feed that was parsed + * + * @return bool + */ + protected function determineRtl($parsedFeed) { - $itemLang = $parsedItem->getLanguage(); - $feedLang = $parsedFeed->getLanguage(); - - if ($itemLang) { - return Parser::isLanguageRTL($itemLang); - } else { - return Parser::isLanguageRTL($feedLang); + $language = $parsedFeed->getLanguage(); + + $language = strtolower($language); + $rtl_languages = array( + 'ar', // Arabic (ar-**) + 'fa', // Farsi (fa-**) + 'ur', // Urdu (ur-**) + 'ps', // Pashtu (ps-**) + 'syr', // Syriac (syr-**) + 'dv', // Divehi (dv-**) + 'he', // Hebrew (he-**) + 'yi', // Yiddish (yi-**) + ); + foreach ($rtl_languages as $prefix) { + if (strpos($language, $prefix) === 0) { + return true; + } } + return false; } - + /** + * Build an item based on a feed. + * + * @param ItemInterface $parsedItem The item to use + * @param FeedInterface $parsedFeed The feed to use + * + * @return Item + */ protected function buildItem($parsedItem, $parsedFeed) { $item = new Item(); $item->setUnread(true); - $item->setUrl($parsedItem->getUrl()); - $item->setGuid($parsedItem->getId()); + $item->setUrl($parsedItem->getLink()); + $item->setGuid($parsedItem->getPublicId()); $item->setGuidHash($item->getGuid()); - $item->setPubDate($parsedItem->getPublishedDate()->getTimestamp()); - $item->setUpdatedDate($parsedItem->getUpdatedDate()->getTimestamp()); - $item->setRtl($this->determineRtl($parsedItem, $parsedFeed)); + + $pubDT = $parsedItem->getLastModified(); + if ($parsedItem->getValue('pubDate') !== null) { + $pubDT = new DateTime($parsedItem->getValue('pubDate')); + } elseif ($parsedItem->getValue('published') !== null) { + $pubDT = new DateTime($parsedItem->getValue('published')); + } + + $item->setPubDate( + $pubDT->getTimestamp() + ); + $item->setLastModified( + $parsedItem->getLastModified()->getTimestamp() + ); + $item->setRtl($this->determineRtl($parsedFeed)); // unescape content because angularjs helps against XSS $item->setTitle($this->decodeTwice($parsedItem->getTitle())); $item->setAuthor($this->decodeTwice($parsedItem->getAuthor())); // purification is done in the service layer - $body = $parsedItem->getContent(); + $body = $parsedItem->getDescription(); $body = mb_convert_encoding( $body, 'HTML-ENTITIES', @@ -295,14 +202,14 @@ class FeedFetcher implements IFeedFetcher ); $item->setBody($body); - $enclosureUrl = $parsedItem->getEnclosureUrl(); - if ($enclosureUrl) { - $enclosureType = $parsedItem->getEnclosureType(); - if (stripos($enclosureType, 'audio/') !== false - || stripos($enclosureType, 'video/') !== false - ) { - $item->setEnclosureMime($enclosureType); - $item->setEnclosureLink($enclosureUrl); + if ($parsedItem->hasMedia()) { + // TODO: Fix multiple media support + foreach ($parsedItem->getMedias() as $media) { + if (!$item->isSupportedMime($media->getType())) { + continue; + } + $item->setEnclosureMime($media->getType()); + $item->setEnclosureLink($media->getUrl()); } } @@ -311,39 +218,35 @@ class FeedFetcher implements IFeedFetcher return $item; } - - protected function buildFeed( - $parsedFeed, - $url, - $getFavicon, - $modified, - $etag, - $location - ) { - $feed = new Feed(); - - $link = $parsedFeed->getSiteUrl(); - - if (!$link) { - $link = $location; - } + /** + * Build a feed based on provided info + * + * @param FeedInterface $feed Feed to build from + * @param string $url URL to use + * @param bool $getFavicon To get the favicon + * @param string $location String base URL + * + * @return Feed + */ + protected function buildFeed($feed, $url, $getFavicon, $location) + { + $newFeed = new Feed(); // unescape content because angularjs helps against XSS - $title = strip_tags($this->decodeTwice($parsedFeed->getTitle())); - $feed->setTitle($title); - $feed->setUrl($url); // the url used to add the feed - $feed->setLocation($location); // the url where the feed was found - $feed->setLink($link); // attribute in the feed - $feed->setHttpLastModified($modified); - $feed->setHttpEtag($etag); - $feed->setAdded($this->time->getTime()); - - if ($getFavicon) { - $faviconFetcher = $this->faviconFactory->build(); - $favicon = $faviconFetcher->find($feed->getLink()); - $feed->setFaviconLink($favicon); + $title = strip_tags($this->decodeTwice($feed->getTitle())); + $newFeed->setTitle($title); + $newFeed->setUrl($url); // the url used to add the feed + $newFeed->setLocation($location); // the url where the feed was found + $newFeed->setLink($feed->getLink()); // attribute in the feed + $newFeed->setLastModified($feed->getLastModified()->getTimestamp()); + $newFeed->setAdded($this->time->getTime()); + + if (!$getFavicon) { + return $newFeed; } + $favicon = $this->faviconFactory->get($url); + $newFeed->setFaviconLink($favicon); - return $feed; + return $newFeed; } } diff --git a/lib/Fetcher/Fetcher.php b/lib/Fetcher/Fetcher.php index e78da0265..23f5b57f7 100644 --- a/lib/Fetcher/Fetcher.php +++ b/lib/Fetcher/Fetcher.php @@ -16,6 +16,10 @@ namespace OCA\News\Fetcher; class Fetcher { + /** + * List of fetchers. + * @var IFeedFetcher[] + */ private $fetchers; public function __construct() @@ -39,39 +43,28 @@ class Fetcher * * @param string $url remote url of the feed * @param boolean $getFavicon if the favicon should also be fetched, defaults to true - * @param string $lastModified a last modified value from an http header defaults to false. + * @param string $lastModified a last modified value from an http header defaults to false. * If lastModified matches the http header from the feed no results are fetched - * @param string $etag an etag from an http header. - * If lastModified matches the http header from the feed no results are fetched - * @param bool $fullTextEnabled if true tells the fetcher to enhance the articles by fetching more content - * @param string $basicAuthUser if given, basic auth is set for this feed - * @param string $basicAuthPassword if given, basic auth is set for this feed. Ignored if user is empty + * @param string $user if given, basic auth is set for this feed + * @param string $password if given, basic auth is set for this feed. Ignored if user is empty * - * @throws FetcherException if simple pie fails + * @throws FetcherException if FeedIO fails * @return array an array containing the new feed and its items, first * element being the Feed and second element being an array of Items */ - public function fetch( - $url, - $getFavicon = true, - $lastModified = null, - $etag = null, - $fullTextEnabled = false, - $basicAuthUser = null, - $basicAuthPassword = null - ) { + public function fetch($url, $getFavicon = true, $lastModified = null, $user = null, $password = null) + { foreach ($this->fetchers as $fetcher) { - if ($fetcher->canHandle($url)) { - return $fetcher->fetch( - $url, - $getFavicon, - $lastModified, - $etag, - $fullTextEnabled, - $basicAuthUser, - $basicAuthPassword - ); + if (!$fetcher->canHandle($url)) { + continue; } + return $fetcher->fetch( + $url, + $getFavicon, + $lastModified, + $user, + $password + ); } return [null, []]; diff --git a/lib/Fetcher/IFeedFetcher.php b/lib/Fetcher/IFeedFetcher.php index c96bd315b..d5994a076 100644 --- a/lib/Fetcher/IFeedFetcher.php +++ b/lib/Fetcher/IFeedFetcher.php @@ -23,25 +23,14 @@ interface IFeedFetcher * @param boolean $getFavicon if the favicon should also be fetched, defaults to true * @param string $lastModified a last modified value from an http header defaults to false. * If lastModified matches the http header from the feed no results are fetched - * @param string $etag an etag from an http header. - * If lastModified matches the http header from the feed no results are fetched - * @param bool $fullTextEnabled if true tells the fetcher to enhance the articles by fetching more content - * @param string $basicAuthUser if given, basic auth is set for this feed - * @param string $basicAuthPassword if given, basic auth is set for this feed. Ignored if user is empty + * @param string $user if given, basic auth is set for this feed + * @param string $password if given, basic auth is set for this feed. Ignored if user is empty * * @throws FetcherException if the fetcher encounters a problem * @return array an array containing the new feed and its items, first * element being the Feed and second element being an array of Items */ - public function fetch( - $url, - $getFavicon = true, - $lastModified = null, - $etag = null, - $fullTextEnabled = false, - $basicAuthUser = null, - $basicAuthPassword = null - ); + public function fetch($url, $getFavicon = true, $lastModified = null, $user = null, $password = null); /** * Can a fetcher handle a feed. diff --git a/lib/Fetcher/YoutubeFetcher.php b/lib/Fetcher/YoutubeFetcher.php index a47b8fdb8..9ccce4463 100644 --- a/lib/Fetcher/YoutubeFetcher.php +++ b/lib/Fetcher/YoutubeFetcher.php @@ -52,35 +52,24 @@ class YoutubeFetcher implements IFeedFetcher * @param boolean $getFavicon if the favicon should also be fetched, defaults to true * @param string $lastModified a last modified value from an http header defaults to false. * If lastModified matches the http header from the feed no results are fetched - * @param string $etag an etag from an http header. - * If lastModified matches the http header from the feed no results are fetched - * @param bool $fullTextEnabled if true tells the fetcher to enhance the articles by fetching more content - * @param string $basicAuthUser if given, basic auth is set for this feed - * @param string $basicAuthPassword if given, basic auth is set for this feed. Ignored if user is empty + * @param string $user if given, basic auth is set for this feed + * @param string $password if given, basic auth is set for this feed. Ignored if user is empty * * @throws FetcherException if it fails * @return array an array containing the new feed and its items, first * element being the Feed and second element being an array of Items */ - public function fetch( - $url, - $getFavicon = true, - $lastModified = null, - $etag = null, - $fullTextEnabled = false, - $basicAuthUser = null, - $basicAuthPassword = null - ) { + public function fetch($url, $getFavicon = true, $lastModified = null, $user = null, $password = null + ) + { $transformedUrl = $this->buildUrl($url); $result = $this->feedFetcher->fetch( $transformedUrl, $getFavicon, $lastModified, - $etag, - $fullTextEnabled, - $basicAuthUser, - $basicAuthPassword + $user, + $password ); // reset feed url so we know the correct added url for the feed diff --git a/lib/PostProcessor/LWNProcessor.php b/lib/PostProcessor/LWNProcessor.php deleted file mode 100644 index 1028df100..000000000 --- a/lib/PostProcessor/LWNProcessor.php +++ /dev/null @@ -1,117 +0,0 @@ - - */ - -namespace OCA\News\PostProcessor; - -use GuzzleHttp\Cookie\CookieJar; -use OCP\Http\Client\IClientService; -use PicoFeed\Parser\Feed; -use PicoFeed\Parser\Item; -use PicoFeed\Processor\ItemProcessorInterface; -use PicoFeed\Scraper\RuleParser; - -class LWNProcessor implements ItemProcessorInterface -{ - private $user; - - private $password; - - private $clientService; - - private $cookieJar; - - /** - * @param $user - * @param $password - */ - public function __construct($user, $password, IClientService $clientService) - { - $this->user = $user; - $this->password = $password; - $this->clientService = $clientService; - $this->cookieJar = new CookieJar(); - } - - private function login() - { - if ($this->cookieJar->count() > 0) { - return true; - } - if (!$this->user || !$this->password) { - return false; - } - - $client = $this->clientService->newClient(); - $response = $client->post( - 'https://lwn.net/login', - [ - 'cookies' => $this->cookieJar, - 'body' => [ - 'Username' => $this->user, - 'Password' => $this->password, - 'target' => '/' - ] - ] - ); - return ($response->getStatusCode() === 200 && $this->cookieJar->count() > 0); - } - - private function getBody($url) - { - $client = $this->clientService->newClient(); - $response = $client->get( - $url, - [ - 'cookies' => $this->cookieJar - ] - ); - $parser = new RuleParser( - $response->getBody(), - [ - 'body' => array( - '//div[@class="ArticleText"]', - ), - 'strip' => array( - '//div[@class="FeatureByline"]' - ) - ] - ); - $articleBody = $parser->execute(); - // make all links absolute - return str_replace('href="/', 'href="https://lwn.net/', $articleBody); - } - - private function canHandle($url) - { - $regex = '%(?:https?://|//)?(?:www.)?lwn.net%'; - - return (bool)preg_match($regex, $url); - } - - /** - * Execute Item Processor - * - * @access public - * @param Feed $feed - * @param Item $item - * @return bool - */ - public function execute(Feed $feed, Item $item) - { - if ($this->canHandle($item->getUrl())) { - $loggedIn = $this->login(); - - $item->setUrl(str_replace('/rss', '', $item->getUrl())); - if ($loggedIn) { - $item->setContent($this->getBody($item->getUrl())); - } - } - } -} diff --git a/lib/Service/FeedService.php b/lib/Service/FeedService.php index 2ccbb014b..fade77df8 100644 --- a/lib/Service/FeedService.php +++ b/lib/Service/FeedService.php @@ -58,8 +58,8 @@ class FeedService extends Service $this->logger = $logger; $this->l10n = $l10n; $this->timeFactory = $timeFactory; - $this->autoPurgeMinimumInterval = - $config->getAutoPurgeMinimumInterval(); + $this->autoPurgeMinimumInterval = $config->getAutoPurgeMinimumInterval( + ); $this->purifier = $purifier; $this->feedMapper = $feedMapper; $this->loggerParams = $LoggerParameters; @@ -69,6 +69,7 @@ class FeedService extends Service * Finds all feeds of a user * * @param string $userId the name of the user + * * @return Feed[] */ public function findAll($userId) @@ -96,9 +97,10 @@ class FeedService extends Service * folder * @param string $userId for which user the feed should be created * @param string $title if given, this is used for the opml feed title - * @param string $basicAuthUser if given, basic auth is set for this feed - * @param string $basicAuthPassword if given, basic auth is set for this + * @param string $user if given, basic auth is set for this feed + * @param string $password if given, basic auth is set for this * feed. Ignored if user is null or an empty string + * * @throws ServiceConflictException if the feed exists already * @throws ServiceNotFoundException if the url points to an invalid feed * @return Feed the newly created feed @@ -108,23 +110,21 @@ class FeedService extends Service $folderId, $userId, $title = null, - $basicAuthUser = null, - $basicAuthPassword = null + $user = null, + $password = null ) { // first try if the feed exists already try { /** - * @var Feed $feed + * @var Feed $feed * @var Item[] $items */ list($feed, $items) = $this->feedFetcher->fetch( $feedUrl, true, null, - null, - false, - $basicAuthUser, - $basicAuthPassword + $user, + $password ); // try again if feed exists depending on the reported link @@ -140,8 +140,8 @@ class FeedService extends Service // insert feed $itemCount = count($items); - $feed->setBasicAuthUser($basicAuthUser); - $feed->setBasicAuthPassword($basicAuthPassword); + $feed->setBasicAuthUser($user); + $feed->setBasicAuthPassword($password); $feed->setFolderId($folderId); $feed->setUserId($userId); $feed->setArticlesPerUpdate($itemCount); @@ -213,6 +213,7 @@ class FeedService extends Service * @param int $feedId the id of the feed that should be updated * @param string $userId the id of the user * @param bool $forceUpdate update even if the article exists already + * * @throws ServiceNotFoundException if the feed does not exist * @return Feed the updated feed entity */ @@ -237,8 +238,6 @@ class FeedService extends Service $location, false, $existingFeed->getHttpLastModified(), - $existingFeed->getHttpEtag(), - $existingFeed->getFullTextEnabled(), $existingFeed->getBasicAuthUser(), $existingFeed->getBasicAuthPassword() ); @@ -332,6 +331,7 @@ class FeedService extends Service * * @param array $json the array with json * @param string $userId the username + * * @return Feed if one had to be created for nonexistent feeds */ public function importArticles($json, $userId) @@ -406,6 +406,7 @@ class FeedService extends Service * * @param int $feedId the id of the feed that should be deleted * @param string $userId the name of the user for security reasons + * * @throws ServiceNotFoundException when feed does not exist */ public function markDeleted($feedId, $userId) @@ -421,6 +422,7 @@ class FeedService extends Service * * @param int $feedId the id of the feed that should be restored * @param string $userId the name of the user for security reasons + * * @throws ServiceNotFoundException when feed does not exist */ public function unmarkDeleted($feedId, $userId) @@ -471,13 +473,14 @@ class FeedService extends Service * @param $feedId * @param $userId * @param $diff an array containing the fields to update, e.g.: - * [ - * 'ordering' => 1, - * 'fullTextEnabled' => true, - * 'pinned' => true, - * 'updateMode' => 0, - * 'title' => 'title' - * ] + * [ + * 'ordering' => 1, + * 'fullTextEnabled' => true, + * 'pinned' => true, + * 'updateMode' => 0, + * 'title' => 'title' + * ] + * * @throws ServiceNotFoundException if feed does not exist */ public function patch($feedId, $userId, $diff = []) diff --git a/lib/Utility/PicoFeedClientFactory.php b/lib/Utility/PicoFeedClientFactory.php deleted file mode 100644 index 046224919..000000000 --- a/lib/Utility/PicoFeedClientFactory.php +++ /dev/null @@ -1,42 +0,0 @@ - - * @author Bernhard Posselt - * @copyright 2012 Alessandro Cosentino - * @copyright 2012-2014 Bernhard Posselt - */ - - -namespace OCA\News\Utility; - -use \PicoFeed\Config\Config; -use \PicoFeed\Client\Client; - -class PicoFeedClientFactory -{ - - private $config; - - public function __construct(Config $config) - { - $this->config = $config; - } - - - /** - * Returns a new instance of an PicoFeed Http client - * - * @return \PicoFeed\Client instance - */ - public function build() - { - $client = Client::getInstance(); - $client->setConfig($this->config); - return $client; - } -} diff --git a/lib/Utility/PicoFeedFaviconFactory.php b/lib/Utility/PicoFeedFaviconFactory.php deleted file mode 100644 index 09a1b76c8..000000000 --- a/lib/Utility/PicoFeedFaviconFactory.php +++ /dev/null @@ -1,40 +0,0 @@ - - * @author Bernhard Posselt - * @copyright 2012 Alessandro Cosentino - * @copyright 2012-2014 Bernhard Posselt - */ - - -namespace OCA\News\Utility; - -use \PicoFeed\Config\Config; -use \PicoFeed\Reader\Favicon; - -class PicoFeedFaviconFactory -{ - - private $config; - - public function __construct(Config $config) - { - $this->config = $config; - } - - - /** - * Returns a new instance of an PicoFeed Http client - * - * @return \PicoFeed\Favicon instance - */ - public function build() - { - return new Favicon($this->config); - } -} diff --git a/lib/Utility/PsrLogger.php b/lib/Utility/PsrLogger.php new file mode 100644 index 000000000..5d9a2529b --- /dev/null +++ b/lib/Utility/PsrLogger.php @@ -0,0 +1,97 @@ + + * @copyright 2018 Sean Molenaar + */ + +namespace OCA\News\Utility; + +use \OCP\ILogger; + +/** + * This is a wrapper to make OC\Log conform to Psr\Log\LoggerInterface + * + * @package OCA\News\Utility + */ +class PsrLogger implements \Psr\Log\LoggerInterface +{ + private $logger; + private $appName; + + /** + * PsrLogger constructor. + * + * @param ILogger $logger The logger + * @param string $appName Name of the app + */ + public function __construct(ILogger $logger, $appName) + { + $this->logger = $logger; + $this->appName = $appName; + } + + public function logException($exception, array $context = []) + { + $context['app'] = $this->appName; + $this->logger->logException($exception, $context); + } + + public function emergency($message, array $context = []) + { + $context['app'] = $this->appName; + $this->logger->emergency($message, $context); + } + + public function alert($message, array $context = []) + { + $context['app'] = $this->appName; + $this->logger->alert($message, $context); + } + + public function critical($message, array $context = []) + { + $context['app'] = $this->appName; + $this->logger->critical($message, $context); + } + + public function error($message, array $context = []) + { + $context['app'] = $this->appName; + $this->logger->error($message, $context); + } + + public function warning($message, array $context = []) + { + $context['app'] = $this->appName; + $this->logger->warning($message, $context); + } + + public function notice($message, array $context = []) + { + $context['app'] = $this->appName; + $this->logger->notice($message, $context); + } + + public function info($message, array $context = []) + { + $context['app'] = $this->appName; + $this->logger->info($message, $context); + } + + public function debug($message, array $context = []) + { + $context['app'] = $this->appName; + $this->logger->debug($message, $context); + } + + public function log($level, $message, array $context = []) + { + $context['app'] = $this->appName; + $this->logger->log($level, $message, $context); + } +} -- cgit v1.2.3