From feb20ad8b27e973c95d9ebf3b41aa3a125318ef5 Mon Sep 17 00:00:00 2001 From: Bernhard Posselt Date: Sat, 4 Oct 2014 11:10:33 +0200 Subject: create files for next gen parser --- articleenhancer/globalarticleenhancer.php | 9 ++++--- articleenhancer/xpatharticleenhancer.php | 8 +++--- feed/client/curl.php | 25 +++++++++++++++++ feed/client/httpclient.php | 45 +++++++++++++++++++++++++++++++ feed/parser/atomparser.php | 28 +++++++++++++++++++ feed/parser/parser.php | 33 +++++++++++++++++++++++ feed/parser/rssparser.php | 28 +++++++++++++++++++ utility/faviconfetcher.php | 6 +++-- 8 files changed, 174 insertions(+), 8 deletions(-) create mode 100644 feed/client/curl.php create mode 100644 feed/client/httpclient.php create mode 100644 feed/parser/atomparser.php create mode 100644 feed/parser/parser.php create mode 100644 feed/parser/rssparser.php diff --git a/articleenhancer/globalarticleenhancer.php b/articleenhancer/globalarticleenhancer.php index f4466f75f..7d8385db2 100644 --- a/articleenhancer/globalarticleenhancer.php +++ b/articleenhancer/globalarticleenhancer.php @@ -13,6 +13,8 @@ namespace OCA\News\ArticleEnhancer; +use \ZendXml\Security; + use \OCA\News\Db\Item; @@ -23,7 +25,7 @@ class GlobalArticleEnhancer implements ArticleEnhancer { * This method is run after all enhancers and for every item */ public function enhance(Item $item) { - + $dom = new \DOMDocument(); // wrap it inside a div if there is none to prevent invalid wrapping @@ -31,9 +33,10 @@ class GlobalArticleEnhancer implements ArticleEnhancer { $body = '
' . $item->getBody() . '
'; $loadEntities = libxml_disable_entity_loader(true); - @$dom->loadHTML($body, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD); + @$dom->loadHTML($body, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD + | LIBXML_NONET); libxml_disable_entity_loader($loadEntities); - + $xpath = new \DOMXpath($dom); // remove youtube autoplay diff --git a/articleenhancer/xpatharticleenhancer.php b/articleenhancer/xpatharticleenhancer.php index 0550ada46..87c80762b 100644 --- a/articleenhancer/xpatharticleenhancer.php +++ b/articleenhancer/xpatharticleenhancer.php @@ -13,6 +13,8 @@ namespace OCA\News\ArticleEnhancer; +use \ZendXml\Security; + use \OCA\News\Db\Item; use \OCA\News\Utility\SimplePieAPIFactory; use \OCA\News\Utility\Config; @@ -67,9 +69,9 @@ class XPathArticleEnhancer implements ArticleEnhancer { } $dom = new \DOMDocument(); - + $loadEntities = libxml_disable_entity_loader(true); - @$dom->loadHTML($body); + @$dom->loadHTML($body, LIBXML_NONET); libxml_disable_entity_loader($loadEntities); $xpath = new \DOMXpath($dom); @@ -135,7 +137,7 @@ class XPathArticleEnhancer implements ArticleEnhancer { // return, if xml is empty or loading the HTML fails $loadEntities = libxml_disable_entity_loader(true); - if( trim($xmlString) == "" || !@$dom->loadHTML($xmlString) ) { + if( trim($xmlString) == "" || !@$dom->loadHTML($xmlString, LIBXML_NONET) ) { libxml_disable_entity_loader($loadEntities); return $xmlString; } diff --git a/feed/client/curl.php b/feed/client/curl.php new file mode 100644 index 000000000..3cacfe2aa --- /dev/null +++ b/feed/client/curl.php @@ -0,0 +1,25 @@ + + * @author Bernhard Posselt + * @copyright Alessandro Cosentino 2012 + * @copyright Bernhard Posselt 2012, 2014 + */ + +namespace OCA\News\Feed\Client; + +class CURL extends HttpClient { + + public function __construct ($version, array $config=null) { + parent::__construct($version, $config); + } + + +} + + diff --git a/feed/client/httpclient.php b/feed/client/httpclient.php new file mode 100644 index 000000000..2fad3ea68 --- /dev/null +++ b/feed/client/httpclient.php @@ -0,0 +1,45 @@ + + * @author Bernhard Posselt + * @copyright Alessandro Cosentino 2012 + * @copyright Bernhard Posselt 2012, 2014 + */ + +namespace OCA\News\Feed\Client; + +class HttpClient { + + protected $defaults = [ + 'user_agent' => 'ownCloud News/VERSION ' . + '(+https://owncloud.org/; 1 subscriber; feed-url=URL)', + 'connection_timeout' => 10, // seconds + 'timeout' => 10, // seconds + 'verify_ssl' => true, + 'http_version' => '1.1', + 'proxy_host' => '', + 'proxy_port' => 80, + 'proxy_user' => '', + 'proxy_password' => '' + ]; + + public function __construct ($version, array $config=null) { + foreach ($config as $key => $value) { + $this->defaults[$key] = $value; + } + + $this->defaults['user_agent'] = str_replace('VERSION', $version, + $this->defaults['user_agent']); + } + + + public abstract function get($url); + +} + + diff --git a/feed/parser/atomparser.php b/feed/parser/atomparser.php new file mode 100644 index 000000000..4d9b07f5b --- /dev/null +++ b/feed/parser/atomparser.php @@ -0,0 +1,28 @@ + + * @author Bernhard Posselt + * @copyright Alessandro Cosentino 2012 + * @copyright Bernhard Posselt 2012, 2014 + */ + +namespace OCA\News\Feed\Parser; + + +class AtomParser extends Parser { + + public function __construct() { + + } + + + public function parse($xml) { + + } + +} diff --git a/feed/parser/parser.php b/feed/parser/parser.php new file mode 100644 index 000000000..3a3fdb86b --- /dev/null +++ b/feed/parser/parser.php @@ -0,0 +1,33 @@ + + * @author Bernhard Posselt + * @copyright Alessandro Cosentino 2012 + * @copyright Bernhard Posselt 2012, 2014 + */ + +namespace OCA\News\Feed\Parser; + +use \ZendXML\Security; + + +class Parser { + + public function __construct() { + + } + + + protected function parseXML($xml) { + + } + + + public abstract function parse($xml); + +} diff --git a/feed/parser/rssparser.php b/feed/parser/rssparser.php new file mode 100644 index 000000000..4245946ed --- /dev/null +++ b/feed/parser/rssparser.php @@ -0,0 +1,28 @@ + + * @author Bernhard Posselt + * @copyright Alessandro Cosentino 2012 + * @copyright Bernhard Posselt 2012, 2014 + */ + +namespace OCA\News\Feed\Parser; + + +class RSSParser extends Parser { + + public function __construct() { + + } + + + public function parse($xml) { + + } + +} diff --git a/utility/faviconfetcher.php b/utility/faviconfetcher.php index 16c188313..a4ae54a6e 100644 --- a/utility/faviconfetcher.php +++ b/utility/faviconfetcher.php @@ -13,6 +13,8 @@ namespace OCA\News\Utility; +use \ZendXml\Security; + class FaviconFetcher { @@ -79,7 +81,7 @@ class FaviconFetcher { $document = new \DOMDocument(); /** @noinspection PhpUndefinedFieldInspection */ $loadEntities = libxml_disable_entity_loader(true); - @$document->loadHTML($file->body); + @$document->loadHTML($file->body, LIBXML_NONET); libxml_disable_entity_loader($loadEntities); if($document) { @@ -98,7 +100,7 @@ class FaviconFetcher { return null; } - + private function getFile($url) { if(trim($this->config->getProxyHost()) === '') { return $this->apiFactory->getFile($url, 10, 5, null, null, false, -- cgit v1.2.3