diff options
author | Bernhard Posselt <dev@bernhard-posselt.com> | 2014-10-04 11:10:33 +0200 |
---|---|---|
committer | Bernhard Posselt <dev@bernhard-posselt.com> | 2014-10-04 11:15:04 +0200 |
commit | feb20ad8b27e973c95d9ebf3b41aa3a125318ef5 (patch) | |
tree | ae2b1e067018645d9d629bf46a2d5ca8a2395ca9 | |
parent | 2c083d20eb3979b23d6573292f9d11b52fcaa28b (diff) |
create files for next gen parser
-rw-r--r-- | articleenhancer/globalarticleenhancer.php | 9 | ||||
-rw-r--r-- | articleenhancer/xpatharticleenhancer.php | 8 | ||||
-rw-r--r-- | feed/client/curl.php | 25 | ||||
-rw-r--r-- | feed/client/httpclient.php | 45 | ||||
-rw-r--r-- | feed/parser/atomparser.php | 28 | ||||
-rw-r--r-- | feed/parser/parser.php | 33 | ||||
-rw-r--r-- | feed/parser/rssparser.php | 28 | ||||
-rw-r--r-- | utility/faviconfetcher.php | 6 |
8 files changed, 174 insertions, 8 deletions
diff --git a/articleenhancer/globalarticleenhancer.php b/articleenhancer/globalarticleenhancer.php index f4466f75f..7d8385db2 100644 --- a/articleenhancer/globalarticleenhancer.php +++ b/articleenhancer/globalarticleenhancer.php @@ -13,6 +13,8 @@ namespace OCA\News\ArticleEnhancer; +use \ZendXml\Security; + use \OCA\News\Db\Item; @@ -23,7 +25,7 @@ class GlobalArticleEnhancer implements ArticleEnhancer { * This method is run after all enhancers and for every item */ public function enhance(Item $item) { - + $dom = new \DOMDocument(); // wrap it inside a div if there is none to prevent invalid wrapping @@ -31,9 +33,10 @@ class GlobalArticleEnhancer implements ArticleEnhancer { $body = '<div>' . $item->getBody() . '</div>'; $loadEntities = libxml_disable_entity_loader(true); - @$dom->loadHTML($body, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD); + @$dom->loadHTML($body, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD + | LIBXML_NONET); libxml_disable_entity_loader($loadEntities); - + $xpath = new \DOMXpath($dom); // remove youtube autoplay diff --git a/articleenhancer/xpatharticleenhancer.php b/articleenhancer/xpatharticleenhancer.php index 0550ada46..87c80762b 100644 --- a/articleenhancer/xpatharticleenhancer.php +++ b/articleenhancer/xpatharticleenhancer.php @@ -13,6 +13,8 @@ namespace OCA\News\ArticleEnhancer; +use \ZendXml\Security; + use \OCA\News\Db\Item; use \OCA\News\Utility\SimplePieAPIFactory; use \OCA\News\Utility\Config; @@ -67,9 +69,9 @@ class XPathArticleEnhancer implements ArticleEnhancer { } $dom = new \DOMDocument(); - + $loadEntities = libxml_disable_entity_loader(true); - @$dom->loadHTML($body); + @$dom->loadHTML($body, LIBXML_NONET); libxml_disable_entity_loader($loadEntities); $xpath = new \DOMXpath($dom); @@ -135,7 +137,7 @@ class XPathArticleEnhancer implements ArticleEnhancer { // return, if xml is empty or loading the HTML fails $loadEntities = libxml_disable_entity_loader(true); - if( trim($xmlString) == "" || !@$dom->loadHTML($xmlString) ) { + if( trim($xmlString) == "" || !@$dom->loadHTML($xmlString, LIBXML_NONET) ) { libxml_disable_entity_loader($loadEntities); return $xmlString; } diff --git a/feed/client/curl.php b/feed/client/curl.php new file mode 100644 index 000000000..3cacfe2aa --- /dev/null +++ b/feed/client/curl.php @@ -0,0 +1,25 @@ +<?php +/** + * ownCloud - News + * + * This file is licensed under the Affero General Public License version 3 or + * later. See the COPYING file. + * + * @author Alessandro Cosentino <cosenal@gmail.com> + * @author Bernhard Posselt <dev@bernhard-posselt.com> + * @copyright Alessandro Cosentino 2012 + * @copyright Bernhard Posselt 2012, 2014 + */ + +namespace OCA\News\Feed\Client; + +class CURL extends HttpClient { + + public function __construct ($version, array $config=null) { + parent::__construct($version, $config); + } + + +} + + diff --git a/feed/client/httpclient.php b/feed/client/httpclient.php new file mode 100644 index 000000000..2fad3ea68 --- /dev/null +++ b/feed/client/httpclient.php @@ -0,0 +1,45 @@ +<?php +/** + * ownCloud - News + * + * This file is licensed under the Affero General Public License version 3 or + * later. See the COPYING file. + * + * @author Alessandro Cosentino <cosenal@gmail.com> + * @author Bernhard Posselt <dev@bernhard-posselt.com> + * @copyright Alessandro Cosentino 2012 + * @copyright Bernhard Posselt 2012, 2014 + */ + +namespace OCA\News\Feed\Client; + +class HttpClient { + + protected $defaults = [ + 'user_agent' => 'ownCloud News/VERSION ' . + '(+https://owncloud.org/; 1 subscriber; feed-url=URL)', + 'connection_timeout' => 10, // seconds + 'timeout' => 10, // seconds + 'verify_ssl' => true, + 'http_version' => '1.1', + 'proxy_host' => '', + 'proxy_port' => 80, + 'proxy_user' => '', + 'proxy_password' => '' + ]; + + public function __construct ($version, array $config=null) { + foreach ($config as $key => $value) { + $this->defaults[$key] = $value; + } + + $this->defaults['user_agent'] = str_replace('VERSION', $version, + $this->defaults['user_agent']); + } + + + public abstract function get($url); + +} + + diff --git a/feed/parser/atomparser.php b/feed/parser/atomparser.php new file mode 100644 index 000000000..4d9b07f5b --- /dev/null +++ b/feed/parser/atomparser.php @@ -0,0 +1,28 @@ +<?php +/** + * ownCloud - News + * + * This file is licensed under the Affero General Public License version 3 or + * later. See the COPYING file. + * + * @author Alessandro Cosentino <cosenal@gmail.com> + * @author Bernhard Posselt <dev@bernhard-posselt.com> + * @copyright Alessandro Cosentino 2012 + * @copyright Bernhard Posselt 2012, 2014 + */ + +namespace OCA\News\Feed\Parser; + + +class AtomParser extends Parser { + + public function __construct() { + + } + + + public function parse($xml) { + + } + +} diff --git a/feed/parser/parser.php b/feed/parser/parser.php new file mode 100644 index 000000000..3a3fdb86b --- /dev/null +++ b/feed/parser/parser.php @@ -0,0 +1,33 @@ +<?php +/** + * ownCloud - News + * + * This file is licensed under the Affero General Public License version 3 or + * later. See the COPYING file. + * + * @author Alessandro Cosentino <cosenal@gmail.com> + * @author Bernhard Posselt <dev@bernhard-posselt.com> + * @copyright Alessandro Cosentino 2012 + * @copyright Bernhard Posselt 2012, 2014 + */ + +namespace OCA\News\Feed\Parser; + +use \ZendXML\Security; + + +class Parser { + + public function __construct() { + + } + + + protected function parseXML($xml) { + + } + + + public abstract function parse($xml); + +} diff --git a/feed/parser/rssparser.php b/feed/parser/rssparser.php new file mode 100644 index 000000000..4245946ed --- /dev/null +++ b/feed/parser/rssparser.php @@ -0,0 +1,28 @@ +<?php +/** + * ownCloud - News + * + * This file is licensed under the Affero General Public License version 3 or + * later. See the COPYING file. + * + * @author Alessandro Cosentino <cosenal@gmail.com> + * @author Bernhard Posselt <dev@bernhard-posselt.com> + * @copyright Alessandro Cosentino 2012 + * @copyright Bernhard Posselt 2012, 2014 + */ + +namespace OCA\News\Feed\Parser; + + +class RSSParser extends Parser { + + public function __construct() { + + } + + + public function parse($xml) { + + } + +} diff --git a/utility/faviconfetcher.php b/utility/faviconfetcher.php index 16c188313..a4ae54a6e 100644 --- a/utility/faviconfetcher.php +++ b/utility/faviconfetcher.php @@ -13,6 +13,8 @@ namespace OCA\News\Utility; +use \ZendXml\Security; + class FaviconFetcher { @@ -79,7 +81,7 @@ class FaviconFetcher { $document = new \DOMDocument(); /** @noinspection PhpUndefinedFieldInspection */ $loadEntities = libxml_disable_entity_loader(true); - @$document->loadHTML($file->body); + @$document->loadHTML($file->body, LIBXML_NONET); libxml_disable_entity_loader($loadEntities); if($document) { @@ -98,7 +100,7 @@ class FaviconFetcher { return null; } - + private function getFile($url) { if(trim($this->config->getProxyHost()) === '') { return $this->apiFactory->getFile($url, 10, 5, null, null, false, |