diff options
author | Bernhard Posselt <dev@bernhard-posselt.com> | 2015-11-12 17:20:17 +0100 |
---|---|---|
committer | Bernhard Posselt <dev@bernhard-posselt.com> | 2015-11-12 17:20:17 +0100 |
commit | ec700890c0484250edb4796b197d490df99daa42 (patch) | |
tree | 55c3fd03d8b60970097c907264e8b51cd28c937e /vendor/fguillot/picofeed/lib | |
parent | b575fec0ecedd20e7178b4e01da48ca2e997243c (diff) |
update picofeed
Diffstat (limited to 'vendor/fguillot/picofeed/lib')
3 files changed, 47 insertions, 83 deletions
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/XmlEntityException.php b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/XmlEntityException.php new file mode 100644 index 000000000..f3f914d52 --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/XmlEntityException.php @@ -0,0 +1,12 @@ +<?php + +namespace PicoFeed\Parser; + +/** + * XmlEntityException Exception. + * + * @author Bernhard Posselt + */ +class XmlEntityException extends MalformedXmlException +{ +} diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/XmlParser.php b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/XmlParser.php index ea04a476f..7c8ebc602 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/XmlParser.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/XmlParser.php @@ -2,9 +2,11 @@ namespace PicoFeed\Parser; -use Closure; use DomDocument; use SimpleXmlElement; +use Exception; + +use ZendXml\Security; /** * XML parser class. @@ -26,64 +28,7 @@ class XmlParser */ public static function getSimpleXml($input) { - $dom = self::getDomDocument($input); - - if ($dom !== false) { - $simplexml = simplexml_import_dom($dom); - - if (!$simplexml instanceof SimpleXmlElement) { - return false; - } - - return $simplexml; - } - - return false; - } - - /** - * Scan the input for XXE attacks. - * - * @param string $input Unsafe input - * @param Closure $callback Callback called to build the dom. - * Must be an instance of DomDocument and receives the input as argument - * - * @return bool|DomDocument False if an XXE attack was discovered, - * otherwise the return of the callback - */ - private static function scanInput($input, Closure $callback) - { - $isRunningFpm = substr(php_sapi_name(), 0, 3) === 'fpm'; - - if ($isRunningFpm) { - - // If running with PHP-FPM and an entity is detected we refuse to parse the feed - // @see https://bugs.php.net/bug.php?id=64938 - if (strpos($input, '<!ENTITY') !== false) { - return false; - } - } else { - $entityLoaderDisabled = libxml_disable_entity_loader(true); - } - - libxml_use_internal_errors(true); - - $dom = $callback($input); - - // Scan for potential XEE attacks using ENTITY - foreach ($dom->childNodes as $child) { - if ($child->nodeType === XML_DOCUMENT_TYPE_NODE) { - if ($child->entities->length > 0) { - return false; - } - } - } - - if ($isRunningFpm === false) { - libxml_disable_entity_loader($entityLoaderDisabled); - } - - return $dom; + return self::scan($input); } /** @@ -101,12 +46,7 @@ class XmlParser return false; } - $dom = self::scanInput($input, function ($in) { - $dom = new DomDocument(); - $dom->loadXml($in, LIBXML_NONET); - - return $dom; - }); + $dom = self::scan($input, new DOMDocument()); // The document is empty, there is probably some parsing errors if ($dom && $dom->childNodes->length === 0) { @@ -117,6 +57,22 @@ class XmlParser } /** + * Small wrapper around ZendXml to turn their exceptions into picoFeed + * exceptions + * @param $input the xml to load + * @param $dom pass in a dom document or use null/omit if simpleXml should + * be used + */ + private static function scan($input, $dom=null) + { + try { + return Security::scan($input, $dom); + } catch(\ZendXml\Exception\RuntimeException $e) { + throw new XmlEntityException($e->getMessage()); + } + } + + /** * Load HTML document by using a DomDocument instance or return false on failure. * * @static @@ -127,27 +83,21 @@ class XmlParser */ public static function getHtmlDocument($input) { + $dom = new DomDocument(); + if (empty($input)) { - return new DomDocument(); + return $dom; } - if (version_compare(PHP_VERSION, '5.4.0', '>=')) { - $callback = function ($in) { - $dom = new DomDocument(); - $dom->loadHTML($in, LIBXML_NONET); + libxml_use_internal_errors(true); - return $dom; - }; + if (version_compare(PHP_VERSION, '5.4.0', '>=')) { + $dom->loadHTML($input, LIBXML_NONET); } else { - $callback = function ($in) { - $dom = new DomDocument(); - $dom->loadHTML($in); - - return $dom; - }; + $dom->loadHTML($input); } - return self::scanInput($input, $callback); + return $dom; } /** diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/neustadt-ticker.de.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/neustadt-ticker.de.php index d17ed9192..60d9bfa51 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/neustadt-ticker.de.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/neustadt-ticker.de.php @@ -3,11 +3,13 @@ return array( 'grabber' => array( '%.*%' => array( - 'test_url' => 'http://www.neustadt-ticker.de/36480/aktuell/nachrichten/buergerbuero-neustadt-ab-heute-wieder-geoeffnet', - 'body' => array('//div[contains(@class,"article")]/div[@class="PostContent" and *[not(contains(@class, "navigation"))]]'), + 'test_url' => 'http://www.neustadt-ticker.de/41302/alltag/kultur/demo-auf-der-boehmischen', + 'body' => array( + '//div[@class="entry-content"]', + ), 'strip' => array( - '//*[@id="wp_rp_first"]', - '//*[@class="yarpp-related"]', + '//*[contains(@class, "sharedaddy")]', + '//*[contains(@class, "yarpp-related")]', ), ), ), |