From cf0f54ef8474ee0ab84e8953459734f5cec601a9 Mon Sep 17 00:00:00 2001 From: Bernhard Posselt Date: Sun, 23 Nov 2014 17:14:29 +0100 Subject: update picofeed to fix xxe --- .../picofeed/lib/PicoFeed/Parser/XmlParser.php | 65 +++++++++++++++------- .../lib/PicoFeed/Rules/www.numerama.com.php | 5 +- 2 files changed, 48 insertions(+), 22 deletions(-) (limited to 'vendor/fguillot') diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/XmlParser.php b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/XmlParser.php index 580b66574..2b007e199 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/XmlParser.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/XmlParser.php @@ -2,6 +2,7 @@ namespace PicoFeed\Parser; +use Closure; use DomDocument; use DOMXPath; use SimpleXmlElement; @@ -43,14 +44,16 @@ class XmlParser } /** - * Get a DomDocument instance or return false + * Scan the input for XXE attacks * - * @static - * @access public - * @param string $input XML content - * @return mixed + * @param string $input Unsafe input + * @param Closure $callback Callback called to build the dom. 
+ * Must be an instance of DomDocument and receives the input as argument + * + * @return bool|DomDocument False if an XXE attack was discovered, + * otherwise the return of the callback */ - public static function getDomDocument($input) + private static function scanInput($input, Closure $callback) { if (substr(php_sapi_name(), 0, 3) === 'fpm') { @@ -67,13 +70,7 @@ class XmlParser libxml_use_internal_errors(true); - $dom = new DomDocument; - $dom->loadXml($input, LIBXML_NONET); - - // The document is empty, there is probably some parsing errors - if ($dom->childNodes->length === 0) { - return false; - } + $dom = $callback($input); // Scan for potential XEE attacks using ENTITY foreach ($dom->childNodes as $child) { @@ -88,27 +85,55 @@ class XmlParser } /** - * Load HTML document by using a DomDocument instance or return false on failure + * Get a DomDocument instance or return false * * @static * @access public * @param string $input XML content * @return mixed */ - public static function getHtmlDocument($input) + public static function getDomDocument($input) { - libxml_use_internal_errors(true); + $dom = self::scanInput($input, function ($in) { + $dom = new DomDocument; + $dom->loadXml($in, LIBXML_NONET); + return $dom; + }); - $dom = new DomDocument; + // The document is empty, there is probably some parsing errors + if ($dom && $dom->childNodes->length === 0) { + return false; + } + + return $dom; + } + /** + * Load HTML document by using a DomDocument instance or return false on failure + * + * @static + * @access public + * @param string $input XML content + * @return mixed + */ + public static function getHtmlDocument($input) + { if (version_compare(PHP_VERSION, '5.4.0', '>=')) { - $dom->loadHTML($input, LIBXML_NONET); + $callback = function ($in) { + $dom = new DomDocument; + $dom->loadHTML($in, LIBXML_NONET); + return $dom; + }; } else { - $dom->loadHTML($input); + $callback = function ($in) { + $dom = new DomDocument; + $dom->loadHTML($in); + return $dom; 
+ }; } - return $dom; + return self::scanInput($input, $callback); } /** diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.numerama.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.numerama.com.php index 5149c69ab..b6387da73 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.numerama.com.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.numerama.com.php @@ -2,9 +2,10 @@ return array( 'test_url' => 'http://www.numerama.com/magazine/26857-bientot-des-robots-dans-les-cuisines-de-mcdo.html', 'body' => array( - '//*[@id="general_content"]/table/tbody/tr/td[1]/div/div/div[6]/h2', - '//div[@id="newstext"]', + '//div[@class="col_left"]//div[@class="content"]', ), 'strip' => array( + '//div[@class="news_social"]', + '//div[@id="newssuiv"]', ) ); \ No newline at end of file -- cgit v1.2.3