summaryrefslogtreecommitdiffstats
path: root/vendor/fguillot
diff options
context:
space:
mode:
authorBernhard Posselt <dev@bernhard-posselt.com>2015-11-12 17:20:17 +0100
committerBernhard Posselt <dev@bernhard-posselt.com>2015-11-12 17:20:17 +0100
commitec700890c0484250edb4796b197d490df99daa42 (patch)
tree55c3fd03d8b60970097c907264e8b51cd28c937e /vendor/fguillot
parentb575fec0ecedd20e7178b4e01da48ca2e997243c (diff)
update picofeed
Diffstat (limited to 'vendor/fguillot')
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Parser/XmlEntityException.php12
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Parser/XmlParser.php108
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Rules/neustadt-ticker.de.php10
3 files changed, 47 insertions, 83 deletions
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/XmlEntityException.php b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/XmlEntityException.php
new file mode 100644
index 000000000..f3f914d52
--- /dev/null
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/XmlEntityException.php
@@ -0,0 +1,12 @@
+<?php
+
+namespace PicoFeed\Parser;
+
+/**
+ * XmlEntityException Exception.
+ *
+ * @author Bernhard Posselt
+ */
+class XmlEntityException extends MalformedXmlException
+{
+}
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/XmlParser.php b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/XmlParser.php
index ea04a476f..7c8ebc602 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/XmlParser.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/XmlParser.php
@@ -2,9 +2,11 @@
namespace PicoFeed\Parser;
-use Closure;
use DomDocument;
use SimpleXmlElement;
+use Exception;
+
+use ZendXml\Security;
/**
* XML parser class.
@@ -26,64 +28,7 @@ class XmlParser
*/
public static function getSimpleXml($input)
{
- $dom = self::getDomDocument($input);
-
- if ($dom !== false) {
- $simplexml = simplexml_import_dom($dom);
-
- if (!$simplexml instanceof SimpleXmlElement) {
- return false;
- }
-
- return $simplexml;
- }
-
- return false;
- }
-
- /**
- * Scan the input for XXE attacks.
- *
- * @param string $input Unsafe input
- * @param Closure $callback Callback called to build the dom.
- * Must be an instance of DomDocument and receives the input as argument
- *
- * @return bool|DomDocument False if an XXE attack was discovered,
- * otherwise the return of the callback
- */
- private static function scanInput($input, Closure $callback)
- {
- $isRunningFpm = substr(php_sapi_name(), 0, 3) === 'fpm';
-
- if ($isRunningFpm) {
-
- // If running with PHP-FPM and an entity is detected we refuse to parse the feed
- // @see https://bugs.php.net/bug.php?id=64938
- if (strpos($input, '<!ENTITY') !== false) {
- return false;
- }
- } else {
- $entityLoaderDisabled = libxml_disable_entity_loader(true);
- }
-
- libxml_use_internal_errors(true);
-
- $dom = $callback($input);
-
- // Scan for potential XEE attacks using ENTITY
- foreach ($dom->childNodes as $child) {
- if ($child->nodeType === XML_DOCUMENT_TYPE_NODE) {
- if ($child->entities->length > 0) {
- return false;
- }
- }
- }
-
- if ($isRunningFpm === false) {
- libxml_disable_entity_loader($entityLoaderDisabled);
- }
-
- return $dom;
+ return self::scan($input);
}
/**
@@ -101,12 +46,7 @@ class XmlParser
return false;
}
- $dom = self::scanInput($input, function ($in) {
- $dom = new DomDocument();
- $dom->loadXml($in, LIBXML_NONET);
-
- return $dom;
- });
+ $dom = self::scan($input, new DOMDocument());
// The document is empty, there is probably some parsing errors
if ($dom && $dom->childNodes->length === 0) {
@@ -117,6 +57,22 @@ class XmlParser
}
/**
+ * Small wrapper around ZendXml to turn their exceptions into picoFeed
+ * exceptions
+ * @param $input the xml to load
+ * @param $dom pass in a dom document or use null/omit if simpleXml should
+ * be used
+ */
+ private static function scan($input, $dom=null)
+ {
+ try {
+ return Security::scan($input, $dom);
+ } catch(\ZendXml\Exception\RuntimeException $e) {
+ throw new XmlEntityException($e->getMessage());
+ }
+ }
+
+ /**
* Load HTML document by using a DomDocument instance or return false on failure.
*
* @static
@@ -127,27 +83,21 @@ class XmlParser
*/
public static function getHtmlDocument($input)
{
+ $dom = new DomDocument();
+
if (empty($input)) {
- return new DomDocument();
+ return $dom;
}
- if (version_compare(PHP_VERSION, '5.4.0', '>=')) {
- $callback = function ($in) {
- $dom = new DomDocument();
- $dom->loadHTML($in, LIBXML_NONET);
+ libxml_use_internal_errors(true);
- return $dom;
- };
+ if (version_compare(PHP_VERSION, '5.4.0', '>=')) {
+ $dom->loadHTML($input, LIBXML_NONET);
} else {
- $callback = function ($in) {
- $dom = new DomDocument();
- $dom->loadHTML($in);
-
- return $dom;
- };
+ $dom->loadHTML($input);
}
- return self::scanInput($input, $callback);
+ return $dom;
}
/**
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/neustadt-ticker.de.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/neustadt-ticker.de.php
index d17ed9192..60d9bfa51 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/neustadt-ticker.de.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/neustadt-ticker.de.php
@@ -3,11 +3,13 @@
return array(
'grabber' => array(
'%.*%' => array(
- 'test_url' => 'http://www.neustadt-ticker.de/36480/aktuell/nachrichten/buergerbuero-neustadt-ab-heute-wieder-geoeffnet',
- 'body' => array('//div[contains(@class,"article")]/div[@class="PostContent" and *[not(contains(@class, "navigation"))]]'),
+ 'test_url' => 'http://www.neustadt-ticker.de/41302/alltag/kultur/demo-auf-der-boehmischen',
+ 'body' => array(
+ '//div[@class="entry-content"]',
+ ),
'strip' => array(
- '//*[@id="wp_rp_first"]',
- '//*[@class="yarpp-related"]',
+ '//*[contains(@class, "sharedaddy")]',
+ '//*[contains(@class, "yarpp-related")]',
),
),
),