diff options
Diffstat (limited to 'vendor/fguillot/picofeed')
-rw-r--r-- | vendor/fguillot/picofeed/lib/PicoFeed/Filter/Html.php | 12 | ||||
-rw-r--r-- | vendor/fguillot/picofeed/lib/PicoFeed/Filter/Tag.php | 38 | ||||
-rw-r--r-- | vendor/fguillot/picofeed/tests/Filter/HtmlFilterTest.php | 62 |
3 files changed, 68 insertions, 44 deletions
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Html.php b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Html.php index 7d6880c69..4e046603f 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Html.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Html.php @@ -121,6 +121,8 @@ class Html */ public function execute() { + $this->preFilter(); + $parser = xml_parser_create(); xml_set_object($parser, $this); @@ -136,6 +138,16 @@ class Html } /** + * Called before XML parsing + * + * @access public + */ + public function preFilter() + { + $this->input = $this->tag->removeBlacklistedTags($this->input); + } + + /** * Called after XML parsing * * @access public diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Tag.php b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Tag.php index 40f7c6c98..647b7352f 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Tag.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Tag.php @@ -2,6 +2,9 @@ namespace PicoFeed\Filter; +use DOMXpath; +use PicoFeed\Parser\XmlParser; + /** * Tag Filter class * @@ -11,6 +14,17 @@ namespace PicoFeed\Filter; class Tag { /** + * Tags blacklist (Xpath expressions) + * + * @access private + * @var array + */ + private $tag_blacklist = array( + '//script', + '//style', + ); + + /** * Tags whitelist * * @access private @@ -104,7 +118,7 @@ class Tag */ public function isSelfClosingTag($tag) { - return in_array($tag, array('br', 'img')); + return $tag === 'br' || $tag === 'img'; } /** @@ -135,6 +149,28 @@ class Tag } /** + * Remove script tags + * + * @access public + * @param string $data Input data + * @return string + */ + public function removeBlacklistedTags($data) + { + $dom = XmlParser::getDomDocument($data); + $xpath = new DOMXpath($dom); + + $nodes = $xpath->query(implode(' | ', $this->tag_blacklist)); + + foreach ($nodes as $node) { + $node->parentNode->removeChild($node); + } + + return $dom->saveXML(); + } + + + /** * Remove empty tags * * @access public diff --git a/vendor/fguillot/picofeed/tests/Filter/HtmlFilterTest.php b/vendor/fguillot/picofeed/tests/Filter/HtmlFilterTest.php index 5093855d1..8d64cbc1d 100644 --- a/vendor/fguillot/picofeed/tests/Filter/HtmlFilterTest.php +++ b/vendor/fguillot/picofeed/tests/Filter/HtmlFilterTest.php @@ -1,9 +1,9 @@ <?php + namespace PicoFeed\Filter; use PHPUnit_Framework_TestCase; - class HtmlFilterTest extends PHPUnit_Framework_TestCase { public function testExecute() @@ -31,6 +31,24 @@ class HtmlFilterTest extends PHPUnit_Framework_TestCase $this->assertEquals($expected, $f->execute()); } + public function testClearScriptAttributes() + { + $data = '<div><script>this is the content</script><script>blubb content</script><p>something</p></div><p>hi</p>'; + + $f = new Html($data, 'http://blabla'); + $expected = '<p>something</p><p>hi</p>'; + $this->assertEquals($expected, $f->execute()); + } + + public function testClearStyleAttributes() + { + $data = '<div><style>this is the content</style><style>blubb content</style><p>something</p></div><p>hi</p>'; + + $f = new Html($data, 'http://blabla'); + $expected = '<p>something</p><p>hi</p>'; + $this->assertEquals($expected, $f->execute()); + } + public function testEmptyTags() { $data = <<<EOD @@ -124,46 +142,4 @@ x-amz-id-2: DDjqfqz2ZJufzqRAcj1mh+9XvSogrPohKHwXlo8IlkzH67G6w4wnjn9HYgbs4uI0 $f = new Html('<table><tr></tr></table>', 'http://blabla'); $this->assertEquals('', $f->execute()); } -/* - public function testFilter() - { - $input = <<<EOD -<div xmlns="http://www.w3.org/1999/xhtml"><article> -<figure> - <img src="/2014/08/06/4694-pluie" alt="Flaque de pluie"/> - <figcaption>La Saussaye, France, 6 août 2014</figcaption> -</figure> - -<div lang="en" class="extrait"> - <blockquote cite="urn:isbn:978-0-8248-3742-6"> - <p>Spring had truly arrived. Countless streams suddenly materialized all over the roads, fields, grasslands, and thickets; flowing as if the melting snow's waters were spilling over. </p> - </blockquote> - <p class="source"><span class="auteur">Takiji Kobayashi</span>, <cite class="titre">Yasuko</cite>.</p> -</div> - -<p>La pluie abonde. La forêt humide resplendit. L'eau monte, l'eau déborde. Il reste pourtant notre humanité. Toute entière, resplendissante.</p> - -</article> -</div> -EOD; - - $expected = <<<EOD -<figure> - <img src="http://www.la-grange.net/2014/08/06/4694-pluie" alt="Flaque de pluie"/> - <figcaption>La Saussaye, France, 6 août 2014</figcaption> -</figure> - - - <blockquote> - <p>Spring had truly arrived. Countless streams suddenly materialized all over the roads, fields, grasslands, and thickets; flowing as if the melting snow's waters were spilling over. </p> - </blockquote> - <p>Takiji Kobayashi, <cite>Yasuko</cite>.</p> - - -<p>La pluie abonde. La forêt humide resplendit. L'eau monte, l'eau déborde. Il reste pourtant notre humanité. Toute entière, resplendissante.</p> -EOD; - - $f = new Html($input, 'http://www.la-grange.net/'); - $this->assertEquals($expected, $f->execute()); - }*/ } |