summaryrefslogtreecommitdiffstats
path: root/3rdparty/fguillot/picofeed/lib/PicoFeed/Filter
diff options
context:
space:
mode:
Diffstat (limited to '3rdparty/fguillot/picofeed/lib/PicoFeed/Filter')
-rw-r--r--3rdparty/fguillot/picofeed/lib/PicoFeed/Filter/Attribute.php27
-rw-r--r--3rdparty/fguillot/picofeed/lib/PicoFeed/Filter/Filter.php170
-rw-r--r--3rdparty/fguillot/picofeed/lib/PicoFeed/Filter/Html.php15
-rw-r--r--3rdparty/fguillot/picofeed/lib/PicoFeed/Filter/Tag.php2
4 files changed, 191 insertions, 23 deletions
diff --git a/3rdparty/fguillot/picofeed/lib/PicoFeed/Filter/Attribute.php b/3rdparty/fguillot/picofeed/lib/PicoFeed/Filter/Attribute.php
index 8fe4b7199..23b1103ad 100644
--- a/3rdparty/fguillot/picofeed/lib/PicoFeed/Filter/Attribute.php
+++ b/3rdparty/fguillot/picofeed/lib/PicoFeed/Filter/Attribute.php
@@ -2,14 +2,13 @@
namespace PicoFeed\Filter;
-use \PicoFeed\Url;
-use \PicoFeed\Filter;
+use \PicoFeed\Client\Url;
/**
* Attribute Filter class
*
* @author Frederic Guillot
- * @package filter
+ * @package Filter
*/
class Attribute
{
@@ -215,15 +214,15 @@ class Attribute
* Add attributes to specified tags
*
* @access private
- * @var \PicoFeed\Url
+ * @var \PicoFeed\Client\Url
*/
- private $website = null;
+ private $website;
/**
* Constructor
*
* @access public
- * @param \PicoFeed\Url $website Website url instance
+ * @param \PicoFeed\Client\Url $website Website url instance
*/
public function __construct(Url $website)
{
@@ -489,7 +488,7 @@ class Attribute
*
* @access public
* @param array $values List of tags: ['video' => ['src', 'cover'], 'img' => ['src']]
- * @return \PicoFeed\Filter
+ * @return \PicoFeed\Filter\Filter
*/
public function setWhitelistedAttributes(array $values)
{
@@ -502,7 +501,7 @@ class Attribute
*
* @access public
* @param array $values List of scheme: ['http://', 'ftp://']
- * @return \PicoFeed\Filter
+ * @return \PicoFeed\Filter\Filter
*/
public function setSchemeWhitelist(array $values)
{
@@ -515,7 +514,7 @@ class Attribute
*
* @access public
* @param array $values List of values: ['src', 'href']
- * @return \PicoFeed\Filter
+ * @return \PicoFeed\Filter\Filter
*/
public function setMediaAttributes(array $values)
{
@@ -528,7 +527,7 @@ class Attribute
*
* @access public
* @param array $values List of tags: ['http://google.com/', '...']
- * @return \PicoFeed\Filter
+ * @return \PicoFeed\Filter\Filter
*/
public function setMediaBlacklist(array $values)
{
@@ -541,7 +540,7 @@ class Attribute
*
* @access public
* @param array $values List of tags: ['img' => 'src']
- * @return \PicoFeed\Filter
+ * @return \PicoFeed\Filter\Filter
*/
public function setRequiredAttributes(array $values)
{
@@ -554,7 +553,7 @@ class Attribute
*
* @access public
* @param array $values List of tags: ['a' => 'target="_blank"']
- * @return \PicoFeed\Filter
+ * @return \PicoFeed\Filter\Filter
*/
public function setAttributeOverrides(array $values)
{
@@ -567,7 +566,7 @@ class Attribute
*
* @access public
* @param array $values List of tags: ['width', 'height']
- * @return \PicoFeed\Filter
+ * @return \PicoFeed\Filter\Filter
*/
public function setIntegerAttributes(array $values)
{
@@ -580,7 +579,7 @@ class Attribute
*
* @access public
* @param array $values List of tags: ['http://www.youtube.com']
- * @return \PicoFeed\Filter
+ * @return \PicoFeed\Filter\Filter
*/
public function setIframeWhitelist(array $values)
{
diff --git a/3rdparty/fguillot/picofeed/lib/PicoFeed/Filter/Filter.php b/3rdparty/fguillot/picofeed/lib/PicoFeed/Filter/Filter.php
new file mode 100644
index 000000000..0490e2f49
--- /dev/null
+++ b/3rdparty/fguillot/picofeed/lib/PicoFeed/Filter/Filter.php
@@ -0,0 +1,220 @@
+<?php
+
+namespace PicoFeed\Filter;
+
+/**
+ * Filter class
+ *
+ * Static helpers used to clean up feed and HTML content.
+ *
+ * @author  Frederic Guillot
+ * @package Filter
+ */
+class Filter
+{
+    /**
+     * Get the Html filter instance
+     *
+     * @static
+     * @access public
+     * @param  string  $html      HTML content
+     * @param  string  $website   Site URL (used to build absolute URL)
+     * @return \PicoFeed\Filter\Html
+     */
+    public static function html($html, $website)
+    {
+        return new Html($html, $website);
+    }
+
+    /**
+     * Escape HTML content
+     *
+     * Single and double quotes are converted (ENT_QUOTES) and existing
+     * entities are not encoded a second time (double_encode is disabled).
+     *
+     * @static
+     * @access public
+     * @param  string  $content   Raw content
+     * @return string
+     */
+    public static function escape($content)
+    {
+        // NOTE(review): "@" presumably hides encoding warnings raised by htmlspecialchars() - confirm
+        return @htmlspecialchars($content, ENT_QUOTES, 'UTF-8', false);
+    }
+
+    /**
+     * Remove the doctype and the html, head and body tags
+     *
+     * Only the tags themselves (opening and closing) and the whitespace that
+     * follows them are removed, the content they wrap is kept. Tags that merely
+     * start with one of those names, such as <header>, are left untouched.
+     *
+     * @static
+     * @access public
+     * @param  string  $data      Input data
+     * @return string
+     */
+    public static function removeHTMLTags($data)
+    {
+        // \b stops "head" from matching <header> and "body" from matching longer tag names
+        return preg_replace('~<(?:!DOCTYPE|/?(?:html|head|body))\b[^>]*>\s*~i', '', $data);
+    }
+
+    /**
+     * Remove the XML declaration and the xml-stylesheet instructions from a document
+     *
+     * Everything located before the end of a removed instruction is dropped as
+     * well. An instruction that is never closed is left untouched. Another pass
+     * is only attempted while the last stylesheet instruction removed started
+     * within the first 200 bytes.
+     *
+     * @static
+     * @access public
+     * @param  string  $data      Input data
+     * @return string
+     */
+    public static function stripXmlTag($data)
+    {
+        $pos = strpos($data, '<?xml');
+        $end = $pos === false ? false : strpos($data, '?>', $pos);
+
+        if ($end !== false) {
+            $data = ltrim(substr($data, $end + 2));
+        }
+
+        do {
+
+            $pos = strpos($data, '<?xml-stylesheet ');
+
+            // Search the closing marker after the instruction itself, never before it
+            $end = $pos === false ? false : strpos($data, '?>', $pos);
+
+            if ($end !== false) {
+                $data = ltrim(substr($data, $end + 2));
+            }
+
+        } while ($end !== false && $pos < 200);
+
+        return $data;
+    }
+
+    /**
+     * Strip head tag from the HTML content
+     *
+     * The first literal <head> tag, its content and the first </head> tag that
+     * follows are removed. The data is returned unchanged when one is missing.
+     *
+     * @static
+     * @access public
+     * @param  string  $data      Input data
+     * @return string
+     */
+    public static function stripHeadTags($data)
+    {
+        $closing_tag = '</head>';
+        $start = strpos($data, '<head>');
+
+        // The closing tag only counts when it comes after the opening one
+        $end = $start === false ? false : strpos($data, $closing_tag, $start);
+
+        if ($end === false) {
+            return $data;
+        }
+
+        return substr($data, 0, $start).substr($data, $end + strlen($closing_tag));
+    }
+
+    /**
+     * Replace carriage returns, tabs and line feeds by a space, then trim both
+     * ends of the string, without breaking utf-8 strings
+     *
+     * @static
+     * @access public
+     * @param  string  $value     Raw data
+     * @return string             Normalized data
+     */
+    public static function stripWhiteSpace($value)
+    {
+        // Plain replacements on purpose: preg_replace('/\s+/', ' ', $value) breaks utf-8
+        return trim(str_replace(array("\r", "\t", "\n"), ' ', $value));
+    }
+
+    /**
+     * Dirty quickfixes before XML parsing
+     *
+     * Removes the byte sequences and the entity listed below from the data.
+     *
+     * @static
+     * @access public
+     * @param  string  $data      Raw data
+     * @return string             Normalized data
+     */
+    public static function normalizeData($data)
+    {
+        $invalid_chars = array(
+            "\x10",
+            "\xc3\x20",
+            "&#x1F;",
+        );
+
+        return str_replace($invalid_chars, '', $data);
+    }
+
+    /**
+     * Get the first XML tag
+     *
+     * Comments, the doctype and the XML declaration are skipped first. The tag
+     * is returned without its closing ">", e.g. '<rss version="2.0"'.
+     *
+     * @static
+     * @access public
+     * @param  string  $data      Feed content
+     * @return string             First tag, or an empty string when there is none
+     */
+    public static function getFirstTag($data)
+    {
+        // Strip HTML comments (max of 5,000 characters long to prevent crashing).
+        // The "U" modifier is not used: combined with "?" it makes the quantifier
+        // greedy and removes everything between the first and the last comment.
+        $data = self::replaceOrKeep('/<!--.{0,5000}?-->/is', $data);
+
+        /* Strip Doctype:
+         * Doctype needs to be within the first 100 characters. (Ideally the first!)
+         * If it's not found by then, we need to stop looking to prevent PREG
+         * from reaching max backtrack depth and crashing.
+         */
+        $data = self::replaceOrKeep('/^.{0,100}?<!DOCTYPE[^>]*>/is', $data);
+
+        // Strip <?xml version....
+        $data = self::stripXmlTag($data);
+
+        // Find the first tag
+        $open_tag = strpos($data, '<');
+        $close_tag = $open_tag === false ? false : strpos($data, '>', $open_tag);
+
+        if ($close_tag === false) {
+            return '';
+        }
+
+        // substr() takes a length, not an end offset
+        return substr($data, $open_tag, $close_tag - $open_tag);
+    }
+
+    /**
+     * Remove every match of a pattern, keeping the data as is if PCRE fails
+     *
+     * @static
+     * @access private
+     * @param  string  $pattern   Regular expression
+     * @param  string  $data      Input data
+     * @return string
+     */
+    private static function replaceOrKeep($pattern, $data)
+    {
+        $result = preg_replace($pattern, '', $data);
+
+        // preg_replace() returns null on error (e.g. backtrack limit reached)
+        return $result === null ? $data : $result;
+    }
+}
diff --git a/3rdparty/fguillot/picofeed/lib/PicoFeed/Filter/Html.php b/3rdparty/fguillot/picofeed/lib/PicoFeed/Filter/Html.php
index 4a76ca45f..f09a10e3a 100644
--- a/3rdparty/fguillot/picofeed/lib/PicoFeed/Filter/Html.php
+++ b/3rdparty/fguillot/picofeed/lib/PicoFeed/Filter/Html.php
@@ -2,15 +2,14 @@
namespace PicoFeed\Filter;
-use \PicoFeed\Url;
-use \PicoFeed\Filter;
-use \PicoFeed\XmlParser;
+use \PicoFeed\Client\Url;
+use \PicoFeed\Parser\XmlParser;
/**
* HTML Filter class
*
* @author Frederic Guillot
- * @package filter
+ * @package Filter
*/
class Html
{
@@ -18,9 +17,9 @@ class Html
* Config object
*
* @access private
- * @var \PicoFeed\Config
+ * @var \PicoFeed\Config\Config
*/
- private $config = null;
+ private $config;
/**
* Unfiltered XML data
@@ -89,8 +88,8 @@ class Html
* Set config object
*
* @access public
- * @param \PicoFeed\Config $config Config instance
- * @return \PicoFeed\Html
+ * @param \PicoFeed\Config\Config $config Config instance
+ * @return \PicoFeed\Filter\Html
*/
public function setConfig($config)
{
diff --git a/3rdparty/fguillot/picofeed/lib/PicoFeed/Filter/Tag.php b/3rdparty/fguillot/picofeed/lib/PicoFeed/Filter/Tag.php
index 83bd1b9e4..dbeffe7a4 100644
--- a/3rdparty/fguillot/picofeed/lib/PicoFeed/Filter/Tag.php
+++ b/3rdparty/fguillot/picofeed/lib/PicoFeed/Filter/Tag.php
@@ -6,7 +6,7 @@ namespace PicoFeed\Filter;
* Tag Filter class
*
* @author Frederic Guillot
- * @package filter
+ * @package Filter
*/
class Tag
{