diff options
Diffstat (limited to '3rdparty/fguillot/picofeed/lib/PicoFeed/Filter')
4 files changed, 191 insertions, 23 deletions
diff --git a/3rdparty/fguillot/picofeed/lib/PicoFeed/Filter/Attribute.php b/3rdparty/fguillot/picofeed/lib/PicoFeed/Filter/Attribute.php index 8fe4b7199..23b1103ad 100644 --- a/3rdparty/fguillot/picofeed/lib/PicoFeed/Filter/Attribute.php +++ b/3rdparty/fguillot/picofeed/lib/PicoFeed/Filter/Attribute.php @@ -2,14 +2,13 @@ namespace PicoFeed\Filter; -use \PicoFeed\Url; -use \PicoFeed\Filter; +use \PicoFeed\Client\Url; /** * Attribute Filter class * * @author Frederic Guillot - * @package filter + * @package Filter */ class Attribute { @@ -215,15 +214,15 @@ class Attribute * Add attributes to specified tags * * @access private - * @var \PicoFeed\Url + * @var \PicoFeed\Client\Url */ - private $website = null; + private $website; /** * Constructor * * @access public - * @param \PicoFeed\Url $website Website url instance + * @param \PicoFeed\Client\Url $website Website url instance */ public function __construct(Url $website) { @@ -489,7 +488,7 @@ class Attribute * * @access public * @param array $values List of tags: ['video' => ['src', 'cover'], 'img' => ['src']] - * @return \PicoFeed\Filter + * @return \PicoFeed\Filter\Filter */ public function setWhitelistedAttributes(array $values) { @@ -502,7 +501,7 @@ class Attribute * * @access public * @param array $values List of scheme: ['http://', 'ftp://'] - * @return \PicoFeed\Filter + * @return \PicoFeed\Filter\Filter */ public function setSchemeWhitelist(array $values) { @@ -515,7 +514,7 @@ class Attribute * * @access public * @param array $values List of values: ['src', 'href'] - * @return \PicoFeed\Filter + * @return \PicoFeed\Filter\Filter */ public function setMediaAttributes(array $values) { @@ -528,7 +527,7 @@ class Attribute * * @access public * @param array $values List of tags: ['http://google.com/', '...'] - * @return \PicoFeed\Filter + * @return \PicoFeed\Filter\Filter */ public function setMediaBlacklist(array $values) { @@ -541,7 +540,7 @@ class Attribute * * @access public * @param array $values List of 
tags: ['img' => 'src'] - * @return \PicoFeed\Filter + * @return \PicoFeed\Filter\Filter */ public function setRequiredAttributes(array $values) { @@ -554,7 +553,7 @@ class Attribute * * @access public * @param array $values List of tags: ['a' => 'target="_blank"'] - * @return \PicoFeed\Filter + * @return \PicoFeed\Filter\Filter */ public function setAttributeOverrides(array $values) { @@ -567,7 +566,7 @@ class Attribute * * @access public * @param array $values List of tags: ['width', 'height'] - * @return \PicoFeed\Filter + * @return \PicoFeed\Filter\Filter */ public function setIntegerAttributes(array $values) { @@ -580,7 +579,7 @@ class Attribute * * @access public * @param array $values List of tags: ['http://www.youtube.com'] - * @return \PicoFeed\Filter + * @return \PicoFeed\Filter\Filter */ public function setIframeWhitelist(array $values) { diff --git a/3rdparty/fguillot/picofeed/lib/PicoFeed/Filter/Filter.php b/3rdparty/fguillot/picofeed/lib/PicoFeed/Filter/Filter.php new file mode 100644 index 000000000..0490e2f49 --- /dev/null +++ b/3rdparty/fguillot/picofeed/lib/PicoFeed/Filter/Filter.php @@ -0,0 +1,170 @@ +<?php + +namespace PicoFeed\Filter; + +/** + * Filter class + * + * @author Frederic Guillot + * @package Filter + */ +class Filter +{ + /** + * Get the Html filter instance + * + * @static + * @access public + * @param string $html HTML content + * @param string $website Site URL (used to build absolute URL) + * @return \PicoFeed\Filter\Html + */ + public static function html($html, $website) + { + $filter = new Html($html, $website); + return $filter; + } + + /** + * Escape HTML content + * + * @static + * @access public + * @return string + */ + public static function escape($content) + { + return @htmlspecialchars($content, ENT_QUOTES, 'UTF-8', false); + } + + /** + * Remove the document wrapper tags only (doctype, html, head, body) + * + * @access public + * @param string $data Input data + * @return string + */ + public function removeHTMLTags($data) + { + return 
preg_replace('~<(?:!DOCTYPE|/?(?:html|head|body))[^>]*>\s*~i', '', $data); + } + + /** + * Remove the XML tag from a document + * + * @static + * @access public + * @param string $data Input data + * @return string + */ + public static function stripXmlTag($data) + { + if (strpos($data, '<?xml') !== false && strpos($data, '?>') !== false) { + $data = ltrim(substr($data, strpos($data, '?>') + 2)); + } + + do { + + $pos = strpos($data, '<?xml-stylesheet '); + $end = $pos === false ? false : strpos($data, '?>', $pos); + if ($end !== false) { + $data = ltrim(substr($data, $end + 2)); + } + + } while ($end !== false && $pos < 200); + + return $data; + } + + /** + * Strip head tag from the HTML content + * + * @static + * @access public + * @param string $data Input data + * @return string + */ + public static function stripHeadTags($data) + { + $start = strpos($data, '<head>'); + $end = strpos($data, '</head>'); + + if ($start !== false && $end !== false) { + $before = substr($data, 0, $start); + $after = substr($data, $end + 7); + $data = $before.$after; + } + + return $data; + } + + /** + * Trim whitespace from the beginning, the end and inside a string and don't break utf-8 string + * + * @static + * @access public + * @param string $value Raw data + * @return string Normalized data + */ + public static function stripWhiteSpace($value) + { + $value = str_replace("\r", ' ', $value); + $value = str_replace("\t", ' ', $value); + $value = str_replace("\n", ' ', $value); + // $value = preg_replace('/\s+/', ' ', $value); <= break utf-8 + return trim($value); + } + + /** + * Dirty quickfixes before XML parsing + * + * @static + * @access public + * @param string $data Raw data + * @return string Normalized data + */ + public static function normalizeData($data) + { + $invalid_chars = array( + "\x10", + "\xc3\x20", + "", // NOTE(review): an empty needle makes str_replace() a no-op; the original character was probably lost, confirm against upstream + ); + + foreach ($invalid_chars as $needle) { + $data = str_replace($needle, '', $data); + } + + return $data; + } + + /** + * Get the first XML tag + * + * @static + * @access public + * @param string $data Feed content + * @return 
string + */ + public static function getFirstTag($data) + { + // Strip HTML comments (max of 5,000 characters long to prevent crashing) + $data = preg_replace('/<!--(.{0,5000}?)-->/Uis', '', $data); + + /* Strip Doctype: + * Doctype needs to be within the first 100 characters. (Ideally the first!) + * If it's not found by then, we need to stop looking to prevent PREG + * from reaching max backtrack depth and crashing. + */ + $data = preg_replace('/^.{0,100}<!DOCTYPE([^>]*)>/Uis', '', $data); + + // Strip <?xml version.... + $data = self::stripXmlTag($data); + + // Find the first tag; substr() takes a length (not an end offset), so subtract the start position + $open_tag = strpos($data, '<'); + $close_tag = strpos($data, '>', (int) $open_tag); + + return substr($data, (int) $open_tag, max(0, $close_tag - (int) $open_tag)); + } +} diff --git a/3rdparty/fguillot/picofeed/lib/PicoFeed/Filter/Html.php b/3rdparty/fguillot/picofeed/lib/PicoFeed/Filter/Html.php index 4a76ca45f..f09a10e3a 100644 --- a/3rdparty/fguillot/picofeed/lib/PicoFeed/Filter/Html.php +++ b/3rdparty/fguillot/picofeed/lib/PicoFeed/Filter/Html.php @@ -2,15 +2,14 @@ namespace PicoFeed\Filter; -use \PicoFeed\Url; -use \PicoFeed\Filter; -use \PicoFeed\XmlParser; +use \PicoFeed\Client\Url; +use \PicoFeed\Parser\XmlParser; /** * HTML Filter class * * @author Frederic Guillot - * @package filter + * @package Filter */ class Html { @@ -18,9 +17,9 @@ class Html * Config object * * @access private - * @var \PicoFeed\Config + * @var \PicoFeed\Config\Config */ - private $config = null; + private $config; /** * Unfiltered XML data @@ -89,8 +88,8 @@ class Html * Set config object * * @access public - * @param \PicoFeed\Config $config Config instance - * @return \PicoFeed\Html + * @param \PicoFeed\Config\Config $config Config instance + * @return \PicoFeed\Filter\Html */ public function setConfig($config) { diff --git a/3rdparty/fguillot/picofeed/lib/PicoFeed/Filter/Tag.php b/3rdparty/fguillot/picofeed/lib/PicoFeed/Filter/Tag.php index 83bd1b9e4..dbeffe7a4 100644 --- a/3rdparty/fguillot/picofeed/lib/PicoFeed/Filter/Tag.php +++ 
b/3rdparty/fguillot/picofeed/lib/PicoFeed/Filter/Tag.php @@ -6,7 +6,7 @@ namespace PicoFeed\Filter; * Tag Filter class * * @author Frederic Guillot - * @package filter + * @package Filter */ class Tag { |