diff options
Diffstat (limited to 'vendor/fguillot/picofeed/lib/PicoFeed/Filter')
4 files changed, 263 insertions, 296 deletions
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Attribute.php b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Attribute.php index 684dbf7ad..51f7e9e0b 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Attribute.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Attribute.php @@ -5,41 +5,36 @@ namespace PicoFeed\Filter; use PicoFeed\Client\Url; /** - * Attribute Filter class + * Attribute Filter class. * * @author Frederic Guillot - * @package Filter */ class Attribute { /** - * Image proxy url + * Image proxy url. * - * @access private * @var string */ private $image_proxy_url = ''; /** - * Image proxy callback + * Image proxy callback. * - * @access private * @var \Closure|null */ private $image_proxy_callback = null; /** - * limits the image proxy usage to this protocol + * limits the image proxy usage to this protocol. * - * @access private * @var string */ private $image_proxy_limit_protocol = ''; /** - * Tags and attribute whitelist + * Tags and attribute whitelist. * - * @access private * @var array */ private $attribute_whitelist = array( @@ -80,15 +75,14 @@ class Attribute 'time' => array('datetime'), 'abbr' => array('title'), 'iframe' => array('width', 'height', 'frameborder', 'src', 'allowfullscreen'), - 'q' => array('cite') + 'q' => array('cite'), ); /** - * Scheme whitelist + * Scheme whitelist. * * For a complete list go to http://en.wikipedia.org/wiki/URI_scheme * - * @access private * @var array */ private $scheme_whitelist = array( @@ -124,9 +118,8 @@ class Attribute ); /** - * Iframe source whitelist, everything else is ignored + * Iframe source whitelist, everything else is ignored. * - * @access private * @var array */ private $iframe_whitelist = array( @@ -139,9 +132,8 @@ class Attribute ); /** - * Blacklisted resources + * Blacklisted resources. * - * @access private * @var array */ private $media_blacklist = array( @@ -172,9 +164,8 @@ class Attribute ); /** - * Attributes used for external resources + * Attributes used for external resources. * - * @access private * @var array */ private $media_attributes = array( @@ -184,9 +175,8 @@ class Attribute ); /** - * Attributes that must be integer + * Attributes that must be integer. * - * @access private * @var array */ private $integer_attributes = array( @@ -196,9 +186,8 @@ class Attribute ); /** - * Mandatory attributes for specified tags + * Mandatory attributes for specified tags. * - * @access private * @var array */ private $required_attributes = array( @@ -210,9 +199,8 @@ class Attribute ); /** - * Add attributes to specified tags + * Add attributes to specified tags. * - * @access private * @var array */ private $add_attributes = array( @@ -221,9 +209,8 @@ class Attribute ); /** - * List of filters to apply + * List of filters to apply. * - * @access private * @var array */ private $filters = array( @@ -235,22 +222,20 @@ class Attribute 'filterProtocolUrlAttribute', 'rewriteImageProxyUrl', 'secureIframeSrc', - 'removeYouTubeAutoplay' + 'removeYouTubeAutoplay', ); /** - * Add attributes to specified tags + * Add attributes to specified tags. * - * @access private * @var \PicoFeed\Client\Url */ private $website; /** - * Constructor + * Constructor. * - * @access public - * @param \PicoFeed\Client\Url $website Website url instance + * @param \PicoFeed\Client\Url $website Website url instance */ public function __construct(Url $website) { @@ -258,18 +243,18 @@ class Attribute } /** - * Apply filters to the attributes list + * Apply filters to the attributes list. * - * @access public - * @param string $tag Tag name - * @param array $attributes Attributes dictionary - * @return array Filtered attributes + * @param string $tag Tag name + * @param array $attributes Attributes dictionary + * + * @return array Filtered attributes */ public function filter($tag, array $attributes) { foreach ($attributes as $attribute => &$value) { foreach ($this->filters as $filter) { - if (! $this->$filter($tag, $attribute, $value)) { + if (!$this->$filter($tag, $attribute, $value)) { unset($attributes[$attribute]); break; } @@ -280,13 +265,13 @@ class Attribute } /** - * Return true if the value is allowed (remove not allowed attributes) + * Return true if the value is allowed (remove not allowed attributes). + * + * @param string $tag Tag name + * @param string $attribute Attribute name + * @param string $value Attribute value * - * @access public - * @param string $tag Tag name - * @param string $attribute Attribute name - * @param string $value Attribute value - * @return boolean + * @return bool */ public function filterAllowedAttribute($tag, $attribute, $value) { @@ -294,13 +279,13 @@ class Attribute } /** - * Return true if the value is not integer (remove attributes that should have an integer value) + * Return true if the value is not integer (remove attributes that should have an integer value). + * + * @param string $tag Tag name + * @param string $attribute Attribute name + * @param string $value Attribute value * - * @access public - * @param string $tag Tag name - * @param string $attribute Attribute name - * @param string $value Attribute value - * @return boolean + * @return bool */ public function filterIntegerAttribute($tag, $attribute, $value) { @@ -312,18 +297,17 @@ class Attribute } /** - * Return true if the iframe source is allowed (remove not allowed iframe) + * Return true if the iframe source is allowed (remove not allowed iframe). * - * @access public - * @param string $tag Tag name - * @param string $attribute Attribute name - * @param string $value Attribute value - * @return boolean + * @param string $tag Tag name + * @param string $attribute Attribute name + * @param string $value Attribute value + * + * @return bool */ public function filterIframeAttribute($tag, $attribute, $value) { if ($tag === 'iframe' && $attribute === 'src') { - foreach ($this->iframe_whitelist as $url) { if (strpos($value, $url) === 0) { return true; @@ -337,13 +321,13 @@ class Attribute } /** - * Return true if the resource is not blacklisted (remove blacklisted resource attributes) + * Return true if the resource is not blacklisted (remove blacklisted resource attributes). + * + * @param string $tag Tag name + * @param string $attribute Attribute name + * @param string $value Attribute value * - * @access public - * @param string $tag Tag name - * @param string $attribute Attribute name - * @param string $value Attribute value - * @return boolean + * @return bool */ public function filterBlacklistResourceAttribute($tag, $attribute, $value) { @@ -355,13 +339,13 @@ class Attribute } /** - * Convert all relative links to absolute url + * Convert all relative links to absolute url. * - * @access public - * @param string $tag Tag name - * @param string $attribute Attribute name - * @param string $value Attribute value - * @return boolean + * @param string $tag Tag name + * @param string $attribute Attribute name + * @param string $value Attribute value + * + * @return bool */ public function rewriteAbsoluteUrl($tag, $attribute, &$value) { @@ -374,13 +358,13 @@ class Attribute /** * Turns iframes' src attribute from http to https to prevent - * mixed active content + * mixed active content. + * + * @param string $tag Tag name + * @param array $attribute Atttributes name + * @param string $value Attribute value * - * @access public - * @param string $tag Tag name - * @param array $attribute Atttributes name - * @param string $value Attribute value - * @return boolean + * @return bool */ public function secureIframeSrc($tag, $attribute, &$value) { @@ -392,13 +376,13 @@ class Attribute } /** - * Removes YouTube autoplay from iframes + * Removes YouTube autoplay from iframes. + * + * @param string $tag Tag name + * @param array $attribute Atttributes name + * @param string $value Attribute value * - * @access public - * @param string $tag Tag name - * @param array $attribute Atttributes name - * @param string $value Attribute value - * @return boolean + * @return bool */ public function removeYouTubeAutoplay($tag, $attribute, &$value) { @@ -411,23 +395,21 @@ class Attribute } /** - * Rewrite image url to use with a proxy + * Rewrite image url to use with a proxy. * - * @access public - * @param string $tag Tag name - * @param string $attribute Attribute name - * @param string $value Attribute value - * @return boolean + * @param string $tag Tag name + * @param string $attribute Attribute name + * @param string $value Attribute value + * + * @return bool */ public function rewriteImageProxyUrl($tag, $attribute, &$value) { if ($tag === 'img' && $attribute === 'src' - && ! ($this->image_proxy_limit_protocol !== '' && stripos($value, $this->image_proxy_limit_protocol.':') !== 0)) { - + && !($this->image_proxy_limit_protocol !== '' && stripos($value, $this->image_proxy_limit_protocol.':') !== 0)) { if ($this->image_proxy_url) { $value = sprintf($this->image_proxy_url, rawurlencode($value)); - } - else if (is_callable($this->image_proxy_callback)) { + } elseif (is_callable($this->image_proxy_callback)) { $value = call_user_func($this->image_proxy_callback, $value); } } @@ -436,17 +418,17 @@ class Attribute } /** - * Return true if the scheme is authorized + * Return true if the scheme is authorized. + * + * @param string $tag Tag name + * @param string $attribute Attribute name + * @param string $value Attribute value * - * @access public - * @param string $tag Tag name - * @param string $attribute Attribute name - * @param string $value Attribute value - * @return boolean + * @return bool */ public function filterProtocolUrlAttribute($tag, $attribute, $value) { - if ($this->isResource($attribute) && ! $this->isAllowedProtocol($value)) { + if ($this->isResource($attribute) && !$this->isAllowedProtocol($value)) { return false; } @@ -454,11 +436,11 @@ class Attribute } /** - * Automatically add/override some attributes for specific tags + * Automatically add/override some attributes for specific tags. + * + * @param string $tag Tag name + * @param array $attributes Attributes list * - * @access public - * @param string $tag Tag name - * @param array $attributes Attributes list * @return array */ public function addAttributes($tag, array $attributes) @@ -471,19 +453,18 @@ class Attribute } /** - * Return true if all required attributes are present + * Return true if all required attributes are present. * - * @access public - * @param string $tag Tag name - * @param array $attributes Attributes list - * @return boolean + * @param string $tag Tag name + * @param array $attributes Attributes list + * + * @return bool */ public function hasRequiredAttributes($tag, array $attributes) { if (isset($this->required_attributes[$tag])) { - foreach ($this->required_attributes[$tag] as $attribute) { - if (! isset($attributes[$attribute])) { + if (!isset($attributes[$attribute])) { return false; } } @@ -493,11 +474,11 @@ class Attribute } /** - * Check if an attribute name is an external resource + * Check if an attribute name is an external resource. + * + * @param string $attribute Attribute name * - * @access public - * @param string $attribute Attribute name - * @return boolean + * @return bool */ public function isResource($attribute) { @@ -505,16 +486,15 @@ class Attribute } /** - * Detect if the protocol is allowed or not + * Detect if the protocol is allowed or not. * - * @access public - * @param string $value Attribute value - * @return boolean + * @param string $value Attribute value + * + * @return bool */ public function isAllowedProtocol($value) { foreach ($this->scheme_whitelist as $protocol) { - if (strpos($value, $protocol) === 0) { return true; } @@ -524,16 +504,15 @@ class Attribute } /** - * Detect if an url is blacklisted + * Detect if an url is blacklisted. + * + * @param string $resource Attribute value (URL) * - * @access public - * @param string $resource Attribute value (URL) - * @return boolean + * @return bool */ public function isBlacklistedMedia($resource) { foreach ($this->media_blacklist as $name) { - if (strpos($resource, $name) !== false) { return true; } @@ -543,10 +522,10 @@ class Attribute } /** - * Convert the attribute list to html + * Convert the attribute list to html. + * + * @param array $attributes Attributes * - * @access public - * @param array $attributes Attributes * @return string */ public function toHtml(array $attributes) @@ -561,147 +540,158 @@ class Attribute } /** - * Set whitelisted tags and attributes for each tag + * Set whitelisted tags and attributes for each tag. + * + * @param array $values List of tags: ['video' => ['src', 'cover'], 'img' => ['src']] * - * @access public - * @param array $values List of tags: ['video' => ['src', 'cover'], 'img' => ['src']] * @return Attribute */ public function setWhitelistedAttributes(array $values) { $this->attribute_whitelist = $values ?: $this->attribute_whitelist; + return $this; } /** - * Set scheme whitelist + * Set scheme whitelist. + * + * @param array $values List of scheme: ['http://', 'ftp://'] * - * @access public - * @param array $values List of scheme: ['http://', 'ftp://'] * @return Attribute */ public function setSchemeWhitelist(array $values) { $this->scheme_whitelist = $values ?: $this->scheme_whitelist; + return $this; } /** - * Set media attributes (used to load external resources) + * Set media attributes (used to load external resources). + * + * @param array $values List of values: ['src', 'href'] * - * @access public - * @param array $values List of values: ['src', 'href'] * @return Attribute */ public function setMediaAttributes(array $values) { $this->media_attributes = $values ?: $this->media_attributes; + return $this; } /** - * Set blacklisted external resources + * Set blacklisted external resources. + * + * @param array $values List of tags: ['http://google.com/', '...'] * - * @access public - * @param array $values List of tags: ['http://google.com/', '...'] * @return Attribute */ public function setMediaBlacklist(array $values) { $this->media_blacklist = $values ?: $this->media_blacklist; + return $this; } /** - * Set mandatory attributes for whitelisted tags + * Set mandatory attributes for whitelisted tags. + * + * @param array $values List of tags: ['img' => 'src'] * - * @access public - * @param array $values List of tags: ['img' => 'src'] * @return Attribute */ public function setRequiredAttributes(array $values) { $this->required_attributes = $values ?: $this->required_attributes; + return $this; } /** - * Set attributes to automatically to specific tags + * Set attributes to automatically to specific tags. + * + * @param array $values List of tags: ['a' => 'target="_blank"'] * - * @access public - * @param array $values List of tags: ['a' => 'target="_blank"'] * @return Attribute */ public function setAttributeOverrides(array $values) { $this->add_attributes = $values ?: $this->add_attributes; + return $this; } /** - * Set attributes that must be an integer + * Set attributes that must be an integer. + * + * @param array $values List of tags: ['width', 'height'] * - * @access public - * @param array $values List of tags: ['width', 'height'] * @return Attribute */ public function setIntegerAttributes(array $values) { $this->integer_attributes = $values ?: $this->integer_attributes; + return $this; } /** - * Set allowed iframe resources + * Set allowed iframe resources. + * + * @param array $values List of tags: ['http://www.youtube.com'] * - * @access public - * @param array $values List of tags: ['http://www.youtube.com'] * @return Attribute */ public function setIframeWhitelist(array $values) { $this->iframe_whitelist = $values ?: $this->iframe_whitelist; + return $this; } /** - * Set image proxy URL + * Set image proxy URL. * * The original image url will be urlencoded * - * @access public - * @param string $url Proxy URL + * @param string $url Proxy URL + * * @return Attribute */ public function setImageProxyUrl($url) { $this->image_proxy_url = $url ?: $this->image_proxy_url; + return $this; } /** - * Set image proxy callback + * Set image proxy callback. + * + * @param \Closure $callback * - * @access public - * @param \Closure $callback * @return Attribute */ public function setImageProxyCallback($callback) { $this->image_proxy_callback = $callback ?: $this->image_proxy_callback; + return $this; } /** - * Set image proxy protocol restriction + * Set image proxy protocol restriction. + * + * @param string $value * - * @access public - * @param string $value * @return Attribute */ public function setImageProxyProtocol($value) { $this->image_proxy_limit_protocol = $value ?: $this->image_proxy_limit_protocol; + return $this; } } diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Filter.php b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Filter.php index e3e4ad36b..bae2aff0b 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Filter.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Filter.php @@ -3,45 +3,46 @@ namespace PicoFeed\Filter; /** - * Filter class + * Filter class. * * @author Frederic Guillot - * @package Filter */ class Filter { /** - * Get the Html filter instance + * Get the Html filter instance. * * @static - * @access public - * @param string $html HTML content - * @param string $website Site URL (used to build absolute URL) + * + * @param string $html HTML content + * @param string $website Site URL (used to build absolute URL) + * * @return Html */ public static function html($html, $website) { $filter = new Html($html, $website); + return $filter; } /** - * Escape HTML content + * Escape HTML content. * * @static - * @access public + * * @return string */ public static function escape($content) { - return @htmlspecialchars($content, ENT_QUOTES, 'UTF-8', false); + return htmlspecialchars($content, ENT_QUOTES, 'UTF-8', false); } /** - * Remove HTML tags + * Remove HTML tags. + * + * @param string $data Input data * - * @access public - * @param string $data Input data * @return string */ public function removeHTMLTags($data) @@ -50,11 +51,12 @@ class Filter } /** - * Remove the XML tag from a document + * Remove the XML tag from a document. * * @static - * @access public - * @param string $data Input data + * + * @param string $data Input data + * * @return string */ public static function stripXmlTag($data) @@ -64,38 +66,38 @@ class Filter } do { - $pos = strpos($data, '<?xml-stylesheet '); if ($pos !== false) { $data = ltrim(substr($data, strpos($data, '?>') + 2)); } - } while ($pos !== false && $pos < 200); return $data; } /** - * Strip head tag from the HTML content + * Strip head tag from the HTML content. * * @static - * @access public - * @param string $data Input data + * + * @param string $data Input data + * * @return string */ public static function stripHeadTags($data) { - return preg_replace('@<head[^>]*?>.*?</head>@siu','', $data ); + return preg_replace('@<head[^>]*?>.*?</head>@siu', '', $data); } /** - * Trim whitespace from the begining, the end and inside a string and don't break utf-8 string + * Trim whitespace from the begining, the end and inside a string and don't break utf-8 string. * * @static - * @access public - * @param string $value Raw data - * @return string Normalized data + * + * @param string $value Raw data + * + * @return string Normalized data */ public static function stripWhiteSpace($value) { @@ -107,12 +109,13 @@ class Filter } /** - * Fixes before XML parsing + * Fixes before XML parsing. * * @static - * @access public - * @param string $data Raw data - * @return string Normalized data + * + * @param string $data Raw data + * + * @return string Normalized data */ public static function normalizeData($data) { @@ -122,7 +125,7 @@ class Filter ); // strip invalid XML 1.0 characters which are encoded as entities - $data = preg_replace_callback($entities, function($matches) { + $data = preg_replace_callback($entities, function ($matches) { $code_point = $matches[2]; // convert hex entity to decimal diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Html.php b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Html.php index c380a466f..0ccc192fc 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Html.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Html.php @@ -8,88 +8,78 @@ use PicoFeed\Scraper\RuleLoader; use PicoFeed\Parser\XmlParser; /** - * HTML Filter class + * HTML Filter class. * * @author Frederic Guillot - * @package Filter */ class Html { /** - * Config object + * Config object. * - * @access private * @var \PicoFeed\Config\Config */ private $config; /** - * Unfiltered XML data + * Unfiltered XML data. * - * @access private * @var string */ private $input = ''; /** - * Filtered XML data + * Filtered XML data. * - * @access private * @var string */ private $output = ''; /** - * List of empty tags + * List of empty tags. * - * @access private * @var array */ private $empty_tags = array(); /** - * Empty flag + * Empty flag. * - * @access private - * @var boolean + * @var bool */ private $empty = true; /** - * Tag instance + * Tag instance. * - * @access public * @var \PicoFeed\Filter\Tag */ public $tag = ''; /** - * Attribute instance + * Attribute instance. * - * @access public * @var \PicoFeed\Filter\Attribute */ public $attribute = ''; /** - * The website to filter + * The website to filter. * - * @access private * @var string */ private $website; /** - * Initialize the filter, all inputs data must be encoded in UTF-8 before + * Initialize the filter, all inputs data must be encoded in UTF-8 before. * - * @access public - * @param string $html HTML content - * @param string $website Site URL (used to build absolute URL) + * @param string $html HTML content + * @param string $website Site URL (used to build absolute URL) */ public function __construct($html, $website) { - $this->config = new Config; - $this->input = XmlParser::HtmlToXml($html); + $this->config = new Config(); + $this->input = XmlParser::htmlToXml($html); $this->output = ''; $this->tag = new Tag($this->config); $this->website = $website; @@ -97,10 +87,10 @@ class Html } /** - * Set config object + * Set config object. + * + * @param \PicoFeed\Config\Config $config Config instance * - * @access public - * @param \PicoFeed\Config\Config $config Config instance * @return \PicoFeed\Filter\Html */ public function setConfig($config) @@ -126,9 +116,8 @@ class Html } /** - * Run tags/attributes filtering + * Run tags/attributes filtering. * - * @access public * @return string */ public function execute() @@ -150,9 +139,7 @@ class Html } /** - * Called before XML parsing - * - * @access public + * Called before XML parsing. */ public function preFilter() { @@ -160,9 +147,7 @@ class Html } /** - * Called after XML parsing - * - * @access public + * Called after XML parsing. */ public function postFilter() { @@ -173,16 +158,15 @@ class Html } /** - * Called after XML parsing - * @param string $content the content that should be filtered + * Called after XML parsing. * - * @access public + * @param string $content the content that should be filtered */ public function filterRules($content) { // the constructor should require a config, then this if can be removed if ($this->config === null) { - $config = new Config; + $config = new Config(); } else { $config = $this->config; } @@ -196,7 +180,7 @@ class Html if (isset($rules['filter'])) { foreach ($rules['filter'] as $pattern => $rule) { if (preg_match($pattern, $sub_url)) { - foreach($rule as $search => $replace) { + foreach ($rule as $search => $replace) { $content = preg_replace($search, $replace, $content); } } @@ -207,23 +191,20 @@ class Html } /** - * Parse opening tag + * Parse opening tag. * - * @access public - * @param resource $parser XML parser - * @param string $tag Tag name - * @param array $attributes Tag attributes + * @param resource $parser XML parser + * @param string $tag Tag name + * @param array $attributes Tag attributes */ public function startTag($parser, $tag, array $attributes) { $this->empty = true; |