diff options
Diffstat (limited to 'vendor/fguillot/picofeed/lib/PicoFeed/Filter')
m--------- | vendor/fguillot/picofeed | 0 | ||||
-rw-r--r-- | vendor/fguillot/picofeed/lib/PicoFeed/Filter/Attribute.php | 658 | ||||
-rw-r--r-- | vendor/fguillot/picofeed/lib/PicoFeed/Filter/Filter.php | 131 | ||||
-rw-r--r-- | vendor/fguillot/picofeed/lib/PicoFeed/Filter/Html.php | 203 | ||||
-rw-r--r-- | vendor/fguillot/picofeed/lib/PicoFeed/Filter/Tag.php | 173 |
5 files changed, 1165 insertions, 0 deletions
diff --git a/vendor/fguillot/picofeed b/vendor/fguillot/picofeed
deleted file mode 160000
-Subproject 0a1d0d3950f7f047dc8fb1d80aa6296e15f306d
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Attribute.php b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Attribute.php
new file mode 100644
index 000000000..02126d208
--- /dev/null
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Attribute.php
@@ -0,0 +1,658 @@
<?php

namespace PicoFeed\Filter;

use \PicoFeed\Client\Url;

/**
 * Attribute Filter class
 *
 * Decides, attribute by attribute, what survives HTML filtering: each
 * attribute goes through the callbacks listed in $filters and is dropped
 * as soon as one of them returns false.
 *
 * @author  Frederic Guillot
 * @package Filter
 */
class Attribute
{
    /**
     * Image proxy url (sprintf() format, receives the url-encoded image url)
     *
     * @access private
     * @var string
     */
    private $image_proxy_url = '';

    /**
     * Image proxy callback (used only when no proxy url is defined)
     *
     * @access private
     * @var \Closure|null
     */
    private $image_proxy_callback = null;

    /**
     * Tags and attribute whitelist
     *
     * @access private
     * @var array
     */
    private $attribute_whitelist = array(
        'audio' => array('controls', 'src'),
        'video' => array('poster', 'controls', 'height', 'width', 'src'),
        'source' => array('src', 'type'),
        'dt' => array(),
        'dd' => array(),
        'dl' => array(),
        'table' => array(),
        'caption' => array(),
        'tr' => array(),
        'th' => array(),
        'td' => array(),
        'tbody' => array(),
        'thead' => array(),
        'h2' => array(),
        'h3' => array(),
        'h4' => array(),
        'h5' => array(),
        'h6' => array(),
        'strong' => array(),
        'em' => array(),
        'code' => array(),
        'pre' => array(),
        'blockquote' => array(),
        'p' => array(),
        'ul' => array(),
        'li' => array(),
        'ol' => array(),
        'br' => array(),
        'del' => array(),
        'a' => array('href'),
        'img' => array('src', 'title', 'alt'),
        'figure' => array(),
        'figcaption' => array(),
        'cite' => array(),
        'time' => array('datetime'),
        'abbr' => array('title'),
        'iframe' => array('width', 'height', 'frameborder', 'src'),
        'q' => array('cite')
    );

    /**
     * Scheme whitelist
     *
     * For a complete list go to http://en.wikipedia.org/wiki/URI_scheme
     *
     * @access private
     * @var array
     */
    private $scheme_whitelist = array(
        'bitcoin:',
        'callto:',
        'ed2k://',
        'facetime://',
        'feed:',
        'ftp://',
        'geo:',
        'git://',
        'http://',
        'https://',
        'irc://',
        'irc6://',
        'ircs://',
        'jabber:',
        'magnet:',
        'mailto:',
        'nntp://',
        'rtmp://',
        'sftp://',
        'sip:',
        'sips:',
        'skype:',
        'smb://',
        'sms:',
        'spotify:',
        'ssh:',
        'steam:',
        'svn://',
        'tel:',
    );

    /**
     * Iframe source whitelist, everything else is ignored
     *
     * @access private
     * @var array
     */
    private $iframe_whitelist = array(
        'http://www.youtube.com',
        'https://www.youtube.com',
        'http://player.vimeo.com',
        'https://player.vimeo.com',
        'http://www.dailymotion.com',
        'https://www.dailymotion.com',
    );

    /**
     * Blacklisted resources (matched as substrings of the attribute value)
     *
     * @access private
     * @var array
     */
    private $media_blacklist = array(
        'api.flattr.com',
        'feeds.feedburner.com',
        'share.feedsportal.com',
        'da.feedsportal.com',
        'rss.feedsportal.com',
        'res.feedsportal.com',
        'res1.feedsportal.com',
        'res2.feedsportal.com',
        'res3.feedsportal.com',
        'pi.feedsportal.com',
        'rss.nytimes.com',
        'feeds.wordpress.com',
        'stats.wordpress.com',
        'rss.cnn.com',
        'twitter.com/home?status=',
        'twitter.com/share',
        'twitter_icon_large.png',
        'www.facebook.com/sharer.php',
        'facebook_icon_large.png',
        'plus.google.com/share',
        'www.gstatic.com/images/icons/gplus-16.png',
        'www.gstatic.com/images/icons/gplus-32.png',
        'www.gstatic.com/images/icons/gplus-64.png',
    );

    /**
     * Attributes used for external resources
     *
     * @access private
     * @var array
     */
    private $media_attributes = array(
        'src',
        'href',
        'poster',
    );

    /**
     * Attributes that must be integer
     *
     * @access private
     * @var array
     */
    private $integer_attributes = array(
        'width',
        'height',
        'frameborder',
    );

    /**
     * Mandatory attributes for specified tags
     *
     * @access private
     * @var array
     */
    private $required_attributes = array(
        'a' => array('href'),
        'img' => array('src'),
        'iframe' => array('src'),
        'audio' => array('src'),
        'source' => array('src'),
    );

    /**
     * Add attributes to specified tags
     *
     * @access private
     * @var array
     */
    private $add_attributes = array(
        'a' => array('rel' => 'noreferrer', 'target' => '_blank')
    );

    /**
     * List of filters to apply (method names, executed in this order)
     *
     * @access private
     * @var array
     */
    private $filters = array(
        'filterEmptyAttribute',
        'filterAllowedAttribute',
        'filterIntegerAttribute',
        'rewriteAbsoluteUrl',
        'filterIframeAttribute',
        'filterBlacklistResourceAttribute',
        'filterProtocolUrlAttribute',
        'rewriteImageProxyUrl',
    );

    /**
     * Website url instance (base used to resolve relative urls)
     *
     * @access private
     * @var \PicoFeed\Client\Url
     */
    private $website;

    /**
     * Constructor
     *
     * @access public
     * @param  \PicoFeed\Client\Url   $website    Website url instance
     */
    public function __construct(Url $website)
    {
        $this->website = $website;
    }

    /**
     * Apply filters to the attributes list
     *
     * Every attribute is passed to each method of $filters; the first filter
     * returning false removes the attribute. Filters taking the value by
     * reference (rewrite*) may change the value in place.
     *
     * @access public
     * @param  string    $tag           Tag name
     * @param  array     $attributes    Attributes dictionary
     * @return array                    Filtered attributes
     */
    public function filter($tag, array $attributes)
    {
        foreach ($attributes as $attribute => &$value) {
            foreach ($this->filters as $filter) {
                if (! $this->$filter($tag, $attribute, $value)) {
                    unset($attributes[$attribute]);
                    break;
                }
            }
        }

        // Break the reference left behind by the by-reference foreach
        unset($value);

        return $attributes;
    }

    /**
     * Return true if the value is not empty (remove empty attributes)
     *
     * @access public
     * @param  string    $tag           Tag name
     * @param  string    $attribute     Attribute name
     * @param  string    $value         Attribute value
     * @return boolean
     */
    public function filterEmptyAttribute($tag, $attribute, $value)
    {
        return $value !== '';
    }

    /**
     * Return true if the attribute is whitelisted for the tag (remove not allowed attributes)
     *
     * @access public
     * @param  string    $tag           Tag name
     * @param  string    $attribute     Attribute name
     * @param  string    $value         Attribute value
     * @return boolean
     */
    public function filterAllowedAttribute($tag, $attribute, $value)
    {
        return isset($this->attribute_whitelist[$tag])
            && in_array($attribute, $this->attribute_whitelist[$tag], true);
    }

    /**
     * Return true if the attribute is not an integer attribute, or if its value
     * contains only digits (remove integer attributes with a non-integer value)
     *
     * @access public
     * @param  string    $tag           Tag name
     * @param  string    $attribute     Attribute name
     * @param  string    $value         Attribute value
     * @return boolean
     */
    public function filterIntegerAttribute($tag, $attribute, $value)
    {
        if (in_array($attribute, $this->integer_attributes, true)) {
            // ctype_digit() treats small integers as character codes, so always test a string
            return ctype_digit((string) $value);
        }

        return true;
    }

    /**
     * Return true if the iframe source is allowed (remove not allowed iframe)
     *
     * A whitelist entry matches only when the value continues with a path,
     * query or fragment right after it, so "https://www.youtube.com" does not
     * authorize "https://www.youtube.com.example.org/" nor
     * "https://www.youtube.com@example.org/".
     *
     * @access public
     * @param  string    $tag           Tag name
     * @param  string    $attribute     Attribute name
     * @param  string    $value         Attribute value
     * @return boolean
     */
    public function filterIframeAttribute($tag, $attribute, $value)
    {
        if ($tag === 'iframe' && $attribute === 'src') {

            foreach ($this->iframe_whitelist as $url) {
                if ($this->hasUrlPrefix($value, $url)) {
                    return true;
                }
            }

            return false;
        }

        return true;
    }

    /**
     * Return true if the url starts with the prefix and the prefix ends on a url boundary
     *
     * @access private
     * @param  string    $value     Url to check
     * @param  string    $prefix    Whitelisted url prefix
     * @return boolean
     */
    private function hasUrlPrefix($value, $prefix)
    {
        if ($prefix === '' || strpos($value, $prefix) !== 0) {
            return false;
        }

        $length = strlen($prefix);

        // Exact match, or the whitelist entry already ends with a separator
        if (strlen($value) === $length || substr($prefix, -1) === '/') {
            return true;
        }

        // Anything else (".", "@", ":", letters...) would extend or replace the host
        return in_array($value[$length], array('/', '?', '#'), true);
    }

    /**
     * Return true if the resource is not blacklisted (remove blacklisted resource attributes)
     *
     * @access public
     * @param  string    $tag           Tag name
     * @param  string    $attribute     Attribute name
     * @param  string    $value         Attribute value
     * @return boolean
     */
    public function filterBlacklistResourceAttribute($tag, $attribute, $value)
    {
        if ($this->isResource($attribute) && $this->isBlacklistedMedia($value)) {
            return false;
        }

        return true;
    }

    /**
     * Convert all relative links to absolute url
     *
     * @access public
     * @param  string    $tag           Tag name
     * @param  string    $attribute     Attribute name
     * @param  string    $value         Attribute value (modified in place)
     * @return boolean                  Always true
     */
    public function rewriteAbsoluteUrl($tag, $attribute, &$value)
    {
        if ($this->isResource($attribute)) {
            $value = Url::resolve($value, $this->website);
        }

        return true;
    }

    /**
     * Rewrite image url to use with a proxy (HTTPS resource are ignored)
     *
     * The proxy url takes precedence; the callback is used only when no
     * proxy url is defined.
     *
     * @access public
     * @param  string    $tag           Tag name
     * @param  string    $attribute     Attribute name
     * @param  string    $value         Attribute value (modified in place)
     * @return boolean                  Always true
     */
    public function rewriteImageProxyUrl($tag, $attribute, &$value)
    {
        if ($tag === 'img' && $attribute === 'src' && strpos($value, 'http:') === 0) {

            if ($this->image_proxy_url) {
                $value = sprintf($this->image_proxy_url, rawurlencode($value));
            }
            else if (is_callable($this->image_proxy_callback)) {
                $value = call_user_func($this->image_proxy_callback, $value);
            }
        }

        return true;
    }

    /**
     * Return true if the scheme is authorized
     *
     * @access public
     * @param  string    $tag           Tag name
     * @param  string    $attribute     Attribute name
     * @param  string    $value         Attribute value
     * @return boolean
     */
    public function filterProtocolUrlAttribute($tag, $attribute, $value)
    {
        if ($this->isResource($attribute) && ! $this->isAllowedProtocol($value)) {
            return false;
        }

        return true;
    }

    /**
     * Automatically add some attributes for specific tags
     *
     * Array union is used: an attribute already present in $attributes keeps
     * its value, only missing ones are added.
     *
     * @access public
     * @param  string    $tag            Tag name
     * @param  array     $attributes     Attributes list
     * @return array
     */
    public function addAttributes($tag, array $attributes)
    {
        if (isset($this->add_attributes[$tag])) {
            $attributes += $this->add_attributes[$tag];
        }

        return $attributes;
    }

    /**
     * Return true if all required attributes are present
     *
     * @access public
     * @param  string    $tag            Tag name
     * @param  array     $attributes     Attributes list
     * @return boolean
     */
    public function hasRequiredAttributes($tag, array $attributes)
    {
        if (isset($this->required_attributes[$tag])) {

            foreach ($this->required_attributes[$tag] as $attribute) {
                if (! isset($attributes[$attribute])) {
                    return false;
                }
            }
        }

        return true;
    }

    /**
     * Check if an attribute name is an external resource
     *
     * @access public
     * @param  string    $attribute     Attribute name
     * @return boolean
     */
    public function isResource($attribute)
    {
        return in_array($attribute, $this->media_attributes, true);
    }

    /**
     * Detect if the protocol is allowed or not
     *
     * @access public
     * @param  string    $value     Attribute value
     * @return boolean
     */
    public function isAllowedProtocol($value)
    {
        foreach ($this->scheme_whitelist as $protocol) {

            if (strpos($value, $protocol) === 0) {
                return true;
            }
        }

        return false;
    }

    /**
     * Detect if an url is blacklisted
     *
     * @access public
     * @param  string    $resource     Attribute value (URL)
     * @return boolean
     */
    public function isBlacklistedMedia($resource)
    {
        foreach ($this->media_blacklist as $name) {

            if (strpos($resource, $name) !== false) {
                return true;
            }
        }

        return false;
    }

    /**
     * Convert the attribute list to html
     *
     * Values are escaped; attribute names are written as is.
     *
     * @access public
     * @param  array    $attributes    Attributes
     * @return string
     */
    public function toHtml(array $attributes)
    {
        $html = array();

        foreach ($attributes as $attribute => $value) {
            $html[] = sprintf('%s="%s"', $attribute, Filter::escape($value));
        }

        return implode(' ', $html);
    }

    /**
     * Set whitelisted tags and attributes for each tag
     *
     * An empty list leaves the current whitelist untouched.
     *
     * @access public
     * @param  array   $values   List of tags: ['video' => ['src', 'cover'], 'img' => ['src']]
     * @return Attribute
     */
    public function setWhitelistedAttributes(array $values)
    {
        $this->attribute_whitelist = $values ?: $this->attribute_whitelist;
        return $this;
    }

    /**
     * Set scheme whitelist
     *
     * An empty list leaves the current whitelist untouched.
     *
     * @access public
     * @param  array   $values   List of scheme: ['http://', 'ftp://']
     * @return Attribute
     */
    public function setSchemeWhitelist(array $values)
    {
        $this->scheme_whitelist = $values ?: $this->scheme_whitelist;
        return $this;
    }

    /**
     * Set media attributes (used to load external resources)
     *
     * An empty list leaves the current values untouched.
     *
     * @access public
     * @param  array   $values   List of values: ['src', 'href']
     * @return Attribute
     */
    public function setMediaAttributes(array $values)
    {
        $this->media_attributes = $values ?: $this->media_attributes;
        return $this;
    }

    /**
     * Set blacklisted external resources
     *
     * An empty list leaves the current blacklist untouched.
     *
     * @access public
     * @param  array   $values   List of url fragments: ['http://google.com/', '...']
     * @return Attribute
     */
    public function setMediaBlacklist(array $values)
    {
        $this->media_blacklist = $values ?: $this->media_blacklist;
        return $this;
    }

    /**
     * Set mandatory attributes for whitelisted tags
     *
     * An empty list leaves the current values untouched.
     *
     * @access public
     * @param  array   $values   List of tags: ['img' => ['src']]
     * @return Attribute
     */
    public function setRequiredAttributes(array $values)
    {
        $this->required_attributes = $values ?: $this->required_attributes;
        return $this;
    }

    /**
     * Set attributes to add automatically to specific tags
     *
     * An empty list leaves the current values untouched.
     *
     * @access public
     * @param  array   $values   List of tags: ['a' => ['target' => '_blank']]
     * @return Attribute
     */
    public function setAttributeOverrides(array $values)
    {
        $this->add_attributes = $values ?: $this->add_attributes;
        return $this;
    }

    /**
     * Set attributes that must be an integer
     *
     * An empty list leaves the current values untouched.
     *
     * @access public
     * @param  array   $values   List of attributes: ['width', 'height']
     * @return Attribute
     */
    public function setIntegerAttributes(array $values)
    {
        $this->integer_attributes = $values ?: $this->integer_attributes;
        return $this;
    }

    /**
     * Set allowed iframe resources
     *
     * An empty list leaves the current whitelist untouched.
     *
     * @access public
     * @param  array   $values   List of url prefixes: ['http://www.youtube.com']
     * @return Attribute
     */
    public function setIframeWhitelist(array $values)
    {
        $this->iframe_whitelist = $values ?: $this->iframe_whitelist;
        return $this;
    }

    /**
     * Set image proxy URL
     *
     * The url is used as a sprintf() format: the original image url is
     * url-encoded and passed as the only argument. An empty value leaves
     * the current proxy url untouched.
     *
     * @access public
     * @param  string   $url   Proxy URL
     * @return Attribute
     */
    public function setImageProxyUrl($url)
    {
        $this->image_proxy_url = $url ?: $this->image_proxy_url;
        return $this;
    }

    /**
     * Set image proxy callback
     *
     * The callback receives the original image url and returns the new one.
     * An empty value leaves the current callback untouched.
     *
     * @access public
     * @param  \Closure  $callback
     * @return Attribute
     */
    public function setImageProxyCallback($callback)
    {
        $this->image_proxy_callback = $callback ?: $this->image_proxy_callback;
        return $this;
    }
}
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Filter.php b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Filter.php
new file mode 100644
index 000000000..0eb3f88ea
--- /dev/null
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Filter.php
@@ -0,0 +1,131 @@
<?php

namespace PicoFeed\Filter;

/**
 * Filter class
 *
 * @author  Frederic Guillot
 * @package Filter
 */
class Filter
{
    /**
     * Get the Html filter instance
     *
     * @static
     * @access public
     * @param  string  $html      HTML content
     * @param  string  $website   Site URL (used to build absolute URL)
     * @return Html
     */
    public static function html($html, $website)
    {
        $filter = new Html($html, $website);
        return $filter;
    }

    /**
     * Escape HTML
content
     *
     * Existing entities are not encoded a second time (double_encode is off).
     *
     * @static
     * @access public
     * @param  string  $content   Raw content
     * @return string
     */
    public static function escape($content)
    {
        return @htmlspecialchars($content, ENT_QUOTES, 'UTF-8', false);
    }

    /**
     * Remove the doctype and the html, head and body tags
     *
     * Only the tags themselves (and the whitespace following them) are
     * removed, their content is kept.
     *
     * @access public
     * @param  string  $data  Input data
     * @return string
     */
    public function removeHTMLTags($data)
    {
        return preg_replace('~<(?:!DOCTYPE|/?(?:html|head|body))[^>]*>\s*~i', '', $data);
    }

    /**
     * Remove the XML tag from a document
     *
     * Strips the XML declaration and the xml-stylesheet processing
     * instructions. Data is returned untouched when an instruction is not
     * terminated, instead of losing its first bytes.
     *
     * @static
     * @access public
     * @param  string  $data  Input data
     * @return string
     */
    public static function stripXmlTag($data)
    {
        if (strpos($data, '<?xml') !== false) {
            $end = strpos($data, '?>');

            // No terminator: nothing can be stripped safely
            if ($end !== false) {
                $data = ltrim(substr($data, $end + 2));
            }
        }

        do {

            $pos = strpos($data, '<?xml-stylesheet ');

            if ($pos !== false) {
                $end = strpos($data, '?>');

                // Unterminated instruction: stop here, the data would never change
                if ($end === false) {
                    break;
                }

                $data = ltrim(substr($data, $end + 2));
            }

        } while ($pos !== false && $pos < 200);

        return $data;
    }

    /**
     * Strip head tag from the HTML content
     *
     * The UTF-8 aware pattern fails on invalid UTF-8 input; in that case the
     * same pattern is applied byte-wise, and the input is returned unchanged
     * if that fails too.
     *
     * @static
     * @access public
     * @param  string  $data  Input data
     * @return string
     */
    public static function stripHeadTags($data)
    {
        $result = preg_replace('@<head[^>]*?>.*?</head>@siu', '', $data);

        if ($result === null) {
            // The pattern is pure ASCII, so matching bytes gives the same result
            $result = preg_replace('@<head[^>]*?>.*?</head>@si', '', $data);
        }

        return $result === null ? $data : $result;
    }

    /**
     * Replace carriage returns, tabs and line feeds by a space and trim the string
     *
     * Consecutive spaces are not collapsed: a whitespace regex is avoided
     * on purpose because it broke utf-8 strings.
     *
     * @static
     * @access public
     * @param  string  $value    Raw data
     * @return string            Normalized data
     */
    public static function stripWhiteSpace($value)
    {
        return trim(str_replace(array("\r", "\t", "\n"), ' ', $value));
    }

    /**
     * Dirty quickfixes before XML parsing
     *
     * Removes a fixed list of byte sequences from the data.
     *
     * @static
     * @access public
     * @param  string  $data    Raw data
     * @return string           Normalized data
     */
    public static function normalizeData($data)
    {
        // NOTE(review): the last entry shows up as an empty string in this
        // rendering and is therefore a no-op; it may be a sequence lost during
        // extraction - confirm against the upstream file.
        $invalid_chars = array(
            "\x10",
            "\xc3\x20",
            "",
        );

        foreach ($invalid_chars as $needle) {
            $data = str_replace($needle, '', $data);
        }

        return $data;
    }
}
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Html.php b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Html.php
new file mode 100644
index 000000000..7abd740b1
--- /dev/null
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Html.php
@@ -0,0 +1,203 @@
<?php

namespace PicoFeed\Filter;

use \PicoFeed\Client\Url;
use \PicoFeed\Parser\XmlParser;

/**
 * HTML Filter class
 *
 * @author  Frederic Guillot
 * @package Filter
 */
class Html
{
    /**
     * Config object
     *
     * @access private
     * @var \PicoFeed\Config\Config
     */
    private $config;

    /**
     * Unfiltered XML data
     *
     * @access private
     * @var string
     */
    private $input = '';

    /**
     * Filtered XML data
     *
     * @access private
     * @var string
     */
    private $output = '';

    /**
     * List of empty tags
     *
     * @access private
     * @var array
     */
    private $empty_tags = array();

    /**
     * Empty flag
     *
     * @access private
     * @var boolean
     */
    private $empty = true;

    /**
     * Tag instance
     *
     * @access public
     * @var \PicoFeed\Filter\Tag
     */
    public $tag = '';

    /**
     * Attribute instance
     *
     * @access public
     * @var \PicoFeed\Filter\Attribute
     */
    public $attribute = '';

    /**
     * Initialize the filter, all inputs data must be encoded in UTF-8 before
     *
     * @access public
     * @param  string  $html      HTML content
     * @param  string  $website   Site URL (used to build absolute URL)
     */
    public function __construct($html, $website)
    {
        $this->input = XmlParser::HtmlToXml($html);
        $this->output = '';
        $this->tag = new Tag;
        $this->attribute = new Attribute(new Url($website));
    }

    /**
     * Set config object
     *
     * @access public
     * @param  \PicoFeed\Config\Config  $config   Config instance
     * @return \PicoFeed\Filter\Html
     */
    public function setConfig($config)
    {
        $this->config = $config;

        if ($this->config !== null) {
$this->attribute->setImageProxyCallback(
                $this->config->getFilterImageProxyCallback()
            );
            $this->attribute->setImageProxyUrl(
                $this->config->getFilterImageProxyUrl()
            );
            $this->attribute->setIframeWhitelist(
                $this->config->getFilterIframeWhitelist(array())
            );
            $this->attribute->setIntegerAttributes(
                $this->config->getFilterIntegerAttributes(array())
            );
            $this->attribute->setAttributeOverrides(
                $this->config->getFilterAttributeOverrides(array())
            );
            $this->attribute->setRequiredAttributes(
                $this->config->getFilterRequiredAttributes(array())
            );
            $this->attribute->setMediaBlacklist(
                $this->config->getFilterMediaBlacklist(array())
            );
            $this->attribute->setMediaAttributes(
                $this->config->getFilterMediaAttributes(array())
            );
            $this->attribute->setSchemeWhitelist(
                $this->config->getFilterSchemeWhitelist(array())
            );

            // The same tag => attributes map feeds both filters:
            // the whole map for attributes, only its keys for tags
            $this->attribute->setWhitelistedAttributes(
                $this->config->getFilterWhitelistedTags(array())
            );
            $this->tag->setWhitelistedTags(
                array_keys($this->config->getFilterWhitelistedTags(array()))
            );
        }

        return $this;
    }

    /**
     * Run tags/attributes filtering
     *
     * The input is walked with the XML parser; the handlers of this object
     * build the output, which is then cleaned up by postFilter().
     *
     * @access public
     * @return string    Filtered content
     */
    public function execute()
    {
        $xml = xml_parser_create();

        xml_set_object($xml, $this);
        xml_set_element_handler($xml, 'startTag', 'endTag');
        xml_set_character_data_handler($xml, 'dataTag');

        // Tag names must keep their original case
        xml_parser_set_option($xml, XML_OPTION_CASE_FOLDING, false);

        xml_parse($xml, $this->input, true);
        xml_parser_free($xml);

        $this->postFilter();

        return $this->output;
    }

    /**
     * Called after XML parsing: remove empty tags and trim the output
     *
     * @access public
     */
    public function postFilter()
    {
        $cleaned = $this->tag->removeEmptyTags($this->output);
        $this->output = trim($cleaned);
    }

    /**
     * Parse opening tag
     *
     * The tag is written only when it is allowed and still has its required
     * attributes after filtering. The outcome is pushed on the stack read
     * by endTag().
     *
     * @access public
     * @param  resource  $parser        XML parser
     * @param  string    $tag           Tag name
     * @param  array     $attributes    Tag attributes
     */
    public function startTag($parser, $tag, array $attributes)
    {
        $this->empty = true;

        if ($this->tag->isAllowed($tag, $attributes)) {

            $filtered = $this->attribute->filter($tag, $attributes);

            if ($this->attribute->hasRequiredAttributes($tag, $filtered)) {

                $completed = $this->attribute->addAttributes($tag, $filtered);
                $html = $this->attribute->toHtml($completed);

                $this->output .= $this->tag->openHtmlTag($tag, $html);
                $this->empty = false;
            }
        }

        $this->empty_tags[] = $this->empty;
    }

    /**
     * Parse closing tag
     *
     * Nothing is written when the matching opening tag was skipped.
     *
     * @access public
     * @param  resource  $parser    XML parser
     * @param  string    $tag       Tag name
     */
    public function endTag($parser, $tag)
    {
        $skipped = array_pop($this->empty_tags);

        if ($skipped) {
            return;
        }

        if ($this->tag->isAllowedTag($tag)) {
            $this->output .= $this->tag->closeHtmlTag($tag);
        }
    }

    /**
     * Parse tag content
     *
     * @access public
     * @param  resource  $parser    XML parser
     * @param  string    $content   Tag content
     */
    public function dataTag($parser, $content)
    {
        // Non-breaking spaces (UTF-8 bytes C2 A0) become normal spaces before escaping
        $text = str_replace("\xc2\xa0", ' ', $content);

        $this->output .= Filter::escape($text);
    }
}
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Tag.php b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Tag.php
new file mode 100644
index 000000000..40f7c6c98
--- /dev/null
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Tag.php
@@ -0,0 +1,173 @@
<?php

namespace PicoFeed\Filter;

/**
 * Tag Filter class
 *
 * @author  Frederic Guillot
 * @package Filter
 */
class Tag
{
    /**
     * Tags whitelist
     *
     * @access private
     * @var array
     */
    private $tag_whitelist = array(
        'audio',
        'video',
        'source',
        'dt',
        'dd',
        'dl',
        'table',
        'caption',
        'tr',
        'th',
        'td',
        'tbody',
        'thead',
        'h2',
        'h3',
        'h4',
        'h5',
        'h6',
        'strong',
        'em',
        'code',
        'pre',
        'blockquote',
        'p',
        'ul',
        'li',
        'ol',
        'br',
        'del',
        'a',
        'img',
        'figure',
        'figcaption',
        'cite',
        'time',
        'abbr',
        'iframe',
        'q',
    );

    /**
     * Check if the tag is allowed and is not a pixel tracker
     *
     * @access
public + * @param string $tag Tag name + * @param array $attributes Attributes dictionary + * @return boolean + */ + public function isAllowed($tag, array $attributes) + { + return $this->isAllowedTag($tag) && ! $this->isPixelTracker($tag, $attributes); + } + + /** + * Return the HTML opening tag + * + * @access public + * @param string $tag Tag name + * @param string $attributes Attributes converted in html + * @return string + */ + public function openHtmlTag($tag, $attributes = '') + { + return '<'.$tag.(empty($attributes) ? '' : ' '.$attributes).($this->isSelfClosingTag($tag) ? '/>' : '>'); + } + + /** + * Return the HTML closing tag + * + * @access public + * @param string $tag Tag name + * @return string + */ + public function closeHtmlTag($tag) + { + return $this->isSelfC |