diff options
author | Bernhard Posselt <dev@bernhard-posselt.com> | 2016-03-25 20:19:57 +0100 |
---|---|---|
committer | Bernhard Posselt <dev@bernhard-posselt.com> | 2016-03-25 20:19:57 +0100 |
commit | f2a65b0a4aa1cd6448ec5236190dcdf1f0eb12c4 (patch) | |
tree | 5be26b5c74561b59d5662046fe8ee82c08c120cd /vendor/fguillot/picofeed/lib/PicoFeed/Filter/Html.php | |
parent | 8a58b145cf7d05d6c73043700b27813125d9a88f (diff) |
remove compiled js and 3rdparty deps from the repo
Diffstat (limited to 'vendor/fguillot/picofeed/lib/PicoFeed/Filter/Html.php')
-rw-r--r-- | vendor/fguillot/picofeed/lib/PicoFeed/Filter/Html.php | 243 |
1 files changed, 0 insertions, 243 deletions
<?php

namespace PicoFeed\Filter;

use PicoFeed\Config\Config;
use PicoFeed\Client\Url;
use PicoFeed\Scraper\RuleLoader;
use PicoFeed\Parser\XmlParser;

/**
 * HTML Filter class.
 *
 * Converts the HTML given to the constructor to XML, walks it with PHP's
 * event-based XML parser and rebuilds an output string from the tags and
 * attributes accepted by the Tag and Attribute helpers. Site-specific
 * regex rules loaded through RuleLoader are applied to the result.
 *
 * @author Frederic Guillot
 */
class Html
{
    /**
     * Config object.
     *
     * @var \PicoFeed\Config\Config
     */
    private $config;

    /**
     * Unfiltered XML data.
     *
     * @var string
     */
    private $input = '';

    /**
     * Filtered XML data.
     *
     * @var string
     */
    private $output = '';

    /**
     * Stack of "was this tag dropped?" flags, one entry per currently open tag.
     * Pushed in startTag() and popped in endTag().
     *
     * @var array
     */
    private $empty_tags = array();

    /**
     * Empty flag: true when the tag currently being opened is not emitted.
     *
     * @var bool
     */
    private $empty = true;

    /**
     * Tag instance.
     *
     * @var \PicoFeed\Filter\Tag
     */
    public $tag = '';

    /**
     * Attribute instance.
     *
     * @var \PicoFeed\Filter\Attribute
     */
    public $attribute = '';

    /**
     * The website to filter.
     *
     * @var string
     */
    private $website;

    /**
     * Initialize the filter, all inputs data must be encoded in UTF-8 before.
     *
     * @param string $html    HTML content
     * @param string $website Site URL (used to build absolute URL)
     */
    public function __construct($html, $website)
    {
        $this->config = new Config();
        $this->input = XmlParser::htmlToXml($html);
        $this->output = '';
        $this->tag = new Tag($this->config);
        $this->website = $website;
        $this->attribute = new Attribute(new Url($website));
    }

    /**
     * Set config object and push its filter settings to the Attribute and
     * Tag helpers.
     *
     * Passing null stores null and leaves the helpers untouched;
     * filterRules() then falls back to a default Config.
     *
     * NOTE(review): the Tag instance was built with the constructor's default
     * Config; only its tag whitelist is refreshed here. Confirm that Tag needs
     * nothing else from the new config.
     *
     * @param \PicoFeed\Config\Config|null $config Config instance
     *
     * @return \PicoFeed\Filter\Html
     */
    public function setConfig($config)
    {
        $this->config = $config;

        if ($this->config !== null) {
            $this->attribute->setImageProxyCallback($this->config->getFilterImageProxyCallback());
            $this->attribute->setImageProxyUrl($this->config->getFilterImageProxyUrl());
            $this->attribute->setImageProxyProtocol($this->config->getFilterImageProxyProtocol());
            $this->attribute->setIframeWhitelist($this->config->getFilterIframeWhitelist(array()));
            $this->attribute->setIntegerAttributes($this->config->getFilterIntegerAttributes(array()));
            $this->attribute->setAttributeOverrides($this->config->getFilterAttributeOverrides(array()));
            $this->attribute->setRequiredAttributes($this->config->getFilterRequiredAttributes(array()));
            $this->attribute->setMediaBlacklist($this->config->getFilterMediaBlacklist(array()));
            $this->attribute->setMediaAttributes($this->config->getFilterMediaAttributes(array()));
            $this->attribute->setSchemeWhitelist($this->config->getFilterSchemeWhitelist(array()));
            // The same config entry feeds both helpers: the whole value goes to
            // Attribute, its keys go to Tag as the list of allowed tag names.
            $this->attribute->setWhitelistedAttributes($this->config->getFilterWhitelistedTags(array()));
            $this->tag->setWhitelistedTags(array_keys($this->config->getFilterWhitelistedTags(array())));
        }

        return $this;
    }

    /**
     * Run tags/attributes filtering.
     *
     * @return string Filtered content
     */
    public function execute()
    {
        // Start from a clean slate so that calling execute() more than once
        // on the same instance does not append to a previous run's output.
        $this->output = '';
        $this->empty_tags = array();
        $this->empty = true;

        $this->preFilter();

        $parser = xml_parser_create();

        xml_set_object($parser, $this);
        xml_set_element_handler($parser, 'startTag', 'endTag');
        xml_set_character_data_handler($parser, 'dataTag');
        // Keep tag names as written instead of upper-casing them.
        xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, false);
        // The return value is deliberately ignored: whatever was emitted
        // before a parse error is kept as the (partial) result.
        xml_parse($parser, $this->input, true);
        xml_parser_free($parser);

        $this->postFilter();

        return $this->output;
    }

    /**
     * Called before XML parsing.
     */
    public function preFilter()
    {
        $this->input = $this->tag->removeBlacklistedTags($this->input);
    }

    /**
     * Called after XML parsing.
     */
    public function postFilter()
    {
        $this->output = $this->tag->removeEmptyTags($this->output);
        $this->output = $this->filterRules($this->output);
        $this->output = $this->tag->removeMultipleBreakTags($this->output);
        $this->output = trim($this->output);
    }

    /**
     * Apply the site-specific search/replace rules to the given content.
     *
     * Rules are loaded for the website passed to the constructor. Each key of
     * $rules['filter'] is a regex matched against the URL's full path; when it
     * matches, every search => replace pair below it is run through
     * preg_replace().
     *
     * @param string $content the content that should be filtered
     *
     * @return string the content after all matching rules were applied
     */
    public function filterRules($content)
    {
        // the constructor should require a config, then this if can be removed
        if ($this->config === null) {
            $config = new Config();
        } else {
            $config = $this->config;
        }

        $loader = new RuleLoader($config);
        $rules = $loader->getRules($this->website);

        $url = new Url($this->website);
        $sub_url = $url->getFullPath();

        if (isset($rules['filter'])) {
            foreach ($rules['filter'] as $pattern => $rule) {
                if (preg_match($pattern, $sub_url)) {
                    foreach ($rule as $search => $replace) {
                        $filtered = preg_replace($search, $replace, $content);

                        // preg_replace() returns null on a PCRE error (invalid
                        // pattern, backtrack limit, ...). Keep the previous
                        // content rather than letting one bad rule wipe the
                        // whole document.
                        if ($filtered !== null) {
                            $content = $filtered;
                        }
                    }
                }
            }
        }

        return $content;
    }

    /**
     * Parse opening tag.
     *
     * The tag is emitted only when it is allowed and, after attribute
     * filtering, still has its required attributes. Whether it was dropped is
     * pushed on the stack so endTag() can skip the matching closing tag.
     *
     * @param resource $parser     XML parser
     * @param string   $tag        Tag name
     * @param array    $attributes Tag attributes
     */
    public function startTag($parser, $tag, array $attributes)
    {
        $this->empty = true;

        if ($this->tag->isAllowed($tag, $attributes)) {
            $attributes = $this->attribute->filter($tag, $attributes);

            if ($this->attribute->hasRequiredAttributes($tag, $attributes)) {
                $attributes = $this->attribute->addAttributes($tag, $attributes);

                $this->output .= $this->tag->openHtmlTag($tag, $this->attribute->toHtml($attributes));
                $this->empty = false;
            }
        }

        $this->empty_tags[] = $this->empty;
    }

    /**
     * Parse closing tag.
     *
     * Emits the closing tag only when the matching opening tag was emitted
     * (its stack entry is false) and the tag name itself is allowed.
     *
     * @param resource $parser XML parser
     * @param string   $tag    Tag name
     */
    public function endTag($parser, $tag)
    {
        if (!array_pop($this->empty_tags) && $this->tag->isAllowedTag($tag)) {
            $this->output .= $this->tag->closeHtmlTag($tag);
        }
    }

    /**
     * Parse tag content.
     *
     * @param resource $parser  XML parser
     * @param string   $content Tag content
     */
    public function dataTag($parser, $content)
    {
        // Replace the UTF-8 non-breaking space (bytes C2 A0) with a normal space
        $content = str_replace("\xc2\xa0", ' ', $content);
        $this->output .= Filter::escape($content);
    }
}