diff options
author | Bernhard Posselt <dev@bernhard-posselt.com> | 2015-04-30 18:30:11 +0200 |
---|---|---|
committer | Bernhard Posselt <dev@bernhard-posselt.com> | 2015-04-30 18:30:11 +0200 |
commit | eb28c3b137c8a0d61377087c9a04b820151b0b7c (patch) | |
tree | c1ebf149f43fa653a4ef1c3f33df04557094e834 /vendor/fguillot/picofeed/lib/PicoFeed/Filter/Html.php | |
parent | 2e54780c1496bfa39cd035b9ac40ed851d2198f1 (diff) |
update deps
Diffstat (limited to 'vendor/fguillot/picofeed/lib/PicoFeed/Filter/Html.php')
-rw-r--r-- | vendor/fguillot/picofeed/lib/PicoFeed/Filter/Html.php | 47 |
1 files changed, 47 insertions, 0 deletions
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Html.php b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Html.php
index 4e046603f..36ab3f18c 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Html.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Html.php
@@ -2,7 +2,9 @@
 
 namespace PicoFeed\Filter;
 
+use PicoFeed\Config\Config;
 use PicoFeed\Client\Url;
+use PicoFeed\Scraper\RuleLoader;
 use PicoFeed\Parser\XmlParser;
 
 /**
@@ -70,6 +72,14 @@ class Html
     public $attribute = '';
 
     /**
+     * The website to filter
+     *
+     * @access private
+     * @var string
+     */
+    private $website;
+
+    /**
      * Initialize the filter, all inputs data must be encoded in UTF-8 before
      *
      * @access public
@@ -81,6 +91,7 @@ class Html
         $this->input = XmlParser::HtmlToXml($html);
         $this->output = '';
         $this->tag = new Tag;
+        $this->website = $website;
         $this->attribute = new Attribute(new Url($website));
     }
 
@@ -155,10 +166,46 @@ class Html
     public function postFilter()
     {
         $this->output = $this->tag->removeEmptyTags($this->output);
+        $this->output = $this->filterRules($this->output);
+        $this->output = $this->tag->removeMultipleBreakTags($this->output);
         $this->output = trim($this->output);
     }
 
     /**
+     * Called after XML parsing
+     * @param string $content the content that should be filtered
+     *
+     * @access public
+     */
+    public function filterRules($content)
+    {
+        // the constructor should require a config, then this if can be removed
+        if ($this->config === null) {
+            $config = new Config;
+        } else {
+            $config = $this->config;
+        }
+
+        $loader = new RuleLoader($config);
+        $rules = $loader->getRules($this->website);
+
+        $url = new Url($this->website);
+        $sub_url = $url->getFullPath();
+
+        if (isset($rules['filter'])) {
+            foreach ($rules['filter'] as $pattern => $rule) {
+                if (preg_match($pattern, $sub_url)) {
+                    foreach($rule as $search => $replace) {
+                        $content = preg_replace($search, $replace, $content);
+                    }
+                }
+            }
+        }
+
+        return $content;
+    }
+
+    /**
      * Parse opening tag
      *
      * @access public