summaryrefslogtreecommitdiffstats
path: root/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Html.php
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/fguillot/picofeed/lib/PicoFeed/Filter/Html.php')
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Filter/Html.php47
1 files changed, 47 insertions, 0 deletions
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Html.php b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Html.php
index 4e046603f..36ab3f18c 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Html.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Html.php
@@ -2,7 +2,9 @@
namespace PicoFeed\Filter;
+use PicoFeed\Config\Config;
use PicoFeed\Client\Url;
+use PicoFeed\Scraper\RuleLoader;
use PicoFeed\Parser\XmlParser;
/**
@@ -70,6 +72,14 @@ class Html
public $attribute = '';
/**
+ * The website to filter
+ *
+ * @access private
+ * @var string
+ */
+ private $website;
+
+ /**
* Initialize the filter, all inputs data must be encoded in UTF-8 before
*
* @access public
@@ -81,6 +91,7 @@ class Html
$this->input = XmlParser::HtmlToXml($html);
$this->output = '';
$this->tag = new Tag;
+ $this->website = $website;
$this->attribute = new Attribute(new Url($website));
}
@@ -155,10 +166,46 @@ class Html
public function postFilter()
{
$this->output = $this->tag->removeEmptyTags($this->output);
+ $this->output = $this->filterRules($this->output);
+ $this->output = $this->tag->removeMultipleBreakTags($this->output);
$this->output = trim($this->output);
}
/**
+ * Called after XML parsing
+ * @param string $content the content that should be filtered
+ *
+ * @access public
+ */
+ public function filterRules($content)
+ {
+ // the constructor should require a config, then this if can be removed
+ if ($this->config === null) {
+ $config = new Config;
+ } else {
+ $config = $this->config;
+ }
+
+ $loader = new RuleLoader($config);
+ $rules = $loader->getRules($this->website);
+
+ $url = new Url($this->website);
+ $sub_url = $url->getFullPath();
+
+ if (isset($rules['filter'])) {
+ foreach ($rules['filter'] as $pattern => $rule) {
+ if (preg_match($pattern, $sub_url)) {
+ foreach($rule as $search => $replace) {
+ $content = preg_replace($search, $replace, $content);
+ }
+ }
+ }
+ }
+
+ return $content;
+ }
+
+ /**
* Parse opening tag
*
* @access public