diff options
Diffstat (limited to 'vendor/fguillot/picofeed/lib/PicoFeed/Client/Grabber.php')
-rw-r--r-- | vendor/fguillot/picofeed/lib/PicoFeed/Client/Grabber.php | 10 |
1 file changed, 4 insertions, 6 deletions
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Grabber.php b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Grabber.php
index 52f2f0bf1..fe4890400 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Grabber.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Grabber.php
@@ -123,8 +123,6 @@ class Grabber
      * @var array
      */
     private $stripTags = array(
-        'script',
-        'style',
         'nav',
         'header',
         'footer',
@@ -276,11 +274,11 @@ class Grabber
         }
 
         if ($this->html) {
+            $html_encoding = XmlParser::getEncodingFromMetaTag($this->html);
 
-            Logger::setMessage(get_called_class().': Fix encoding');
-            Logger::setMessage(get_called_class().': HTTP Encoding "'.$this->encoding.'"');
-
-            $this->html = Encoding::convert($this->html, $this->encoding);
+            // Encode everything in UTF-8
+            Logger::setMessage(get_called_class().': HTTP Encoding "'.$this->encoding.'" ; HTML Encoding "'.$html_encoding.'"');
+            $this->html = Encoding::convert($this->html, $html_encoding ?: $this->encoding);
             $this->html = Filter::stripHeadTags($this->html);
 
             Logger::setMessage(get_called_class().': Content length: '.strlen($this->html).' bytes');