diff options
author | Bernhard Posselt <dev@bernhard-posselt.com> | 2015-02-27 09:19:51 +0100 |
---|---|---|
committer | Bernhard Posselt <dev@bernhard-posselt.com> | 2015-02-27 09:19:59 +0100 |
commit | ea4ecf501fc83134db2201477b4cdb1806d5312c (patch) | |
tree | 3f037329e0b7dead6c97fd58182013bc3d097667 /vendor/fguillot/picofeed/lib/PicoFeed/Client | |
parent | 790f0a0a709510092546c5ca4347bbec5f0a19c2 (diff) |
update picofeed
Diffstat (limited to 'vendor/fguillot/picofeed/lib/PicoFeed/Client')
-rw-r--r-- | vendor/fguillot/picofeed/lib/PicoFeed/Client/Grabber.php | 10 |
1 file changed, 4 insertions, 6 deletions
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Grabber.php b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Grabber.php
index 52f2f0bf1..fe4890400 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Grabber.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Grabber.php
@@ -123,8 +123,6 @@ class Grabber
      * @var array
      */
     private $stripTags = array(
-        'script',
-        'style',
         'nav',
         'header',
         'footer',
@@ -276,11 +274,11 @@ class Grabber
         }
 
         if ($this->html) {
+            $html_encoding = XmlParser::getEncodingFromMetaTag($this->html);
 
-            Logger::setMessage(get_called_class().': Fix encoding');
-            Logger::setMessage(get_called_class().': HTTP Encoding "'.$this->encoding.'"');
-
-            $this->html = Encoding::convert($this->html, $this->encoding);
+            // Encode everything in UTF-8
+            Logger::setMessage(get_called_class().': HTTP Encoding "'.$this->encoding.'" ; HTML Encoding "'.$html_encoding.'"');
+            $this->html = Encoding::convert($this->html, $html_encoding ?: $this->encoding);
             $this->html = Filter::stripHeadTags($this->html);
 
             Logger::setMessage(get_called_class().': Content length: '.strlen($this->html).' bytes');