diff options
author | Bernhard Posselt <dev@bernhard-posselt.com> | 2014-11-07 15:00:45 +0100 |
---|---|---|
committer | Bernhard Posselt <dev@bernhard-posselt.com> | 2014-11-07 15:00:45 +0100 |
commit | e92d57740718b142b5157e371ce5ddde7f6a9795 (patch) | |
tree | 0a997191604dbb1a0699180c87ea2a0f6e04ba06 /articleenhancer | |
parent | 7524609f3fa487941e829e2f10117d2ea08f3200 (diff) |
another round of trying to fix the heise.de encoding issues
Diffstat (limited to 'articleenhancer')
-rw-r--r-- | articleenhancer/xpatharticleenhancer.php | 15 |
1 files changed, 4 insertions, 11 deletions
```diff
diff --git a/articleenhancer/xpatharticleenhancer.php b/articleenhancer/xpatharticleenhancer.php
index c48237367..b283786b8 100644
--- a/articleenhancer/xpatharticleenhancer.php
+++ b/articleenhancer/xpatharticleenhancer.php
@@ -51,13 +51,9 @@ class XPathArticleEnhancer implements ArticleEnhancer {
         foreach($this->regexXPathPair as $regex => $search) {
 
             if(preg_match($regex, $item->getUrl())) {
-                list($body, $httpEncoding) = $this->getFile($item->getUrl());
-                if(preg_match('/(?<=charset=)[^;]*/', $body, $matches)) {
-                    $encoding = $matches[0];
-                    $body = Encoding::convert($body, $encoding);
-                    $body = mb_convert_encoding($body, 'HTML-ENTITIES',
-                        strtoupper($encoding));
-                }
+                $body = $this->getFile($item->getUrl());
+                $body = mb_convert_encoding($body, 'HTML-ENTITIES',
+                    mb_detect_encoding($body));
 
                 $dom = new DOMDocument();
 
@@ -95,10 +91,7 @@ class XPathArticleEnhancer implements ArticleEnhancer {
         $client = $this->clientFactory->build();
         $client->execute($url);
         $client->setUserAgent('Mozilla/5.0 AppleWebKit');
-        return [
-            $client->getContent(),
-            $client->getEncoding()
-        ];
+        return $client->getContent();
     }
 
 
```