From e92d57740718b142b5157e371ce5ddde7f6a9795 Mon Sep 17 00:00:00 2001 From: Bernhard Posselt Date: Fri, 7 Nov 2014 15:00:45 +0100 Subject: another round of trying to fix the heise.de encoding issues --- articleenhancer/xpatharticleenhancer.php | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) (limited to 'articleenhancer') diff --git a/articleenhancer/xpatharticleenhancer.php b/articleenhancer/xpatharticleenhancer.php index c48237367..b283786b8 100644 --- a/articleenhancer/xpatharticleenhancer.php +++ b/articleenhancer/xpatharticleenhancer.php @@ -51,13 +51,9 @@ class XPathArticleEnhancer implements ArticleEnhancer { foreach($this->regexXPathPair as $regex => $search) { if(preg_match($regex, $item->getUrl())) { - list($body, $httpEncoding) = $this->getFile($item->getUrl()); - if(preg_match('/(?<=charset=)[^;]*/', $body, $matches)) { - $encoding = $matches[0]; - $body = Encoding::convert($body, $encoding); - $body = mb_convert_encoding($body, 'HTML-ENTITIES', - strtoupper($encoding)); - } + $body = $this->getFile($item->getUrl()); + $body = mb_convert_encoding($body, 'HTML-ENTITIES', + mb_detect_encoding($body)); $dom = new DOMDocument(); @@ -95,10 +91,7 @@ class XPathArticleEnhancer implements ArticleEnhancer { $client = $this->clientFactory->build(); $client->execute($url); $client->setUserAgent('Mozilla/5.0 AppleWebKit'); - return [ - $client->getContent(), - $client->getEncoding() - ]; + return $client->getContent(); } -- cgit v1.2.3