summary refs log tree commit diff stats
path: root/articleenhancer
diff options
context:
space:
mode:
author Bernhard Posselt <dev@bernhard-posselt.com> 2014-11-07 15:00:45 +0100
committer Bernhard Posselt <dev@bernhard-posselt.com> 2014-11-07 15:00:45 +0100
commit e92d57740718b142b5157e371ce5ddde7f6a9795 (patch)
tree 0a997191604dbb1a0699180c87ea2a0f6e04ba06 /articleenhancer
parent 7524609f3fa487941e829e2f10117d2ea08f3200 (diff)
another round of trying to fix the heise.de encoding issues
Diffstat (limited to 'articleenhancer')
-rw-r--r-- articleenhancer/xpatharticleenhancer.php 15
1 files changed, 4 insertions, 11 deletions
diff --git a/articleenhancer/xpatharticleenhancer.php b/articleenhancer/xpatharticleenhancer.php
index c48237367..b283786b8 100644
--- a/articleenhancer/xpatharticleenhancer.php
+++ b/articleenhancer/xpatharticleenhancer.php
@@ -51,13 +51,9 @@ class XPathArticleEnhancer implements ArticleEnhancer {
foreach($this->regexXPathPair as $regex => $search) {
if(preg_match($regex, $item->getUrl())) {
- list($body, $httpEncoding) = $this->getFile($item->getUrl());
- if(preg_match('/(?<=charset=)[^;]*/', $body, $matches)) {
- $encoding = $matches[0];
- $body = Encoding::convert($body, $encoding);
- $body = mb_convert_encoding($body, 'HTML-ENTITIES',
- strtoupper($encoding));
- }
+ $body = $this->getFile($item->getUrl());
+ $body = mb_convert_encoding($body, 'HTML-ENTITIES',
+ mb_detect_encoding($body));
$dom = new DOMDocument();
@@ -95,10 +91,7 @@ class XPathArticleEnhancer implements ArticleEnhancer {
$client = $this->clientFactory->build();
$client->execute($url);
$client->setUserAgent('Mozilla/5.0 AppleWebKit');
- return [
- $client->getContent(),
- $client->getEncoding()
- ];
+ return $client->getContent();
}