diff options
author | Bernhard Posselt <dev@bernhard-posselt.com> | 2014-11-23 17:08:19 +0100 |
---|---|---|
committer | Bernhard Posselt <dev@bernhard-posselt.com> | 2014-11-23 17:08:19 +0100 |
commit | 692ecc06f5f69fd0f0eaefde8ef1ab136cfcc70f (patch) | |
tree | 83ef083fb588f23912eaa4fe13e395042ad6bbc3 /articleenhancer | |
parent | bbd765b76ac337fa7cf895b7ac083de926bb7731 (diff) |
use single quotes and clean up comments
Diffstat (limited to 'articleenhancer')
-rw-r--r-- | articleenhancer/xpatharticleenhancer.php | 14 |
1 files changed, 7 insertions, 7 deletions
```diff
diff --git a/articleenhancer/xpatharticleenhancer.php b/articleenhancer/xpatharticleenhancer.php
index dec0fe760..871752856 100644
--- a/articleenhancer/xpatharticleenhancer.php
+++ b/articleenhancer/xpatharticleenhancer.php
@@ -53,19 +53,19 @@ class XPathArticleEnhancer implements ArticleEnhancer {
 if(preg_match($regex, $item->getUrl())) {
     $body = $this->getFile($item->getUrl());
 
-    // Determine document encoding.
     // First check if either <meta charset="..."> or
-    // <meta http-equiv="Content-Type" ...> is specified and use that
+    // <meta http-equiv="Content-Type" ...> is specified and use it
     // If this fails use mb_detect_encoding()
-    // Use UTF-8 if mb_detect_encoding does not return anything (or the HTML page is messed up)
-    $encregex = "/<meta\s+[^>]*(?:charset\s*=\s*['\"]([^>'\"]*)['\"]" .
-        "|http-equiv\s*=\s*['\"]content-type['\"]\s+[^>]*content\s*=\s*['\"][^>]*charset=([^>]*)['\"])[^>]*>/i";
-    if(preg_match($encregex, $body, $matches)) {
+    $regex = '/<meta\s+[^>]*(?:charset\s*=\s*[\'"]([^>\'"]*)[\'"]' .
+        '|http-equiv\s*=\s*[\'"]content-type[\'"]\s+[^>]*' .
+        'content\s*=\s*[\'"][^>]*charset=([^>]*)[\'"])[^>]*>' .
+        '/i';
+    if(preg_match($regex, $body, $matches)) {
         $enc = strtoupper($matches[sizeof($matches) - 1]);
     } else {
         $enc = mb_detect_encoding($body);
     }
-    $enc = $enc ? $enc : "UTF-8";
+    $enc = $enc ? $enc : 'UTF-8';
     $body = mb_convert_encoding($body, 'HTML-ENTITIES', $enc);
 
     $dom = new DOMDocument();
```