diff options
Diffstat (limited to 'vendor/fguillot/picofeed/lib/PicoFeed/Parser/XmlParser.php')
-rw-r--r-- | vendor/fguillot/picofeed/lib/PicoFeed/Parser/XmlParser.php | 49 |
1 files changed, 48 insertions, 1 deletions
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/XmlParser.php b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/XmlParser.php index 73a4a43a6..2c68c50a5 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/XmlParser.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/XmlParser.php @@ -90,10 +90,14 @@ class XmlParser * @static * @access public * @param string $input XML content - * @return \DOMNode + * @return \DOMDocument|false */ public static function getDomDocument($input) { + if (empty($input)) { + return false; + } + $dom = self::scanInput($input, function ($in) { $dom = new DomDocument; $dom->loadXml($in, LIBXML_NONET); @@ -208,6 +212,49 @@ class XmlParser } /** + * Extract charset from meta tag + * + * @static + * @access public + * @param string $data meta tag content + * @return string + */ + public static function findCharset($data) + { + $result = explode('charset=', $data); + return isset($result[1]) ? $result[1] : $data; + } + + /** + * Get the encoding from an HTML meta tag + * + * @static + * @access public + * @param string $data Input data + * @return string + */ + public static function getEncodingFromMetaTag($data) + { + $encoding = ''; + + $dom = static::getHtmlDocument($data); + $xpath = new DOMXPath($dom); + + $tags = array( + '/html/head/meta[translate(@http-equiv, "CENOPTY", "cenopty")="content-type"]/@content', //HTML4, convert upper to lower-case + '/html/head/meta/@charset', //HTML5 + ); + + $nodes = $xpath->query(implode(' | ', $tags)); + + foreach ($nodes as $node) { + $encoding = static::findCharset($node->nodeValue); + } + + return $encoding; + } + + /** * Get xml:lang value * * @static |