diff options
author | Bernhard Posselt <dev@bernhard-posselt.com> | 2015-07-13 17:44:44 +0200 |
---|---|---|
committer | Bernhard Posselt <dev@bernhard-posselt.com> | 2015-07-13 17:44:44 +0200 |
commit | 17f2cf72af4173409ee294a204de4221b0275428 (patch) | |
tree | 366d2621adfe0c4b862d0afba607076c5d3e30e0 /vendor/fguillot/picofeed/lib | |
parent | 5ee4dcd035cbf8c00ff95f02fce6ff6b3dc61aaf (diff) |
update picofeed
Diffstat (limited to 'vendor/fguillot/picofeed/lib')
53 files changed, 939 insertions, 224 deletions
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Stream.php b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Stream.php index 72afe92c0..36c5ca6f3 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Stream.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Stream.php @@ -62,7 +62,7 @@ class Stream extends Client { foreach($headers as $header) { if (stripos($header, 'Location') === 0) { - list($name, $value) = explode(': ', $header); + list(, $value) = explode(': ', $header); $this->url = Url::resolve($value, $this->url); } diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Encoding/Encoding.php b/vendor/fguillot/picofeed/lib/PicoFeed/Encoding/Encoding.php index 0590c47b1..5821dd40c 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Encoding/Encoding.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Encoding/Encoding.php @@ -15,7 +15,21 @@ class Encoding return $input; } - // convert input to utf-8; ignore malformed characters - return iconv($encoding, 'UTF-8//IGNORE', $input); + // suppress all notices since it isn't possible to silence only the + // notice "Wrong charset, conversion from $in_encoding to $out_encoding is not allowed" + set_error_handler(function() {}, E_NOTICE); + + // convert input to utf-8 and strip invalid characters + $value = iconv($encoding, 'UTF-8//IGNORE', $input); + + // stop silencing of notices + restore_error_handler(); + + // return input if something went wrong, maybe it's usable anyway + if ($value === false) { + return $input; + } + + return $value; } } diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Filter.php b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Filter.php index 123f9896e..e3e4ad36b 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Filter.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Filter.php @@ -107,7 +107,7 @@ class Filter } /** - * Dirty quickfixes before XML parsing + * Fixes before XML parsing * * @static * @access public @@ -116,17 +116,37 @@ class Filter */ public static function normalizeData($data) { - $invalid_chars = array( - "\x10", - "\xc3\x20", - "", - "\xe2\x80\x9c\x08", + $entities = array( + '/(&#)(\d+);/m', // decimal encoded + '/(&#x)([a-f0-9]+);/mi', // hex encoded ); - foreach ($invalid_chars as $needle) { - $data = str_replace($needle, '', $data); - } + // strip invalid XML 1.0 characters which are encoded as entities + $data = preg_replace_callback($entities, function($matches) { + $code_point = $matches[2]; - return $data; + // convert hex entity to decimal + if (strtolower($matches[1]) === '&#x') { + $code_point = hexdec($code_point); + } + + $code_point = (int) $code_point; + + // replace invalid characters + if ($code_point < 9 + || ($code_point > 10 && $code_point < 13) + || ($code_point > 13 && $code_point < 32) + || ($code_point > 55295 && $code_point < 57344) + || ($code_point > 65533 && $code_point < 65536) + || $code_point > 1114111 + ) { + return ''; + }; + + return $matches[0]; + }, $data); + + // strip every utf-8 character than isn't in the range of valid XML 1.0 characters + return (string) preg_replace('/[^\x{0009}\x{000A}\x{000D}\x{0020}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]/u', '', $data); } } diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Atom.php b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Atom.php index 0e53d2778..6950d9afd 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Atom.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Atom.php @@ -15,6 +15,13 @@ use PicoFeed\Client\Url; class Atom extends Parser { /** + * Supported namespaces + */ + protected $namespaces = array( + 'atom' => 'http://www.w3.org/2005/Atom', + ); + + /** * Get the path to the items XML tree * * @access public @@ -23,7 +30,8 @@ class Atom extends Parser */ public function getItemsTree(SimpleXMLElement $xml) { - return $xml->entry; + return XmlParser::getXPathResult($xml, 'atom:entry', $this->namespaces) + ?: XmlParser::getXPathResult($xml, 'entry'); } /** @@ -59,7 +67,10 @@ class Atom extends Parser */ public function findFeedDescription(SimpleXMLElement $xml, Feed $feed) { - $feed->description = (string) $xml->subtitle; + $description = XmlParser::getXPathResult($xml, 'atom:subtitle', $this->namespaces) + ?: XmlParser::getXPathResult($xml, 'subtitle'); + + $feed->description = (string) current($description); } /** @@ -71,7 +82,10 @@ class Atom extends Parser */ public function findFeedLogo(SimpleXMLElement $xml, Feed $feed) { - $feed->logo = (string) $xml->logo; + $logo = XmlParser::getXPathResult($xml, 'atom:logo', $this->namespaces) + ?: XmlParser::getXPathResult($xml, 'logo'); + + $feed->logo = (string) current($logo); } /** @@ -83,7 +97,10 @@ class Atom extends Parser */ public function findFeedIcon(SimpleXMLElement $xml, Feed $feed) { - $feed->icon = (string) $xml->icon; + $icon = XmlParser::getXPathResult($xml, 'atom:icon', $this->namespaces) + ?: XmlParser::getXPathResult($xml, 'icon'); + + $feed->icon = (string) current($icon); } /** @@ -95,7 +112,10 @@ class Atom extends Parser */ public function findFeedTitle(SimpleXMLElement $xml, Feed $feed) { - $feed->title = Filter::stripWhiteSpace((string) $xml->title) ?: $feed->getSiteUrl(); + $title = XmlParser::getXPathResult($xml, 'atom:title', $this->namespaces) + ?: XmlParser::getXPathResult($xml, 'title'); + + $feed->title = Filter::stripWhiteSpace((string) current($title)) ?: $feed->getSiteUrl(); } /** @@ -107,7 +127,10 @@ class Atom extends Parser */ public function findFeedLanguage(SimpleXMLElement $xml, Feed $feed) { - $feed->language = XmlParser::getXmlLang($this->content); + $language = XmlParser::getXPathResult($xml, '*[not(self::atom:entry)]/@xml:lang', $this->namespaces) + ?: XmlParser::getXPathResult($xml, '@xml:lang'); + + $feed->language = (string) current($language); } /** @@ -119,7 +142,10 @@ class Atom extends Parser */ public function findFeedId(SimpleXMLElement $xml, Feed $feed) { - $feed->id = (string) $xml->id; + $id = XmlParser::getXPathResult($xml, 'atom:id', $this->namespaces) + ?: XmlParser::getXPathResult($xml, 'id'); + + $feed->id = (string) current($id); } /** @@ -131,7 +157,10 @@ class Atom extends Parser */ public function findFeedDate(SimpleXMLElement $xml, Feed $feed) { - $feed->date = $this->date->getDateTime((string) $xml->updated); + $updated = XmlParser::getXPathResult($xml, 'atom:updated', $this->namespaces) + ?: XmlParser::getXPathResult($xml, 'updated'); + + $feed->date = $this->date->getDateTime((string) current($updated)); } /** @@ -144,11 +173,17 @@ class Atom extends Parser */ public function findItemDate(SimpleXMLElement $entry, Item $item, Feed $feed) { - $published = isset($entry->published) ? $this->date->getDateTime((string) $entry->published) : null; - $updated = isset($entry->updated) ? $this->date->getDateTime((string) $entry->updated) : null; + $published = XmlParser::getXPathResult($entry, 'atom:published', $this->namespaces) + ?: XmlParser::getXPathResult($entry, 'published'); + + $updated = XmlParser::getXPathResult($entry, 'atom:updated', $this->namespaces) + ?: XmlParser::getXPathResult($entry, 'updated'); + + $published = ! empty($published) ? $this->date->getDateTime((string) current($published)) : null; + $updated = ! empty($updated) ? $this->date->getDateTime((string) current($updated)) : null; if ($published === null && $updated === null) { - $item->date = $feed->getDate(); // We use the feed date if there is no date for the item + $item->date = $feed->getDate(); // We use the feed date if there is no date for the item } else if ($published !== null && $updated !== null) { $item->date = max($published, $updated); // We use the most recent date between published and updated @@ -167,11 +202,10 @@ class Atom extends Parser */ public function findItemTitle(SimpleXMLElement $entry, Item $item) { - $item->title = Filter::stripWhiteSpace((string) $entry->title); + $title = XmlParser::getXPathResult($entry, 'atom:title', $this->namespaces) + ?: XmlParser::getXPathResult($entry, 'title'); - if (empty($item->title)) { - $item->title = $item->url; - } + $item->title = Filter::stripWhiteSpace((string) current($title)) ?: $item->url; } /** @@ -184,12 +218,12 @@ class Atom extends Parser */ public function findItemAuthor(SimpleXMLElement $xml, SimpleXMLElement $entry, Item $item) { - if (isset($entry->author->name)) { - $item->author = (string) $entry->author->name; - } - else { - $item->author = (string) $xml->author->name; - } + $author = XmlParser::getXPathResult($entry, 'atom:author/atom:name', $this->namespaces) + ?: XmlParser::getXPathResult($entry, 'author/name') + ?: XmlParser::getXPathResult($xml, 'atom:author/atom:name', $this->namespaces) + ?: XmlParser::getXPathResult($xml, 'author/name'); + + $item->author = (string) current($author); } /** @@ -226,10 +260,11 @@ class Atom extends Parser */ public function findItemId(SimpleXMLElement $entry, Item $item, Feed $feed) { - $id = (string) $entry->id; + $id = XmlParser::getXPathResult($entry, 'atom:id', $this->namespaces) + ?: XmlParser::getXPathResult($entry, 'id'); - if ($id) { - $item->id = $this->generateId($id); + if (! empty($id)) { + $item->id = $this->generateId((string) current($id)); } else { $item->id = $this->generateId( @@ -266,13 +301,9 @@ class Atom extends Parser */ public function findItemLanguage(SimpleXMLElement $entry, Item $item, Feed $feed) { - $language = (string) $entry->attributes('xml', true)->{'lang'}; - - if ($language === '') { - $language = $feed->language; - } + $language = XmlParser::getXPathResult($entry, './/@xml:lang'); - $item->language = $language; + $item->language = (string) current($language) ?: $feed->language; } /** @@ -309,7 +340,10 @@ class Atom extends Parser */ private function findLink(SimpleXMLElement $xml, $rel) { - foreach ($xml->link as $link) { + $links = XmlParser::getXPathResult($xml, 'atom:link', $this->namespaces) + ?: XmlParser::getXPathResult($xml, 'link'); + + foreach ($links as $link) { if ($rel === (string) $link['rel']) { return $link; } @@ -327,19 +361,27 @@ class Atom extends Parser */ private function getContent(SimpleXMLElement $entry) { - if (isset($entry->content) && ! empty($entry->content)) { + $content = current( + XmlParser::getXPathResult($entry, 'atom:content', $this->namespaces) + ?: XmlParser::getXPathResult($entry, 'content') + ); - if (count($entry->content->children())) { - return (string) $entry->content->asXML(); - } - else { - return (string) $entry->content; + if (! empty($content) && count($content->children())) { + $xml_string = ''; + + foreach($content->children() as $child) { + $xml_string .= $child->asXML(); } + + return $xml_string; } - else if (isset($entry->summary) && ! empty($entry->summary)) { - return (string) $entry->summary; + else if (trim((string) $content) !== '') { + return (string) $content; } - return ''; + $summary = XmlParser::getXPathResult($entry, 'atom:summary', $this->namespaces) + ?: XmlParser::getXPathResult($entry, 'summary'); + + return (string) current($summary); } } diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/DateParser.php b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/DateParser.php index 92d0012ac..4612a8613 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/DateParser.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/DateParser.php @@ -88,7 +88,7 @@ class DateParser * @access public * @param string $format Date format * @param string $value Original date value - * @return DateTime + * @return DateTime|boolean */ public function getValidDate($format, $value) { diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Item.php b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Item.php index d891ef41c..c9dff5a6f 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Item.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Item.php @@ -121,23 +121,24 @@ class Item * @access public * @param string $tag Tag name (examples: guid, media:content) * @param string $attribute Tag attribute - * @return string + * @return array|false Tag values or error */ public function getTag($tag, $attribute = '') { - // Get namespaced value - if (strpos($tag, ':') !== false) { - list(,$tag) = explode(':', $tag); - return XmlParser::getNamespaceValue($this->xml, $this->namespaces, $tag, $attribute); + // convert to xPath attribute query + if ($attribute !== '') { + $attribute = '/@'.$attribute; } - // Return attribute value - if (! empty($attribute)) { - return (string) $this->xml->{$tag}[$attribute]; + // construct query + $query = './/'.$tag.$attribute; + $elements = XmlParser::getXPathResult($this->xml, $query, $this->namespaces); + + if ($elements === false) { // xPath error + return false; } - // Return tag content - return (string) $this->xml->$tag; + return array_map(function ($element) { return (string) $element;}, $elements); } /** @@ -198,7 +199,7 @@ class Item * Get date * * @access public - * $return integer + * $return \DateTime */ public function getDate() { diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php index 810494b70..918cdef33 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php @@ -58,7 +58,7 @@ abstract class Parser protected $fallback_url = ''; /** - * XML namespaces + * XML namespaces supported by parser * * @access protected * @var array @@ -66,6 +66,14 @@ abstract class Parser protected $namespaces = array(); /** + * XML namespaces used in document + * + * @access protected + * @var array + */ + protected $used_namespaces = array(); + + /** * Enable the content filtering * * @access private @@ -117,9 +125,6 @@ abstract class Parser // Encode everything in UTF-8 Logger::setMessage(get_called_class().': HTTP Encoding "'.$http_encoding.'" ; |