summaryrefslogtreecommitdiffstats
path: root/vendor/fguillot/picofeed/lib
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/fguillot/picofeed/lib')
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Client/Stream.php2
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Encoding/Encoding.php18
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Filter/Filter.php40
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Parser/Atom.php122
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Parser/DateParser.php2
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Parser/Item.php23
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php48
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Parser/Rss10.php229
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Parser/Rss20.php126
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Parser/XmlParser.php62
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Rules/abstrusegoose.com.php8
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Rules/amazingsuperpowers.com.php8
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Rules/anythingcomic.com.php13
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Rules/buttersafe.com.php13
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Rules/channelate.com.php8
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Rules/cowbirdsinlove.com.php8
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Rules/drawingboardcomic.com.php15
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Rules/endlessorigami.com.php8
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Rules/escapistmagazine.com.php7
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Rules/exocomics.com.php15
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Rules/explosm.net.php1
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Rules/extrafabulouscomics.com.php8
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Rules/giantitp.com.php12
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Rules/gocomics.com.php12
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Rules/happletea.com.php18
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Rules/imogenquest.net.php8
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Rules/invisiblebread.com.php8
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Rules/ir.amd.com.php10
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Rules/lastplacecomics.com.php8
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Rules/loadingartist.com.php10
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Rules/lukesurl.com.php15
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Rules/marriedtothesea.com.php12
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Rules/marycagle.com.php13
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Rules/mrlovenstein.com.php8
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Rules/nedroid.com.php8
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Rules/oglaf.com.php19
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Rules/optipess.com.php8
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Rules/peebleslab.com.php9
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Rules/pixelbeat.org.php12
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Rules/popstrip.com.php8
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Rules/satwcomic.com.php3
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Rules/sentfromthemoon.com.php18
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Rules/smbc-comics.com.php8
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Rules/stupidfox.net.php13
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Rules/subtraction.com.php15
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Rules/theawkwardyeti.com.php8
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Rules/thedoghousediaries.com.php18
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Rules/treelobsters.com.php8
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Rules/voz.vn.php10
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.lemonde.fr.php17
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.nextinpact.com.php13
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.zeit.de.php41
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Rules/xkcd.com.php9
53 files changed, 939 insertions, 224 deletions
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Stream.php b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Stream.php
index 72afe92c0..36c5ca6f3 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Stream.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Stream.php
@@ -62,7 +62,7 @@ class Stream extends Client
{
foreach($headers as $header) {
if (stripos($header, 'Location') === 0) {
- list($name, $value) = explode(': ', $header);
+ list(, $value) = explode(': ', $header);
$this->url = Url::resolve($value, $this->url);
}
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Encoding/Encoding.php b/vendor/fguillot/picofeed/lib/PicoFeed/Encoding/Encoding.php
index 0590c47b1..5821dd40c 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Encoding/Encoding.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Encoding/Encoding.php
@@ -15,7 +15,21 @@ class Encoding
return $input;
}
- // convert input to utf-8; ignore malformed characters
- return iconv($encoding, 'UTF-8//IGNORE', $input);
+ // suppress all notices since it isn't possible to silence only the
+ // notice "Wrong charset, conversion from $in_encoding to $out_encoding is not allowed"
+ set_error_handler(function() {}, E_NOTICE);
+
+ // convert input to utf-8 and strip invalid characters
+ $value = iconv($encoding, 'UTF-8//IGNORE', $input);
+
+ // stop silencing of notices
+ restore_error_handler();
+
+ // return input if something went wrong, maybe it's usable anyway
+ if ($value === false) {
+ return $input;
+ }
+
+ return $value;
}
}
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Filter.php b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Filter.php
index 123f9896e..e3e4ad36b 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Filter.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Filter.php
@@ -107,7 +107,7 @@ class Filter
}
/**
- * Dirty quickfixes before XML parsing
+ * Fixes before XML parsing
*
* @static
* @access public
@@ -116,17 +116,37 @@ class Filter
*/
public static function normalizeData($data)
{
- $invalid_chars = array(
- "\x10",
- "\xc3\x20",
- "",
- "\xe2\x80\x9c\x08",
+ $entities = array(
+ '/(&#)(\d+);/m', // decimal encoded
+ '/(&#x)([a-f0-9]+);/mi', // hex encoded
);
- foreach ($invalid_chars as $needle) {
- $data = str_replace($needle, '', $data);
- }
+ // strip invalid XML 1.0 characters which are encoded as entities
+ $data = preg_replace_callback($entities, function($matches) {
+ $code_point = $matches[2];
- return $data;
+ // convert hex entity to decimal
+ if (strtolower($matches[1]) === '&#x') {
+ $code_point = hexdec($code_point);
+ }
+
+ $code_point = (int) $code_point;
+
+ // replace invalid characters
+ if ($code_point < 9
+ || ($code_point > 10 && $code_point < 13)
+ || ($code_point > 13 && $code_point < 32)
+ || ($code_point > 55295 && $code_point < 57344)
+ || ($code_point > 65533 && $code_point < 65536)
+ || $code_point > 1114111
+ ) {
+ return '';
+ };
+
+ return $matches[0];
+ }, $data);
+
+ // strip every utf-8 character that isn't in the range of valid XML 1.0 characters
+ return (string) preg_replace('/[^\x{0009}\x{000A}\x{000D}\x{0020}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]/u', '', $data);
}
}
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Atom.php b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Atom.php
index 0e53d2778..6950d9afd 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Atom.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Atom.php
@@ -15,6 +15,13 @@ use PicoFeed\Client\Url;
class Atom extends Parser
{
/**
+ * Supported namespaces
+ */
+ protected $namespaces = array(
+ 'atom' => 'http://www.w3.org/2005/Atom',
+ );
+
+ /**
* Get the path to the items XML tree
*
* @access public
@@ -23,7 +30,8 @@ class Atom extends Parser
*/
public function getItemsTree(SimpleXMLElement $xml)
{
- return $xml->entry;
+ return XmlParser::getXPathResult($xml, 'atom:entry', $this->namespaces)
+ ?: XmlParser::getXPathResult($xml, 'entry');
}
/**
@@ -59,7 +67,10 @@ class Atom extends Parser
*/
public function findFeedDescription(SimpleXMLElement $xml, Feed $feed)
{
- $feed->description = (string) $xml->subtitle;
+ $description = XmlParser::getXPathResult($xml, 'atom:subtitle', $this->namespaces)
+ ?: XmlParser::getXPathResult($xml, 'subtitle');
+
+ $feed->description = (string) current($description);
}
/**
@@ -71,7 +82,10 @@ class Atom extends Parser
*/
public function findFeedLogo(SimpleXMLElement $xml, Feed $feed)
{
- $feed->logo = (string) $xml->logo;
+ $logo = XmlParser::getXPathResult($xml, 'atom:logo', $this->namespaces)
+ ?: XmlParser::getXPathResult($xml, 'logo');
+
+ $feed->logo = (string) current($logo);
}
/**
@@ -83,7 +97,10 @@ class Atom extends Parser
*/
public function findFeedIcon(SimpleXMLElement $xml, Feed $feed)
{
- $feed->icon = (string) $xml->icon;
+ $icon = XmlParser::getXPathResult($xml, 'atom:icon', $this->namespaces)
+ ?: XmlParser::getXPathResult($xml, 'icon');
+
+ $feed->icon = (string) current($icon);
}
/**
@@ -95,7 +112,10 @@ class Atom extends Parser
*/
public function findFeedTitle(SimpleXMLElement $xml, Feed $feed)
{
- $feed->title = Filter::stripWhiteSpace((string) $xml->title) ?: $feed->getSiteUrl();
+ $title = XmlParser::getXPathResult($xml, 'atom:title', $this->namespaces)
+ ?: XmlParser::getXPathResult($xml, 'title');
+
+ $feed->title = Filter::stripWhiteSpace((string) current($title)) ?: $feed->getSiteUrl();
}
/**
@@ -107,7 +127,10 @@ class Atom extends Parser
*/
public function findFeedLanguage(SimpleXMLElement $xml, Feed $feed)
{
- $feed->language = XmlParser::getXmlLang($this->content);
+ $language = XmlParser::getXPathResult($xml, '*[not(self::atom:entry)]/@xml:lang', $this->namespaces)
+ ?: XmlParser::getXPathResult($xml, '@xml:lang');
+
+ $feed->language = (string) current($language);
}
/**
@@ -119,7 +142,10 @@ class Atom extends Parser
*/
public function findFeedId(SimpleXMLElement $xml, Feed $feed)
{
- $feed->id = (string) $xml->id;
+ $id = XmlParser::getXPathResult($xml, 'atom:id', $this->namespaces)
+ ?: XmlParser::getXPathResult($xml, 'id');
+
+ $feed->id = (string) current($id);
}
/**
@@ -131,7 +157,10 @@ class Atom extends Parser
*/
public function findFeedDate(SimpleXMLElement $xml, Feed $feed)
{
- $feed->date = $this->date->getDateTime((string) $xml->updated);
+ $updated = XmlParser::getXPathResult($xml, 'atom:updated', $this->namespaces)
+ ?: XmlParser::getXPathResult($xml, 'updated');
+
+ $feed->date = $this->date->getDateTime((string) current($updated));
}
/**
@@ -144,11 +173,17 @@ class Atom extends Parser
*/
public function findItemDate(SimpleXMLElement $entry, Item $item, Feed $feed)
{
- $published = isset($entry->published) ? $this->date->getDateTime((string) $entry->published) : null;
- $updated = isset($entry->updated) ? $this->date->getDateTime((string) $entry->updated) : null;
+ $published = XmlParser::getXPathResult($entry, 'atom:published', $this->namespaces)
+ ?: XmlParser::getXPathResult($entry, 'published');
+
+ $updated = XmlParser::getXPathResult($entry, 'atom:updated', $this->namespaces)
+ ?: XmlParser::getXPathResult($entry, 'updated');
+
+ $published = ! empty($published) ? $this->date->getDateTime((string) current($published)) : null;
+ $updated = ! empty($updated) ? $this->date->getDateTime((string) current($updated)) : null;
if ($published === null && $updated === null) {
- $item->date = $feed->getDate(); // We use the feed date if there is no date for the item
+ $item->date = $feed->getDate(); // We use the feed date if there is no date for the item
}
else if ($published !== null && $updated !== null) {
$item->date = max($published, $updated); // We use the most recent date between published and updated
@@ -167,11 +202,10 @@ class Atom extends Parser
*/
public function findItemTitle(SimpleXMLElement $entry, Item $item)
{
- $item->title = Filter::stripWhiteSpace((string) $entry->title);
+ $title = XmlParser::getXPathResult($entry, 'atom:title', $this->namespaces)
+ ?: XmlParser::getXPathResult($entry, 'title');
- if (empty($item->title)) {
- $item->title = $item->url;
- }
+ $item->title = Filter::stripWhiteSpace((string) current($title)) ?: $item->url;
}
/**
@@ -184,12 +218,12 @@ class Atom extends Parser
*/
public function findItemAuthor(SimpleXMLElement $xml, SimpleXMLElement $entry, Item $item)
{
- if (isset($entry->author->name)) {
- $item->author = (string) $entry->author->name;
- }
- else {
- $item->author = (string) $xml->author->name;
- }
+ $author = XmlParser::getXPathResult($entry, 'atom:author/atom:name', $this->namespaces)
+ ?: XmlParser::getXPathResult($entry, 'author/name')
+ ?: XmlParser::getXPathResult($xml, 'atom:author/atom:name', $this->namespaces)
+ ?: XmlParser::getXPathResult($xml, 'author/name');
+
+ $item->author = (string) current($author);
}
/**
@@ -226,10 +260,11 @@ class Atom extends Parser
*/
public function findItemId(SimpleXMLElement $entry, Item $item, Feed $feed)
{
- $id = (string) $entry->id;
+ $id = XmlParser::getXPathResult($entry, 'atom:id', $this->namespaces)
+ ?: XmlParser::getXPathResult($entry, 'id');
- if ($id) {
- $item->id = $this->generateId($id);
+ if (! empty($id)) {
+ $item->id = $this->generateId((string) current($id));
}
else {
$item->id = $this->generateId(
@@ -266,13 +301,9 @@ class Atom extends Parser
*/
public function findItemLanguage(SimpleXMLElement $entry, Item $item, Feed $feed)
{
- $language = (string) $entry->attributes('xml', true)->{'lang'};
-
- if ($language === '') {
- $language = $feed->language;
- }
+ $language = XmlParser::getXPathResult($entry, './/@xml:lang');
- $item->language = $language;
+ $item->language = (string) current($language) ?: $feed->language;
}
/**
@@ -309,7 +340,10 @@ class Atom extends Parser
*/
private function findLink(SimpleXMLElement $xml, $rel)
{
- foreach ($xml->link as $link) {
+ $links = XmlParser::getXPathResult($xml, 'atom:link', $this->namespaces)
+ ?: XmlParser::getXPathResult($xml, 'link');
+
+ foreach ($links as $link) {
if ($rel === (string) $link['rel']) {
return $link;
}
@@ -327,19 +361,27 @@ class Atom extends Parser
*/
private function getContent(SimpleXMLElement $entry)
{
- if (isset($entry->content) && ! empty($entry->content)) {
+ $content = current(
+ XmlParser::getXPathResult($entry, 'atom:content', $this->namespaces)
+ ?: XmlParser::getXPathResult($entry, 'content')
+ );
- if (count($entry->content->children())) {
- return (string) $entry->content->asXML();
- }
- else {
- return (string) $entry->content;
+ if (! empty($content) && count($content->children())) {
+ $xml_string = '';
+
+ foreach($content->children() as $child) {
+ $xml_string .= $child->asXML();
}
+
+ return $xml_string;
}
- else if (isset($entry->summary) && ! empty($entry->summary)) {
- return (string) $entry->summary;
+ else if (trim((string) $content) !== '') {
+ return (string) $content;
}
- return '';
+ $summary = XmlParser::getXPathResult($entry, 'atom:summary', $this->namespaces)
+ ?: XmlParser::getXPathResult($entry, 'summary');
+
+ return (string) current($summary);
}
}
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/DateParser.php b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/DateParser.php
index 92d0012ac..4612a8613 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/DateParser.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/DateParser.php
@@ -88,7 +88,7 @@ class DateParser
* @access public
* @param string $format Date format
* @param string $value Original date value
- * @return DateTime
+ * @return DateTime|boolean
*/
public function getValidDate($format, $value)
{
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Item.php b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Item.php
index d891ef41c..c9dff5a6f 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Item.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Item.php
@@ -121,23 +121,24 @@ class Item
* @access public
* @param string $tag Tag name (examples: guid, media:content)
* @param string $attribute Tag attribute
- * @return string
+ * @return array|false Tag values or error
*/
public function getTag($tag, $attribute = '')
{
- // Get namespaced value
- if (strpos($tag, ':') !== false) {
- list(,$tag) = explode(':', $tag);
- return XmlParser::getNamespaceValue($this->xml, $this->namespaces, $tag, $attribute);
+ // convert to xPath attribute query
+ if ($attribute !== '') {
+ $attribute = '/@'.$attribute;
}
- // Return attribute value
- if (! empty($attribute)) {
- return (string) $this->xml->{$tag}[$attribute];
+ // construct query
+ $query = './/'.$tag.$attribute;
+ $elements = XmlParser::getXPathResult($this->xml, $query, $this->namespaces);
+
+ if ($elements === false) { // xPath error
+ return false;
}
- // Return tag content
- return (string) $this->xml->$tag;
+ return array_map(function ($element) { return (string) $element;}, $elements);
}
/**
@@ -198,7 +199,7 @@ class Item
* Get date
*
* @access public
- * $return integer
+ * @return \DateTime
*/
public function getDate()
{
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php
index 810494b70..918cdef33 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php
@@ -58,7 +58,7 @@ abstract class Parser
protected $fallback_url = '';
/**
- * XML namespaces
+ * XML namespaces supported by parser
*
* @access protected
* @var array
@@ -66,6 +66,14 @@ abstract class Parser
protected $namespaces = array();
/**
+ * XML namespaces used in document
+ *
+ * @access protected
+ * @var array
+ */
+ protected $used_namespaces = array();
+
+ /**
* Enable the content filtering
*
* @access private
@@ -117,9 +125,6 @@ abstract class Parser
// Encode everything in UTF-8
Logger::setMessage(get_called_class().': HTTP Encoding "'.$http_encoding.'" ; XML Encoding "'.$xml_encoding.'"');
$this->content = Encoding::convert($this->content, $xml_encoding ?: $http_encoding);
-
- // Workarounds
- $this->content = Filter::normalizeData($this->content);
}
/**
@@ -135,12 +140,19 @@ abstract class Parser
$xml = XmlParser::getSimpleXml($this->content);
if ($xml === false) {
- Logger::setMessage(get_called_class().': XML parsing error');
- Logger::setMessage(XmlParser::getErrors());
- throw new MalformedXmlException('XML parsing error');
+ Logger::setMessage(get_called_class().': Applying XML workarounds');
+ $this->content = Filter::normalizeData($this->content);
+ $xml = XmlParser::getSimpleXml($this->content);
+
+ if ($xml === false) {
+ Logger::setMessage(get_called_class().': XML parsing error');
+ Logger::setMessage(XmlParser::getErrors());
+ throw new MalformedXmlException('XML parsing error');
+ }
}
- $this->namespaces = $xml->getNamespaces(true);