diff options
author | Bernhard Posselt <dev@bernhard-posselt.com> | 2014-12-22 09:16:08 +0100 |
---|---|---|
committer | Bernhard Posselt <dev@bernhard-posselt.com> | 2014-12-22 09:16:08 +0100 |
commit | 5697f7c92cbc7b2c23d2a8c6ba3d904734dd0739 (patch) | |
tree | d9c7d8cf5b7a42d505ceb06d63a9e00fa212daf8 /vendor/fguillot/picofeed/lib | |
parent | d2d16c4c26f8f9a7ee97350e5cba5c3554c51013 (diff) |
update picofeed
Diffstat (limited to 'vendor/fguillot/picofeed/lib')
21 files changed, 207 insertions, 95 deletions
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Client.php b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Client.php index c8c812c1a..602416e42 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Client.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Client.php @@ -199,16 +199,9 @@ abstract class Client $this->is_modified = false; } else if ($response['status'] == 200) { - - $etag = $this->getHeader($response, 'ETag'); - $last_modified = $this->getHeader($response, 'Last-Modified'); - - if ($this->isPropertyEquals('etag', $etag) || $this->isPropertyEquals('last_modified', $last_modified)) { - $this->is_modified = false; - } - - $this->etag = $etag; - $this->last_modified = $last_modified; + $this->is_modified = $this->hasBeenModified($response, $this->etag, $this->last_modified); + $this->etag = $this->getHeader($response, 'ETag'); + $this->last_modified = $this->getHeader($response, 'Last-Modified'); } if ($this->is_modified === false) { @@ -245,16 +238,39 @@ abstract class Client } /** - * Check if a class property equals to a value + * Check if a request has been modified according to the parameters * * @access public - * @param string $property Class property - * @param string $value Value + * @param array $response + * @param string $etag + * @param string $lastModified * @return boolean */ - private function isPropertyEquals($property, $value) + private function hasBeenModified($response, $etag, $lastModified) { - return $this->$property && $this->$property === $value; + $headers = array( + 'Etag' => $etag, + 'Last-Modified' => $lastModified + ); + + // Compare the values for each header that is present + $presentCacheHeaderCount = 0; + foreach ($headers as $key => $value) { + if (isset($response['headers'][$key])) { + if ($response['headers'][$key] !== $value) { + return true; + } + $presentCacheHeaderCount++; + } + } + + // If at least one header is present and the values match, the response + // was not modified + if 
($presentCacheHeaderCount > 0) { + return false; + } + + return true; } /** @@ -324,7 +340,7 @@ abstract class Client Logger::setMessage(get_called_class().' HTTP header: '.$name.' => '.$value); } - return array($status, $headers); + return array($status, new HttpHeaders($headers)); } /** @@ -552,7 +568,7 @@ abstract class Client * * @access public * @param \PicoFeed\Config\Config $config Config instance - * @return \PicoFeed\Config\Config + * @return \PicoFeed\Client\Client */ public function setConfig($config) { diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Curl.php b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Curl.php index 2b0d7e1c0..54b3c6ef9 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Curl.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Curl.php @@ -99,7 +99,7 @@ class Curl extends Client * Prepare HTTP headers * * @access private - * @return array + * @return string[] */ private function prepareHeaders() { @@ -123,7 +123,7 @@ class Curl extends Client * Prepare curl proxy context * * @access private - * @return resource + * @return resource $ch */ private function prepareProxyContext($ch) { @@ -199,6 +199,9 @@ class Curl extends Client $this->handleError($curl_errno); } + // Update the url if there where redirects + $this->url = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL); + curl_close($ch); } @@ -215,7 +218,7 @@ class Curl extends Client list($status, $headers) = $this->parseHeaders(explode("\r\n", $this->headers[$this->headers_counter - 1])); - // When resticted with open_basedir + // When restricted with open_basedir if ($this->needToHandleRedirection($follow_location, $status)) { return $this->handleRedirection($headers['Location']); } diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Grabber.php b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Grabber.php index 57661cb7b..1bca05664 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Grabber.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Grabber.php @@ 
-3,7 +3,6 @@ namespace PicoFeed\Client; use DOMXPath; - use PicoFeed\Encoding\Encoding; use PicoFeed\Logging\Logger; use PicoFeed\Filter\Filter; @@ -148,7 +147,7 @@ class Grabber * * @access public * @param \PicoFeed\Config\Config $config Config instance - * @return \PicoFeed\Grabber + * @return Grabber */ public function setConfig($config) { @@ -179,6 +178,19 @@ class Grabber } /** + * Get filtered relevant content + * + * @access public + * @return string + */ + public function getFilteredContent() + { + $filter = Filter::html($this->content, $this->url); + $filter->setConfig($this->config); + return $filter->execute(); + } + + /** * Parse the HTML content * * @access public @@ -191,8 +203,8 @@ class Grabber Logger::setMessage(get_called_class().' Fix encoding'); Logger::setMessage(get_called_class().': HTTP Encoding "'.$this->encoding.'"'); - $this->html = Filter::stripHeadTags($this->html); $this->html = Encoding::convert($this->html, $this->encoding); + $this->html = Filter::stripHeadTags($this->html); Logger::setMessage(get_called_class().' Content length: '.strlen($this->html).' 
bytes'); $rules = $this->getRules(); @@ -228,6 +240,7 @@ class Grabber $client->setConfig($this->config); $client->execute($this->url); + $this->url = $client->getUrl(); $this->html = $client->getContent(); $this->encoding = $client->getEncoding(); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Client/HttpHeaders.php b/vendor/fguillot/picofeed/lib/PicoFeed/Client/HttpHeaders.php new file mode 100644 index 000000000..4453a7871 --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Client/HttpHeaders.php @@ -0,0 +1,43 @@ +<?php + +namespace PicoFeed\Client; + +use ArrayAccess; + +/** + * Class to handle http headers case insensitivity + * + * @author Bernhard Posselt + * @package Client + */ +class HttpHeaders implements ArrayAccess +{ + private $headers = array(); + + public function __construct(array $headers) + { + foreach ($headers as $key => $value) { + $this->headers[strtolower($key)] = $value; + } + } + + public function offsetGet($offset) + { + return $this->headers[strtolower($offset)]; + } + + public function offsetSet($offset, $value) + { + $this->headers[strtolower($offset)] = $value; + } + + public function offsetExists($offset) + { + return isset($this->headers[strtolower($offset)]); + } + + public function offsetUnset($offset) + { + unset($this->headers[strtolower($offset)]); + } +} diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Stream.php b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Stream.php index a0058f9b0..32d045cb1 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Stream.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Stream.php @@ -16,7 +16,7 @@ class Stream extends Client * Prepare HTTP headers * * @access private - * @return array + * @return string[] */ private function prepareHeaders() { @@ -128,11 +128,11 @@ class Stream extends Client * Decode body response according to the HTTP headers * * @access public - * @param string $body Raw body - * @param array $headers HTTP headers + * @param string $body Raw 
body + * @param HttpHeaders $headers HTTP headers * @return string */ - public function decodeBody($body, array $headers) + public function decodeBody($body, HttpHeaders $headers) { if (isset($headers['Transfer-Encoding']) && $headers['Transfer-Encoding'] === 'chunked') { $body = $this->decodeChunked($body); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Config/Config.php b/vendor/fguillot/picofeed/lib/PicoFeed/Config/Config.php index 9a5381f0f..2ee3718eb 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Config/Config.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Config/Config.php @@ -55,8 +55,8 @@ namespace PicoFeed\Config; * @method array getFilterSchemeWhitelist(array $default_value) * @method array getFilterWhitelistedTags(array $default_value) * @method array getFilterBlacklistedTags(array $default_value) - * @method string getFilterImageProxyUrl($default_value) - * @method string getFilterImageProxyCallback($default_value) + * @method string getFilterImageProxyUrl() + * @method \Closure getFilterImageProxyCallback() */ class Config { diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Encoding/Encoding.php b/vendor/fguillot/picofeed/lib/PicoFeed/Encoding/Encoding.php index d6296c0b6..7739def5f 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Encoding/Encoding.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Encoding/Encoding.php @@ -152,20 +152,16 @@ class Encoding return $cc1.$cc2; } - public static function convert_CP_1251($input) - { - return iconv('CP1251', 'UTF-8//TRANSLIT', $input); - } - public static function convert($input, $encoding) { - if ($encoding === 'windows-1251') { - return self::convert_CP_1251($input); + switch ($encoding) { + case 'utf-8': + return $input; + case 'windows-1251': + case 'windows-1255': + return iconv($encoding, 'UTF-8//TRANSLIT', $input); + default: + return self::toUTF8($input); } - else if ($encoding === '' || $encoding !== 'utf-8') { - return self::toUTF8($input); - } - - return $input; } } diff --git 
a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Attribute.php b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Attribute.php index 66b3470f8..5948dec35 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Attribute.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Attribute.php @@ -460,7 +460,7 @@ class Attribute * Check if an attribute name is an external resource * * @access public - * @param string $data Attribute name + * @param string $attribute Attribute name * @return boolean */ public function isResource($attribute) @@ -491,7 +491,7 @@ class Attribute * Detect if an url is blacklisted * * @access public - * @param string $resouce Attribute value (URL) + * @param string $resource Attribute value (URL) * @return boolean */ public function isBlacklistedMedia($resource) @@ -529,7 +529,7 @@ class Attribute * * @access public * @param array $values List of tags: ['video' => ['src', 'cover'], 'img' => ['src']] - * @return \PicoFeed\Filter\Filter + * @return Attribute */ public function setWhitelistedAttributes(array $values) { @@ -542,7 +542,7 @@ class Attribute * * @access public * @param array $values List of scheme: ['http://', 'ftp://'] - * @return \PicoFeed\Filter\Filter + * @return Attribute */ public function setSchemeWhitelist(array $values) { @@ -555,7 +555,7 @@ class Attribute * * @access public * @param array $values List of values: ['src', 'href'] - * @return \PicoFeed\Filter\Filter + * @return Attribute */ public function setMediaAttributes(array $values) { @@ -568,7 +568,7 @@ class Attribute * * @access public * @param array $values List of tags: ['http://google.com/', '...'] - * @return \PicoFeed\Filter\Filter + * @return Attribute */ public function setMediaBlacklist(array $values) { @@ -581,7 +581,7 @@ class Attribute * * @access public * @param array $values List of tags: ['img' => 'src'] - * @return \PicoFeed\Filter\Filter + * @return Attribute */ public function setRequiredAttributes(array $values) { @@ -594,7 +594,7 @@ class Attribute * 
* @access public * @param array $values List of tags: ['a' => 'target="_blank"'] - * @return \PicoFeed\Filter\Filter + * @return Attribute */ public function setAttributeOverrides(array $values) { @@ -607,7 +607,7 @@ class Attribute * * @access public * @param array $values List of tags: ['width', 'height'] - * @return \PicoFeed\Filter\Filter + * @return Attribute */ public function setIntegerAttributes(array $values) { @@ -620,7 +620,7 @@ class Attribute * * @access public * @param array $values List of tags: ['http://www.youtube.com'] - * @return \PicoFeed\Filter\Filter + * @return Attribute */ public function setIframeWhitelist(array $values) { @@ -635,7 +635,7 @@ class Attribute * * @access public * @param string $url Proxy URL - * @return \PicoFeed\Filter\Filter + * @return Attribute */ public function setImageProxyUrl($url) { @@ -648,7 +648,7 @@ class Attribute * * @access public * @param \Closure $callback - * @return \PicoFeed\Filter\Filter + * @return Attribute */ public function setImageProxyCallback($callback) { diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Filter.php b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Filter.php index 82289444b..0eb3f88ea 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Filter.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Filter.php @@ -2,8 +2,6 @@ namespace PicoFeed\Filter; -use PicoFeed\Parser\XmlParser; - /** * Filter class * @@ -19,7 +17,7 @@ class Filter * @access public * @param string $html HTML content * @param string $website Site URL (used to build absolute URL) - * @return PicoFeed\Filter\Html + * @return Html */ public static function html($html, $website) { @@ -88,16 +86,7 @@ class Filter */ public static function stripHeadTags($data) { - $start = strpos($data, '<head>'); - $end = strpos($data, '</head>'); - - if ($start !== false && $end !== false) { - $before = substr($data, 0, $start); - $after = substr($data, $end + 7); - $data = $before.$after; - } - - return $data; + return 
preg_replace('@<head[^>]*?>.*?</head>@siu','', $data ); } /** diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Html.php b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Html.php index f7816f1d2..7abd740b1 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Html.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Html.php @@ -150,7 +150,7 @@ class Html * * @access public * @param resource $parser XML parser - * @param string $name Tag name + * @param string $tag Tag name * @param array $attributes Tag attributes */ public function startTag($parser, $tag, array $attributes) @@ -178,7 +178,7 @@ class Html * * @access public * @param resource $parser XML parser - * @param string $name Tag name + * @param string $tag Tag name */ public function endTag($parser, $tag) { diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Tag.php b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Tag.php index dbeffe7a4..40f7c6c98 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Tag.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Tag.php @@ -163,7 +163,7 @@ class Tag * * @access public * @param array $values List of tags: ['video' => ['src', 'cover'], 'img' => ['src']] - * @return \PicoFeed\Filter + * @return Tag */ public function setWhitelistedTags(array $values) { diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Atom.php b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Atom.php index 5bb930b22..154ed3cfb 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Atom.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Atom.php @@ -192,7 +192,7 @@ class Atom extends Parser */ public function findItemUrl(SimpleXMLElement $entry, Item $item) { - $item->url = $this->getUrl($entry, 'alternate'); + $item->url = $this->getUrl($entry, 'alternate', true); } /** @@ -245,7 +245,13 @@ class Atom extends Parser */ public function findItemLanguage(SimpleXMLElement $entry, Item $item, Feed $feed) { - $item->language = $feed->language; + $language = (string) 
$entry->attributes('xml', true)->{'lang'}; + + if ($language === '') { + $language = $feed->language; + } + + $item->language = $language; } /** @@ -283,7 +289,7 @@ class Atom extends Parser private function findLink(SimpleXMLElement $xml, $rel) { foreach ($xml->link as $link) { - if (empty($rel) || $rel === (string) $link['rel']) { + if ($rel === (string) $link['rel']) { return $link; } } diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Feed.php b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Feed.php index b8edbd6f8..99fc27e8e 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Feed.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Feed.php @@ -96,6 +96,7 @@ class Feed $output .= 'Feed::'.$property.' = '.$this->$property.PHP_EOL; } + $output .= 'Feed::isRTL() = '.($this->isRTL() ? 'true' : 'false').PHP_EOL; $output .= 'Feed::items = '.count($this->items).' items'.PHP_EOL; foreach ($this->items as $item) { @@ -204,4 +205,15 @@ class Feed { return $this->items; } + + /** + * Return true if the feed is "Right to Left" + * + * @access public + * @return bool + */ + public function isRTL() + { + return Parser::isLanguageRTL($this->language); + } } diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Item.php b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Item.php index 6b2864ba7..3642cccea 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Item.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Item.php @@ -226,14 +226,6 @@ class Item */ public function isRTL() { - $language = strtolower($this->language); - - foreach ($this->rtl as $prefix) { - if (strpos($language, $prefix) === 0) { - return true; - } - } - - return false; + return Parser::isLanguageRTL($this->language); } } diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php index de73504e4..44f0c8e38 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php +++ 
b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php @@ -5,7 +5,6 @@ namespace PicoFeed\Parser; use SimpleXMLElement; use DateTime; use DateTimeZone; - use PicoFeed\Encoding\Encoding; use PicoFeed\Filter\Filter; use PicoFeed\Logging\Logger; @@ -96,9 +95,9 @@ abstract class Parser * Constructor * * @access public - * @param string $content Feed content - * @param string $http_encoding HTTP encoding (headers) - * @param string $base_url Fallback url when the feed provide relative or broken url + * @param string $content Feed content + * @param string $http_encoding HTTP encoding (headers) + * @param string $fallback_url Fallback url when the feed provide relative or broken url */ public function __construct($content, $http_encoding = '', $fallback_url = '') { @@ -268,7 +267,7 @@ abstract class Parser * * @access public * @param string $args Pieces of data to hash - * @return string Id + * @return string */ public function generateId() { @@ -357,6 +356,38 @@ abstract class Parser } /** + * Return true if the given language is "Right to Left" + * + * @static + * @access public + * @param string $language Language: fr-FR, en-US + * @return bool + */ + public static function isLanguageRTL($language) + { + $language = strtolower($language); + + $rtl_languages = array( + 'ar', // Arabic (ar-**) + 'fa', // Farsi (fa-**) + 'ur', // Urdu (ur-**) + 'ps', // Pashtu (ps-**) + 'syr', // Syriac (syr-**) + 'dv', // Divehi (dv-**) + 'he', // Hebrew (he-**) + 'yi', // Yiddish (yi-**) + ); + + foreach ($rtl_languages as $prefix) { + if (strpos($language, $prefix) === 0) { + return true; + } + } + + return false; + } + + /** * Set Hash algorithm used for id generation * * @access public diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/XmlParser.php b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/XmlParser.php index 2b007e199..0afc89d5d 100644 --- a/vendor/fgu |