diff options
36 files changed, 902 insertions, 114 deletions
diff --git a/composer.lock b/composer.lock index a73fdcce7..8771a76cf 100644 --- a/composer.lock +++ b/composer.lock @@ -57,12 +57,12 @@ "source": { "type": "git", "url": "https://github.com/fguillot/picoFeed.git", - "reference": "6485f32d62698be73c3f0456bb87d960fcae1586" + "reference": "11589851f91cc3f04c84ba873484486d1457e638" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/fguillot/picoFeed/zipball/6485f32d62698be73c3f0456bb87d960fcae1586", - "reference": "6485f32d62698be73c3f0456bb87d960fcae1586", + "url": "https://api.github.com/repos/fguillot/picoFeed/zipball/11589851f91cc3f04c84ba873484486d1457e638", + "reference": "11589851f91cc3f04c84ba873484486d1457e638", "shasum": "" }, "require": { @@ -86,7 +86,7 @@ ], "description": "Modern library to write or read feeds (RSS/Atom)", "homepage": "http://fguillot.github.io/picoFeed", - "time": "2014-12-16 23:53:59" + "time": "2014-12-22 03:23:04" }, { "name": "pear/net_url2", @@ -160,6 +160,7 @@ "fguillot/picofeed": 20 }, "prefer-stable": false, + "prefer-lowest": false, "platform": [], "platform-dev": [] } diff --git a/vendor/autoload.php b/vendor/autoload.php index f8495def9..3525cd9c6 100644 --- a/vendor/autoload.php +++ b/vendor/autoload.php @@ -4,4 +4,4 @@ require_once __DIR__ . '/composer' . '/autoload_real.php'; -return ComposerAutoloaderInitb70f37963a41b6db289ef240676024ef::getLoader(); +return ComposerAutoloaderInit473bffa75e8c08e86770574b2fe57877::getLoader(); diff --git a/vendor/composer/autoload_real.php b/vendor/composer/autoload_real.php index 113cd5ee5..b53f76186 100644 --- a/vendor/composer/autoload_real.php +++ b/vendor/composer/autoload_real.php @@ -2,7 +2,7 @@ // autoload_real.php @generated by Composer -class ComposerAutoloaderInitb70f37963a41b6db289ef240676024ef +class ComposerAutoloaderInit473bffa75e8c08e86770574b2fe57877 { private static $loader; @@ -19,9 +19,9 @@ class ComposerAutoloaderInitb70f37963a41b6db289ef240676024ef return self::$loader; } - spl_autoload_register(array('ComposerAutoloaderInitb70f37963a41b6db289ef240676024ef', 'loadClassLoader'), true, true); + spl_autoload_register(array('ComposerAutoloaderInit473bffa75e8c08e86770574b2fe57877', 'loadClassLoader'), true, true); self::$loader = $loader = new \Composer\Autoload\ClassLoader(); - spl_autoload_unregister(array('ComposerAutoloaderInitb70f37963a41b6db289ef240676024ef', 'loadClassLoader')); + spl_autoload_unregister(array('ComposerAutoloaderInit473bffa75e8c08e86770574b2fe57877', 'loadClassLoader')); $includePaths = require __DIR__ . '/include_paths.php'; array_push($includePaths, get_include_path()); @@ -46,14 +46,14 @@ class ComposerAutoloaderInitb70f37963a41b6db289ef240676024ef $includeFiles = require __DIR__ . '/autoload_files.php'; foreach ($includeFiles as $file) { - composerRequireb70f37963a41b6db289ef240676024ef($file); + composerRequire473bffa75e8c08e86770574b2fe57877($file); } return $loader; } } -function composerRequireb70f37963a41b6db289ef240676024ef($file) +function composerRequire473bffa75e8c08e86770574b2fe57877($file) { require $file; } diff --git a/vendor/composer/installed.json b/vendor/composer/installed.json index 2a5124d70..46ce73f9a 100644 --- a/vendor/composer/installed.json +++ b/vendor/composer/installed.json @@ -119,18 +119,18 @@ "source": { "type": "git", "url": "https://github.com/fguillot/picoFeed.git", - "reference": "6485f32d62698be73c3f0456bb87d960fcae1586" + "reference": "11589851f91cc3f04c84ba873484486d1457e638" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/fguillot/picoFeed/zipball/6485f32d62698be73c3f0456bb87d960fcae1586", - "reference": "6485f32d62698be73c3f0456bb87d960fcae1586", + "url": "https://api.github.com/repos/fguillot/picoFeed/zipball/11589851f91cc3f04c84ba873484486d1457e638", + "reference": "11589851f91cc3f04c84ba873484486d1457e638", "shasum": "" }, "require": { "php": ">=5.3.0" }, - "time": "2014-12-16 23:53:59", + "time": "2014-12-22 03:23:04", "type": "library", "installation-source": "dist", "autoload": { diff --git a/vendor/fguillot/picofeed/README.markdown b/vendor/fguillot/picofeed/README.markdown index a1e1e39c5..ea18adbb6 100644 --- a/vendor/fguillot/picofeed/README.markdown +++ b/vendor/fguillot/picofeed/README.markdown @@ -41,6 +41,7 @@ Authors - Major Contributors: - [Bernhard Posselt](https://github.com/Raydiation) - [David Pennington](https://github.com/Xeoncross) + - [Mathias Kresin](https://github.com/mkresin) Real world usage ---------------- diff --git a/vendor/fguillot/picofeed/docs/feed-parsing.markdown b/vendor/fguillot/picofeed/docs/feed-parsing.markdown index 82d3703e9..d00e08364 100644 --- a/vendor/fguillot/picofeed/docs/feed-parsing.markdown +++ b/vendor/fguillot/picofeed/docs/feed-parsing.markdown @@ -50,6 +50,7 @@ Feed::language = en-US Feed::description = Feed::logo = Feed::items = 15 items +Feed::isRTL() = false ---- Item::id = 38d8f48284fb03940cbb3aff9101089b81e44efb1281641bdd7c3e7e4bf3b0cd Item::title = openSUSE 13.2 : nouvelle version du caméléon disponible ! diff --git a/vendor/fguillot/picofeed/docs/grabber.markdown b/vendor/fguillot/picofeed/docs/grabber.markdown index 2098b25d0..b99b756ed 100644 --- a/vendor/fguillot/picofeed/docs/grabber.markdown +++ b/vendor/fguillot/picofeed/docs/grabber.markdown @@ -12,12 +12,36 @@ How the content grabber works? **The best results are obtained with XPath rules file.** -How to use the content scraper? -------------------------------- +Standalone usage +---------------- + +```php +<?php + +use PicoFeed\Client\Grabber; + +$grabber = new Grabber($item_url); +$grabber->download(); +$grabber->parse(); + +// Get raw HTML content +echo $grabber->getRawContent(); + +// Get relevant content +echo $grabber->getContent(); + +// Get filtered relevant content +echo $grabber->getFilteredContent(); +``` + +Fetch full item contents during feed parsing +-------------------------------------------- Before parsing all items, just call the method `$parser->enableContentGrabber()`: ```php +<?php + use PicoFeed\Reader\Reader; use PicoFeed\PicoFeedException; diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Client.php b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Client.php index c8c812c1a..602416e42 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Client.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Client.php @@ -199,16 +199,9 @@ abstract class Client $this->is_modified = false; } else if ($response['status'] == 200) { - - $etag = $this->getHeader($response, 'ETag'); - $last_modified = $this->getHeader($response, 'Last-Modified'); - - if ($this->isPropertyEquals('etag', $etag) || $this->isPropertyEquals('last_modified', $last_modified)) { - $this->is_modified = false; - } - - $this->etag = $etag; - $this->last_modified = $last_modified; + $this->is_modified = $this->hasBeenModified($response, $this->etag, $this->last_modified); + $this->etag = $this->getHeader($response, 'ETag'); + $this->last_modified = $this->getHeader($response, 'Last-Modified'); } if ($this->is_modified === false) { @@ -245,16 +238,39 @@ abstract class Client } /** - * Check if a class property equals to a value + * Check if a request has been modified according to the parameters * * @access public - * @param string $property Class property - * @param string $value Value + * @param array $response + * @param string $etag + * @param string $lastModified * @return boolean */ - private function isPropertyEquals($property, $value) + private function hasBeenModified($response, $etag, $lastModified) { - return $this->$property && $this->$property === $value; + $headers = array( + 'Etag' => $etag, + 'Last-Modified' => $lastModified + ); + + // Compare the values for each header that is present + $presentCacheHeaderCount = 0; + foreach ($headers as $key => $value) { + if (isset($response['headers'][$key])) { + if ($response['headers'][$key] !== $value) { + return true; + } + $presentCacheHeaderCount++; + } + } + + // If at least one header is present and the values match, the response + // was not modified + if ($presentCacheHeaderCount > 0) { + return false; + } + + return true; } /** @@ -324,7 +340,7 @@ abstract class Client Logger::setMessage(get_called_class().' HTTP header: '.$name.' => '.$value); } - return array($status, $headers); + return array($status, new HttpHeaders($headers)); } /** @@ -552,7 +568,7 @@ abstract class Client * * @access public * @param \PicoFeed\Config\Config $config Config instance - * @return \PicoFeed\Config\Config + * @return \PicoFeed\Client\Client */ public function setConfig($config) { diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Curl.php b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Curl.php index 2b0d7e1c0..54b3c6ef9 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Curl.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Curl.php @@ -99,7 +99,7 @@ class Curl extends Client * Prepare HTTP headers * * @access private - * @return array + * @return string[] */ private function prepareHeaders() { @@ -123,7 +123,7 @@ class Curl extends Client * Prepare curl proxy context * * @access private - * @return resource + * @return resource $ch */ private function prepareProxyContext($ch) { @@ -199,6 +199,9 @@ class Curl extends Client $this->handleError($curl_errno); } + // Update the url if there where redirects + $this->url = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL); + curl_close($ch); } @@ -215,7 +218,7 @@ class Curl extends Client list($status, $headers) = $this->parseHeaders(explode("\r\n", $this->headers[$this->headers_counter - 1])); - // When resticted with open_basedir + // When restricted with open_basedir if ($this->needToHandleRedirection($follow_location, $status)) { return $this->handleRedirection($headers['Location']); } diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Grabber.php b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Grabber.php index 57661cb7b..1bca05664 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Grabber.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Grabber.php @@ -3,7 +3,6 @@ namespace PicoFeed\Client; use DOMXPath; - use PicoFeed\Encoding\Encoding; use PicoFeed\Logging\Logger; use PicoFeed\Filter\Filter; @@ -148,7 +147,7 @@ class Grabber * * @access public * @param \PicoFeed\Config\Config $config Config instance - * @return \PicoFeed\Grabber + * @return Grabber */ public function setConfig($config) { @@ -179,6 +178,19 @@ class Grabber } /** + * Get filtered relevant content + * + * @access public + * @return string + */ + public function getFilteredContent() + { + $filter = Filter::html($this->content, $this->url); + $filter->setConfig($this->config); + return $filter->execute(); + } + + /** * Parse the HTML content * * @access public @@ -191,8 +203,8 @@ class Grabber Logger::setMessage(get_called_class().' Fix encoding'); Logger::setMessage(get_called_class().': HTTP Encoding "'.$this->encoding.'"'); - $this->html = Filter::stripHeadTags($this->html); $this->html = Encoding::convert($this->html, $this->encoding); + $this->html = Filter::stripHeadTags($this->html); Logger::setMessage(get_called_class().' Content length: '.strlen($this->html).' bytes'); $rules = $this->getRules(); @@ -228,6 +240,7 @@ class Grabber $client->setConfig($this->config); $client->execute($this->url); + $this->url = $client->getUrl(); $this->html = $client->getContent(); $this->encoding = $client->getEncoding(); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Client/HttpHeaders.php b/vendor/fguillot/picofeed/lib/PicoFeed/Client/HttpHeaders.php new file mode 100644 index 000000000..4453a7871 --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Client/HttpHeaders.php @@ -0,0 +1,43 @@ +<?php + +namespace PicoFeed\Client; + +use ArrayAccess; + +/** + * Class to handle http headers case insensitivity + * + * @author Bernhard Posselt + * @package Client + */ +class HttpHeaders implements ArrayAccess +{ + private $headers = array(); + + public function __construct(array $headers) + { + foreach ($headers as $key => $value) { + $this->headers[strtolower($key)] = $value; + } + } + + public function offsetGet($offset) + { + return $this->headers[strtolower($offset)]; + } + + public function offsetSet($offset, $value) + { + $this->headers[strtolower($offset)] = $value; + } + + public function offsetExists($offset) + { + return isset($this->headers[strtolower($offset)]); + } + + public function offsetUnset($offset) + { + unset($this->headers[strtolower($offset)]); + } +} diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Stream.php b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Stream.php index a0058f9b0..32d045cb1 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Stream.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Stream.php @@ -16,7 +16,7 @@ class Stream extends Client * Prepare HTTP headers * * @access private - * @return array + * @return string[] */ private function prepareHeaders() { @@ -128,11 +128,11 @@ class Stream extends Client * Decode body response according to the HTTP headers * * @access public - * @param string $body Raw body - * @param array $headers HTTP headers + * @param string $body Raw body + * @param HttpHeaders $headers HTTP headers * @return string */ - public function decodeBody($body, array $headers) + public function decodeBody($body, HttpHeaders $headers) { if (isset($headers['Transfer-Encoding']) && $headers['Transfer-Encoding'] === 'chunked') { $body = $this->decodeChunked($body); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Config/Config.php b/vendor/fguillot/picofeed/lib/PicoFeed/Config/Config.php index 9a5381f0f..2ee3718eb 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Config/Config.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Config/Config.php @@ -55,8 +55,8 @@ namespace PicoFeed\Config; * @method array getFilterSchemeWhitelist(array $default_value) * @method array getFilterWhitelistedTags(array $default_value) * @method array getFilterBlacklistedTags(array $default_value) - * @method string getFilterImageProxyUrl($default_value) - * @method string getFilterImageProxyCallback($default_value) + * @method string getFilterImageProxyUrl() + * @method \Closure getFilterImageProxyCallback() */ class Config { diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Encoding/Encoding.php b/vendor/fguillot/picofeed/lib/PicoFeed/Encoding/Encoding.php index d6296c0b6..7739def5f 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Encoding/Encoding.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Encoding/Encoding.php @@ -152,20 +152,16 @@ class Encoding return $cc1.$cc2; } - public static function convert_CP_1251($input) - { - return iconv('CP1251', 'UTF-8//TRANSLIT', $input); - } - public static function convert($input, $encoding) { - if ($encoding === 'windows-1251') { - return self::convert_CP_1251($input); + switch ($encoding) { + case 'utf-8': + return $input; + case 'windows-1251': + case 'windows-1255': + return iconv($encoding, 'UTF-8//TRANSLIT', $input); + default: + return self::toUTF8($input); } - else if ($encoding === '' || $encoding !== 'utf-8') { - return self::toUTF8($input); - } - - return $input; } } diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Attribute.php b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Attribute.php index 66b3470f8..5948dec35 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Attribute.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Attribute.php @@ -460,7 +460,7 @@ class Attribute * Check if an attribute name is an external resource * * @access public - * @param string $data Attribute name + * @param string $attribute Attribute name * @return boolean */ public function isResource($attribute) @@ -491,7 +491,7 @@ class Attribute * Detect if an url is blacklisted * * @access public - * @param string $resouce Attribute value (URL) + * @param string $resource Attribute value (URL) * @return boolean */ public function isBlacklistedMedia($resource) @@ -529,7 +529,7 @@ class Attribute * * @access public * @param array $values List of tags: ['video' => ['src', 'cover'], 'img' => ['src']] - * @return \PicoFeed\Filter\Filter + * @return Attribute */ public function setWhitelistedAttributes(array $values) { @@ -542,7 +542,7 @@ class Attribute * * @access public * @param array $values List of scheme: ['http://', 'ftp://'] - * @return \PicoFeed\Filter\Filter + * @return Attribute */ public function setSchemeWhitelist(array $values) { @@ -555,7 +555,7 @@ class Attribute * * @access public * @param array $values List of values: ['src', 'href'] - * @return \PicoFeed\Filter\Filter + * @return Attribute */ public function setMediaAttributes(array $values) { @@ -568,7 +568,7 @@ class Attribute * * @access public * @param array $values List of tags: ['http://google.com/', '...'] - * @return \PicoFeed\Filter\Filter + * @return Attribute */ public function setMediaBlacklist(array $values) { @@ -581,7 +581,7 @@ class Attribute * * @access public * @param array $values List of tags: ['img' => 'src'] - * @return \PicoFeed\Filter\Filter + * @return Attribute */ public function setRequiredAttributes(array $values) { @@ -594,7 +594,7 @@ class Attribute * * @access public * @param array $values List of tags: ['a' => 'target="_blank"'] - * @return \PicoFeed\Filter\Filter + * @return Attribute */ public function setAttributeOverrides(array $values) |