From 867cb7dbf960dc0cfbf7e59d656d1337bb59c526 Mon Sep 17 00:00:00 2001 From: Bernhard Posselt Date: Wed, 17 Dec 2014 09:01:54 +0100 Subject: update picofeed, fix #695 --- composer.lock | 8 +- fetcher/feedfetcher.php | 2 +- tests/unit/fetcher/FeedFetcherTest.php | 4 +- utility/picofeedfaviconfactory.php | 2 +- vendor/autoload.php | 2 +- vendor/composer/ClassLoader.php | 6 +- vendor/composer/autoload_real.php | 10 +- vendor/composer/installed.json | 10 +- vendor/fguillot/picofeed/README.markdown | 1 + vendor/fguillot/picofeed/docs/config.markdown | 23 + vendor/fguillot/picofeed/docs/favicon.markdown | 37 +- .../fguillot/picofeed/docs/feed-parsing.markdown | 13 +- vendor/fguillot/picofeed/docs/image-proxy.markdown | 66 + vendor/fguillot/picofeed/example.php | 45 - .../picofeed/lib/PicoFeed/Client/Client.php | 40 +- .../picofeed/lib/PicoFeed/Client/Favicon.php | 170 --- .../fguillot/picofeed/lib/PicoFeed/Client/Url.php | 14 + .../picofeed/lib/PicoFeed/Config/Config.php | 4 + .../picofeed/lib/PicoFeed/Filter/Attribute.php | 75 +- .../picofeed/lib/PicoFeed/Filter/Filter.php | 32 +- .../fguillot/picofeed/lib/PicoFeed/Filter/Html.php | 2 + .../fguillot/picofeed/lib/PicoFeed/Parser/Atom.php | 89 +- .../fguillot/picofeed/lib/PicoFeed/Parser/Feed.php | 29 +- .../fguillot/picofeed/lib/PicoFeed/Parser/Item.php | 37 + .../picofeed/lib/PicoFeed/Parser/Parser.php | 108 +- .../picofeed/lib/PicoFeed/Parser/Rss20.php | 63 +- .../picofeed/lib/PicoFeed/Reader/Favicon.php | 209 +++ .../picofeed/lib/PicoFeed/Reader/Reader.php | 43 +- .../fguillot/picofeed/tests/Client/ClientTest.php | 20 +- .../fguillot/picofeed/tests/Client/FaviconTest.php | 143 -- .../picofeed/tests/Filter/AttributeFilterTest.php | 49 +- .../fguillot/picofeed/tests/Filter/FilterTest.php | 35 + .../picofeed/tests/Parser/AtomParserTest.php | 29 +- vendor/fguillot/picofeed/tests/Parser/ItemTest.php | 24 + .../fguillot/picofeed/tests/Parser/ParserTest.php | 11 +- .../picofeed/tests/Parser/Rss10ParserTest.php | 11 +- .../picofeed/tests/Parser/Rss20ParserTest.php | 21 +- .../picofeed/tests/Parser/Rss91ParserTest.php | 5 +- .../picofeed/tests/Parser/Rss92ParserTest.php | 5 +- .../fguillot/picofeed/tests/Reader/FaviconTest.php | 158 ++ .../fguillot/picofeed/tests/Reader/ReaderTest.php | 11 +- .../fguillot/picofeed/tests/fixtures/podbean.xml | 1596 ++++++++++++++++++++ 42 files changed, 2646 insertions(+), 616 deletions(-) create mode 100644 vendor/fguillot/picofeed/docs/image-proxy.markdown delete mode 100644 vendor/fguillot/picofeed/example.php delete mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Client/Favicon.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Reader/Favicon.php delete mode 100644 vendor/fguillot/picofeed/tests/Client/FaviconTest.php create mode 100644 vendor/fguillot/picofeed/tests/Parser/ItemTest.php create mode 100644 vendor/fguillot/picofeed/tests/Reader/FaviconTest.php create mode 100644 vendor/fguillot/picofeed/tests/fixtures/podbean.xml diff --git a/composer.lock b/composer.lock index 0e25a2be0..a73fdcce7 100644 --- a/composer.lock +++ b/composer.lock @@ -57,12 +57,12 @@ "source": { "type": "git", "url": "https://github.com/fguillot/picoFeed.git", - "reference": "687cc58b51d739f06bc1729d69280e5a867840dd" + "reference": "6485f32d62698be73c3f0456bb87d960fcae1586" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/fguillot/picoFeed/zipball/687cc58b51d739f06bc1729d69280e5a867840dd", - "reference": "687cc58b51d739f06bc1729d69280e5a867840dd", + "url": "https://api.github.com/repos/fguillot/picoFeed/zipball/6485f32d62698be73c3f0456bb87d960fcae1586", + "reference": "6485f32d62698be73c3f0456bb87d960fcae1586", "shasum": "" }, "require": { @@ -86,7 +86,7 @@ ], "description": "Modern library to write or read feeds (RSS/Atom)", "homepage": "http://fguillot.github.io/picoFeed", - "time": "2014-11-25 23:40:52" + "time": "2014-12-16 23:53:59" }, { "name": "pear/net_url2", diff --git a/fetcher/feedfetcher.php b/fetcher/feedfetcher.php index faf473475..28d1f951c 100644 --- a/fetcher/feedfetcher.php +++ b/fetcher/feedfetcher.php @@ -179,7 +179,7 @@ class FeedFetcher implements IFeedFetcher { $etag, $location) { $feed = new Feed(); - $link = $parsedFeed->getUrl(); + $link = $parsedFeed->getSiteUrl(); if (!$link) { $link = $location; diff --git a/tests/unit/fetcher/FeedFetcherTest.php b/tests/unit/fetcher/FeedFetcherTest.php index 5f68ff333..9c24524b0 100644 --- a/tests/unit/fetcher/FeedFetcherTest.php +++ b/tests/unit/fetcher/FeedFetcherTest.php @@ -77,7 +77,7 @@ class FeedFetcherTest extends \PHPUnit_Framework_TestCase { ->disableOriginalConstructor() ->getMock(); $this->faviconFetcher = $this->getMockBuilder( - '\PicoFeed\Client\Favicon') + '\PicoFeed\Reader\Favicon') ->disableOriginalConstructor() ->getMock(); $this->faviconFactory = $this->getMockBuilder( @@ -228,7 +228,7 @@ class FeedFetcherTest extends \PHPUnit_Framework_TestCase { private function createFeed($hasFavicon=false) { $this->expectFeed('getTitle', $this->feedTitle); - $this->expectFeed('getUrl', $this->feedLink); + $this->expectFeed('getSiteUrl', $this->feedLink); $feed = new Feed(); $feed->setTitle('&its a title'); diff --git a/utility/picofeedfaviconfactory.php b/utility/picofeedfaviconfactory.php index 4509b9e06..b3a48747a 100644 --- a/utility/picofeedfaviconfactory.php +++ b/utility/picofeedfaviconfactory.php @@ -15,7 +15,7 @@ namespace OCA\News\Utility; use \PicoFeed\Config\Config; -use \PicoFeed\Client\Favicon; +use \PicoFeed\Reader\Favicon; class PicoFeedFaviconFactory { diff --git a/vendor/autoload.php b/vendor/autoload.php index 357c2c52a..f8495def9 100644 --- a/vendor/autoload.php +++ b/vendor/autoload.php @@ -4,4 +4,4 @@ require_once __DIR__ . '/composer' . '/autoload_real.php'; -return ComposerAutoloaderInit80e526884d05a69ec5f87ea0eadf0d03::getLoader(); +return ComposerAutoloaderInitb70f37963a41b6db289ef240676024ef::getLoader(); diff --git a/vendor/composer/ClassLoader.php b/vendor/composer/ClassLoader.php index 443364959..70d78bc3f 100644 --- a/vendor/composer/ClassLoader.php +++ b/vendor/composer/ClassLoader.php @@ -56,7 +56,11 @@ class ClassLoader public function getPrefixes() { - return call_user_func_array('array_merge', $this->prefixesPsr0); + if (!empty($this->prefixesPsr0)) { + return call_user_func_array('array_merge', $this->prefixesPsr0); + } + + return array(); } public function getPrefixesPsr4() diff --git a/vendor/composer/autoload_real.php b/vendor/composer/autoload_real.php index bc6f011c7..113cd5ee5 100644 --- a/vendor/composer/autoload_real.php +++ b/vendor/composer/autoload_real.php @@ -2,7 +2,7 @@ // autoload_real.php @generated by Composer -class ComposerAutoloaderInit80e526884d05a69ec5f87ea0eadf0d03 +class ComposerAutoloaderInitb70f37963a41b6db289ef240676024ef { private static $loader; @@ -19,9 +19,9 @@ class ComposerAutoloaderInit80e526884d05a69ec5f87ea0eadf0d03 return self::$loader; } - spl_autoload_register(array('ComposerAutoloaderInit80e526884d05a69ec5f87ea0eadf0d03', 'loadClassLoader'), true, true); + spl_autoload_register(array('ComposerAutoloaderInitb70f37963a41b6db289ef240676024ef', 'loadClassLoader'), true, true); self::$loader = $loader = new \Composer\Autoload\ClassLoader(); - spl_autoload_unregister(array('ComposerAutoloaderInit80e526884d05a69ec5f87ea0eadf0d03', 'loadClassLoader')); + spl_autoload_unregister(array('ComposerAutoloaderInitb70f37963a41b6db289ef240676024ef', 'loadClassLoader')); $includePaths = require __DIR__ . '/include_paths.php'; array_push($includePaths, get_include_path()); @@ -46,14 +46,14 @@ class ComposerAutoloaderInit80e526884d05a69ec5f87ea0eadf0d03 $includeFiles = require __DIR__ . '/autoload_files.php'; foreach ($includeFiles as $file) { - composerRequire80e526884d05a69ec5f87ea0eadf0d03($file); + composerRequireb70f37963a41b6db289ef240676024ef($file); } return $loader; } } -function composerRequire80e526884d05a69ec5f87ea0eadf0d03($file) +function composerRequireb70f37963a41b6db289ef240676024ef($file) { require $file; } diff --git a/vendor/composer/installed.json b/vendor/composer/installed.json index 7c9dac276..2a5124d70 100644 --- a/vendor/composer/installed.json +++ b/vendor/composer/installed.json @@ -119,20 +119,20 @@ "source": { "type": "git", "url": "https://github.com/fguillot/picoFeed.git", - "reference": "687cc58b51d739f06bc1729d69280e5a867840dd" + "reference": "6485f32d62698be73c3f0456bb87d960fcae1586" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/fguillot/picoFeed/zipball/687cc58b51d739f06bc1729d69280e5a867840dd", - "reference": "687cc58b51d739f06bc1729d69280e5a867840dd", + "url": "https://api.github.com/repos/fguillot/picoFeed/zipball/6485f32d62698be73c3f0456bb87d960fcae1586", + "reference": "6485f32d62698be73c3f0456bb87d960fcae1586", "shasum": "" }, "require": { "php": ">=5.3.0" }, - "time": "2014-11-25 23:40:52", + "time": "2014-12-16 23:53:59", "type": "library", - "installation-source": "source", + "installation-source": "dist", "autoload": { "psr-0": { "PicoFeed": "lib/" diff --git a/vendor/fguillot/picofeed/README.markdown b/vendor/fguillot/picofeed/README.markdown index 868d532e9..a1e1e39c5 100644 --- a/vendor/fguillot/picofeed/README.markdown +++ b/vendor/fguillot/picofeed/README.markdown @@ -59,6 +59,7 @@ Documentation - [Favicon fetcher](docs/favicon.markdown) - [OPML file importation](docs/opml-import.markdown) - [OPML file exportation](docs/opml-export.markdown) +- [Image proxy](docs/image-proxy.markdown) (avoid SSL mixed content warnings) - [Web scraping](docs/grabber.markdown) - [Exceptions](docs/exceptions.markdown) - [Debugging](docs/debugging.markdown) diff --git a/vendor/fguillot/picofeed/docs/config.markdown b/vendor/fguillot/picofeed/docs/config.markdown index 5e0dfea53..75546abd1 100644 --- a/vendor/fguillot/picofeed/docs/config.markdown +++ b/vendor/fguillot/picofeed/docs/config.markdown @@ -261,3 +261,26 @@ $config->setFilterSchemeWhitelist(['http://', 'ftp://']); ```php $config->setFilterWhitelistedTags(['a' => ['href'], 'img' => ['src', 'title']]); ``` + +### Define a image proxy url + +- Method name: `setFilterImageProxyUrl()` +- Default value: Empty +- Argument value: string + +```php +$config->setFilterImageProxyUrl('http://myproxy.example.org/?url=%s'); +``` + +### Define a image proxy callback + +- Method name: `setFilterImageProxyCallback()` +- Default value: null +- Argument value: Closure + +```php +$config->setFilterImageProxyCallback(function ($image_url) { + $key = hash_hmac('sha1', $image_url, 'secret'); + return 'https://mypublicproxy/'.$key.'/'.urlencode($image_url); +}); +``` \ No newline at end of file diff --git a/vendor/fguillot/picofeed/docs/favicon.markdown b/vendor/fguillot/picofeed/docs/favicon.markdown index 14e25955d..1ac3ee1fc 100644 --- a/vendor/fguillot/picofeed/docs/favicon.markdown +++ b/vendor/fguillot/picofeed/docs/favicon.markdown @@ -5,7 +5,7 @@ Find and download the favicon ----------------------------- ```php -use PicoFeed\Client\Favicon; +use PicoFeed\Reader\Favicon; $favicon = new Favicon; @@ -21,11 +21,42 @@ PicoFeed will try first to find the favicon from the meta tags and fallback to t When the HTML page is parsed, relative links and protocol relative links are converted to absolute url. +Get Favicon file type +--------------------- + +It's possible to fetch the image type, this information come from the Content-Type HTTP header: + +```php +$favicon = new Favicon; +$favicon->find('http://example.net/'); + +echo $favicon->getType(); + +// Will output the content type, by example "image/png" +``` + +Get the Favicon as Data URI +--------------------------- + +You can also get the whole image as Data URI. +It's useful if you want to store the icon in your database and avoid too many HTTP requests. + +```php +$favicon = new Favicon; +$favicon->find('http://example.net/'); + +echo $favicon->getDataUri(); + +// Output something like that: data:image/png;base64,iVBORw0KGgoAAAANSUh..... +``` + +See: http://en.wikipedia.org/wiki/Data_URI_scheme + Check if a favicon link exists ------------------------------ ```php -use PicoFeed\Client\Favicon; +use PicoFeed\Reader\Favicon; $favicon = new Favicon; @@ -40,7 +71,7 @@ Like other classes, the Favicon class support the Config object as constructor a ```php use PicoFeed\Config\Config; -use PicoFeed\Client\Favicon; +use PicoFeed\Reader\Favicon; $config = new Config; $config->setClientUserAgent('My RSS Reader'); diff --git a/vendor/fguillot/picofeed/docs/feed-parsing.markdown b/vendor/fguillot/picofeed/docs/feed-parsing.markdown index df881400e..82d3703e9 100644 --- a/vendor/fguillot/picofeed/docs/feed-parsing.markdown +++ b/vendor/fguillot/picofeed/docs/feed-parsing.markdown @@ -43,7 +43,8 @@ Output: ```bash Feed::id = tag:linuxfr.org,2005:/news Feed::title = LinuxFr.org : les dĂ©pĂȘches -Feed::url = http://linuxfr.org/news +Feed::feed_url = http://linuxfr.org/news.atom +Feed::site_url = http://linuxfr.org/news Feed::date = 1415138079 Feed::language = en-US Feed::description = @@ -58,6 +59,7 @@ Item::language = en-US Item::author = Syvolc Item::enclosure_url = Item::enclosure_type = +Item::isRTL() = false Item::content = 18307 bytes .... ``` @@ -181,7 +183,8 @@ Feed and item properties // Feed object $feed->getId(); // Unique feed id $feed->getTitle(); // Feed title -$feed->getUrl(); // Website url +$feed->getFeedUrl(); // Feed url +$feed->getSiteUrl(); // Website url $feed->getDate(); // Feed last updated date $feed->getLanguage(); // Feed language $feed->getDescription(); // Feed description @@ -198,16 +201,16 @@ $feed->items[0]->getAuthor(); // Item author $feed->items[0]->getEnclosureUrl(); // Enclosure url $feed->items[0]->getEnclosureType(); // Enclosure mime-type (audio/mp3, image/png...) $feed->items[0]->getContent(); // Item content (filtered or raw) +$feed->items[0]->isRTL(); // Return true if the item language is Right-To-Left ``` RTL language detection ---------------------- -There is an utility method to determine if a language code is Right-To-Left or not: +Use the method `Item::isRTL()` to test if an item is RTL or not: ```php -// Return true if RTL -Parser::isLanguageRTL($item->getLanguage()); +var_dump($item->isRTL()); // true or false ``` Known RTL languages are: diff --git a/vendor/fguillot/picofeed/docs/image-proxy.markdown b/vendor/fguillot/picofeed/docs/image-proxy.markdown new file mode 100644 index 000000000..74e10d0c6 --- /dev/null +++ b/vendor/fguillot/picofeed/docs/image-proxy.markdown @@ -0,0 +1,66 @@ +Image Proxy +=========== + +To prevent mixed content warnings on SSL pages served from your RSS reader you might want to use an assets proxy. + +Images url will be rewritten to be downloaded through the proxy. + +Example: + +```html + +``` + +Can be rewritten like that: + +```html + +``` + +Currently this feature is only compatible with images. + +There is several open source SSL image proxy available like [Camo](https://github.com/atmos/camo). +You can also write your own proxy. + +Usage +----- + +There two different ways to use this feature, define a proxy url or a callback. + +### Define a proxy url + +A proxy url must be defined with a placeholder `%s`. +The placeholder will be replaced by the image source urlencoded. + +```php +$config = new Config; +$config->setFilterImageProxyUrl('http://myproxy.example.org/?url=%s'); +``` + +Will rewrite the image source like that: + +```html + +``` + +### Define a callback + +Your callback will be called each time an image url need to be rewritten. +The first argument is the original image url and your function must returns the new image url. + +Here an example if your proxy need a shared secret key: + +```php +$config = new Config; + +$config->setFilterImageProxyCallback(function ($image_url) { + $key = hash_hmac('sha1', $image_url, 'secret'); + return 'https://mypublicproxy/'.$key.'/'.urlencode($image_url); +}); +``` + +Will generate an image url like that: + +```html + +``` diff --git a/vendor/fguillot/picofeed/example.php b/vendor/fguillot/picofeed/example.php deleted file mode 100644 index 6bf9b6e20..000000000 --- a/vendor/fguillot/picofeed/example.php +++ /dev/null @@ -1,45 +0,0 @@ -download('http://linuxfr.org/news.atom', $last_modified, $etag); - - if ($resource->isModified()) { - - $parser = $reader->getParser( - $resource->getUrl(), - $resource->getContent(), - $resource->getEncoding() - ); - - $feed = $parser->execute(); - - // Save your feed in your database - // ... - - // Store the Etag and the LastModified headers in your database - $etag = $resource->getEtag(); - $last_modified = $resource->getLastModified(); - - // ... - } - else { - - echo 'Not modified, nothing to do!'; - } -} -catch (PicoFeedException $e) { - // Do something... -} diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Client.php b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Client.php index e962ba4e8..c8c812c1a 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Client.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Client.php @@ -21,6 +21,14 @@ abstract class Client */ private $is_modified = true; + /** + * HTTP Content-Type + * + * @access private + * @var string + */ + private $content_type = ''; + /** * HTTP encoding * @@ -231,7 +239,8 @@ abstract class Client { if ($response['status'] == 200) { $this->content = $response['body']; - $this->encoding = $this->findCharset($response); + $this->content_type = $this->findContentType($response); + $this->encoding = $this->findCharset(); } } @@ -249,14 +258,26 @@ abstract class Client } /** - * Find charset from response headers + * Find content type from response headers * * @access public * @param array $response Client response + * @return string */ - public function findCharset(array $response) + public function findContentType(array $response) { - $result = explode('charset=', strtolower($this->getHeader($response, 'Content-Type'))); + return strtolower($this->getHeader($response, 'Content-Type')); + } + + /** + * Find charset from response headers + * + * @access public + * @return string + */ + public function findCharset() + { + $result = explode('charset=', $this->content_type); return isset($result[1]) ? $result[1] : ''; } @@ -389,6 +410,17 @@ abstract class Client return $this->content; } + /** + * Get the content type value from HTTP headers + * + * @access public + * @return string + */ + public function getContentType() + { + return $this->content_type; + } + /** * Get the encoding value from HTTP headers * diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Favicon.php b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Favicon.php deleted file mode 100644 index 5b63cf044..000000000 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Favicon.php +++ /dev/null @@ -1,170 +0,0 @@ -config = $config ?: new Config; - } - - /** - * Get the icon file content (available only after the download) - * - * @access public - * @return string - */ - public function getContent() - { - return $this->content; - } - - /** - * Download and check if a resource exists - * - * @access public - * @param string $url URL - * @return string Resource content - */ - public function download($url) - { - try { - - Logger::setMessage(get_called_class().' Download => '.$url); - - $client = Client::getInstance(); - $client->setConfig($this->config); - $client->execute($url); - - return $client->getContent(); - } - catch (ClientException $e) { - return ''; - } - } - - /** - * Check if a remote file exists - * - * @access public - * @param string $url URL - * @return boolean - */ - public function exists($url) - { - return $this->download($url) !== ''; - } - - /** - * Get the icon link for a website - * - * @access public - * @param string $website_link URL - * @return string - */ - public function find($website_link) - { - $website = new Url($website_link); - - $icons = $this->extract($this->download($website->getBaseUrl('/'))); - $icons[] = $website->getBaseUrl('/favicon.ico'); - - foreach ($icons as $icon_link) { - - $icon_link = $this->convertLink($website, new Url($icon_link)); - $this->content = $this->download($icon_link); - - if ($this->content !== '') { - return $icon_link; - } - } - - return ''; - } - - /** - * Convert icon links to absolute url - * - * @access public - * @param \PicoFeed\Client\Url $website Website url - * @param \PicoFeed\Client\Url $icon Icon url - * @return string - */ - public function convertLink(Url $website, Url $icon) - { - $base_url = ''; - - if ($icon->isRelativeUrl()) { - $base_url = $website->getBaseUrl(); - } - else if ($icon->isProtocolRelative()) { - $icon->setScheme($website->getScheme()); - } - - return $icon->getAbsoluteUrl($base_url); - } - - /** - * Extract the icon links from the HTML - * - * @access public - * @param string $html HTML - * @return array - */ - public function extract($html) - { - $icons = array(); - - if (empty($html)) { - return $icons; - } - - $dom = XmlParser::getHtmlDocument($html); - - $xpath = new DOMXpath($dom); - $elements = $xpath->query("//link[contains(@rel, 'icon') and not(contains(@rel, 'apple'))]"); - - for ($i = 0; $i < $elements->length; $i++) { - $icons[] = $elements->item($i)->getAttribute('href'); - } - - return $icons; - } -} diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Url.php b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Url.php index 90d7fb6f7..a74c23508 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Url.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Url.php @@ -79,6 +79,20 @@ class Url return $link->getAbsoluteUrl(); } + /** + * Shortcut method to get a base url + * + * @static + * @access public + * @param string $url + * @return string + */ + public static function base($url) + { + $link = new Url($url); + return $link->getBaseUrl(); + } + /** * Get the base URL * diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Config/Config.php b/vendor/fguillot/picofeed/lib/PicoFeed/Config/Config.php index 298b9a2d7..9a5381f0f 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Config/Config.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Config/Config.php @@ -30,6 +30,8 @@ namespace PicoFeed\Config; * @method \PicoFeed\Config\Config setFilterSchemeWhitelist(array $value) * @method \PicoFeed\Config\Config setFilterWhitelistedTags(array $value) * @method \PicoFeed\Config\Config setFilterBlacklistedTags(array $value) + * @method \PicoFeed\Config\Config setFilterImageProxyUrl($value) + * @method \PicoFeed\Config\Config setFilterImageProxyCallback($closure) * * @method integer getClientTimeout() * @method string getClientUserAgent() @@ -53,6 +55,8 @@ namespace PicoFeed\Config; * @method array getFilterSchemeWhitelist(array $default_value) * @method array getFilterWhitelistedTags(array $default_value) * @method array getFilterBlacklistedTags(array $default_value) + * @method string getFilterImageProxyUrl($default_value) + * @method string getFilterImageProxyCallback($default_value) */ class Config { diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Attribute.php b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Attribute.php index 23b1103ad..66b3470f8 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Attribute.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Attribute.php @@ -12,6 +12,22 @@ use \PicoFeed\Client\Url; */ class Attribute { + /** + * Image proxy url + * + * @access private + * @var string + */ + private $image_proxy_url = ''; + + /** + * Image proxy callback + * + * @access private + * @var \Closure|null + */ + private $image_proxy_callback = null; + /** * Tags and attribute whitelist * @@ -204,10 +220,11 @@ class Attribute 'filterEmptyAttribute', 'filterAllowedAttribute', 'filterIntegerAttribute', - 'filterAbsoluteUrlAttribute', + 'rewriteAbsoluteUrl', 'filterIframeAttribute', 'filterBlacklistResourceAttribute', 'filterProtocolUrlAttribute', + 'rewriteImageProxyUrl', ); /** @@ -349,7 +366,7 @@ class Attribute * @param string $value Atttribute value * @return boolean */ - public function filterAbsoluteUrlAttribute($tag, $attribute, &$value) + public function rewriteAbsoluteUrl($tag, $attribute, &$value) { if ($this->isResource($attribute)) { $value = Url::resolve($value, $this->website); @@ -358,6 +375,30 @@ class Attribute return true; } + /** + * Rewrite image url to use with a proxy + * + * @access public + * @param string $tag Tag name + * @param string $attribute Atttribute name + * @param string $value Atttribute value + * @return boolean + */ + public function rewriteImageProxyUrl($tag, $attribute, &$value) + { + if ($tag === 'img' && $attribute === 'src') { + + if ($this->image_proxy_url) { + $value = sprintf($this->image_proxy_url, urlencode($value)); + } + else if (is_callable($this->image_proxy_callback)) { + $value = call_user_func($this->image_proxy_callback, $value); + } + } + + return true; + } + /** * Return true if the scheme is authorized * @@ -484,7 +525,7 @@ class Attribute } /** - * Set whitelisted tags adn attributes for each tag + * Set whitelisted tags and attributes for each tag * * @access public * @param array $values List of tags: ['video' => ['src', 'cover'], 'img' => ['src']] @@ -586,4 +627,32 @@ class Attribute $this->iframe_whitelist = $values ?: $this->iframe_whitelist; return $this; } + + /** + * Set image proxy URL + * + * The original image url will be urlencoded + * + * @access public + * @param string $url Proxy URL + * @return \PicoFeed\Filter\Filter + */ + public function setImageProxyUrl($url) + { + $this->image_proxy_url = $url ?: $this->image_proxy_url; + return $this; + } + + /** + * Set image proxy callback + * + * @access public + * @param \Closure $callback + * @return \PicoFeed\Filter\Filter + */ + public function setImageProxyCallback($callback) + { + $this->image_proxy_callback = $callback ?: $this->image_proxy_callback; + return $this; + } } diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Filter.php b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Filter.php index 0490e2f49..82289444b 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Filter.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Filter.php @@ -2,6 +2,8 @@ namespace PicoFeed\Filter; +use PicoFeed\Parser\XmlParser; + /** * Filter class * @@ -137,34 +139,4 @@ class Filter return $data; } - - /** - * Get the first XML tag - * - * @static - * @access public - * @param string $data Feed content - * @return string - */ - public static function getFirstTag($data) - { - // Strip HTML comments (max of 5,000 characters long to prevent crashing) - $data = preg_replace('//Uis', '', $data); - - /* Strip Doctype: - * Doctype needs to be within the first 100 characters. (Ideally the first!) - * If it's not found by then, we need to stop looking to prevent PREG - * from reaching max backtrack depth and crashing. - */ - $data = preg_replace('/^.{0,100}]*)>/Uis', '', $data); - - // Strip '); - - return substr($data, $open_tag, $close_tag); - } } diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Html.php b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Html.php index d14d80907..f7816f1d2 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Html.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Html.php @@ -96,6 +96,8 @@ class Html $this->config = $config; if ($this->config !== null) { + $this->attribute->setImageProxyCallback($this->config->getFilterImageProxyCallback()); + $this->attribute->setImageProxyUrl($this->config->getFilterImageProxyUrl()); $this->attribute->setIframeWhitelist($this->config->getFilterIframeWhitelist(array())); $this->attribute->setIntegerAttributes($this->config->getFilterIntegerAttributes(array())); $this->attribute->setAttributeOverrides($this->config->getFilterAttributeOverrides(array())); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Atom.php b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Atom.php index 1217bc4b0..5bb930b22 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Atom.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Atom.php @@ -30,19 +30,31 @@ class Atom extends Parser * Find the feed url * * @access public - * @param SimpleXMLElement $xml Feed xml + * @param SimpleXMLElement $xml Feed xml * @param \PicoFeed\Parser\Feed $feed Feed object */ public function findFeedUrl(SimpleXMLElement $xml, Feed $feed) { - $feed->url = $this->getLink($xml); + $feed->feed_url = $this->getUrl($xml, 'self'); + } + + /** + * Find the site url + * + * @access public + * @param SimpleXMLElement $xml Feed xml + * @param \PicoFeed\Parser\Feed $feed Feed object + */ + public function findSiteUrl(SimpleXMLElement $xml, Feed $feed) + { + $feed->site_url = $this->getUrl($xml, 'alternate', true); } /** * Find the feed description * * @access public - * @param SimpleXMLElement $xml Feed xml + * @param SimpleXMLElement $xml Feed xml * @param \PicoFeed\Parser\Feed $feed Feed object */ public function findFeedDescription(SimpleXMLElement $xml, Feed $feed) @@ -54,7 +66,7 @@ class Atom extends Parser * Find the feed logo url * * @access public - * @param SimpleXMLElement $xml Feed xml + * @param SimpleXMLElement $xml Feed xml * @param \PicoFeed\Parser\Feed $feed Feed object */ public function findFeedLogo(SimpleXMLElement $xml, Feed $feed) @@ -66,19 +78,19 @@ class Atom extends Parser * Find the feed title * * @access public - * @param SimpleXMLElement $xml Feed xml + * @param SimpleXMLElement $xml Feed xml * @param \PicoFeed\Parser\Feed $feed Feed object */ public function findFeedTitle(SimpleXMLElement $xml, Feed $feed) { - $feed->title = Filter::stripWhiteSpace((string) $xml->title) ?: $feed->url; + $feed->title = Filter::stripWhiteSpace((string) $xml->title) ?: $feed->getSiteUrl(); } /** * Find the feed language * * @access public - * @param SimpleXMLElement $xml Feed xml + * @param SimpleXMLElement $xml Feed xml * @param \PicoFeed\Parser\Feed $feed Feed object */ public function findFeedLanguage(SimpleXMLElement $xml, Feed $feed) @@ -90,7 +102,7 @@ class Atom extends Parser * Find the feed id * * @access public - * @param SimpleXMLElement $xml Feed xml + * @param SimpleXMLElement $xml Feed xml * @param \PicoFeed\Parser\Feed $feed Feed object */ public function findFeedId(SimpleXMLElement $xml, Feed $feed) @@ -102,7 +114,7 @@ class Atom extends Parser * Find the feed date * * @access public - * @param SimpleXMLElement $xml Feed xml + * @param SimpleXMLElement $xml Feed xml * @param \PicoFeed\Parser\Feed $feed Feed object */ public function findFeedDate(SimpleXMLElement $xml, Feed $feed) @@ -115,7 +127,7 @@ class Atom extends Parser * * @access public * @param SimpleXMLElement $entry Feed item - * @param Item $item Item object + * @param Item $item Item object */ public function findItemDate(SimpleXMLElement $entry, Item $item) { @@ -145,8 +157,8 @@ class Atom extends Parser * Find the item author * * @access public - * @param SimpleXMLElement $xml Feed - * @param SimpleXMLElement $entry Feed item + * @param SimpleXMLElement $xml Feed + * @param SimpleXMLElement $entry Feed item * @param \PicoFeed\Parser\Item $item Item object */ public function findItemAuthor(SimpleXMLElement $xml, SimpleXMLElement $entry, Item $item) @@ -180,7 +192,7 @@ class Atom extends Parser */ public function findItemUrl(SimpleXMLElement $entry, Item $item) { - $item->url = $this->getLink($entry); + $item->url = $this->getUrl($entry, 'alternate'); } /** @@ -215,13 +227,11 @@ class Atom extends Parser */ public function findItemEnclosure(SimpleXMLElement $entry, Item $item, Feed $feed) { - foreach ($entry->link as $link) { - if ((string) $link['rel'] === 'enclosure') { + $enclosure = $this->findLink($entry, 'enclosure'); - $item->enclosure_url = Url::resolve((string) $link['href'], $feed->url); - $item->enclosure_type = (string) $link['type']; - break; - } + if ($enclosure) { + $item->enclosure_url = Url::resolve((string) $enclosure['href'], $feed->getSiteUrl()); + $item->enclosure_type = (string) $enclosure['type']; } } @@ -241,29 +251,54 @@ class Atom extends Parser /** * Get the URL from a link tag * - * @access public - * @param SimpleXMLElement $xml XML tag + * @access private + * @param SimpleXMLElement $xml XML tag + * @param string $rel Link relationship: alternate, enclosure, related, self, via * @return string */ - public function getLink(SimpleXMLElement $xml) + private function getUrl(SimpleXMLElement $xml, $rel, $fallback = false) + { + $link = $this->findLink($xml, $rel); + + if ($link) { + return (string) $link['href']; + } + + if ($fallback) { + $link = $this->findLink($xml, ''); + return $link ? (string) $link['href'] : ''; + } + + return ''; + } + + /** + * Get a link tag that match a relationship + * + * @access private + * @param SimpleXMLElement $xml XML tag + * @param string $rel Link relationship: alternate, enclosure, related, self, via + * @return SimpleXMLElement|null + */ + private function findLink(SimpleXMLElement $xml, $rel) { foreach ($xml->link as $link) { - if ((string) $link['type'] === 'text/html' || (string) $link['type'] === 'application/xhtml+xml') { - return (string) $link['href']; + if (empty($rel) || $rel === (string) $link['rel']) { + return $link; } } - return (string) $xml->link['href']; + return null; } /** * Get the entry content * - * @access public + * @access private * @param SimpleXMLElement $entry XML Entry * @return string */ - public function getContent(SimpleXMLElement $entry) + private function getContent(SimpleXMLElement $entry) { if (isset($entry->content) && ! empty($entry->content)) { diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Feed.php b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Feed.php index 77a6f0c97..b8edbd6f8 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Feed.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Feed.php @@ -48,7 +48,15 @@ class Feed * @access public * @var string */ - public $url = ''; + public $feed_url = ''; + + /** + * Site url + * + * @access public + * @var string + */ + public $site_url = ''; /** * Feed date @@ -84,7 +92,7 @@ class Feed { $output = ''; - foreach (array('id', 'title', 'url', 'date', 'language', 'description', 'logo') as $property) { + foreach (array('id', 'title', 'feed_url', 'site_url', 'date', 'language', 'description', 'logo') as $property) { $output .= 'Feed::'.$property.' = '.$this->$property.PHP_EOL; } @@ -132,14 +140,25 @@ class Feed } /** - * Get url + * Get feed url + * + * @access public + * $return string + */ + public function getFeedUrl() + { + return $this->feed_url; + } + + /** + * Get site url * * @access public * $return string */ - public function getUrl() + public function getSiteUrl() { - return $this->url; + return $this->site_url; } /** diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Item.php b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Item.php index 1731f5a29..6b2864ba7 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Item.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Item.php @@ -10,6 +10,23 @@ namespace PicoFeed\Parser; */ class Item { + /** + * List of known RTL languages + * + * @access public + * @var public + */ + public $rtl = array( + 'ar', // Arabic (ar-**) + 'fa', // Farsi (fa-**) + 'ur', // Urdu (ur-**) + 'ps', // Pashtu (ps-**) + 'syr', // Syriac (syr-**) + 'dv', // Divehi (dv-**) + 'he', // Hebrew (he-**) + 'yi', // Yiddish (yi-**) + ); + /** * Item id * @@ -96,6 +113,7 @@ class Item $output .= 'Item::'.$property.' = '.$this->$property.PHP_EOL; } + $output .= 'Item::isRTL() = '.($this->isRTL() ? 'true' : 'false').PHP_EOL; $output .= 'Item::content = '.strlen($this->content).' bytes'.PHP_EOL; return $output; @@ -199,4 +217,23 @@ class Item { return $this->author; } + + /** + * Return true if the item is "Right to Left" + * + * @access public + * @return bool + */ + public function isRTL() + { + $language = strtolower($this->language); + + foreach ($this->rtl as $prefix) { + if (strpos($language, $prefix) === 0) { + return true; + } + } + + return false; + } } diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php index 80e09e016..de73504e4 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php @@ -141,6 +141,9 @@ abstract class Parser $this->findFeedUrl($xml, $feed); $this->checkFeedUrl($feed); + $this->findSiteUrl($xml, $feed); + $this->checkSiteUrl($feed); + $this->findFeedTitle($xml, $feed); $this->findFeedDescription($xml, $feed); $this->findFeedLanguage($xml, $feed); @@ -185,10 +188,27 @@ abstract class Parser */ public function checkFeedUrl(Feed $feed) { - $url = new Url($feed->getUrl()); + if ($feed->getFeedUrl() === '') { + $feed->feed_url = $this->fallback_url; + } + else { + $feed->feed_url = Url::resolve($feed->getFeedUrl(), $this->fallback_url); + } + } - if ($url->isRelativeUrl()) { - $feed->url = $this->fallback_url; + /** + * Check if the site url is correct + * + * @access public + * @param Feed $feed Feed object + */ + public function checkSiteUrl(Feed $feed) + { + if ($feed->getSiteUrl() === '') { + $feed->site_url = Url::base($feed->getFeedUrl()); + } + else { + $feed->site_url = Url::resolve($feed->getSiteUrl(), $this->fallback_url); } } @@ -201,11 +221,7 @@ abstract class Parser */ public function checkItemUrl(Feed $feed, Item $item) { - $url = new Url($item->getUrl()); - - if ($url->isRelativeUrl()) { - $item->url = Url::resolve($item->getUrl(), $feed->getUrl()); - } + $item->url = Url::resolve($item->getUrl(), $feed->getSiteUrl()); } /** @@ -238,7 +254,7 @@ abstract class Parser public function filterItemContent(Feed $feed, Item $item) { if ($this->isFilteringEnabled()) { - $filter = Filter::html($item->getContent(), $feed->getUrl()); + $filter = Filter::html($item->getContent(), $feed->getSiteUrl()); $filter->setConfig($this->config); $item->content = $filter->execute(); } @@ -283,6 +299,7 @@ abstract class Parser 'D, d M Y H:i:s' => 25, 'D, d M Y h:i:s' => 25, 'D M d Y H:i:s' => 24, + 'j M Y H:i:s' => 20, 'Y-m-d H:i:s' => 19, 'Y-m-d\TH:i:s' => 19, 'd/m/Y H:i:s' => 19, @@ -339,38 +356,6 @@ abstract class Parser return 0; } - /** - * Return true if the given language is "Right to Left" - * - * @static - * @access public - * @param string $language Language: fr-FR, en-US - * @return bool - */ - public static function isLanguageRTL($language) - { - $language = strtolower($language); - - $rtl_languages = array( - 'ar', // Arabic (ar-**) - 'fa', // Farsi (fa-**) - 'ur', // Urdu (ur-**) - 'ps', // Pashtu (ps-**) - 'syr', // Syriac (syr-**) - 'dv', // Divehi (dv-**) - 'he', // Hebrew (he-**) - 'yi', // Yiddish (yi-**) - ); - - foreach ($rtl_languages as $prefix) { - if (strpos($language, $prefix) === 0) { - return true; - } - } - - return false; - } - /** * Set Hash algorithm used for id generation * @@ -464,16 +449,25 @@ abstract class Parser * Find the feed url * * @access public - * @param SimpleXMLElement $xml Feed xml + * @param SimpleXMLElement $xml Feed xml * @param \PicoFeed\Parser\Feed $feed Feed object */ public abstract function findFeedUrl(SimpleXMLElement $xml, Feed $feed); + /** + * Find the site url + * + * @access public + * @param SimpleXMLElement $xml Feed xml + * @param \PicoFeed\Parser\Feed $feed Feed object + */ + public abstract function findSiteUrl(SimpleXMLElement $xml, Feed $feed); + /** * Find the feed title * * @access public - * @param SimpleXMLElement $xml Feed xml + * @param SimpleXMLElement $xml Feed xml * @param \PicoFeed\Parser\Feed $feed Feed object */ public abstract function findFeedTitle(SimpleXMLElement $xml, Feed $feed); @@ -482,7 +476,7 @@ abstract class Parser * Find the feed description * * @access public - * @param SimpleXMLElement $xml Feed xml + * @param SimpleXMLElement $xml Feed xml * @param \PicoFeed\Parser\Feed $feed Feed object */ public abstract function findFeedDescription(SimpleXMLElement $xml, Feed $feed); @@ -491,7 +485,7 @@ abstract class Parser * Find the feed language * * @access public - * @param SimpleXMLElement $xml Feed xml + * @param SimpleXMLElement $xml Feed xml * @param \PicoFeed\Parser\Feed $feed Feed object */ public abstract function findFeedLanguage(SimpleXMLElement $xml, Feed $feed); @@ -500,7 +494,7 @@ abstract class Parser * Find the feed id * * @access public - * @param SimpleXMLElement $xml Feed xml + * @param SimpleXMLElement $xml Feed xml * @param \PicoFeed\Parser\Feed $feed Feed object */ public abstract function findFeedId(SimpleXMLElement $xml, Feed $feed); @@ -509,7 +503,7 @@ abstract class Parser * Find the feed date * * @access public - * @param SimpleXMLElement $xml Feed xml + * @param SimpleXMLElement $xml Feed xml * @param \PicoFeed\Parser\Feed $feed Feed object */ public abstract function findFeedDate(SimpleXMLElement $xml, Feed $feed); @@ -518,7 +512,7 @@ abstract class Parser * Find the feed logo url * * @access public - * @param SimpleXMLElement $xml Feed xml + * @param SimpleXMLElement $xml Feed xml * @param \PicoFeed\Parser\Feed $feed Feed object */ public abstract function findFeedLogo(SimpleXMLElement $xml, Feed $feed); @@ -536,8 +530,8 @@ abstract class Parser * Find the item author * * @access public - * @param SimpleXMLElement $xml Feed - * @param SimpleXMLElement $entry Feed item + * @param SimpleXMLElement $xml Feed + * @param SimpleXMLElement $entry Feed item * @param \PicoFeed\Parser\Item $item Item object */ public abstract function findItemAuthor(SimpleXMLElement $xml, SimpleXMLElement $entry, Item $item); @@ -546,7 +540,7 @@ abstract class Parser * Find the item URL * * @access public - * @param SimpleXMLElement $entry Feed item + * @param SimpleXMLElement $entry Feed item * @param \PicoFeed\Parser\Item $item Item object */ public abstract function findItemUrl(SimpleXMLElement $entry, Item $item); @@ -555,7 +549,7 @@ abstract class Parser * Find the item title * * @access public - * @param SimpleXMLElement $entry Feed item + * @param SimpleXMLElement $entry Feed item * @param \PicoFeed\Parser\Item $item Item object */ public abstract function findItemTitle(SimpleXMLElement $entry, Item $item); @@ -564,7 +558,7 @@ abstract class Parser * Genereate the item id * * @access public - * @param SimpleXMLElement $entry Feed item + * @param SimpleXMLElement $entry Feed item * @param \PicoFeed\Parser\Item $item Item object * @param \PicoFeed\Parser\Feed $feed Feed object */ @@ -574,7 +568,7 @@ abstract class Parser * Find the item date * * @access public - * @param SimpleXMLElement $entry Feed item + * @param SimpleXMLElement $entry Feed item * @param \PicoFeed\Parser\Item $item Item object */ public abstract function findItemDate(SimpleXMLElement $entry, Item $item); @@ -583,7 +577,7 @@ abstract class Parser * Find the item content * * @access public - * @param SimpleXMLElement $entry Feed item + * @param SimpleXMLElement $entry Feed item * @param \PicoFeed\Parser\Item $item Item object */ public abstract function findItemContent(SimpleXMLElement $entry, Item $item); @@ -592,7 +586,7 @@ abstract class Parser * Find the item enclosure * * @access public - * @param SimpleXMLElement $entry Feed item + * @param SimpleXMLElement $entry Feed item * @param \PicoFeed\Parser\Item $item Item object * @param \PicoFeed\Parser\Feed $feed Feed object */ @@ -602,7 +596,7 @@ abstract class Parser * Find the item language * * @access public - * @param SimpleXMLElement $entry Feed item + * @param SimpleXMLElement $entry Feed item * @param \PicoFeed\Parser\Item $item Item object * @param \PicoFeed\Parser\Feed $feed Feed object */ diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Rss20.php b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Rss20.php index 80e65fab8..c0417f9ac 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Rss20.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Rss20.php @@ -30,34 +30,31 @@ class Rss20 extends Parser * Find the feed url * * @access public - * @param SimpleXMLElement $xml Feed xml + * @param SimpleXMLElement $xml Feed xml * @param \PicoFeed\Parser\Feed $feed Feed object */ public function findFeedUrl(SimpleXMLElement $xml, Feed $feed) { - if ($xml->channel->link && $xml->channel->link->count() > 1) { - - foreach ($xml->channel->link as $xml_link) { - - $link = (string) $xml_link; - - if ($link !== '') { - $feed->url = $link; - break; - } - } - } - else { + $feed->feed_url = ''; + } - $feed->url = (string) $xml->channel->link; - } + /** + * Find the site url + * + * @access public + * @param SimpleXMLElement $xml Feed xml + * @param \PicoFeed\Parser\Feed $feed Feed object + */ + public function findSiteUrl(SimpleXMLElement $xml, Feed $feed) + { + $feed->site_url = (string) $xml->channel->link; } /** * Find the feed description * * @access public - * @param SimpleXMLElement $xml Feed xml + * @param SimpleXMLElement $xml Feed xml * @param \PicoFeed\Parser\Feed $feed Feed object */ public function findFeedDescription(SimpleXMLElement $xml, Feed $feed) @@ -69,7 +66,7 @@ class Rss20 extends Parser * Find the feed logo url * * @access public - * @param SimpleXMLElement $xml Feed xml + * @param SimpleXMLElement $xml Feed xml * @param \PicoFeed\Parser\Feed $feed Feed object */ public function findFeedLogo(SimpleXMLElement $xml, Feed $feed) @@ -83,19 +80,19 @@ class Rss20 extends Parser * Find the feed title * * @access public - * @param SimpleXMLElement $xml Feed xml + * @param SimpleXMLElement $xml Feed xml * @param \PicoFeed\Parser\Feed $feed Feed object */ public function findFeedTitle(SimpleXMLElement $xml, Feed $feed) { - $feed->title = Filter::stripWhiteSpace((string) $xml->channel->title) ?: $feed->url; + $feed->title = Filter::stripWhiteSpace((string) $xml->channel->title) ?: $feed->getSiteUrl(); } /** * Find the feed language * * @access public - * @param SimpleXMLElement $xml Feed xml + * @param SimpleXMLElement $xml Feed xml * @param \PicoFeed\Parser\Feed $feed Feed object */ public function findFeedLanguage(SimpleXMLElement $xml, Feed $feed) @@ -107,19 +104,19 @@ class Rss20 extends Parser * Find the feed id * * @access public - * @param SimpleXMLElement $xml Feed xml + * @param SimpleXMLElement $xml Feed xml * @param \PicoFeed\Parser\Feed $feed Feed object */ public function findFeedId(SimpleXMLElement $xml, Feed $feed) { - $feed->id = $feed->url; + $feed->id = $feed->getFeedUrl() ?: $feed->getSiteUrl(); } /** * Find the feed date * * @access public - * @param SimpleXMLElement $xml Feed xml + * @param SimpleXMLElement $xml Feed xml * @param \PicoFeed\Parser\Feed $feed Feed object */ public function findFeedDate(SimpleXMLElement $xml, Feed $feed) @@ -132,7 +129,7 @@ class Rss20 extends Parser * Find the item date * * @access public - * @param SimpleXMLElement $entry Feed item + * @param SimpleXMLElement $entry Feed item * @param \PicoFeed\Parser\Item $item Item object */ public function findItemDate(SimpleXMLElement $entry, Item $item) @@ -154,7 +151,7 @@ class Rss20 extends Parser * Find the item title * * @access public - * @param SimpleXMLElement $entry Feed item + * @param SimpleXMLElement $entry Feed item * @param \PicoFeed\Parser\Item $item Item object */ public function findItemTitle(SimpleXMLElement $entry, Item $item) @@ -170,8 +167,8 @@ class Rss20 extends Parser * Find the item author * * @access public - * @param SimpleXMLElement $xml Feed - * @param SimpleXMLElement $entry Feed item + * @param SimpleXMLElement $xml Feed + * @param SimpleXMLElement $entry Feed item * @param \PicoFeed\Parser\Item $item Item object */ public function findItemAuthor(SimpleXMLElement $xml, SimpleXMLElement $entry, Item $item) @@ -192,7 +189,7 @@ class Rss20 extends Parser * Find the item content * * @access public - * @param SimpleXMLElement $entry Feed item + * @param SimpleXMLElement $entry Feed item * @param \PicoFeed\Parser\Item $item Item object */ public function findItemContent(SimpleXMLElement $entry, Item $item) @@ -210,7 +207,7 @@ class Rss20 extends Parser * Find the item URL * * @access public - * @param SimpleXMLElement $entry Feed item + * @param SimpleXMLElement $entry Feed item * @param \PicoFeed\Parser\Item $item Item object */ public function findItemUrl(SimpleXMLElement $entry, Item $item) @@ -234,7 +231,7 @@ class Rss20 extends Parser * Genereate the item id * * @access public - * @param SimpleXMLElement $entry Feed item + * @param SimpleXMLElement $entry Feed item * @param \PicoFeed\Parser\Item $item Item object * @param \PicoFeed\Parser\Feed $feed Feed object */ @@ -256,7 +253,7 @@ class Rss20 extends Parser * Find the item enclosure * * @access public - * @param SimpleXMLElement $entry Feed item + * @param SimpleXMLElement $entry Feed item * @param \PicoFeed\Parser\Item $item Item object * @param \PicoFeed\Parser\Feed $feed Feed object */ @@ -271,7 +268,7 @@ class Rss20 extends Parser } $item->enclosure_type = isset($entry->enclosure['type']) ? (string) $entry->enclosure['type'] : ''; - $item->enclosure_url = Url::resolve($item->enclosure_url, $feed->url); + $item->enclosure_url = Url::resolve($item->enclosure_url, $feed->getSiteUrl()); } } diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Reader/Favicon.php b/vendor/fguillot/picofeed/lib/PicoFeed/Reader/Favicon.php new file mode 100644 index 000000000..f762c56b2 --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Reader/Favicon.php @@ -0,0 +1,209 @@ +config = $config ?: new Config; + } + + /** + * Get the icon file content (available only after the download) + * + * @access public + * @return string + */ + public function getContent() + { + return $this->content; + } + + /** + * Get the icon file type (available only after the download) + * + * @access public + * @return string + */ + public function getType() + { + return $this->content_type; + } + + /** + * Get data URI (http://en.wikipedia.org/wiki/Data_URI_scheme) + * + * @access public + * @return string + */ + public function getDataUri() + { + return sprintf( + 'data:%s;base64,%s', + $this->content_type, + base64_encode($this->content) + ); + } + + /** + * Download and check if a resource exists + * + * @access public + * @param string $url URL + * @return \PicoFeed\Client Client instance + */ + public function download($url) + { + $client = Client::getInstance(); + $client->setConfig($this->config); + + Logger::setMessage(get_called_class().' Download => '.$url); + + try { + $client->execute($url); + } + catch (ClientException $e) { + Logger::setMessage(get_called_class().' Download Failed => '.$e->getMessage()); + } + + return $client; + } + + /** + * Check if a remote file exists + * + * @access public + * @param string $url URL + * @return boolean + */ + public function exists($url) + { + return $this->download($url)->getContent() !== ''; + } + + /** + * Get the icon link for a website + * + * @access public + * @param string $website_link URL + * @return string + */ + public function find($website_link) + { + $website = new Url($website_link); + + $icons = $this->extract($this->download($website->getBaseUrl('/'))->getContent()); + $icons[] = $website->getBaseUrl('/favicon.ico'); + + foreach ($icons as $icon_link) { + + $icon_link = $this->convertLink($website, new Url($icon_link)); + $resource = $this->download($icon_link); + $this->content = $resource->getContent(); + $this->content_type = $resource->getContentType(); + + if ($this->content !== '') { + return $icon_link; + } + } + + return ''; + } + + /** + * Convert icon links to absolute url + * + * @access public + * @param \PicoFeed\Client\Url $website Website url + * @param \PicoFeed\Client\Url $icon Icon url + * @return string + */ + public function convertLink(Url $website, Url $icon) + { + $base_url = ''; + + if ($icon->isRelativeUrl()) { + $base_url = $website->getBaseUrl(); + } + else if ($icon->isProtocolRelative()) { + $icon->setScheme($website->getScheme()); + } + + return $icon->getAbsoluteUrl($base_url); + } + + /** + * Extract the icon links from the HTML + * + * @access public + * @param string $html HTML + * @return array + */ + public function extract($html) + { + $icons = array(); + + if (empty($html)) { + return $icons; + } + + $dom = XmlParser::getHtmlDocument($html); + + $xpath = new DOMXpath($dom); + $elements = $xpath->query("//link[contains(@rel, 'icon') and not(contains(@rel, 'apple'))]"); + + for ($i = 0; $i < $elements->length; $i++) { + $icons[] = $elements->item($i)->getAttribute('href'); + } + + return $icons; + } +} diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Reader/Reader.php b/vendor/fguillot/picofeed/lib/PicoFeed/Reader/Reader.php index a64eb139c..5b807e251 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Reader/Reader.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Reader/Reader.php @@ -26,11 +26,11 @@ class Reader * @var array */ private $formats = array( - 'Atom' => array(' array('