diff options
Diffstat (limited to '3rdparty/fguillot/picofeed/docs/feed-parsing.markdown')
-rw-r--r-- | 3rdparty/fguillot/picofeed/docs/feed-parsing.markdown | 269 |
1 files changed, 114 insertions, 155 deletions
diff --git a/3rdparty/fguillot/picofeed/docs/feed-parsing.markdown b/3rdparty/fguillot/picofeed/docs/feed-parsing.markdown index 10f20d31a..22f84339b 100644 --- a/3rdparty/fguillot/picofeed/docs/feed-parsing.markdown +++ b/3rdparty/fguillot/picofeed/docs/feed-parsing.markdown @@ -1,205 +1,164 @@ Feed parsing ============ -Download and parse a feed -------------------------- - -Try this example from a command line script: +Parsing a subscription +---------------------- ```php -<?php - -require 'path/to/PicoFeed.php'; - -use PicoFeed\Reader; +use PicoFeed\Reader\Reader; +use PicoFeed\PicoFeedException; -$reader = new Reader; +try { -// Try to discover the XML feed automatically -$reader->download('http://bbc.co.uk/news'); + $reader = new Reader; -$parser = $reader->getParser(); + // Return a resource + $resource = $reader->download('https://linuxfr.org/news.atom'); -if ($parser !== false) { + // Return the right parser instance according to the feed format + $parser = $reader->getParser( + $resource->getUrl(), + $resource->getContent(), + $resource->getEncoding() + ); + // Return a Feed object $feed = $parser->execute(); - if ($feed !== false) { - echo $feed; - } + // Print the feed properties with the magic method __toString() + echo $feed; +} +catch (PicoFeedException $e) { + // Do Something... 
} ``` -- The method `getParser()` return `false` when there is something wrong during the download or the feed detection -- The call `$parser->execute()` return `false` when there is a parsing error - -In your terminal you will got an output like that: - -``` -Feed::id = http://www.bbc.co.uk/news/#sa-ns_mchannel=rss&ns_source=PublicRSS20-sa -Feed::title = BBC News - Home -Feed::url = http://www.bbc.co.uk/news/#sa-ns_mchannel=rss&ns_source=PublicRSS20-sa -Feed::date = 1399934742 -Feed::language = en-gb -Feed::items = 84 items +- The Reader class is the entry point for feed reading +- The method `download()` fetches the remote content and returns a resource, an instance of `PicoFeed\Client\Client` +- The method `getParser()` returns a Parser instance according to the feed format Atom, Rss 2.0... +- The parser itself returns a `Feed` object that contains feed and item properties + +Output: + +```bash +Feed::id = tag:linuxfr.org,2005:/news +Feed::title = LinuxFr.org : les dépêches +Feed::url = http://linuxfr.org/news +Feed::date = 1415138079 +Feed::language = en-US +Feed::description = +Feed::logo = +Feed::items = 15 items ---- -Item::id = e411a646 -Item::title = Nigeria rejects captive girls 'swap' -Item::url = http://www.bbc.co.uk/news/world-africa-27386285#sa-ns_mchannel=rss&ns_source=PublicRSS20-sa -Item::date = 1399933404 -Item::language = en-gb -Item::author = +Item::id = 38d8f48284fb03940cbb3aff9101089b81e44efb1281641bdd7c3e7e4bf3b0cd +Item::title = openSUSE 13.2 : nouvelle version du caméléon disponible ! +Item::url = http://linuxfr.org/news/opensuse-13-2-nouvelle-version-du-cameleon-disponible +Item::date = 1415122640 +Item::language = en-US +Item::author = Syvolc Item::enclosure_url = Item::enclosure_type = -Item::content = <p>Nigeria insists i... 
(152 bytes) +Item::content = 18307 bytes ---- -Item::id = 6c50fcf2 -Item::title = Woman tells of Harris 'assaults' -Item::url = http://www.bbc.co.uk/news/uk-27371573#sa-ns_mchannel=rss&ns_source=PublicRSS20-sa -Item::date = 1399908906 -Item::language = en-gb -Item::author = +Item::id = d0ebddc90bfc3f109f9be00a3bb0b4a770af7a647cdc88454fe15d79168e0dea +Item::title = Fuzix OS, parce que les petites choses sont belles +Item::url = http://linuxfr.org/news/fuzix-os-parce-que-les-petites-choses-sont-belles +Item::date = 1415112167 +Item::language = en-US +Item::author = Thomas DEBESSE Item::enclosure_url = Item::enclosure_type = -Item::content = <p>A woman tells the... (142 bytes) -........... -``` - -This ouput is generated by the magic method `__toString()` of the class `Feed` and `Item`. -All properties are public and they are also available with getter methods: - -```php - -// Examples for the feed: -echo $feed->getId(); // Unique feed id -echo $feed->getTitle(); // Feed title -echo $feed->getUrl(); // Feed url -echo $feed->getDate(); // Feed last updated date -echo $feed->getLanguage(); // Feed language -echo $feed->getDescription(); // Feed description -echo $feed->getLogo(); // Feed logo (can be a large image, different from icon) -echo $feed->getItems(); // List of items - -// Examples for items: -echo $feed->items[0]->getId(); -echo $feed->items[0]->getTitle(); -echo $feed->items[0]->getUrl(); -echo $feed->items[0]->getDate(); -echo $feed->items[0]->getLanguage(); -echo $feed->items[0]->getAuthor(); -echo $feed->items[0]->getEnclosureUrl(); -echo $feed->items[0]->getEnclosureType(); -echo $feed->items[0]->getContent(); +Item::content = 6104 bytes +.... ``` -Handle HTTP cache ------------------ - -To avoid downloading and parsing the feed each time, it's a good idea to handle the HTTP caching: +Get the list of available subscriptions for a website +----------------------------------------------------- -1. 
After the first HTTP request, we save somewhere (in a database) the headers Etag and Last-Modified for the next checks -2. If the feed is not modified, we don't need to parse again the feed - -Example: +The example below will return all available subscriptions for the website: ```php -use PicoFeed\Reader; - -$reader = new Reader; +use PicoFeed\Reader\Reader; -// Get last modified infos from previous requests -$lastModified = '...'; -$etag = '...'; +try { -// Download directly the feed -$resource = $reader->download('http://linuxfr.org/news.atom', $lastModified, $etag); + $reader = new Reader; + $resource = $reader->download('http://www.cnn.com'); -// Return true is the feed has changed -if ($resource->isModified()) { + $feeds = $reader->find( + $resource->getUrl(), + $resource->getContent() + ); - $parser = $reader->getParser(); - - if ($parser !== false) { - - $feed = $parser->execute(); - - if ($feed !== false) { - - // Save cache infos for the next request - $lastModified = $resource->getLastModified(); - $etag = $resource->getEtag(); - } - } + print_r($feeds); +} +catch (PicoFeedException $e) { + // Do something... } ``` -Use a custom user agent ------------------------ - -You have to define a custom configuration for that: +Output: ```php -use PicoFeed\Reader; -use PicoFeed\Config; - -$config = new Config; -$config->setClientUserAgent('My RSS Reader'); - -$reader = new Reader($config); -... +Array +( + [0] => http://rss.cnn.com/rss/cnn_topstories.rss + [1] => http://rss.cnn.com/rss/cnn_latest.rss +) ``` -The complete config parameters are [described here](config.markdown). +Feed discovery and parsing +-------------------------- -Set a custom timezone ---------------------- - -By default, the timezone used is UTC but you can define a custom timezone for the logging and item parsing. 
+This example will automatically discover the subscription and parse the feed: ```php -use PicoFeed\Reader; -use PicoFeed\Config; - -$config = new Config; -$config->setTimezone('Europe/Paris'); +try { -$reader = new Reader($config); -... -``` - -[List of supported TimeZones](http://php.net/manual/en/timezones.php) - -Disable content filtering -------------------------- + $reader = new Reader; + $resource = $reader->discover('http://linuxfr.org'); -If you want to disable the internal filtering system to use an external library like [HTMLPurifier](http://htmlpurifier.org): - -```php -use PicoFeed\Reader; -use PicoFeed\Config; + $parser = $reader->getParser( + $resource->getUrl(), + $resource->getContent(), + $resource->getEncoding() + ); -$config = new Config; -$config->setTimezone('Europe/Paris'); -$config->setContentFiltering(false); - -$reader = new Reader($config); -... + $feed = $parser->execute(); + echo $feed; +} +catch (PicoFeedException $e) { +} ``` -or +HTTP caching +------------ -```php -use PicoFeed\Reader; +TODO -$reader = new Reader; -$reader->download('http://.....'); -$parser = $reader->getParser(); +Feed and item properties +------------------------ -if ($parser !== false) { - - $parser->disableContentFiltering(); // <= Disable content filtering - $feed = $parser->execute(); - // ... 
-} +```php +// Feed object +$feed->getId(); // Unique feed id +$feed->getTitle(); // Feed title +$feed->getUrl(); // Website url +$feed->getDate(); // Feed last updated date +$feed->getLanguage(); // Feed language +$feed->getDescription(); // Feed description +$feed->getLogo(); // Feed logo (can be a large image, different from icon) +$feed->getItems(); // List of item objects + +// Item object +$feed->items[0]->getId(); // Item unique id (hash) +$feed->items[0]->getTitle(); // Item title +$feed->items[0]->getUrl(); // Item url +$feed->items[0]->getDate(); // Item published date (timestamp) +$feed->items[0]->getLanguage(); // Item language +$feed->items[0]->getAuthor(); // Item author +$feed->items[0]->getEnclosureUrl(); // Enclosure url +$feed->items[0]->getEnclosureType(); // Enclosure mime-type (audio/mp3, image/png...) +$feed->items[0]->getContent(); // Item content (filtered or raw) ``` |