From 7f81afd7efbeb931d16ce1f3a1ed53a54226d553 Mon Sep 17 00:00:00 2001 From: Bernhard Posselt Date: Fri, 7 Nov 2014 12:32:05 +0100 Subject: move 3rdparty directory to vendor to be more consistent with composer standard and because we also use js/vendor for third party libs --- vendor/fguillot/picofeed/docs/config.markdown | 263 +++++++++++++++++++++ vendor/fguillot/picofeed/docs/debugging.markdown | 46 ++++ vendor/fguillot/picofeed/docs/favicon.markdown | 56 +++++ .../fguillot/picofeed/docs/feed-creation.markdown | 72 ++++++ .../fguillot/picofeed/docs/feed-parsing.markdown | 164 +++++++++++++ vendor/fguillot/picofeed/docs/grabber.markdown | 97 ++++++++ .../fguillot/picofeed/docs/installation.markdown | 65 +++++ vendor/fguillot/picofeed/docs/opml-export.markdown | 46 ++++ vendor/fguillot/picofeed/docs/opml-import.markdown | 19 ++ vendor/fguillot/picofeed/docs/tests.markdown | 14 ++ 10 files changed, 842 insertions(+) create mode 100644 vendor/fguillot/picofeed/docs/config.markdown create mode 100644 vendor/fguillot/picofeed/docs/debugging.markdown create mode 100644 vendor/fguillot/picofeed/docs/favicon.markdown create mode 100644 vendor/fguillot/picofeed/docs/feed-creation.markdown create mode 100644 vendor/fguillot/picofeed/docs/feed-parsing.markdown create mode 100644 vendor/fguillot/picofeed/docs/grabber.markdown create mode 100644 vendor/fguillot/picofeed/docs/installation.markdown create mode 100644 vendor/fguillot/picofeed/docs/opml-export.markdown create mode 100644 vendor/fguillot/picofeed/docs/opml-import.markdown create mode 100644 vendor/fguillot/picofeed/docs/tests.markdown (limited to 'vendor/fguillot/picofeed/docs') diff --git a/vendor/fguillot/picofeed/docs/config.markdown b/vendor/fguillot/picofeed/docs/config.markdown new file mode 100644 index 000000000..bf6fdee1f --- /dev/null +++ b/vendor/fguillot/picofeed/docs/config.markdown @@ -0,0 +1,263 @@ +Configuration +============= + +How to use the Config object +---------------------------- + +To 
change the default parameters, you have to use the Config class. +Create a new instance and pass it to the Reader object like this: + +```php +use PicoFeed\Reader; +use PicoFeed\Config; + +$config = new Config; +$config->setClientUserAgent('My custom RSS Reader') + ->setProxyHostname('127.0.0.1') + ->setProxyPort(8118); + +$reader = new Reader($config); +... +``` + +HTTP Client parameters +---------------------- + +### Connection timeout + +- Method name: `setClientTimeout()` +- Default value: 10 seconds +- Argument value: number of seconds (integer) + +```php +$config->setClientTimeout(20); // 20 seconds +``` + +### User Agent + +- Method name: `setClientUserAgent()` +- Default value: `PicoFeed (https://github.com/fguillot/picoFeed)` +- Argument value: string + +```php +$config->setClientUserAgent('My RSS reader'); +``` + +### Maximum HTTP redirections + +- Method name: `setMaxRedirections()` +- Default value: 5 +- Argument value: integer + +```php +$config->setMaxRedirections(10); +``` + +### Maximum HTTP body response size + +- Method name: `setMaxBodySize()` +- Default value: 2097152 (2MB) +- Argument value: value in bytes (integer) + +```php +$config->setMaxBodySize(10485760); // 10MB +``` + +### Proxy hostname + +- Method name: `setProxyHostname()` +- Default value: empty +- Argument value: string + +```php +$config->setProxyHostname('proxy.example.org'); +``` + +### Proxy port + +- Method name: `setProxyPort()` +- Default value: 3128 +- Argument value: port number (integer) + +```php +$config->setProxyPort(8118); +``` + +### Proxy username + +- Method name: `setProxyUsername()` +- Default value: empty +- Argument value: string + +```php +$config->setProxyUsername('myuser'); +``` + +### Proxy password + +- Method name: `setProxyPassword()` +- Default value: empty +- Argument value: string + +```php +$config->setProxyPassword('mysecret'); +``` + +Content grabber +--------------- + +### Connection timeout + +- Method name: `setGrabberTimeout()` +- Default 
value: 10 seconds +- Argument value: number of seconds (integer) + +```php +$config->setGrabberTimeout(20); // 20 seconds +``` + +### User Agent + +- Method name: `setGrabberUserAgent()` +- Default value: `PicoFeed (https://github.com/fguillot/picoFeed)` +- Argument value: string + +```php +$config->setGrabberUserAgent('My content scraper'); +``` + +Parser +------ + +### Hash algorithm used for item id generation + +- Method name: `setParserHashAlgo()` +- Default value: `crc32b` +- Argument value: any value returned by the function `hash_algos()` (string) +- See: http://php.net/hash_algos + +```php +$config->setParserHashAlgo('sha1'); +``` + +### Disable item content filtering + +- Method name: `setContentFiltering()` +- Default value: true (filtering is enabled by default) +- Argument value: boolean + +```php +$config->setContentFiltering(false); +``` + +### Timezone + +- Method name: `setTimezone()` +- Default value: UTC +- Argument value: See https://php.net/manual/en/timezones.php (string) +- Note: define the timezone for items/feeds + +```php +$config->setTimezone('Europe/Paris'); +``` + +Logging +------- + +### Timezone + +- Method name: `setTimezone()` +- Default value: UTC +- Argument value: See https://php.net/manual/en/timezones.php (string) +- Note: define the timezone for the logging class + +```php +$config->setTimezone('Europe/Paris'); +``` + +Filter +------ + +### Set the iframe whitelist (allowed iframe sources) + +- Method name: `setFilterIframeWhitelist()` +- Default value: See the Filter class source code +- Argument value: array + +```php +$config->setFilterIframeWhitelist(['http://www.youtube.com', 'http://www.vimeo.com']); +``` + +### Define HTML integer attributes + +- Method name: `setFilterIntegerAttributes()` +- Default value: See the Filter class source code +- Argument value: array + +```php +$config->setFilterIntegerAttributes(['width', 'height']); +``` + +### Add HTML attributes automatically + +- Method name: 
`setFilterAttributeOverrides()` +- Default value: See the Filter class source code +- Argument value: array + +```php +$config->setFilterAttributeOverrides(['a' => ['target' => '_blank']]); +``` + +### Set the list of required attributes for tags + +- Method name: `setFilterRequiredAttributes()` +- Default value: See the Filter class source code +- Argument value: array +- Note: If the required attributes are not there, the tag is stripped + +```php +$config->setFilterRequiredAttributes(['a' => 'href', 'img' => 'src']); +``` + +### Set the resource blacklist (Ads blocker) + +- Method name: `setFilterMediaBlacklist()` +- Default value: See the Filter class source code +- Argument value: array +- Note: Tags are stripped if they have those URLs + +```php +$config->setFilterMediaBlacklist(['feeds.feedburner.com', 'share.feedsportal.com']); +``` + +### Define which attributes are used for external resources + +- Method name: `setFilterMediaAttributes()` +- Default value: See the Filter class source code +- Argument value: array + +```php +$config->setFilterMediaAttributes(['src', 'href']); +``` + +### Define the scheme whitelist + +- Method name: `setFilterSchemeWhitelist()` +- Default value: See the Filter class source code +- Argument value: array +- See: http://en.wikipedia.org/wiki/URI_scheme + +```php +$config->setFilterSchemeWhitelist(['http://', 'ftp://']); +``` + +### Define the tags and attributes whitelist + +- Method name: `setFilterWhitelistedTags()` +- Default value: See the Filter class source code +- Argument value: array +- Note: Only those tags are allowed; everything else is stripped + +```php +$config->setFilterWhitelistedTags(['a' => ['href'], 'img' => ['src', 'title']]); +``` diff --git a/vendor/fguillot/picofeed/docs/debugging.markdown b/vendor/fguillot/picofeed/docs/debugging.markdown new file mode 100644 index 000000000..bbe1fd067 --- /dev/null +++ b/vendor/fguillot/picofeed/docs/debugging.markdown @@ -0,0 +1,46 @@ +Debugging +========= + +Get log 
messages +---------------- + +PicoFeed logs the execution flow in memory, so if a feed doesn't work correctly, it's easy to see what is wrong. + +```php +print_r(PicoFeed\Logging::getMessages()); +``` + +You will get output like this: + +```php +Array +( + [0] => Fetch URL: http://petitcodeur.fr/feed.xml + [1] => Etag: + [2] => Last-Modified: + [3] => cURL total time: 0.711378 + [4] => cURL dns lookup time: 0.001064 + [5] => cURL connect time: 0.100733 + [6] => cURL speed download: 74825 + [7] => HTTP status code: 200 + [8] => HTTP headers: Set-Cookie => start=R2701971637; path=/; expires=Sat, 06-Jul-2013 05:16:33 GMT + [9] => HTTP headers: Date => Sat, 06 Jul 2013 03:55:52 GMT + [10] => HTTP headers: Content-Type => application/xml + [11] => HTTP headers: Content-Length => 53229 + [12] => HTTP headers: Connection => close + [13] => HTTP headers: Server => Apache + [14] => HTTP headers: Last-Modified => Tue, 02 Jul 2013 03:26:02 GMT + [15] => HTTP headers: ETag => "393e79c-cfed-4e07ee78b2680" + [16] => HTTP headers: Accept-Ranges => bytes + .... 
+) +``` + +Remove messages +--------------- + +All messages are stored in memory, if you need to clear them just call the method `Logging::deleteMessages()`: + +```php +PicoFeed\Logging::deleteMessages(); +``` diff --git a/vendor/fguillot/picofeed/docs/favicon.markdown b/vendor/fguillot/picofeed/docs/favicon.markdown new file mode 100644 index 000000000..92ae11928 --- /dev/null +++ b/vendor/fguillot/picofeed/docs/favicon.markdown @@ -0,0 +1,56 @@ +Favicon fetcher +=============== + +Find and download the favicon +----------------------------- + +```php + +use PicoFeed\Favicon; + +$favicon = new Favicon; + +// The icon link is https://bits.wikimedia.org/favicon/wikipedia.ico +$icon_link = $favicon->find('https://en.wikipedia.org/'); +$icon_content = $favicon->getContent(); + +``` + +PicoFeed will try first to find the favicon from the meta tags and fallback to the `favicon.ico` located in the website's root if nothing is found. + +- `Favicon::find()` returns the favicon absolute url or an empty string if nothing is found. +- `Favicon::getContent()` returns the favicon file content (binary content) + +When the HTML page is parsed, relative links and protocol relative links are converted to absolute url. 
+ +Check if a favicon link exists +------------------------------ + +```php + +use PicoFeed\Favicon; + +$favicon = new Favicon; + +// Return true if the file exists +var_dump($favicon->exists('http://php.net/favicon.ico')); + +``` + +Use personalized HTTP settings +------------------------------ + +Like other classes, the Favicon class supports the Config object as a constructor argument: + +```php + +use PicoFeed\Config; +use PicoFeed\Favicon; + +$config = new Config; +$config->setClientUserAgent('My RSS Reader'); + +$favicon = new Favicon($config); +$favicon->find('https://github.com'); + +``` \ No newline at end of file diff --git a/vendor/fguillot/picofeed/docs/feed-creation.markdown b/vendor/fguillot/picofeed/docs/feed-creation.markdown new file mode 100644 index 000000000..24329f82c --- /dev/null +++ b/vendor/fguillot/picofeed/docs/feed-creation.markdown @@ -0,0 +1,72 @@ +Feed creation +============= + +Generate RSS 2.0 feed +---------------------- + +```php +use PicoFeed\Writers\Rss20; + +$writer = new Rss20(); +$writer->title = 'My site'; +$writer->site_url = 'http://boo/'; +$writer->feed_url = 'http://boo/feed.atom'; +$writer->author = array( + 'name' => 'Me', + 'url' => 'http://me', + 'email' => 'me@here' +); + +$writer->items[] = array( + 'title' => 'My article 1', + 'updated' => strtotime('-2 days'), + 'url' => 'http://foo/bar', + 'summary' => 'Super summary', + 'content' => '

<p>content</p>

' +); + +$writer->items[] = array( + 'title' => 'My article 2', + 'updated' => strtotime('-1 day'), + 'url' => 'http://foo/bar2', + 'summary' => 'Super summary 2', + 'content' => '

<p>content 2 &nbsp; &copy; 2015</p>

', + 'author' => array( + 'name' => 'Me too', + ) +); + +$writer->items[] = array( + 'title' => 'My article 3', + 'url' => 'http://foo/bar3' +); + +echo $writer->execute(); +``` + +Generate Atom feed +------------------ + +```php +use PicoFeed\Writers\Atom; + +$writer = new Atom(); +$writer->title = 'My site'; +$writer->site_url = 'http://boo/'; +$writer->feed_url = 'http://boo/feed.atom'; +$writer->author = array( + 'name' => 'Me', + 'url' => 'http://me', + 'email' => 'me@here' +); + +$writer->items[] = array( + 'title' => 'My article 1', + 'updated' => strtotime('-2 days'), + 'url' => 'http://foo/bar', + 'summary' => 'Super summary', + 'content' => '

<p>content</p>

' +); + +echo $writer->execute(); +``` diff --git a/vendor/fguillot/picofeed/docs/feed-parsing.markdown b/vendor/fguillot/picofeed/docs/feed-parsing.markdown new file mode 100644 index 000000000..22f84339b --- /dev/null +++ b/vendor/fguillot/picofeed/docs/feed-parsing.markdown @@ -0,0 +1,164 @@ +Feed parsing +============ + +Parsing a subscription +---------------------- + +```php +use PicoFeed\Reader\Reader; +use PicoFeed\PicoFeedException; + +try { + + $reader = new Reader; + + // Return a resource + $resource = $reader->download('https://linuxfr.org/news.atom'); + + // Return the right parser instance according to the feed format + $parser = $reader->getParser( + $resource->getUrl(), + $resource->getContent(), + $resource->getEncoding() + ); + + // Return a Feed object + $feed = $parser->execute(); + + // Print the feed properties with the magic method __toString() + echo $feed; +} +catch (PicoFeedException $e) { + // Do Something... +} +``` + +- The Reader class is the entry point for feed reading +- The method `download()` fetch the remote content and return a resource, an instance of `PicoFeed\Client\Client` +- The method `getParser()` returns a Parser instance according to the feed format Atom, Rss 2.0... +- The parser itself returns a `Feed` object that contains feed and item properties + +Output: + +```bash +Feed::id = tag:linuxfr.org,2005:/news +Feed::title = LinuxFr.org : les dépêches +Feed::url = http://linuxfr.org/news +Feed::date = 1415138079 +Feed::language = en-US +Feed::description = +Feed::logo = +Feed::items = 15 items +---- +Item::id = 38d8f48284fb03940cbb3aff9101089b81e44efb1281641bdd7c3e7e4bf3b0cd +Item::title = openSUSE 13.2 : nouvelle version du caméléon disponible ! 
+Item::url = http://linuxfr.org/news/opensuse-13-2-nouvelle-version-du-cameleon-disponible +Item::date = 1415122640 +Item::language = en-US +Item::author = Syvolc +Item::enclosure_url = +Item::enclosure_type = +Item::content = 18307 bytes +---- +Item::id = d0ebddc90bfc3f109f9be00a3bb0b4a770af7a647cdc88454fe15d79168e0dea +Item::title = Fuzix OS, parce que les petites choses sont belles +Item::url = http://linuxfr.org/news/fuzix-os-parce-que-les-petites-choses-sont-belles +Item::date = 1415112167 +Item::language = en-US +Item::author = Thomas DEBESSE +Item::enclosure_url = +Item::enclosure_type = +Item::content = 6104 bytes +.... +``` + +Get the list of available subscriptions for a website +----------------------------------------------------- + +The example below will return all available subscriptions for the website: + +```php +use PicoFeed\Reader\Reader; + +try { + + $reader = new Reader; + $resource = $reader->download('http://www.cnn.com'); + + $feeds = $reader->find( + $resource->getUrl(), + $resource->getContent() + ); + + print_r($feeds); +} +catch (PicoFeedException $e) { + // Do something... 
+} +``` + +Output: + +```php +Array +( + [0] => http://rss.cnn.com/rss/cnn_topstories.rss + [1] => http://rss.cnn.com/rss/cnn_latest.rss +) +``` + +Feed discovery and parsing +-------------------------- + +This example will discover automatically the subscription and parse the feed: + +```php +try { + + $reader = new Reader; + $resource = $reader->discover('http://linuxfr.org'); + + $parser = $reader->getParser( + $resource->getUrl(), + $resource->getContent(), + $resource->getEncoding() + ); + + $feed = $parser->execute(); + echo $feed; +} +catch (PicoFeedException $e) { +} +``` + +HTTP caching +------------ + +TODO + + +Feed and item properties +------------------------ + +```php +// Feed object +$feed->getId(); // Unique feed id +$feed->getTitle(); // Feed title +$feed->getUrl(); // Website url +$feed->getDate(); // Feed last updated date +$feed->getLanguage(); // Feed language +$feed->getDescription(); // Feed description +$feed->getLogo(); // Feed logo (can be a large image, different from icon) +$feed->getItems(); // List of item objects + +// Item object +$feed->items[0]->getId(); // Item unique id (hash) +$feed->items[0]->getTitle(); // Item title +$feed->items[0]->getUrl(); // Item url +$feed->items[0]->getDate(); // Item published date (timestamp) +$feed->items[0]->getLanguage(); // Item language +$feed->items[0]->getAuthor(); // Item author +$feed->items[0]->getEnclosureUrl(); // Enclosure url +$feed->items[0]->getEnclosureType(); // Enclosure mime-type (audio/mp3, image/png...) 
+$feed->items[0]->getContent(); // Item content (filtered or raw) +``` diff --git a/vendor/fguillot/picofeed/docs/grabber.markdown b/vendor/fguillot/picofeed/docs/grabber.markdown new file mode 100644 index 000000000..6a7dd2ada --- /dev/null +++ b/vendor/fguillot/picofeed/docs/grabber.markdown @@ -0,0 +1,97 @@ +Web scraper +=========== + +The web scraper is useful for feeds that display only a summary of articles, the scraper can download and parse the full content from the original website. + +How the content grabber works? +------------------------------ + +1. Try with rules first (xpath patterns) for the domain name (see `PicoFeed\Rules\`) +2. Try to find the text content by using common attributes for class and id +3. Finally, if nothing is found, the feed content is displayed + +**The best results are obtained with Xpath rules file.** + +How to use the content scraper? +------------------------------- + +```php +use PicoFeed\Reader; + +$reader = new Reader; +$reader->download('http://www.egscomics.com/rss.php'); + +$parser = $reader->getParser(); + +if ($parser !== false) { + + $parser->enableContentGrabber(); // <= Enable the content grabber + $feed = $parser->execute(); + // ... +} +``` + +When the content scraper is enabled, everything will be slower. +For each item a new HTTP request is made and the HTML downloaded is parsed with XML/Xpath. + +Configuration +------------- + +### Enable content grabber for items + +- Method name: `enableContentGrabber()` +- Default value: false (content grabber is disabled by default) +- Argument value: none + +```php +$parser->enableContentGrabber(); +``` + +### Ignore item urls for the content grabber + +- Method name: `setGrabberIgnoreUrls()` +- Default value: empty (fetch all item urls) +- Argument value: array (list of item urls to ignore) + +```php +$parser->setGrabberIgnoreUrls(['http://foo', 'http://bar']); +``` + +How to write a grabber rules file? 
+---------------------------------- + +Add a PHP file to the directory `PicoFeed\Rules`; the filename must be the same as the domain name. + +Example with the BBC website, `www.bbc.co.uk.php`: + +```php +<?php +return array( + 'test_url' => 'http://www.bbc.co.uk/news/world-middle-east-23911833', + 'body' => array( + '//div[@class="story-body"]', + ), + 'strip' => array( + '//script', + '//form', + '//style', + '//*[@class="story-date"]', + '//*[@class="story-header"]', + '//*[@class="story-related"]', + '//*[contains(@class, "byline")]', + '//*[contains(@class, "story-feature")]', + '//*[@id="video-carousel-container"]', + '//*[@id="also-related-links"]', + '//*[contains(@class, "share") or contains(@class, "hidden") or contains(@class, "hyper")]', + ) +); +``` + +Currently, only `body`, `strip` and `test_url` are supported. + +Don't forget to send a pull request or a ticket to share your contribution with everybody. + +List of content grabber rules +----------------------------- + +Rules are stored inside the directory [lib/PicoFeed/Rules](https://github.com/fguillot/picoFeed/tree/master/lib/PicoFeed/Rules) diff --git a/vendor/fguillot/picofeed/docs/installation.markdown b/vendor/fguillot/picofeed/docs/installation.markdown new file mode 100644 index 000000000..827908f75 --- /dev/null +++ b/vendor/fguillot/picofeed/docs/installation.markdown @@ -0,0 +1,65 @@ +Installation +============ + +Versions +-------- + +- Development version: branch master +- Available versions: + - v0.1.0 (stable) + - v0.0.2 + - v0.0.1 + +Installation with Composer +-------------------------- + +Configure your `composer.json`: + +```json +{ + "require": { + "fguillot/picofeed": "0.1.0" + } +} +``` + +Or simply: + +```bash +composer require fguillot/picofeed:0.1.0 +``` + +And download the code: + +```bash +composer install # or update +``` + +Usage example with the Composer autoloading: + +```php +<?php + +require 'vendor/autoload.php'; + +use PicoFeed\Reader\Reader; + +try { + + $reader = new Reader; + $resource = $reader->download('https://linuxfr.org/news.atom'); + + $parser = $reader->getParser( + $resource->getUrl(), + $resource->getContent(), + 
$resource->getEncoding() + ); + + $feed = $parser->execute(); + + echo $feed; +} +catch (Exception $e) { + // Do something... +} +``` diff --git a/vendor/fguillot/picofeed/docs/opml-export.markdown b/vendor/fguillot/picofeed/docs/opml-export.markdown new file mode 100644 index 000000000..4d7cf8db0 --- /dev/null +++ b/vendor/fguillot/picofeed/docs/opml-export.markdown @@ -0,0 +1,46 @@ +OPML export +=========== + +Example with no categories +-------------------------- + +```php +use PicoFeed\Export; + +$feeds = array( + array( + 'title' => 'Site title', + 'description' => 'Optional description', + 'site_url' => 'http://petitcodeur.fr/', + 'site_feed' => 'http://petitcodeur.fr/feed.xml' + ) +); + +$export = new Export($feeds); +$opml = $export->execute(); + +echo $opml; // XML content +``` + +Example with categories +----------------------- + +```php +use PicoFeed\Export; + +$feeds = array( + 'my category' => array( + array( + 'title' => 'Site title', + 'description' => 'Optional description', + 'site_url' => 'http://petitcodeur.fr/', + 'site_feed' => 'http://petitcodeur.fr/feed.xml' + ) + ) +); + +$export = new Export($feeds); +$opml = $export->execute(); + +echo $opml; // XML content +``` \ No newline at end of file diff --git a/vendor/fguillot/picofeed/docs/opml-import.markdown b/vendor/fguillot/picofeed/docs/opml-import.markdown new file mode 100644 index 000000000..9f47b376a --- /dev/null +++ b/vendor/fguillot/picofeed/docs/opml-import.markdown @@ -0,0 +1,19 @@ +Import OPML file +================ + +Importing a list of subscriptions is pretty straightforward: + +```php +use PicoFeed\Import; + +$opml = file_get_contents('mySubscriptions.opml'); +$import = new Import($opml); +$entries = $import->execute(); + +if ($entries !== false) { + print_r($entries); +} + +``` + +The method `execute()` returns `false` if there is a parsing error. 
diff --git a/vendor/fguillot/picofeed/docs/tests.markdown b/vendor/fguillot/picofeed/docs/tests.markdown new file mode 100644 index 000000000..72bb48b0f --- /dev/null +++ b/vendor/fguillot/picofeed/docs/tests.markdown @@ -0,0 +1,14 @@ +Running unit tests +================== + +If the autoloader is not yet installed, run: + +```bash +composer dump-autoload +``` + +Then run: + +```bash +phpunit tests +``` -- cgit v1.2.3