From 8241180c6ce0cb19255d70a3394f891e08182542 Mon Sep 17 00:00:00 2001
From: Bernhard Posselt <dev@bernhard-posselt.com>
Date: Tue, 27 Jan 2015 09:31:40 +0100
Subject: dont use picofeed submodule

---
 vendor/fguillot/picofeed                       |   1 -
 vendor/fguillot/picofeed/docs/grabber.markdown | 136 +++++++++++++++++++++++++
 2 files changed, 136 insertions(+), 1 deletion(-)
 delete mode 160000 vendor/fguillot/picofeed
 create mode 100644 vendor/fguillot/picofeed/docs/grabber.markdown

(limited to 'vendor/fguillot/picofeed/docs/grabber.markdown')

diff --git a/vendor/fguillot/picofeed b/vendor/fguillot/picofeed
deleted file mode 160000
index 0a1d0d395..000000000
--- a/vendor/fguillot/picofeed
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 0a1d0d3950f7f047dc8fb1d80aa6296e15f306d0
diff --git a/vendor/fguillot/picofeed/docs/grabber.markdown b/vendor/fguillot/picofeed/docs/grabber.markdown
new file mode 100644
index 000000000..b99b756ed
--- /dev/null
+++ b/vendor/fguillot/picofeed/docs/grabber.markdown
@@ -0,0 +1,136 @@
+Web scraper
+===========
+
+The web scraper is useful for feeds that display only a summary of each article: it can download and parse the full content from the original website.
+
+How does the content grabber work?
+----------------------------------
+
+1. Try with rules first (XPath queries) for the domain name (see `PicoFeed\Rules\`)
+2. Try to find the text content by using common attributes for class and id
+3. Finally, if nothing is found, the feed content is displayed
+
+**The best results are obtained with an XPath rules file.**
+
+Standalone usage
+----------------
+
+```php
+<?php
+
+use PicoFeed\Client\Grabber;
+
+$grabber = new Grabber($item_url);
+$grabber->download();
+$grabber->parse();
+
+// Get raw HTML content
+echo $grabber->getRawContent();
+
+// Get relevant content
+echo $grabber->getContent();
+
+// Get filtered relevant content
+echo $grabber->getFilteredContent();
+```
+
+Fetch full item contents during feed parsing
+--------------------------------------------
+
+Before parsing all items, just call the method `$parser->enableContentGrabber()`:
+
+```php
+<?php
+
+use PicoFeed\Reader\Reader;
+use PicoFeed\PicoFeedException;
+
+try {
+
+    $reader = new Reader;
+
+    // Return a resource
+    $resource = $reader->download('http://www.egscomics.com/rss.php');
+
+    // Return the right parser instance according to the feed format
+    $parser = $reader->getParser(
+        $resource->getUrl(),
+        $resource->getContent(),
+        $resource->getEncoding()
+    );
+
+    // Enable content grabber before parsing items
+    $parser->enableContentGrabber();
+
+    // Return a Feed object
+    $feed = $parser->execute();
+}
+catch (PicoFeedException $e) {
+    // Do Something...
+}
+```
+
+When the content scraper is enabled, everything will be slower.
+**For each item a new HTTP request is made** and the HTML downloaded is parsed with XML/XPath.
+
+Configuration
+-------------
+
+### Enable content grabber for items
+
+- Method name: `enableContentGrabber()`
+- Default value: false (content grabber is disabled by default)
+- Argument value: none
+
+```php
+$parser->enableContentGrabber();
+```
+
+### Ignore item urls for the content grabber
+
+- Method name: `setGrabberIgnoreUrls()`
+- Default value: empty (fetch all item urls)
+- Argument value: array (list of item urls to ignore)
+
+```php
+$parser->setGrabberIgnoreUrls(['http://foo', 'http://bar']);
+```
+
+How to write a grabber rules file?
+----------------------------------
+
+Add a PHP file to the directory `PicoFeed\Rules`. The filename must be the same as the domain name.
+
+Example with the BBC website, `www.bbc.co.uk.php`:
+
+```php
+<?php
+return array(
+    'test_url' => 'http://www.bbc.co.uk/news/world-middle-east-23911833',
+    'body' => array(
+        '//div[@class="story-body"]',
+    ),
+    'strip' => array(
+        '//script',
+        '//form',
+        '//style',
+        '//*[@class="story-date"]',
+        '//*[@class="story-header"]',
+        '//*[@class="story-related"]',
+        '//*[contains(@class, "byline")]',
+        '//*[contains(@class, "story-feature")]',
+        '//*[@id="video-carousel-container"]',
+        '//*[@id="also-related-links"]',
+        '//*[contains(@class, "share") or contains(@class, "hidden") or contains(@class, "hyper")]',
+    )
+);
+```
+
+Currently, only `body`, `strip` and `test_url` are supported.
+
+Don't forget to send a pull request or open a ticket to share your contribution with everybody.
+
+List of content grabber rules
+-----------------------------
+
+Rules are stored inside the directory [lib/PicoFeed/Rules](https://github.com/fguillot/picoFeed/tree/master/lib/PicoFeed/Rules)
-- 
cgit v1.2.3