From 522f9ef81b39c7b2dbc5e8e817b1ab45ef6b0aa1 Mon Sep 17 00:00:00 2001 From: Bernhard Posselt Date: Sat, 12 Sep 2015 23:18:16 +0200 Subject: update picofeed --- vendor/fguillot/picofeed/.gitignore | 3 - vendor/fguillot/picofeed/.travis.yml | 19 - vendor/fguillot/picofeed/README.markdown | 67 - vendor/fguillot/picofeed/composer.json | 29 - vendor/fguillot/picofeed/docs/config.markdown | 306 --- vendor/fguillot/picofeed/docs/debugging.markdown | 102 - vendor/fguillot/picofeed/docs/exceptions.markdown | 28 - vendor/fguillot/picofeed/docs/favicon.markdown | 96 - .../fguillot/picofeed/docs/feed-creation.markdown | 74 - .../fguillot/picofeed/docs/feed-parsing.markdown | 333 --- vendor/fguillot/picofeed/docs/grabber.markdown | 195 -- vendor/fguillot/picofeed/docs/image-proxy.markdown | 67 - .../fguillot/picofeed/docs/installation.markdown | 50 - vendor/fguillot/picofeed/docs/opml-export.markdown | 46 - vendor/fguillot/picofeed/docs/opml-import.markdown | 19 - vendor/fguillot/picofeed/docs/tests.markdown | 14 - .../picofeed/lib/PicoFeed/Filter/Attribute.php | 17 +- .../picofeed/lib/PicoFeed/Rules/.phoronix.com.php | 2 +- .../lib/PicoFeed/Rules/bigfootjustice.com.php | 8 + .../lib/PicoFeed/Rules/blog.mapillary.com.php | 11 + .../lib/PicoFeed/Rules/endlessorigami.com.php | 2 +- .../lib/PicoFeed/Rules/extrafabulouscomics.com.php | 2 +- .../lib/PicoFeed/Rules/lastplacecomics.com.php | 2 +- .../lib/PicoFeed/Rules/loadingartist.com.php | 2 +- .../picofeed/lib/PicoFeed/Rules/mercworks.net.php | 17 + .../lib/PicoFeed/Rules/mrlovenstein.com.php | 3 +- .../lib/PicoFeed/Rules/neustadt-ticker.de.php | 5 +- .../lib/PicoFeed/Rules/theawkwardyeti.com.php | 2 +- .../lib/PicoFeed/Rules/threepanelsoul.com.php | 10 + .../picofeed/lib/PicoFeed/Rules/voz.vn.php | 10 - vendor/fguillot/picofeed/phpunit.xml | 7 - .../fguillot/picofeed/tests/Client/ClientTest.php | 145 -- vendor/fguillot/picofeed/tests/Client/CurlTest.php | 74 - .../picofeed/tests/Client/HttpHeadersTest.php | 19 - .../fguillot/picofeed/tests/Client/StreamTest.php | 91 - vendor/fguillot/picofeed/tests/Client/UrlTest.php | 298 --- .../picofeed/tests/Filter/AttributeFilterTest.php | 324 --- .../fguillot/picofeed/tests/Filter/FilterTest.php | 122 - .../picofeed/tests/Filter/HtmlFilterTest.php | 173 -- .../picofeed/tests/Filter/TagFilterTest.php | 33 - .../picofeed/tests/Parser/AtomParserTest.php | 463 ---- .../picofeed/tests/Parser/DateParserTest.php | 49 - vendor/fguillot/picofeed/tests/Parser/FeedTest.php | 24 - vendor/fguillot/picofeed/tests/Parser/ItemTest.php | 36 - .../fguillot/picofeed/tests/Parser/ParserTest.php | 71 - .../picofeed/tests/Parser/Rss10ParserTest.php | 352 --- .../picofeed/tests/Parser/Rss20ParserTest.php | 311 --- .../picofeed/tests/Parser/Rss91ParserTest.php | 31 - .../picofeed/tests/Parser/Rss92ParserTest.php | 29 - .../picofeed/tests/Parser/XmlParserTest.php | 197 -- .../fguillot/picofeed/tests/Reader/FaviconTest.php | 188 -- .../fguillot/picofeed/tests/Reader/ReaderTest.php | 269 --- .../picofeed/tests/Scraper/RuleLoaderTest.php | 86 - .../picofeed/tests/Scraper/ScraperTest.php | 89 - .../picofeed/tests/Serialization/ExportTest.php | 70 - .../picofeed/tests/Serialization/ImportTest.php | 61 - .../picofeed/tests/Syndication/AtomWriterTest.php | 90 - .../picofeed/tests/Syndication/Rss20WriterTest.php | 85 - vendor/fguillot/picofeed/tests/fixtures/atom.xml | 93 - .../tests/fixtures/atom_element_preference.xml | 38 - .../picofeed/tests/fixtures/atom_empty_feed.xml | 9 - .../picofeed/tests/fixtures/atom_empty_item.xml | 4 - .../picofeed/tests/fixtures/atom_extra.xml | 19 - .../atom_fallback_on_invalid_feed_values.xml | 15 - .../atom_fallback_on_invalid_item_values.xml | 28 - .../tests/fixtures/atom_no_default_namespace.xml | 92 - .../picofeed/tests/fixtures/atom_prefixed.xml | 93 - .../picofeed/tests/fixtures/atomsample.xml | 20 - .../picofeed/tests/fixtures/cercle.psy.xml | 58 - vendor/fguillot/picofeed/tests/fixtures/ezrss.it | 496 ---- .../picofeed/tests/fixtures/fulltextrss.xml | 88 - .../picofeed/tests/fixtures/google-reader.opml | 78 - .../picofeed/tests/fixtures/googleblog.xml | 477 ---- .../fguillot/picofeed/tests/fixtures/groovehq.xml | 1767 -------------- .../tests/fixtures/html4_head_stripped_page.html | 435 ---- .../picofeed/tests/fixtures/html4_page.html | 486 ---- .../tests/fixtures/html_head_stripped_page.html | 804 ------- .../picofeed/tests/fixtures/html_page.html | 967 -------- .../fguillot/picofeed/tests/fixtures/ibash.ru.xml | 359 --- .../picofeed/tests/fixtures/jeux-linux.fr.xml | 924 -------- .../fguillot/picofeed/tests/fixtures/lagrange.xml | 1986 ---------------- .../picofeed/tests/fixtures/lincoln_loop.xml | 1150 --------- .../picofeed/tests/fixtures/miniflux_favicon.ico | Bin 16958 -> 0 bytes .../picofeed/tests/fixtures/newsbeuter.opml | 43 - .../picofeed/tests/fixtures/next_inpact_full.xml | 2435 -------------------- .../fguillot/picofeed/tests/fixtures/pcinpact.xml | 1 - .../picofeed/tests/fixtures/planete-jquery.xml | 443 ---- .../fguillot/picofeed/tests/fixtures/podbean.xml | 1596 ------------- .../fguillot/picofeed/tests/fixtures/resorts.xml | 169 -- .../picofeed/tests/fixtures/rss2sample.xml | 41 - .../fguillot/picofeed/tests/fixtures/rss_0.91.xml | 50 - .../fguillot/picofeed/tests/fixtures/rss_0.92.xml | 103 - vendor/fguillot/picofeed/tests/fixtures/rss_10.xml | 69 - .../tests/fixtures/rss_10_element_preference.xml | 51 - .../tests/fixtures/rss_10_empty_channel.xml | 13 - .../picofeed/tests/fixtures/rss_10_empty_feed.xml | 10 - .../picofeed/tests/fixtures/rss_10_empty_item.xml | 5 - .../picofeed/tests/fixtures/rss_10_extra.xml | 25 - .../rss_10_fallback_on_invalid_feed_values.xml | 18 - .../rss_10_fallback_on_invalid_item_values.xml | 39 - .../tests/fixtures/rss_10_no_default_namespace.xml | 69 - .../picofeed/tests/fixtures/rss_10_prefixed.xml | 42 - vendor/fguillot/picofeed/tests/fixtures/rss_20.xml | 81 - .../fguillot/picofeed/tests/fixtures/rss_20_dc.xml | 18 - .../tests/fixtures/rss_20_element_preference.xml | 69 - .../tests/fixtures/rss_20_empty_channel.xml | 12 - .../picofeed/tests/fixtures/rss_20_empty_feed.xml | 9 - .../picofeed/tests/fixtures/rss_20_empty_item.xml | 6 - .../picofeed/tests/fixtures/rss_20_extra.xml | 30 - .../rss_20_fallback_on_invalid_feed_values.xml | 17 - .../rss_20_fallback_on_invalid_item_values.xml | 34 - vendor/fguillot/picofeed/tests/fixtures/rue89.xml | 2 - .../fguillot/picofeed/tests/fixtures/sametmax.xml | 1067 --------- .../picofeed/tests/fixtures/subscriptionList.opml | 1 - .../picofeed/tests/fixtures/tinytinyrss.opml | 13 - .../picofeed/tests/fixtures/univers_freebox.xml | 30 - .../fguillot/picofeed/tests/fixtures/xakep.ru.xml | 245 -- 117 files changed, 58 insertions(+), 22612 deletions(-) delete mode 100644 vendor/fguillot/picofeed/.gitignore delete mode 100644 vendor/fguillot/picofeed/.travis.yml delete mode 100644 vendor/fguillot/picofeed/README.markdown delete mode 100644 vendor/fguillot/picofeed/composer.json delete mode 100644 vendor/fguillot/picofeed/docs/config.markdown delete mode 100644 vendor/fguillot/picofeed/docs/debugging.markdown delete mode 100644 vendor/fguillot/picofeed/docs/exceptions.markdown delete mode 100644 vendor/fguillot/picofeed/docs/favicon.markdown delete mode 100644 vendor/fguillot/picofeed/docs/feed-creation.markdown delete mode 100644 vendor/fguillot/picofeed/docs/feed-parsing.markdown delete mode 100644 vendor/fguillot/picofeed/docs/grabber.markdown delete mode 100644 vendor/fguillot/picofeed/docs/image-proxy.markdown delete mode 100644 vendor/fguillot/picofeed/docs/installation.markdown delete mode 100644 vendor/fguillot/picofeed/docs/opml-export.markdown delete mode 100644 vendor/fguillot/picofeed/docs/opml-import.markdown delete mode 100644 vendor/fguillot/picofeed/docs/tests.markdown create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Rules/bigfootjustice.com.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Rules/blog.mapillary.com.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Rules/mercworks.net.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Rules/threepanelsoul.com.php delete mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Rules/voz.vn.php delete mode 100644 vendor/fguillot/picofeed/phpunit.xml delete mode 100644 vendor/fguillot/picofeed/tests/Client/ClientTest.php delete mode 100644 vendor/fguillot/picofeed/tests/Client/CurlTest.php delete mode 100644 vendor/fguillot/picofeed/tests/Client/HttpHeadersTest.php delete mode 100644 vendor/fguillot/picofeed/tests/Client/StreamTest.php delete mode 100644 vendor/fguillot/picofeed/tests/Client/UrlTest.php delete mode 100644 vendor/fguillot/picofeed/tests/Filter/AttributeFilterTest.php delete mode 100644 vendor/fguillot/picofeed/tests/Filter/FilterTest.php delete mode 100644 vendor/fguillot/picofeed/tests/Filter/HtmlFilterTest.php delete mode 100644 vendor/fguillot/picofeed/tests/Filter/TagFilterTest.php delete mode 100644 vendor/fguillot/picofeed/tests/Parser/AtomParserTest.php delete mode 100644 vendor/fguillot/picofeed/tests/Parser/DateParserTest.php delete mode 100644 vendor/fguillot/picofeed/tests/Parser/FeedTest.php delete mode 100644 vendor/fguillot/picofeed/tests/Parser/ItemTest.php delete mode 100644 vendor/fguillot/picofeed/tests/Parser/ParserTest.php delete mode 100644 vendor/fguillot/picofeed/tests/Parser/Rss10ParserTest.php delete mode 100644 vendor/fguillot/picofeed/tests/Parser/Rss20ParserTest.php delete mode 100644 vendor/fguillot/picofeed/tests/Parser/Rss91ParserTest.php delete mode 100644 vendor/fguillot/picofeed/tests/Parser/Rss92ParserTest.php delete mode 100644 vendor/fguillot/picofeed/tests/Parser/XmlParserTest.php delete mode 100644 vendor/fguillot/picofeed/tests/Reader/FaviconTest.php delete mode 100644 vendor/fguillot/picofeed/tests/Reader/ReaderTest.php delete mode 100644 vendor/fguillot/picofeed/tests/Scraper/RuleLoaderTest.php delete mode 100644 vendor/fguillot/picofeed/tests/Scraper/ScraperTest.php delete mode 100644 vendor/fguillot/picofeed/tests/Serialization/ExportTest.php delete mode 100644 vendor/fguillot/picofeed/tests/Serialization/ImportTest.php delete mode 100644 vendor/fguillot/picofeed/tests/Syndication/AtomWriterTest.php delete mode 100644 vendor/fguillot/picofeed/tests/Syndication/Rss20WriterTest.php delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/atom.xml delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/atom_element_preference.xml delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/atom_empty_feed.xml delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/atom_empty_item.xml delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/atom_extra.xml delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/atom_fallback_on_invalid_feed_values.xml delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/atom_fallback_on_invalid_item_values.xml delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/atom_no_default_namespace.xml delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/atom_prefixed.xml delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/atomsample.xml delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/cercle.psy.xml delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/ezrss.it delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/fulltextrss.xml delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/google-reader.opml delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/googleblog.xml delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/groovehq.xml delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/html4_head_stripped_page.html delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/html4_page.html delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/html_head_stripped_page.html delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/html_page.html delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/ibash.ru.xml delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/jeux-linux.fr.xml delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/lagrange.xml delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/lincoln_loop.xml delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/miniflux_favicon.ico delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/newsbeuter.opml delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/next_inpact_full.xml delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/pcinpact.xml delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/planete-jquery.xml delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/podbean.xml delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/resorts.xml delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/rss2sample.xml delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/rss_0.91.xml delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/rss_0.92.xml delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/rss_10.xml delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/rss_10_element_preference.xml delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/rss_10_empty_channel.xml delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/rss_10_empty_feed.xml delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/rss_10_empty_item.xml delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/rss_10_extra.xml delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/rss_10_fallback_on_invalid_feed_values.xml delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/rss_10_fallback_on_invalid_item_values.xml delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/rss_10_no_default_namespace.xml delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/rss_10_prefixed.xml delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/rss_20.xml delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/rss_20_dc.xml delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/rss_20_element_preference.xml delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/rss_20_empty_channel.xml delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/rss_20_empty_feed.xml delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/rss_20_empty_item.xml delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/rss_20_extra.xml delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/rss_20_fallback_on_invalid_feed_values.xml delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/rss_20_fallback_on_invalid_item_values.xml delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/rue89.xml delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/sametmax.xml delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/subscriptionList.opml delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/tinytinyrss.opml delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/univers_freebox.xml delete mode 100644 vendor/fguillot/picofeed/tests/fixtures/xakep.ru.xml (limited to 'vendor/fguillot') diff --git a/vendor/fguillot/picofeed/.gitignore b/vendor/fguillot/picofeed/.gitignore deleted file mode 100644 index acbddf5e6..000000000 --- a/vendor/fguillot/picofeed/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -.DS_Store -vendor/ -*.py \ No newline at end of file diff --git a/vendor/fguillot/picofeed/.travis.yml b/vendor/fguillot/picofeed/.travis.yml deleted file mode 100644 index 00b2b5bbb..000000000 --- a/vendor/fguillot/picofeed/.travis.yml +++ /dev/null @@ -1,19 +0,0 @@ -language: php - -php: - - 7.0 - - 5.6 - - 5.5 - - 5.4 - - 5.3 - -matrix: - fast_finish: true - allow_failures: - - php: 7.0 - -before_script: - - composer dump-autoload - -script: - - phpunit \ No newline at end of file diff --git a/vendor/fguillot/picofeed/README.markdown b/vendor/fguillot/picofeed/README.markdown deleted file mode 100644 index 1fb88e96f..000000000 --- a/vendor/fguillot/picofeed/README.markdown +++ /dev/null @@ -1,67 +0,0 @@ -PicoFeed -======== - -PicoFeed was originally developed for [Miniflux](http://miniflux.net), a minimalist and open source news reader. - -However, this library can be used inside any project. -PicoFeed is tested with a lot of different feeds and it's simple and easy to use. - -[![Build Status](https://travis-ci.org/fguillot/picoFeed.svg?branch=master)](https://travis-ci.org/fguillot/picoFeed) - -[![Scrutinizer Code Quality](https://scrutinizer-ci.com/g/fguillot/picoFeed/badges/quality-score.png?b=master)](https://scrutinizer-ci.com/g/fguillot/picoFeed/?branch=master) - -Features --------- - -- Simple and fast -- Feed parser for Atom 1.0 and RSS 0.91, 0.92, 1.0 and 2.0 -- Feed writer for Atom 1.0 and RSS 2.0 -- Favicon fetcher -- Import/Export OPML subscriptions -- Content filter: HTML cleanup, remove pixel trackers and Ads -- Multiple HTTP client adapters: cURL or Stream Context -- Proxy support -- Content grabber: download from the original website the full content -- Enclosure detection -- RTL languages support -- License: MIT - -Requirements ------------- - -- PHP >= 5.3 -- libxml >= 2.7 -- XML PHP extensions: DOM and SimpleXML -- cURL or Stream Context (`allow_url_fopen=On`) -- iconv extension - -Authors -------- - -- Original author: Frédéric Guillot -- Major Contributors: - - [Bernhard Posselt](https://github.com/Raydiation) - - [David Pennington](https://github.com/Xeoncross) - - [Mathias Kresin](https://github.com/mkresin) - -Real world usage ----------------- - -- [Miniflux](http://miniflux.net) -- [Owncloud News](https://github.com/owncloud/news) - -Documentation -------------- - -- [Installation](docs/installation.markdown) -- [Running unit tests](docs/tests.markdown) -- [Feed parsing](docs/feed-parsing.markdown) -- [Feed creation](docs/feed-creation.markdown) -- [Favicon fetcher](docs/favicon.markdown) -- [OPML file importation](docs/opml-import.markdown) -- [OPML file exportation](docs/opml-export.markdown) -- [Image proxy](docs/image-proxy.markdown) (avoid SSL mixed content warnings) -- [Web scraping](docs/grabber.markdown) -- [Exceptions](docs/exceptions.markdown) -- [Debugging](docs/debugging.markdown) -- [Configuration](docs/config.markdown) diff --git a/vendor/fguillot/picofeed/composer.json b/vendor/fguillot/picofeed/composer.json deleted file mode 100644 index 8fd534b91..000000000 --- a/vendor/fguillot/picofeed/composer.json +++ /dev/null @@ -1,29 +0,0 @@ -{ - "name": "fguillot/picofeed", - "description": "Modern library to handle RSS/Atom feeds", - "homepage": "https://github.com/fguillot/picoFeed", - "type": "library", - "license": "MIT", - "authors": [ - { - "name": "Frédéric Guillot" - } - ], - "require": { - "php": ">=5.3.0", - "ext-iconv": "*", - "ext-dom": "*", - "ext-xml": "*", - "ext-libxml": "*", - "ext-SimpleXML": "*" - }, - "suggest": { - "ext-curl": "PicoFeed will use cURL if present" - }, - "autoload": { - "psr-0": {"PicoFeed": "lib/"} - }, - "bin" : [ - "picofeed" - ] -} diff --git a/vendor/fguillot/picofeed/docs/config.markdown b/vendor/fguillot/picofeed/docs/config.markdown deleted file mode 100644 index 3360abf73..000000000 --- a/vendor/fguillot/picofeed/docs/config.markdown +++ /dev/null @@ -1,306 +0,0 @@ -Configuration -============= - -How to use the Config object ----------------------------- - -To change the default parameters, you have to use the Config class. -Create a new instance and pass it to the Reader object like that: - -```php -use PicoFeed\Reader\Reader; -use PicoFeed\Config\Config; - -$config = new Config; -$config->setClientUserAgent('My custom RSS Reader') - ->setProxyHostname('127.0.0.1') - ->setProxyPort(8118); - -$reader = new Reader($config); -... -``` - -HTTP Client parameters ----------------------- - -### Connection timeout - -- Method name: `setClientTimeout()` -- Default value: 10 seconds -- Argument value: number of seconds (integer) - -```php -$config->setClientTimeout(20); // 20 seconds -``` - -### User Agent - -- Method name: `setClientUserAgent()` -- Default value: `PicoFeed (https://github.com/fguillot/picoFeed)` -- Argument value: string - -```php -$config->setClientUserAgent('My RSS reader'); -``` - -### Maximum HTTP redirections - -- Method name: `setMaxRedirections()` -- Default value: 5 -- Argument value: integer - -```php -$config->setMaxRedirections(10); -``` - -### Maximum HTTP body response size - -- Method name: `setMaxBodySize()` -- Default value: 2097152 (2MB) -- Argument value: value in bytes (integer) - -```php -$config->setMaxBodySize(10485760); // 10MB -``` - -### Proxy hostname - -- Method name: `setProxyHostname()` -- Default value: empty -- Argument value: string - -```php -$config->setProxyHostname('proxy.example.org'); -``` - -### Proxy port - -- Method name: `setProxyPort()` -- Default value: 3128 -- Argument value: port number (integer) - -```php -$config->setProxyPort(8118); -``` - -### Proxy username - -- Method name: `setProxyUsername()` -- Default value: empty -- Argument value: string - -```php -$config->setProxyUsername('myuser'); -``` - -### Proxy password - -- Method name: `setProxyPassword()` -- Default value: empty -- Argument value: string - -```php -$config->setProxyPassword('mysecret'); -``` - -Content grabber ---------------- - -### Connection timeout - -- Method name: `setGrabberTimeout()` -- Default value: 10 seconds -- Argument value: number of seconds (integer) - -```php -$config->setGrabberTimeout(20); // 20 seconds -``` - -### User Agent - -- Method name: `setGrabberUserAgent()` -- Default value: `PicoFeed (https://github.com/fguillot/picoFeed)` -- Argument value: string - -```php -$config->setGrabberUserAgent('My content scraper'); -``` - -### Add a rules folder - -- Method name: `setGrabberRulesFolder()` -- Default value: `null` -- Argument value: string - -```php -$config->setGrabberRulesFolder('/path/to/my/grabber/rules'); -``` - -Parser ------- - -### Hash algorithm used for item id generation - -- Method name: `setParserHashAlgo()` -- Default value: `sha256` -- Argument value: any value returned by the function `hash_algos()` (string) -- See: http://php.net/hash_algos - -```php -$config->setParserHashAlgo('sha1'); -``` - -### Disable item content filtering - -- Method name: `setContentFiltering()` -- Default value: true (filtering is enabled by default) -- Argument value: boolean - -```php -$config->setContentFiltering(false); -``` - -### Timezone - -- Method name: `setTimezone()` -- Default value: UTC -- Argument value: See https://php.net/manual/en/timezones.php (string) -- Note: define the timezone for items/feeds - -```php -$config->setTimezone('Europe/Paris'); -``` - -Logging -------- - -### Timezone - -- Method name: `setTimezone()` -- Default value: UTC -- Argument value: See https://php.net/manual/en/timezones.php (string) -- Note: define the timezone for the logging class - -```php -$config->setTimezone('Europe/Paris'); -``` - -Filter ------- - -### Set the iframe whitelist (allowed iframe sources) - -- Method name: `setFilterIframeWhitelist()` -- Default value: See the Filter class source code -- Argument value: array - -```php -$config->setFilterIframeWhitelist(['http://www.youtube.com', 'http://www.vimeo.com']); -``` - -### Define HTML integer attributes - -- Method name: `setFilterIntegerAttributes()` -- Default value: See the Filter class source code -- Argument value: array - -```php -$config->setFilterIntegerAttributes(['width', 'height']); -``` - -### Add HTML attributes automatically - -- Method name: `setFilterAttributeOverrides()` -- Default value: See the Filter class source code -- Argument value: array - -```php -$config->setFilterAttributeOverrides(['a' => ['target' => '_blank']); -``` - -### Set the list of required attributes for tags - -- Method name: `setFilterRequiredAttributes()` -- Default value: See the Filter class source code -- Argument value: array -- Note: If the required attributes are not there, the tag is stripped - -```php -$config->setFilterRequiredAttributes(['a' => 'href', 'img' => 'src']); -``` - -### Set the resource blacklist (Ads blocker) - -- Method name: `setFilterMediaBlacklist()` -- Default value: See the Filter class source code -- Argument value: array -- Note: Tags are stripped if they have those URLs - -```php -$config->setFilterMediaBlacklist(['feeds.feedburner.com', 'share.feedsportal.com']); -``` - -### Define which attributes are used for external resources - -- Method name: `setFilterMediaAttributes()` -- Default value: See the Filter class source code -- Argument value: array - -```php -$config->setFilterMediaAttributes(['src', 'href']); -``` - -### Define the scheme whitelist - -- Method name: `setFilterSchemeWhitelist()` -- Default value: See the Filter class source code -- Argument value: array -- See: http://en.wikipedia.org/wiki/URI_scheme - -```php -$config->setFilterSchemeWhitelist(['http://', 'ftp://']); -``` - -### Define the tags and attributes whitelist - -- Method name: `setFilterWhitelistedTags()` -- Default value: See the Filter class source code -- Argument value: array -- Note: Only those tags are allowed everything else is stripped - -```php -$config->setFilterWhitelistedTags(['a' => ['href'], 'img' => ['src', 'title']]); -``` - -### Define a image proxy url - -- Method name: `setFilterImageProxyUrl()` -- Default value: Empty -- Argument value: string - -```php -$config->setFilterImageProxyUrl('http://myproxy.example.org/?url=%s'); -``` - -### Define a image proxy callback - -- Method name: `setFilterImageProxyCallback()` -- Default value: null -- Argument value: Closure - -```php -$config->setFilterImageProxyCallback(function ($image_url) { - $key = hash_hmac('sha1', $image_url, 'secret'); - return 'https://mypublicproxy/'.$key.'/'.urlencode($image_url); -}); -``` - -### Define image proxy protocol restriction - -- Method name: `setFilterImageProxyProtocol()` -- Default value: Empty (all protocols) -- Argument value: string - -```php -$config->setFilterImageProxyProtocol('http'); -``` diff --git a/vendor/fguillot/picofeed/docs/debugging.markdown b/vendor/fguillot/picofeed/docs/debugging.markdown deleted file mode 100644 index 1356e0f72..000000000 --- a/vendor/fguillot/picofeed/docs/debugging.markdown +++ /dev/null @@ -1,102 +0,0 @@ -Debugging -========= - -Logging -------- - -PicoFeed can log **in memory** the execution flow, if a feed doesn't work correctly it's easy to see what is wrong. - -### Enable/disable logging - -The logging is **disabled by default** to avoid unnecessary memory usage. - -Enable logging: - -```php -use PicoFeed\Logging\Logger; - -Logger::enable(); - -// or change the flag value - -Logger::$enable = true; -``` - -### Reading messages - -```php -use PicoFeed\Logging\Logger; - -// All messages are stored inside an Array -print_r(Logger::getMessages()); -``` - -You will got an output like that: - -```php -Array -( - [0] => Fetch URL: http://petitcodeur.fr/feed.xml - [1] => Etag: - [2] => Last-Modified: - [3] => cURL total time: 0.711378 - [4] => cURL dns lookup time: 0.001064 - [5] => cURL connect time: 0.100733 - [6] => cURL speed download: 74825 - [7] => HTTP status code: 200 - [8] => HTTP headers: Set-Cookie => start=R2701971637; path=/; expires=Sat, 06-Jul-2013 05:16:33 GMT - [9] => HTTP headers: Date => Sat, 06 Jul 2013 03:55:52 GMT - [10] => HTTP headers: Content-Type => application/xml - [11] => HTTP headers: Content-Length => 53229 - [12] => HTTP headers: Connection => close - [13] => HTTP headers: Server => Apache - [14] => HTTP headers: Last-Modified => Tue, 02 Jul 2013 03:26:02 GMT - [15] => HTTP headers: ETag => "393e79c-cfed-4e07ee78b2680" - [16] => HTTP headers: Accept-Ranges => bytes - .... -) -``` - -### Remove messages - -All messages are stored in memory, if you need to clear them just call the method `Logger::deleteMessages()`: - -```php -Logger::deleteMessages(); -``` - -Command line utility -==================== - -PicoFeed provides a basic command line tool to debug feeds quickly. -The tool is located in the root directory project. - -### Usage - -```bash -$ ./picofeed -Usage: -./picofeed feed # Parse a feed a dump the ouput on stdout -./picofeed debug # Display all logging messages for a feed -./picofeed item # Fetch only one item -./picofeed nofilter # Fetch an item but with no content filtering -``` - -### Example - -```bash -$ ./picofeed debug https://linuxfr.org/ -Exception thrown ===> "Invalid SSL certificate" -Array -( - [0] => [2014-11-08 14:04:14] PicoFeed\Client\Curl Fetch URL: https://linuxfr.org/ - [1] => [2014-11-08 14:04:14] PicoFeed\Client\Curl Etag provided: - [2] => [2014-11-08 14:04:14] PicoFeed\Client\Curl Last-Modified provided: - [3] => [2014-11-08 14:04:16] PicoFeed\Client\Curl cURL total time: 1.850634 - [4] => [2014-11-08 14:04:16] PicoFeed\Client\Curl cURL dns lookup time: 0.00093 - [5] => [2014-11-08 14:04:16] PicoFeed\Client\Curl cURL connect time: 0.115213 - [6] => [2014-11-08 14:04:16] PicoFeed\Client\Curl cURL speed download: 0 - [7] => [2014-11-08 14:04:16] PicoFeed\Client\Curl cURL effective url: https://linuxfr.org/ - [8] => [2014-11-08 14:04:16] PicoFeed\Client\Curl cURL error: SSL certificate problem: Invalid certificate chain -) -``` diff --git a/vendor/fguillot/picofeed/docs/exceptions.markdown b/vendor/fguillot/picofeed/docs/exceptions.markdown deleted file mode 100644 index 399ba3ef6..000000000 --- a/vendor/fguillot/picofeed/docs/exceptions.markdown +++ /dev/null @@ -1,28 +0,0 @@ -Exceptions -========== - -All exceptions inherits from the standard `Exception` class. - -### Library Exceptions - -- `PicoFeed\PicoFeedException`: Base class exception for the library - -### Client Exceptions - -- `PicoFeed\Client\ClientException`: Base exception class for the Client class -- `PicoFeed\Client\InvalidCertificateException`: Invalid SSL certificate -- `PicoFeed\Client\InvalidUrlException`: Malformed URL, page not found (404), unable to establish a connection -- `PicoFeed\Client\MaxRedirectException`: Maximum of HTTP redirections reached -- `PicoFeed\Client\MaxSizeException`: The response size exceeds to maximum allowed -- `PicoFeed\Client\TimeoutException`: Connection timeout - -### Parser Exceptions - -- `PicoFeed\Parser\ParserException`: Base exception class for the Parser class -- `PicoFeed\Parser\MalformedXmlException`: XML Parser error - -### Reader Exceptions - -- `PicoFeed\Reader\ReaderException`: Base exception class for the Reader -- `PicoFeed\Reader\SubscriptionNotFoundException`: Unable to find a feed for the given website -- `PicoFeed\Reader\UnsupportedFeedFormatException`: Unable to detect the feed format diff --git a/vendor/fguillot/picofeed/docs/favicon.markdown b/vendor/fguillot/picofeed/docs/favicon.markdown deleted file mode 100644 index b9ea73b7d..000000000 --- a/vendor/fguillot/picofeed/docs/favicon.markdown +++ /dev/null @@ -1,96 +0,0 @@ -Favicon fetcher -=============== - -Find and download the favicon ------------------------------ - -```php -use PicoFeed\Reader\Favicon; - -$favicon = new Favicon; - -// The icon link is https://bits.wikimedia.org/favicon/wikipedia.ico -$icon_link = $favicon->find('https://en.wikipedia.org/'); -$icon_content = $favicon->getContent(); -``` - -PicoFeed will try first to find the favicon from the meta tags and fallback to the `favicon.ico` located in the website's root if nothing is found. - -- `Favicon::find()` returns the favicon absolute url or an empty string if nothing is found. -- `Favicon::getContent()` returns the favicon file content (binary content) - -When the HTML page is parsed, relative links and protocol relative links are converted to absolute url. - -Download a known favicon ------------------------ -It's possible to download a known favicon using the second optional parameter of Favicon::find(). The link to the favicon can be a relative or protocol relative url as well, but it has to be relative to the specified website. - -If the requested favicon could not be found, the HTML of the website is parsed instead, with the fallback to the `favicon.ico` located in the website's root. - -```php -use PicoFeed\Reader\Favicon; - -$favicon = new Favicon; - -$icon_link = $favicon->find('https://en.wikipedia.org/','https://bits.wikimedia.org/favicon/wikipedia.ico'); -$icon_content = $favicon->getContent(); -``` - -Get Favicon file type ---------------------- - -It's possible to fetch the image type, this information come from the Content-Type HTTP header: - -```php -$favicon = new Favicon; -$favicon->find('http://example.net/'); - -echo $favicon->getType(); - -// Will output the content type, by example "image/png" -``` - -Get the Favicon as Data URI ---------------------------- - -You can also get the whole image as Data URI. -It's useful if you want to store the icon in your database and avoid too many HTTP requests. - -```php -$favicon = new Favicon; -$favicon->find('http://example.net/'); - -echo $favicon->getDataUri(); - -// Output something like that: data:image/png;base64,iVBORw0KGgoAAAANSUh..... -``` - -See: http://en.wikipedia.org/wiki/Data_URI_scheme - -Check if a favicon link exists ------------------------------- - -```php -use PicoFeed\Reader\Favicon; - -$favicon = new Favicon; - -// Return true if the file exists -var_dump($favicon->exists('http://php.net/favicon.ico')); -``` - -Use personalized HTTP settings ------------------------------- - -Like other classes, the Favicon class support the Config object as constructor argument: - -```php -use PicoFeed\Config\Config; -use PicoFeed\Reader\Favicon; - -$config = new Config; -$config->setClientUserAgent('My RSS Reader'); - -$favicon = new Favicon($config); -$favicon->find('https://github.com'); -``` diff --git a/vendor/fguillot/picofeed/docs/feed-creation.markdown b/vendor/fguillot/picofeed/docs/feed-creation.markdown deleted file mode 100644 index 35a24a9d7..000000000 --- a/vendor/fguillot/picofeed/docs/feed-creation.markdown +++ /dev/null @@ -1,74 +0,0 @@ -Feed creation -============= - -PicoFeed can also generate Atom and RSS feeds. - -Generate RSS 2.0 feed ----------------------- - -```php -use PicoFeed\Syndication\Rss20; - -$writer = new Rss20(); -$writer->title = 'My site'; -$writer->site_url = 'http://boo/'; -$writer->feed_url = 'http://boo/feed.atom'; -$writer->author = array( - 'name' => 'Me', - 'url' => 'http://me', - 'email' => 'me@here' -); - -$writer->items[] = array( - 'title' => 'My article 1', - 'updated' => strtotime('-2 days'), - 'url' => 'http://foo/bar', - 'summary' => 'Super summary', - 'content' => '

content

' -); - -$writer->items[] = array( - 'title' => 'My article 2', - 'updated' => strtotime('-1 day'), - 'url' => 'http://foo/bar2', - 'summary' => 'Super summary 2', - 'content' => '

content 2   © 2015

', - 'author' => array( - 'name' => 'Me too', - ) -); - -$writer->items[] = array( - 'title' => 'My article 3', - 'url' => 'http://foo/bar3' -); - -echo $writer->execute(); -``` - -Generate Atom feed ------------------- - -```php -use PicoFeed\Syndication\Atom; - -$writer = new Atom(); -$writer->title = 'My site'; -$writer->site_url = 'http://boo/'; -$writer->feed_url = 'http://boo/feed.atom'; -$writer->author = array( - 'name' => 'Me', - 'url' => 'http://me', - 'email' => 'me@here' -); - -$writer->items[] = array( - 'title' => 'My article 1', - 'updated' => strtotime('-2 days'), - 'url' => 'http://foo/bar', - 'summary' => 'Super summary', - 'content' => '

content

' -); - -echo $writer->execute(); -``` diff --git a/vendor/fguillot/picofeed/docs/feed-parsing.markdown b/vendor/fguillot/picofeed/docs/feed-parsing.markdown deleted file mode 100644 index e3e43d49d..000000000 --- a/vendor/fguillot/picofeed/docs/feed-parsing.markdown +++ /dev/null @@ -1,333 +0,0 @@ -Feed parsing -============ - -Parsing a subscription ----------------------- - -```php -use PicoFeed\Reader\Reader; -use PicoFeed\PicoFeedException; - -try { - - $reader = new Reader; - - // Return a resource - $resource = $reader->download('http://linuxfr.org/news.atom'); - - // Return the right parser instance according to the feed format - $parser = $reader->getParser( - $resource->getUrl(), - $resource->getContent(), - $resource->getEncoding() - ); - - // Return a Feed object - $feed = $parser->execute(); - - // Print the feed properties with the magic method __toString() - echo $feed; -} -catch (PicoFeedException $e) { - // Do Something... -} -``` - -- The Reader class is the entry point for feed reading -- The method `download()` fetch the remote content and return a resource, an instance of `PicoFeed\Client\Client` -- The method `getParser()` returns a Parser instance according to the feed format Atom, Rss 2.0... -- The parser itself returns a `Feed` object that contains feed and item properties - -Output: - -```bash -Feed::id = tag:linuxfr.org,2005:/news -Feed::title = LinuxFr.org : les dépêches -Feed::feed_url = http://linuxfr.org/news.atom -Feed::site_url = http://linuxfr.org/news -Feed::language = en-US -Feed::description = -Feed::logo = -Feed::date = Thu, 26 Feb 15 09:33:08 +0100 -Feed::isRTL() = false -Feed::items = 15 items ----- -Item::id = 56198c98ae852d21c369bfb5ffbc2ad13db2f3227236dde3e21ca1a9eb943faf -Item::title = Les brevets logiciels : un frein à l'innovation et la recherche (un nouvel exemple aux États-Unis) -Item::url = http://linuxfr.org/news/les-brevets-logiciels-un-frein-a-l-innovation-et-la-recherche-un-nouvel-exemple-aux-etats-unis -Item::language = en-US -Item::author = alenvers -Item::enclosure_url = -Item::enclosure_type = -Item::date = Thu, 26 Feb 15 09:33:08 +0100 -Item::isRTL() = false -Item::content = 6452 bytes -.... -``` - -Get the list of available subscriptions for a website ------------------------------------------------------ - -The example below will returns all available subscriptions for the website: - -```php -use PicoFeed\Reader\Reader; - -try { - - $reader = new Reader; - $resource = $reader->download('http://www.cnn.com'); - - $feeds = $reader->find( - $resource->getUrl(), - $resource->getContent() - ); - - print_r($feeds); -} -catch (PicoFeedException $e) { - // Do something... -} -``` - -Output: - -```php -Array -( - [0] => http://rss.cnn.com/rss/cnn_topstories.rss - [1] => http://rss.cnn.com/rss/cnn_latest.rss -) -``` - -Feed discovery and parsing --------------------------- - -This example will discover automatically the subscription and parse the feed: - -```php -try { - - $reader = new Reader; - $resource = $reader->discover('http://linuxfr.org'); - - $parser = $reader->getParser( - $resource->getUrl(), - $resource->getContent(), - $resource->getEncoding() - ); - - $feed = $parser->execute(); - echo $feed; -} -catch (PicoFeedException $e) { -} -``` - -HTTP caching ------------- - -PicoFeed supports HTTP caching to avoid unnecessary processing. - -1. After the first download, save in your database the values of the Etag and LastModified HTTP headers -2. For the next requests, provide those values to the `download()` method and check if the feed was modified or not - -Here an example: - -```php -try { - - // Fetch from your database the previous values of the Etag and LastModified headers - $etag = '...'; - $last_modified = '...'; - - $reader = new Reader; - - // Provide those values to the download method - $resource = $reader->download('http://linuxfr.org/news.atom', $last_modified, $etag); - - // Return true if the remote content has changed - if ($resource->isModified()) { - - $parser = $reader->getParser( - $resource->getUrl(), - $resource->getContent(), - $resource->getEncoding() - ); - - $feed = $parser->execute(); - - // Save your feed in your database - // ... - - // Store the Etag and the LastModified headers in your database for the next requests - $etag = $resource->getEtag(); - $last_modified = $resource->getLastModified(); - - // ... - } - else { - - echo 'Not modified, nothing to do!'; - } -} -catch (PicoFeedException $e) { - // Do something... -} -``` - -HTTP basic auth ---------------- -If a feed requires basic auth headers, you can pass them as parameters to the **download** method, e.g.: - -```php -try { - $reader = new Reader; - - $user = 'john'; - $password = 'doe'; - - // Provide those values to the download method - $resource = $reader->download('http://linuxfr.org/news.atom', '', '', $user, $password); - - // Return true if the remote content has changed - if ($resource->isModified()) { - - $parser = $reader->getParser( - $resource->getUrl(), - $resource->getContent(), - $resource->getEncoding() - ); - - $feed = $parser->execute(); - - // Save your feed in your database - // ... - - } - else { - - echo 'Not modified, nothing to do!'; - } -} -catch (PicoFeedException $e) { - // Do something... -} -``` - -Custom regex filters --------------------- -In case you want modify the content with a simple regex, you can create a rule file named after the domain of the feed's link attribute. For the feed pointing to **http://www.twogag.com/** the file is stored under **Rules/twogag.com.php** - -For filtering, only the array with the key **filter** will be considered. The first level key is a preg_match regex that will match the sub url, e.g. to only match a feed whose link attribute points to **twogag.com/test**, the regex could look like **%/test.*%**. The second level array contains a list of search and replace strings, which will be passed to the preg\_replace function. The first string is the argument that should be matched, the second is the replacement. - -To replace all occurences of links to smaller images for twogag, the following rule can be used: - - -```php - array( - '%.*%' => array( - "%http://www.twogag.com/comics-rss/([^.]+)\\.jpg%" => - "http://www.twogag.com/comics/$1.jpg" - ) - ) -); -``` - -Feed and item properties ------------------------- - -```php -// Feed object -$feed->getId(); // Unique feed id -$feed->getTitle(); // Feed title -$feed->getFeedUrl(); // Feed url -$feed->getSiteUrl(); // Website url -$feed->getDate(); // Feed last updated date (DateTime object) -$feed->getLanguage(); // Feed language -$feed->getDescription(); // Feed description -$feed->getLogo(); // Feed logo (can be a large image, different from icon) -$feed->getItems(); // List of item objects - -// Item object -$feed->items[0]->getId(); // Item unique id (hash) -$feed->items[0]->getTitle(); // Item title -$feed->items[0]->getUrl(); // Item url -$feed->items[0]->getDate(); // Item published date (DateTime object) -$feed->items[0]->getLanguage(); // Item language -$feed->items[0]->getAuthor(); // Item author -$feed->items[0]->getEnclosureUrl(); // Enclosure url -$feed->items[0]->getEnclosureType(); // Enclosure mime-type (audio/mp3, image/png...) -$feed->items[0]->getContent(); // Item content (filtered or raw) -$feed->items[0]->isRTL(); // Return true if the item language is Right-To-Left -``` - -Get raw XML tags/attributes or non standard tags for items ----------------------------------------------------------- -The getTag function returns an array with all values of matching tags. If nothing can be found, an empty array is returned. In case of errors, the return value is false. - -Get the original `guid` tag for RSS 2.0 feeds: - -```php -$values = $feed->items[0]->getTag('guid'); -print_r ($values); -``` - -Get a specific attribute value: - -```php -$values = $feed->items[1]->getTag('category', 'term'); -print_r ($values); -``` - -Get value of namespaced tag: - -```php -if (array_key_exists('wfw', $feed->items[0]->namespaces)) { - $values = $feed->items[1]->getTag('wfw:commentRss'); - print_r ($values); -} -``` - -Get attribute value of a namespaced tag: - -```php -if (array_key_exists('media', $feed->items[0]->namespaces)) { - $values = $feed->items[0]->getTag('media:content', 'url'); - print_r ($values); -} -``` - -Get the xml of the item (returns a SimpleXMLElement instance): - -```php -$simplexml = $feed->items[0]->xml; -``` - -Get the list of namespaces: - -```php -print_r($feed->items[0]->namespaces); -``` - -RTL language detection ----------------------- - -Use the method `Item::isRTL()` to test if an item is RTL or not: - -```php -var_dump($item->isRTL()); // true or false -``` - -Known RTL languages are: - -- Arabic (ar-**) -- Farsi (fa-**) -- Urdu (ur-**) -- Pashtu (ps-**) -- Syriac (syr-**) -- Divehi (dv-**) -- Hebrew (he-**) -- Yiddish (yi-**) diff --git a/vendor/fguillot/picofeed/docs/grabber.markdown b/vendor/fguillot/picofeed/docs/grabber.markdown deleted file mode 100644 index 4ac83068f..000000000 --- a/vendor/fguillot/picofeed/docs/grabber.markdown +++ /dev/null @@ -1,195 +0,0 @@ -Web scraper -=========== - -The web scraper is useful for feeds that display only a summary of articles, the scraper can download and parse the full content from the original website. - -How the content grabber works? ------------------------------- - -1. Try with rules first (XPath queries) for the domain name (see `PicoFeed\Rules\`) -2. Try to find the text content by using common attributes for class and id -3. Finally, if nothing is found, the feed content is displayed - -**The best results are obtained with XPath rules file.** - -Standalone usage ----------------- - -Fetch remote content: - -```php -setUrl($url); -$grabber->execute(); - -// Get raw HTML content -echo $grabber->getRawContent(); - -// Get relevant content -echo $grabber->getRelevantContent(); - -// Get filtered relevant content -echo $grabber->getFilteredContent(); - -// Return true if there is relevant content -var_dump($grabber->hasRelevantContent()); -``` - -Parse HTML content: - -```php -setRawContent($html); -$grabber->execute(); -``` - -Fetch full item contents during feed parsing --------------------------------------------- - -Before parsing all items, just call the method `$parser->enableContentGrabber()`: - -```php -download('http://www.egscomics.com/rss.php'); - - // Return the right parser instance according to the feed format - $parser = $reader->getParser( - $resource->getUrl(), - $resource->getContent(), - $resource->getEncoding() - ); - - // Enable content grabber before parsing items - $parser->enableContentGrabber(); - - // Return a Feed object - $feed = $parser->execute(); -} -catch (PicoFeedException $e) { - // Do Something... -} -``` - -When the content scraper is enabled, everything will be slower. -**For each item a new HTTP request is made** and the HTML downloaded is parsed with XML/XPath. - -Configuration -------------- - -### Enable content grabber for items - -- Method name: `enableContentGrabber()` -- Default value: false (also fetch content if no rule file exist) -- Argument value: bool (true scrape only webpages which have a rule file) - -```php -$parser->enableContentGrabber(false); -``` - -### Ignore item urls for the content grabber - -- Method name: `setGrabberIgnoreUrls()` -- Default value: empty (fetch all item urls) -- Argument value: array (list of item urls to ignore) - -```php -$parser->setGrabberIgnoreUrls(['http://foo', 'http://bar']); -``` - -How to write a grabber rules file? ----------------------------------- - -Add a PHP file to the directory `PicoFeed\Rules`, the filename must be the same as the domain name: - -Example with the BBC website, `www.bbc.co.uk.php`: - -```php - array( - '%.*%' => array( - 'test_url' => 'http://www.bbc.co.uk/news/world-middle-east-23911833', - 'body' => array( - '//div[@class="story-body"]', - ), - 'strip' => array( - '//script', - '//form', - '//style', - '//*[@class="story-date"]', - '//*[@class="story-header"]', - '//*[@class="story-related"]', - '//*[contains(@class, "byline")]', - '//*[contains(@class, "story-feature")]', - '//*[@id="video-carousel-container"]', - '//*[@id="also-related-links"]', - '//*[contains(@class, "share") or contains(@class, "hidden") or contains(@class, "hyper")]', - ) - ) - ) -); -``` -Each rule file can contain multiple rules, based so links to different website URLs can be handled differently. The first level key is a regex, which will be matched against the full path of the URL using **preg_match**, e.g. for **http://www.bbc.co.uk/news/world-middle-east-23911833?test=1** the URL that would be matched is **/news/world-middle-east-23911833?test=1** - -Each rule has the following keys: -* **body**: An array of xpath expressions which will be extracted from the page -* **strip**: An array of xpath expressions which will be removed from the matched content -* **test_url**: A test url to a matching page to test the grabber - -Don't forget to send a pull request or a ticket to share your contribution with everybody, - -**A more complex example**: - -Let's say you wanted to extract a div with the id **video** if the article points to an URL like **http://comix.com/videos/423**, **audio** if the article points to an URL like **http://comix.com/podcasts/5** and all other links to the page should instead take the div with the id **content**. The following rulefile would fit that requirement and would be stored in a file called **lib/PicoFeed/Rules/comix.com.php**: - - -```php -return array( - 'grabber' => array( - '%^/videos.*%' => array( - 'test_url' => 'http://comix.com/videos/423', - 'body' => array( - '//div[@id="video"]', - ), - 'strip' => array() - ), - '%^/podcasts.*%' => array( - 'test_url' => 'http://comix.com/podcasts/5', - 'body' => array( - '//div[@id="audio"]', - ), - 'strip' => array() - ), - '%.*%' => array( - 'test_url' => 'http://comix.com/blog/1', - 'body' => array( - '//div[@id="content"]', - ), - 'strip' => array() - ) - ) -); -``` - -List of content grabber rules ------------------------------ - -Rules are stored inside the directory [lib/PicoFeed/Rules](https://github.com/fguillot/picoFeed/tree/master/lib/PicoFeed/Rules) diff --git a/vendor/fguillot/picofeed/docs/image-proxy.markdown b/vendor/fguillot/picofeed/docs/image-proxy.markdown deleted file mode 100644 index 2c3dafdf8..000000000 --- a/vendor/fguillot/picofeed/docs/image-proxy.markdown +++ /dev/null @@ -1,67 +0,0 @@ -Image Proxy -=========== - -To prevent mixed content warnings on SSL pages served from your RSS reader you might want to use an assets proxy. - -Images url will be rewritten to be downloaded through the proxy. - -Example: - -```html - -``` - -Can be rewritten like that: - -```html - -``` - -Currently this feature is only compatible with images. - -There is several open source SSL image proxy available like [Camo](https://github.com/atmos/camo). -You can also write your own proxy. - -Usage ------ - -There two different ways to use this feature, define a proxy url or a callback. - -### Define a proxy url - -A proxy url must be defined with a placeholder `%s`. -The placeholder will be replaced by the image source url encoded (RFC 3986). -In PHP, the url can be decoded with the function `rawurldecode()`. - -```php -$config = new Config; -$config->setFilterImageProxyUrl('http://myproxy.example.org/?url=%s'); -``` - -Will rewrite the image source like that: - -```html - -``` - -### Define a callback - -Your callback will be called each time an image url need to be rewritten. -The first argument is the original image url and your function must returns the new image url. - -Here an example if your proxy need a shared secret key: - -```php -$config = new Config; - -$config->setFilterImageProxyCallback(function ($image_url) { - $key = hash_hmac('sha1', $image_url, 'secret'); - return 'https://mypublicproxy/'.$key.'/'.rawurlencode($image_url); -}); -``` - -Will generate an image url like that: - -```html - -``` diff --git a/vendor/fguillot/picofeed/docs/installation.markdown b/vendor/fguillot/picofeed/docs/installation.markdown deleted file mode 100644 index beb0bc828..000000000 --- a/vendor/fguillot/picofeed/docs/installation.markdown +++ /dev/null @@ -1,50 +0,0 @@ -Installation -============ - -Versions --------- - -- Development version: master -- Stable version: use the last tag - -Installation with Composer --------------------------- - -```bash -composer require fguillot/picofeed @stable -``` - -And download the code: - -```bash -composer install # or update -``` - -Usage example with the Composer autoloader: - -```php -download('http://linuxfr.org/news.atom'); - - $parser = $reader->getParser( - $resource->getUrl(), - $resource->getContent(), - $resource->getEncoding() - ); - - $feed = $parser->execute(); - - echo $feed; -} -catch (Exception $e) { - // Do something... -} -``` diff --git a/vendor/fguillot/picofeed/docs/opml-export.markdown b/vendor/fguillot/picofeed/docs/opml-export.markdown deleted file mode 100644 index bd4f0b04b..000000000 --- a/vendor/fguillot/picofeed/docs/opml-export.markdown +++ /dev/null @@ -1,46 +0,0 @@ -OPML export -=========== - -Example with no categories --------------------------- - -```php -use PicoFeed\Serialization\Export; - -$feeds = array( - array( - 'title' => 'Site title', - 'description' => 'Optional description', - 'site_url' => 'http://petitcodeur.fr/', - 'site_feed' => 'http://petitcodeur.fr/feed.xml' - ) -); - -$export = new Export($feeds); -$opml = $export->execute(); - -echo $opml; // XML content -``` - -Example with categories ------------------------ - -```php -use PicoFeed\Serialization\Export; - -$feeds = array( - 'my category' => array( - array( - 'title' => 'Site title', - 'description' => 'Optional description', - 'site_url' => 'http://petitcodeur.fr/', - 'site_feed' => 'http://petitcodeur.fr/feed.xml' - ) - ) -); - -$export = new Export($feeds); -$opml = $export->execute(); - -echo $opml; // XML content -``` \ No newline at end of file diff --git a/vendor/fguillot/picofeed/docs/opml-import.markdown b/vendor/fguillot/picofeed/docs/opml-import.markdown deleted file mode 100644 index 8ce2026c3..000000000 --- a/vendor/fguillot/picofeed/docs/opml-import.markdown +++ /dev/null @@ -1,19 +0,0 @@ -Import OPML file -================ - -Importing a list of subscriptions is pretty straightforward: - -```php -use PicoFeed\Serialization\Import; - -$opml = file_get_contents('mySubscriptions.opml'); -$import = new Import($opml); -$entries = $import->execute(); - -if ($entries !== false) { - print_r($entries); -} - -``` - -The method `execute()` return `false` if there is a parsing error. diff --git a/vendor/fguillot/picofeed/docs/tests.markdown b/vendor/fguillot/picofeed/docs/tests.markdown deleted file mode 100644 index 72bb48b0f..000000000 --- a/vendor/fguillot/picofeed/docs/tests.markdown +++ /dev/null @@ -1,14 +0,0 @@ -Running unit tests -================== - -If the autoloader is not yet installed run: - -```php -composer dump-autoload -``` - -Then run: - -```php -phpunit tests -``` diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Attribute.php b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Attribute.php index 1c5842df4..684dbf7ad 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Attribute.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Attribute.php @@ -79,7 +79,7 @@ class Attribute 'cite' => array(), 'time' => array('datetime'), 'abbr' => array('title'), - 'iframe' => array('width', 'height', 'frameborder', 'src'), + 'iframe' => array('width', 'height', 'frameborder', 'src', 'allowfullscreen'), 'q' => array('cite') ); @@ -227,7 +227,6 @@ class Attribute * @var array */ private $filters = array( - 'filterEmptyAttribute', 'filterAllowedAttribute', 'filterIntegerAttribute', 'rewriteAbsoluteUrl', @@ -280,20 +279,6 @@ class Attribute return $attributes; } - /** - * Return true if the value is not empty (remove empty attributes) - * - * @access public - * @param string $tag Tag name - * @param string $attribute Attribute name - * @param string $value Attribute value - * @return boolean - */ - public function filterEmptyAttribute($tag, $attribute, $value) - { - return $value !== ''; - } - /** * Return true if the value is allowed (remove not allowed attributes) * diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.phoronix.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.phoronix.com.php index a2be240d5..867faa48b 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.phoronix.com.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.phoronix.com.php @@ -4,7 +4,7 @@ return array( '%.*%' => array( 'test_url' => 'http://www.phoronix.com/scan.php?page=article&item=amazon_ec2_bare&num=1', 'body' => array( - '//div[@class="KonaBody"]', + '//div[@class="content"]', ), 'strip' => array() ) diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/bigfootjustice.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/bigfootjustice.com.php new file mode 100644 index 000000000..907eec42c --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/bigfootjustice.com.php @@ -0,0 +1,8 @@ + array( + '%.*%' => array( + '%-150x150%' => '', + ) + ) +); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/blog.mapillary.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/blog.mapillary.com.php new file mode 100644 index 000000000..5c77b95d0 --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/blog.mapillary.com.php @@ -0,0 +1,11 @@ + array( + '%.*%' => array( + 'test_url' => 'http://blog.mapillary.com/update/2015/08/26/traffic-sign-updates.html', + 'body' => array( + '//div[contains(@class, "blog-post__content")]' + ) + ) + ) +); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/endlessorigami.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/endlessorigami.com.php index 4dd63daeb..907eec42c 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/endlessorigami.com.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/endlessorigami.com.php @@ -2,7 +2,7 @@ return array( 'filter' => array( '%.*%' => array( - '%-\\d+x\\d+%' => "", + '%-150x150%' => '', ) ) ); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/extrafabulouscomics.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/extrafabulouscomics.com.php index 6f0fbec06..d6d1e08cc 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/extrafabulouscomics.com.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/extrafabulouscomics.com.php @@ -2,7 +2,7 @@ return array( 'filter' => array( '%.*%' => array( - '%-\\d+x\\d+%' => "", + '%-150x150%' => '', ) ) ); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/lastplacecomics.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/lastplacecomics.com.php index 6f0fbec06..d6d1e08cc 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/lastplacecomics.com.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/lastplacecomics.com.php @@ -2,7 +2,7 @@ return array( 'filter' => array( '%.*%' => array( - '%-\\d+x\\d+%' => "", + '%-150x150%' => '', ) ) ); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/loadingartist.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/loadingartist.com.php index 4dd63daeb..907eec42c 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/loadingartist.com.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/loadingartist.com.php @@ -2,7 +2,7 @@ return array( 'filter' => array( '%.*%' => array( - '%-\\d+x\\d+%' => "", + '%-150x150%' => '', ) ) ); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/mercworks.net.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/mercworks.net.php new file mode 100644 index 000000000..d69280284 --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/mercworks.net.php @@ -0,0 +1,17 @@ + array( + '%.*%' => array( + 'body' => array('//div[@id="comic"]', + '//div[contains(@class,"entry-content")]', + ), + 'strip' => array(), + 'test_url' => 'http://mercworks.net/comicland/healthy-choice/', + ) + ), + 'filter' => array( + '%.*%' => array( + '%title="(.+)" */>%' => "/>
$1" + ) + ) +); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/mrlovenstein.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/mrlovenstein.com.php index 4d8dc6ea2..8276a1e4a 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/mrlovenstein.com.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/mrlovenstein.com.php @@ -2,7 +2,8 @@ return array( 'filter' => array( '%.*%' => array( - '%alt="(.+)" */>%' => "/>
$1" + '%alt="(.+)" */>%' => "/>
$1", + '%\.png%' => "_rollover.png", ) ) ); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/neustadt-ticker.de.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/neustadt-ticker.de.php index ac41ee6fa..3d2a058a0 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/neustadt-ticker.de.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/neustadt-ticker.de.php @@ -5,8 +5,9 @@ return array( 'test_url' => 'http://www.neustadt-ticker.de/36480/aktuell/nachrichten/buergerbuero-neustadt-ab-heute-wieder-geoeffnet', 'body' => array('//div[contains(@class,"article")]/div[@class="PostContent" and *[not(contains(@class, "navigation"))]]'), 'strip' => array( - '//*[@id="wp_rp_first"]' + '//*