From 8e128604b9bace8a834daa3b02f242f976df85e7 Mon Sep 17 00:00:00 2001 From: Bernhard Posselt Date: Fri, 25 Mar 2016 17:54:22 +0100 Subject: add makefile for building the project --- .gitignore | 1 + Makefile | 110 ++++++----------- composer.lock | 8 +- js/package.json | 8 +- vendor/composer/ClassLoader.php | 8 +- vendor/composer/LICENSE | 2 +- vendor/composer/autoload_files.php | 2 +- vendor/composer/autoload_real.php | 12 +- vendor/composer/installed.json | 8 +- vendor/fguillot/picofeed/lib/PicoFeed/Base.php | 34 +++++ .../fguillot/picofeed/lib/PicoFeed/Filter/Tag.php | 18 +-- .../Generator/ContentGeneratorInterface.php | 23 ++++ .../PicoFeed/Generator/FileContentGenerator.php | 36 ++++++ .../PicoFeed/Generator/YoutubeContentGenerator.php | 67 ++++++++++ .../fguillot/picofeed/lib/PicoFeed/Parser/Atom.php | 6 +- .../picofeed/lib/PicoFeed/Parser/DateParser.php | 21 +++- .../fguillot/picofeed/lib/PicoFeed/Parser/Item.php | 45 ++++++- .../picofeed/lib/PicoFeed/Parser/Parser.php | 137 ++++++--------------- .../picofeed/lib/PicoFeed/Parser/Rss10.php | 4 +- .../picofeed/lib/PicoFeed/Parser/Rss20.php | 8 +- .../picofeed/lib/PicoFeed/Parser/XmlParser.php | 6 +- .../PicoFeed/Processor/ContentFilterProcessor.php | 37 ++++++ .../Processor/ContentGeneratorProcessor.php | 49 ++++++++ .../lib/PicoFeed/Processor/ItemPostProcessor.php | 84 +++++++++++++ .../PicoFeed/Processor/ItemProcessorInterface.php | 25 ++++ .../lib/PicoFeed/Processor/ScraperProcessor.php | 71 +++++++++++ .../picofeed/lib/PicoFeed/Reader/Favicon.php | 23 +--- .../picofeed/lib/PicoFeed/Reader/Reader.php | 23 +--- .../picofeed/lib/PicoFeed/Rules/.over-blog.com.php | 11 ++ .../PicoFeed/Rules/encyclopedie.naheulbeuk.com.php | 13 ++ .../lib/PicoFeed/Rules/greekculture.com.php | 12 -- .../lib/PicoFeed/Rules/news.sciencemag.org.php | 18 --- .../picofeed/lib/PicoFeed/Rules/rugbyrama.fr.php | 20 +++ .../lib/PicoFeed/Rules/www.franceculture.fr.php | 3 +- .../PicoFeed/Rules/www.monsieur-le-chien.fr.php | 11 ++ 
.../lib/PicoFeed/Rules/www.sciencemag.org.php | 16 +++ .../picofeed/lib/PicoFeed/Scraper/RuleLoader.php | 25 +--- .../picofeed/lib/PicoFeed/Scraper/Scraper.php | 98 ++------------- 38 files changed, 697 insertions(+), 406 deletions(-) create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Base.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Generator/ContentGeneratorInterface.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Generator/FileContentGenerator.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Generator/YoutubeContentGenerator.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Processor/ContentFilterProcessor.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Processor/ContentGeneratorProcessor.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Processor/ItemPostProcessor.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Processor/ItemProcessorInterface.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Processor/ScraperProcessor.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Rules/.over-blog.com.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Rules/encyclopedie.naheulbeuk.com.php delete mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Rules/greekculture.com.php delete mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Rules/news.sciencemag.org.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Rules/rugbyrama.fr.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.monsieur-le-chien.fr.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.sciencemag.org.php diff --git a/.gitignore b/.gitignore index 63d4e433c..f1e9c46ce 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ #specific to news app +composer.phar node_modules/ *.log /build/ diff --git a/Makefile b/Makefile index 9b4f045df..9ded601b2 100644 --- a/Makefile +++ b/Makefile @@ -1,83 +1,43 @@ # Makefile for building the project 
+app_name=$(notdir $(CURDIR)) +build_directory=$(CURDIR)/build/artifacts/source +package_name=$(build_directory)/$(app_name) -app_name=news -project_dir=$(CURDIR)/../$(app_name) -build_dir=$(CURDIR)/build/artifacts -appstore_dir=$(build_dir)/appstore -source_dir=$(build_dir)/source -package_name=$(app_name) +all: build -all: appstore +.PHONY: build +build: + make composer + make npm -clean: - rm -rf $(build_dir) +.PHONY: composer +composer: + curl -sS https://getcomposer.org/installer | php + php composer.phar install --prefer-dist + php composer.phar update --prefer-dist + rm -f composer.phar -update-composer: - rm -f composer.lock - git rm -r vendor - composer install --prefer-dist +.PHONY: npm +npm: + cd js && npm run build -dist: clean - mkdir -p $(source_dir) - tar cvzf $(source_dir)/$(package_name).tar.gz $(project_dir) \ - --exclude-vcs \ - --exclude=$(project_dir)/build/artifacts \ - --exclude=$(project_dir)/js/node_modules \ - --exclude=$(project_dir)/js/coverage +.PHONY: clean +clean: + rm -rf ./build -appstore: clean - mkdir -p $(appstore_dir) - tar cvzf $(appstore_dir)/$(package_name).tar.gz $(project_dir) \ +.PHONY: dist +dist: + make clean + make build + make test + mkdir -p $(build_directory) + tar cvzf $(package_name).tar.gz ../$(app_name) \ --exclude-vcs \ - --exclude=$(project_dir)/build/artifacts \ - --exclude=$(project_dir)/js/node_modules \ - --exclude=$(project_dir)/js/.bowerrc \ - --exclude=$(project_dir)/js/.jshintrc \ - --exclude=$(project_dir)/js/.jshintignore \ - --exclude=$(project_dir)/js/gulpfile.js \ - --exclude=$(project_dir)/js/*.json \ - --exclude=$(project_dir)/js/*.conf.js \ - --exclude=$(project_dir)/js/*.log \ - --exclude=$(project_dir)/js/README.md \ - --exclude=$(project_dir)/js/.bowerrc \ - --exclude=$(project_dir)/js/app \ - --exclude=$(project_dir)/js/controller \ - --exclude=$(project_dir)/js/coverage \ - --exclude=$(project_dir)/js/directive \ - --exclude=$(project_dir)/js/filter \ - --exclude=$(project_dir)/js/gui \ - 
--exclude=$(project_dir)/js/plugin \ - --exclude=$(project_dir)/js/service \ - --exclude=$(project_dir)/js/tests \ - --exclude=$(project_dir)/js/vendor/jquery \ - --exclude=$(project_dir)/js/vendor/angular-mocks \ - --exclude=$(project_dir)/\.* \ - --exclude=$(project_dir)/phpunit*xml \ - --exclude=$(project_dir)/composer* \ - --exclude=$(project_dir)/issue_template.md \ - --exclude=$(project_dir)/Makefile \ - --exclude=$(project_dir)/tests \ - --exclude=$(project_dir)/vendor/ezyang/htmlpurifier/.gitattributes \ - --exclude=$(project_dir)/vendor/ezyang/htmlpurifier/Doxyfile \ - --exclude=$(project_dir)/vendor/ezyang/htmlpurifier/FOCUS \ - --exclude=$(project_dir)/vendor/ezyang/htmlpurifier/INSTALL* \ - --exclude=$(project_dir)/vendor/ezyang/htmlpurifier/NEWS \ - --exclude=$(project_dir)/vendor/ezyang/htmlpurifier/phpdoc.ini \ - --exclude=$(project_dir)/vendor/ezyang/htmlpurifier/README \ - --exclude=$(project_dir)/vendor/ezyang/htmlpurifier/TODO \ - --exclude=$(project_dir)/vendor/ezyang/htmlpurifier/VERSION \ - --exclude=$(project_dir)/vendor/ezyang/htmlpurifier/WHATSNEW \ - --exclude=$(project_dir)/vendor/ezyang/htmlpurifier/WYSIWYG \ - --exclude=$(project_dir)/vendor/ezyang/htmlpurifier/art \ - --exclude=$(project_dir)/vendor/ezyang/htmlpurifier/benchmarks \ - --exclude=$(project_dir)/vendor/ezyang/htmlpurifier/configdoc \ - --exclude=$(project_dir)/vendor/ezyang/htmlpurifier/docs \ - --exclude=$(project_dir)/vendor/ezyang/htmlpurifier/extras \ - --exclude=$(project_dir)/vendor/ezyang/htmlpurifier/maintenance \ - --exclude=$(project_dir)/vendor/ezyang/htmlpurifier/plugins \ - --exclude=$(project_dir)/vendor/ezyang/htmlpurifier/smoketests \ - --exclude=$(project_dir)/vendor/ezyang/htmlpurifier/tests \ - --exclude=$(project_dir)/vendor/fguillot/picofeed/docs \ - --exclude=$(project_dir)/vendor/fguillot/picofeed/tests \ - --exclude=$(project_dir)/vendor/pear/net_url2/docs \ - --exclude=$(project_dir)/vendor/pear/net_url2/tests + --exclude=../$(app_name)/build \ + 
--exclude=../$(app_name)/js/node_modules \ + +.PHONY: test +test: + cd js && npm run test + phpunit -c phpunit.xml + phpunit -c phpunit.integration.xml diff --git a/composer.lock b/composer.lock index cf3f2e04d..3b4ed28a8 100644 --- a/composer.lock +++ b/composer.lock @@ -57,12 +57,12 @@ "source": { "type": "git", "url": "https://github.com/fguillot/picoFeed.git", - "reference": "c270ef4474a2460d857f99c84612025c5f9975f2" + "reference": "d6bbdd248fa4a3eef7831ffaae0491a2ea58f897" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/fguillot/picoFeed/zipball/c270ef4474a2460d857f99c84612025c5f9975f2", - "reference": "c270ef4474a2460d857f99c84612025c5f9975f2", + "url": "https://api.github.com/repos/fguillot/picoFeed/zipball/d6bbdd248fa4a3eef7831ffaae0491a2ea58f897", + "reference": "d6bbdd248fa4a3eef7831ffaae0491a2ea58f897", "shasum": "" }, "require": { @@ -97,7 +97,7 @@ ], "description": "Modern library to handle RSS/Atom feeds", "homepage": "https://github.com/fguillot/picoFeed", - "time": "2016-02-11 19:52:02" + "time": "2016-03-24 12:09:56" }, { "name": "pear/net_url2", diff --git a/js/package.json b/js/package.json index e189c3907..96b0ca7e0 100644 --- a/js/package.json +++ b/js/package.json @@ -3,7 +3,9 @@ "description": "An RSS/Atom feed reader", "main": "build/app.min.js", "scripts": { - "test": "gulp karma" + "test": "node node_modules/gulp-cli/bin/gulp.js karma", + "prebuild": "npm install && node_modules/bower/bin/bower install && node_modules/bower/bin/bower update", + "build": "node node_modules/gulp-cli/bin/gulp.js" }, "repository": { "type": "git", @@ -23,12 +25,14 @@ "url": "https://github.com/owncloud/news/issues" }, "engines": { - "node": ">=5.6" + "node": ">=5.6" }, "private": true, "homepage": "https://github.com/owncloud/news", "devDependencies": { + "bower": "^1.7.7", "gulp": "^3.9.1", + "gulp-cli": "^1.2.1", "gulp-concat": "^2.6.0", "gulp-jshint": "^2.0.0", "gulp-ng-annotate": "^2.0.0", diff --git a/vendor/composer/ClassLoader.php 
b/vendor/composer/ClassLoader.php index 5e1469e83..ff6ecfb82 100644 --- a/vendor/composer/ClassLoader.php +++ b/vendor/composer/ClassLoader.php @@ -13,9 +13,7 @@ namespace Composer\Autoload; /** - * ClassLoader implements a PSR-0 class loader - * - * See https://github.com/php-fig/fig-standards/blob/master/accepted/PSR-0.md + * ClassLoader implements a PSR-0, PSR-4 and classmap class loader. * * $loader = new \Composer\Autoload\ClassLoader(); * @@ -39,6 +37,8 @@ namespace Composer\Autoload; * * @author Fabien Potencier * @author Jordi Boggiano + * @see http://www.php-fig.org/psr/psr-0/ + * @see http://www.php-fig.org/psr/psr-4/ */ class ClassLoader { @@ -147,7 +147,7 @@ class ClassLoader * appending or prepending to the ones previously set for this namespace. * * @param string $prefix The prefix/namespace, with trailing '\\' - * @param array|string $paths The PSR-0 base directories + * @param array|string $paths The PSR-4 base directories * @param bool $prepend Whether to prepend the directories * * @throws \InvalidArgumentException diff --git a/vendor/composer/LICENSE b/vendor/composer/LICENSE index c8d57af8b..1a2812488 100644 --- a/vendor/composer/LICENSE +++ b/vendor/composer/LICENSE @@ -1,5 +1,5 @@ -Copyright (c) 2015 Nils Adermann, Jordi Boggiano +Copyright (c) 2016 Nils Adermann, Jordi Boggiano Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/vendor/composer/autoload_files.php b/vendor/composer/autoload_files.php index 915496bea..c25686b15 100644 --- a/vendor/composer/autoload_files.php +++ b/vendor/composer/autoload_files.php @@ -6,5 +6,5 @@ $vendorDir = dirname(dirname(__FILE__)); $baseDir = dirname($vendorDir); return array( - $vendorDir . '/ezyang/htmlpurifier/library/HTMLPurifier.composer.php', + '2cffec82183ee1cea088009cef9a6fc3' => $vendorDir . 
'/ezyang/htmlpurifier/library/HTMLPurifier.composer.php', ); diff --git a/vendor/composer/autoload_real.php b/vendor/composer/autoload_real.php index 9d756efad..d963d9da7 100644 --- a/vendor/composer/autoload_real.php +++ b/vendor/composer/autoload_real.php @@ -41,15 +41,19 @@ class ComposerAutoloaderInit1865bc95e48cb97339d0616ae5d8b993 $loader->register(true); $includeFiles = require __DIR__ . '/autoload_files.php'; - foreach ($includeFiles as $file) { - composerRequire1865bc95e48cb97339d0616ae5d8b993($file); + foreach ($includeFiles as $fileIdentifier => $file) { + composerRequire1865bc95e48cb97339d0616ae5d8b993($fileIdentifier, $file); } return $loader; } } -function composerRequire1865bc95e48cb97339d0616ae5d8b993($file) +function composerRequire1865bc95e48cb97339d0616ae5d8b993($fileIdentifier, $file) { - require $file; + if (empty($GLOBALS['__composer_autoload_files'][$fileIdentifier])) { + require $file; + + $GLOBALS['__composer_autoload_files'][$fileIdentifier] = true; + } } diff --git a/vendor/composer/installed.json b/vendor/composer/installed.json index 7f5c5b671..6e19b3984 100644 --- a/vendor/composer/installed.json +++ b/vendor/composer/installed.json @@ -209,12 +209,12 @@ "source": { "type": "git", "url": "https://github.com/fguillot/picoFeed.git", - "reference": "c270ef4474a2460d857f99c84612025c5f9975f2" + "reference": "d6bbdd248fa4a3eef7831ffaae0491a2ea58f897" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/fguillot/picoFeed/zipball/c270ef4474a2460d857f99c84612025c5f9975f2", - "reference": "c270ef4474a2460d857f99c84612025c5f9975f2", + "url": "https://api.github.com/repos/fguillot/picoFeed/zipball/d6bbdd248fa4a3eef7831ffaae0491a2ea58f897", + "reference": "d6bbdd248fa4a3eef7831ffaae0491a2ea58f897", "shasum": "" }, "require": { @@ -229,7 +229,7 @@ "suggest": { "ext-curl": "PicoFeed will use cURL if present" }, - "time": "2016-02-11 19:52:02", + "time": "2016-03-24 12:09:56", "bin": [ "picofeed" ], diff --git 
a/vendor/fguillot/picofeed/lib/PicoFeed/Base.php b/vendor/fguillot/picofeed/lib/PicoFeed/Base.php new file mode 100644 index 000000000..4be0985e4 --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Base.php @@ -0,0 +1,34 @@ +config = $config ?: new Config(); + Logger::setTimezone($this->config->getTimezone()); + } +} diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Tag.php b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Tag.php index 34e21dc19..5fd8d6da9 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Tag.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Tag.php @@ -2,24 +2,17 @@ namespace PicoFeed\Filter; -use DOMXpath; +use DOMXPath; +use PicoFeed\Base; use PicoFeed\Parser\XmlParser; -use PicoFeed\Config\Config; /** * Tag Filter class. * * @author Frederic Guillot */ -class Tag +class Tag extends Base { - /** - * Config object. - * - * @var \PicoFeed\Config\Config - */ - private $config; - /** * Tags blacklist (Xpath expressions). * @@ -76,11 +69,6 @@ class Tag 'q', ); - public function __construct(Config $config) - { - $this->config = $config; - } - /** * Check if the tag is allowed and is not a pixel tracker. 
* diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Generator/ContentGeneratorInterface.php b/vendor/fguillot/picofeed/lib/PicoFeed/Generator/ContentGeneratorInterface.php new file mode 100644 index 000000000..5c2f205c6 --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Generator/ContentGeneratorInterface.php @@ -0,0 +1,23 @@ +extensions as $extension) { + if (substr($item->getUrl(), - strlen($extension)) === $extension) { + $item->setContent(''.$item->getUrl().''); + return true; + } + } + + return false; + } +} diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Generator/YoutubeContentGenerator.php b/vendor/fguillot/picofeed/lib/PicoFeed/Generator/YoutubeContentGenerator.php new file mode 100644 index 000000000..198090d4f --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Generator/YoutubeContentGenerator.php @@ -0,0 +1,67 @@ +hasNamespace('yt')) { + return $this->generateHtmlFromXml($item); + } + + return $this->generateHtmlFromUrl($item); + } + + /** + * Generate HTML + * + * @access public + * @param Item $item + * @return boolean + */ + private function generateHtmlFromXml(Item $item) + { + $videoId = $item->getTag('yt:videoId'); + + if (! 
empty($videoId)) { + $item->setContent(''); + return true; + } + + return false; + } + + /** + * Generate HTML from item URL + * + * @access public + * @param Item $item + * @return bool + */ + public function generateHtmlFromUrl(Item $item) + { + if (preg_match('/youtube\.com\/watch\?v=(.*)/', $item->getUrl(), $matches)) { + $item->setContent(''); + return true; + } + + return false; + } +} diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Atom.php b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Atom.php index 356453c9d..63259235f 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Atom.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Atom.php @@ -150,7 +150,7 @@ class Atom extends Parser $updated = XmlParser::getXPathResult($xml, 'atom:updated', $this->namespaces) ?: XmlParser::getXPathResult($xml, 'updated'); - $feed->date = $this->date->getDateTime((string) current($updated)); + $feed->date = $this->getDateParser()->getDateTime((string) current($updated)); } /** @@ -168,8 +168,8 @@ class Atom extends Parser $updated = XmlParser::getXPathResult($entry, 'atom:updated', $this->namespaces) ?: XmlParser::getXPathResult($entry, 'updated'); - $published = !empty($published) ? $this->date->getDateTime((string) current($published)) : null; - $updated = !empty($updated) ? $this->date->getDateTime((string) current($updated)) : null; + $published = !empty($published) ? $this->getDateParser()->getDateTime((string) current($published)) : null; + $updated = !empty($updated) ? 
$this->getDateParser()->getDateTime((string) current($updated)) : null; if ($published === null && $updated === null) { $item->date = $feed->getDate(); // We use the feed date if there is no date for the item diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/DateParser.php b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/DateParser.php index e4d08b527..4ad00789a 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/DateParser.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/DateParser.php @@ -4,20 +4,22 @@ namespace PicoFeed\Parser; use DateTime; use DateTimeZone; +use PicoFeed\Base; /** * Date Parser. * * @author Frederic Guillot */ -class DateParser +class DateParser extends Base { /** * Timezone used to parse feed dates. * + * @access private * @var string */ - public $timezone = 'UTC'; + private $timezone = 'UTC'; /** * Supported formats [ 'format' => length ]. @@ -88,7 +90,7 @@ class DateParser */ public function getValidDate($format, $value) { - $date = DateTime::createFromFormat($format, $value, new DateTimeZone($this->timezone)); + $date = DateTime::createFromFormat($format, $value, $this->getTimeZone()); if ($date !== false) { $errors = DateTime::getLastErrors(); @@ -108,6 +110,17 @@ class DateParser */ public function getCurrentDateTime() { - return new DateTime('now', new DateTimeZone($this->timezone)); + return new DateTime('now', $this->getTimeZone()); + } + + /** + * Get DateTimeZone instance + * + * @access public + * @return DateTimeZone + */ + public function getTimeZone() + { + return new DateTimeZone($this->config->getTimezone() ?: $this->timezone); } } diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Item.php b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Item.php index 22d7c5951..34e557a11 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Item.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Item.php @@ -102,6 +102,18 @@ class Item */ public $namespaces = array(); + /** + * Check if a XML namespace exists 
+ * + * @access public + * @param string $namespace + * @return bool + */ + public function hasNamespace($namespace) + { + return array_key_exists($namespace, $this->namespaces); + } + /** * Get specific XML tag or attribute value. * @@ -112,12 +124,10 @@ class Item */ public function getTag($tag, $attribute = '') { - // convert to xPath attribute query if ($attribute !== '') { $attribute = '/@'.$attribute; } - // construct query $query = './/'.$tag.$attribute; $elements = XmlParser::getXPathResult($this->xml, $query, $this->namespaces); @@ -155,13 +165,29 @@ class Item } /** - * Get url. + * Get URL + * + * @access public + * @return string */ public function getUrl() { return $this->url; } + /** + * Set URL + * + * @access public + * @param string $url + * @return Item + */ + public function setUrl($url) + { + $this->url = $url; + return $this; + } + /** * Get id. */ @@ -186,6 +212,19 @@ class Item return $this->content; } + /** + * Set content + * + * @access public + * @param string $value + * @return Item + */ + public function setContent($value) + { + $this->content = $value; + return $this; + } + /** * Get enclosure url. */ diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php index 5130b68bb..433f21a26 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php @@ -2,12 +2,15 @@ namespace PicoFeed\Parser; +use PicoFeed\Processor\ContentFilterProcessor; +use PicoFeed\Processor\ContentGeneratorProcessor; +use PicoFeed\Processor\ItemPostProcessor; +use PicoFeed\Processor\ScraperProcessor; use SimpleXMLElement; use PicoFeed\Client\Url; use PicoFeed\Encoding\Encoding; use PicoFeed\Filter\Filter; use PicoFeed\Logging\Logger; -use PicoFeed\Scraper\Scraper; /** * Base parser class. 
@@ -28,7 +31,7 @@ abstract class Parser * * @var \PicoFeed\Parser\DateParser */ - protected $date; + private $dateParser; /** * Hash algorithm used to generate item id, any value supported by PHP, see hash_algos(). @@ -66,32 +69,12 @@ abstract class Parser protected $used_namespaces = array(); /** - * Enable the content filtering. + * Item Post Processor instance * - * @var bool + * @access private + * @var ItemPostProcessor */ - private $enable_filter = true; - - /** - * Enable the content grabber. - * - * @var bool - */ - private $enable_grabber = false; - - /** - * Enable the content grabber on all pages. - * - * @var bool - */ - private $grabber_needs_rule_file = false; - - /** - * Ignore those urls for the content scraper. - * - * @var array - */ - private $grabber_ignore_urls = array(); + private $itemPostProcessor; /** * Constructor. @@ -102,7 +85,6 @@ abstract class Parser */ public function __construct($content, $http_encoding = '', $fallback_url = '') { - $this->date = new DateParser(); $this->fallback_url = $fallback_url; $xml_encoding = XmlParser::getEncodingFromXmlTag($content); @@ -112,6 +94,10 @@ abstract class Parser // Encode everything in UTF-8 Logger::setMessage(get_called_class().': HTTP Encoding "'.$http_encoding.'" ; XML Encoding "'.$xml_encoding.'"'); $this->content = Encoding::convert($this->content, $xml_encoding ?: $http_encoding); + + $this->itemPostProcessor = new ItemPostProcessor($this->config); + $this->itemPostProcessor->register(new ContentGeneratorProcessor($this->config)); + $this->itemPostProcessor->register(new ContentFilterProcessor($this->config)); } /** @@ -173,15 +159,11 @@ abstract class Parser // Id generation can use the item url/title/content (order is important) $this->findItemId($entry, $item, $feed); - $this->findItemDate($entry, $item, $feed); $this->findItemEnclosure($entry, $item, $feed); $this->findItemLanguage($entry, $item, $feed); - // Order is important (avoid double filtering) - 
$this->filterItemContent($feed, $item); - $this->scrapWebsite($item); - + $this->itemPostProcessor->execute($feed, $item); $feed->items[] = $item; } @@ -230,43 +212,29 @@ abstract class Parser } /** - * Fetch item content with the content grabber. + * Get Item Post Processor instance * - * @param Item $item Item object + * @access public + * @return ItemPostProcessor */ - public function scrapWebsite(Item $item) + public function getItemPostProcessor() { - if ($this->enable_grabber && !in_array($item->getUrl(), $this->grabber_ignore_urls)) { - $grabber = new Scraper($this->config); - $grabber->setUrl($item->getUrl()); - - if ($this->grabber_needs_rule_file) { - $grabber->disableCandidateParser(); - } - - $grabber->execute(); - - if ($grabber->hasRelevantContent()) { - $item->content = $grabber->getFilteredContent(); - } - } + return $this->itemPostProcessor; } /** - * Filter HTML for entry content. + * Get DateParser instance * - * @param Feed $feed Feed object - * @param Item $item Item object + * @access public + * @return DateParser */ - public function filterItemContent(Feed $feed, Item $item) + public function getDateParser() { - if ($this->isFilteringEnabled()) { - $filter = Filter::html($item->getContent(), $feed->getSiteUrl()); - $filter->setConfig($this->config); - $item->content = $filter->execute(); - } else { - Logger::setMessage(get_called_class().': Content filtering disabled'); + if ($this->dateParser === null) { + return new DateParser($this->config); } + + return $this->dateParser; } /** @@ -316,31 +284,11 @@ abstract class Parser * Set Hash algorithm used for id generation. * * @param string $algo Algorithm name - * * @return \PicoFeed\Parser\Parser */ public function setHashAlgo($algo) { $this->hash_algo = $algo ?: $this->hash_algo; - - return $this; - } - - /** - * Set a different timezone. 
- * - * @see http://php.net/manual/en/timezones.php - * - * @param string $timezone Timezone - * - * @return \PicoFeed\Parser\Parser - */ - public function setTimezone($timezone) - { - if ($timezone) { - $this->date->timezone = $timezone; - } - return $this; } @@ -354,7 +302,6 @@ abstract class Parser public function setConfig($config) { $this->config = $config; - return $this; } @@ -365,21 +312,8 @@ abstract class Parser */ public function disableContentFiltering() { - $this->enable_filter = false; - } - - /** - * Return true if the content filtering is enabled. - * - * @return bool - */ - public function isFilteringEnabled() - { - if ($this->config === null) { - return $this->enable_filter; - } - - return $this->config->getContentFiltering($this->enable_filter); + $this->itemPostProcessor->unregister('PicoFeed\Processor\ContentFilterProcessor'); + return $this; } /** @@ -392,8 +326,14 @@ abstract class Parser */ public function enableContentGrabber($needs_rule_file = false) { - $this->enable_grabber = true; - $this->grabber_needs_rule_file = $needs_rule_file; + $processor = new ScraperProcessor($this->config); + + if ($needs_rule_file) { + $processor->getScraper()->disableCandidateParser(); + } + + $this->itemPostProcessor->register($processor); + return $this; } /** @@ -405,7 +345,8 @@ abstract class Parser */ public function setGrabberIgnoreUrls(array $urls) { - $this->grabber_ignore_urls = $urls; + $this->itemPostProcessor->getProcessor('PicoFeed\Processor\ScraperProcessor')->ignoreUrls($urls); + return $this; } /** diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Rss10.php b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Rss10.php index dd2aa7a8c..315c7db26 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Rss10.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Rss10.php @@ -149,7 +149,7 @@ class Rss10 extends Parser $date = XmlParser::getXPathResult($xml, 'rss:channel/dc:date', $this->namespaces) ?: XmlParser::getXPathResult($xml, 
'channel/dc:date', $this->namespaces); - $feed->date = $this->date->getDateTime((string) current($date)); + $feed->date = $this->getDateParser()->getDateTime((string) current($date)); } /** @@ -163,7 +163,7 @@ class Rss10 extends Parser { $date = XmlParser::getXPathResult($entry, 'dc:date', $this->namespaces); - $item->date = empty($date) ? $feed->getDate() : $this->date->getDateTime((string) current($date)); + $item->date = empty($date) ? $feed->getDate() : $this->getDateParser()->getDateTime((string) current($date)); } /** diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Rss20.php b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Rss20.php index 005691f48..b265656ac 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Rss20.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Rss20.php @@ -139,11 +139,11 @@ class Rss20 extends Parser $publish_date = XmlParser::getXPathResult($xml, 'channel/pubDate'); $update_date = XmlParser::getXPathResult($xml, 'channel/lastBuildDate'); - $published = !empty($publish_date) ? $this->date->getDateTime((string) current($publish_date)) : null; - $updated = !empty($update_date) ? $this->date->getDateTime((string) current($update_date)) : null; + $published = !empty($publish_date) ? $this->getDateParser()->getDateTime((string) current($publish_date)) : null; + $updated = !empty($update_date) ? 
$this->getDateParser()->getDateTime((string) current($update_date)) : null; if ($published === null && $updated === null) { - $feed->date = $this->date->getCurrentDateTime(); // We use the current date if there is no date for the feed + $feed->date = $this->getDateParser()->getCurrentDateTime(); // We use the current date if there is no date for the feed } elseif ($published !== null && $updated !== null) { $feed->date = max($published, $updated); // We use the most recent date between published and updated } else { @@ -162,7 +162,7 @@ class Rss20 extends Parser { $date = XmlParser::getXPathResult($entry, 'pubDate'); - $item->date = empty($date) ? $feed->getDate() : $this->date->getDateTime((string) current($date)); + $item->date = empty($date) ? $feed->getDate() : $this->getDateParser()->getDateTime((string) current($date)); } /** diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/XmlParser.php b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/XmlParser.php index 7c8ebc602..6ed5a4887 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/XmlParser.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/XmlParser.php @@ -38,7 +38,7 @@ class XmlParser * * @param string $input XML content * - * @return \DOMNDocument + * @return \DOMDocument */ public static function getDomDocument($input) { @@ -60,10 +60,10 @@ class XmlParser * Small wrapper around ZendXml to turn their exceptions into picoFeed * exceptions * @param $input the xml to load - * @param $dom pass in a dom document or use null/omit if simpleXml should + * @param $dom pass in a dom document or use null/omit if simpleXml should * be used */ - private static function scan($input, $dom=null) + private static function scan($input, $dom = null) { try { return Security::scan($input, $dom); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Processor/ContentFilterProcessor.php b/vendor/fguillot/picofeed/lib/PicoFeed/Processor/ContentFilterProcessor.php new file mode 100644 index 000000000..9b7ddcce3 --- 
/dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Processor/ContentFilterProcessor.php @@ -0,0 +1,37 @@ +config->getContentFiltering(true)) { + $filter = Filter::html($item->getContent(), $feed->getSiteUrl()); + $filter->setConfig($this->config); + $item->setContent($filter->execute()); + } else { + Logger::setMessage(get_called_class().': Content filtering disabled'); + } + } +} diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Processor/ContentGeneratorProcessor.php b/vendor/fguillot/picofeed/lib/PicoFeed/Processor/ContentGeneratorProcessor.php new file mode 100644 index 000000000..49adf9ccb --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Processor/ContentGeneratorProcessor.php @@ -0,0 +1,49 @@ +generators as $generator) { + $className = '\PicoFeed\Generator\\'.ucfirst($generator).'ContentGenerator'; + $object = new $className($this->config); + + if ($object->execute($item)) { + return true; + } + } + + return false; + } +} diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Processor/ItemPostProcessor.php b/vendor/fguillot/picofeed/lib/PicoFeed/Processor/ItemPostProcessor.php new file mode 100644 index 000000000..ff9740b78 --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Processor/ItemPostProcessor.php @@ -0,0 +1,84 @@ +processors as $processor) { + if ($processor->execute($feed, $item)) { + return true; + } + } + + return false; + } + + /** + * Register a new Item post-processor + * + * @access public + * @param ItemProcessorInterface $processor + * @return ItemPostProcessor + */ + public function register(ItemProcessorInterface $processor) + { + $this->processors[get_class($processor)] = $processor; + return $this; + } + + /** + * Remove Processor instance + * + * @access public + * @param string $class + * @return ItemPostProcessor + */ + public function unregister($class) + { + if (isset($this->processors[$class])) { + unset($this->processors[$class]); + } + + return $this; + } + + /** + * Get Processor instance + * + * @access public 
+ * @param string $class + * @return ItemProcessorInterface|null + */ + public function getProcessor($class) + { + return isset($this->processors[$class]) ? $this->processors[$class] : null; + } +} diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Processor/ItemProcessorInterface.php b/vendor/fguillot/picofeed/lib/PicoFeed/Processor/ItemProcessorInterface.php new file mode 100644 index 000000000..5d5322624 --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Processor/ItemProcessorInterface.php @@ -0,0 +1,25 @@ +getUrl(), $this->ignoredUrls)) { + $scraper = $this->getScraper(); + $scraper->setUrl($item->getUrl()); + $scraper->execute(); + + if ($scraper->hasRelevantContent()) { + $item->setContent($scraper->getFilteredContent()); + } + } + + return false; + } + + /** + * Ignore list of URLs + * + * @access public + * @param array $urls + * @return $this + */ + public function ignoreUrls(array $urls) + { + $this->ignoredUrls = $urls; + return $this; + } + + /** + * Returns Scraper instance + * + * @access public + * @return Scraper + */ + public function getScraper() + { + if ($this->scraper === null) { + $this->scraper = new Scraper($this->config); + } + + return $this->scraper; + } +} diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Reader/Favicon.php b/vendor/fguillot/picofeed/lib/PicoFeed/Reader/Favicon.php index 53c5cf7a4..09feb49e3 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Reader/Favicon.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Reader/Favicon.php @@ -2,11 +2,11 @@ namespace PicoFeed\Reader; -use DOMXpath; +use DOMXPath; +use PicoFeed\Base; use PicoFeed\Client\Client; use PicoFeed\Client\ClientException; use PicoFeed\Client\Url; -use PicoFeed\Config\Config; use PicoFeed\Logging\Logger; use PicoFeed\Parser\XmlParser; @@ -17,7 +17,7 @@ use PicoFeed\Parser\XmlParser; * * @author Frederic Guillot */ -class Favicon +class Favicon extends Base { /** * Valid types for favicon (supported by browsers). 
@@ -33,13 +33,6 @@ class Favicon 'image/svg+xml' ); - /** - * Config class instance. - * - * @var \PicoFeed\Config\Config - */ - private $config; - /** * Icon binary content. * @@ -54,16 +47,6 @@ class Favicon */ private $content_type = ''; - /** - * Constructor. - * - * @param \PicoFeed\Config\Config $config Config class instance - */ - public function __construct(Config $config = null) - { - $this->config = $config ?: new Config(); - } - /** * Get the icon file content (available only after the download). * diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Reader/Reader.php b/vendor/fguillot/picofeed/lib/PicoFeed/Reader/Reader.php index cfe517146..7b26deaec 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Reader/Reader.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Reader/Reader.php @@ -3,7 +3,7 @@ namespace PicoFeed\Reader; use DOMXPath; -use PicoFeed\Config\Config; +use PicoFeed\Base; use PicoFeed\Client\Client; use PicoFeed\Client\Url; use PicoFeed\Logging\Logger; @@ -14,7 +14,7 @@ use PicoFeed\Parser\XmlParser; * * @author Frederic Guillot */ -class Reader +class Reader extends Base { /** * Feed formats for detection. @@ -29,24 +29,6 @@ class Reader 'Rss10' => '//rdf', ); - /** - * Config class instance. - * - * @var \PicoFeed\Config\Config - */ - private $config; - - /** - * Constructor. - * - * @param \PicoFeed\Config\Config $config Config class instance - */ - public function __construct(Config $config = null) - { - $this->config = $config ?: new Config(); - Logger::setTimezone($this->config->getTimezone()); - } - /** * Download a feed (no discovery). 
* @@ -163,7 +145,6 @@ class Reader $parser = new $className($content, $encoding, $url); $parser->setHashAlgo($this->config->getParserHashAlgo()); - $parser->setTimezone($this->config->getTimezone()); $parser->setConfig($this->config); return $parser; diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.over-blog.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.over-blog.com.php new file mode 100644 index 000000000..cc5d83c78 --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.over-blog.com.php @@ -0,0 +1,11 @@ + array( + '%.*%' => array( + 'test_url' => 'http://eliascarpe.over-blog.com/2015/12/re-upload-projets-d-avenir.html', + 'body' => array( + '//div[contains(concat(" ", normalize-space(@class), " "), " ob-section ")]', + ), + ) + ) +); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/encyclopedie.naheulbeuk.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/encyclopedie.naheulbeuk.com.php new file mode 100644 index 000000000..19bcbdefb --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/encyclopedie.naheulbeuk.com.php @@ -0,0 +1,13 @@ + array( + '%.*%' => array( + 'test_url' => 'http://encyclopedie.naheulbeuk.com/article.php3?id_article=352', + 'body' => array( + '//td//h1[@class="titre-texte"]', + '//td//div[@class="surtitre"]', + '//td//div[@class="texte"]', + ), + ) + ), +); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/greekculture.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/greekculture.com.php deleted file mode 100644 index 9410de9c5..000000000 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/greekculture.com.php +++ /dev/null @@ -1,12 +0,0 @@ - array( - '%/joyoftech/.*%' => array( - 'body' => array( - '//img[@width="640"]', - ), - 'test_url' => 'http://www.geekculture.com/joyoftech/joyarchives/2235.html', - ), - ), -); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/news.sciencemag.org.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/news.sciencemag.org.php deleted file mode 
100644 index 9b572ef4b..000000000 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/news.sciencemag.org.php +++ /dev/null @@ -1,18 +0,0 @@ - array( - '%.*%' => array( - 'test_url' => 'http://news.sciencemag.org/biology/2015/09/genetic-engineering-turns-common-plant-cancer-fighter', - 'body' => array( - '//div[@class="content"]', - ), - 'strip' => array( - '//h1[@class="snews-article__headline"]', - '//div[contains(@class,"easy_social_box")]', - '//div[@class="author-teaser"]', - '//div[@class="article-byline"]', - ), - ), - ) -); - diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/rugbyrama.fr.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/rugbyrama.fr.php new file mode 100644 index 000000000..2280b66d2 --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/rugbyrama.fr.php @@ -0,0 +1,20 @@ + array( + '%.*%' => array( + 'test_url' => 'http://www.rugbyrama.fr/rugby/top-14/2015-2016/top-14-hayman-coupe-du-monde-finale-2012-lutte.-voici-levan-chilachava-toulon_sto5283863/story.shtml', + 'body' => array( + '//div[@class="story-simple-content"]', + ), + 'strip' => array( + '//script', + '//form', + '//style', + '//*[@class="share-buttons"]', + '//*[@class="show-mobile-block"]', + '//*[@class="hide-desktop"]', + '//*[@id="tracking_img"]', + ) + ) + ) +); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.franceculture.fr.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.franceculture.fr.php index 67e3253a1..f7ec0d8db 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.franceculture.fr.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.franceculture.fr.php @@ -4,8 +4,7 @@ return array( '%.*%' => array( 'test_url' => 'http://www.franceculture.fr/emission-culture-eco-la-finance-aime-toujours-la-france-2016-01-08', 'body' => array( - '//div[@class="heading"]/*/*/div[contains(@class,"player-inline")]', - '//article/div[@class="text-zone"]', + '//div[@class="text-zone"]', ), 'strip' => array( 
'//ul[@class="tags"]', diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.monsieur-le-chien.fr.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.monsieur-le-chien.fr.php new file mode 100644 index 000000000..5f5e987ba --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.monsieur-le-chien.fr.php @@ -0,0 +1,11 @@ + array( + '%.*%' => array( + 'test_url' => 'http://www.monsieur-le-chien.fr/index.php?planche=672', + 'body' => array( + '//img[starts-with(@src, "i/planches/")]', + ), + ) + ) +); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.sciencemag.org.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.sciencemag.org.php new file mode 100644 index 000000000..3d348577d --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.sciencemag.org.php @@ -0,0 +1,16 @@ + array( + '%.*%' => array( + 'test_url' => 'http://www.sciencemag.org/news/2016/01/could-bright-foamy-wakes-giant-ships-combat-global-warming', + 'body' => array( + '//div[@class="row--hero"]', + '//article[contains(@class,"primary")]', + ), + 'strip' => array( + '//header[@class="article__header"]', + '//footer[@class="article__foot"]', + ), + ), + ) +); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Scraper/RuleLoader.php b/vendor/fguillot/picofeed/lib/PicoFeed/Scraper/RuleLoader.php index 0cffbf617..6650682d1 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Scraper/RuleLoader.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Scraper/RuleLoader.php @@ -2,8 +2,8 @@ namespace PicoFeed\Scraper; +use PicoFeed\Base; use PicoFeed\Logging\Logger; -use PicoFeed\Config\Config; /** * RuleLoader class. @@ -11,25 +11,8 @@ use PicoFeed\Config\Config; * @author Frederic Guillot * @author Bernhard Posselt */ -class RuleLoader +class RuleLoader extends Base { - /** - * Config object. - * - * @var \PicoFeed\Config\Config - */ - private $config; - - /** - * Constructor.
- * - * @param \PicoFeed\Config\Config $config Config class instance - */ - public function __construct(Config $config) - { - $this->config = $config; - } - /** * Get the rules for an URL. * @@ -111,12 +94,14 @@ class RuleLoader */ public function getRulesFolders() { - $folders = array(__DIR__.'/../Rules'); + $folders = array(); if ($this->config !== null && $this->config->getGrabberRulesFolder() !== null) { $folders[] = $this->config->getGrabberRulesFolder(); } + $folders[] = __DIR__ . '/../Rules'; + return $folders; } } diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Scraper/Scraper.php b/vendor/fguillot/picofeed/lib/PicoFeed/Scraper/Scraper.php index f1d1222d9..980a88da6 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Scraper/Scraper.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Scraper/Scraper.php @@ -2,10 +2,10 @@ namespace PicoFeed\Scraper; +use PicoFeed\Base; use PicoFeed\Client\Client; use PicoFeed\Client\ClientException; use PicoFeed\Client\Url; -use PicoFeed\Config\Config; use PicoFeed\Encoding\Encoding; use PicoFeed\Filter\Filter; use PicoFeed\Logging\Logger; @@ -16,7 +16,7 @@ use PicoFeed\Parser\XmlParser; * * @author Frederic Guillot */ -class Scraper +class Scraper extends Base { /** * URL. @@ -53,24 +53,6 @@ class Scraper */ private $enableCandidateParser = true; - /** - * Config object. - * - * @var \PicoFeed\Config\Config - */ - private $config; - - /** - * Constructor. - * - * @param \PicoFeed\Config\Config $config Config class instance - */ - public function __construct(Config $config) - { - $this->config = $config; - Logger::setTimezone($this->config->getTimezone()); - } - /** * Disable candidates parsing. 
* @@ -79,7 +61,6 @@ class Scraper public function disableCandidateParser() { $this->enableCandidateParser = false; - return $this; } @@ -227,45 +208,19 @@ class Scraper */ public function execute() { - $this->download(); - - if (!$this->skipProcessing()) { - $this->prepareHtml(); + $this->content = ''; + $this->html = ''; + $this->encoding = ''; - $parser = $this->getParser(); - - if ($parser !== null) { - $this->content = $parser->execute(); - Logger::setMessage(get_called_class().': Content length: '.strlen($this->content).' bytes'); - } - } - } - - /** - * Returns true if the parsing must be skipped. - * - * @return bool - */ - public function skipProcessing() - { - $handlers = array( - 'detectStreamingVideos', - 'detectPdfFiles', - ); - - foreach ($handlers as $handler) { - if ($this->$handler()) { - return true; - } - } + $this->download(); + $this->prepareHtml(); - if (empty($this->html)) { - Logger::setMessage(get_called_class().': Raw HTML is empty'); + $parser = $this->getParser(); - return true; + if ($parser !== null) { + $this->content = $parser->execute(); + Logger::setMessage(get_called_class().': Content length: '.strlen($this->content).' bytes'); } - - return false; } /** @@ -287,17 +242,14 @@ class Scraper if (preg_match($pattern, $sub_url)) { Logger::setMessage(get_called_class().': Matched url '.$sub_url); - return new RuleParser($this->html, $rule); } } } elseif ($this->enableCandidateParser) { Logger::setMessage(get_called_class().': Parse content with candidates'); - - return new CandidateParser($this->html); } - return; + return new CandidateParser($this->html); } /** @@ -312,30 +264,4 @@ class Scraper Logger::setMessage(get_called_class().': HTTP Encoding "'.$this->encoding.'" ; HTML Encoding "'.$html_encoding.'"'); } - - /** - * Return the Youtube embed player and skip processing. 
- * - * @return bool - */ - public function detectStreamingVideos() - { - if (preg_match("#(?<=v=|v\/|vi=|vi\/|youtu.be\/)[a-zA-Z0-9_-]{11}#", $this->url, $matches)) { - $this->content = ''; - - return true; - } - - return false; - } - - /** - * Skip processing for PDF documents. - * - * @return bool - */ - public function detectPdfFiles() - { - return substr($this->url, -3) === 'pdf'; - } } -- cgit v1.2.3