summary refs log tree commit diff stats
path: root/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php')
-rw-r--r-- vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php 31
1 files changed, 23 insertions, 8 deletions
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php
index 7ef904f0a..810494b70 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php
@@ -3,11 +3,11 @@
namespace PicoFeed\Parser;
use SimpleXMLElement;
+use PicoFeed\Client\Url;
use PicoFeed\Encoding\Encoding;
use PicoFeed\Filter\Filter;
use PicoFeed\Logging\Logger;
-use PicoFeed\Client\Url;
-use PicoFeed\Client\Grabber;
+use PicoFeed\Scraper\Scraper;
/**
* Base parser class
@@ -82,6 +82,14 @@ abstract class Parser
private $enable_grabber = false;
/**
+ * Only scrape pages that have a rule file (disables the candidate parser)
+ *
+ * @access private
+ * @var bool
+ */
+ private $grabber_needs_rule_file = false;
+
+ /**
* Ignore those urls for the content scraper
*
* @access private
@@ -237,11 +245,16 @@ abstract class Parser
{
if ($this->enable_grabber && ! in_array($item->getUrl(), $this->grabber_ignore_urls)) {
- $grabber = new Grabber($item->getUrl());
- $grabber->setConfig($this->config);
- $grabber->download();
+ $grabber = new Scraper($this->config);
+ $grabber->setUrl($item->getUrl());
+
+ if ($this->grabber_needs_rule_file) {
+ $grabber->disableCandidateParser();
+ }
+
+ $grabber->execute();
- if ($grabber->parse()) {
+ if ($grabber->hasRelevantContent()) {
$item->content = $grabber->getFilteredContent();
}
}
@@ -270,7 +283,6 @@ abstract class Parser
* Generate a unique id for an entry (hash all arguments)
*
* @access public
- * @param string $args Pieces of data to hash
* @return string
*/
public function generateId()
@@ -383,11 +395,14 @@ abstract class Parser
* Enable the content grabber
*
* @access public
+ * @param bool $needs_rule_file true if only pages with rule files should be
+ * scraped
* @return \PicoFeed\Parser\Parser
*/
- public function enableContentGrabber()
+ public function enableContentGrabber($needs_rule_file = false)
{
$this->enable_grabber = true;
+ $this->grabber_needs_rule_file = $needs_rule_file;
}
/**