summaryrefslogtreecommitdiffstats
path: root/vendor/fguillot/picofeed/lib/PicoFeed/Processor/ScraperProcessor.php
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/fguillot/picofeed/lib/PicoFeed/Processor/ScraperProcessor.php')
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Processor/ScraperProcessor.php71
1 files changed, 71 insertions, 0 deletions
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Processor/ScraperProcessor.php b/vendor/fguillot/picofeed/lib/PicoFeed/Processor/ScraperProcessor.php
new file mode 100644
index 000000000..99669656a
--- /dev/null
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Processor/ScraperProcessor.php
@@ -0,0 +1,71 @@
+<?php
+
+namespace PicoFeed\Processor;
+
+use PicoFeed\Base;
+use PicoFeed\Parser\Feed;
+use PicoFeed\Parser\Item;
+use PicoFeed\Scraper\Scraper;
+
+/**
+ * Scraper Processor
+ *
+ * @package PicoFeed\Processor
+ * @author Frederic Guillot
+ */
+class ScraperProcessor extends Base implements ItemProcessorInterface
+{
+    private $ignoredUrls = array(); // URLs that must never be scraped
+    private $scraper;               // Scraper instance, created lazily by getScraper()
+
+    /**
+     * Scrape the item URL, unless it is ignored, and replace the item content when relevant
+     *
+     * @access public
+     * @param  Feed $feed  Not used by this processor
+     * @param  Item $item  Item whose content is replaced when the scraper finds relevant content
+     * @return bool        Always false
+     */
+    public function execute(Feed $feed, Item $item)
+    {
+        // Strict match: a loose comparison lets entries such as 0 or true match unrelated URLs
+        if (!in_array($item->getUrl(), $this->ignoredUrls, true)) {
+            $scraper = $this->getScraper();
+            $scraper->setUrl($item->getUrl());
+            $scraper->execute();
+            if ($scraper->hasRelevantContent()) {
+                $item->setContent($scraper->getFilteredContent());
+            }
+        }
+
+        return false;
+    }
+
+    /**
+     * Set the list of URLs to skip (replaces any list set previously)
+     *
+     * @access public
+     * @param  array $urls  URL strings, compared strictly against the item URL
+     * @return $this
+     */
+    public function ignoreUrls(array $urls)
+    {
+        $this->ignoredUrls = $urls;
+        return $this;
+    }
+
+    /**
+     * Returns the Scraper instance, created from $this->config on first call and then reused
+     *
+     * @access public
+     * @return Scraper
+     */
+    public function getScraper()
+    {
+        if ($this->scraper === null) {
+            $this->scraper = new Scraper($this->config);
+        }
+
+        return $this->scraper;
+    }
+}