blob: 99669656ac60bada09e5d370ff0ef2b2345cb11c (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
|
<?php
namespace PicoFeed\Processor;
use PicoFeed\Base;
use PicoFeed\Parser\Feed;
use PicoFeed\Parser\Item;
use PicoFeed\Scraper\Scraper;
/**
 * Scraper Processor
 *
 * Item processor that replaces the content of a feed item with the content
 * a Scraper extracts from the item URL, unless that URL is in the ignore list.
 *
 * @package PicoFeed\Processor
 * @author Frederic Guillot
 */
class ScraperProcessor extends Base implements ItemProcessorInterface
{
    /**
     * URLs that must not be scraped
     *
     * @var string[]
     */
    private $ignoredUrls = array();

    /**
     * Scraper instance, created on first use by getScraper() and then reused
     *
     * @var Scraper|null
     */
    private $scraper;

    /**
     * Execute Item Processor
     *
     * When the item URL is not ignored, runs the scraper on that URL and,
     * if the scraper reports relevant content, overwrites the item content
     * with the filtered result. Otherwise the item is left untouched.
     *
     * @access public
     * @param  Feed $feed Feed the item belongs to (not used by this processor)
     * @param  Item $item Item whose content may be replaced
     * @return bool Always false. NOTE(review): presumably this tells the caller
     *              to keep the item; confirm against ItemProcessorInterface.
     */
    public function execute(Feed $feed, Item $item)
    {
        $url = $item->getUrl();

        // Strict comparison: only an identical string counts as ignored, so
        // type juggling can never make an unrelated entry match the URL.
        if (!in_array($url, $this->ignoredUrls, true)) {
            $scraper = $this->getScraper();
            $scraper->setUrl($url);
            $scraper->execute();

            if ($scraper->hasRelevantContent()) {
                $item->setContent($scraper->getFilteredContent());
            }
        }

        return false;
    }

    /**
     * Ignore list of URLs
     *
     * Replaces (does not merge with) any previously configured list.
     *
     * @access public
     * @param  array $urls URLs to skip when scraping
     * @return $this
     */
    public function ignoreUrls(array $urls)
    {
        $this->ignoredUrls = $urls;

        return $this;
    }

    /**
     * Returns Scraper instance
     *
     * The instance is created on the first call and the same object is
     * returned afterwards.
     *
     * @access public
     * @return Scraper
     */
    public function getScraper()
    {
        if ($this->scraper === null) {
            // $this->config is not declared in this class; presumably it is
            // provided by Base (verify there).
            $this->scraper = new Scraper($this->config);
        }

        return $this->scraper;
    }
}
|