summaryrefslogtreecommitdiffstats
path: root/vendor/fguillot/picofeed/lib/PicoFeed/Client/Grabber.php
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/fguillot/picofeed/lib/PicoFeed/Client/Grabber.php')
-rw-r--r--vendor/fguillot/picofeed/lib/PicoFeed/Client/Grabber.php91
1 files changed, 75 insertions, 16 deletions
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Grabber.php b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Grabber.php
index fe4890400..bec8ab07b 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Grabber.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Grabber.php
@@ -284,7 +284,7 @@ class Grabber
Logger::setMessage(get_called_class().': Content length: '.strlen($this->html).' bytes');
$rules = $this->getRules();
- if (is_array($rules)) {
+ if (! empty($rules)) {
Logger::setMessage(get_called_class().': Parse content with rules');
$this->parseContentWithRules($rules);
}
@@ -316,7 +316,13 @@ class Grabber
try {
$client = Client::getInstance();
- $client->setConfig($this->config);
+
+ if ($this->config !== null) {
+ $client->setConfig($this->config);
+ $client->setTimeout($this->config->getGrabberTimeout());
+ $client->setUserAgent($this->config->getGrabberUserAgent());
+ }
+
$client->execute($this->url);
$this->url = $client->getUrl();
@@ -335,31 +341,67 @@ class Grabber
* Try to find a predefined rule
*
* @access public
- * @return mixed
+ * @return array
*/
public function getRules()
{
$hostname = parse_url($this->url, PHP_URL_HOST);
- if ($hostname === false) {
- return false;
- }
+ if ($hostname !== false) {
- $files = array($hostname);
+ $files = $this->getRulesFileList($hostname);
- if (substr($hostname, 0, 4) == 'www.') {
- $files[] = substr($hostname, 4);
+ foreach ($this->getRulesFolders() as $folder) {
+ $rule = $this->loadRuleFile($folder, $files);
+
+ if (! empty($rule)) {
+ return $rule;
+ }
+ }
}
- if (($pos = strpos($hostname, '.')) !== false) {
- $files[] = substr($hostname, $pos);
- $files[] = substr($hostname, $pos + 1);
- $files[] = substr($hostname, 0, $pos);
+ return array();
+ }
+
+ /**
+ * Get the list of possible rules file names for a given hostname
+ *
+ * @access public
+ * @param string $hostname Hostname
+ * @return array
+ */
+ public function getRulesFileList($hostname)
+ {
+ $files = array($hostname); // subdomain.domain.tld
+ $parts = explode('.', $hostname);
+ $len = count($parts);
+
+ if ($len > 2) {
+ $subdomain = array_shift($parts);
+ $files[] = implode('.', $parts); // domain.tld
+ $files[] = '.'.implode('.', $parts); // .domain.tld
+ $files[] = $subdomain; // subdomain
+ }
+ else if ($len === 2) {
+ $files[] = '.'.implode('.', $parts); // .domain.tld
+ $files[] = $parts[0]; // domain
}
- foreach ($files as $file) {
+ return $files;
+ }
- $filename = __DIR__.'/../Rules/'.$file.'.php';
+ /**
+ * Load a rule file from the defined folder
+ *
+ * @access public
+ * @param string $folder Rule directory
+ * @param array $files List of possible file names
+ * @return array
+ */
+ public function loadRuleFile($folder, array $files)
+ {
+ foreach ($files as $file) {
+ $filename = $folder.'/'.$file.'.php';
if (file_exists($filename)) {
Logger::setMessage(get_called_class().' Load rule: '.$file);
@@ -367,7 +409,24 @@ class Grabber
}
}
- return false;
+ return array();
+ }
+
+ /**
+ * Get the list of folders that contains rules
+ *
+ * @access public
+ * @return array
+ */
+ public function getRulesFolders()
+ {
+ $folders = array(__DIR__.'/../Rules');
+
+ if ($this->config !== null && $this->config->getGrabberRulesFolder() !== null) {
+ $folders[] = $this->config->getGrabberRulesFolder();
+ }
+
+ return $folders;
}
/**