summary | refs | log | tree | commit | diff | stats
path: root/utility
diff options
context:
space:
mode:
author: Bernhard Posselt <nukeawhale@gmail.com> 2013-08-29 13:30:38 +0200
committer: Bernhard Posselt <nukeawhale@gmail.com> 2013-08-29 13:30:38 +0200
commit: 4d7f53380d31154709faa3f9d6cdc467ff141951 (patch)
tree: fdbe6e2e76b7b3c3483cc50216aaac8ffd34dc07 /utility
parent: a73fe145a2856d6d075f8541f28c70b5cf01e1db (diff)
allow more than one article enhancer per url based on the url regex, also allow embedded youtube videos that start with //
Diffstat (limited to 'utility')
-rw-r--r-- utility/articleenhancer/articleenhancer.php 46
-rw-r--r-- utility/articleenhancer/cyanideandhappinessenhancer.php 6
2 files changed, 26 insertions, 26 deletions
diff --git a/utility/articleenhancer/articleenhancer.php b/utility/articleenhancer/articleenhancer.php
index 194137e72..7fc67c660 100644
--- a/utility/articleenhancer/articleenhancer.php
+++ b/utility/articleenhancer/articleenhancer.php
@@ -32,8 +32,6 @@ abstract class ArticleEnhancer {
private $feedRegex;
- private $articleUrlRegex;
- private $articleXPath;
private $purifier;
private $fileFactory;
private $maximumTimeout;
@@ -43,38 +41,38 @@ abstract class ArticleEnhancer {
* @param $purifier the purifier object to clean the html which will be
* matched
* @param SimplePieFileFactory a factory for getting a simple pie file instance
- * @param string $articleUrlRegex the regex to match which article should be
- * handled
- * @param string $articleXPath the xpath which tells the fetcher with what
- * body the feed should be replaced
+ * @param array $regexXPathPair an associative array containing regex to
+ * match the url and the xpath that should be used for it to extract the
+ * page
* @param int $maximumTimeout maximum timeout in seconds
*/
public function __construct($purifier, SimplePieFileFactory $fileFactory,
- $articleUrlRegex, $articleXPath,
- $maximumTimeout=10){
+ array $regexXPathPair, $maximumTimeout=10){
$this->purifier = $purifier;
- $this->articleUrlRegex = $articleUrlRegex;
- $this->articleXPath = $articleXPath;
+ $this->regexXPathPair = $regexXPathPair;
$this->fileFactory = $fileFactory;
- $this->timeout = $maximumTimeout;
+ $this->maximumTimeout = $maximumTimeout;
}
public function enhance($item){
- if(preg_match($this->articleUrlRegex, $item->getUrl())) {
- $file = $this->fileFactory->getFile($item->getUrl(), $this->maximumTimeout);
- $dom = new \DOMDocument();
- @$dom->loadHTML($file->body);
- $xpath = new \DOMXpath($dom);
- $xpathResult = $xpath->evaluate($this->articleXPath);
-
- // in case it wasnt a text query assume its a single
- if(!is_string($xpathResult)) {
- $xpathResult = $this->domToString($xpathResult);
+ foreach($this->regexXPathPair as $regex => $search) {
+
+ if(preg_match($regex, $item->getUrl())) {
+ $file = $this->fileFactory->getFile($item->getUrl(), $this->maximumTimeout);
+ $dom = new \DOMDocument();
+ @$dom->loadHTML($file->body);
+ $xpath = new \DOMXpath($dom);
+ $xpathResult = $xpath->evaluate($search);
+
+ // in case it wasnt a text query assume its a single
+ if(!is_string($xpathResult)) {
+ $xpathResult = $this->domToString($xpathResult);
+ }
+
+ $sanitizedResult = $this->purifier->purify($xpathResult);
+ $item->setBody($sanitizedResult);
}
-
- $sanitizedResult = $this->purifier->purify($xpathResult);
- $item->setBody($sanitizedResult);
}
return $item;
diff --git a/utility/articleenhancer/cyanideandhappinessenhancer.php b/utility/articleenhancer/cyanideandhappinessenhancer.php
index 1faee6d5c..037a3179e 100644
--- a/utility/articleenhancer/cyanideandhappinessenhancer.php
+++ b/utility/articleenhancer/cyanideandhappinessenhancer.php
@@ -36,8 +36,10 @@ class CyanideAndHappinessEnhancer extends ArticleEnhancer {
parent::__construct(
$purifier,
$fileFactory,
- '/explosm.net\/comics/', // match article url
- '//*[@id=\'maincontent\']/div[2]/div', // xpath statement to extract the html from the page
+ array(
+ '/explosm.net\/comics/' => '//*[@id=\'maincontent\']/div[2]/div',
+ '/explosm.net\/show/' => '//*[@id=\'videoPlayer\']/iframe'
+ ),
$timeout
);
}