allow more than one article enhancer per url based on the url regex, also allow embedded youtube videos that start with //

author: Bernhard Posselt <nukeawhale@gmail.com> 2013-08-29 13:30:38 +0200
committer: Bernhard Posselt <nukeawhale@gmail.com> 2013-08-29 13:30:38 +0200
commit: 4d7f53380d31154709faa3f9d6cdc467ff141951 (patch)
tree: fdbe6e2e76b7b3c3483cc50216aaac8ffd34dc07 /utility
parent: a73fe145a2856d6d075f8541f28c70b5cf01e1db (diff)
2 files changed, 26 insertions, 26 deletions
diff --git a/utility/articleenhancer/articleenhancer.php b/utility/articleenhancer/articleenhancer.php
index 194137e72..7fc67c660 100644
--- a/utility/articleenhancer/articleenhancer.php
+++ b/utility/articleenhancer/articleenhancer.php
@@ -32,8 +32,6 @@ abstract class ArticleEnhancer {
 
 
 	private $feedRegex;
-	private $articleUrlRegex;
-	private $articleXPath;
 	private $purifier;
 	private $fileFactory;
 	private $maximumTimeout;
@@ -43,38 +41,38 @@ abstract class ArticleEnhancer {
 	 * @param $purifier the purifier object to clean the html which will be
 	 * matched
 	 * @param SimplePieFileFactory a factory for getting a simple pie file instance
-	 * @param string $articleUrlRegex the regex to match which article should be
-	 * handled
-	 * @param string $articleXPath the xpath which tells the fetcher with what
-	 * body the feed should be replaced
+	 * @param array $regexXPathPair an associative array that maps each regex
+	 * used to match the article url to the xpath that extracts the content
+	 * from the matching page
 	 * @param int $maximumTimeout maximum timeout in seconds
 	 */
 	public function __construct($purifier, SimplePieFileFactory $fileFactory, 
-	                            $articleUrlRegex, $articleXPath, 
-	                            $maximumTimeout=10){
+	                            array $regexXPathPair, $maximumTimeout=10){
 		$this->purifier = $purifier;
-		$this->articleUrlRegex = $articleUrlRegex;
-		$this->articleXPath = $articleXPath;
+		$this->regexXPathPair = $regexXPathPair;
 		$this->fileFactory = $fileFactory;
-		$this->timeout = $maximumTimeout;
+		$this->maximumTimeout = $maximumTimeout;
 	}
 
 
 	public function enhance($item){
-		if(preg_match($this->articleUrlRegex, $item->getUrl())) {
-			$file = $this->fileFactory->getFile($item->getUrl(), $this->maximumTimeout);
-			$dom = new \DOMDocument();
-			@$dom->loadHTML($file->body);
-			$xpath = new \DOMXpath($dom);
-			$xpathResult = $xpath->evaluate($this->articleXPath);
-
-			// in case it wasnt a text query assume its a single 
-			if(!is_string($xpathResult)) {
-				$xpathResult = $this->domToString($xpathResult);
+		foreach($this->regexXPathPair as $regex => $search) {
+
+			if(preg_match($regex, $item->getUrl())) {
+				$file = $this->fileFactory->getFile($item->getUrl(), $this->maximumTimeout);
+				$dom = new \DOMDocument();
+				@$dom->loadHTML($file->body);
+				$xpath = new \DOMXpath($dom);
+				$xpathResult = $xpath->evaluate($search);
+
+				// in case it wasn't a text query, convert the result to a string
+				if(!is_string($xpathResult)) {
+					$xpathResult = $this->domToString($xpathResult);
+				}
+
+				$sanitizedResult = $this->purifier->purify($xpathResult);
+				$item->setBody($sanitizedResult);
 			}
-
-			$sanitizedResult = $this->purifier->purify($xpathResult);
-			$item->setBody($sanitizedResult);
 		}
 
 		return $item;
diff --git a/utility/articleenhancer/cyanideandhappinessenhancer.php b/utility/articleenhancer/cyanideandhappinessenhancer.php
index 1faee6d5c..037a3179e 100644
--- a/utility/articleenhancer/cyanideandhappinessenhancer.php
+++ b/utility/articleenhancer/cyanideandhappinessenhancer.php
@@ -36,8 +36,10 @@ class CyanideAndHappinessEnhancer extends ArticleEnhancer {
 		parent::__construct(
 			$purifier,
 			$fileFactory,
-			'/explosm.net\/comics/', // match article url
-			'//*[@id=\'maincontent\']/div[2]/div', // xpath statement to extract the html from the page
+			array(
+				'/explosm.net\/comics/' => '//*[@id=\'maincontent\']/div[2]/div',
+				'/explosm.net\/show/' => '//*[@id=\'videoPlayer\']/iframe'
+			), 
 			$timeout
 		);
 	}
author	Bernhard Posselt <nukeawhale@gmail.com>	2013-08-29 13:30:38 +0200
committer	Bernhard Posselt <nukeawhale@gmail.com>	2013-08-29 13:30:38 +0200
commit	4d7f53380d31154709faa3f9d6cdc467ff141951 (patch)
tree	fdbe6e2e76b7b3c3483cc50216aaac8ffd34dc07 /utility
parent	a73fe145a2856d6d075f8541f28c70b5cf01e1db (diff)