. * */

namespace OCA\News\Utility\ArticleEnhancer;

use \OCA\News\Utility\SimplePieFileFactory;


/**
 * Base class for enhancers that replace the body of an item with content
 * taken from the page the item's URL points to.
 *
 * An item is only processed when its URL matches the configured regular
 * expression. The page is then downloaded, the configured XPath expression
 * is evaluated against it, and the purified result becomes the new body.
 */
abstract class ArticleEnhancer {

	private $articleUrlRegex;
	private $articleXPath;
	private $purifier;
	private $fileFactory;
	private $maximumTimeout;


	/**
	 * @param $purifier the purifier object to clean the html which will be
	 * matched; its purify() method is called with the extracted markup
	 * @param SimplePieFileFactory $fileFactory a factory for getting a simple
	 * pie file instance
	 * @param string $articleUrlRegex the regex to match which article should
	 * be handled
	 * @param string $articleXPath the xpath which tells the fetcher with what
	 * body the feed should be replaced
	 * @param int $maximumTimeout maximum timeout in seconds
	 */
	public function __construct($purifier, SimplePieFileFactory $fileFactory,
	                            $articleUrlRegex, $articleXPath,
	                            $maximumTimeout=10){
		$this->purifier = $purifier;
		$this->articleUrlRegex = $articleUrlRegex;
		$this->articleXPath = $articleXPath;
		$this->fileFactory = $fileFactory;
		// must be stored in the same property that enhance() reads, otherwise
		// the configured timeout never reaches the file factory
		$this->maximumTimeout = $maximumTimeout;
	}


	/**
	 * Replaces the body of the item with the purified result of the xpath
	 * query run against the page behind the item's url. Items whose url does
	 * not match the article url regex are returned untouched.
	 * @param $item the item to enhance; getUrl() and setBody() are called on
	 * it
	 * @return the same item instance that was passed in
	 */
	public function enhance($item){
		if(preg_match($this->articleUrlRegex, $item->getUrl())) {
			$file = $this->fileFactory->getFile($item->getUrl(),
				$this->maximumTimeout);

			// real world markup is rarely valid, so parser warnings are
			// deliberately silenced
			$dom = new \DOMDocument();
			@$dom->loadHTML($file->body);

			$xpath = new \DOMXPath($dom);
			$xpathResult = $xpath->evaluate($this->articleXPath);

			// in case it was not a text query, assume the result is a node
			// list and turn it into a string
			if(!is_string($xpathResult)) {
				$xpathResult = $this->domToString($xpathResult);
			}

			$sanitizedResult = $this->purifier->purify($xpathResult);
			$item->setBody($sanitizedResult);
		}

		return $item;
	}


	/**
	 * Method which turns an xpath result to a string
	 * Assumes that the result matches a single element: only the first
	 * matched node is used. If the result is not a single element, you can
	 * customize it by overwriting this method
	 * @param $xpathResult the result from the xpath query
	 * @return the result as a string, or an empty string if nothing matched
	 */
	protected function domToString($xpathResult) {
		if($xpathResult->length > 0) {
			return $this->toInnerHTML($xpathResult->item(0));
		} else {
			return "";
		}
	}


	/**
	 * Serialises a node to an html string by importing it, together with all
	 * of its descendants, into a fresh document. The node itself is part of
	 * the output and surrounding whitespace is trimmed.
	 * @param $node the dom node which should be serialised
	 * @return string the html of the node
	 */
	protected function toInnerHTML($node) {
		$dom = new \DOMDocument();
		$dom->appendChild($dom->importNode($node, true));
		return trim($dom->saveHTML());
	}


}