summaryrefslogtreecommitdiffstats
path: root/articleenhancer/xpatharticleenhancer.php
diff options
context:
space:
mode:
authorBernhard Posselt <dev@bernhard-posselt.com>2014-10-21 16:45:36 +0200
committerBernhard Posselt <dev@bernhard-posselt.com>2014-10-21 16:45:36 +0200
commit42d69a95f3276a2d6089ca68f635c4e2f6aa7a23 (patch)
tree6a17fd7998f291e6dec1d996c1e7c724b92b8e58 /articleenhancer/xpatharticleenhancer.php
parent0e6598b0734fb927109f745d9c0f3a8605a30ca5 (diff)
convert tabs indention to indention with 4 spaces because of mixing of both variants in code and better readability on github and websites because you cant set the indention width there and 8 spaces will be used for a tab
Diffstat (limited to 'articleenhancer/xpatharticleenhancer.php')
-rw-r--r--articleenhancer/xpatharticleenhancer.php292
1 files changed, 146 insertions, 146 deletions
diff --git a/articleenhancer/xpatharticleenhancer.php b/articleenhancer/xpatharticleenhancer.php
index 623b42bfc..7d80868f5 100644
--- a/articleenhancer/xpatharticleenhancer.php
+++ b/articleenhancer/xpatharticleenhancer.php
@@ -27,10 +27,10 @@ use \OCA\News\Config\Config;
class XPathArticleEnhancer implements ArticleEnhancer {
- private $fileFactory;
- private $maximumTimeout;
- private $config;
- private $regexXPathPair;
+ private $fileFactory;
+ private $maximumTimeout;
+ private $config;
+ private $regexXPathPair;
/**
@@ -40,152 +40,152 @@ class XPathArticleEnhancer implements ArticleEnhancer {
* page
* @param \OCA\News\Config\Config $config
*/
- public function __construct(SimplePieAPIFactory $fileFactory,
- array $regexXPathPair,
- Config $config){
- $this->fileFactory = $fileFactory;
- $this->regexXPathPair = $regexXPathPair;
- $this->config = $config;
- $this->maximumTimeout = $config->getFeedFetcherTimeout();
- }
-
- /**
- * @param \OCA\News\Db\Item $item
- * @return \OCA\News\Db\Item enhanced item
- */
- public function enhance(Item $item){
-
- foreach($this->regexXPathPair as $regex => $search) {
-
- if(preg_match($regex, $item->getUrl())) {
- $file = $this->getFile($item->getUrl());
-
- // convert encoding by detecting charset from header
+ public function __construct(SimplePieAPIFactory $fileFactory,
+ array $regexXPathPair,
+ Config $config){
+ $this->fileFactory = $fileFactory;
+ $this->regexXPathPair = $regexXPathPair;
+ $this->config = $config;
+ $this->maximumTimeout = $config->getFeedFetcherTimeout();
+ }
+
+ /**
+ * @param \OCA\News\Db\Item $item
+ * @return \OCA\News\Db\Item enhanced item
+ */
+ public function enhance(Item $item){
+
+ foreach($this->regexXPathPair as $regex => $search) {
+
+ if(preg_match($regex, $item->getUrl())) {
+ $file = $this->getFile($item->getUrl());
+
+ // convert encoding by detecting charset from header
$contentType = $file->headers['content-type'];
$body = $file->body;
- if( preg_match( '/(?<=charset=)[^;]*/', $contentType, $matches ) ) {
+ if( preg_match( '/(?<=charset=)[^;]*/', $contentType, $matches ) ) {
$body = mb_convert_encoding($body, 'HTML-ENTITIES', $matches[0]);
- }
-
- $dom = new DOMDocument();
-
- Security::scan($body, $dom, function ($xml, $dom) {
- return @$dom->loadHTML($xml, LIBXML_NONET);
- });
-
- $xpath = new DOMXpath($dom);
- $xpathResult = $xpath->evaluate($search);
-
- // in case it wasnt a text query assume its a dom element and
- // convert it to text
- if(!is_string($xpathResult)) {
- $xpathResult = $this->domToString($xpathResult);
- }
-
- $xpathResult = trim($xpathResult);
-
- // convert all relative to absolute URLs
- $xpathResult = $this->substituteRelativeLinks($xpathResult, $item->getUrl());
-
- if($xpathResult) {
- $item->setBody($xpathResult);
- }
- }
- }
-
- return $item;
- }
-
-
- private function getFile($url) {
- return $this->fileFactory->getFile(
- $url, $this->maximumTimeout, 5, null, 'Mozilla/5.0 AppleWebKit'
- );
- }
-
-
- /**
- * Method which converts all relative "href" and "src" URLs of
- * a HTML snippet with their absolute equivalent
- * @param string $xmlString a HTML snippet as string with the relative URLs to be replaced
- * @param string $absoluteUrl the approptiate absolute url of the HTML snippet
- * @return string the result HTML snippet as a string
- */
- protected function substituteRelativeLinks($xmlString, $absoluteUrl) {
- $dom = new DOMDocument();
- $dom->preserveWhiteSpace = false;
-
- $isOk = Security::scan($xmlString, $dom, function ($xml, $dom) {
- // wrap in div to prevent loadHTML from inserting weird elements
- $xml = '<div>' . $xml . '</div>';
- return @$dom->loadHTML($xml, LIBXML_NONET | LIBXML_HTML_NODEFDTD
- | LIBXML_HTML_NOIMPLIED);
- });
-
- if($xmlString === '' || !$isOk) {
- return false;
- }
-
- foreach (['href', 'src'] as $attribute) {
- $xpath = new DOMXpath($dom);
- $xpathResult = $xpath->query(
- "//*[@" . $attribute . " " .
- "and not(contains(@" . $attribute . ", '://')) " .
- "and not(starts-with(@" . $attribute . ", 'mailto:')) " .
- "and not(starts-with(@" . $attribute . ", '//'))]");
- foreach ($xpathResult as $linkNode) {
- $urlElement = $linkNode->attributes->getNamedItem($attribute);
- $abs = $this->relativeToAbsoluteUrl($urlElement->nodeValue, $absoluteUrl);
- $urlElement->nodeValue = htmlspecialchars($abs);
- }
- }
-
- $xmlString = $dom->saveHTML();
-
- // domdocument spoils the string with line breaks between the elements. strip them.
- $xmlString = str_replace("\n", '', $xmlString);
-
- return $xmlString;
- }
-
-
- /**
- * Method which builds a URL by taking a relative URL and its corresponding
- * absolute URL
- * For example relative URL "../example/path/file.php?a=1#anchor" and
- * absolute URL "https://username:password@www.website.com/subfolder/index.html"
- * will result in "https://username:password@www.website.com/example/path/file.php?a=1#anchor"
- * @param string $relativeUrl the relative URL
- * @param string $absoluteUrl the absolute URL with at least scheme and host
- * @return string the resulting absolute URL
- */
- protected function relativeToAbsoluteUrl($relativeUrl, $absoluteUrl) {
- $base = new \Net_URL2($absoluteUrl);
- return $base->resolve($relativeUrl);
- }
-
-
- /**
- * Method which turns an xpath result to a string
- * you can customize it by overwriting this method
- * @param mixed $xpathResult the result from the xpath query
- * @return string the result as a string
- */
- protected function domToString($xpathResult) {
- $result = '';
- foreach($xpathResult as $node) {
- $result .= $this->toInnerHTML($node);
- }
- return $result;
- }
-
-
- protected function toInnerHTML($node) {
- $dom = new DOMDocument();
- $dom->appendChild($dom->importNode($node, true));
- return trim($dom->saveHTML($dom->documentElement));
- }
+ }
+
+ $dom = new DOMDocument();
+
+ Security::scan($body, $dom, function ($xml, $dom) {
+ return @$dom->loadHTML($xml, LIBXML_NONET);
+ });
+
+ $xpath = new DOMXpath($dom);
+ $xpathResult = $xpath->evaluate($search);
+
+ // in case it wasnt a text query assume its a dom element and
+ // convert it to text
+ if(!is_string($xpathResult)) {
+ $xpathResult = $this->domToString($xpathResult);
+ }
+
+ $xpathResult = trim($xpathResult);
+
+ // convert all relative to absolute URLs
+ $xpathResult = $this->substituteRelativeLinks($xpathResult, $item->getUrl());
+
+ if($xpathResult) {
+ $item->setBody($xpathResult);
+ }
+ }
+ }
+
+ return $item;
+ }
+
+
+ private function getFile($url) {
+ return $this->fileFactory->getFile(
+ $url, $this->maximumTimeout, 5, null, 'Mozilla/5.0 AppleWebKit'
+ );
+ }
+
+
+ /**
+ * Method which converts all relative "href" and "src" URLs of
+ * a HTML snippet with their absolute equivalent
+ * @param string $xmlString a HTML snippet as string with the relative URLs to be replaced
+ * @param string $absoluteUrl the approptiate absolute url of the HTML snippet
+ * @return string the result HTML snippet as a string
+ */
+ protected function substituteRelativeLinks($xmlString, $absoluteUrl) {
+ $dom = new DOMDocument();
+ $dom->preserveWhiteSpace = false;
+
+ $isOk = Security::scan($xmlString, $dom, function ($xml, $dom) {
+ // wrap in div to prevent loadHTML from inserting weird elements
+ $xml = '<div>' . $xml . '</div>';
+ return @$dom->loadHTML($xml, LIBXML_NONET | LIBXML_HTML_NODEFDTD
+ | LIBXML_HTML_NOIMPLIED);
+ });
+
+ if($xmlString === '' || !$isOk) {
+ return false;
+ }
+
+ foreach (['href', 'src'] as $attribute) {
+ $xpath = new DOMXpath($dom);
+ $xpathResult = $xpath->query(
+ "//*[@" . $attribute . " " .
+ "and not(contains(@" . $attribute . ", '://')) " .
+ "and not(starts-with(@" . $attribute . ", 'mailto:')) " .
+ "and not(starts-with(@" . $attribute . ", '//'))]");
+ foreach ($xpathResult as $linkNode) {
+ $urlElement = $linkNode->attributes->getNamedItem($attribute);
+ $abs = $this->relativeToAbsoluteUrl($urlElement->nodeValue, $absoluteUrl);
+ $urlElement->nodeValue = htmlspecialchars($abs);
+ }
+ }
+
+ $xmlString = $dom->saveHTML();
+
+ // domdocument spoils the string with line breaks between the elements. strip them.
+ $xmlString = str_replace("\n", '', $xmlString);
+
+ return $xmlString;
+ }
+
+
+ /**
+ * Method which builds a URL by taking a relative URL and its corresponding
+ * absolute URL
+ * For example relative URL "../example/path/file.php?a=1#anchor" and
+ * absolute URL "https://username:password@www.website.com/subfolder/index.html"
+ * will result in "https://username:password@www.website.com/example/path/file.php?a=1#anchor"
+ * @param string $relativeUrl the relative URL
+ * @param string $absoluteUrl the absolute URL with at least scheme and host
+ * @return string the resulting absolute URL
+ */
+ protected function relativeToAbsoluteUrl($relativeUrl, $absoluteUrl) {
+ $base = new \Net_URL2($absoluteUrl);
+ return $base->resolve($relativeUrl);
+ }
+
+
+ /**
+ * Method which turns an xpath result to a string
+ * you can customize it by overwriting this method
+ * @param mixed $xpathResult the result from the xpath query
+ * @return string the result as a string
+ */
+ protected function domToString($xpathResult) {
+ $result = '';
+ foreach($xpathResult as $node) {
+ $result .= $this->toInnerHTML($node);
+ }
+ return $result;
+ }
+
+
+ protected function toInnerHTML($node) {
+ $dom = new DOMDocument();
+ $dom->appendChild($dom->importNode($node, true));
+ return trim($dom->saveHTML($dom->documentElement));
+ }
}