summary | refs | log | tree | commit | diff | stats
path: root/articleenhancer
diff options
context:
space:
mode:
author Bernhard Posselt <dev@bernhard-posselt.com> 2014-10-21 16:45:36 +0200
committer Bernhard Posselt <dev@bernhard-posselt.com> 2014-10-21 16:45:36 +0200
commit 42d69a95f3276a2d6089ca68f635c4e2f6aa7a23 (patch)
tree 6a17fd7998f291e6dec1d996c1e7c724b92b8e58 /articleenhancer
parent 0e6598b0734fb927109f745d9c0f3a8605a30ca5 (diff)
Convert tab indentation to 4-space indentation. The code mixed both variants, and spaces are more readable on GitHub and other websites, where the indentation width can't be configured and a tab is rendered as 8 spaces.
Diffstat (limited to 'articleenhancer')
-rw-r--r-- articleenhancer/articleenhancer.php 2
-rw-r--r-- articleenhancer/enhancer.php 112
-rw-r--r-- articleenhancer/globalarticleenhancer.php 62
-rw-r--r-- articleenhancer/regexarticleenhancer.php 46
-rw-r--r-- articleenhancer/regexenhancers.json 68
-rw-r--r-- articleenhancer/xpatharticleenhancer.php 292
-rw-r--r-- articleenhancer/xpathenhancers.json 248
7 files changed, 415 insertions, 415 deletions
diff --git a/articleenhancer/articleenhancer.php b/articleenhancer/articleenhancer.php
index 0587f4bad..a151de656 100644
--- a/articleenhancer/articleenhancer.php
+++ b/articleenhancer/articleenhancer.php
@@ -21,5 +21,5 @@ interface ArticleEnhancer {
* @param \OCA\News\Db\Item $item
* @return \OCA\News\Db\Item enhanced item
*/
- public function enhance(Item $item);
+ public function enhance(Item $item);
} \ No newline at end of file
diff --git a/articleenhancer/enhancer.php b/articleenhancer/enhancer.php
index e0ad2ab2d..abe8360d2 100644
--- a/articleenhancer/enhancer.php
+++ b/articleenhancer/enhancer.php
@@ -16,68 +16,68 @@ namespace OCA\News\ArticleEnhancer;
class Enhancer {
- private $enhancers = [];
- private $globalEnhancers = [];
-
- /**
- * @param string $feedUrl
- * @param ArticleEnhancer $enhancer
- */
- public function registerEnhancer($feedUrl, ArticleEnhancer $enhancer){
- $feedUrl = $this->removeTrailingSlash($feedUrl);
-
- // create hashkeys for all supported protocols for quick access
- $this->enhancers[$feedUrl] = $enhancer;
- $this->enhancers['https://' . $feedUrl] = $enhancer;
- $this->enhancers['http://' . $feedUrl] = $enhancer;
- $this->enhancers['https://www.' . $feedUrl] = $enhancer;
- $this->enhancers['http://www.' . $feedUrl] = $enhancer;
- }
-
-
- /**
- * Registers enhancers that are run for every item and after all previous
- * enhancers have been run
- * @param ArticleEnhancer $enhancer
- */
- public function registerGlobalEnhancer (ArticleEnhancer $enhancer) {
- $this->globalEnhancers[] = $enhancer;
- }
-
-
- /**
- * @param \OCA\News\Db\Item $item
- * @param string $feedUrl
- * @return \OCA\News\Db\Item enhanced item
- */
- public function enhance($item, $feedUrl){
- $feedUrl = $this->removeTrailingSlash($feedUrl);
-
- if(array_key_exists($feedUrl, $this->enhancers)) {
- $result = $this->enhancers[$feedUrl]->enhance($item);
- } else {
- $result = $item;
- }
-
- foreach ($this->globalEnhancers as $enhancer) {
- $result = $enhancer->enhance($result);
- }
-
- return $result;
- }
+ private $enhancers = [];
+ private $globalEnhancers = [];
+
+ /**
+ * @param string $feedUrl
+ * @param ArticleEnhancer $enhancer
+ */
+ public function registerEnhancer($feedUrl, ArticleEnhancer $enhancer){
+ $feedUrl = $this->removeTrailingSlash($feedUrl);
+
+ // create hashkeys for all supported protocols for quick access
+ $this->enhancers[$feedUrl] = $enhancer;
+ $this->enhancers['https://' . $feedUrl] = $enhancer;
+ $this->enhancers['http://' . $feedUrl] = $enhancer;
+ $this->enhancers['https://www.' . $feedUrl] = $enhancer;
+ $this->enhancers['http://www.' . $feedUrl] = $enhancer;
+ }
+
+
+ /**
+ * Registers enhancers that are run for every item and after all previous
+ * enhancers have been run
+ * @param ArticleEnhancer $enhancer
+ */
+ public function registerGlobalEnhancer (ArticleEnhancer $enhancer) {
+ $this->globalEnhancers[] = $enhancer;
+ }
+
+
+ /**
+ * @param \OCA\News\Db\Item $item
+ * @param string $feedUrl
+ * @return \OCA\News\Db\Item enhanced item
+ */
+ public function enhance($item, $feedUrl){
+ $feedUrl = $this->removeTrailingSlash($feedUrl);
+
+ if(array_key_exists($feedUrl, $this->enhancers)) {
+ $result = $this->enhancers[$feedUrl]->enhance($item);
+ } else {
+ $result = $item;
+ }
+
+ foreach ($this->globalEnhancers as $enhancer) {
+ $result = $enhancer->enhance($result);
+ }
+
+ return $result;
+ }
/**
* @param string $url
* @return string
*/
- private function removeTrailingSlash($url) {
- if($url[strlen($url)-1] === '/') {
- return substr($url, 0, -1);
- } else {
- return $url;
- }
- }
+ private function removeTrailingSlash($url) {
+ if($url[strlen($url)-1] === '/') {
+ return substr($url, 0, -1);
+ } else {
+ return $url;
+ }
+ }
} \ No newline at end of file
diff --git a/articleenhancer/globalarticleenhancer.php b/articleenhancer/globalarticleenhancer.php
index 117c0981c..b556a285a 100644
--- a/articleenhancer/globalarticleenhancer.php
+++ b/articleenhancer/globalarticleenhancer.php
@@ -21,47 +21,47 @@ use \OCA\News\Db\Item;
class GlobalArticleEnhancer implements ArticleEnhancer {
- /**
- * This method is run after all enhancers and for every item
- */
- public function enhance(Item $item) {
+ /**
+ * This method is run after all enhancers and for every item
+ */
+ public function enhance(Item $item) {
- $dom = new \DOMDocument();
+ $dom = new \DOMDocument();
- // wrap it inside a div if there is none to prevent invalid wrapping
- // inside <p> tags
- $body = '<div>' . $item->getBody() . '</div>';
+ // wrap it inside a div if there is none to prevent invalid wrapping
+ // inside <p> tags
+ $body = '<div>' . $item->getBody() . '</div>';
- Security::scan($body, $dom, function ($xml, $dom) {
- return @$dom->loadHTML($xml, LIBXML_HTML_NOIMPLIED
- | LIBXML_HTML_NODEFDTD | LIBXML_NONET);
- });
+ Security::scan($body, $dom, function ($xml, $dom) {
+ return @$dom->loadHTML($xml, LIBXML_HTML_NOIMPLIED
+ | LIBXML_HTML_NODEFDTD | LIBXML_NONET);
+ });
- $xpath = new \DOMXpath($dom);
+ $xpath = new \DOMXpath($dom);
- // remove youtube autoplay
- // NOTE: PHP supports only XPath 1.0 so no matches() function :(
- $youtubeIframes = "//iframe[contains(@src, 'youtube.com')]";
+ // remove youtube autoplay
+ // NOTE: PHP supports only XPath 1.0 so no matches() function :(
+ $youtubeIframes = "//iframe[contains(@src, 'youtube.com')]";
- $elements = $xpath->query($youtubeIframes);
- foreach ($elements as $element) {
+ $elements = $xpath->query($youtubeIframes);
+ foreach ($elements as $element) {
- // src needs to be matched against regex to prevent false positives
- // and because theres no XPath matches function available
- $src = $element->getAttribute('src');
- $regex = '%^(http://|https://|//)(www\.)?youtube.com/.*\?.*autoplay=1.*%i';
+ // src needs to be matched against regex to prevent false positives
+ // and because theres no XPath matches function available
+ $src = $element->getAttribute('src');
+ $regex = '%^(http://|https://|//)(www\.)?youtube.com/.*\?.*autoplay=1.*%i';
- if (preg_match($regex, $src)) {
- $replaced = str_replace('autoplay=1', 'autoplay=0', $src);
- $element->setAttribute('src', $replaced);
- }
- }
+ if (preg_match($regex, $src)) {
+ $replaced = str_replace('autoplay=1', 'autoplay=0', $src);
+ $element->setAttribute('src', $replaced);
+ }
+ }
- // save all changes back to the item
- $item->setBody(trim($dom->saveHTML()));
+ // save all changes back to the item
+ $item->setBody(trim($dom->saveHTML()));
- return $item;
- }
+ return $item;
+ }
} \ No newline at end of file
diff --git a/articleenhancer/regexarticleenhancer.php b/articleenhancer/regexarticleenhancer.php
index 543eca09b..0f600468e 100644
--- a/articleenhancer/regexarticleenhancer.php
+++ b/articleenhancer/regexarticleenhancer.php
@@ -18,29 +18,29 @@ use \OCA\News\Db\Item;
class RegexArticleEnhancer implements ArticleEnhancer {
- private $matchArticleUrl;
- private $regexPair;
-
- public function __construct($matchArticleUrl, array $regexPair) {
- $this->matchArticleUrl = $matchArticleUrl;
- $this->regexPair = $regexPair;
- }
-
-
- /**
- * @param \OCA\News\Db\Item $item
- * @return \OCA\News\Db\Item enhanced item
- */
- public function enhance(Item $item) {
- if (preg_match($this->matchArticleUrl, $item->getUrl())) {
- $body = $item->getBody();
- foreach($this->regexPair as $search => $replaceWith) {
- $body = preg_replace($search, $replaceWith, $body);
- }
- $item->setBody($body);
- }
- return $item;
- }
+ private $matchArticleUrl;
+ private $regexPair;
+
+ public function __construct($matchArticleUrl, array $regexPair) {
+ $this->matchArticleUrl = $matchArticleUrl;
+ $this->regexPair = $regexPair;
+ }
+
+
+ /**
+ * @param \OCA\News\Db\Item $item
+ * @return \OCA\News\Db\Item enhanced item
+ */
+ public function enhance(Item $item) {
+ if (preg_match($this->matchArticleUrl, $item->getUrl())) {
+ $body = $item->getBody();
+ foreach($this->regexPair as $search => $replaceWith) {
+ $body = preg_replace($search, $replaceWith, $body);
+ }
+ $item->setBody($body);
+ }
+ return $item;
+ }
}
diff --git a/articleenhancer/regexenhancers.json b/articleenhancer/regexenhancers.json
index 1a67900aa..fd83d0b3f 100644
--- a/articleenhancer/regexenhancers.json
+++ b/articleenhancer/regexenhancers.json
@@ -1,36 +1,36 @@
{
- "twogag.com": {
- "%(?:www.twogag.com/archives)|(feedproxy.google.com/~r/TwoGuysAndGuy)%": {
- "%http://www.twogag.com/comics-rss/([^.]+)\\.jpg%": "http://www.twogag.com/comics/$1.jpg"
- }
- },
- "buttersafe.com": {
- "%(?:buttersafe.com)|(feedproxy.google.com/~r/Buttersafe)%": {
- "%buttersafe.com/comics/rss/([^.]+)RSS([^.]+)?.jpg%": "buttersafe.com/comics/$1$2.jpg"
- }
- },
- "nichtlustig.de": {
- "%nichtlustig.de%": {
- "%.*static.nichtlustig.de/comics/full/(\\d+).*%s": "<img src=\"http://static.nichtlustig.de/comics/full/$1.jpg\" />"
- }
- },
- "nerfnow.com": {
- "%feedproxy.google.com/~r/nerfnow%": {
- "%(width|width)=\"\\d+\"%": "",
- "%nerfnow.com/comic/thumb/(\\d+)/large%": "nerfnow.com/comic/image/$1",
- "%<div><a target=\"_blank\" href=\"http://www.nerfnow.com/comic/\\d+\">Click for full size</a></div>%": "",
- "%<img src=\"http://feeds.feedburner.com[^>]+>%": ""
- }
- },
- "fowllanguagecomics.com": {
- "%fowllanguagecomics.com/comic%": {
- "%\\?resize=[^\"]+%": ""
- }
- },
- "cheerupemokid.com": {
- "%feedproxy.google.com/~r/cheerupemokid%": {
- "%-\\d+x\\d+%": "",
- "%(width|height)=\"\\d+\"%" : ""
- }
- }
+ "twogag.com": {
+ "%(?:www.twogag.com/archives)|(feedproxy.google.com/~r/TwoGuysAndGuy)%": {
+ "%http://www.twogag.com/comics-rss/([^.]+)\\.jpg%": "http://www.twogag.com/comics/$1.jpg"
+ }
+ },
+ "buttersafe.com": {
+ "%(?:buttersafe.com)|(feedproxy.google.com/~r/Buttersafe)%": {
+ "%buttersafe.com/comics/rss/([^.]+)RSS([^.]+)?.jpg%": "buttersafe.com/comics/$1$2.jpg"
+ }
+ },
+ "nichtlustig.de": {
+ "%nichtlustig.de%": {
+ "%.*static.nichtlustig.de/comics/full/(\\d+).*%s": "<img src=\"http://static.nichtlustig.de/comics/full/$1.jpg\" />"
+ }
+ },
+ "nerfnow.com": {
+ "%feedproxy.google.com/~r/nerfnow%": {
+ "%(width|width)=\"\\d+\"%": "",
+ "%nerfnow.com/comic/thumb/(\\d+)/large%": "nerfnow.com/comic/image/$1",
+ "%<div><a target=\"_blank\" href=\"http://www.nerfnow.com/comic/\\d+\">Click for full size</a></div>%": "",
+ "%<img src=\"http://feeds.feedburner.com[^>]+>%": ""
+ }
+ },
+ "fowllanguagecomics.com": {
+ "%fowllanguagecomics.com/comic%": {
+ "%\\?resize=[^\"]+%": ""
+ }
+ },
+ "cheerupemokid.com": {
+ "%feedproxy.google.com/~r/cheerupemokid%": {
+ "%-\\d+x\\d+%": "",
+ "%(width|height)=\"\\d+\"%" : ""
+ }
+ }
}
diff --git a/articleenhancer/xpatharticleenhancer.php b/articleenhancer/xpatharticleenhancer.php
index 623b42bfc..7d80868f5 100644
--- a/articleenhancer/xpatharticleenhancer.php
+++ b/articleenhancer/xpatharticleenhancer.php
@@ -27,10 +27,10 @@ use \OCA\News\Config\Config;
class XPathArticleEnhancer implements ArticleEnhancer {
- private $fileFactory;
- private $maximumTimeout;
- private $config;
- private $regexXPathPair;
+ private $fileFactory;
+ private $maximumTimeout;
+ private $config;
+ private $regexXPathPair;
/**
@@ -40,152 +40,152 @@ class XPathArticleEnhancer implements ArticleEnhancer {
* page
* @param \OCA\News\Config\Config $config
*/
- public function __construct(SimplePieAPIFactory $fileFactory,
- array $regexXPathPair,
- Config $config){
- $this->fileFactory = $fileFactory;
- $this->regexXPathPair = $regexXPathPair;
- $this->config = $config;
- $this->maximumTimeout = $config->getFeedFetcherTimeout();
- }
-
- /**
- * @param \OCA\News\Db\Item $item
- * @return \OCA\News\Db\Item enhanced item
- */
- public function enhance(Item $item){
-
- foreach($this->regexXPathPair as $regex => $search) {
-
- if(preg_match($regex, $item->getUrl())) {
- $file = $this->getFile($item->getUrl());
-
- // convert encoding by detecting charset from header
+ public function __construct(SimplePieAPIFactory $fileFactory,
+ array $regexXPathPair,
+ Config $config){
+ $this->fileFactory = $fileFactory;
+ $this->regexXPathPair = $regexXPathPair;
+ $this->config = $config;
+ $this->maximumTimeout = $config->getFeedFetcherTimeout();
+ }
+
+ /**
+ * @param \OCA\News\Db\Item $item
+ * @return \OCA\News\Db\Item enhanced item
+ */
+ public function enhance(Item $item){
+
+ foreach($this->regexXPathPair as $regex => $search) {
+
+ if(preg_match($regex, $item->getUrl())) {
+ $file = $this->getFile($item->getUrl());
+
+ // convert encoding by detecting charset from header
$contentType = $file->headers['content-type'];
$body = $file->body;
- if( preg_match( '/(?<=charset=)[^;]*/', $contentType, $matches ) ) {
+ if( preg_match( '/(?<=charset=)[^;]*/', $contentType, $matches ) ) {
$body = mb_convert_encoding($body, 'HTML-ENTITIES', $matches[0]);
- }
-
- $dom = new DOMDocument();
-
- Security::scan($body, $dom, function ($xml, $dom) {
- return @$dom->loadHTML($xml, LIBXML_NONET);
- });
-
- $xpath = new DOMXpath($dom);
- $xpathResult = $xpath->evaluate($search);
-
- // in case it wasnt a text query assume its a dom element and
- // convert it to text
- if(!is_string($xpathResult)) {
- $xpathResult = $this->domToString($xpathResult);
- }
-
- $xpathResult = trim($xpathResult);
-
- // convert all relative to absolute URLs
- $xpathResult = $this->substituteRelativeLinks($xpathResult, $item->getUrl());
-
- if($xpathResult) {
- $item->setBody($xpathResult);
- }
- }
- }
-
- return $item;
- }
-
-
- private function getFile($url) {
- return $this->fileFactory->getFile(
- $url, $this->maximumTimeout, 5, null, 'Mozilla/5.0 AppleWebKit'
- );
- }
-
-
- /**
- * Method which converts all relative "href" and "src" URLs of
- * a HTML snippet with their absolute equivalent
- * @param string $xmlString a HTML snippet as string with the relative URLs to be replaced
- * @param string $absoluteUrl the approptiate absolute url of the HTML snippet
- * @return string the result HTML snippet as a string
- */
- protected function substituteRelativeLinks($xmlString, $absoluteUrl) {
- $dom = new DOMDocument();
- $dom->preserveWhiteSpace = false;
-
- $isOk = Security::scan($xmlString, $dom, function ($xml, $dom) {
- // wrap in div to prevent loadHTML from inserting weird elements
- $xml = '<div>' . $xml . '</div>';
- return @$dom->loadHTML($xml, LIBXML_NONET | LIBXML_HTML_NODEFDTD
- | LIBXML_HTML_NOIMPLIED);
- });
-
- if($xmlString === '' || !$isOk) {
- return false;
- }
-
- foreach (['href', 'src'] as $attribute) {
- $xpath = new DOMXpath($dom);
- $xpathResult = $xpath->query(
- "//*[@" . $attribute . " " .
- "and not(contains(@" . $attribute . ", '://')) " .
- "and not(starts-with(@" . $attribute . ", 'mailto:')) " .
- "and not(starts-with(@" . $attribute . ", '//'))]");
- foreach ($xpathResult as $linkNode) {
- $urlElement = $linkNode->attributes->getNamedItem($attribute);
- $abs = $this->relativeToAbsoluteUrl($urlElement->nodeValue, $absoluteUrl);
- $urlElement->nodeValue = htmlspecialchars($abs);
- }
- }
-
- $xmlString = $dom->saveHTML();
-
- // domdocument spoils the string with line breaks between the elements. strip them.
- $xmlString = str_replace("\n", '', $xmlString);
-
- return $xmlString;
- }
-
-
- /**
- * Method which builds a URL by taking a relative URL and its corresponding
- * absolute URL
- * For example relative URL "../example/path/file.php?a=1#anchor" and
- * absolute URL "https://username:password@www.website.com/subfolder/index.html"
- * will result in "https://username:password@www.website.com/example/path/file.php?a=1#anchor"
- * @param string $relativeUrl the relative URL
- * @param string $absoluteUrl the absolute URL with at least scheme and host
- * @return string the resulting absolute URL
- */
- protected function relativeToAbsoluteUrl($relativeUrl, $absoluteUrl) {
- $base = new \Net_URL2($absoluteUrl);
- return $base->resolve($relativeUrl);
- }
-
-
- /**
- * Method which turns an xpath result to a string
- * you can customize it by overwriting this method
- * @param mixed $xpathResult the result from the xpath query
- * @return string the result as a string
- */
- protected function domToString($xpathResult) {
- $result = '';
- foreach($xpathResult as $node) {
- $result .= $this->toInnerHTML($node);
- }
- return $result;
- }
-
-
- protected function toInnerHTML($node) {
- $dom = new DOMDocument();
- $dom->appendChild($dom->importNode($node, true));
- return trim($dom->saveHTML($dom->documentElement));
- }
+ }
+
+ $dom = new DOMDocument();
+
+ Security::scan($body, $dom, function ($xml, $dom) {
+ return @$dom->loadHTML($xml, LIBXML_NONET);
+ });
+
+ $xpath = new DOMXpath($dom);
+ $xpathResult = $xpath->evaluate($search);
+
+ // in case it wasnt a text query assume its a dom element and
+ // convert it to text
+ if(!is_string($xpathResult)) {
+ $xpathResult = $this->domToString($xpathResult);
+ }
+
+ $xpathResult = trim($xpathResult);
+
+ // convert all relative to absolute URLs
+ $xpathResult = $this->substituteRelativeLinks($xpathResult, $item->getUrl());
+
+ if($xpathResult) {
+ $item->setBody($xpathResult);
+ }
+ }
+ }
+
+ return $item;
+ }
+
+
+ private function getFile($url) {
+ return $this->fileFactory->getFile(
+ $url, $this->maximumTimeout, 5, null, 'Mozilla/5.0 AppleWebKit'
+ );
+ }
+
+
+ /**
+ * Method which converts all relative "href" and "src" URLs of
+ * a HTML snippet with their absolute equivalent
+ * @param string $xmlString a HTML snippet as string with the relative URLs to be replaced
+ * @param string $absoluteUrl the approptiate absolute url of the HTML snippet
+ * @return string the result HTML snippet as a string
+ */
+ protected function substituteRelativeLinks($xmlString, $absoluteUrl) {
+ $dom = new DOMDocument();
+ $dom->preserveWhiteSpace = false;
+
+ $isOk = Security::scan($xmlString, $dom, function ($xml, $dom) {
+ // wrap in div to prevent loadHTML from inserting weird elements
+ $xml = '<div>' . $xml . '</div>';
+ return @$dom->loadHTML($xml, LIBXML_NONET | LIBXML_HTML_NODEFDTD
+ | LIBXML_HTML_NOIMPLIED);
+ });
+
+ if($xmlString === '' || !$isOk) {
+ return false;
+ }
+
+ foreach (['href', 'src'] as $attribute) {
+ $xpath = new DOMXpath($dom);
+ $xpathResult = $xpath->query(
+ "//*[@" . $attribute . " " .
+ "and not(contains(@" . $attribute . ", '://')) " .
+ "and not(starts-with(@" . $attribute . ", 'mailto:')) " .
+ "and not(starts-with(@" . $attribute . ", '//'))]");
+ foreach ($xpathResult as $linkNode) {
+ $urlElement = $linkNode->attributes->getNamedItem($attribute);
+ $abs = $this->relativeToAbsoluteUrl($urlElement->nodeValue, $absoluteUrl);
+ $urlElement->nodeValue = htmlspecialchars($abs);
+ }
+ }
+
+ $xmlString = $dom->saveHTML();
+
+ // domdocument spoils the string with line breaks between the elements. strip them.
+ $xmlString = str_replace("\n", '', $xmlString);
+
+ return $xmlString;
+ }
+
+
+ /**
+ * Method which builds a URL by taking a relative URL and its corresponding
+ * absolute URL
+ * For example relative URL "../example/path/file.php?a=1#anchor" and
+ * absolute URL "https://username:password@www.website.com/subfolder/index.html"
+ * will result in "https://username:password@www.website.com/example/path/file.php?a=1#anchor"
+ * @param string $relativeUrl the relative URL
+ * @param string $absoluteUrl the absolute URL with at least scheme and host
+ * @return string the resulting absolute URL
+ */
+ protected function relativeToAbsoluteUrl($relativeUrl, $absoluteUrl) {
+ $base = new \Net_URL2($absoluteUrl);
+ return $base->resolve($relativeUrl);
+ }
+
+
+ /**
+ * Method which turns an xpath result to a string
+ * you can customize it by overwriting this method
+ * @param mixed $xpathResult the result from the xpath query
+ * @return string the result as a string
+ */
+ protected function domToString($xpathResult) {
+ $result = '';
+ foreach($xpathResult as $node) {
+ $result .= $this->toInnerHTML($node);
+ }
+ return $result;
+ }
+
+
+ protected function toInnerHTML($node) {
+ $dom = new DOMDocument();
+ $dom->appendChild($dom->importNode($node, true));
+ return trim($dom->saveHTML($dom->documentElement));
+ }
}
diff --git a/articleenhancer/xpathenhancers.json b/articleenhancer/xpathenhancers.json
index 1441ce920..4a66e86d1 100644
--- a/articleenhancer/xpathenhancers.json
+++ b/articleenhancer/xpathenhancers.json
@@ -1,126 +1,126 @@
{
- "cad-comic.com": {
- "%cad-comic.com/cad/\\d+/$%": "//*[@id='content']/img"
- },
- "slashdot.org": {
- "%rss.slashdot.org%": "//article/div[@class='body'] | //article[@class='layout-article']/div"
- },
- "explosm.net": {
- "%explosm.net/comics%": "//*[@id='maincontent']/div[2]/div/img",
- "%explosm.net/show%": "//*[@id='videoPlayer']/iframe"
- },
- "themerepublic.net": {
- "%feedproxy.google.com/~r/blogspot/DngUJ%": "//*[@class='post hentry']"
- },
- "penny-arcade.com": {
- "%penny-arcade.com/comic%": "//*[@id='comicFrame']/a/img",
- "%penny-arcade.com/news%": "//*[@class='postBody']/*"
- },
- "leasticoulddo.com": {
- "%feedproxy.google.com/~r/LICD%": "//*[@id='comic-img']/a/img | //*[@id='comic-img']/img | //*[@id='post']"
- },
- "escapistmagazine.com/articles/view/comicsandcosplay/comics/critical-miss": {
- "%escapistmagazine.com/articles/view/comicsandcosplay/comics/critical-miss%": "//*[@class='body']/span/img | //div[@class='folder_nav_links']/following::p"
- },
- "escapistmagazine.com/articles/view/comicsandcosplay/comics/namegame": {