summaryrefslogtreecommitdiffstats
path: root/utility/articleenhancer
diff options
context:
space:
mode:
Diffstat (limited to 'utility/articleenhancer')
-rw-r--r--utility/articleenhancer/articleenhancer.php33
-rw-r--r--utility/articleenhancer/enhancer.php65
-rw-r--r--utility/articleenhancer/regexarticleenhancer.php55
-rw-r--r--utility/articleenhancer/regexenhancers.json12
-rw-r--r--utility/articleenhancer/xpatharticleenhancer.php218
-rw-r--r--utility/articleenhancer/xpathenhancers.json57
6 files changed, 0 insertions, 440 deletions
diff --git a/utility/articleenhancer/articleenhancer.php b/utility/articleenhancer/articleenhancer.php
deleted file mode 100644
index 1842a0c02..000000000
--- a/utility/articleenhancer/articleenhancer.php
+++ /dev/null
@@ -1,33 +0,0 @@
-<?php
-
-/**
-* ownCloud - News
-*
-* @author Alessandro Cosentino
-* @author Bernhard Posselt
-* @copyright 2012 Alessandro Cosentino cosenal@gmail.com
-* @copyright 2012 Bernhard Posselt dev@bernhard-posselt.com
-*
-* This library is free software; you can redistribute it and/or
-* modify it under the terms of the GNU AFFERO GENERAL PUBLIC LICENSE
-* License as published by the Free Software Foundation; either
-* version 3 of the License, or any later version.
-*
-* This library is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU AFFERO GENERAL PUBLIC LICENSE for more details.
-*
-* You should have received a copy of the GNU Affero General Public
-* License along with this library. If not, see <http://www.gnu.org/licenses/>.
-*
-*/
-
-namespace OCA\News\Utility\ArticleEnhancer;
-
-use \OCA\News\Db\Item;
-
-
-interface ArticleEnhancer {
- public function enhance(Item $item);
-} \ No newline at end of file
diff --git a/utility/articleenhancer/enhancer.php b/utility/articleenhancer/enhancer.php
deleted file mode 100644
index d7d96f6a9..000000000
--- a/utility/articleenhancer/enhancer.php
+++ /dev/null
@@ -1,65 +0,0 @@
-<?php
-
-/**
-* ownCloud - News
-*
-* @author Alessandro Cosentino
-* @author Bernhard Posselt
-* @copyright 2012 Alessandro Cosentino cosenal@gmail.com
-* @copyright 2012 Bernhard Posselt dev@bernhard-posselt.com
-*
-* This library is free software; you can redistribute it and/or
-* modify it under the terms of the GNU AFFERO GENERAL PUBLIC LICENSE
-* License as published by the Free Software Foundation; either
-* version 3 of the License, or any later version.
-*
-* This library is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU AFFERO GENERAL PUBLIC LICENSE for more details.
-*
-* You should have received a copy of the GNU Affero General Public
-* License along with this library. If not, see <http://www.gnu.org/licenses/>.
-*
-*/
-
-namespace OCA\News\Utility\ArticleEnhancer;
-
-
-class Enhancer {
-
- private $enhancers = array();
-
- public function registerEnhancer($feedUrl, ArticleEnhancer $enhancer){
- $feedUrl = $this->removeTrailingSlash($feedUrl);
-
- // create hashkeys for all supported protocols for quick access
- $this->enhancers[$feedUrl] = $enhancer;
- $this->enhancers['https://' . $feedUrl] = $enhancer;
- $this->enhancers['http://' . $feedUrl] = $enhancer;
- $this->enhancers['https://www.' . $feedUrl] = $enhancer;
- $this->enhancers['http://www.' . $feedUrl] = $enhancer;
- }
-
-
- public function enhance($item, $feedUrl){
- $feedUrl = $this->removeTrailingSlash($feedUrl);
-
- if(array_key_exists($feedUrl, $this->enhancers)) {
- return $this->enhancers[$feedUrl]->enhance($item);
- } else {
- return $item;
- }
- }
-
-
- private function removeTrailingSlash($url) {
- if($url[strlen($url)-1] === '/') {
- return substr($url, 0, -1);
- } else {
- return $url;
- }
- }
-
-
-} \ No newline at end of file
diff --git a/utility/articleenhancer/regexarticleenhancer.php b/utility/articleenhancer/regexarticleenhancer.php
deleted file mode 100644
index dfd822c85..000000000
--- a/utility/articleenhancer/regexarticleenhancer.php
+++ /dev/null
@@ -1,55 +0,0 @@
-<?php
-
-/**
- * ownCloud - News
- *
- * @author Alessandro Cosentino
- * @author Bernhard Posselt
- * @copyright 2012 Alessandro Cosentino cosenal@gmail.com
- * @copyright 2012 Bernhard Posselt dev@bernhard-posselt.com
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU AFFERO GENERAL PUBLIC LICENSE
- * License as published by the Free Software Foundation; either
- * version 3 of the License, or any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU AFFERO GENERAL PUBLIC LICENSE for more details.
- *
- * You should have received a copy of the GNU Affero General Public
- * License along with this library. If not, see <http://www.gnu.org/licenses/>.
- *
- */
-
-namespace OCA\News\Utility\ArticleEnhancer;
-
-use \OCA\News\Utility\SimplePieFileFactory;
-use \OCA\News\Db\Item;
-
-
-class RegexArticleEnhancer implements ArticleEnhancer {
-
- private $matchArticleUrl;
- private $regexPair;
-
- public function __construct($matchArticleUrl, array $regexPair) {
- $this->matchArticleUrl = $matchArticleUrl;
- $this->regexPair = $regexPair;
- }
-
-
- public function enhance(Item $item) {
- if (preg_match($this->matchArticleUrl, $item->getUrl())) {
- $body = $item->getBody();
- foreach($this->regexPair as $search => $replaceWith) {
- $body = preg_replace($search, $replaceWith, $body);
- }
- $item->setBody($body);
- }
- return $item;
- }
-
-
-}
diff --git a/utility/articleenhancer/regexenhancers.json b/utility/articleenhancer/regexenhancers.json
deleted file mode 100644
index 95231985d..000000000
--- a/utility/articleenhancer/regexenhancers.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
- "twogag.com": {
- "%(?:www.twogag.com/archives)|(feedproxy.google.com/~r/TwoGuysAndGuy)%": {
- "%http://www.twogag.com/comics-rss/([^.]+)\\.jpg%": "http://www.twogag.com/comics/$1.jpg"
- }
- },
- "buttersafe.com": {
- "%(?:buttersafe.com)|(feedproxy.google.com/~r/Buttersafe)%": {
- "%buttersafe.com/comics/rss/([^.]+)RSS([^.]+)?.jpg%": "buttersafe.com/comics/$1$2.jpg"
- }
- }
-}
diff --git a/utility/articleenhancer/xpatharticleenhancer.php b/utility/articleenhancer/xpatharticleenhancer.php
deleted file mode 100644
index c9cff238a..000000000
--- a/utility/articleenhancer/xpatharticleenhancer.php
+++ /dev/null
@@ -1,218 +0,0 @@
-<?php
-
-/**
-* ownCloud - News
-*
-* @author Alessandro Cosentino
-* @author Bernhard Posselt
-* @copyright 2012 Alessandro Cosentino cosenal@gmail.com
-* @copyright 2012 Bernhard Posselt dev@bernhard-posselt.com
-*
-* This library is free software; you can redistribute it and/or
-* modify it under the terms of the GNU AFFERO GENERAL PUBLIC LICENSE
-* License as published by the Free Software Foundation; either
-* version 3 of the License, or any later version.
-*
-* This library is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU AFFERO GENERAL PUBLIC LICENSE for more details.
-*
-* You should have received a copy of the GNU Affero General Public
-* License along with this library. If not, see <http://www.gnu.org/licenses/>.
-*
-*/
-
-namespace OCA\News\Utility\ArticleEnhancer;
-
-use \OCA\News\Utility\SimplePieFileFactory;
-use \OCA\News\Db\Item;
-
-
-class XPathArticleEnhancer implements ArticleEnhancer {
-
-
- private $feedRegex;
- private $purifier;
- private $fileFactory;
- private $maximumTimeout;
-
-
- /**
- * @param $purifier the purifier object to clean the html which will be
- * matched
- * @param SimplePieFileFactory a factory for getting a simple pie file instance
- * @param array $regexXPathPair an associative array containing regex to
- * match the url and the xpath that should be used for it to extract the
- * page
- * @param int $maximumTimeout maximum timeout in seconds, defaults to 10 sec
- */
- public function __construct($purifier, SimplePieFileFactory $fileFactory,
- array $regexXPathPair, $maximumTimeout=10){
- $this->purifier = $purifier;
- $this->regexXPathPair = $regexXPathPair;
- $this->fileFactory = $fileFactory;
- $this->maximumTimeout = $maximumTimeout;
- }
-
-
- public function enhance(Item $item){
-
- foreach($this->regexXPathPair as $regex => $search) {
-
- if(preg_match($regex, $item->getUrl())) {
- $file = $this->fileFactory->getFile($item->getUrl(), $this->maximumTimeout);
-
- // convert encoding by detecting charset from header
- $contentType = $file->headers['content-type'];
- if( preg_match( '/(?<=charset=)[^;]*/', $contentType, $matches ) ) {
- $body = mb_convert_encoding($file->body, 'HTML-ENTITIES', $matches[0]);
- } else {
- $body = $file->body;
- }
-
- $dom = new \DOMDocument();
- @$dom->loadHTML($body);
-
- $xpath = new \DOMXpath($dom);
- $xpathResult = $xpath->evaluate($search);
-
- // in case it wasnt a text query assume its a single
- if(!is_string($xpathResult)) {
- $xpathResult = $this->domToString($xpathResult);
- }
-
- // convert all relative to absolute URLs
- $xpathResult = $this->substituteRelativeLinks($xpathResult, $item->getUrl());
-
- $sanitizedResult = $this->purifier->purify($xpathResult);
- $item->setBody($sanitizedResult);
- }
- }
-
- return $item;
- }
-
-
- /**
- * Method which converts all relative "href" and "src" URLs of
- * a HTML snippet with their absolute equivalent
- * @param string $xmlString a HTML snippet as string with the relative URLs to be replaced
- * @param string $absoluteUrl the approptiate absolute url of the HTML snippet
- * @return string the result HTML snippet as a string
- */
- protected function substituteRelativeLinks($xmlString, $absoluteUrl) {
- $dom = new \DOMDocument();
- $dom->preserveWhiteSpace = false;
-
- // return, if xml is empty or loading the HTML fails
- if( trim($xmlString) == "" || !@$dom->loadHTML($xmlString) ) {
- return $xmlString;
- }
-
- // remove <!DOCTYPE
- $dom->removeChild($dom->firstChild);
- // remove <html></html>
- $dom->replaceChild($dom->firstChild->firstChild, $dom->firstChild);
-
- $substitution = array("href", "src");
-
- foreach ($substitution as $attribute) {
- $xpath = new \DOMXpath($dom);
- $xpathResult = $xpath->query(
- "//*[@" . $attribute . " " .
- "and not(contains(@" . $attribute . ", '://')) " .
- "and not(starts-with(@" . $attribute . ", 'mailto:'))]");
- foreach ($xpathResult as $linkNode) {
- $urlElement = $linkNode->attributes->getNamedItem($attribute);
- $abs = $this->relativeToAbsoluteUrl( $urlElement->nodeValue, $absoluteUrl );
- $urlElement->nodeValue = htmlspecialchars($abs);
- }
- }
-
- // save dom to string and remove <body></body>
- $xmlString = substr(trim($dom->saveHTML()), 6, -7);
- // domdocument spoils the string with line breaks between the elements. strip them.
- $xmlString = str_replace("\n", "", $xmlString);
-
- return $xmlString;
- }
-
-
- /**
- * Method which builds a URL by taking a relative URL and its corresponding
- * absolute URL
- * For examle relative URL "../example/path/file.php?a=1#anchor" and
- * absolute URL "https://username:password@www.website.com/subfolder/index.html"
- * will result in "https://username:password@www.website.com/example/path/file.php?a=1#anchor"
- * @param string $relativeUrl the relative URL
- * @param string $absoluteUrl the absolute URL with at least scheme and host
- * @return string the resulting absolute URL
- */
- protected function relativeToAbsoluteUrl($relativeUrl, $absoluteUrl) {
- $abs = parse_url($absoluteUrl);
-
- $newUrl = $abs["scheme"]."://"
- .( (isset($abs["user"])) ? $abs["user"] . ( (isset($abs["pass"])) ? ":".$abs["pass"] : "") . "@" : "" )
- .$abs["host"]
- .( (isset($abs["port"])) ? ":".$abs["port"] : "" );
-
- if(substr(trim($relativeUrl), 0, 1) == "/") {
- // we have a relative url like "/a/path/file"
- return $newUrl . $relativeUrl;
- } else {
- // we have a relative url like "a/path/file", handle "."" and ".." directories
-
- // the starting point is the absolute path, but with out the last part (we don't need the file name)
- $newPath = explode("/", substr($abs["path"], 1) );
- array_pop($newPath);
-
- $relPath = parse_url($relativeUrl, PHP_URL_PATH);
- $relPath = explode("/", $relPath);
-
- // cross the relative and the absolute path
- for($i=0; $i<count($relPath)-1; $i++) {
- if($relPath[$i] == ".") {
- continue;
- } elseif($relPath[$i] == "..") {
- array_pop($newPath);
- } else {
- $newPath[] = $relPath[$i];
- }
- }
-
- // add the last part (the file name) of the relative URL
- $newPath[] = $relPath[ count($relPath)-1 ];
- $newPath = implode("/", $newPath);
-
- $rel = parse_url($relativeUrl);
- return $newUrl . "/" . $newPath
- . ( (isset($rel["query"])) ? "?".$rel["query"] : "")
- . ( (isset($rel["fragment"])) ? "#".$rel["fragment"] : "");
- }
- }
-
-
- /**
- * Method which turns an xpath result to a string
- * you can customize it by overwriting this method
- * @param $xpathResult the result from the xpath query
- * @return the result as a string
- */
- protected function domToString($xpathResult) {
- $result = "";
- foreach($xpathResult as $node) {
- $result .= $this->toInnerHTML($node);
- }
- return $result;
- }
-
-
- protected function toInnerHTML($node) {
- $dom = new \DOMDocument();
- $dom->appendChild($dom->importNode($node, true));
- return trim($dom->saveHTML($dom->documentElement));
- }
-
-
-} \ No newline at end of file
diff --git a/utility/articleenhancer/xpathenhancers.json b/utility/articleenhancer/xpathenhancers.json
deleted file mode 100644
index 29296c79f..000000000
--- a/utility/articleenhancer/xpathenhancers.json
+++ /dev/null
@@ -1,57 +0,0 @@
-{
- "cad-comic.com": {
- "%cad-comic.com/cad/\\d+/$%": "//*[@id='content']/img"
- },
- "explosm.net": {
- "%explosm.net/comics%": "//*[@id='maincontent']/div[2]/div/img",
- "%explosm.net/show%": "//*[@id='videoPlayer']/iframe"
- },
- "themerepublic.net": {
- "%feedproxy.google.com/~r/blogspot/DngUJ%": "//*[@class='post hentry']"
- },
- "penny-arcade.com": {
- "%feeds.penny-arcade.com/~r/pa-mainsite%": "//*[starts-with(@class, \"post\")]"
- },
- "leasticoulddo.com": {
- "%feedproxy.google.com/~r/LICD%": "//*[@id='comic-img']/a/img | //*[@id='comic-img']/img"
- },
- "escapistmagazine.com/articles/view/comics/critical-miss": {
- "%escapistmagazine.com/articles/view/comics/critical-miss%": "//*[@class='body']/span/img"
- },
- "escapistmagazine.com/articles/view/comics/namegame": {
- "%escapistmagazine.com/articles/view/comics/namegame%": "//*[@class='body']/span/p/img[@height != \"120\"]"
- },
- "escapistmagazine.com/articles/view/comics/stolen-pixels": {
- "%escapistmagazine.com/articles/view/comics/stolen-pixels%": "//*[@class='body']/span/p[2]/img"
- },
- "escapistmagazine.com/articles/view/comics/bumhugparade": {
- "%escapistmagazine.com/articles/view/comics/bumhugparade%": "//*[@class='body']/span/p[2]/img"
- },
- "escapistmagazine.com/articles/view/comics/escapistradiotheater": {
- "%escapistmagazine.com/articles/view/comics/escapistradiotheater%": "//*[@class='body']/span/p[2]/img"
- },
- "escapistmagazine.com/articles/view/comics/paused": {
- "%escapistmagazine.com/articles/view/comics/paused%": "//*[@class='body']/span/p[2]/img | //*[@class='body']/span/div/img"
- },
- "escapistmagazine.com/articles/view/comics/fraughtwithperil": {
- "%escapistmagazine.com/articles/view/comics/fraughtwithperil%": "//*[@class='body']"
- },
- "trenchescomic.com": {
- "%trenchescomic.com/comic%": "//*[@class=\"top\"]/img",
- "%trenchescomic.com/tales%": "//*[@class=\"copy\"]"
- },
- "lfgcomic.com": {
- "%(lfgcomic.com/page)|(feedproxy.google.com/~r/LookingForGroup)%": "//*[@id=\"comic\"]/img | //*[@class=\"content\"]"
- },
- "sandraandwoo.com": {
- "%sandraandwoo.com%": "//*[@id=\"comic\"]/img | //*[@class=\"post\"]"
- },
- "sandraandwoo.com/gaia": {
- "%sandraandwoo.com%": "//*[@id=\"comic\"]/img | //*[@class=\"post\"]"
- },
- "theoatmeal.com": {
- "%theoatmeal.com/blog%": "//*[@class=\"post_body\"]",
- "%theoatmeal.com/comics%": "//*[@id=\"comic\"] | //*[@class=\"super_vacum\"] | //*[@class=\"pad\"]"
- }
-}
-