summaryrefslogtreecommitdiffstats
path: root/articleenhancer
diff options
context:
space:
mode:
authorBernhard Posselt <dev@bernhard-posselt.com>2015-08-10 20:20:30 +0200
committerBernhard Posselt <dev@bernhard-posselt.com>2015-08-12 17:05:18 +0200
commit53679811da855acf9bd944a389a48399ca5d5a15 (patch)
treefa75e06a965fb5751017288a5c135bc179574210 /articleenhancer
parentc77a6705d34c81cb933f3d4b83eb18e2b586035a (diff)
serverside full text
remove enhancers add full text client side implementation fix bugs and tests for full text feed
Diffstat (limited to 'articleenhancer')
-rw-r--r--articleenhancer/articleenhancer.php25
-rw-r--r--articleenhancer/enhancer.php83
-rw-r--r--articleenhancer/regexarticleenhancer.php46
-rw-r--r--articleenhancer/regexenhancers.json59
-rw-r--r--articleenhancer/xpatharticleenhancer.php196
-rw-r--r--articleenhancer/xpathenhancers.json186
6 files changed, 0 insertions, 595 deletions
diff --git a/articleenhancer/articleenhancer.php b/articleenhancer/articleenhancer.php
deleted file mode 100644
index a151de656..000000000
--- a/articleenhancer/articleenhancer.php
+++ /dev/null
@@ -1,25 +0,0 @@
-<?php
-/**
- * ownCloud - News
- *
- * This file is licensed under the Affero General Public License version 3 or
- * later. See the COPYING file.
- *
- * @author Alessandro Cosentino <cosenal@gmail.com>
- * @author Bernhard Posselt <dev@bernhard-posselt.com>
- * @copyright Alessandro Cosentino 2012
- * @copyright Bernhard Posselt 2012, 2014
- */
-
-namespace OCA\News\ArticleEnhancer;
-
-use \OCA\News\Db\Item;
-
-
-interface ArticleEnhancer {
-
-    /**
-     * Applies this enhancer to a single feed item
-     *
-     * @param \OCA\News\Db\Item $item the item that should be enhanced
-     * @return \OCA\News\Db\Item the enhanced item
-     */
-    public function enhance(Item $item);
-
-} \ No newline at end of file
diff --git a/articleenhancer/enhancer.php b/articleenhancer/enhancer.php
deleted file mode 100644
index abe8360d2..000000000
--- a/articleenhancer/enhancer.php
+++ /dev/null
@@ -1,83 +0,0 @@
-<?php
-/**
- * ownCloud - News
- *
- * This file is licensed under the Affero General Public License version 3 or
- * later. See the COPYING file.
- *
- * @author Alessandro Cosentino <cosenal@gmail.com>
- * @author Bernhard Posselt <dev@bernhard-posselt.com>
- * @copyright Alessandro Cosentino 2012
- * @copyright Bernhard Posselt 2012, 2014
- */
-
-namespace OCA\News\ArticleEnhancer;
-
-
-class Enhancer {
-
-    /** @var ArticleEnhancer[] feed specific enhancers, keyed by feed url */
-    private $enhancers = [];
-
-    /** @var ArticleEnhancer[] enhancers that are run for every item */
-    private $globalEnhancers = [];
-
-    /**
-     * Registers an enhancer for the items of a single feed. A trailing slash
-     * is stripped from the url and the enhancer is stored under the bare url
-     * as well as under its http(s):// and http(s)://www. variants. The
-     * prefixes are prepended unconditionally, so the url is expected to be
-     * passed without protocol and without www.
-     * @param string $feedUrl
-     * @param ArticleEnhancer $enhancer
-     */
-    public function registerEnhancer($feedUrl, ArticleEnhancer $enhancer){
-        $feedUrl = $this->removeTrailingSlash($feedUrl);
-
-        // create hashkeys for all supported protocols for quick access
-        $this->enhancers[$feedUrl] = $enhancer;
-        $this->enhancers['https://' . $feedUrl] = $enhancer;
-        $this->enhancers['http://' . $feedUrl] = $enhancer;
-        $this->enhancers['https://www.' . $feedUrl] = $enhancer;
-        $this->enhancers['http://www.' . $feedUrl] = $enhancer;
-    }
-
-
-    /**
-     * Registers enhancers that are run for every item and after all previous
-     * enhancers have been run
-     * @param ArticleEnhancer $enhancer
-     */
-    public function registerGlobalEnhancer (ArticleEnhancer $enhancer) {
-        $this->globalEnhancers[] = $enhancer;
-    }
-
-
-    /**
-     * Runs the enhancer registered for the feed url (if there is one) and
-     * afterwards all global enhancers in the order of their registration
-     * @param \OCA\News\Db\Item $item
-     * @param string $feedUrl
-     * @return \OCA\News\Db\Item enhanced item
-     */
-    public function enhance($item, $feedUrl){
-        $feedUrl = $this->removeTrailingSlash($feedUrl);
-
-        if(array_key_exists($feedUrl, $this->enhancers)) {
-            $result = $this->enhancers[$feedUrl]->enhance($item);
-        } else {
-            $result = $item;
-        }
-
-        foreach ($this->globalEnhancers as $enhancer) {
-            $result = $enhancer->enhance($result);
-        }
-
-        return $result;
-    }
-
-
-    /**
-     * Strips a single trailing slash from the url
-     * @param string $url
-     * @return string the url without the trailing slash, unchanged if it
-     * does not end with a slash
-     */
-    private function removeTrailingSlash($url) {
-        // substr() instead of $url[strlen($url)-1]: an empty url would
-        // otherwise access the uninitialized string offset -1
-        if(substr($url, -1) === '/') {
-            return substr($url, 0, -1);
-        }
-
-        return $url;
-    }
-
-
-} \ No newline at end of file
diff --git a/articleenhancer/regexarticleenhancer.php b/articleenhancer/regexarticleenhancer.php
deleted file mode 100644
index 0f600468e..000000000
--- a/articleenhancer/regexarticleenhancer.php
+++ /dev/null
@@ -1,46 +0,0 @@
-<?php
-/**
- * ownCloud - News
- *
- * This file is licensed under the Affero General Public License version 3 or
- * later. See the COPYING file.
- *
- * @author Alessandro Cosentino <cosenal@gmail.com>
- * @author Bernhard Posselt <dev@bernhard-posselt.com>
- * @copyright Alessandro Cosentino 2012
- * @copyright Bernhard Posselt 2012, 2014
- */
-
-namespace OCA\News\ArticleEnhancer;
-
-use \OCA\News\Db\Item;
-
-
-class RegexArticleEnhancer implements ArticleEnhancer {
-
-    private $matchArticleUrl;
-    private $regexPair;
-
-    /**
-     * @param string $matchArticleUrl regex that the url of an item has to
-     * match for the replacements to be applied
-     * @param array $regexPair an associative array with the search regex as
-     * key and the replacement as value
-     */
-    public function __construct($matchArticleUrl, array $regexPair) {
-        $this->matchArticleUrl = $matchArticleUrl;
-        $this->regexPair = $regexPair;
-    }
-
-
-    /**
-     * Applies all replacements in their given order to the body of the item
-     * if the url of the item matches
-     * @param \OCA\News\Db\Item $item
-     * @return \OCA\News\Db\Item enhanced item
-     */
-    public function enhance(Item $item) {
-        if (!preg_match($this->matchArticleUrl, $item->getUrl())) {
-            return $item;
-        }
-
-        $body = $item->getBody();
-        foreach($this->regexPair as $search => $replaceWith) {
-            $replaced = preg_replace($search, $replaceWith, $body);
-
-            // preg_replace returns null if an error occurred, skip the
-            // replacement in that case instead of wiping the body
-            if ($replaced !== null) {
-                $body = $replaced;
-            }
-        }
-        $item->setBody($body);
-
-        return $item;
-    }
-
-
-}
diff --git a/articleenhancer/regexenhancers.json b/articleenhancer/regexenhancers.json
deleted file mode 100644
index 53eeece40..000000000
--- a/articleenhancer/regexenhancers.json
+++ /dev/null
@@ -1,59 +0,0 @@
-{
- "twogag.com": {
- "%(?:www.twogag.com/archives)|(feedproxy.google.com/~r/TwoGuysAndGuy)%": {
- "%http://www.twogag.com/comics-rss/([^.]+)\\.jpg%": "http://www.twogag.com/comics/$1.jpg"
- }
- },
- "nichtlustig.de": {
- "%nichtlustig.de%": {
- "%.*static.nichtlustig.de/comics/full/(\\d+).*%s": "<img src=\"http://static.nichtlustig.de/comics/full/$1.jpg\" />"
- }
- },
- "nerfnow.com": {
- "%feedproxy.google.com/~r/nerfnow%": {
- "%(width|height)=\"\\d+\"%": "",
- "%nerfnow.com/comic/thumb/(\\d+)/large%": "nerfnow.com/comic/image/$1",
- "%<div><a target=\"_blank\" href=\"http://www.nerfnow.com/comic/\\d+\">Click for full size</a></div>%": "",
- "%<img src=\"http://feeds.feedburner.com[^>]+>%": ""
- }
- },
- "fowllanguagecomics.com": {
- "%fowllanguagecomics.com/comic%": {
- "%\\?resize=[^\"]+%": ""
- }
- },
- "cheerupemokid.com": {
- "%feedproxy.google.com/~r/cheerupemokid%": {
- "%-\\d+x\\d+%": "",
- "%(width|height)=\"\\d+\"%" : ""
- }
- },
- "extrafabulouscomics.com": {
- "%extrafabulouscomics.com/comic/%": {
- "%-\\d+x\\d+%": "",
- "%(width|height)=\"\\d+\"%" : ""
- }
- },
- "endlessorigami.com": {
- "%endlessorigami.com/comic/%": {
- "%-\\d+x\\d+%": "",
- "%(width|height)=\"\\d+\"%" : ""
- }
- },
- "www.loadingartist.com": {
- "%www.loadingartist.com/comic/%": {
- "%-\\d+x\\d+%": "",
- "%(width|height)=\"\\d+\"%" : ""
- }
- },
- "xkcd.com": {
- "%xkcd.com/\\d+/%": {
- "%alt=\"(.+)\" */>%": "/><br/>$1"
- }
- },
- "www.treelobsters.com": {
- "%www.treelobsters.com%": {
- "%title=\"(.+)\" */>%": "/><br/>$1"
- }
- }
-}
diff --git a/articleenhancer/xpatharticleenhancer.php b/articleenhancer/xpatharticleenhancer.php
deleted file mode 100644
index 61bf230a0..000000000
--- a/articleenhancer/xpatharticleenhancer.php
+++ /dev/null
@@ -1,196 +0,0 @@
-<?php
-/**
- * ownCloud - News
- *
- * This file is licensed under the Affero General Public License version 3 or
- * later. See the COPYING file.
- *
- * @author Alessandro Cosentino <cosenal@gmail.com>
- * @author Bernhard Posselt <dev@bernhard-posselt.com>
- * @copyright Alessandro Cosentino 2012
- * @copyright Bernhard Posselt 2012, 2014
- */
-
-namespace OCA\News\ArticleEnhancer;
-
-use DOMDocument;
-use DOMXpath;
-
-use PicoFeed\Encoding\Encoding;
-
-use OCA\News\Utility\PicoFeedClientFactory;
-
-use OCA\News\Db\Item;
-
-
-class XPathArticleEnhancer implements ArticleEnhancer {
-
-    private $clientFactory;
-    private $regexXPathPair;
-
-
-    /**
-     * @param \OCA\News\Utility\PicoFeedClientFactory $clientFactory
-     * @param array $regexXPathPair an associative array containing regex to
-     * match the url and the xpath that should be used for it to extract the
-     * page
-     */
-    public function __construct(PicoFeedClientFactory $clientFactory,
-                                array $regexXPathPair){
-        $this->clientFactory = $clientFactory;
-        $this->regexXPathPair = $regexXPathPair;
-    }
-
-    /**
-     * Downloads the page behind the url of the item for every regex that
-     * matches the url and replaces the body of the item with the result of
-     * the corresponding xpath, provided that the result is not empty
-     * @param \OCA\News\Db\Item $item
-     * @return \OCA\News\Db\Item enhanced item
-     */
-    public function enhance(Item $item){
-
-        foreach($this->regexXPathPair as $regex => $search) {
-
-            if(!preg_match($regex, $item->getUrl())) {
-                continue;
-            }
-
-            $body = $this->getFile($item->getUrl());
-
-            // First check if either <meta charset="..."> or
-            // <meta http-equiv="Content-Type" ...> is specified and use it
-            // If this fails use mb_detect_encoding()
-            // (kept in its own variable so the loop variable $regex is not
-            // overwritten)
-            $charsetRegex =
-                '/<meta\s+[^>]*(?:charset\s*=\s*[\'"]([^>\'"]*)[\'"]' .
-                '|http-equiv\s*=\s*[\'"]content-type[\'"]\s+[^>]*' .
-                'content\s*=\s*[\'"][^>]*charset=([^>]*)[\'"])[^>]*>' .
-                '/i';
-            if(preg_match($charsetRegex, $body, $matches)) {
-                // the charset is in the last group that took part in the match
-                $enc = strtoupper($matches[count($matches) - 1]);
-            } else {
-                $enc = mb_detect_encoding($body);
-            }
-            $enc = $enc ? $enc : 'UTF-8';
-            $body = mb_convert_encoding($body, 'HTML-ENTITIES', $enc);
-            if (trim($body) === '') {
-                return $item;
-            }
-
-            $dom = new DOMDocument();
-            $isOk = @$dom->loadHTML($body);
-
-            $xpath = new DOMXpath($dom);
-            $xpathResult = $xpath->evaluate($search);
-
-            // false is returned if the expression could not be evaluated,
-            // there is nothing to convert or to set in that case
-            if($xpathResult === false) {
-                continue;
-            }
-
-            // in case it wasn't a text query assume it's a dom element and
-            // convert it to text
-            if(!is_string($xpathResult)) {
-                $xpathResult = $this->domToString($xpathResult);
-            }
-
-            $xpathResult = trim($xpathResult);
-
-            // convert all relative to absolute URLs
-            $xpathResult = $this->substituteRelativeLinks(
-                $xpathResult, $item->getUrl()
-            );
-
-            if($isOk && $xpathResult !== false && $xpathResult !== '') {
-                $item->setBody($xpathResult);
-            }
-        }
-
-        return $item;
-    }
-
-
-    /**
-     * Fetches the content of a url using a client built by the factory
-     * @param string $url the url that should be fetched
-     * @return mixed whatever the client returns as content
-     */
-    private function getFile($url) {
-        $client = $this->clientFactory->build();
-        // the user agent has to be set before the request is executed,
-        // setting it afterwards comes too late for this request
-        $client->setUserAgent('Mozilla/5.0 AppleWebKit');
-        $client->execute($url);
-        return $client->getContent();
-    }
-
-
-    /**
-     * Method which converts all relative "href" and "src" URLs of
-     * a HTML snippet with their absolute equivalent
-     * @param string $xmlString a HTML snippet as string with the relative URLs
-     * to be replaced
-     * @param string $absoluteUrl the appropriate absolute url of the HTML
-     * snippet
-     * @return string the result HTML snippet as a string, an empty string if
-     * the snippet is empty or could not be loaded
-     */
-    protected function substituteRelativeLinks($xmlString, $absoluteUrl) {
-        $dom = new DOMDocument();
-        $dom->preserveWhiteSpace = false;
-
-        if($xmlString === '') {
-            return '';
-        }
-
-        // wrap the snippet so that it has exactly one root element
-        $xmlString = '<div>' . $xmlString . '</div>';
-        $isOk = @$dom->loadHTML($xmlString, LIBXML_HTML_NOIMPLIED |
-                                            LIBXML_HTML_NODEFDTD);
-
-        if(!$isOk) {
-            return '';
-        }
-
-        // one xpath object is enough for both attributes
-        $xpath = new DOMXpath($dom);
-
-        foreach (['href', 'src'] as $attribute) {
-            // select all elements whose attribute is neither an absolute
-            // url, a mailto link nor a protocol relative url
-            $xpathResult = $xpath->query(
-                "//*[@" . $attribute . " " .
-                "and not(contains(@" . $attribute . ", '://')) " .
-                "and not(starts-with(@" . $attribute . ", 'mailto:')) " .
-                "and not(starts-with(@" . $attribute . ", '//'))]");
-            foreach ($xpathResult as $linkNode) {
-                $urlElement = $linkNode->attributes->getNamedItem($attribute);
-                $abs = $this->relativeToAbsoluteUrl(
-                    $urlElement->nodeValue, $absoluteUrl
-                );
-                $urlElement->nodeValue = htmlspecialchars($abs);
-            }
-        }
-
-        $xmlString = $dom->saveHTML();
-
-        // domdocument spoils the string with line breaks between the elements
-        // strip them
-        $xmlString = str_replace("\n", '', $xmlString);
-
-        return $xmlString;
-    }
-
-
-    /**
-     * Method which builds a URL by taking a relative URL and its corresponding
-     * absolute URL
-     * @param string $relativeUrl the relative URL
-     * @param string $absoluteUrl the absolute URL with at least scheme and host
-     * @return string the resulting absolute URL
-     */
-    protected function relativeToAbsoluteUrl($relativeUrl, $absoluteUrl) {
-        $base = new \Net_URL2($absoluteUrl);
-        return $base->resolve($relativeUrl);
-    }
-
-
-    /**
-     * Method which turns an xpath result to a string
-     * you can customize it by overwriting this method
-     * @param mixed $xpathResult the result from the xpath query
-     * @return string the result as a string
-     */
-    protected function domToString($xpathResult) {
-        $result = '';
-        foreach($xpathResult as $node) {
-            $result .= $this->toInnerHTML($node);
-        }
-        return $result;
-    }
-
-
-    /**
-     * Serializes a node by importing a deep copy of it into a fresh document
-     * @param \DOMNode $node the node that should be serialized
-     * @return string the trimmed HTML of the node
-     */
-    protected function toInnerHTML($node) {
-        $dom = new DOMDocument();
-        $dom->appendChild($dom->importNode($node, true));
-        return trim($dom->saveHTML($dom->documentElement));
-    }
-
-
-}
diff --git a/articleenhancer/xpathenhancers.json b/articleenhancer/xpathenhancers.json
deleted file mode 100644
index a088a6ec6..000000000
--- a/articleenhancer/xpathenhancers.json
+++ /dev/null
@@ -1,186 +0,0 @@
-{
- "cad-comic.com": {
- "%cad-comic.com/cad/\\d+/$%": "//*[@id='content']/img"
- },
- "slashdot.org/": {
- "%slashdot.feedsportal.com%": "//article/div[@class='body'] | //article[@class='layout-article']/div[@class='elips']"
- },
- "explosm.net/rss.php": {
- "%explosm.net/comics%": "//*[@id='main-comic']",
- "%explosm.net/show%": "//*[@id='player']/*"
- },
- "themerepublic.net": {
- "%feedproxy.google.com/~r/blogspot/DngUJ%": "//*[@class='post hentry']"
- },
- "penny-arcade.com": {
- "%penny-arcade.com/comic%": "//*[@id='comicFrame']/a/img",
- "%penny-arcade.com/news%": "//*[@class='postBody']/*"
- },
- "leasticoulddo.com": {
- "%feedproxy.google.com/~r/LICD%": "//*[@id='comic-img']/a/img | //*[@id='comic-img']/img | //*[@id='post']"
- },
- "escapistmagazine.com/articles/view/comicsandcosplay/comics/critical-miss": {
- "%escapistmagazine.com/articles/view/comicsandcosplay/comics/critical-miss%": "//*[@class='body']/span/img | //div[@class='folder_nav_links']/following::p"
- },
- "escapistmagazine.com/articles/view/comicsandcosplay/comics/namegame": {
- "%escapistmagazine.com/articles/view/comicsandcosplay/comics/namegame%": "//*[@class='body']/span/p/img[@height != \"120\"]"
- },
- "escapistmagazine.com/articles/view/comicsandcosplay/comics/stolen-pixels": {
- "%escapistmagazine.com/articles/view/comicsandcosplay/comics/stolen-pixels%": "//*[@class='body']/span/p[2]/img"
- },
- "escapistmagazine.com/articles/view/comicsandcosplay/comics/bumhugparade": {
- "%escapistmagazine.com/articles/view/comicsandcosplay/comics/bumhugparade%": "//*[@class='body']/span/p[2]/img"
- },
- "escapistmagazine.com/articles/view/comicsandcosplay/comics/escapistradiotheater": {
- "%escapistmagazine.com/articles/view/comicsandcosplay/comics/escapistradiotheater%": "//*[@class='body']/span/p[2]/img"
- },
- "escapistmagazine.com/articles/view/comicsandcosplay/comics/paused": {
- "%escapistmagazine.com/articles/view/comicsandcosplay/comics/paused%": "//*[@class='body']/span/p[2]/img | //*[@class='body']/span/div/img"
- },
- "escapistmagazine.com/articles/view/comicsandcosplay/comics/fraughtwithperil": {
- "%escapistmagazine.com/articles/view/comicsandcosplay/comics/fraughtwithperil%": "//*[@class='body']"
- },
- "trenchescomic.com": {
- "%trenchescomic.com/comic%": "//*[@class=\"top\"]/img",
- "%trenchescomic.com/tales%": "//*[@class=\"copy\"]"
- },
- "lfgcomic.com": {
- "%(lfgcomic.com/page)|(feedproxy.google.com/~r/LookingForGroup)%": "//*[@id=\"comic\"]/img | //*[@class=\"content\"]"
- },
- "sandraandwoo.com": {
- "%feedproxy.google.com/~r/sandraandwoo%": "//*[@id=\"comic\"]/img | //*[@class=\"post\"]/div[@class=\"entry\"] | //*[@class=\"transcript\"]"
- },
- "sandraandwoo.com/gaia": {
- "%feedproxy.google.com/~r/gaiacomic%": "//*[@id=\"comic\"]/img | //*[@class=\"post\"]/div[@class=\"entry\"] | //*[@class=\"transcript\"]"
- },
- "theoatmeal.com": {
- "%theoatmeal.com/blog%": "//*[@class=\"post_body\"]",
- "%theoatmeal.com/comics%": "//*[@id=\"comic\"] | //*[@class=\"super_vacum\"] | //*[@class=\"pad\"]"
- },
- "loldwell.com": {
- "%loldwell.com/\\?p=%": "//*[@id=\"comic\"]"
- },
- "mokepon.smackjeeves.com": {
- "%mokepon.smackjeeves.com/comics%": "//*[@id=\"comic_area\"]/img | //*[@id=\"comic_area\"]/a/img"
- },
- "twokinds.keenspot.com": {
- "%twokinds.keenspot.com/%": "//*[@class=\"comic\"]/div/a/img | //*[@class=\"comic\"]/div/img | //*[@id=\"cg_img\"]/img | //*[@id=\"cg_img\"]/a/img"
- },
- "niceteethcomic.com": {
- "%niceteethcomic.com/archives%": "//*[@class=\"comicpane\"]/a/img"
- },
- "awkwardzombie.com" :{
- "%awkwardzombie.com/index.php\\?comic=%": "//*[@id=\"comic\"]/img"
- },
- "vgcats.com": {
- "%vgcats.com/comics%": "//*[@align=\"center\"]/img"
- },
- "vgcats.com/super": {
- "%vgcats.com/super%": "//div[@align=\"center\"]/p/img"
- },
- "chaoslife.findchaos.com": {
- "%chaoslife.findchaos.com%": "//div[@id=\"comic\"]"
- },
- "forlackofabettercomic.com/archive.php": {
- "%forlackofabettercomic.com/\\?id=%": "//div[@id=\"comicset\"]/a/img"
- },
- "heise.de/newsticker": {
- "%heise.de%": "//*[@class='meldung_wrapper']/*[not(contains(@class, 'dossier'))]"
- },
- "heise.de/developer": {
- "%heise.de%": "//*[@class='meldung_wrapper']/*[not(contains(@class, 'dossier'))]"
- },
- "heise.de/open/news": {
- "%heise.de%": "//*[@class='meldung_wrapper']/*[not(contains(@class, 'dossier'))]"
- },
- "spiegel.de": {
- "%spiegel.de/(?!.*video).*%": "//p[@class='article-intro'] | //*[@itemprop='description' and not(ancestor::*[contains( normalize-space( @class ), 'article-section' )]) ] | //*[(@class='spPanoImageTeaserPic' or @class='spPanoGalleryTeaserPic' or @class='spPanoPlayerTeaserPic') and not(ancestor::*[contains( normalize-space( @class ), 'article-section' )])] | //*[@class='image-buttons-panel' and not(ancestor::*[contains( normalize-space( @class ), 'article-section' )])]/*[1]/img | //*[@class='article-image-description' and not(ancestor::*[contains( normalize-space( @class ), 'article-section' )]) ]/p | //*[@id='content-main']/*[@id='js-article-column']/div[contains( normalize-space( @class ), 'article-section' )]/*[not( contains( normalize-space( @class ), 'article-function-social-media' ))] | //*[@id='content-main']/*[@id='js-article-column']/p"
- },
- "eqcomics.com": {
- "%feedproxy.google.com/~r/eqcomics%": "//div[@id=\"comic\"]/div/a/img"
- },
- "thegamercat.com": {
- "%thegamercat.com%": "//div[@id=\"comic\"] | //div[@class=\"post-content\"]/div[@class=\"entry\"]/p"
- },
- "niebezpiecznik.pl": {
- "%feedproxy.google.com/~r/niebezpiecznik%": "//div[@class=\"entry\"]"
- },
- "maximumble.thebookofbiff.com": {
- "%maximumble.thebookofbiff.com%": "//div[@id=\"comic\"]/div/a/img"
- },
- "omgubuntu.co.uk": {
- "%www.omgubuntu.co.uk%": "//div[@class=\"entry-content\"]"
- },
- "tu-chemnitz.de/tu/presse/": {
- "%tu-chemnitz.de%": "//div[@id='content_inhalt']/*[preceding-sibling::h1 and following-sibling::div[@id]]"
- },
- "cliquerefresh.com": {
- "%cliquerefresh.com/comic%": "//div[@class=\"comicImg\"]/img | //div[@class=\"comicImg\"]/a/img"
- },
- "satwcomic.com": {
- "%feedproxy.google.com/~r/satwcomic%": "//div[@class=\"comicmid\"]/center/a/img"
- },
- "androidpolice.com": {
- "%rss.feedsportal.com/c/33941/f/615677/p/1/s/%": "//div[@class=\"post_content\"]"
- },
- "neustadt-ticker.de": {
- "%neustadt-ticker.de%": "//div[contains(@class,'article')]/div[@class='PostContent' and *[not(contains(@class, 'navigation'))]]"
- },
- "linuxtoday.com/": {
- "%linuxtoday.com%": "//div[@class='article']/p | //div[@class='article']/ul"
- },
- "metronieuws.nl": {
- "%metronieuws.nl%": "//div[contains(@class,'article-top')]/div[contains(@class,'image-component')] | //div[@class='article-full-width']/div[1]"
- },
- "thecodinglove.com": {
- "%thecodinglove.com%": "//div[@class='bodytype']"
- },
- "der-postillon.com": {
- "%der-postillon.com%": "//div[contains(@class,'post-body')]"
- },
- "areadvd.de": {
- "%areadvd.de%": "//div[contains(@class,'entry')]"
- },
- "engadget.com": {
- "%engadget.com%": "//div[@class='article-content']/p[not(@class='read-more')] | //div[@class='article-content']/div[@style='text-align: center;']"
- },
- "mydealz.de": {
- "%http://feedproxy.google.com/~r/myDealZ%": "//div[@class='section-hub'] | //div[contains(@class,'thread-deal')]"
- },
- "scrumalliance.org": {
- "%scrumalliance.org/rss?%": "//div[@class='article_content']"
- },
- "smarthomewelt.de": {
- "%smarthomewelt.de%": "//div[@class='entry-inner']/p | //div[@class='entry-inner']/div[contains(@class,'wp-caption')]"
- },
- "elegantthemes.com": {
- "%elegantthemes.com%": "//article[contains(@class,'post')]/p"
- },
- "smashingmagazine.com": {
- "%smashingmagazine.com%": "//article[contains(@class,'post')]/p"
- },
- "sueddeutsche.de": {
- "%sz.de%": "//article[@id='sitecontent']/section[@class='topenrichment']//img | //article[@id='sitecontent']/section[@class='body']/section[@class='authors']/preceding-sibling::*[not(contains(@class, 'ad') or contains(@class, 'article-sidebar-wrapper'))]"
- },
- "lifehacker.com": {
- "%lifehacker.com%": "//div[contains(@class,'entry-content')]"
- },
- "travel-dealz.de": {
- "%travel-dealz.de/blog/%": "//div[@class='post-entry']"
- },
- "outdoordeals.de": {
- "%outdoordeals.de%": "//article[contains(@class,'blogpost')]/div[@class='inner']"
- },
- "buttersafe.com": {
- "%buttersafe.com%": "//div[@id='comic'] | //div[@class='post-comic']"
- },
- "www.pixelbeat.org": {
- "%www.pixelbeat.org%": "//div[@class='contentText']"
- },
- "gocomics.com/pearlsbeforeswine": {
- "%gocomics.com/pearlsbeforeswine%": "//div[1]/p[1]/a[1]/img"
- },
- "marriedtothesea.com": {
- "%marriedtothesea.com%": "//div[@align]/a/img"
- }
-}