diff options
author | Bernhard Posselt <dev@bernhard-posselt.com> | 2015-08-10 20:20:30 +0200 |
---|---|---|
committer | Bernhard Posselt <dev@bernhard-posselt.com> | 2015-08-12 17:05:18 +0200 |
commit | 53679811da855acf9bd944a389a48399ca5d5a15 (patch) | |
tree | fa75e06a965fb5751017288a5c135bc179574210 /articleenhancer | |
parent | c77a6705d34c81cb933f3d4b83eb18e2b586035a (diff) |
serverside full text
remove enhancers
add full text client side implementation
fix bugs and tests for full text feed
Diffstat (limited to 'articleenhancer')
-rw-r--r-- | articleenhancer/articleenhancer.php | 25 | ||||
-rw-r--r-- | articleenhancer/enhancer.php | 83 | ||||
-rw-r--r-- | articleenhancer/regexarticleenhancer.php | 46 | ||||
-rw-r--r-- | articleenhancer/regexenhancers.json | 59 | ||||
-rw-r--r-- | articleenhancer/xpatharticleenhancer.php | 196 | ||||
-rw-r--r-- | articleenhancer/xpathenhancers.json | 186 |
6 files changed, 0 insertions, 595 deletions
diff --git a/articleenhancer/articleenhancer.php b/articleenhancer/articleenhancer.php deleted file mode 100644 index a151de656..000000000 --- a/articleenhancer/articleenhancer.php +++ /dev/null @@ -1,25 +0,0 @@ -<?php -/** - * ownCloud - News - * - * This file is licensed under the Affero General Public License version 3 or - * later. See the COPYING file. - * - * @author Alessandro Cosentino <cosenal@gmail.com> - * @author Bernhard Posselt <dev@bernhard-posselt.com> - * @copyright Alessandro Cosentino 2012 - * @copyright Bernhard Posselt 2012, 2014 - */ - -namespace OCA\News\ArticleEnhancer; - -use \OCA\News\Db\Item; - - -interface ArticleEnhancer { - /** - * @param \OCA\News\Db\Item $item - * @return \OCA\News\Db\Item enhanced item - */ - public function enhance(Item $item); -}
\ No newline at end of file diff --git a/articleenhancer/enhancer.php b/articleenhancer/enhancer.php deleted file mode 100644 index abe8360d2..000000000 --- a/articleenhancer/enhancer.php +++ /dev/null @@ -1,83 +0,0 @@ -<?php -/** - * ownCloud - News - * - * This file is licensed under the Affero General Public License version 3 or - * later. See the COPYING file. - * - * @author Alessandro Cosentino <cosenal@gmail.com> - * @author Bernhard Posselt <dev@bernhard-posselt.com> - * @copyright Alessandro Cosentino 2012 - * @copyright Bernhard Posselt 2012, 2014 - */ - -namespace OCA\News\ArticleEnhancer; - - -class Enhancer { - - private $enhancers = []; - private $globalEnhancers = []; - - /** - * @param string $feedUrl - * @param ArticleEnhancer $enhancer - */ - public function registerEnhancer($feedUrl, ArticleEnhancer $enhancer){ - $feedUrl = $this->removeTrailingSlash($feedUrl); - - // create hashkeys for all supported protocols for quick access - $this->enhancers[$feedUrl] = $enhancer; - $this->enhancers['https://' . $feedUrl] = $enhancer; - $this->enhancers['http://' . $feedUrl] = $enhancer; - $this->enhancers['https://www.' . $feedUrl] = $enhancer; - $this->enhancers['http://www.' . $feedUrl] = $enhancer; - } - - - /** - * Registers enhancers that are run for every item and after all previous - * enhancers have been run - * @param ArticleEnhancer $enhancer - */ - public function registerGlobalEnhancer (ArticleEnhancer $enhancer) { - $this->globalEnhancers[] = $enhancer; - } - - - /** - * @param \OCA\News\Db\Item $item - * @param string $feedUrl - * @return \OCA\News\Db\Item enhanced item - */ - public function enhance($item, $feedUrl){ - $feedUrl = $this->removeTrailingSlash($feedUrl); - - if(array_key_exists($feedUrl, $this->enhancers)) { - $result = $this->enhancers[$feedUrl]->enhance($item); - } else { - $result = $item; - } - - foreach ($this->globalEnhancers as $enhancer) { - $result = $enhancer->enhance($result); - } - - return $result; - } - - - /** - * @param string $url - * @return string - */ - private function removeTrailingSlash($url) { - if($url[strlen($url)-1] === '/') { - return substr($url, 0, -1); - } else { - return $url; - } - } - - -}
\ No newline at end of file diff --git a/articleenhancer/regexarticleenhancer.php b/articleenhancer/regexarticleenhancer.php deleted file mode 100644 index 0f600468e..000000000 --- a/articleenhancer/regexarticleenhancer.php +++ /dev/null @@ -1,46 +0,0 @@ -<?php -/** - * ownCloud - News - * - * This file is licensed under the Affero General Public License version 3 or - * later. See the COPYING file. - * - * @author Alessandro Cosentino <cosenal@gmail.com> - * @author Bernhard Posselt <dev@bernhard-posselt.com> - * @copyright Alessandro Cosentino 2012 - * @copyright Bernhard Posselt 2012, 2014 - */ - -namespace OCA\News\ArticleEnhancer; - -use \OCA\News\Db\Item; - - -class RegexArticleEnhancer implements ArticleEnhancer { - - private $matchArticleUrl; - private $regexPair; - - public function __construct($matchArticleUrl, array $regexPair) { - $this->matchArticleUrl = $matchArticleUrl; - $this->regexPair = $regexPair; - } - - - /** - * @param \OCA\News\Db\Item $item - * @return \OCA\News\Db\Item enhanced item - */ - public function enhance(Item $item) { - if (preg_match($this->matchArticleUrl, $item->getUrl())) { - $body = $item->getBody(); - foreach($this->regexPair as $search => $replaceWith) { - $body = preg_replace($search, $replaceWith, $body); - } - $item->setBody($body); - } - return $item; - } - - -} diff --git a/articleenhancer/regexenhancers.json b/articleenhancer/regexenhancers.json deleted file mode 100644 index 53eeece40..000000000 --- a/articleenhancer/regexenhancers.json +++ /dev/null @@ -1,59 +0,0 @@ -{ - "twogag.com": { - "%(?:www.twogag.com/archives)|(feedproxy.google.com/~r/TwoGuysAndGuy)%": { - "%http://www.twogag.com/comics-rss/([^.]+)\\.jpg%": "http://www.twogag.com/comics/$1.jpg" - } - }, - "nichtlustig.de": { - "%nichtlustig.de%": { - "%.*static.nichtlustig.de/comics/full/(\\d+).*%s": "<img src=\"http://static.nichtlustig.de/comics/full/$1.jpg\" />" - } - }, - "nerfnow.com": { - "%feedproxy.google.com/~r/nerfnow%": { - "%(width|width)=\"\\d+\"%": "", - "%nerfnow.com/comic/thumb/(\\d+)/large%": "nerfnow.com/comic/image/$1", - "%<div><a target=\"_blank\" href=\"http://www.nerfnow.com/comic/\\d+\">Click for full size</a></div>%": "", - "%<img src=\"http://feeds.feedburner.com[^>]+>%": "" - } - }, - "fowllanguagecomics.com": { - "%fowllanguagecomics.com/comic%": { - "%\\?resize=[^\"]+%": "" - } - }, - "cheerupemokid.com": { - "%feedproxy.google.com/~r/cheerupemokid%": { - "%-\\d+x\\d+%": "", - "%(width|height)=\"\\d+\"%" : "" - } - }, - "extrafabulouscomics.com": { - "%extrafabulouscomics.com/comic/%": { - "%-\\d+x\\d+%": "", - "%(width|height)=\"\\d+\"%" : "" - } - }, - "endlessorigami.com": { - "%endlessorigami.com/comic/%": { - "%-\\d+x\\d+%": "", - "%(width|height)=\"\\d+\"%" : "" - } - }, - "www.loadingartist.com": { - "%www.loadingartist.com/comic/%": { - "%-\\d+x\\d+%": "", - "%(width|height)=\"\\d+\"%" : "" - } - }, - "xkcd.com": { - "%xkcd.com/\\d+/%": { - "%alt=\"(.+)\" */>%": "/><br/>$1" - } - }, - "www.treelobsters.com": { - "%www.treelobsters.com%": { - "%title=\"(.+)\" */>%": "/><br/>$1" - } - } -} diff --git a/articleenhancer/xpatharticleenhancer.php b/articleenhancer/xpatharticleenhancer.php deleted file mode 100644 index 61bf230a0..000000000 --- a/articleenhancer/xpatharticleenhancer.php +++ /dev/null @@ -1,196 +0,0 @@ -<?php -/** - * ownCloud - News - * - * This file is licensed under the Affero General Public License version 3 or - * later. See the COPYING file. - * - * @author Alessandro Cosentino <cosenal@gmail.com> - * @author Bernhard Posselt <dev@bernhard-posselt.com> - * @copyright Alessandro Cosentino 2012 - * @copyright Bernhard Posselt 2012, 2014 - */ - -namespace OCA\News\ArticleEnhancer; - -use DOMDocument; -use DOMXpath; - -use PicoFeed\Encoding\Encoding; - -use OCA\News\Utility\PicoFeedClientFactory; - -use OCA\News\Db\Item; - - -class XPathArticleEnhancer implements ArticleEnhancer { - - private $clientFactory; - private $regexXPathPair; - - - /** - * @param \Utility\PicoFeedClientFactory $clientFactory - * @param array $regexXPathPair an associative array containing regex to - * match the url and the xpath that should be used for it to extract the - * page - */ - public function __construct(PicoFeedClientFactory $clientFactory, - array $regexXPathPair){ - $this->clientFactory = $clientFactory; - $this->regexXPathPair = $regexXPathPair; - } - - /** - * @param \OCA\News\Db\Item $item - * @return \OCA\News\Db\Item enhanced item - */ - public function enhance(Item $item){ - - foreach($this->regexXPathPair as $regex => $search) { - - if(preg_match($regex, $item->getUrl())) { - $body = $this->getFile($item->getUrl()); - - // First check if either <meta charset="..."> or - // <meta http-equiv="Content-Type" ...> is specified and use it - // If this fails use mb_detect_encoding() - $regex = '/<meta\s+[^>]*(?:charset\s*=\s*[\'"]([^>\'"]*)[\'"]' . - '|http-equiv\s*=\s*[\'"]content-type[\'"]\s+[^>]*' . - 'content\s*=\s*[\'"][^>]*charset=([^>]*)[\'"])[^>]*>' . - '/i'; - if(preg_match($regex, $body, $matches)) { - $enc = strtoupper($matches[sizeof($matches) - 1]); - } else { - $enc = mb_detect_encoding($body); - } - $enc = $enc ? $enc : 'UTF-8'; - $body = mb_convert_encoding($body, 'HTML-ENTITIES', $enc); - if (trim($body) === '') { - return $item; - } - - $dom = new DOMDocument(); - $isOk = @$dom->loadHTML($body); - - $xpath = new DOMXpath($dom); - $xpathResult = $xpath->evaluate($search); - - // in case it wasnt a text query assume its a dom element and - // convert it to text - if(!is_string($xpathResult)) { - $xpathResult = $this->domToString($xpathResult); - } - - $xpathResult = trim($xpathResult); - - // convert all relative to absolute URLs - $xpathResult = $this->substituteRelativeLinks( - $xpathResult, $item->getUrl() - ); - - if($isOk && $xpathResult !== false && $xpathResult !== '') { - $item->setBody($xpathResult); - } - } - } - - return $item; - } - - - private function getFile($url) { - $client = $this->clientFactory->build(); - $client->execute($url); - $client->setUserAgent('Mozilla/5.0 AppleWebKit'); - return $client->getContent(); - } - - - /** - * Method which converts all relative "href" and "src" URLs of - * a HTML snippet with their absolute equivalent - * @param string $xmlString a HTML snippet as string with the relative URLs - * to be replaced - * @param string $absoluteUrl the approptiate absolute url of the HTML - * snippet - * @return string the result HTML snippet as a string - */ - protected function substituteRelativeLinks($xmlString, $absoluteUrl) { - $dom = new DOMDocument(); - $dom->preserveWhiteSpace = false; - - if($xmlString === '') { - return ''; - } - - $xmlString = '<div>' . $xmlString . '</div>'; - $isOk = @$dom->loadHTML($xmlString, LIBXML_HTML_NOIMPLIED | - LIBXML_HTML_NODEFDTD); - - if(!$isOk) { - return ''; - } - - foreach (['href', 'src'] as $attribute) { - $xpath = new DOMXpath($dom); - $xpathResult = $xpath->query( - "//*[@" . $attribute . " " . - "and not(contains(@" . $attribute . ", '://')) " . - "and not(starts-with(@" . $attribute . ", 'mailto:')) " . - "and not(starts-with(@" . $attribute . ", '//'))]"); - foreach ($xpathResult as $linkNode) { - $urlElement = $linkNode->attributes->getNamedItem($attribute); - $abs = $this->relativeToAbsoluteUrl( - $urlElement->nodeValue, $absoluteUrl - ); - $urlElement->nodeValue = htmlspecialchars($abs); - } - } - - $xmlString = $dom->saveHTML(); - - // domdocument spoils the string with line breaks between the elements - // strip them - $xmlString = str_replace("\n", '', $xmlString); - - return $xmlString; - } - - - /** - * Method which builds a URL by taking a relative URL and its corresponding - * absolute URL - * @param string $relativeUrl the relative URL - * @param string $absoluteUrl the absolute URL with at least scheme and host - * @return string the resulting absolute URL - */ - protected function relativeToAbsoluteUrl($relativeUrl, $absoluteUrl) { - $base = new \Net_URL2($absoluteUrl); - return $base->resolve($relativeUrl); - } - - - /** - * Method which turns an xpath result to a string - * you can customize it by overwriting this method - * @param mixed $xpathResult the result from the xpath query - * @return string the result as a string - */ - protected function domToString($xpathResult) { - $result = ''; - foreach($xpathResult as $node) { - $result .= $this->toInnerHTML($node); - } - return $result; - } - - - protected function toInnerHTML($node) { - $dom = new DOMDocument(); - $dom->appendChild($dom->importNode($node, true)); - return trim($dom->saveHTML($dom->documentElement)); - } - - -} diff --git a/articleenhancer/xpathenhancers.json b/articleenhancer/xpathenhancers.json deleted file mode 100644 index a088a6ec6..000000000 --- a/articleenhancer/xpathenhancers.json +++ /dev/null @@ -1,186 +0,0 @@ -{ - "cad-comic.com": { - "%cad-comic.com/cad/\\d+/$%": "//*[@id='content']/img" - }, - "slashdot.org/": { - "%slashdot.feedsportal.com%": "//article/div[@class='body'] | //article[@class='layout-article']/div[@class='elips']" - }, - "explosm.net/rss.php": { - "%explosm.net/comics%": "//*[@id='main-comic']", - "%explosm.net/show%": "//*[@id='player']/*" - }, - "themerepublic.net": { - "%feedproxy.google.com/~r/blogspot/DngUJ%": "//*[@class='post hentry']" - }, - "penny-arcade.com": { - "%penny-arcade.com/comic%": "//*[@id='comicFrame']/a/img", - "%penny-arcade.com/news%": "//*[@class='postBody']/*" - }, - "leasticoulddo.com": { - "%feedproxy.google.com/~r/LICD%": "//*[@id='comic-img']/a/img | //*[@id='comic-img']/img | //*[@id='post']" - }, - "escapistmagazine.com/articles/view/comicsandcosplay/comics/critical-miss": { - "%escapistmagazine.com/articles/view/comicsandcosplay/comics/critical-miss%": "//*[@class='body']/span/img | //div[@class='folder_nav_links']/following::p" - }, - "escapistmagazine.com/articles/view/comicsandcosplay/comics/namegame": { - "%escapistmagazine.com/articles/view/comicsandcosplay/comics/namegame%": "//*[@class='body']/span/p/img[@height != \"120\"]" - }, - "escapistmagazine.com/articles/view/comicsandcosplay/comics/stolen-pixels": { - "%escapistmagazine.com/articles/view/comicsandcosplay/comics/stolen-pixels%": "//*[@class='body']/span/p[2]/img" - }, - "escapistmagazine.com/articles/view/comicsandcosplay/comics/bumhugparade": { - "%escapistmagazine.com/articles/view/comicsandcosplay/comics/bumhugparade%": "//*[@class='body']/span/p[2]/img" - }, - "escapistmagazine.com/articles/view/comicsandcosplay/comics/escapistradiotheater": { - "%escapistmagazine.com/articles/view/comicsandcosplay/comics/escapistradiotheater%": "//*[@class='body']/span/p[2]/img" - }, - "escapistmagazine.com/articles/view/comicsandcosplay/comics/paused": { - "%escapistmagazine.com/articles/view/comicsandcosplay/comics/paused%": "//*[@class='body']/span/p[2]/img | //*[@class='body']/span/div/img" - }, - "escapistmagazine.com/articles/view/comicsandcosplay/comics/fraughtwithperil": { - "%escapistmagazine.com/articles/view/comicsandcosplay/comics/fraughtwithperil%": "//*[@class='body']" - }, - "trenchescomic.com": { - "%trenchescomic.com/comic%": "//*[@class=\"top\"]/img", - "%trenchescomic.com/tales%": "//*[@class=\"copy\"]" - }, - "lfgcomic.com": { - "%(lfgcomic.com/page)|(feedproxy.google.com/~r/LookingForGroup)%": "//*[@id=\"comic\"]/img | //*[@class=\"content\"]" - }, - "sandraandwoo.com": { - "%feedproxy.google.com/~r/sandraandwoo%": "//*[@id=\"comic\"]/img | //*[@class=\"post\"]/div[@class=\"entry\"] | //*[@class=\"transcript\"]" - }, - "sandraandwoo.com/gaia": { - "%feedproxy.google.com/~r/gaiacomic%": "//*[@id=\"comic\"]/img | //*[@class=\"post\"]/div[@class=\"entry\"] | //*[@class=\"transcript\"]" - }, - "theoatmeal.com": { - "%theoatmeal.com/blog%": "//*[@class=\"post_body\"]", - "%theoatmeal.com/comics%": "//*[@id=\"comic\"] | //*[@class=\"super_vacum\"] | //*[@class=\"pad\"]" - }, - "loldwell.com": { - "%loldwell.com/\\?p=%": "//*[@id=\"comic\"]" - }, - "mokepon.smackjeeves.com": { - "%mokepon.smackjeeves.com/comics%": "//*[@id=\"comic_area\"]/img | //*[@id=\"comic_area\"]/a/img" - }, - "twokinds.keenspot.com": { - "%twokinds.keenspot.com/%": "//*[@class=\"comic\"]/div/a/img | //*[@class=\"comic\"]/div/img | //*[@id=\"cg_img\"]/img | //*[@id=\"cg_img\"]/a/img" - }, - "niceteethcomic.com": { - "%niceteethcomic.com/archives%": "//*[@class=\"comicpane\"]/a/img" - }, - "awkwardzombie.com" :{ - "%awkwardzombie.com/index.php\\?comic=%": "//*[@id=\"comic\"]/img" - }, - "vgcats.com": { - "%vgcats.com/comics%": "//*[@align=\"center\"]/img" - }, - "vgcats.com/super": { - "%vgcats.com/super%": "//div[@align=\"center\"]/p/img" - }, - "chaoslife.findchaos.com": { - "%chaoslife.findchaos.com%": "//div[@id=\"comic\"]" - }, - "forlackofabettercomic.com/archive.php": { - "%forlackofabettercomic.com/\\?id=%": "//div[@id=\"comicset\"]/a/img" - }, - "heise.de/newsticker": { - "%heise.de%": "//*[@class='meldung_wrapper']/*[not(contains(@class, 'dossier'))]" - }, - "heise.de/developer": { - "%heise.de%": "//*[@class='meldung_wrapper']/*[not(contains(@class, 'dossier'))]" - }, - "heise.de/open/news": { - "%heise.de%": "//*[@class='meldung_wrapper']/*[not(contains(@class, 'dossier'))]" - }, - "spiegel.de": { - "%spiegel.de/(?!.*video).*%": "//p[@class='article-intro'] | //*[@itemprop='description' and not(ancestor::*[contains( normalize-space( @class ), 'article-section' )]) ] | //*[(@class='spPanoImageTeaserPic' or @class='spPanoGalleryTeaserPic' or @class='spPanoPlayerTeaserPic') and not(ancestor::*[contains( normalize-space( @class ), 'article-section' )])] | //*[@class='image-buttons-panel' and not(ancestor::*[contains( normalize-space( @class ), 'article-section' )])]/*[1]/img | //*[@class='article-image-description' and not(ancestor::*[contains( normalize-space( @class ), 'article-section' )]) ]/p | //*[@id='content-main']/*[@id='js-article-column']/div[contains( normalize-space( @class ), 'article-section' )]/*[not( contains( normalize-space( @class ), 'article-function-social-media' ))] | //*[@id='content-main']/*[@id='js-article-column']/p" - }, - "eqcomics.com": { - "%feedproxy.google.com/~r/eqcomics%": "//div[@id=\"comic\"]/div/a/img" - }, - "thegamercat.com": { - "%thegamercat.com%": "//div[@id=\"comic\"] | //div[@class=\"post-content\"]/div[@class=\"entry\"]/p" - }, - "niebezpiecznik.pl": { - "%feedproxy.google.com/~r/niebezpiecznik%": "//div[@class=\"entry\"]" - }, - "maximumble.thebookofbiff.com": { - "%maximumble.thebookofbiff.com%": "//div[@id=\"comic\"]/div/a/img" - }, - "omgubuntu.co.uk": { - "%www.omgubuntu.co.uk%": "//div[@class=\"entry-content\"]" - }, - "tu-chemnitz.de/tu/presse/": { - "%tu-chemnitz.de%": "//div[@id='content_inhalt']/*[preceding-sibling::h1 and following-sibling::div[@id]]" - }, - "cliquerefresh.com": { - "%cliquerefresh.com/comic%": "//div[@class=\"comicImg\"]/img | //div[@class=\"comicImg\"]/a/img" - }, - "satwcomic.com": { - "%feedproxy.google.com/~r/satwcomic%": "//div[@class=\"comicmid\"]/center/a/img" - }, - "androidpolice.com": { - "%rss.feedsportal.com/c/33941/f/615677/p/1/s/%": "//div[@class=\"post_content\"]" - }, - "neustadt-ticker.de": { - "%neustadt-ticker.de%": "//div[contains(@class,'article')]/div[@class='PostContent' and *[not(contains(@class, 'navigation'))]]" - }, - "linuxtoday.com/": { - "%linuxtoday.com%": "//div[@class='article']/p | //div[@class='article']/ul" - }, - "metronieuws.nl": { - "%metronieuws.nl%": "//div[contains(@class,'article-top')]/div[contains(@class,'image-component')] | //div[@class='article-full-width']/div[1]" - }, - "thecodinglove.com": { - "%thecodinglove.com%": "//div[@class='bodytype']" - }, - "der-postillon.com": { - "%der-postillon.com%": "//div[contains(@class,'post-body')]" - }, - "areadvd.de": { - "%areadvd.de%": "//div[contains(@class,'entry')]" - }, - "engadget.com": { - "%engadget.com%": "//div[@class='article-content']/p[not(@class='read-more')] | //div[@class='article-content']/div[@style='text-align: center;']" - }, - "mydealz.de": { - "%http://feedproxy.google.com/~r/myDealZ%": "//div[@class='section-hub'] | //div[contains(@class,'thread-deal')]" - }, - "scrumalliance.org": { - "%scrumalliance.org/rss?%": "//div[@class='article_content']" - }, - "smarthomewelt.de": { - "%smarthomewelt.de%": "//div[@class='entry-inner']/p | //div[@class='entry-inner']/div[contains(@class,'wp-caption')]" - }, - "elegantthemes.com": { - "%elegantthemes.com%": "//article[contains(@class,'post')]/p" - }, - "smashingmagazine.com": { - "%smashingmagazine.com%": "//article[contains(@class,'post')]/p" - }, - "sueddeutsche.de": { - "%sz.de%": "//article[@id='sitecontent']/section[@class='topenrichment']//img | //article[@id='sitecontent']/section[@class='body']/section[@class='authors']/preceding-sibling::*[not(contains(@class, 'ad') or contains(@class, 'article-sidebar-wrapper'))]" - }, - "lifehacker.com": { - "%lifehacker.com%": "//div[contains(@class,'entry-content')]" - }, - "travel-dealz.de": { - "%travel-dealz.de/blog/%": "//div[@class='post-entry']" - }, - "outdoordeals.de": { - "%outdoordeals.de%": "//article[contains(@class,'blogpost')]/div[@class='inner']" - }, - "buttersafe.com": { - "%buttersafe.com%": "//div[@id='comic'] | //div[@class='post-comic']" - }, - "www.pixelbeat.org": { - "%www.pixelbeat.org%": "//div[@class='contentText']" - }, - "gocomics.com/pearlsbeforeswine": { - "%gocomics.com/pearlsbeforeswine%": "//div[1]/p[1]/a[1]/img" - }, - "marriedtothesea.com": { - "%marriedtothesea.com%": "//div[@align]/a/img" - } -} |