diff options
Diffstat (limited to 'vendor/fguillot/picofeed/lib/PicoFeed/Reader')
-rw-r--r-- | vendor/fguillot/picofeed/lib/PicoFeed/Reader/Favicon.php | 209 | ||||
-rw-r--r-- | vendor/fguillot/picofeed/lib/PicoFeed/Reader/Reader.php | 43 |
2 files changed, 221 insertions, 31 deletions
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Reader/Favicon.php b/vendor/fguillot/picofeed/lib/PicoFeed/Reader/Favicon.php new file mode 100644 index 000000000..f762c56b2 --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Reader/Favicon.php @@ -0,0 +1,209 @@ +<?php + +namespace PicoFeed\Reader; + +use DOMXpath; + +use PicoFeed\Client\Client; +use PicoFeed\Client\ClientException; +use PicoFeed\Client\Url; +use PicoFeed\Config\Config; +use PicoFeed\Logging\Logger; +use PicoFeed\Parser\XmlParser; + +/** + * Favicon class + * + * https://en.wikipedia.org/wiki/Favicon + * + * @author Frederic Guillot + * @package Reader + */ +class Favicon +{ + /** + * Config class instance + * + * @access private + * @var \PicoFeed\Config\Config + */ + private $config; + + /** + * Icon binary content + * + * @access private + * @var string + */ + private $content = ''; + + /** + * Icon content type + * + * @access private + * @var string + */ + private $content_type = ''; + + /** + * Constructor + * + * @access public + * @param \PicoFeed\Config\Config $config Config class instance + */ + public function __construct(Config $config = null) + { + $this->config = $config ?: new Config; + } + + /** + * Get the icon file content (available only after the download) + * + * @access public + * @return string + */ + public function getContent() + { + return $this->content; + } + + /** + * Get the icon file type (available only after the download) + * + * @access public + * @return string + */ + public function getType() + { + return $this->content_type; + } + + /** + * Get data URI (http://en.wikipedia.org/wiki/Data_URI_scheme) + * + * @access public + * @return string + */ + public function getDataUri() + { + return sprintf( + 'data:%s;base64,%s', + $this->content_type, + base64_encode($this->content) + ); + } + + /** + * Download and check if a resource exists + * + * @access public + * @param string $url URL + * @return \PicoFeed\Client Client instance + */ + public function download($url) + { + $client = Client::getInstance(); + $client->setConfig($this->config); + + Logger::setMessage(get_called_class().' Download => '.$url); + + try { + $client->execute($url); + } + catch (ClientException $e) { + Logger::setMessage(get_called_class().' Download Failed => '.$e->getMessage()); + } + + return $client; + } + + /** + * Check if a remote file exists + * + * @access public + * @param string $url URL + * @return boolean + */ + public function exists($url) + { + return $this->download($url)->getContent() !== ''; + } + + /** + * Get the icon link for a website + * + * @access public + * @param string $website_link URL + * @return string + */ + public function find($website_link) + { + $website = new Url($website_link); + + $icons = $this->extract($this->download($website->getBaseUrl('/'))->getContent()); + $icons[] = $website->getBaseUrl('/favicon.ico'); + + foreach ($icons as $icon_link) { + + $icon_link = $this->convertLink($website, new Url($icon_link)); + $resource = $this->download($icon_link); + $this->content = $resource->getContent(); + $this->content_type = $resource->getContentType(); + + if ($this->content !== '') { + return $icon_link; + } + } + + return ''; + } + + /** + * Convert icon links to absolute url + * + * @access public + * @param \PicoFeed\Client\Url $website Website url + * @param \PicoFeed\Client\Url $icon Icon url + * @return string + */ + public function convertLink(Url $website, Url $icon) + { + $base_url = ''; + + if ($icon->isRelativeUrl()) { + $base_url = $website->getBaseUrl(); + } + else if ($icon->isProtocolRelative()) { + $icon->setScheme($website->getScheme()); + } + + return $icon->getAbsoluteUrl($base_url); + } + + /** + * Extract the icon links from the HTML + * + * @access public + * @param string $html HTML + * @return array + */ + public function extract($html) + { + $icons = array(); + + if (empty($html)) { + return $icons; + } + + $dom = XmlParser::getHtmlDocument($html); + + $xpath = new DOMXpath($dom); + $elements = $xpath->query("//link[contains(@rel, 'icon') and not(contains(@rel, 'apple'))]"); + + for ($i = 0; $i < $elements->length; $i++) { + $icons[] = $elements->item($i)->getAttribute('href'); + } + + return $icons; + } +} diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Reader/Reader.php b/vendor/fguillot/picofeed/lib/PicoFeed/Reader/Reader.php index a64eb139c..5b807e251 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Reader/Reader.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Reader/Reader.php @@ -26,11 +26,11 @@ class Reader * @var array */ private $formats = array( - 'Atom' => array('<feed'), - 'Rss20' => array('<rss', '2.0'), - 'Rss92' => array('<rss', '0.92'), - 'Rss91' => array('<rss', '0.91'), - 'Rss10' => array('<rdf:'), + 'Atom' => '//feed', + 'Rss20' => '//rss[@version="2.0"]', + 'Rss92' => '//rss[@version="0.92"]', + 'Rss91' => '//rss[@version="0.91"]', + 'Rss10' => '//rdf', ); /** @@ -87,7 +87,7 @@ class Reader $client = $this->download($url, $last_modified, $etag); // It's already a feed or the feed was not modified - if (!$client->isModified() || $this->detectFormat($client->getContent())) { + if (! $client->isModified() || $this->detectFormat($client->getContent())) { return $client; } @@ -181,14 +181,14 @@ class Reader */ public function detectFormat($content) { - $first_tag = Filter::getFirstTag($content); - - Logger::setMessage(get_called_class().': DetectFormat(): '.$first_tag); + $dom = XmlParser::getHtmlDocument($content); + $xpath = new DOMXPath($dom); - foreach ($this->formats as $parser => $needles) { + foreach ($this->formats as $parser_name => $query) { + $nodes = $xpath->query($query); - if ($this->contains($first_tag, $needles)) { - return $parser; + if ($nodes->length === 1) { + return $parser_name; } } @@ -196,25 +196,6 @@ class Reader } /** - * Return true if all needles are found in the haystack - * - * @access private - * @param string $haystack Haystack - * @param string $needles Needles to find - * @return boolean - */ - private function contains($haystack, array $needles) - { - $results = array(); - - foreach ($needles as $needle) { - $results[] = strpos($haystack, $needle) !== false; - } - - return ! in_array(false, $results, true); - } - - /** * Add the prefix "http://" if the end-user just enter a domain name * * @access public |