From 831dc688e0ff894293e85ee029e15b1b1896ae43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gregor=20T=C3=A4tzner?= Date: Mon, 23 Jul 2012 18:43:54 +0200 Subject: Add more favicon detection and checking code --- lib/utils.php | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 62 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/utils.php b/lib/utils.php index 096a1211e..2afa229ca 100644 --- a/lib/utils.php +++ b/lib/utils.php @@ -45,19 +45,71 @@ class OC_News_Utils { $favicon = $spfeed->get_image_url(); //check if this file exists and the size with getimagesize() - if ($favicon == null) { - //handle favicon detection - $favicon = SimplePie_Misc::absolutize_url('/favicon.ico', $url); - // get file - $file = new SimplePie_File($favicon); + if ($favicon == null) { //try really hard to find a favicon + if( null !== ($webFavicon = OC_News_Utils::discoverFavicon($url)) ) + $feed->setFavicon($webFavicon); + } + else { //use favicon from feed + if(OC_News_Utils::checkFavicon($favicon)) + $feed->setFavicon($favicon); + } + return $feed; + } + + public static function checkFavicon($favicon) { + $file = new SimplePie_File($favicon); + + //TODO additional checks? + if($file->success && strlen($file->body) > 0) { $sniffer = new SimplePie_Content_Type_Sniffer($file); - // check file - if(substr($sniffer->get_type(), 0, 6) !== 'image/') - $favicon = null; + if(substr($sniffer->get_type(), 0, 6) === 'image/') { + return true; + } } + return false; + } - $feed->setFavicon($favicon); + public static function discoverFavicon($url) { + //try webroot favicon + $favicon = SimplePie_Misc::absolutize_url('/favicon.ico', $url); - return $feed; + if(OC_News_Utils::checkFavicon($favicon)) + return $favicon; + + //try to extract favicon from web page + $handle = curl_init ( ); + curl_setopt ( $handle, CURLOPT_URL, $url ); + curl_setopt ( $handle, CURLOPT_RETURNTRANSFER, 1 ); + curl_setopt ( $handle, CURLOPT_FOLLOWLOCATION, TRUE ); + curl_setopt ( $handle, CURLOPT_MAXREDIRS, 10 ); + + if ( FALSE!==($page=curl_exec($handle)) ) { + preg_match ( '/<[^>]*link[^>]*(rel=["\']icon["\']|rel=["\']shortcut icon["\']) .*href=["\']([^>]*)["\'].*>/iU', $page, $match ); + if (1