summaryrefslogtreecommitdiffstats
path: root/utility/faviconfetcher.php
blob: c38706af6205b6c42c49f17e3809e4ae76c6fc8d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
<?php
/**
 * ownCloud - News
 *
 * This file is licensed under the Affero General Public License version 3 or
 * later. See the COPYING file.
 *
 * @author Alessandro Cosentino <cosenal@gmail.com>
 * @author Bernhard Posselt <dev@bernhard-posselt.com>
 * @copyright Alessandro Cosentino 2012
 * @copyright Bernhard Posselt 2012, 2014
 */

namespace OCA\News\Utility;


class FaviconFetcher {

	private $apiFactory;
	private $config;

	/**
	 * Inject a factory to build a simplepie file object. This is needed because
	 * the file object contains logic in its constructor which makes it
	 * impossible to inject and test
	 *
	 * @param SimplePieAPIFactory $apiFactory factory that creates the file
	 * objects used to download pages and icons
	 * @param Config $config configuration that is asked for the proxy host,
	 * port and authentication
	 */
	public function __construct(SimplePieAPIFactory $apiFactory, Config $config) {
		$this->apiFactory = $apiFactory;
		$this->config = $config;
	}


	/**
	 * Fetches a favicon from a given URL
	 *
	 * The page itself is searched for an icon link first. If none is found or
	 * the linked file is not an image, /favicon.ico on the same scheme, host
	 * and port is tried as a fallback.
	 *
	 * @param string|null $url the url where to fetch it from
	 * @return null|string the url of the favicon or null if the passed url is
	 * not valid or no image could be found
	 */
	public function fetch($url) {
		try {
			$url = $this->buildURL($url);
		} catch (NoValidUrlException $e) {
			return null;
		}

		// prefer the icon that the page itself advertises
		$faviconUrl = $this->extractFromPage($url);
		if ($faviconUrl && $this->isImage($faviconUrl)) {
			return $faviconUrl;
		}

		// try /favicon.ico as fallback
		$faviconUrl = $this->buildFallbackURL($url);
		if ($faviconUrl !== null && $this->isImage($faviconUrl)) {
			return $faviconUrl;
		}

		return null;
	}


	/**
	 * Builds the url of /favicon.ico in the root of the passed url's host
	 * @param string $url an absolute url
	 * @return string|null the fallback url or null if the passed url has no
	 * scheme or host
	 */
	private function buildFallbackURL($url) {
		$parts = parse_url($url);

		if (!is_array($parts) || !isset($parts['scheme'], $parts['host'])) {
			return null;
		}

		$authority = $parts['host'];

		// parse_url returns the port without the separating colon
		if (isset($parts['port'])) {
			$authority .= ':' . $parts['port'];
		}

		return $parts['scheme'] . '://' . $authority . '/favicon.ico';
	}


	/**
	 * Tries to get a favicon from a page
	 *
	 * Returns the href of the first link element whose rel attribute contains
	 * "icon" and whose href is not empty, resolved against the page url.
	 *
	 * @param string $url the url to the page
	 * @return string|null the absolute url to the favicon or null if the page
	 * has no body, can not be parsed or does not link to an icon
	 */
	protected function extractFromPage($url) {
		if (!$url) {
			return null;
		}

		$file = $this->getFile($url);

		if (!is_object($file)) {
			return null;
		}

		/** @noinspection PhpUndefinedFieldInspection */
		$body = $file->body;

		// a failed download may leave the body empty or unset
		if (!is_string($body) || $body === '') {
			return null;
		}

		$document = new \DOMDocument();

		// collect parser warnings internally instead of silencing them with @
		// and restore the previous setting afterwards
		$previousSetting = libxml_use_internal_errors(true);
		$loaded = $document->loadHTML($body);
		libxml_clear_errors();
		libxml_use_internal_errors($previousSetting);

		if (!$loaded) {
			return null;
		}

		$xpath = new \DOMXpath($document);
		$elements = $xpath->query("//link[contains(@rel, 'icon')]");

		if ($elements === false) {
			return null;
		}

		foreach ($elements as $element) {
			/** @noinspection PhpUndefinedMethodInspection */
			$iconPath = trim($element->getAttribute('href'));

			// a link without a target can not point to an icon
			if ($iconPath !== '') {
				return \SimplePie_Misc::absolutize_url($iconPath, $url);
			}
		}

		return null;
	}


	/**
	 * Requests a url through the injected factory and passes the proxy
	 * settings from the config if a proxy host is configured
	 * @param string $url the url that should be requested
	 * @return mixed the file object that the factory returns
	 */
	private function getFile($url) {
		// NOTE(review): 10 and 5 are presumably the timeout in seconds and the
		// maximum number of redirects - confirm against SimplePieAPIFactory
		if(trim($this->config->getProxyHost()) === '') {
			return $this->apiFactory->getFile($url, 10, 5, null, null, false,
				null, null, null);
		} else {
			return $this->apiFactory->getFile($url, 10, 5, null, null, false,
				$this->config->getProxyHost(),
				$this->config->getProxyPort(),
				$this->config->getProxyAuth());
		}
	}


	/**
	 * Test if the file is an image
	 *
	 * The file is requested and its content type is sniffed.
	 *
	 * @param string $url the url to the file
	 * @return bool true if image
	 */
	protected function isImage($url) {
		// check for empty urls
		if(!$url) {
			return false;
		}

		$file = $this->getFile($url);
		/** @noinspection PhpParamsInspection */
		$sniffer = new \SimplePie_Content_Type_Sniffer($file);
		return $sniffer->image() !== false;
	}


	/**
	 * Get HTTP or HTTPS addresses from an incomplete URL
	 * @param string|null $url the url that should be built
	 * @return string a string containing the http or https address
	 * @throws NoValidUrlException when no valid url can be returned
	 */
	protected function buildURL($url) {
		// cast first so that null is treated like an empty url, then trim
		// whitespace and the right / from the url
		$url = trim((string) $url);
		$url = rtrim($url, '/');

		// check for http:// or https:// and validate URL. Schemes are case
		// insensitive, so HTTP://example.com must not get a second prefix
		if (stripos($url, 'http://') === 0 || stripos($url, 'https://') === 0) {
			if (filter_var($url, FILTER_VALIDATE_URL)) {
				return $url;
			}
		} elseif (filter_var('http://' . $url, FILTER_VALIDATE_URL)) {
			// maybe $url was something like www.example.com
			return 'http://' . $url;
		}

		// no valid URL was passed in or could be built from $url
		throw new NoValidUrlException();
	}

}

/**
 * Signals that the favicon fetcher was given a url that is not valid and
 * that could not be turned into a valid http or https address either
 */
class NoValidUrlException extends \Exception {}