diff options
Diffstat (limited to 'vendor/fguillot/picofeed/lib/PicoFeed/Client')
13 files changed, 2124 insertions, 0 deletions
diff --git a/vendor/fguillot/picofeed b/vendor/fguillot/picofeed
deleted file mode 160000
-Subproject 0a1d0d3950f7f047dc8fb1d80aa6296e15f306d
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Client.php b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Client.php
new file mode 100644
index 000000000..55d2c562f
--- /dev/null
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Client.php
@@ -0,0 +1,692 @@
+<?php
+
+namespace PicoFeed\Client;
+
+use LogicException;
+use PicoFeed\Logging\Logger;
+
+/**
+ * Client class
+ *
+ * Base class of the HTTP drivers: it stores the request settings (proxy,
+ * credentials, cache validators, limits) and interprets the response array
+ * returned by the driver specific doRequest() implementation.
+ *
+ * @author  Frederic Guillot
+ * @package client
+ */
+abstract class Client
+{
+    /**
+     * Flag that says whether the resource has been modified
+     *
+     * @access private
+     * @var bool
+     */
+    private $is_modified = true;
+
+    /**
+     * HTTP Content-Type
+     *
+     * @access private
+     * @var string
+     */
+    private $content_type = '';
+
+    /**
+     * HTTP encoding
+     *
+     * @access private
+     * @var string
+     */
+    private $encoding = '';
+
+    /**
+     * HTTP Etag header
+     *
+     * @access protected
+     * @var string
+     */
+    protected $etag = '';
+
+    /**
+     * HTTP Last-Modified header
+     *
+     * @access protected
+     * @var string
+     */
+    protected $last_modified = '';
+
+    /**
+     * Proxy hostname
+     *
+     * @access protected
+     * @var string
+     */
+    protected $proxy_hostname = '';
+
+    /**
+     * Proxy port
+     *
+     * @access protected
+     * @var integer
+     */
+    protected $proxy_port = 3128;
+
+    /**
+     * Proxy username
+     *
+     * @access protected
+     * @var string
+     */
+    protected $proxy_username = '';
+
+    /**
+     * Proxy password
+     *
+     * @access protected
+     * @var string
+     */
+    protected $proxy_password = '';
+
+    /**
+     * Basic auth username
+     *
+     * @access protected
+     * @var string
+     */
+    protected $username = '';
+
+    /**
+     * Basic auth password
+     *
+     * @access protected
+     * @var string
+     */
+    protected $password = '';
+
+    /**
+     * Client connection timeout
+     *
+     * @access protected
+     * @var integer
+     */
+    protected $timeout = 10;
+
+    /**
+     * User-agent
+     *
+     * @access protected
+     * @var string
+     */
+    protected $user_agent = 'PicoFeed (https://github.com/fguillot/picoFeed)';
+
+    /**
+     * Real URL used (can be changed after a HTTP redirect)
+     *
+     * @access protected
+     * @var string
+     */
+    protected $url = '';
+
+    /**
+     * Page/Feed content
+     *
+     * @access protected
+     * @var string
+     */
+    protected $content = '';
+
+    /**
+     * Maximum number of HTTP redirections to avoid infinite loops
+     *
+     * @access protected
+     * @var integer
+     */
+    protected $max_redirects = 5;
+
+    /**
+     * Maximum size of the HTTP body response
+     *
+     * @access protected
+     * @var integer
+     */
+    protected $max_body_size = 2097152; // 2MB
+
+    /**
+     * HTTP response status code
+     *
+     * @access protected
+     * @var integer
+     */
+    protected $status_code = 0;
+
+    /**
+     * HTTP response body
+     *
+     * @access protected
+     * @var string
+     */
+    protected $body = '';
+
+    /**
+     * Body size
+     *
+     * @access protected
+     * @var integer
+     */
+    protected $body_length = 0;
+
+    /**
+     * HTTP response headers
+     *
+     * @access protected
+     * @var array
+     */
+    protected $headers = array();
+
+    /**
+     * Counter on the number of header received
+     *
+     * @access protected
+     * @var integer
+     */
+    protected $headers_counter = 0;
+
+    /**
+     * Do the HTTP request
+     *
+     * @abstract
+     * @access public
+     * @param  bool    $follow_location    Flag used when there is an open_basedir restriction
+     * @return array
+     */
+    abstract public function doRequest($follow_location = true);
+
+    /**
+     * Get client instance: curl or stream driver
+     *
+     * The curl driver is preferred; the stream driver is only used when the
+     * curl extension is missing and "allow_url_fopen" is enabled.
+     *
+     * @static
+     * @access public
+     * @return \PicoFeed\Client\Client
+     * @throws LogicException    When neither driver can be used
+     */
+    public static function getInstance()
+    {
+        if (function_exists('curl_init')) {
+            return new Curl;
+        }
+        else if (ini_get('allow_url_fopen')) {
+            return new Stream;
+        }
+
+        throw new LogicException('You must have "allow_url_fopen=1" or curl extension installed');
+    }
+
+    /**
+     * Perform the HTTP request
+     *
+     * @access public
+     * @param  string  $url  URL (when empty, the URL already stored on the client is used)
+     * @return Client
+     * @throws InvalidUrlException    When the server answers with a 404
+     */
+    public function execute($url = '')
+    {
+        if ($url !== '') {
+            $this->url = $url;
+        }
+
+        Logger::setMessage(get_called_class().' Fetch URL: '.$this->url);
+        Logger::setMessage(get_called_class().' Etag provided: '.$this->etag);
+        Logger::setMessage(get_called_class().' Last-Modified provided: '.$this->last_modified);
+
+        $response = $this->doRequest();
+
+        $this->status_code = $response['status'];
+        $this->handleNotModifiedResponse($response);
+        $this->handleNotFoundResponse($response);
+        $this->handleNormalResponse($response);
+
+        return $this;
+    }
+
+    /**
+     * Handle not modified response
+     *
+     * A 304 marks the resource as not modified. On a 200, the cache validators
+     * sent with the request are compared with the ones received, then replaced
+     * by the received values.
+     *
+     * @access public
+     * @param  array      $response     Client response
+     */
+    public function handleNotModifiedResponse(array $response)
+    {
+        if ($response['status'] == 304) {
+            $this->is_modified = false;
+        }
+        else if ($response['status'] == 200) {
+            $this->is_modified = $this->hasBeenModified($response, $this->etag, $this->last_modified);
+            $this->etag = $this->getHeader($response, 'ETag');
+            $this->last_modified = $this->getHeader($response, 'Last-Modified');
+        }
+
+        if ($this->is_modified === false) {
+            Logger::setMessage(get_called_class().' Resource not modified');
+        }
+    }
+
+    /**
+     * Handle not found response
+     *
+     * @access public
+     * @param  array      $response     Client response
+     * @throws InvalidUrlException      When the status code is 404
+     */
+    public function handleNotFoundResponse(array $response)
+    {
+        if ($response['status'] == 404) {
+            throw new InvalidUrlException('Resource not found');
+        }
+    }
+
+    /**
+     * Handle normal response
+     *
+     * Store the body, the content type and the charset of a 200 response.
+     *
+     * @access public
+     * @param  array      $response     Client response
+     */
+    public function handleNormalResponse(array $response)
+    {
+        if ($response['status'] == 200) {
+            $this->content = $response['body'];
+            $this->content_type = $this->findContentType($response);
+            $this->encoding = $this->findCharset();
+        }
+    }
+
+    /**
+     * Handle manually redirections when there is an open base dir restriction
+     *
+     * Every hop is resolved against the URL that produced it, because a
+     * Location header may hold a relative reference.
+     *
+     * @access public
+     * @param  string     $location     Redirected URL
+     * @return array
+     * @throws MaxRedirectException     When the redirection limit is reached
+     */
+    public function handleRedirection($location)
+    {
+        $nb_redirects = 0;
+        $result = array();
+        $this->url = Url::resolve($location, $this->url);
+        $this->body = '';
+        $this->body_length = 0;
+        $this->headers = array();
+        $this->headers_counter = 0;
+
+        while (true) {
+
+            $nb_redirects++;
+
+            if ($nb_redirects >= $this->max_redirects) {
+                throw new MaxRedirectException('Maximum number of redirections reached');
+            }
+
+            $result = $this->doRequest(false);
+
+            if ($result['status'] != 301 && $result['status'] != 302) {
+                break;
+            }
+
+            $next_location = $this->getHeader($result, 'Location');
+
+            // A redirection without target cannot be followed: return the response as it is
+            if ($next_location === '') {
+                break;
+            }
+
+            // Same resolution as the first hop, the header value is not always absolute
+            $this->url = Url::resolve($next_location, $this->url);
+            $this->body = '';
+            $this->body_length = 0;
+            $this->headers = array();
+            $this->headers_counter = 0;
+        }
+
+        return $result;
+    }
+
+    /**
+     * Check if a request has been modified according to the parameters
+     *
+     * @access private
+     * @param  array      $response
+     * @param  string     $etag
+     * @param  string     $lastModified
+     * @return boolean
+     */
+    private function hasBeenModified($response, $etag, $lastModified)
+    {
+        // Same header names as the ones read with getHeader() in handleNotModifiedResponse()
+        $headers = array(
+            'ETag' => $etag,
+            'Last-Modified' => $lastModified
+        );
+
+        // Compare the values for each header that is present
+        $presentCacheHeaderCount = 0;
+        foreach ($headers as $key => $value) {
+            if (isset($response['headers'][$key])) {
+                if ($response['headers'][$key] !== $value) {
+                    return true;
+                }
+                $presentCacheHeaderCount++;
+            }
+        }
+
+        // If at least one header is present and the values match, the response
+        // was not modified
+        if ($presentCacheHeaderCount > 0) {
+            return false;
+        }
+
+        return true;
+    }
+
+    /**
+     * Find content type from response headers
+     *
+     * @access public
+     * @param  array      $response     Client response
+     * @return string                   Lowercased Content-Type value (empty when missing)
+     */
+    public function findContentType(array $response)
+    {
+        return strtolower($this->getHeader($response, 'Content-Type'));
+    }
+
+    /**
+     * Find charset from response headers
+     *
+     * Only the charset token is returned: parameters that follow it and
+     * surrounding quotes are removed.
+     *
+     * @access public
+     * @return string                   Charset (empty when not declared)
+     */
+    public function findCharset()
+    {
+        $result = explode('charset=', $this->content_type);
+
+        if (! isset($result[1])) {
+            return '';
+        }
+
+        // Example: 'text/html; charset="utf-8"; boundary=x' must give 'utf-8'
+        $parameters = explode(';', $result[1]);
+
+        return trim($parameters[0], " \t\"'");
+    }
+
+    /**
+     * Get header value from a client response
+     *
+     * @access public
+     * @param  array      $response     Client response
+     * @param  string     $header       Header name
+     * @return string                   Header value (empty when missing)
+     */
+    public function getHeader(array $response, $header)
+    {
+        return isset($response['headers'][$header]) ? $response['headers'][$header] : '';
+    }
+
+    /**
+     * Set the Last-Modified HTTP header
+     *
+     * @access public
+     * @param  string   $last_modified   Header value
+     * @return \PicoFeed\Client\Client
+     */
+    public function setLastModified($last_modified)
+    {
+        $this->last_modified = $last_modified;
+        return $this;
+    }
+
+    /**
+     * Get the value of the Last-Modified HTTP header
+     *
+     * @access public
+     * @return string
+     */
+    public function getLastModified()
+    {
+        return $this->last_modified;
+    }
+
+    /**
+     * Set the value of the Etag HTTP header
+     *
+     * @access public
+     * @param  string   $etag   Etag HTTP header value
+     * @return \PicoFeed\Client\Client
+     */
+    public function setEtag($etag)
+    {
+        $this->etag = $etag;
+        return $this;
+    }
+
+    /**
+     * Get the Etag HTTP header value
+     *
+     * @access public
+     * @return string
+     */
+    public function getEtag()
+    {
+        return $this->etag;
+    }
+
+    /**
+     * Get the final url value
+     *
+     * @access public
+     * @return string
+     */
+    public function getUrl()
+    {
+        return $this->url;
+    }
+
+    /**
+     * Set the url
+     *
+     * @access public
+     * @param  string   $url   URL
+     * @return \PicoFeed\Client\Client
+     */
+    public function setUrl($url)
+    {
+        $this->url = $url;
+        return $this;
+    }
+
+    /**
+     * Get the HTTP response status code
+     *
+     * @access public
+     * @return integer
+     */
+    public function getStatusCode()
+    {
+        return $this->status_code;
+    }
+
+    /**
+     * Get the body of the HTTP response
+     *
+     * @access public
+     * @return string
+     */
+    public function getContent()
+    {
+        return $this->content;
+    }
+
+    /**
+     * Get the content type value from HTTP headers
+     *
+     * @access public
+     * @return string
+     */
+    public function getContentType()
+    {
+        return $this->content_type;
+    }
+
+    /**
+     * Get the encoding value from HTTP headers
+     *
+     * @access public
+     * @return string
+     */
+    public function getEncoding()
+    {
+        return $this->encoding;
+    }
+
+    /**
+     * Return true if the remote resource has changed
+     *
+     * @access public
+     * @return bool
+     */
+    public function isModified()
+    {
+        return $this->is_modified;
+    }
+
+    /**
+     * Set connection timeout
+     *
+     * @access public
+     * @param  integer   $timeout   Connection timeout (a falsy value keeps the current one)
+     * @return \PicoFeed\Client\Client
+     */
+    public function setTimeout($timeout)
+    {
+        $this->timeout = $timeout ?: $this->timeout;
+        return $this;
+    }
+
+    /**
+     * Set a custom user agent
+     *
+     * @access public
+     * @param  string   $user_agent   User Agent (a falsy value keeps the current one)
+     * @return \PicoFeed\Client\Client
+     */
+    public function setUserAgent($user_agent)
+    {
+        $this->user_agent = $user_agent ?: $this->user_agent;
+        return $this;
+    }
+
+    /**
+     * Set the maximum number of HTTP redirections
+     *
+     * @access public
+     * @param  integer   $max   Maximum (a falsy value keeps the current one)
+     * @return \PicoFeed\Client\Client
+     */
+    public function setMaxRedirections($max)
+    {
+        $this->max_redirects = $max ?: $this->max_redirects;
+        return $this;
+    }
+
+    /**
+     * Set the maximum size of the HTTP body
+     *
+     * @access public
+     * @param  integer   $max   Maximum (a falsy value keeps the current one)
+     * @return \PicoFeed\Client\Client
+     */
+    public function setMaxBodySize($max)
+    {
+        $this->max_body_size = $max ?: $this->max_body_size;
+        return $this;
+    }
+
+    /**
+     * Set the proxy hostname
+     *
+     * @access public
+     * @param  string   $hostname    Proxy hostname (a falsy value keeps the current one)
+     * @return \PicoFeed\Client\Client
+     */
+    public function setProxyHostname($hostname)
+    {
+        $this->proxy_hostname = $hostname ?: $this->proxy_hostname;
+        return $this;
+    }
+
+    /**
+     * Set the proxy port
+     *
+     * @access public
+     * @param  integer   $port   Proxy port (a falsy value keeps the current one)
+     * @return \PicoFeed\Client\Client
+     */
+    public function setProxyPort($port)
+    {
+        $this->proxy_port = $port ?: $this->proxy_port;
+        return $this;
+    }
+
+    /**
+     * Set the proxy username
+     *
+     * @access public
+     * @param  string   $username    Proxy username (a falsy value keeps the current one)
+     * @return \PicoFeed\Client\Client
+     */
+    public function setProxyUsername($username)
+    {
+        $this->proxy_username = $username ?: $this->proxy_username;
+        return $this;
+    }
+
+    /**
+     * Set the proxy password
+     *
+     * @access public
+     * @param  string   $password    Password (a falsy value keeps the current one)
+     * @return \PicoFeed\Client\Client
+     */
+    public function setProxyPassword($password)
+    {
+        $this->proxy_password = $password ?: $this->proxy_password;
+        return $this;
+    }
+
+    /**
+     * Set the username
+     *
+     * @access public
+     * @param  string   $username    Basic Auth username (a falsy value keeps the current one)
+     * @return \PicoFeed\Client\Client
+     */
+    public function setUsername($username)
+    {
+        $this->username = $username ?: $this->username;
+        return $this;
+    }
+
+    /**
+     * Set the password
+     *
+     * @access public
+     * @param  string   $password    Basic Auth Password (a falsy value keeps the current one)
+     * @return \PicoFeed\Client\Client
+     */
+    public function setPassword($password)
+    {
+        $this->password = $password ?: $this->password;
+        return $this;
+    }
+
+    /**
+     * Set config object
+     *
+     * Copy the timeout, user agent, limits and proxy settings from the config.
+     *
+     * @access public
+     * @param  \PicoFeed\Config\Config  $config   Config instance (nothing is done when null)
+     * @return \PicoFeed\Client\Client
+     */
+    public function setConfig($config)
+    {
+        if ($config !== null) {
+            $this->setTimeout($config->getGrabberTimeout());
+            $this->setUserAgent($config->getGrabberUserAgent());
+            $this->setMaxRedirections($config->getMaxRedirections());
+            $this->setMaxBodySize($config->getMaxBodySize());
+            $this->setProxyHostname($config->getProxyHostname());
+            $this->setProxyPort($config->getProxyPort());
+            $this->setProxyUsername($config->getProxyUsername());
+            $this->setProxyPassword($config->getProxyPassword());
+        }
+
+        return $this;
+    }
+}
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Client/ClientException.php b/vendor/fguillot/picofeed/lib/PicoFeed/Client/ClientException.php
new file mode 100644
index 000000000..0e27452ed
--- /dev/null
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Client/ClientException.php
@@ -0,0 +1,16 @@
+<?php
+
+namespace PicoFeed\Client;
+
+use PicoFeed\PicoFeedException;
+
+
+/**
+ * ClientException Exception
+ *
+ * @author  Frederic Guillot
+ * @package Client
+ */
+abstract class ClientException extends PicoFeedException
+{
+}
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Curl.php
b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Curl.php new file mode 100644 index 000000000..d45773d2d --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Curl.php @@ -0,0 +1,272 @@ +<?php + +namespace PicoFeed\Client; + +use PicoFeed\Logging\Logger; + +/** + * cURL HTTP client + * + * @author Frederic Guillot + * @package Client + */ +class Curl extends Client +{ + /** + * cURL callback to read the HTTP body + * + * If the function return -1, curl stop to read the HTTP response + * + * @access public + * @param resource $ch cURL handler + * @param string $buffer Chunk of data + * @return integer Length of the buffer + */ + public function readBody($ch, $buffer) + { + $length = strlen($buffer); + $this->body_length += $length; + + if ($this->body_length > $this->max_body_size) { + return -1; + } + + $this->body .= $buffer; + + return $length; + } + + /** + * cURL callback to read HTTP headers + * + * @access public + * @param resource $ch cURL handler + * @param string $buffer Header line + * @return integer Length of the buffer + */ + public function readHeaders($ch, $buffer) + { + $length = strlen($buffer); + + if ($buffer === "\r\n") { + $this->headers_counter++; + } + else { + + if (! 
isset($this->headers[$this->headers_counter])) { + $this->headers[$this->headers_counter] = ''; + } + + $this->headers[$this->headers_counter] .= $buffer; + } + + return $length; + } + + /** + * Prepare HTTP headers + * + * @access private + * @return string[] + */ + private function prepareHeaders() + { + $headers = array( + 'Connection: close', + 'User-Agent: '.$this->user_agent, + ); + + if ($this->etag) { + $headers[] = 'If-None-Match: '.$this->etag; + } + + if ($this->last_modified) { + $headers[] = 'If-Modified-Since: '.$this->last_modified; + } + + return $headers; + } + + /** + * Prepare curl proxy context + * + * @access private + * @return resource $ch + */ + private function prepareProxyContext($ch) + { + if ($this->proxy_hostname) { + + Logger::setMessage(get_called_class().' Proxy: '.$this->proxy_hostname.':'.$this->proxy_port); + + curl_setopt($ch, CURLOPT_PROXYPORT, $this->proxy_port); + curl_setopt($ch, CURLOPT_PROXYTYPE, 'HTTP'); + curl_setopt($ch, CURLOPT_PROXY, $this->proxy_hostname); + + if ($this->proxy_username) { + Logger::setMessage(get_called_class().' Proxy credentials: Yes'); + curl_setopt($ch, CURLOPT_PROXYUSERPWD, $this->proxy_username.':'.$this->proxy_password); + } + else { + Logger::setMessage(get_called_class().' 
Proxy credentials: No'); + } + } + + return $ch; + } + + /** + * Prepare curl auth context + * + * @access private + * @return resource $ch + */ + private function prepareAuthContext($ch) + { + if ($this->username && $this->password) { + curl_setopt($ch, CURLOPT_USERPWD, $this->username.':'.$this->password); + } + + return $ch; + } + + /** + * Prepare curl context + * + * @access private + * @return resource + */ + private function prepareContext() + { + $ch = curl_init(); + + curl_setopt($ch, CURLOPT_URL, $this->url); + curl_setopt($ch, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1); + curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $this->timeout); + curl_setopt($ch, CURLOPT_HTTPHEADER, $this->prepareHeaders()); + curl_setopt($ch, CURLOPT_FOLLOWLOCATION, ini_get('open_basedir') === ''); + curl_setopt($ch, CURLOPT_MAXREDIRS, $this->max_redirects); + curl_setopt($ch, CURLOPT_ENCODING, ''); + curl_setopt($ch, CURLOPT_WRITEFUNCTION, array($this, 'readBody')); + curl_setopt($ch, CURLOPT_HEADERFUNCTION, array($this, 'readHeaders')); + curl_setopt($ch, CURLOPT_COOKIEJAR, 'php://memory'); + curl_setopt($ch, CURLOPT_COOKIEFILE, 'php://memory'); + curl_setopt($ch, CURLOPT_SSLVERSION, 1); // Enforce TLS v1 + + $ch = $this->prepareProxyContext($ch); + $ch = $this->prepareAuthContext($ch); + + return $ch; + } + + /** + * Execute curl context + * + * @access private + */ + private function executeContext() + { + $ch = $this->prepareContext(); + curl_exec($ch); + + Logger::setMessage(get_called_class().' cURL total time: '.curl_getinfo($ch, CURLINFO_TOTAL_TIME)); + Logger::setMessage(get_called_class().' cURL dns lookup time: '.curl_getinfo($ch, CURLINFO_NAMELOOKUP_TIME)); + Logger::setMessage(get_called_class().' cURL connect time: '.curl_getinfo($ch, CURLINFO_CONNECT_TIME)); + Logger::setMessage(get_called_class().' cURL speed download: '.curl_getinfo($ch, CURLINFO_SPEED_DOWNLOAD)); + Logger::setMessage(get_called_class().' 
cURL effective url: '.curl_getinfo($ch, CURLINFO_EFFECTIVE_URL)); + + $curl_errno = curl_errno($ch); + + if ($curl_errno) { + Logger::setMessage(get_called_class().' cURL error: '.curl_error($ch)); + curl_close($ch); + + $this->handleError($curl_errno); + } + + // Update the url if there where redirects + $this->url = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL); + + curl_close($ch); + } + + /** + * Do the HTTP request + * + * @access public + * @param bool $follow_location Flag used when there is an open_basedir restriction + * @return array HTTP response ['body' => ..., 'status' => ..., 'headers' => ...] + */ + public function doRequest($follow_location = true) + { + $this->executeContext(); + + list($status, $headers) = HttpHeaders::parse(explode("\r\n", $this->headers[$this->headers_counter - 1])); + + // When restricted with open_basedir + if ($this->needToHandleRedirection($follow_location, $status)) { + return $this->handleRedirection($headers['Location']); + } + + return array( + 'status' => $status, + 'body' => $this->body, + 'headers' => $headers + ); + } + + /** + * Check if the redirection have to be handled manually + * + * @access private + * @param boolean $follow_location Flag + * @param integer $status HTTP status code + * @return boolean + */ + private function needToHandleRedirection($follow_location, $status) + { + return $follow_location && ini_get('open_basedir') !== '' && ($status == 301 || $status == 302); + } + + /** + * Handle cURL errors (throw individual exceptions) + * + * We don't use constants because they are not necessary always available + * (depends of the version of libcurl linked to php) + * + * @see http://curl.haxx.se/libcurl/c/libcurl-errors.html + * @access private + * @param integer $errno cURL error code + */ + private function handleError($errno) + { + switch ($errno) { + case 78: // CURLE_REMOTE_FILE_NOT_FOUND + throw new InvalidUrlException('Resource not found'); + case 6: // CURLE_COULDNT_RESOLVE_HOST + throw new 
InvalidUrlException('Unable to resolve hostname'); + case 7: // CURLE_COULDNT_CONNECT + throw new InvalidUrlException('Unable to connect to the remote host'); + case 23: // CURLE_WRITE_ERROR + throw new MaxSizeException('Maximum response size exceeded'); + case 28: // CURLE_OPERATION_TIMEDOUT + throw new TimeoutException('Operation timeout'); + case 35: // CURLE_SSL_CONNECT_ERROR + case 51: // CURLE_PEER_FAILED_VERIFICATION + case 58: // CURLE_SSL_CERTPROBLEM + case 60: // CURLE_SSL_CACERT + case 59: // CURLE_SSL_CIPHER + case 64: // CURLE_USE_SSL_FAILED + case 66: // CURLE_SSL_ENGINE_INITFAILED + case 77: // CURLE_SSL_CACERT_BADFILE + case 83: // CURLE_SSL_ISSUER_ERROR + throw new InvalidCertificateException('Invalid SSL certificate'); + case 47: // CURLE_TOO_MANY_REDIRECTS + throw new MaxRedirectException('Maximum number of redirections reached'); + case 63: // CURLE_FILESIZE_EXCEEDED + throw new MaxSizeException('Maximum resp |