diff options
Diffstat (limited to 'vendor/fguillot/picofeed/lib/PicoFeed/Client')
13 files changed, 0 insertions, 1891 deletions
diff --git a/vendor/fguillot/picofeed b/vendor/fguillot/picofeed new file mode 160000 +Subproject 0a1d0d3950f7f047dc8fb1d80aa6296e15f306d diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Client.php b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Client.php deleted file mode 100644 index 602416e42..000000000 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Client.php +++ /dev/null @@ -1,588 +0,0 @@ -<?php - -namespace PicoFeed\Client; - -use LogicException; -use PicoFeed\Logging\Logger; - -/** - * Client class - * - * @author Frederic Guillot - * @package client - */ -abstract class Client -{ - /** - * Flag that say if the resource have been modified - * - * @access private - * @var bool - */ - private $is_modified = true; - - /** - * HTTP Content-Type - * - * @access private - * @var string - */ - private $content_type = ''; - - /** - * HTTP encoding - * - * @access private - * @var string - */ - private $encoding = ''; - - /** - * HTTP Etag header - * - * @access protected - * @var string - */ - protected $etag = ''; - - /** - * HTTP Last-Modified header - * - * @access protected - * @var string - */ - protected $last_modified = ''; - - /** - * Proxy hostname - * - * @access protected - * @var string - */ - protected $proxy_hostname = ''; - - /** - * Proxy port - * - * @access protected - * @var integer - */ - protected $proxy_port = 3128; - - /** - * Proxy username - * - * @access protected - * @var string - */ - protected $proxy_username = ''; - - /** - * Proxy password - * - * @access protected - * @var string - */ - protected $proxy_password = ''; - - /** - * Client connection timeout - * - * @access protected - * @var integer - */ - protected $timeout = 10; - - /** - * User-agent - * - * @access protected - * @var string - */ - protected $user_agent = 'PicoFeed (https://github.com/fguillot/picoFeed)'; - - /** - * Real URL used (can be changed after a HTTP redirect) - * - * @access protected - * @var string - */ - protected $url = ''; - - /** - * Page/Feed content - * - * @access protected - * @var string - */ - protected $content = ''; - - /** - * Number maximum of HTTP redirections to avoid infinite loops - * - * @access protected - * @var integer - */ - protected $max_redirects = 5; - - /** - * Maximum size of the HTTP body response - * - * @access protected - * @var integer - */ - protected $max_body_size = 2097152; // 2MB - - /** - * Do the HTTP request - * - * @abstract - * @access public - * @return array - */ - abstract public function doRequest(); - - /** - * Get client instance: curl or stream driver - * - * @static - * @access public - * @return \PicoFeed\Client\Client - */ - public static function getInstance() - { - if (function_exists('curl_init')) { - return new Curl; - } - else if (ini_get('allow_url_fopen')) { - return new Stream; - } - - throw new LogicException('You must have "allow_url_fopen=1" or curl extension installed'); - } - - /** - * Perform the HTTP request - * - * @access public - * @param string $url URL - * @return Client - */ - public function execute($url = '') - { - if ($url !== '') { - $this->url = $url; - } - - Logger::setMessage(get_called_class().' Fetch URL: '.$this->url); - Logger::setMessage(get_called_class().' Etag provided: '.$this->etag); - Logger::setMessage(get_called_class().' Last-Modified provided: '.$this->last_modified); - - $response = $this->doRequest(); - - $this->handleNotModifiedResponse($response); - $this->handleNotFoundResponse($response); - $this->handleNormalResponse($response); - - return $this; - } - - /** - * Handle not modified response - * - * @access public - * @param array $response Client response - */ - public function handleNotModifiedResponse(array $response) - { - if ($response['status'] == 304) { - $this->is_modified = false; - } - else if ($response['status'] == 200) { - $this->is_modified = $this->hasBeenModified($response, $this->etag, $this->last_modified); - $this->etag = $this->getHeader($response, 'ETag'); - $this->last_modified = $this->getHeader($response, 'Last-Modified'); - } - - if ($this->is_modified === false) { - Logger::setMessage(get_called_class().' Resource not modified'); - } - } - - /** - * Handle not found response - * - * @access public - * @param array $response Client response - */ - public function handleNotFoundResponse(array $response) - { - if ($response['status'] == 404) { - throw new InvalidUrlException('Resource not found'); - } - } - - /** - * Handle normal response - * - * @access public - * @param array $response Client response - */ - public function handleNormalResponse(array $response) - { - if ($response['status'] == 200) { - $this->content = $response['body']; - $this->content_type = $this->findContentType($response); - $this->encoding = $this->findCharset(); - } - } - - /** - * Check if a request has been modified according to the parameters - * - * @access public - * @param array $response - * @param string $etag - * @param string $lastModified - * @return boolean - */ - private function hasBeenModified($response, $etag, $lastModified) - { - $headers = array( - 'Etag' => $etag, - 'Last-Modified' => $lastModified - ); - - // Compare the values for each header that is present - $presentCacheHeaderCount = 0; - foreach ($headers as $key => $value) { - if (isset($response['headers'][$key])) { - if ($response['headers'][$key] !== $value) { - return true; - } - $presentCacheHeaderCount++; - } - } - - // If at least one header is present and the values match, the response - // was not modified - if ($presentCacheHeaderCount > 0) { - return false; - } - - return true; - } - - /** - * Find content type from response headers - * - * @access public - * @param array $response Client response - * @return string - */ - public function findContentType(array $response) - { - return strtolower($this->getHeader($response, 'Content-Type')); - } - - /** - * Find charset from response headers - * - * @access public - * @return string - */ - public function findCharset() - { - $result = explode('charset=', $this->content_type); - return isset($result[1]) ? $result[1] : ''; - } - - /** - * Get header value from a client response - * - * @access public - * @param array $response Client response - * @param string $header Header name - * @return string - */ - public function getHeader(array $response, $header) - { - return isset($response['headers'][$header]) ? $response['headers'][$header] : ''; - } - - /** - * Parse HTTP headers - * - * @access public - * @param array $lines List of headers - * @return array - */ - public function parseHeaders(array $lines) - { - $status = 200; - $headers = array(); - - foreach ($lines as $line) { - - if (strpos($line, 'HTTP') === 0) { - $status = (int) substr($line, 9, 3); - } - else if (strpos($line, ':') !== false) { - - @list($name, $value) = explode(': ', $line); - if ($value) $headers[trim($name)] = trim($value); - } - } - - Logger::setMessage(get_called_class().' HTTP status code: '.$status); - - foreach ($headers as $name => $value) { - Logger::setMessage(get_called_class().' HTTP header: '.$name.' => '.$value); - } - - return array($status, new HttpHeaders($headers)); - } - - /** - * Set the Last-Modified HTTP header - * - * @access public - * @param string $last_modified Header value - * @return \PicoFeed\Client\Client - */ - public function setLastModified($last_modified) - { - $this->last_modified = $last_modified; - return $this; - } - - /** - * Get the value of the Last-Modified HTTP header - * - * @access public - * @return string - */ - public function getLastModified() - { - return $this->last_modified; - } - - /** - * Set the value of the Etag HTTP header - * - * @access public - * @param string $etag Etag HTTP header value - * @return \PicoFeed\Client\Client - */ - public function setEtag($etag) - { - $this->etag = $etag; - return $this; - } - - /** - * Get the Etag HTTP header value - * - * @access public - * @return string - */ - public function getEtag() - { - return $this->etag; - } - - /** - * Get the final url value - * - * @access public - * @return string - */ - public function getUrl() - { - return $this->url; - } - - /** - * Set the url - * - * @access public - * @return string - * @return \PicoFeed\Client\Client - */ - public function setUrl($url) - { - $this->url = $url; - return $this; - } - - /** - * Get the body of the HTTP response - * - * @access public - * @return string - */ - public function getContent() - { - return $this->content; - } - - /** - * Get the content type value from HTTP headers - * - * @access public - * @return string - */ - public function getContentType() - { - return $this->content_type; - } - - /** - * Get the encoding value from HTTP headers - * - * @access public - * @return string - */ - public function getEncoding() - { - return $this->encoding; - } - - /** - * Return true if the remote resource has changed - * - * @access public - * @return bool - */ - public function isModified() - { - return $this->is_modified; - } - - /** - * Set connection timeout - * - * @access public - * @param integer $timeout Connection timeout - * @return \PicoFeed\Client\Client - */ - public function setTimeout($timeout) - { - $this->timeout = $timeout ?: $this->timeout; - return $this; - } - - /** - * Set a custom user agent - * - * @access public - * @param string $user_agent User Agent - * @return \PicoFeed\Client\Client - */ - public function setUserAgent($user_agent) - { - $this->user_agent = $user_agent ?: $this->user_agent; - return $this; - } - - /** - * Set the mximum number of HTTP redirections - * - * @access public - * @param integer $max Maximum - * @return \PicoFeed\Client\Client - */ - public function setMaxRedirections($max) - { - $this->max_redirects = $max ?: $this->max_redirects; - return $this; - } - - /** - * Set the maximum size of the HTTP body - * - * @access public - * @param integer $max Maximum - * @return \PicoFeed\Client\Client - */ - public function setMaxBodySize($max) - { - $this->max_body_size = $max ?: $this->max_body_size; - return $this; - } - - /** - * Set the proxy hostname - * - * @access public - * @param string $hostname Proxy hostname - * @return \PicoFeed\Client\Client - */ - public function setProxyHostname($hostname) - { - $this->proxy_hostname = $hostname ?: $this->proxy_hostname; - return $this; - } - - /** - * Set the proxy port - * - * @access public - * @param integer $port Proxy port - * @return \PicoFeed\Client\Client - */ - public function setProxyPort($port) - { - $this->proxy_port = $port ?: $this->proxy_port; - return $this; - } - - /** - * Set the proxy username - * - * @access public - * @param string $username Proxy username - * @return \PicoFeed\Client\Client - */ - public function setProxyUsername($username) - { - $this->proxy_username = $username ?: $this->proxy_username; - return $this; - } - - /** - * Set the proxy password - * - * @access public - * @param string $password Password - * @return \PicoFeed\Client\Client - */ - public function setProxyPassword($password) - { - $this->proxy_password = $password ?: $this->proxy_password; - return $this; - } - - /** - * Set config object - * - * @access public - * @param \PicoFeed\Config\Config $config Config instance - * @return \PicoFeed\Client\Client - */ - public function setConfig($config) - { - if ($config !== null) { - $this->setTimeout($config->getGrabberTimeout()); - $this->setUserAgent($config->getGrabberUserAgent()); - $this->setMaxRedirections($config->getMaxRedirections()); - $this->setMaxBodySize($config->getMaxBodySize()); - $this->setProxyHostname($config->getProxyHostname()); - $this->setProxyPort($config->getProxyPort()); - $this->setProxyUsername($config->getProxyUsername()); - $this->setProxyPassword($config->getProxyPassword()); - } - - return $this; - } -} diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Client/ClientException.php b/vendor/fguillot/picofeed/lib/PicoFeed/Client/ClientException.php deleted file mode 100644 index 0e27452ed..000000000 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Client/ClientException.php +++ /dev/null @@ -1,16 +0,0 @@ -<?php - -namespace PicoFeed\Client; - -use PicoFeed\PicoFeedException; - - -/** - * ClientException Exception - * - * @author Frederic Guillot - * @package Client - */ -abstract class ClientException extends PicoFeedException -{ -} diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Curl.php b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Curl.php deleted file mode 100644 index 54b3c6ef9..000000000 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Curl.php +++ /dev/null @@ -1,327 +0,0 @@ -<?php - -namespace PicoFeed\Client; - -use PicoFeed\Logging\Logger; - -/** - * cURL HTTP client - * - * @author Frederic Guillot - * @package Client - */ -class Curl extends Client -{ - /** - * HTTP response body - * - * @access private - * @var string - */ - private $body = ''; - - /** - * Body size - * - * @access private - * @var integer - */ - private $body_length = 0; - - /** - * HTTP response headers - * - * @access private - * @var array - */ - private $headers = array(); - - /** - * Counter on the number of header received - * - * @access private - * @var integer - */ - private $headers_counter = 0; - - /** - * cURL callback to read the HTTP body - * - * If the function return -1, curl stop to read the HTTP response - * - * @access public - * @param resource $ch cURL handler - * @param string $buffer Chunk of data - * @return integer Length of the buffer - */ - public function readBody($ch, $buffer) - { - $length = strlen($buffer); - $this->body_length += $length; - - if ($this->body_length > $this->max_body_size) { - return -1; - } - - $this->body .= $buffer; - - return $length; - } - - /** - * cURL callback to read HTTP headers - * - * @access public - * @param resource $ch cURL handler - * @param string $buffer Header line - * @return integer Length of the buffer - */ - public function readHeaders($ch, $buffer) - { - $length = strlen($buffer); - - if ($buffer === "\r\n") { - $this->headers_counter++; - } - else { - - if (! isset($this->headers[$this->headers_counter])) { - $this->headers[$this->headers_counter] = ''; - } - - $this->headers[$this->headers_counter] .= $buffer; - } - - return $length; - } - - /** - * Prepare HTTP headers - * - * @access private - * @return string[] - */ - private function prepareHeaders() - { - $headers = array( - 'Connection: close', - 'User-Agent: '.$this->user_agent, - ); - - if ($this->etag) { - $headers[] = 'If-None-Match: '.$this->etag; - } - - if ($this->last_modified) { - $headers[] = 'If-Modified-Since: '.$this->last_modified; - } - - return $headers; - } - - /** - * Prepare curl proxy context - * - * @access private - * @return resource $ch - */ - private function prepareProxyContext($ch) - { - if ($this->proxy_hostname) { - - Logger::setMessage(get_called_class().' Proxy: '.$this->proxy_hostname.':'.$this->proxy_port); - - curl_setopt($ch, CURLOPT_PROXYPORT, $this->proxy_port); - curl_setopt($ch, CURLOPT_PROXYTYPE, 'HTTP'); - curl_setopt($ch, CURLOPT_PROXY, $this->proxy_hostname); - - if ($this->proxy_username) { - Logger::setMessage(get_called_class().' Proxy credentials: Yes'); - curl_setopt($ch, CURLOPT_PROXYUSERPWD, $this->proxy_username.':'.$this->proxy_password); - } - else { - Logger::setMessage(get_called_class().' Proxy credentials: No'); - } - } - - return $ch; - } - - /** - * Prepare curl context - * - * @access private - * @return resource - */ - private function prepareContext() - { - $ch = curl_init(); - - curl_setopt($ch, CURLOPT_URL, $this->url); - curl_setopt($ch, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1); - curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $this->timeout); - curl_setopt($ch, CURLOPT_HTTPHEADER, $this->prepareHeaders()); - curl_setopt($ch, CURLOPT_FOLLOWLOCATION, ini_get('open_basedir') === ''); - curl_setopt($ch, CURLOPT_MAXREDIRS, $this->max_redirects); - curl_setopt($ch, CURLOPT_ENCODING, ''); - curl_setopt($ch, CURLOPT_WRITEFUNCTION, array($this, 'readBody')); - curl_setopt($ch, CURLOPT_HEADERFUNCTION, array($this, 'readHeaders')); - curl_setopt($ch, CURLOPT_COOKIEJAR, 'php://memory'); - curl_setopt($ch, CURLOPT_COOKIEFILE, 'php://memory'); - - $ch = $this->prepareProxyContext($ch); - - return $ch; - } - - /** - * Execute curl context - * - * @access private - */ - private function executeContext() - { - $ch = $this->prepareContext(); - curl_exec($ch); - - Logger::setMessage(get_called_class().' cURL total time: '.curl_getinfo($ch, CURLINFO_TOTAL_TIME)); - Logger::setMessage(get_called_class().' cURL dns lookup time: '.curl_getinfo($ch, CURLINFO_NAMELOOKUP_TIME)); - Logger::setMessage(get_called_class().' cURL connect time: '.curl_getinfo($ch, CURLINFO_CONNECT_TIME)); - Logger::setMessage(get_called_class().' cURL speed download: '.curl_getinfo($ch, CURLINFO_SPEED_DOWNLOAD)); - Logger::setMessage(get_called_class().' cURL effective url: '.curl_getinfo($ch, CURLINFO_EFFECTIVE_URL)); - - $curl_errno = curl_errno($ch); - - if ($curl_errno) { - Logger::setMessage(get_called_class().' cURL error: '.curl_error($ch)); - curl_close($ch); - - $this->handleError($curl_errno); - } - - // Update the url if there where redirects - $this->url = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL); - - curl_close($ch); - } - - /** - * Do the HTTP request - * - * @access public - * @param bool $follow_location Flag used when there is an open_basedir restriction - * @return array HTTP response ['body' => ..., 'status' => ..., 'headers' => ...] - */ - public function doRequest($follow_location = true) - { - $this->executeContext(); - - list($status, $headers) = $this->parseHeaders(explode("\r\n", $this->headers[$this->headers_counter - 1])); - - // When restricted with open_basedir - if ($this->needToHandleRedirection($follow_location, $status)) { - return $this->handleRedirection($headers['Location']); - } - - return array( - 'status' => $status, - 'body' => $this->body, - 'headers' => $headers - ); - } - - /** - * Check if the redirection have to be handled manually - * - * @access private - * @param boolean $follow_location Flag - * @param integer $status HTTP status code - * @return boolean - */ - private function needToHandleRedirection($follow_location, $status) - { - return $follow_location && ini_get('open_basedir') !== '' && ($status == 301 || $status == 302); - } - - /** - * Handle manually redirections when there is an open base dir restriction - * - * @access private - * @param string $location Redirected URL - * @return array - */ - private function handleRedirection($location) - { - $nb_redirects = 0; - $result = array(); - $this->url = $location; - $this->body = ''; - $this->body_length = 0; - $this->headers = array(); - $this->headers_counter = 0; - - while (true) { - - $nb_redirects++; - - if ($nb_redirects >= $this->max_redirects) { - throw new MaxRedirectException('Maximum number of redirections reached'); - } - - $result = $this->doRequest(false); - - if ($result['status'] == 301 || $result['status'] == 302) { - $this->url = $result['headers']['Location']; - $this->body = ''; - $this->body_length = 0; - $this->headers = array(); - $this->headers_counter = 0; - } - else { - break; - } - } - - return $result; - } - - /** - * Handle cURL errors (throw individual exceptions) - * - * We don't use constants because they are not necessary always available - * (depends of the version of libcurl linked to php) - * - * @see http://curl.haxx.se/libcurl/c/libcurl-errors.html - * @access private - * @param integer $errno cURL error code - */ - private function handleError($errno) - { - switch ($errno) { - case 78: // CURLE_REMOTE_FILE_NOT_FOUND - throw new InvalidUrlException('Resource not found'); - case 6: // CURLE_COULDNT_RESOLVE_HOST - throw new InvalidUrlException('Unable to resolve hostname'); - case 7: // CURLE_COULDNT_CONNECT - throw new InvalidUrlException('Unable to connect to the remote host'); - case 28: // CURLE_OPERATION_TIMEDOUT - throw new TimeoutException('Operation timeout'); - case 35: // CURLE_SSL_CONNECT_ERROR - case 51: // CURLE_PEER_FAILED_VERIFICATION - case 58: // CURLE_SSL_CERTPROBLEM - case 60: // CURLE_SSL_CACERT - case 59: // CURLE_SSL_CIPHER - case 64: // CURLE_USE_SSL_FAILED - case 66: // CURLE_SSL_ENGINE_INITFAILED - case 77: // CURLE_SSL_CACERT_BADFILE - case 83: // CURLE_SSL_ISSUER_ERROR - throw new InvalidCertificateException('Invalid SSL certificate'); - case 47: // CURLE_TOO_MANY_REDIRECTS - throw new MaxRedirectException('Maximum number of redirections reached'); - case 63: // CURLE_FILESIZE_EXCEEDED - throw new MaxSizeException('Maximum response size exceeded'); - default: - throw new InvalidUrlException('Unable to fetch the URL'); - } - } -} diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Grabber.php b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Grabber.php deleted file mode 100644 index 1bca05664..000000000 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Grabber.php +++ /dev/null @@ -1,414 +0,0 @@ -<?php - -namespace PicoFeed\Client; - -use DOMXPath; -use PicoFeed\Encoding\Encoding; -use PicoFeed\Logging\Logger; -use PicoFeed\Filter\Filter; -use PicoFeed\Parser\XmlParser; - -/** - * Grabber class - * - * @author Frederic Guillot - * @package Client - */ -class Grabber -{ - /** - * URL - * - * @access private - * @var string - */ - private $url = ''; - - /** - * Relevant content - * - * @access private - * @var string - */ - private $content = ''; - - /** - * HTML content - * - * @access private - * @var string - */ - private $html = ''; - - /** - * HTML content encoding - * - * @access private - * @var string - */ - private $encoding = ''; - - /** - * List of attributes to try to get the content, order is important, generic terms at the end - * - * @access private - * @var array - */ - private $candidatesAttributes = array( - 'articleBody', - 'articlebody', - 'article-body', - 'articleContent', - 'articlecontent', - 'article-content', - 'articlePage', - 'post-content', - 'post_content', - 'entry-content', - 'main-content', - 'story_content', - 'storycontent', - 'entryBox', - 'entrytext', - 'comic', - 'post', - 'article', - 'content', - 'main', - ); - - /** - * List of attributes to strip - * - * @access private - * @var array - */ |