diff options
Diffstat (limited to '3rdparty/fguillot/picofeed/lib/PicoFeed/Reader/Reader.php')
-rw-r--r-- | 3rdparty/fguillot/picofeed/lib/PicoFeed/Reader/Reader.php | 232 |
1 files changed, 232 insertions, 0 deletions
<?php

// File: 3rdparty/fguillot/picofeed/lib/PicoFeed/Reader/Reader.php

namespace PicoFeed\Reader;

use DOMXPath;

use PicoFeed\Config\Config;
use PicoFeed\Client\Client;
use PicoFeed\Client\Url;
use PicoFeed\Logging\Logging;
use PicoFeed\Filter\Filter;
use PicoFeed\Parser\XmlParser;

/**
 * Reader class
 *
 * Entry point to download a feed, discover feed links inside a HTML page,
 * detect the feed format and build the matching parser.
 *
 * @author  Frederic Guillot
 * @package Reader
 */
class Reader
{
    /**
     * Feed formats for detection
     *
     * Keys are parser class names (resolved under \PicoFeed\Parser), values
     * are the substrings that must ALL be present in the first tag of the
     * document. Order matters: the first matching entry wins.
     *
     * @access private
     * @var array
     */
    private $formats = array(
        'Atom' => array('<feed'),
        'Rss20' => array('<rss', '2.0'),
        'Rss92' => array('<rss', '0.92'),
        'Rss91' => array('<rss', '0.91'),
        'Rss10' => array('<rdf:'),
    );

    /**
     * Config class instance
     *
     * @access private
     * @var \PicoFeed\Config\Config
     */
    private $config;

    /**
     * Constructor
     *
     * Also forwards the configured timezone to the logger.
     *
     * @access public
     * @param  \PicoFeed\Config\Config|null  $config   Config class instance (a default one is created when omitted)
     */
    public function __construct(Config $config = null)
    {
        $this->config = $config ?: new Config;
        Logging::setTimezone($this->config->getTimezone());
    }

    /**
     * Download a feed (no discovery)
     *
     * @access public
     * @param  string            $url              Feed url (the scheme is added when missing)
     * @param  string            $last_modified    Last modified HTTP header
     * @param  string            $etag             Etag HTTP header
     * @return \PicoFeed\Client\Client
     */
    public function download($url, $last_modified = '', $etag = '')
    {
        $url = $this->prependScheme($url);

        return Client::getInstance()
                        ->setConfig($this->config)
                        ->setLastModified($last_modified)
                        ->setEtag($etag)
                        ->execute($url);
    }

    /**
     * Discover and download a feed
     *
     * The url is downloaded first. If the content is not recognized as a feed,
     * it is treated as a HTML page and the first advertised feed link is
     * downloaded instead.
     *
     * @access public
     * @param  string            $url              Feed or website url
     * @param  string            $last_modified    Last modified HTTP header
     * @param  string            $etag             Etag HTTP header
     * @return \PicoFeed\Client\Client
     * @throws SubscriptionNotFoundException       When no feed link is found in the page
     */
    public function discover($url, $last_modified = '', $etag = '')
    {
        $client = $this->download($url, $last_modified, $etag);

        // It's already a feed
        // NOTE(review): if the server answers "not modified" with an empty body,
        // detection fails and we fall through to HTML discovery - confirm whether
        // the client exposes a "not modified" state that should short-circuit here.
        if ($this->detectFormat($client->getContent())) {
            return $client;
        }

        // Try to find a subscription
        $links = $this->find($client->getUrl(), $client->getContent());

        if (empty($links)) {
            throw new SubscriptionNotFoundException('Unable to find a subscription');
        }

        // Only the first discovered link is used
        return $this->download($links[0], $last_modified, $etag);
    }

    /**
     * Find feed urls inside a HTML document
     *
     * RSS links are listed before Atom links; relative hrefs are resolved
     * against the base url of the website.
     *
     * @access public
     * @param  string    $url     Website url
     * @param  string    $html    HTML content
     * @return array              List of feed links
     */
    public function find($url, $html)
    {
        Logging::setMessage(get_called_class().': Try to discover subscriptions');

        $dom = XmlParser::getHtmlDocument($html);
        $xpath = new DOMXPath($dom);
        $links = array();

        $queries = array(
            '//link[@type="application/rss+xml"]',
            '//link[@type="application/atom+xml"]',
        );

        foreach ($queries as $query) {

            $nodes = $xpath->query($query);

            foreach ($nodes as $node) {

                $link = $node->getAttribute('href');

                // Skip <link> elements without a usable href
                if (empty($link)) {
                    continue;
                }

                $feedUrl = new Url($link);
                $siteUrl = new Url($url);

                // The base url is only needed to resolve relative links
                $baseUrl = $feedUrl->isRelativeUrl() ? $siteUrl->getBaseUrl() : '';

                $links[] = $feedUrl->getAbsoluteUrl($baseUrl);
            }
        }

        Logging::setMessage(get_called_class().': '.implode(', ', $links));

        return $links;
    }

    /**
     * Get a parser instance
     *
     * @access public
     * @param  string    $url          Site url
     * @param  string    $content      Feed content
     * @param  string    $encoding     HTTP encoding
     * @return \PicoFeed\Parser\Parser
     * @throws UnsupportedFeedFormatException    When the feed format cannot be detected
     */
    public function getParser($url, $content, $encoding)
    {
        $format = $this->detectFormat($content);

        if (empty($format)) {
            throw new UnsupportedFeedFormatException('Unable to detect feed format');
        }

        // The detected format is the parser class name
        $className = '\PicoFeed\Parser\\'.$format;

        $parser = new $className($content, $encoding, $url);
        $parser->setHashAlgo($this->config->getParserHashAlgo());
        $parser->setTimezone($this->config->getTimezone());
        $parser->setConfig($this->config);

        return $parser;
    }

    /**
     * Detect the feed format
     *
     * @access public
     * @param  string    $content     Feed content
     * @return string                 Parser name (key of $formats) or an empty string when nothing matches
     */
    public function detectFormat($content)
    {
        $first_tag = Filter::getFirstTag($content);

        Logging::setMessage(get_called_class().': DetectFormat(): '.$first_tag);

        foreach ($this->formats as $parser => $needles) {

            if ($this->contains($first_tag, $needles)) {
                return $parser;
            }
        }

        return '';
    }

    /**
     * Return true if all needles are found in the haystack
     *
     * An empty list of needles is considered as found.
     *
     * @access private
     * @param  string    $haystack   Haystack
     * @param  array     $needles    Needles to find
     * @return boolean
     */
    private function contains($haystack, array $needles)
    {
        foreach ($needles as $needle) {

            // One missing needle is enough, no need to check the others
            if (strpos($haystack, $needle) === false) {
                return false;
            }
        }

        return true;
    }

    /**
     * Add the prefix "http://" if the end-user just enter a domain name
     *
     * Urls that already start with "http://" or "https://" (in any letter
     * case) are returned untouched, protocol-relative urls ("//host/path")
     * only receive the missing "http:" part.
     *
     * @access public
     * @param  string    $url    Url
     * @return string
     */
    public function prependScheme($url)
    {
        // Protocol-relative url: the slashes are already there
        if (strpos($url, '//') === 0) {
            return 'http:'.$url;
        }

        // The scheme is case-insensitive, "HTTP://" must not be prefixed again
        if (! preg_match('%^https?://%i', $url)) {
            $url = 'http://'.$url;
        }

        return $url;
    }
}