diff options
Diffstat (limited to 'vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php')
-rw-r--r-- | vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php | 324 |
1 files changed, 142 insertions, 182 deletions
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php index 918cdef33..5130b68bb 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php @@ -10,112 +10,99 @@ use PicoFeed\Logging\Logger; use PicoFeed\Scraper\Scraper; /** - * Base parser class + * Base parser class. * * @author Frederic Guillot - * @package Parser */ abstract class Parser { /** - * Config object + * Config object. * - * @access private * @var \PicoFeed\Config\Config */ private $config; /** - * DateParser object + * DateParser object. * - * @access protected * @var \PicoFeed\Parser\DateParser */ protected $date; /** - * Hash algorithm used to generate item id, any value supported by PHP, see hash_algos() + * Hash algorithm used to generate item id, any value supported by PHP, see hash_algos(). * - * @access private * @var string */ private $hash_algo = 'sha256'; /** - * Feed content (XML data) + * Feed content (XML data). * - * @access protected * @var string */ protected $content = ''; /** - * Fallback url + * Fallback url. * - * @access protected * @var string */ protected $fallback_url = ''; /** - * XML namespaces supported by parser + * XML namespaces supported by parser. * - * @access protected * @var array */ protected $namespaces = array(); /** - * XML namespaces used in document + * XML namespaces used in document. * - * @access protected * @var array */ protected $used_namespaces = array(); /** - * Enable the content filtering + * Enable the content filtering. * - * @access private * @var bool */ private $enable_filter = true; /** - * Enable the content grabber + * Enable the content grabber. * - * @access private * @var bool */ private $enable_grabber = false; /** - * Enable the content grabber on all pages + * Enable the content grabber on all pages. * - * @access private * @var bool */ private $grabber_needs_rule_file = false; /** - * Ignore those urls for the content scraper + * Ignore those urls for the content scraper. * - * @access private * @var array */ private $grabber_ignore_urls = array(); /** - * Constructor + * Constructor. * - * @access public - * @param string $content Feed content - * @param string $http_encoding HTTP encoding (headers) - * @param string $fallback_url Fallback url when the feed provide relative or broken url + * @param string $content Feed content + * @param string $http_encoding HTTP encoding (headers) + * @param string $fallback_url Fallback url when the feed provide relative or broken url */ public function __construct($content, $http_encoding = '', $fallback_url = '') { - $this->date = new DateParser; + $this->date = new DateParser(); $this->fallback_url = $fallback_url; $xml_encoding = XmlParser::getEncodingFromXmlTag($content); @@ -128,9 +115,8 @@ abstract class Parser } /** - * Parse the document + * Parse the document. * - * @access public * @return \PicoFeed\Parser\Feed */ public function execute() @@ -154,7 +140,7 @@ abstract class Parser $this->used_namespaces = $xml->getNamespaces(true); $xml = $this->registerSupportedNamespaces($xml); - $feed = new Feed; + $feed = new Feed(); $this->findFeedUrl($xml, $feed); $this->checkFeedUrl($feed); @@ -171,10 +157,9 @@ abstract class Parser $this->findFeedIcon($xml, $feed); foreach ($this->getItemsTree($xml) as $entry) { - $entry = $this->registerSupportedNamespaces($entry); - $item = new Item; + $item = new Item(); $item->xml = $entry; $item->namespaces = $this->used_namespaces; @@ -206,43 +191,38 @@ abstract class Parser } /** - * Check if the feed url is correct + * Check if the feed url is correct. * - * @access public - * @param Feed $feed Feed object + * @param Feed $feed Feed object */ public function checkFeedUrl(Feed $feed) { if ($feed->getFeedUrl() === '') { $feed->feed_url = $this->fallback_url; - } - else { + } else { $feed->feed_url = Url::resolve($feed->getFeedUrl(), $this->fallback_url); } } /** - * Check if the site url is correct + * Check if the site url is correct. * - * @access public - * @param Feed $feed Feed object + * @param Feed $feed Feed object */ public function checkSiteUrl(Feed $feed) { if ($feed->getSiteUrl() === '') { $feed->site_url = Url::base($feed->getFeedUrl()); - } - else { + } else { $feed->site_url = Url::resolve($feed->getSiteUrl(), $this->fallback_url); } } /** - * Check if the item url is correct + * Check if the item url is correct. * - * @access public - * @param Feed $feed Feed object - * @param Item $item Item object + * @param Feed $feed Feed object + * @param Item $item Item object */ public function checkItemUrl(Feed $feed, Item $item) { @@ -250,15 +230,13 @@ abstract class Parser } /** - * Fetch item content with the content grabber + * Fetch item content with the content grabber. * - * @access public - * @param Item $item Item object + * @param Item $item Item object */ public function scrapWebsite(Item $item) { - if ($this->enable_grabber && ! in_array($item->getUrl(), $this->grabber_ignore_urls)) { - + if ($this->enable_grabber && !in_array($item->getUrl(), $this->grabber_ignore_urls)) { $grabber = new Scraper($this->config); $grabber->setUrl($item->getUrl()); @@ -275,11 +253,10 @@ abstract class Parser } /** - * Filter HTML for entry content + * Filter HTML for entry content. * - * @access public - * @param Feed $feed Feed object - * @param Item $item Item object + * @param Feed $feed Feed object + * @param Item $item Item object */ public function filterItemContent(Feed $feed, Item $item) { @@ -287,16 +264,14 @@ abstract class Parser $filter = Filter::html($item->getContent(), $feed->getSiteUrl()); $filter->setConfig($this->config); $item->content = $filter->execute(); - } - else { + } else { Logger::setMessage(get_called_class().': Content filtering disabled'); } } /** - * Generate a unique id for an entry (hash all arguments) + * Generate a unique id for an entry (hash all arguments). * - * @access public * @return string */ public function generateId() @@ -305,11 +280,12 @@ abstract class Parser } /** - * Return true if the given language is "Right to Left" + * Return true if the given language is "Right to Left". * * @static - * @access public - * @param string $language Language: fr-FR, en-US + * + * @param string $language Language: fr-FR, en-US + * * @return bool */ public static function isLanguageRTL($language) @@ -337,24 +313,26 @@ abstract class Parser } /** - * Set Hash algorithm used for id generation + * Set Hash algorithm used for id generation. + * + * @param string $algo Algorithm name * - * @access public - * @param string $algo Algorithm name * @return \PicoFeed\Parser\Parser */ public function setHashAlgo($algo) { $this->hash_algo = $algo ?: $this->hash_algo; + return $this; } /** - * Set a different timezone + * Set a different timezone. * * @see http://php.net/manual/en/timezones.php - * @access public - * @param string $timezone Timezone + * + * @param string $timezone Timezone + * * @return \PicoFeed\Parser\Parser */ public function setTimezone($timezone) @@ -367,22 +345,22 @@ abstract class Parser } /** - * Set config object + * Set config object. + * + * @param \PicoFeed\Config\Config $config Config instance * - * @access public - * @param \PicoFeed\Config\Config $config Config instance * @return \PicoFeed\Parser\Parser */ public function setConfig($config) { $this->config = $config; + return $this; } /** - * Enable the content grabber + * Enable the content grabber. * - * @access public * @return \PicoFeed\Parser\Parser */ public function disableContentFiltering() @@ -391,10 +369,9 @@ abstract class Parser } /** - * Return true if the content filtering is enabled + * Return true if the content filtering is enabled. * - * @access public - * @return boolean + * @return bool */ public function isFilteringEnabled() { @@ -406,11 +383,11 @@ abstract class Parser } /** - * Enable the content grabber + * Enable the content grabber. * - * @access public * @param bool $needs_rule_file true if only pages with rule files should be - * scraped + * scraped + * * @return \PicoFeed\Parser\Parser */ public function enableContentGrabber($needs_rule_file = false) @@ -420,10 +397,10 @@ abstract class Parser } /** - * Set ignored URLs for the content grabber + * Set ignored URLs for the content grabber. + * + * @param array $urls URLs * - * @access public - * @param array $urls URLs * @return \PicoFeed\Parser\Parser */ public function setGrabberIgnoreUrls(array $urls) @@ -432,10 +409,10 @@ abstract class Parser } /** - * Register all supported namespaces to be used within an xpath query + * Register all supported namespaces to be used within an xpath query. + * + * @param SimpleXMLElement $xml Feed xml * - * @access public - * @param SimpleXMLElement $xml Feed xml * @return SimpleXMLElement */ public function registerSupportedNamespaces(SimpleXMLElement $xml) @@ -448,169 +425,152 @@ abstract class Parser } /** - * Find the feed url + * Find the feed url. * - * @access public - * @param SimpleXMLElement $xml Feed xml - * @param \PicoFeed\Parser\Feed $feed Feed object + * @param SimpleXMLElement $xml Feed xml + * @param \PicoFeed\Parser\Feed $feed Feed object */ - public abstract function findFeedUrl(SimpleXMLElement $xml, Feed $feed); + abstract public function findFeedUrl(SimpleXMLElement $xml, Feed $feed); /** - * Find the site url + * Find the site url. * - * @access public - * @param SimpleXMLElement $xml Feed xml - * @param \PicoFeed\Parser\Feed $feed Feed object + * @param SimpleXMLElement $xml Feed xml + * @param \PicoFeed\Parser\Feed $feed Feed object */ - public abstract function findSiteUrl(SimpleXMLElement $xml, Feed $feed); + abstract public function findSiteUrl(SimpleXMLElement $xml, Feed $feed); /** - * Find the feed title + * Find the feed title. * - * @access public - * @param SimpleXMLElement $xml Feed xml - * @param \PicoFeed\Parser\Feed $feed Feed object + * @param SimpleXMLElement $xml Feed xml + * @param \PicoFeed\Parser\Feed $feed Feed object */ - public abstract function findFeedTitle(SimpleXMLElement $xml, Feed $feed); + abstract public function findFeedTitle(SimpleXMLElement $xml, Feed $feed); /** - * Find the feed description + * Find the feed description. * - * @access public - * @param SimpleXMLElement $xml Feed xml - * @param \PicoFeed\Parser\Feed $feed Feed object + * @param SimpleXMLElement $xml Feed xml + * @param \PicoFeed\Parser\Feed $feed Feed object */ - public abstract function findFeedDescription(SimpleXMLElement $xml, Feed $feed); + abstract public function findFeedDescription(SimpleXMLElement $xml, Feed $feed); /** - * Find the feed language + * Find the feed language. * - * @access public - * @param SimpleXMLElement $xml Feed xml - * @param \PicoFeed\Parser\Feed $feed Feed object + * @param SimpleXMLElement $xml Feed xml + * @param \PicoFeed\Parser\Feed $feed Feed object */ - public abstract function findFeedLanguage(SimpleXMLElement $xml, Feed $feed); + abstract public function findFeedLanguage(SimpleXMLElement $xml, Feed $feed); /** - * Find the feed id + * Find the feed id. * - * @access public - * @param SimpleXMLElement $xml Feed xml - * @param \PicoFeed\Parser\Feed $feed Feed object + * @param SimpleXMLElement $xml Feed xml + * @param \PicoFeed\Parser\Feed $feed Feed object */ - public abstract function findFeedId(SimpleXMLElement $xml, Feed $feed); + abstract public function findFeedId(SimpleXMLElement $xml, Feed $feed); /** - * Find the feed date + * Find the feed date. * - * @access public - * @param SimpleXMLElement $xml Feed xml - * @param \PicoFeed\Parser\Feed $feed Feed object + * @param SimpleXMLElement $xml Feed xml + * @param \PicoFeed\Parser\Feed $feed Feed object */ - public abstract function findFeedDate(SimpleXMLElement $xml, Feed $feed); + abstract public function findFeedDate(SimpleXMLElement $xml, Feed $feed); /** - * Find the feed logo url + * Find the feed logo url. * - * @access public - * @param SimpleXMLElement $xml Feed xml - * @param \PicoFeed\Parser\Feed $feed Feed object + * @param SimpleXMLElement $xml Feed xml + * @param \PicoFeed\Parser\Feed $feed Feed object */ - public abstract function findFeedLogo(SimpleXMLElement $xml, Feed $feed); + abstract public function findFeedLogo(SimpleXMLElement $xml, Feed $feed); /** - * Find the feed icon + * Find the feed icon. * - * @access public - * @param SimpleXMLElement $xml Feed xml - * @param \PicoFeed\Parser\Feed $feed Feed object + * @param SimpleXMLElement $xml Feed xml + * @param \PicoFeed\Parser\Feed $feed Feed object */ - public abstract function findFeedIcon(SimpleXMLElement $xml, Feed $feed); + abstract public function findFeedIcon(SimpleXMLElement $xml, Feed $feed); /** - * Get the path to the items XML tree + * Get the path to the items XML tree. + * + * @param SimpleXMLElement $xml Feed xml * - * @access public - * @param SimpleXMLElement $xml Feed xml * @return SimpleXMLElement */ - public abstract function getItemsTree(SimpleXMLElement $xml); + abstract public function getItemsTree(SimpleXMLElement $xml); /** - * Find the item author + * Find the item author. * - * @access public - * @param SimpleXMLElement $xml Feed - * @param SimpleXMLElement $entry Feed item - * @param \PicoFeed\Parser\Item $item Item object + * @param SimpleXMLElement $xml Feed + * @param SimpleXMLElement $entry Feed item + * @param \PicoFeed\Parser\Item $item Item object */ - public abstract function findItemAuthor(SimpleXMLElement $xml, SimpleXMLElement $entry, Item $item); + abstract public function findItemAuthor(SimpleXMLElement $xml, SimpleXMLElement $entry, Item $item); /** - * Find the item URL + * Find the item URL. * - * @access public - * @param SimpleXMLElement $entry Feed item - * @param \PicoFeed\Parser\Item $item Item object + * @param SimpleXMLElement $entry Feed item + * @param \PicoFeed\Parser\Item $item Item object */ - public abstract function findItemUrl(SimpleXMLElement $entry, Item $item); + abstract public function findItemUrl(SimpleXMLElement $entry, Item $item); /** - * Find the item title + * Find the item title. * - * @access public - * @param SimpleXMLElement $entry Feed item - * @param \PicoFeed\Parser\Item $item Item object + * @param SimpleXMLElement $entry Feed item + * @param \PicoFeed\Parser\Item $item Item object */ - public abstract function findItemTitle(SimpleXMLElement $entry, Item $item); + abstract public function findItemTitle(SimpleXMLElement $entry, Item $item); /** - * Genereate the item id + * Genereate the item id. * - * @access public - * @param SimpleXMLElement $entry Feed item - * @param \PicoFeed\Parser\Item $item Item object - * @param \PicoFeed\Parser\Feed $feed Feed object + * @param SimpleXMLElement $entry Feed item + * @param \PicoFeed\Parser\Item $item Item object + * @param \PicoFeed\Parser\Feed $feed Feed object */ - public abstract function findItemId(SimpleXMLElement $entry, Item $item, Feed $feed); + abstract public function findItemId(SimpleXMLElement $entry, Item $item, Feed $feed); /** - * Find the item date + * Find the item date. * - * @access public - * @param SimpleXMLElement $entry Feed item - * @param Item $item Item object - * @param \PicoFeed\Parser\Feed $feed Feed object + * @param SimpleXMLElement $entry Feed item + * @param Item $item Item object + * @param \PicoFeed\Parser\Feed $feed Feed object */ - public abstract function findItemDate(SimpleXMLElement $entry, Item $item, Feed $feed); + abstract public function findItemDate(SimpleXMLElement $entry, Item $item, Feed $feed); /** - * Find the item content + * Find the item content. * - * @access public - * @param SimpleXMLElement $entry Feed item - * @param \PicoFeed\Parser\Item $item Item object + * @param SimpleXMLElement $entry Feed item + * @param \PicoFeed\Parser\Item $item Item object */ - public abstract function findItemContent(SimpleXMLElement $entry, Item $item); + abstract public function findItemContent(SimpleXMLElement $entry, Item $item); /** - * Find the item enclosure + * Find the item enclosure. * - * @access public - * @param SimpleXMLElement $entry Feed item - * @param \PicoFeed\Parser\Item $item Item object - * @param \PicoFeed\Parser\Feed $feed Feed object + * @param SimpleXMLElement $entry Feed item + * @param \PicoFeed\Parser\Item $item Item object + * @param \PicoFeed\Parser\Feed $feed Feed object */ - public abstract function findItemEnclosure(SimpleXMLElement $entry, Item $item, Feed $feed); + abstract public function findItemEnclosure(SimpleXMLElement $entry, Item $item, Feed $feed); /** - * Find the item language + * Find the item language. * - * @access public - * @param SimpleXMLElement $entry Feed item - * @param \PicoFeed\Parser\Item $item Item object - * @param \PicoFeed\Parser\Feed $feed Feed object + * @param SimpleXMLElement $entry Feed item + * @param \PicoFeed\Parser\Item $item Item object + * @param \PicoFeed\Parser\Feed $feed Feed object */ - public abstract function findItemLanguage(SimpleXMLElement $entry, Item $item, Feed $feed); + abstract public function findItemLanguage(SimpleXMLElement $entry, Item $item, Feed $feed); } |