summaryrefslogtreecommitdiffstats
path: root/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php')
-rw-r--r-- vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php | 324
1 files changed, 142 insertions, 182 deletions
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php
index 918cdef33..5130b68bb 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php
@@ -10,112 +10,99 @@ use PicoFeed\Logging\Logger;
use PicoFeed\Scraper\Scraper;
/**
- * Base parser class
+ * Base parser class.
*
* @author Frederic Guillot
- * @package Parser
*/
abstract class Parser
{
/**
- * Config object
+ * Config object.
*
- * @access private
* @var \PicoFeed\Config\Config
*/
private $config;
/**
- * DateParser object
+ * DateParser object.
*
- * @access protected
* @var \PicoFeed\Parser\DateParser
*/
protected $date;
/**
- * Hash algorithm used to generate item id, any value supported by PHP, see hash_algos()
+ * Hash algorithm used to generate item id, any value supported by PHP, see hash_algos().
*
- * @access private
* @var string
*/
private $hash_algo = 'sha256';
/**
- * Feed content (XML data)
+ * Feed content (XML data).
*
- * @access protected
* @var string
*/
protected $content = '';
/**
- * Fallback url
+ * Fallback url.
*
- * @access protected
* @var string
*/
protected $fallback_url = '';
/**
- * XML namespaces supported by parser
+ * XML namespaces supported by parser.
*
- * @access protected
* @var array
*/
protected $namespaces = array();
/**
- * XML namespaces used in document
+ * XML namespaces used in document.
*
- * @access protected
* @var array
*/
protected $used_namespaces = array();
/**
- * Enable the content filtering
+ * Enable the content filtering.
*
- * @access private
* @var bool
*/
private $enable_filter = true;
/**
- * Enable the content grabber
+ * Enable the content grabber.
*
- * @access private
* @var bool
*/
private $enable_grabber = false;
/**
- * Enable the content grabber on all pages
+ * Restrict the content grabber to pages that have a rule file.
*
- * @access private
* @var bool
*/
private $grabber_needs_rule_file = false;
/**
- * Ignore those urls for the content scraper
+ * Ignore those urls for the content scraper.
*
- * @access private
* @var array
*/
private $grabber_ignore_urls = array();
/**
- * Constructor
+ * Constructor.
*
- * @access public
- * @param string $content Feed content
- * @param string $http_encoding HTTP encoding (headers)
- * @param string $fallback_url Fallback url when the feed provide relative or broken url
+ * @param string $content Feed content
+ * @param string $http_encoding HTTP encoding (headers)
+ * @param string $fallback_url Fallback url when the feed provides a relative or broken url
*/
public function __construct($content, $http_encoding = '', $fallback_url = '')
{
- $this->date = new DateParser;
+ $this->date = new DateParser();
$this->fallback_url = $fallback_url;
$xml_encoding = XmlParser::getEncodingFromXmlTag($content);
@@ -128,9 +115,8 @@ abstract class Parser
}
/**
- * Parse the document
+ * Parse the document.
*
- * @access public
* @return \PicoFeed\Parser\Feed
*/
public function execute()
@@ -154,7 +140,7 @@ abstract class Parser
$this->used_namespaces = $xml->getNamespaces(true);
$xml = $this->registerSupportedNamespaces($xml);
- $feed = new Feed;
+ $feed = new Feed();
$this->findFeedUrl($xml, $feed);
$this->checkFeedUrl($feed);
@@ -171,10 +157,9 @@ abstract class Parser
$this->findFeedIcon($xml, $feed);
foreach ($this->getItemsTree($xml) as $entry) {
-
$entry = $this->registerSupportedNamespaces($entry);
- $item = new Item;
+ $item = new Item();
$item->xml = $entry;
$item->namespaces = $this->used_namespaces;
@@ -206,43 +191,38 @@ abstract class Parser
}
/**
- * Check if the feed url is correct
+ * Check if the feed url is correct.
*
- * @access public
- * @param Feed $feed Feed object
+ * @param Feed $feed Feed object
*/
public function checkFeedUrl(Feed $feed)
{
if ($feed->getFeedUrl() === '') {
$feed->feed_url = $this->fallback_url;
- }
- else {
+ } else {
$feed->feed_url = Url::resolve($feed->getFeedUrl(), $this->fallback_url);
}
}
/**
- * Check if the site url is correct
+ * Check if the site url is correct.
*
- * @access public
- * @param Feed $feed Feed object
+ * @param Feed $feed Feed object
*/
public function checkSiteUrl(Feed $feed)
{
if ($feed->getSiteUrl() === '') {
$feed->site_url = Url::base($feed->getFeedUrl());
- }
- else {
+ } else {
$feed->site_url = Url::resolve($feed->getSiteUrl(), $this->fallback_url);
}
}
/**
- * Check if the item url is correct
+ * Check if the item url is correct.
*
- * @access public
- * @param Feed $feed Feed object
- * @param Item $item Item object
+ * @param Feed $feed Feed object
+ * @param Item $item Item object
*/
public function checkItemUrl(Feed $feed, Item $item)
{
@@ -250,15 +230,13 @@ abstract class Parser
}
/**
- * Fetch item content with the content grabber
+ * Fetch item content with the content grabber.
*
- * @access public
- * @param Item $item Item object
+ * @param Item $item Item object
*/
public function scrapWebsite(Item $item)
{
- if ($this->enable_grabber && ! in_array($item->getUrl(), $this->grabber_ignore_urls)) {
-
+ if ($this->enable_grabber && !in_array($item->getUrl(), $this->grabber_ignore_urls)) {
$grabber = new Scraper($this->config);
$grabber->setUrl($item->getUrl());
@@ -275,11 +253,10 @@ abstract class Parser
}
/**
- * Filter HTML for entry content
+ * Filter HTML for entry content.
*
- * @access public
- * @param Feed $feed Feed object
- * @param Item $item Item object
+ * @param Feed $feed Feed object
+ * @param Item $item Item object
*/
public function filterItemContent(Feed $feed, Item $item)
{
@@ -287,16 +264,14 @@ abstract class Parser
$filter = Filter::html($item->getContent(), $feed->getSiteUrl());
$filter->setConfig($this->config);
$item->content = $filter->execute();
- }
- else {
+ } else {
Logger::setMessage(get_called_class().': Content filtering disabled');
}
}
/**
- * Generate a unique id for an entry (hash all arguments)
+ * Generate a unique id for an entry (hash all arguments).
*
- * @access public
* @return string
*/
public function generateId()
@@ -305,11 +280,12 @@ abstract class Parser
}
/**
- * Return true if the given language is "Right to Left"
+ * Return true if the given language is "Right to Left".
*
* @static
- * @access public
- * @param string $language Language: fr-FR, en-US
+ *
+ * @param string $language Language: fr-FR, en-US
+ *
* @return bool
*/
public static function isLanguageRTL($language)
@@ -337,24 +313,26 @@ abstract class Parser
}
/**
- * Set Hash algorithm used for id generation
+ * Set Hash algorithm used for id generation.
+ *
+ * @param string $algo Algorithm name
*
- * @access public
- * @param string $algo Algorithm name
* @return \PicoFeed\Parser\Parser
*/
public function setHashAlgo($algo)
{
$this->hash_algo = $algo ?: $this->hash_algo;
+
return $this;
}
/**
- * Set a different timezone
+ * Set a different timezone.
*
* @see http://php.net/manual/en/timezones.php
- * @access public
- * @param string $timezone Timezone
+ *
+ * @param string $timezone Timezone
+ *
* @return \PicoFeed\Parser\Parser
*/
public function setTimezone($timezone)
@@ -367,22 +345,22 @@ abstract class Parser
}
/**
- * Set config object
+ * Set config object.
+ *
+ * @param \PicoFeed\Config\Config $config Config instance
*
- * @access public
- * @param \PicoFeed\Config\Config $config Config instance
* @return \PicoFeed\Parser\Parser
*/
public function setConfig($config)
{
$this->config = $config;
+
return $this;
}
/**
- * Enable the content grabber
+ * Disable the content filtering.
*
- * @access public
* @return \PicoFeed\Parser\Parser
*/
public function disableContentFiltering()
@@ -391,10 +369,9 @@ abstract class Parser
}
/**
- * Return true if the content filtering is enabled
+ * Return true if the content filtering is enabled.
*
- * @access public
- * @return boolean
+ * @return bool
*/
public function isFilteringEnabled()
{
@@ -406,11 +383,11 @@ abstract class Parser
}
/**
- * Enable the content grabber
+ * Enable the content grabber.
*
- * @access public
* @param bool $needs_rule_file true if only pages with rule files should be
- * scraped
+ * scraped
+ *
* @return \PicoFeed\Parser\Parser
*/
public function enableContentGrabber($needs_rule_file = false)
@@ -420,10 +397,10 @@ abstract class Parser
}
/**
- * Set ignored URLs for the content grabber
+ * Set ignored URLs for the content grabber.
+ *
+ * @param array $urls URLs
*
- * @access public
- * @param array $urls URLs
* @return \PicoFeed\Parser\Parser
*/
public function setGrabberIgnoreUrls(array $urls)
@@ -432,10 +409,10 @@ abstract class Parser
}
/**
- * Register all supported namespaces to be used within an xpath query
+ * Register all supported namespaces to be used within an xpath query.
+ *
+ * @param SimpleXMLElement $xml Feed xml
*
- * @access public
- * @param SimpleXMLElement $xml Feed xml
* @return SimpleXMLElement
*/
public function registerSupportedNamespaces(SimpleXMLElement $xml)
@@ -448,169 +425,152 @@ abstract class Parser
}
/**
- * Find the feed url
+ * Find the feed url.
*
- * @access public
- * @param SimpleXMLElement $xml Feed xml
- * @param \PicoFeed\Parser\Feed $feed Feed object
+ * @param SimpleXMLElement $xml Feed xml
+ * @param \PicoFeed\Parser\Feed $feed Feed object
*/
- public abstract function findFeedUrl(SimpleXMLElement $xml, Feed $feed);
+ abstract public function findFeedUrl(SimpleXMLElement $xml, Feed $feed);
/**
- * Find the site url
+ * Find the site url.
*
- * @access public
- * @param SimpleXMLElement $xml Feed xml
- * @param \PicoFeed\Parser\Feed $feed Feed object
+ * @param SimpleXMLElement $xml Feed xml
+ * @param \PicoFeed\Parser\Feed $feed Feed object
*/
- public abstract function findSiteUrl(SimpleXMLElement $xml, Feed $feed);
+ abstract public function findSiteUrl(SimpleXMLElement $xml, Feed $feed);
/**
- * Find the feed title
+ * Find the feed title.
*
- * @access public
- * @param SimpleXMLElement $xml Feed xml
- * @param \PicoFeed\Parser\Feed $feed Feed object
+ * @param SimpleXMLElement $xml Feed xml
+ * @param \PicoFeed\Parser\Feed $feed Feed object
*/
- public abstract function findFeedTitle(SimpleXMLElement $xml, Feed $feed);
+ abstract public function findFeedTitle(SimpleXMLElement $xml, Feed $feed);
/**
- * Find the feed description
+ * Find the feed description.
*
- * @access public
- * @param SimpleXMLElement $xml Feed xml
- * @param \PicoFeed\Parser\Feed $feed Feed object
+ * @param SimpleXMLElement $xml Feed xml
+ * @param \PicoFeed\Parser\Feed $feed Feed object
*/
- public abstract function findFeedDescription(SimpleXMLElement $xml, Feed $feed);
+ abstract public function findFeedDescription(SimpleXMLElement $xml, Feed $feed);
/**
- * Find the feed language
+ * Find the feed language.
*
- * @access public
- * @param SimpleXMLElement $xml Feed xml
- * @param \PicoFeed\Parser\Feed $feed Feed object
+ * @param SimpleXMLElement $xml Feed xml
+ * @param \PicoFeed\Parser\Feed $feed Feed object
*/
- public abstract function findFeedLanguage(SimpleXMLElement $xml, Feed $feed);
+ abstract public function findFeedLanguage(SimpleXMLElement $xml, Feed $feed);
/**
- * Find the feed id
+ * Find the feed id.
*
- * @access public
- * @param SimpleXMLElement $xml Feed xml
- * @param \PicoFeed\Parser\Feed $feed Feed object
+ * @param SimpleXMLElement $xml Feed xml
+ * @param \PicoFeed\Parser\Feed $feed Feed object
*/
- public abstract function findFeedId(SimpleXMLElement $xml, Feed $feed);
+ abstract public function findFeedId(SimpleXMLElement $xml, Feed $feed);
/**
- * Find the feed date
+ * Find the feed date.
*
- * @access public
- * @param SimpleXMLElement $xml Feed xml
- * @param \PicoFeed\Parser\Feed $feed Feed object
+ * @param SimpleXMLElement $xml Feed xml
+ * @param \PicoFeed\Parser\Feed $feed Feed object
*/
- public abstract function findFeedDate(SimpleXMLElement $xml, Feed $feed);
+ abstract public function findFeedDate(SimpleXMLElement $xml, Feed $feed);
/**
- * Find the feed logo url
+ * Find the feed logo url.
*
- * @access public
- * @param SimpleXMLElement $xml Feed xml
- * @param \PicoFeed\Parser\Feed $feed Feed object
+ * @param SimpleXMLElement $xml Feed xml
+ * @param \PicoFeed\Parser\Feed $feed Feed object
*/
- public abstract function findFeedLogo(SimpleXMLElement $xml, Feed $feed);
+ abstract public function findFeedLogo(SimpleXMLElement $xml, Feed $feed);
/**
- * Find the feed icon
+ * Find the feed icon.
*
- * @access public
- * @param SimpleXMLElement $xml Feed xml
- * @param \PicoFeed\Parser\Feed $feed Feed object
+ * @param SimpleXMLElement $xml Feed xml
+ * @param \PicoFeed\Parser\Feed $feed Feed object
*/
- public abstract function findFeedIcon(SimpleXMLElement $xml, Feed $feed);
+ abstract public function findFeedIcon(SimpleXMLElement $xml, Feed $feed);
/**
- * Get the path to the items XML tree
+ * Get the path to the items XML tree.
+ *
+ * @param SimpleXMLElement $xml Feed xml
*
- * @access public
- * @param SimpleXMLElement $xml Feed xml
* @return SimpleXMLElement
*/
- public abstract function getItemsTree(SimpleXMLElement $xml);
+ abstract public function getItemsTree(SimpleXMLElement $xml);
/**
- * Find the item author
+ * Find the item author.
*
- * @access public
- * @param SimpleXMLElement $xml Feed
- * @param SimpleXMLElement $entry Feed item
- * @param \PicoFeed\Parser\Item $item Item object
+ * @param SimpleXMLElement $xml Feed
+ * @param SimpleXMLElement $entry Feed item
+ * @param \PicoFeed\Parser\Item $item Item object
*/
- public abstract function findItemAuthor(SimpleXMLElement $xml, SimpleXMLElement $entry, Item $item);
+ abstract public function findItemAuthor(SimpleXMLElement $xml, SimpleXMLElement $entry, Item $item);
/**
- * Find the item URL
+ * Find the item URL.
*
- * @access public
- * @param SimpleXMLElement $entry Feed item
- * @param \PicoFeed\Parser\Item $item Item object
+ * @param SimpleXMLElement $entry Feed item
+ * @param \PicoFeed\Parser\Item $item Item object
*/
- public abstract function findItemUrl(SimpleXMLElement $entry, Item $item);
+ abstract public function findItemUrl(SimpleXMLElement $entry, Item $item);
/**
- * Find the item title
+ * Find the item title.
*
- * @access public
- * @param SimpleXMLElement $entry Feed item
- * @param \PicoFeed\Parser\Item $item Item object
+ * @param SimpleXMLElement $entry Feed item
+ * @param \PicoFeed\Parser\Item $item Item object
*/
- public abstract function findItemTitle(SimpleXMLElement $entry, Item $item);
+ abstract public function findItemTitle(SimpleXMLElement $entry, Item $item);
/**
- * Genereate the item id
+ * Generate the item id.
*
- * @access public
- * @param SimpleXMLElement $entry Feed item
- * @param \PicoFeed\Parser\Item $item Item object
- * @param \PicoFeed\Parser\Feed $feed Feed object
+ * @param SimpleXMLElement $entry Feed item
+ * @param \PicoFeed\Parser\Item $item Item object
+ * @param \PicoFeed\Parser\Feed $feed Feed object
*/
- public abstract function findItemId(SimpleXMLElement $entry, Item $item, Feed $feed);
+ abstract public function findItemId(SimpleXMLElement $entry, Item $item, Feed $feed);
/**
- * Find the item date
+ * Find the item date.
*
- * @access public
- * @param SimpleXMLElement $entry Feed item
- * @param Item $item Item object
- * @param \PicoFeed\Parser\Feed $feed Feed object
+ * @param SimpleXMLElement $entry Feed item
+ * @param Item $item Item object
+ * @param \PicoFeed\Parser\Feed $feed Feed object
*/
- public abstract function findItemDate(SimpleXMLElement $entry, Item $item, Feed $feed);
+ abstract public function findItemDate(SimpleXMLElement $entry, Item $item, Feed $feed);
/**
- * Find the item content
+ * Find the item content.
*
- * @access public
- * @param SimpleXMLElement $entry Feed item
- * @param \PicoFeed\Parser\Item $item Item object
+ * @param SimpleXMLElement $entry Feed item
+ * @param \PicoFeed\Parser\Item $item Item object
*/
- public abstract function findItemContent(SimpleXMLElement $entry, Item $item);
+ abstract public function findItemContent(SimpleXMLElement $entry, Item $item);
/**
- * Find the item enclosure
+ * Find the item enclosure.
*
- * @access public
- * @param SimpleXMLElement $entry Feed item
- * @param \PicoFeed\Parser\Item $item Item object
- * @param \PicoFeed\Parser\Feed $feed Feed object
+ * @param SimpleXMLElement $entry Feed item
+ * @param \PicoFeed\Parser\Item $item Item object
+ * @param \PicoFeed\Parser\Feed $feed Feed object
*/
- public abstract function findItemEnclosure(SimpleXMLElement $entry, Item $item, Feed $feed);
+ abstract public function findItemEnclosure(SimpleXMLElement $entry, Item $item, Feed $feed);
/**
- * Find the item language
+ * Find the item language.
*
- * @access public
- * @param SimpleXMLElement $entry Feed item
- * @param \PicoFeed\Parser\Item $item Item object
- * @param \PicoFeed\Parser\Feed $feed Feed object
+ * @param SimpleXMLElement $entry Feed item
+ * @param \PicoFeed\Parser\Item $item Item object
+ * @param \PicoFeed\Parser\Feed $feed Feed object
*/
- public abstract function findItemLanguage(SimpleXMLElement $entry, Item $item, Feed $feed);
+ abstract public function findItemLanguage(SimpleXMLElement $entry, Item $item, Feed $feed);
}