diff options
author | Bernhard Posselt <dev@bernhard-posselt.com> | 2016-03-25 17:54:22 +0100 |
---|---|---|
committer | Bernhard Posselt <dev@bernhard-posselt.com> | 2016-03-25 17:54:22 +0100 |
commit | 8e128604b9bace8a834daa3b02f242f976df85e7 (patch) | |
tree | d520ad25a04ab839eec838fbde16e8cab1094240 /vendor/fguillot/picofeed | |
parent | 7e9d8b396a87999b7abef7d08c6038af824a70e6 (diff) |
add makefile for building the project
Diffstat (limited to 'vendor/fguillot/picofeed')
29 files changed, 633 insertions, 311 deletions
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Base.php b/vendor/fguillot/picofeed/lib/PicoFeed/Base.php new file mode 100644 index 000000000..4be0985e4 --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Base.php @@ -0,0 +1,34 @@ +<?php + +namespace PicoFeed; + +use PicoFeed\Config\Config; +use PicoFeed\Logging\Logger; + +/** + * Base class + * + * @package PicoFeed + * @author Frederic Guillot + */ +abstract class Base +{ + /** + * Config class instance + * + * @access protected + * @var \PicoFeed\Config\Config + */ + protected $config; + + /** + * Constructor. + * + * @param \PicoFeed\Config\Config $config Config class instance + */ + public function __construct(Config $config = null) + { + $this->config = $config ?: new Config(); + Logger::setTimezone($this->config->getTimezone()); + } +} diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Tag.php b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Tag.php index 34e21dc19..5fd8d6da9 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Tag.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Tag.php @@ -2,25 +2,18 @@ namespace PicoFeed\Filter; -use DOMXpath; +use DOMXPath; +use PicoFeed\Base; use PicoFeed\Parser\XmlParser; -use PicoFeed\Config\Config; /** * Tag Filter class. * * @author Frederic Guillot */ -class Tag +class Tag extends Base { /** - * Config object. - * - * @var \PicoFeed\Config\Config - */ - private $config; - - /** * Tags blacklist (Xpath expressions). * * @var array @@ -76,11 +69,6 @@ class Tag 'q', ); - public function __construct(Config $config) - { - $this->config = $config; - } - /** * Check if the tag is allowed and is not a pixel tracker. * diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Generator/ContentGeneratorInterface.php b/vendor/fguillot/picofeed/lib/PicoFeed/Generator/ContentGeneratorInterface.php new file mode 100644 index 000000000..5c2f205c6 --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Generator/ContentGeneratorInterface.php @@ -0,0 +1,23 @@ +<?php + +namespace PicoFeed\Generator; + +use PicoFeed\Parser\Item; + +/** + * Content Generator Interface + * + * @package PicoFeed\Generator + * @author Frederic Guillot + */ +interface ContentGeneratorInterface +{ + /** + * Execute Content Generator + * + * @access public + * @param Item $item + * @return boolean + */ + public function execute(Item $item); +} diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Generator/FileContentGenerator.php b/vendor/fguillot/picofeed/lib/PicoFeed/Generator/FileContentGenerator.php new file mode 100644 index 000000000..03f37e16f --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Generator/FileContentGenerator.php @@ -0,0 +1,36 @@ +<?php + +namespace PicoFeed\Generator; + +use PicoFeed\Base; +use PicoFeed\Parser\Item; + +/** + * File Content Generator + * + * @package PicoFeed\Generator + * @author Frederic Guillot + */ +class FileContentGenerator extends Base implements ContentGeneratorInterface +{ + private $extensions = array('pdf'); + + /** + * Execute Content Generator + * + * @access public + * @param Item $item + * @return boolean + */ + public function execute(Item $item) + { + foreach ($this->extensions as $extension) { + if (substr($item->getUrl(), - strlen($extension)) === $extension) { + $item->setContent('<a href="'.$item->getUrl().'" target="_blank">'.$item->getUrl().'</a>'); + return true; + } + } + + return false; + } +} diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Generator/YoutubeContentGenerator.php b/vendor/fguillot/picofeed/lib/PicoFeed/Generator/YoutubeContentGenerator.php new file mode 100644 index 000000000..198090d4f --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Generator/YoutubeContentGenerator.php @@ -0,0 +1,67 @@ +<?php + +namespace PicoFeed\Generator; + +use PicoFeed\Base; +use PicoFeed\Parser\Item; + +/** + * Youtube Content Generator + * + * @package PicoFeed\Generator + * @author Frederic Guillot + */ +class YoutubeContentGenerator extends Base implements ContentGeneratorInterface +{ + /** + * Execute Content Generator + * + * @access public + * @param Item $item + * @return boolean + */ + public function execute(Item $item) + { + if ($item->hasNamespace('yt')) { + return $this->generateHtmlFromXml($item); + } + + return $this->generateHtmlFromUrl($item); + } + + /** + * Generate HTML + * + * @access public + * @param Item $item + * @return boolean + */ + private function generateHtmlFromXml(Item $item) + { + $videoId = $item->getTag('yt:videoId'); + + if (! empty($videoId)) { + $item->setContent('<iframe width="560" height="315" src="//www.youtube.com/embed/'.$videoId[0].'" frameborder="0"></iframe>'); + return true; + } + + return false; + } + + /** + * Generate HTML from item URL + * + * @access public + * @param Item $item + * @return bool + */ + public function generateHtmlFromUrl(Item $item) + { + if (preg_match('/youtube\.com\/watch\?v=(.*)/', $item->getUrl(), $matches)) { + $item->setContent('<iframe width="560" height="315" src="//www.youtube.com/embed/'.$matches[1].'" frameborder="0"></iframe>'); + return true; + } + + return false; + } +} diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Atom.php b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Atom.php index 356453c9d..63259235f 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Atom.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Atom.php @@ -150,7 +150,7 @@ class Atom extends Parser $updated = XmlParser::getXPathResult($xml, 'atom:updated', $this->namespaces) ?: XmlParser::getXPathResult($xml, 'updated'); - $feed->date = $this->date->getDateTime((string) current($updated)); + $feed->date = $this->getDateParser()->getDateTime((string) current($updated)); } /** @@ -168,8 +168,8 @@ class Atom extends Parser $updated = XmlParser::getXPathResult($entry, 'atom:updated', $this->namespaces) ?: XmlParser::getXPathResult($entry, 'updated'); - $published = !empty($published) ? $this->date->getDateTime((string) current($published)) : null; - $updated = !empty($updated) ? $this->date->getDateTime((string) current($updated)) : null; + $published = !empty($published) ? $this->getDateParser()->getDateTime((string) current($published)) : null; + $updated = !empty($updated) ? $this->getDateParser()->getDateTime((string) current($updated)) : null; if ($published === null && $updated === null) { $item->date = $feed->getDate(); // We use the feed date if there is no date for the item diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/DateParser.php b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/DateParser.php index e4d08b527..4ad00789a 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/DateParser.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/DateParser.php @@ -4,20 +4,22 @@ namespace PicoFeed\Parser; use DateTime; use DateTimeZone; +use PicoFeed\Base; /** * Date Parser. * * @author Frederic Guillot */ -class DateParser +class DateParser extends Base { /** * Timezone used to parse feed dates. * + * @access private * @var string */ - public $timezone = 'UTC'; + private $timezone = 'UTC'; /** * Supported formats [ 'format' => length ]. @@ -88,7 +90,7 @@ class DateParser */ public function getValidDate($format, $value) { - $date = DateTime::createFromFormat($format, $value, new DateTimeZone($this->timezone)); + $date = DateTime::createFromFormat($format, $value, $this->getTimeZone()); if ($date !== false) { $errors = DateTime::getLastErrors(); @@ -108,6 +110,17 @@ class DateParser */ public function getCurrentDateTime() { - return new DateTime('now', new DateTimeZone($this->timezone)); + return new DateTime('now', $this->getTimeZone()); + } + + /** + * Get DateTimeZone instance + * + * @access public + * @return DateTimeZone + */ + public function getTimeZone() + { + return new DateTimeZone($this->config->getTimezone() ?: $this->timezone); } } diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Item.php b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Item.php index 22d7c5951..34e557a11 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Item.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Item.php @@ -103,6 +103,18 @@ class Item public $namespaces = array(); /** + * Check if a XML namespace exists + * + * @access public + * @param string $namespace + * @return bool + */ + public function hasNamespace($namespace) + { + return array_key_exists($namespace, $this->namespaces); + } + + /** * Get specific XML tag or attribute value. * * @param string $tag Tag name (examples: guid, media:content) @@ -112,12 +124,10 @@ class Item */ public function getTag($tag, $attribute = '') { - // convert to xPath attribute query if ($attribute !== '') { $attribute = '/@'.$attribute; } - // construct query $query = './/'.$tag.$attribute; $elements = XmlParser::getXPathResult($this->xml, $query, $this->namespaces); @@ -155,7 +165,10 @@ class Item } /** - * Get url. + * Get URL + * + * @access public + * @return string */ public function getUrl() { @@ -163,6 +176,19 @@ class Item } /** + * Set URL + * + * @access public + * @param string $url + * @return Item + */ + public function setUrl($url) + { + $this->url = $url; + return $this; + } + + /** * Get id. */ public function getId() @@ -187,6 +213,19 @@ class Item } /** + * Set content + * + * @access public + * @param string $value + * @return Item + */ + public function setContent($value) + { + $this->content = $value; + return $this; + } + + /** * Get enclosure url. */ public function getEnclosureUrl() diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php index 5130b68bb..433f21a26 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php @@ -2,12 +2,15 @@ namespace PicoFeed\Parser; +use PicoFeed\Processor\ContentFilterProcessor; +use PicoFeed\Processor\ContentGeneratorProcessor; +use PicoFeed\Processor\ItemPostProcessor; +use PicoFeed\Processor\ScraperProcessor; use SimpleXMLElement; use PicoFeed\Client\Url; use PicoFeed\Encoding\Encoding; use PicoFeed\Filter\Filter; use PicoFeed\Logging\Logger; -use PicoFeed\Scraper\Scraper; /** * Base parser class. @@ -28,7 +31,7 @@ abstract class Parser * * @var \PicoFeed\Parser\DateParser */ - protected $date; + private $dateParser; /** * Hash algorithm used to generate item id, any value supported by PHP, see hash_algos(). @@ -66,32 +69,12 @@ abstract class Parser protected $used_namespaces = array(); /** - * Enable the content filtering. + * Item Post Processor instance * - * @var bool + * @access private + * @var ItemPostProcessor */ - private $enable_filter = true; - - /** - * Enable the content grabber. - * - * @var bool - */ - private $enable_grabber = false; - - /** - * Enable the content grabber on all pages. - * - * @var bool - */ - private $grabber_needs_rule_file = false; - - /** - * Ignore those urls for the content scraper. - * - * @var array - */ - private $grabber_ignore_urls = array(); + private $itemPostProcessor; /** * Constructor. @@ -102,7 +85,6 @@ abstract class Parser */ public function __construct($content, $http_encoding = '', $fallback_url = '') { - $this->date = new DateParser(); $this->fallback_url = $fallback_url; $xml_encoding = XmlParser::getEncodingFromXmlTag($content); @@ -112,6 +94,10 @@ abstract class Parser // Encode everything in UTF-8 Logger::setMessage(get_called_class().': HTTP Encoding "'.$http_encoding.'" ; XML Encoding "'.$xml_encoding.'"'); $this->content = Encoding::convert($this->content, $xml_encoding ?: $http_encoding); + + $this->itemPostProcessor = new ItemPostProcessor($this->config); + $this->itemPostProcessor->register(new ContentGeneratorProcessor($this->config)); + $this->itemPostProcessor->register(new ContentFilterProcessor($this->config)); } /** @@ -173,15 +159,11 @@ abstract class Parser // Id generation can use the item url/title/content (order is important) $this->findItemId($entry, $item, $feed); - $this->findItemDate($entry, $item, $feed); $this->findItemEnclosure($entry, $item, $feed); $this->findItemLanguage($entry, $item, $feed); - // Order is important (avoid double filtering) - $this->filterItemContent($feed, $item); - $this->scrapWebsite($item); - + $this->itemPostProcessor->execute($feed, $item); $feed->items[] = $item; } @@ -230,43 +212,29 @@ abstract class Parser } /** - * Fetch item content with the content grabber. + * Get Item Post Processor instance * - * @param Item $item Item object + * @access public + * @return ItemPostProcessor */ - public function scrapWebsite(Item $item) + public function getItemPostProcessor() { - if ($this->enable_grabber && !in_array($item->getUrl(), $this->grabber_ignore_urls)) { - $grabber = new Scraper($this->config); - $grabber->setUrl($item->getUrl()); - - if ($this->grabber_needs_rule_file) { - $grabber->disableCandidateParser(); - } - - $grabber->execute(); - - if ($grabber->hasRelevantContent()) { - $item->content = $grabber->getFilteredContent(); - } - } + return $this->itemPostProcessor; } /** - * Filter HTML for entry content. + * Get DateParser instance * - * @param Feed $feed Feed object - * @param Item $item Item object + * @access public + * @return DateParser */ - public function filterItemContent(Feed $feed, Item $item) + public function getDateParser() { - if ($this->isFilteringEnabled()) { - $filter = Filter::html($item->getContent(), $feed->getSiteUrl()); - $filter->setConfig($this->config); - $item->content = $filter->execute(); - } else { - Logger::setMessage(get_called_class().': Content filtering disabled'); + if ($this->dateParser === null) { + return new DateParser($this->config); } + + return $this->dateParser; } /** @@ -316,31 +284,11 @@ abstract class Parser * Set Hash algorithm used for id generation. * * @param string $algo Algorithm name - * * @return \PicoFeed\Parser\Parser */ public function setHashAlgo($algo) { $this->hash_algo = $algo ?: $this->hash_algo; - - return $this; - } - - /** - * Set a different timezone. - * - * @see http://php.net/manual/en/timezones.php - * - * @param string $timezone Timezone - * - * @return \PicoFeed\Parser\Parser - */ - public function setTimezone($timezone) - { - if ($timezone) { - $this->date->timezone = $timezone; - } - return $this; } @@ -354,7 +302,6 @@ abstract class Parser public function setConfig($config) { $this->config = $config; - return $this; } @@ -365,21 +312,8 @@ abstract class Parser */ public function disableContentFiltering() { - $this->enable_filter = false; - } - - /** - * Return true if the content filtering is enabled. - * - * @return bool - */ - public function isFilteringEnabled() - { - if ($this->config === null) { - return $this->enable_filter; - } - - return $this->config->getContentFiltering($this->enable_filter); + $this->itemPostProcessor->unregister('PicoFeed\Processor\ContentFilterProcessor'); + return $this; } /** @@ -392,8 +326,14 @@ abstract class Parser */ public function enableContentGrabber($needs_rule_file = false) { - $this->enable_grabber = true; - $this->grabber_needs_rule_file = $needs_rule_file; + $processor = new ScraperProcessor($this->config); + + if ($needs_rule_file) { + $processor->getScraper()->disableCandidateParser(); + } + + $this->itemPostProcessor->register($processor); + return $this; } /** @@ -405,7 +345,8 @@ abstract class Parser */ public function setGrabberIgnoreUrls(array $urls) { - $this->grabber_ignore_urls = $urls; + $this->itemPostProcessor->getProcessor('PicoFeed\Processor\ScraperProcessor')->ignoreUrls($urls); + return $this; } /** diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Rss10.php b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Rss10.php index dd2aa7a8c..315c7db26 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Rss10.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Rss10.php @@ -149,7 +149,7 @@ class Rss10 extends Parser $date = XmlParser::getXPathResult($xml, 'rss:channel/dc:date', $this->namespaces) ?: XmlParser::getXPathResult($xml, 'channel/dc:date', $this->namespaces); - $feed->date = $this->date->getDateTime((string) current($date)); + $feed->date = $this->getDateParser()->getDateTime((string) current($date)); } /** @@ -163,7 +163,7 @@ class Rss10 extends Parser { $date = XmlParser::getXPathResult($entry, 'dc:date', $this->namespaces); - $item->date = empty($date) ? $feed->getDate() : $this->date->getDateTime((string) current($date)); + $item->date = empty($date) ? $feed->getDate() : $this->getDateParser()->getDateTime((string) current($date)); } /** diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Rss20.php b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Rss20.php index 005691f48..b265656ac 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Rss20.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Rss20.php @@ -139,11 +13 |