summary | refs | log | tree | commit | diff | stats
path: root/3rdparty/fguillot/picofeed/lib/PicoFeed/Filter/Filter.php
diff options
context:
space:
mode:
Diffstat (limited to '3rdparty/fguillot/picofeed/lib/PicoFeed/Filter/Filter.php')
-rw-r--r-- 3rdparty/fguillot/picofeed/lib/PicoFeed/Filter/Filter.php 170
1 files changed, 170 insertions, 0 deletions
diff --git a/3rdparty/fguillot/picofeed/lib/PicoFeed/Filter/Filter.php b/3rdparty/fguillot/picofeed/lib/PicoFeed/Filter/Filter.php
new file mode 100644
index 000000000..0490e2f49
--- /dev/null
+++ b/3rdparty/fguillot/picofeed/lib/PicoFeed/Filter/Filter.php
@@ -0,0 +1,170 @@
+<?php
+
+namespace PicoFeed\Filter;
+
+/**
+ * Filter class: static helpers used to clean up feed and HTML content
+ *
+ * @author Frederic Guillot
+ * @package Filter
+ */
+class Filter
+{
+    /**
+     * Get the Html filter instance
+     *
+     * @static
+     * @access public
+     * @param string $html HTML content
+     * @param string $website Site URL (used to build absolute URL)
+     * @return \PicoFeed\Filter\Html
+     */
+    public static function html($html, $website)
+    {
+        return new Html($html, $website);
+    }
+
+    /**
+     * Escape HTML content (quotes included, existing entities are not encoded twice)
+     *
+     * @static
+     * @access public
+     * @param string $content Raw content
+     * @return string
+     */
+    public static function escape($content)
+    {
+        // Error suppression is kept so that unexpected input behaves as it always did
+        return @htmlspecialchars($content, ENT_QUOTES, 'UTF-8', false);
+    }
+
+    /**
+     * Remove the doctype and the html, head and body tags (their content is kept)
+     *
+     * @access public
+     * @param string $data Input data
+     * @return string
+     */
+    public function removeHTMLTags($data)
+    {
+        // The lookahead makes sure the tag name is complete: <header> is not <head>
+        return preg_replace('~<(?:!DOCTYPE|/?(?:html|head|body))(?=[\s/>])[^>]*>\s*~i', '', $data);
+    }
+
+    /**
+     * Remove the XML declaration and the stylesheet instructions from a document
+     *
+     * Nothing is removed when no "?>" terminator is left in the document.
+     *
+     * @static
+     * @access public
+     * @param string $data Input data
+     * @return string
+     */
+    public static function stripXmlTag($data)
+    {
+        $end = strpos($data, '?>');
+        if ($end !== false && strpos($data, '<?xml') !== false) {
+            $data = ltrim(substr($data, $end + 2));
+        }
+
+        do {
+            $pos = strpos($data, '<?xml-stylesheet ');
+            $end = strpos($data, '?>');
+
+            // Without a terminator, stripping would eat regular content
+            if ($pos === false || $end === false) {
+                break;
+            }
+
+            $data = ltrim(substr($data, $end + 2));
+        } while ($pos < 200);
+
+        return $data;
+    }
+
+    /**
+     * Strip the head section (tags and content) from the HTML content
+     *
+     * @static
+     * @access public
+     * @param string $data Input data
+     * @return string
+     */
+    public static function stripHeadTags($data)
+    {
+        $start = strpos($data, '<head>');
+
+        // Search from the opening tag: an earlier stray closing tag must be ignored
+        $end = ($start === false) ? false : strpos($data, '</head>', $start);
+
+        if ($end === false) {
+            return $data;
+        }
+
+        return substr($data, 0, $start).substr($data, $end + 7);
+    }
+
+    /**
+     * Replace line breaks and tabs by spaces, then trim both ends (utf-8 safe)
+     *
+     * @static
+     * @access public
+     * @param string $value Raw data
+     * @return string Normalized data
+     */
+    public static function stripWhiteSpace($value)
+    {
+        // preg_replace('/\s+/', ' ', $value) is avoided on purpose: it breaks utf-8
+        return trim(str_replace(array("\r", "\t", "\n"), ' ', $value));
+    }
+
+    /**
+     * Dirty quickfixes before XML parsing
+     *
+     * @static
+     * @access public
+     * @param string $data Raw data
+     * @return string Normalized data
+     */
+    public static function normalizeData($data)
+    {
+        // Invalid sequences, removed one after the other in this order
+        return str_replace(array("\x10", "\xc3\x20", "&#x1F;"), '', $data);
+    }
+
+    /**
+     * Get the first XML tag, from "<" to ">" included (empty string if none)
+     *
+     * @static
+     * @access public
+     * @param string $data Feed content
+     * @return string
+     */
+    public static function getFirstTag($data)
+    {
+        // Strip HTML comments (max of 5,000 characters long to prevent crashing)
+        // Lazy match: content located between two comments must be preserved
+        $data = preg_replace('/<!--.{0,5000}?-->/is', '', $data);
+
+        /* Strip Doctype:
+         * Doctype needs to be within the first 100 characters. (Ideally the first!)
+         * If it's not found by then, we need to stop looking to prevent PREG
+         * from reaching max backtrack depth and crashing.
+         */
+        $data = preg_replace('/^.{0,100}?<!DOCTYPE[^>]*>/is', '', $data);
+
+        // Strip <?xml version....
+        $data = self::stripXmlTag($data);
+
+        // Find the first tag, the closing bracket is searched after the opening one
+        $open_tag = strpos($data, '<');
+        $close_tag = ($open_tag === false) ? false : strpos($data, '>', $open_tag);
+
+        if ($close_tag === false) {
+            return '';
+        }
+
+        return substr($data, $open_tag, $close_tag - $open_tag + 1);
+    }
+}