summaryrefslogtreecommitdiffstats
path: root/3rdparty/htmlpurifier/benchmarks
diff options
context:
space:
mode:
authorBernhard Posselt <nukeawhale@gmail.com>2013-05-04 00:15:41 +0200
committerBernhard Posselt <nukeawhale@gmail.com>2013-05-04 00:15:41 +0200
commit10831dd274ff65d4852b47dbc398adae61845206 (patch)
tree9f9397bb7433fd53bfacf88d8c8b3cf2ef50e27d /3rdparty/htmlpurifier/benchmarks
parent7b628a3e4d105f2e571d0fe142d59f201d6a10d0 (diff)
use html purifier for sanitation
Diffstat (limited to '3rdparty/htmlpurifier/benchmarks')
-rw-r--r--3rdparty/htmlpurifier/benchmarks/.htaccess1
-rw-r--r--3rdparty/htmlpurifier/benchmarks/ConfigSchema.php16
-rw-r--r--3rdparty/htmlpurifier/benchmarks/Lexer.php158
-rw-r--r--3rdparty/htmlpurifier/benchmarks/Trace.php21
-rw-r--r--3rdparty/htmlpurifier/benchmarks/samples/Lexer/1.html56
-rw-r--r--3rdparty/htmlpurifier/benchmarks/samples/Lexer/2.html20
-rw-r--r--3rdparty/htmlpurifier/benchmarks/samples/Lexer/3.html131
-rw-r--r--3rdparty/htmlpurifier/benchmarks/samples/Lexer/4.html543
-rw-r--r--3rdparty/htmlpurifier/benchmarks/samples/Lexer/DISCLAIMER.txt7
9 files changed, 953 insertions, 0 deletions
diff --git a/3rdparty/htmlpurifier/benchmarks/.htaccess b/3rdparty/htmlpurifier/benchmarks/.htaccess
new file mode 100644
index 000000000..03688ee91
--- /dev/null
+++ b/3rdparty/htmlpurifier/benchmarks/.htaccess
@@ -0,0 +1 @@
+Deny from all
diff --git a/3rdparty/htmlpurifier/benchmarks/ConfigSchema.php b/3rdparty/htmlpurifier/benchmarks/ConfigSchema.php
new file mode 100644
index 000000000..5ab39137e
--- /dev/null
+++ b/3rdparty/htmlpurifier/benchmarks/ConfigSchema.php
@@ -0,0 +1,16 @@
+<?php
+
+chdir(dirname(__FILE__));
+
+//require_once '../library/HTMLPurifier.path.php';
+shell_exec('php ../maintenance/generate-schema-cache.php');
+require_once '../library/HTMLPurifier.path.php';
+require_once 'HTMLPurifier.includes.php';
+
+$begin = xdebug_memory_usage();
+
+$schema = HTMLPurifier_ConfigSchema::makeFromSerial();
+
+echo xdebug_memory_usage() - $begin;
+
+// vim: et sw=4 sts=4
diff --git a/3rdparty/htmlpurifier/benchmarks/Lexer.php b/3rdparty/htmlpurifier/benchmarks/Lexer.php
new file mode 100644
index 000000000..7e837bfbf
--- /dev/null
+++ b/3rdparty/htmlpurifier/benchmarks/Lexer.php
@@ -0,0 +1,158 @@
+<?php
+
+require_once '../library/HTMLPurifier.auto.php';
+@include_once '../test-settings.php';
+
+// PEAR
+require_once 'Benchmark/Timer.php'; // to do the timing
+require_once 'Text/Password.php'; // for generating random input
+
+$LEXERS = array();
+$RUNS = isset($GLOBALS['HTMLPurifierTest']['Runs'])
+ ? $GLOBALS['HTMLPurifierTest']['Runs'] : 2;
+
+require_once 'HTMLPurifier/Lexer/DirectLex.php';
+$LEXERS['DirectLex'] = new HTMLPurifier_Lexer_DirectLex();
+
+if (version_compare(PHP_VERSION, '5', '>=')) {
+ require_once 'HTMLPurifier/Lexer/DOMLex.php';
+ $LEXERS['DOMLex'] = new HTMLPurifier_Lexer_DOMLex();
+}
+
+// custom class to aid unit testing
+class RowTimer extends Benchmark_Timer
+{
+
+ var $name;
+
+ function RowTimer($name, $auto = false) {
+ $this->name = htmlentities($name);
+ $this->Benchmark_Timer($auto);
+ }
+
+ function getOutput() {
+
+ $total = $this->TimeElapsed();
+ $result = $this->getProfiling();
+ $dashes = '';
+
+ $out = '<tr>';
+
+ $out .= "<td>{$this->name}</td>";
+
+ $standard = false;
+
+ foreach ($result as $k => $v) {
+ if ($v['name'] == 'Start' || $v['name'] == 'Stop') continue;
+
+ //$perc = (($v['diff'] * 100) / $total);
+ //$tperc = (($v['total'] * 100) / $total);
+
+ //$out .= '<td align="right">' . $v['diff'] . '</td>';
+
+ if ($standard == false) $standard = $v['diff'];
+
+ $perc = $v['diff'] * 100 / $standard;
+ $bad_run = ($v['diff'] < 0);
+
+ $out .= '<td align="right"'.
+ ($bad_run ? ' style="color:#AAA;"' : '').
+ '>' . number_format($perc, 2, '.', '') .
+ '%</td><td>'.number_format($v['diff'],4,'.','').'</td>';
+
+ }
+
+ $out .= '</tr>';
+
+ return $out;
+ }
+}
+
+function print_lexers() {
+ global $LEXERS;
+ $first = true;
+ foreach ($LEXERS as $key => $value) {
+ if (!$first) echo ' / ';
+ echo htmlspecialchars($key);
+ $first = false;
+ }
+}
+
+function do_benchmark($name, $document) {
+ global $LEXERS, $RUNS;
+
+ $config = HTMLPurifier_Config::createDefault();
+ $context = new HTMLPurifier_Context();
+
+ $timer = new RowTimer($name);
+ $timer->start();
+
+ foreach($LEXERS as $key => $lexer) {
+ for ($i=0; $i<$RUNS; $i++) $tokens = $lexer->tokenizeHTML($document, $config, $context);
+ $timer->setMarker($key);
+ }
+
+ $timer->stop();
+ $timer->display();
+}
+
+?>
+<html>
+<head>
+<title>Benchmark: <?php print_lexers(); ?></title>
+</head>
+<body>
+<h1>Benchmark: <?php print_lexers(); ?></h1>
+<table border="1">
+<tr><th>Case</th><?php
+foreach ($LEXERS as $key => $value) {
+ echo '<th colspan="2">' . htmlspecialchars($key) . '</th>';
+}
+?></tr>
+<?php
+
+// ************************************************************************** //
+
+// sample of html pages
+
+$dir = 'samples/Lexer';
+$dh = opendir($dir);
+while (false !== ($filename = readdir($dh))) {
+
+ if (strpos($filename, '.html') !== strlen($filename) - 5) continue;
+ $document = file_get_contents($dir . '/' . $filename);
+ do_benchmark("File: $filename", $document);
+
+}
+
+// crashers, caused infinite loops before
+
+$snippets = array();
+$snippets[] = '<a href="foo>';
+$snippets[] = '<a "=>';
+
+foreach ($snippets as $snippet) {
+ do_benchmark($snippet, $snippet);
+}
+
+// random input
+
+$random = Text_Password::create(80, 'unpronounceable', 'qwerty <>="\'');
+
+do_benchmark('Random input', $random);
+
+?></table>
+
+<?php
+
+echo '<div>Random input was: ' .
+ '<span colspan="4" style="font-family:monospace;">' .
+ htmlspecialchars($random) . '</span></div>';
+
+?>
+
+
+</body></html>
+<?php
+
+// vim: et sw=4 sts=4
diff --git a/3rdparty/htmlpurifier/benchmarks/Trace.php b/3rdparty/htmlpurifier/benchmarks/Trace.php
new file mode 100644
index 000000000..1ceb184ab
--- /dev/null
+++ b/3rdparty/htmlpurifier/benchmarks/Trace.php
@@ -0,0 +1,21 @@
+<?php
+
+ini_set('xdebug.trace_format', 1);
+ini_set('xdebug.show_mem_delta', true);
+
+if (file_exists('Trace.xt')) {
+ echo "Previous trace Trace.xt must be removed before this script can be run.";
+ exit;
+}
+
+xdebug_start_trace(dirname(__FILE__) . '/Trace');
+require_once '../library/HTMLPurifier.auto.php';
+
+$purifier = new HTMLPurifier();
+
+$data = $purifier->purify(file_get_contents('samples/Lexer/4.html'));
+xdebug_stop_trace();
+
+echo "Trace finished.";
+
+// vim: et sw=4 sts=4
diff --git a/3rdparty/htmlpurifier/benchmarks/samples/Lexer/1.html b/3rdparty/htmlpurifier/benchmarks/samples/Lexer/1.html
new file mode 100644
index 000000000..9eed68af2
--- /dev/null
+++ b/3rdparty/htmlpurifier/benchmarks/samples/Lexer/1.html
@@ -0,0 +1,56 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html>
+<head>
+ <title>Main Page - Huaxia Taiji Club</title>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+ <link rel="stylesheet" type="text/css" media="screen, projection" href="/screen.css" />
+ <link rel="stylesheet" type="text/css" media="print" href="/print.css" />
+</head>
+<body>
+
+<div id="translation"><a href="/ch/Main_Page">&#20013;&#25991;</a></div>
+
+<div id="heading"><a href="/en/Main_Page" title="English Main Page">Huaxia Taiji Club</a>
+ <a class="heading_ch" href="/ch/Main_Page" title="&#20013;&#25991;&#20027;&#39029;">&#21326;&#22799;&#22826;&#26497;&#20465;&#20048;&#37096;</a></div>
+<ul id="menu">
+ <li><a href="/en/Main_Page" class="active">Main Page</a></li><li><a href="/en/About">About</a></li><li><a href="/en/News">News</a></li><li><a href="/en/Events">Events</a></li><li><a href="/en/Digest">Digest</a></li><li><a href="/en/Taiji_and_I">Taiji and I</a></li><li><a href="/en/Downloads">Downloads</a></li><li><a href="/en/Registration">Registration</a></li><li><a href="/en/Contact">Contact</a></li> <li><a href="http://www.taijiclub.org/gallery2/main.php">Gallery</a></li>
+
+ <li><a href="http://www.taijiclub.org/forums/index.php">Forums</a></li>
+
+</ul>
+<div id="content">
+<h1 id="title">Main Page</h1><h2>Taiji (Tai Chi) </h2>
+
+<div id="sidebar">
+<h3>Recent News</h3>
+<ul>
+ <li>Zou Xiaojun was elected as the new club vice president </li>
+
+ <li>HX Edison Taiji Club <a href="http://www.taijiclub.org/downloads/Taiji_club_regulation_.pdf">by-law</a> effective 3/28/2006</li>
+ <li>A new email account for our club: HXEdisontaijiclub@yahoo.com</li>
+
+ <li>Workshop conducted by <a href="http://www.taijiclub.org/ch/Digest/LiDeyin">?????</a> Li Deyin is set on June 4, 2006 at Clarion Hotel in Edison from 9:30am-12pm; <a href="http://www.taijiclub.org/en/Registration">Registration</a></li>
+
+</ul>
+</div>
+
+
+
+<p><i>Taiji</i> is an ancient Chinese tradition of movement systems that is associated with philosophy, physiology, psychology, geometry and dynamics. It is the slowest form of martial arts and is meant to improve the internal spirit. It is soothing to the soul and extremely invigorating. </p>
+
+<p>The founder of Taiji was Zhang Sanfeng (Chang San-feng), who was a monk of the Wu Dang (Wu Tang) Monastery and lived in the period from 1391 to 1459. His exercises stressed suppleness and elasticity as opposed to the hardness and force of other martial art styles. Several centuries old, Taiji was originally developed as a form of self-defense, emphasizing strength, balance, flexibility and speed. Tai Chi also differs from other martial arts in that it is based on the Taoist religion and aims to avoid aggressive forces. </p>
+
+<p>Modern Taiji includes many forms &mdash; Quan, Sword and Fan. Impacting the mind and body of the practitioners, Taiji is practiced as a meditative exercise made up of a series of forms, or choreographed motions, requiring slow, gentle movement of the arms, legs and torso. Taiji practitioners learn to center their attention on their breathing and body movements so that the exercise strengthens their overall mental and physical awareness. In a sense, Taiji is similar to yoga in that it is also a form of moving meditation, with the goal of achieving stillness through the motion and awareness of breath. To perform Taiji, practitioners have to empty their mind of thoughts and worries in order to achieve harmony. It is a great aid for reducing stress and improving the quality of life. </p>
+
+<p>In China and in communities all over the world, Taiji is practiced by young and old in the early morning hours. It's a great way to bring a new and fresh day!</p>
+
+<p>Check out our <a href="/gallery2/main.php">gallery</a>.</p>
+
+<div style="text-align:center;"><a href="http://www.taijiclub.org/gallery2/v/2006/group1b.jpg.html?g2_imageViewsIndex=1"><img src="/gallery2/d/1836-2/group1b.jpg" /></a></div>
+
+<div style="text-align:center;">Click on photo to see HR version</div></div>
+</body>
+</html>
+
+<!-- vim: et sw=4 sts=4
+-->
diff --git a/3rdparty/htmlpurifier/benchmarks/samples/Lexer/2.html b/3rdparty/htmlpurifier/benchmarks/samples/Lexer/2.html
new file mode 100644
index 000000000..f0819e33c
--- /dev/null
+++ b/3rdparty/htmlpurifier/benchmarks/samples/Lexer/2.html
@@ -0,0 +1,20 @@
+<html><head><meta http-equiv="content-type" content="text/html; charset=UTF-8"><title>Google</title><style><!--
+body,td,a,p,.h{font-family:arial,sans-serif;}
+.h{font-size: 20px;}
+.q{color:#0000cc;}
+//-->
+</style>
+<script>
+<!--
+function sf(){document.f.q.focus();}
+function rwt(el,ct,cd,sg){var e = window.encodeURIComponent ? encodeURIComponent : escape;el.href="/url?sa=t&ct="+e(ct)+"&cd="+e(cd)+"&url="+e(el.href).replace(/\+/g,"%2B")+"&ei=fHNBRJDEG4HSaLONmIoP"+sg;el.onmousedown="";return true;}
+// -->
+</script>
+</head><body bgcolor=#ffffff text=#000000 link=#0000cc vlink=#551a8b alink=#ff0000 onLoad=sf() topmargin=3 marginheight=3><center><table border=0 cellspacing=0 cellpadding=0 width=100%><tr><td align=right nowrap><font size=-1><b>edwardzyang@gmail.com</b>&nbsp;|&nbsp;<a href="/url?sa=p&pref=ig&pval=2&q=http://www.google.com/ig%3Fhl%3Den" onmousedown="return rwt(this,'pro','hppphou:def','&sig2=hDbTpsWIp9YG37a23n6krQ')">Personalized Home</a>&nbsp;|&nbsp;<a href="/searchhistory/?hl=en">Search History</a>&nbsp;|&nbsp;<a href="https://www.google.com/accounts/ManageAccount">My Account</a>&nbsp;|&nbsp;<a href="http://www.google.com/accounts/Logout?continue=http://www.google.com/">Sign out</a></font></td></tr><tr height=4><td><img alt="" width=1 height=1></td></tr></table><img src="/intl/en/images/logo.gif" width=276 height=110 alt="Google"><br><br>
+<form action=/search name=f><script><!--
+function qs(el) {if (window.RegExp && window.encodeURIComponent) {var ue=el.href;var qe=encodeURIComponent(document.f.q.value);if(ue.indexOf("q=")!=-1){el.href=ue.replace(new RegExp("q=[^&$]*"),"q="+qe);}else{el.href=ue+"&q="+qe;}}return 1;}
+// -->
+</script><table border=0 cellspacing=0 cellpadding=4><tr><td nowrap><font size=-1><b>Web</b>&nbsp;&nbsp;&nbsp;&nbsp;<a id=1a class=q href="/imghp?hl=en&tab=wi" onClick="return qs(this);">Images</a>&nbsp;&nbsp;&nbsp;&nbsp;<a id=2a class=q href="http://groups.google.com/grphp?hl=en&tab=wg" onClick="return qs(this);">Groups</a>&nbsp;&nbsp;&nbsp;&nbsp;<a id=4a class=q href="http://news.google.com/nwshp?hl=en&tab=wn" onClick="return qs(this);">News</a>&nbsp;&nbsp;&nbsp;&nbsp;<a id=5a class=q href="http://froogle.google.com/frghp?hl=en&tab=wf" onClick="return qs(this);">Froogle</a>&nbsp;&nbsp;&nbsp;&nbsp;<a id=8a class=q href="/lochp?hl=en&tab=wl" onClick="return qs(this);">Local</a>&nbsp;&nbsp;&nbsp;&nbsp;<b><a href="/intl/en/options/" class=q>more&nbsp;&raquo;</a></b></font></td></tr></table><table cellspacing=0 cellpadding=0><tr><td width=25%>&nbsp;</td><td align=center><input type=hidden name=hl value=en><input maxlength=2048 size=55 name=q value="" title="Google Search"><br><input type=submit value="Google Search" name=btnG><input type=submit value="I'm Feeling Lucky" name=btnI></td><td valign=top nowrap width=25%><font size=-2>&nbsp;&nbsp;<a href=/advanced_search?hl=en>Advanced Search</a><br>&nbsp;&nbsp;<a href=/preferences?hl=en>Preferences</a><br>&nbsp;&nbsp;<a href=/language_tools?hl=en>Language Tools</a></font></td></tr></table></form><br><br><font size=-1><a href="/ads/">Advertising&nbsp;Programs</a> - <a href=/services/>Business Solutions</a> - <a href=/about.html>About Google</a></font><p><font size=-2>&copy;2006 Google</font></p></center></body></html>
+
+<!-- vim: et sw=4 sts=4
+-->
diff --git a/3rdparty/htmlpurifier/benchmarks/samples/Lexer/3.html b/3rdparty/htmlpurifier/benchmarks/samples/Lexer/3.html
new file mode 100644
index 000000000..bb683ceed
--- /dev/null
+++ b/3rdparty/htmlpurifier/benchmarks/samples/Lexer/3.html
@@ -0,0 +1,131 @@
+<html>
+<head>
+<title>Anime Digi-Lib Index</title>
+<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+</head>
+
+<div id="tb">
+
+<form name="lycos_search" method="get" target="_new" style="margin: 0px"
+ action="http://r.hotbot.com/r/memberpgs_lycos_searchbox_af/http://www.angelfire.lycos.com/cgi-bin/search/pursuit">
+
+ <table id="tbtable" cellpadding="0" cellspacing="0" border="0" width="100%" style="border: 1px solid black;">
+ <tr style="background-color: #dcf7ff">
+ <td colspan="3">
+ <table cellpadding="0" cellspacing="0" border="0">
+ <tr>
+ <td>&nbsp;Search:</td>
+
+ <td><input type="radio" name="cat" value="lycos" checked></td>
+ <td nowrap="nowrap">The Web</td>
+ <td><input type="radio" name="cat" value="angelfire"></td>
+ <td nowrap="nowrap">Angelfire</td>
+ <td nowrap="nowrap">&nbsp;&nbsp;&nbsp;<img src="http://af.lygo.com/d/toolbar/planeticon.gif"></td><td nowrap="nowrap">&nbsp;<a href="http://r.lycos.com/r/tlbr_planet/http://planet.lycos.com" target="_new">Planet</a></td>
+ </tr>
+ </table>
+ <td nowrap="nowrap"><a href="http://lt.angelfire.com/af_toolbar/edit/_h_/www.angelfire.lycos.com/build/index.tmpl" target="_top">
+ <span id="build">Edit your Site</span></a>&nbsp;</td>
+
+ <td><img src="http://af.lygo.com/d/toolbar/dir.gif" alt="show site directory" border="0" height="10" hspace="3" width="8"></td>
+ <td nowrap="nowrap"><a href="http://lt.angelfire.com/af_toolbar/browse/_h_/www.angelfire.lycos.com/directory/index.tmpl" target="_top">Browse Sites</a>&nbsp;</td>
+ <td><a href="http://lt.angelfire.com/af_toolbar/angelfire/_h_/www.angelfire.lycos.com" target="_top"><img src="http://af.lygo.com/d/toolbar/aflogo_top.gif" alt="hosted by angelfire" border="0" height="26" width="143"></a></td>
+ </tr>
+ <tr style="background-color: #dcf7ff">
+ <td nowrap="nowrap" valign="middle">&nbsp;<input size="30" style="font-size: 10px; background-color: #fff;" type="text" name="query" id="searchbox"></td>
+
+ <td style="background: #fff url(http://af.lygo.com/d/toolbar/bg.gif) repeat-x; text-align: center;" colspan="3" align="center">
+ <a href="http://clk.atdmt.com/VON/go/lycsnvon0710000019von/direct/01/"><img src="/sys/free_logo_xxxx_157x20.gif" height="20" width="157" border="0" alt="Vonage"></a><img src="http://view.atdmt.com/VON/view/lycsnvon0710000019von/direct/01/"></td>
+
+ <span style="font-size: 11px;">
+ <span style="color:#00f; font-weight:bold;">&#171;</span>
+ <span id="top100">
+ <a href="javascript:void top100('prev')" target="_top">Previous</a> |
+ <a href="http://lt.angelfire.com/af_toolbar/top100/_h_/www.angelfire.lycos.com/cgi-bin/top100/pagelist?start=1" target="_top">Top 100</a> |
+ <a href="javascript:void top100('next')" target="_top">Next</a>
+
+ </span>
+ <span style="color: #00f; font-weight: bold;">&#187;</span>
+ </span>
+ </td>
+ <td valign="top" style="background: #fff url(http://af.lygo.com/d/toolbar/bg.gif) repeat-x;"><a href="http://lt.angelfire.com/af_toolbar/angelfire/_h_/www.angelfire.lycos.com" target="_top"><img src="http://af.lygo.com/d/toolbar/aflogo_bot.gif" alt="hosted by angelfire" border="0" height="22" width="143"></a></td>
+ </tr>
+ </table>
+ </form>
+ </div>
+
+<table border="0" cellpadding="0" cellspacing="0" width="728"><tr><td>
+ <script type="text/javascript">
+if (objAdMgr.isSlotAvailable("leaderboard")) {
+ objAdMgr.renderSlot("leaderboard")
+}
+</script>
+<noscript>
+<a href="http://network.realmedia.com/RealMedia/ads/click_nx.ads/lycosangelfire/ros/728x90/wp/ss/a/491169@Top1?x"><img border="0" src="http://network.realmedia.com/RealMedia/ads/adstream_nx.ads/lycosangelfire/ros/728x90/wp/ss/a/491169@Top1" alt="leaderboard ad" /></a>
+</noscript>
+
+</td></tr>
+</table>
+<table width="86%" border="0" cellspacing="0" cellpadding="2">
+ <tr>
+ <td height="388" width="19%" bgcolor="#FFCCFF" valign="top">
+ <p>May 1, 2000</p>
+ <p><b>Pop Culture</b> </p>
+ <p>by. H. Finkelstein</p>
+
+ </td>
+ <td height="388" width="52%" valign="top">
+ <p>Welcome to the <b>Anime Digi-Lib</b>, a virtual index to anime on the
+ internet. This site strives to house a comprehensive index to both personal
+ and commercial websites and provides reviews to these sites. We hope to
+ be a gateway for people who've never imagined they'd ever be interested
+ in Japanese Animation. </p>
+ <table width="99%" border="1" cellspacing="0" cellpadding="2" height="320" name="Searchnservices">
+ <tr>
+ <td height="263" valign="top" width="58%">
+ <p>&nbsp; </p>
+ <p>&nbsp;</p>
+
+<FORM ACTION="/cgi-bin/script_library/site_search/search" METHOD="GET">
+
+<table border="0" cellpadding="2" cellspacing="0">
+<tr>
+<td colspan="2">Search term: <INPUT NAME="search_term"><br></td>
+</tr>
+<tr>
+<td colspan="2" align="center">Case-sensitive -
+<INPUT TYPE="checkbox" NAME="case_sensitive">yes<br></td>
+</tr>
+<tr>
+<td align="right"><INPUT TYPE="radio" NAME="search_type" VALUE="exact" CHECKED>exact</td>
+<td><INPUT TYPE="radio" NAME="search_type" VALUE="fuzzy">fuzzy<br></td>
+
+</tr>
+<tr>
+<td colspan="2" align="center"><INPUT TYPE="hidden" NAME="display" VALUE="#FF0000"><INPUT TYPE="submit"></td>
+</tr>
+</table>
+</form>
+
+
+ <td>
+ <table border="0" cellpadding="0" cellspacing="0" width="100%">
+<tr><td><font face="verdana,geneva" color="#000011" size="1">What is better, subtitled or dubbed anime?</font></td></tr>
+<tr><td><input type="radio" name="rd" value="1"><font face="verdana" size="2" color="#000011">Subtitled</font></td></tr>
+
+<tr><td align="middle"><font face="verdana" size="1"><a href="http://pub.alxnet.com/poll?id=2079873&q=view">Current results</a></font></td></tr>
+</table></td></tr>
+ <tr>
+ <td><font face="verdana" size="1"><a href="http://www.alxnet.com/services/poll/">Free
+ Web Polls</a></font></td>
+ </tr>
+</table></form>
+<!-- Alxnet.com -- web poll code ends -->
+ </td>
+ </tr>
+</table>
+</body>
+
+</html>
+
+<!-- vim: et sw=4 sts=4
+-->
diff --git a/3rdparty/htmlpurifier/benchmarks/samples/Lexer/4.html b/3rdparty/htmlpurifier/benchmarks/samples/Lexer/4.html
new file mode 100644
index 000000000..9264c5cf5
--- /dev/null
+++ b/3rdparty/htmlpurifier/benchmarks/samples/Lexer/4.html
@@ -0,0 +1,543 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en" dir="ltr">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+ <meta name="keywords" content="Tai Chi Chuan,Yang Pan-hou,Yang Chien-hou,Yang style Tai Chi Chuan,Yang Lu-ch'an,Wu/Hao style T'ai Chi Ch'uan,Wu Ch'uan-yü,Hao Wei-chen,Yang Shou-chung,Wu style T'ai Chi Ch'uan,Wu Chien-ch'üan" />
+<link rel="shortcut icon" href="/favicon.ico" />
+<link rel="copyright" href="http://www.gnu.org/copyleft/fdl.html" />
+ <title>Tai Chi Chuan - Wikipedia, the free encyclopedia</title>
+ <style type="text/css" media="screen,projection">/*<![CDATA[*/ @import "/skins-1.5/monobook/main.css?9"; /*]]>*/</style>
+ <link rel="stylesheet" type="text/css" media="print" href="/skins-1.5/common/commonPrint.css" />
+
+ <!--[if lt IE 5.5000]><style type="text/css">@import "/skins-1.5/monobook/IE50Fixes.css";</style><![endif]-->
+ <!--[if IE 5.5000]><style type="text/css">@import "/skins-1.5/monobook/IE55Fixes.css";</style><![endif]-->
+ <!--[if IE 6]><style type="text/css">@import "/skins-1.5/monobook/IE60Fixes.css";</style><![endif]-->
+ <!--[if IE 7]><style type="text/css">@import "/skins-1.5/monobook/IE70Fixes.css?1";</style><![endif]-->
+ <!--[if lt IE 7]><script type="text/javascript" src="/skins-1.5/common/IEFixes.js"></script>
+ <meta http-equiv="imagetoolbar" content="no" /><![endif]-->
+ <script type="text/javascript">var skin = 'monobook';var stylepath = '/skins-1.5';</script>
+ <script type="text/javascript" src="/skins-1.5/common/wikibits.js?1"><!-- wikibits js --></script>
+ <script type="text/javascript" src="/w/index.php?title=-&amp;action=raw&amp;smaxage=0&amp;gen=js"><!-- site js --></script>
+
+ <style type="text/css">/*<![CDATA[*/
+@import "/w/index.php?title=MediaWiki:Common.css&action=raw&ctype=text/css&smaxage=2678400";
+@import "/w/index.php?title=MediaWiki:Monobook.css&action=raw&ctype=text/css&smaxage=2678400";
+@import "/w/index.php?title=-&action=raw&gen=css&maxage=2678400&ts=20060721225848";
+@import "/w/index.php?title=User:Edward_Z._Yang/monobook.css&action=raw&ctype=text/css";
+/*]]>*/</style>
+ <script type="text/javascript" src="/w/index.php?title=User:Edward_Z._Yang/monobook.js&amp;action=raw&amp;ctype=text/javascript&amp;dontcountme=s"></script>
+ <!-- Head Scripts -->
+ </head>
+<body class="ns-0 ltr">
+ <div id="globalWrapper">
+ <div id="column-content">
+ <div id="content">
+
+ <a name="top" id="top"></a>
+ <div id="siteNotice"><div id="wikimania2006" style="text-align:right; font-size:80%"><a href="http://wm06reg.wikimedia.org/" class="external text" title="http://wm06reg.wikimedia.org/">Registration</a> for <a href="http://wikimania2006.wikimedia.org" class="external text" title="http://wikimania2006.wikimedia.org">Wikimania 2006</a> is open.&nbsp;&nbsp;&nbsp;</div>
+</div> <h1 class="firstHeading">Tai Chi Chuan</h1>
+ <div id="bodyContent">
+ <h3 id="siteSub">From Wikipedia, the free encyclopedia</h3>
+
+ <div id="contentSub"></div>
+ <div id="jump-to-nav">Jump to: <a href="#column-one">navigation</a>, <a href="#searchInput">search</a></div> <!-- start content -->
+ <table border="1" cellpadding="2" cellspacing="0" align="right">
+<tr>
+<th colspan="2" bgcolor="#FFCCCC"><big>???</big></th>
+</tr>
+<tr>
+<td colspan="2">
+
+<div class="center">
+<div class="thumb tnone">
+<div style="width:182px;"><a href="/wiki/Image:Yang_Ch%27eng-fu_circa_1918.jpg" class="internal" title="Yang Chengfu in a posture from the Tai Chi solo form known as Single Whip, circa 1918"><img src="http://upload.wikimedia.org/wikipedia/en/thumb/d/d1/Yang_Ch%27eng-fu_circa_1918.jpg/180px-Yang_Ch%27eng-fu_circa_1918.jpg" alt="Yang Chengfu in a posture from the Tai Chi solo form known as Single Whip, circa 1918" width="180" height="255" longdesc="/wiki/Image:Yang_Ch%27eng-fu_circa_1918.jpg" /></a>
+<div class="thumbcaption">
+<div class="magnify" style="float:right"><a href="/wiki/Image:Yang_Ch%27eng-fu_circa_1918.jpg" class="internal" title="Enlarge"><img src="/skins-1.5/common/images/magnify-clip.png" width="15" height="11" alt="Enlarge" /></a></div>
+<b><a href="/wiki/Yang_Chengfu" title="Yang Chengfu">Yang Chengfu</a> in a posture from the Tai Chi solo form known as <i>Single Whip</i>, circa <a href="/wiki/1918" title="1918">1918</a></b></div>
+</div>
+</div>
+</div>
+</td>
+</tr>
+
+<tr>
+<th colspan="2"><a href="/wiki/Chinese_language" title="Chinese language">Chinese</a> Name</th>
+</tr>
+<tr>
+<td><a href="/wiki/Hanyu_Pinyin" title="Hanyu Pinyin">Hanyu Pinyin</a></td>
+<td>Tàijíquán</td>
+</tr>
+<tr>
+<td><a href="/wiki/Wade-Giles" title="Wade-Giles">Wade-Giles</a></td>
+<td>T'ai<sup>4</sup> Chi<sup>2</sup> Ch'üan<sup>2</sup></td>
+
+</tr>
+<tr>
+<td><a href="/wiki/Simplified_Chinese" title="Simplified Chinese">Simplified Chinese</a></td>
+<td>???</td>
+</tr>
+<tr>
+<td><a href="/wiki/Traditional_Chinese" title="Traditional Chinese">Traditional Chinese</a></td>
+<td><a href="http://en.wiktionary.org/wiki/%E5%A4%AA" class="extiw" title="wiktionary:?">?</a><a href="http://en.wiktionary.org/wiki/%E6%A5%B5" class="extiw" title="wiktionary:?">?</a><a href="http://en.wiktionary.org/wiki/%E6%8B%B3" class="extiw" title="wiktionary:?">?</a></td>
+</tr>
+<tr>
+<td><a href="/wiki/Cantonese_%28linguistics%29" title="Cantonese (linguistics)">Cantonese</a></td>
+
+<td>taai3 gik6 kyun4</td>
+</tr>
+<tr>
+<td><a href="/wiki/Hiragana" title="Hiragana">Japanese Hiragana</a></td>
+<td>???????</td>
+</tr>
+<tr>
+<td><a href="/wiki/Korean_%28language%29" title="Korean (language)">Korean</a></td>
+<td>???</td>
+</tr>
+<tr>
+<td><a href="/wiki/Vietnamese_%28language%29" title="Vietnamese (language)">Vietnamese</a></td>
+
+<td>Thái C?c Quy?n</td>
+</tr>
+</table>
+<p><b>Tai Chi Chuan</b>, <b>T'ai Chi Ch'üan</b> or <b>Taijiquan</b> (<a href="/wiki/Traditional_Chinese_character" title="Traditional Chinese character">Traditional Chinese</a>: ???; <a href="/wiki/Simplified_Chinese_character" title="Simplified Chinese character">Simplified Chinese</a>: ???; <a href="/wiki/Pinyin" title="Pinyin">pinyin</a>: Tàijíquán; literally "supreme ultimate fist"), commonly known as <b>Tai Chi</b>, <b>T'ai Chi</b>, or <b><a href="/wiki/Taiji" title="Taiji">Taiji</a></b>, is an <a href="/wiki/Neijia" title="Neijia">internal</a> <a href="/wiki/Chinese_martial_arts" title="Chinese martial arts">Chinese martial art</a>. There are different styles of T'ai Chi Ch'üan, although most agree they are all based on the system originally taught by the <a href="/wiki/Chen" title="Chen">Chen</a> family to the <a href="/wiki/Yang" title="Yang">Yang</a> family starting in <a href="/wiki/1820" title="1820">1820</a>. It is often promoted and practiced as a <a href="/wiki/Martial_arts_therapy" title="Martial arts therapy">martial arts therapy</a> for the purposes of <a href="/wiki/Health" title="Health">health</a> and <a href="/wiki/Longevity" title="Longevity">longevity</a>, (some <a href="/wiki/Tai_Chi_Chuan#Citations_to_medical_research" title="Tai Chi Chuan">recent medical studies</a> support its effectiveness). T'ai Chi Ch'üan is considered a <i>soft</i> style martial art, an art applied with as complete a relaxation or "softness" in the musculature as possible, to distinguish its theory and application from that of the <i>hard</i> martial art styles which use a degree of tension in the muscles.</p>
+
+<p>Variations of T'ai Chi Ch'üan's basic training forms are well known as the slow motion routines that groups of people practice every morning in parks across China and other parts of the world. Traditional T'ai Chi training is intended to teach awareness of one's own balance and what affects it, awareness of the same in others, an appreciation of the practical value in one's ability to moderate extremes of behavior and attitude at both mental and physical levels, and how this applies to effective self-defense principles.</p>
+<table id="toc" class="toc" summary="Contents">
+<tr>
+<td>
+<div id="toctitle">
+<h2>Contents</h2>
+</div>
+<ul>
+<li class="toclevel-1"><a href="#Overview"><span class="tocnumber">1</span> <span class="toctext">Overview</span></a></li>
+<li class="toclevel-1"><a href="#Training_and_techniques"><span class="tocnumber">2</span> <span class="toctext">Training and techniques</span></a></li>
+
+<li class="toclevel-1"><a href="#Styles_and_history"><span class="tocnumber">3</span> <span class="toctext">Styles and history</span></a>
+<ul>
+<li class="toclevel-2"><a href="#Family_tree"><span class="tocnumber">3.1</span> <span class="toctext">Family tree</span></a></li>
+<li class="toclevel-2"><a href="#Notes_to_Family_tree_table"><span class="tocnumber">3.2</span> <span class="toctext">Notes to Family tree table</span></a></li>
+</ul>
+</li>
+<li class="toclevel-1"><a href="#Modern_T.27ai_Chi"><span class="tocnumber">4</span> <span class="toctext">Modern T'ai Chi</span></a>
+
+<ul>
+<li class="toclevel-2"><a href="#Modern_forms"><span class="tocnumber">4.1</span> <span class="toctext">Modern forms</span></a></li>
+</ul>
+</li>
+<li class="toclevel-1"><a href="#Health_benefits"><span class="tocnumber">5</span> <span class="toctext">Health benefits</span></a>
+<ul>
+<li class="toclevel-2"><a href="#Citations_to_medical_research"><span class="tocnumber">5.1</span> <span class="toctext">Citations to medical research</span></a></li>
+</ul>
+
+</li>
+<li class="toclevel-1"><a href="#See_also"><span class="tocnumber">6</span> <span class="toctext">See also</span></a></li>
+<li class="toclevel-1"><a href="#External_links"><span class="tocnumber">7</span> <span class="toctext">External links</span></a></li>
+</ul>
+</td>
+</tr>
+</table>
+<p><script type="text/javascript">
+//<![CDATA[
+ if (window.showTocToggle) { var tocShowText = "show"; var tocHideText = "hide"; showTocToggle(); }
+//]]>
+</script></p>
+<div class="editsection" style="float:right;margin-left:5px;">[<a href="/w/index.php?title=Tai_Chi_Chuan&amp;action=edit&amp;section=1" title="Edit section: Overview">edit</a>]</div>
+
+<p><a name="Overview" id="Overview"></a></p>
+<h2>Overview</h2>
+<p>Historically, T'ai Chi Ch'üan has been regarded as a martial art, and its traditional practitioners still teach it as one. Even so, it has developed a worldwide following among many thousands of people with little or no interest in martial training for its aforementioned benefits to health and <a href="/wiki/Preventive_medicine" title="Preventive medicine">health maintenance</a>. Some call it a form of moving <a href="/wiki/Meditation" title="Meditation">meditation</a>, and T'ai Chi theory and practice evolved in agreement with many of the principles of <a href="/wiki/Traditional_Chinese_medicine" title="Traditional Chinese medicine">traditional Chinese medicine</a>. Besides general health benefits and <a href="/wiki/Stress_management" title