summaryrefslogtreecommitdiffstats
path: root/transform/chain_test.go
diff options
context:
space:
mode:
authorbep <bjorn.erik.pedersen@gmail.com>2015-02-16 10:48:15 +0100
committerspf13 <steve.francia@gmail.com>2015-02-16 08:24:42 -0500
commitf1fec88c308631ab9618b9a2b6dba9c714b087c1 (patch)
tree1cdae7bc240c695f520bda4991810a80405498d6 /transform/chain_test.go
parent27c03a6dd0bd003c77cdd4ded19ca8c3033e6476 (diff)
Improve abs url replacement speed
This commit replaces the multiple `bytes.Contains` and `bytes.Replace` with a custom replacer that does one pass through the document and exploits the fact that there are two common prefixes we search for, `src=` and `href=`. This is both faster and consumes less memory. There may be even better algos to use here, but we must leave some room for improvements for future versions. This should also make it possible to solve #816. ``` benchmark old ns/op new ns/op delta BenchmarkAbsUrl 25795 22597 -12.40% BenchmarkXmlAbsUrl 17187 11166 -35.03% benchmark old allocs new allocs delta BenchmarkAbsUrl 60 33 -45.00% BenchmarkXmlAbsUrl 30 16 -46.67% benchmark old bytes new bytes delta BenchmarkAbsUrl 5844 4167 -28.70% BenchmarkXmlAbsUrl 3754 2069 -44.89% ``` Fixes #894
Diffstat (limited to 'transform/chain_test.go')
-rw-r--r--transform/chain_test.go22
1 files changed, 15 insertions, 7 deletions
diff --git a/transform/chain_test.go b/transform/chain_test.go
index 71037d455..a88d84533 100644
--- a/transform/chain_test.go
+++ b/transform/chain_test.go
@@ -14,21 +14,29 @@ const CORRECT_OUTPUT_SRC_HREF_DQ = "<!DOCTYPE html><html><head><script src=\"foo
const CORRECT_OUTPUT_SRC_HREF_SQ = "<!DOCTYPE html><html><head><script src='foobar.js'></script><script src='http://base/barfoo.js'></script></head><body><nav><h1>title</h1></nav><article>content <a href='foobar'>foobar</a>. <a href='http://base/foobar'>Follow up</a></article></body></html>"
const H5_XML_CONTENT_ABS_URL = "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\" ?><feed xmlns=\"http://www.w3.org/2005/Atom\"><entry><content type=\"html\">&lt;p&gt;&lt;a href=&#34;/foobar&#34;&gt;foobar&lt;/a&gt;&lt;/p&gt; &lt;p&gt;A video: &lt;iframe src=&#39;/foo&#39;&gt;&lt;/iframe&gt;&lt;/p&gt;</content></entry></feed>"
-const CORRECT_OUTPUT_SRC_HREF_IN_XML = "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\" ?><feed xmlns=\"http://www.w3.org/2005/Atom\"><entry><content type=\"html\">&lt;p&gt;&lt;a href=&#34;http://xml/foobar&#34;&gt;foobar&lt;/a&gt;&lt;/p&gt; &lt;p&gt;A video: &lt;iframe src=&#39;http://xml/foo&#39;&gt;&lt;/iframe&gt;&lt;/p&gt;</content></entry></feed>"
+const CORRECT_OUTPUT_SRC_HREF_IN_XML = "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\" ?><feed xmlns=\"http://www.w3.org/2005/Atom\"><entry><content type=\"html\">&lt;p&gt;&lt;a href=&#34;http://base/foobar&#34;&gt;foobar&lt;/a&gt;&lt;/p&gt; &lt;p&gt;A video: &lt;iframe src=&#39;http://base/foo&#39;&gt;&lt;/iframe&gt;&lt;/p&gt;</content></entry></feed>"
const H5_XML_CONTENT_GUARDED = "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\" ?><feed xmlns=\"http://www.w3.org/2005/Atom\"><entry><content type=\"html\">&lt;p&gt;&lt;a href=&#34;//foobar&#34;&gt;foobar&lt;/a&gt;&lt;/p&gt; &lt;p&gt;A video: &lt;iframe src=&#39;//foo&#39;&gt;&lt;/iframe&gt;&lt;/p&gt;</content></entry></feed>"
-var abs_url_tests = []test{
+// additional sanity tests for replacements testing
+const REPLACE_1 = "No replacements."
+const REPLACE_2 = "ᚠᛇᚻ ᛒᛦᚦ ᚠᚱᚩᚠᚢᚱ\nᚠᛁᚱᚪ ᚷᛖᚻᚹᛦᛚᚳᚢᛗ"
+
+var abs_url_bench_tests = []test{
{H5_JS_CONTENT_DOUBLE_QUOTE, CORRECT_OUTPUT_SRC_HREF_DQ},
{H5_JS_CONTENT_SINGLE_QUOTE, CORRECT_OUTPUT_SRC_HREF_SQ},
{H5_JS_CONTENT_ABS_URL, H5_JS_CONTENT_ABS_URL},
{H5_JS_CONTENT_ABS_URL_SCHEMALESS, H5_JS_CONTENT_ABS_URL_SCHEMALESS},
}
-var xml_abs_url_tests = []test{
+var xml_abs_url_bench_tests = []test{
{H5_XML_CONTENT_ABS_URL, CORRECT_OUTPUT_SRC_HREF_IN_XML},
{H5_XML_CONTENT_GUARDED, H5_XML_CONTENT_GUARDED},
}
+var sanity_tests = []test{{REPLACE_1, REPLACE_1}, {REPLACE_2, REPLACE_2}}
+var abs_url_tests = append(abs_url_bench_tests, sanity_tests...)
+var xml_abs_url_tests = append(xml_abs_url_bench_tests, sanity_tests...)
+
func TestChainZeroTransformers(t *testing.T) {
tr := NewChain()
in := new(bytes.Buffer)
@@ -44,7 +52,7 @@ func BenchmarkAbsUrl(b *testing.B) {
b.ResetTimer()
for i := 0; i < b.N; i++ {
- apply(b.Errorf, tr, abs_url_tests)
+ apply(b.Errorf, tr, abs_url_bench_tests)
}
}
@@ -57,17 +65,17 @@ func TestAbsUrl(t *testing.T) {
}
func BenchmarkXmlAbsUrl(b *testing.B) {
- absURLInXML, _ := AbsURLInXML("http://xml")
+ absURLInXML, _ := AbsURLInXML("http://base")
tr := NewChain(absURLInXML...)
b.ResetTimer()
for i := 0; i < b.N; i++ {
- apply(b.Errorf, tr, xml_abs_url_tests)
+ apply(b.Errorf, tr, xml_abs_url_bench_tests)
}
}
func TestXMLAbsUrl(t *testing.T) {
- absURLInXML, _ := AbsURLInXML("http://xml")
+ absURLInXML, _ := AbsURLInXML("http://base")
tr := NewChain(absURLInXML...)
apply(t.Errorf, tr, xml_abs_url_tests)
}