summary | refs | log | tree | commit | diff | stats
path: root/publisher/htmlElementsCollector.go
diff options
context:
space:
mode:
author: Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com> 2023-02-06 17:29:12 +0100
committer: Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com> 2023-02-07 09:11:02 +0100
commit: d33a7ebcc16e804f1db0dc1f1edad4d9f9e816ef (patch)
tree: 93953221b8e208b4522d58db18bfba6b6edc47f6 /publisher/htmlElementsCollector.go
parent: 2a364cca6487e7ecc1982c8ade2927d18c592cee (diff)
Make the HTML collector parsing more robust
Most notably, better handling of self-closing elements. Closes #10698
Diffstat (limited to 'publisher/htmlElementsCollector.go')
-rw-r--r-- publisher/htmlElementsCollector.go 17
1 file changed, 13 insertions, 4 deletions
diff --git a/publisher/htmlElementsCollector.go b/publisher/htmlElementsCollector.go
index 91e1237a9..c3b88c4cc 100644
--- a/publisher/htmlElementsCollector.go
+++ b/publisher/htmlElementsCollector.go
@@ -294,9 +294,10 @@ func htmlLexElementStart(w *htmlElementsCollectorWriter) htmlCollectorStateFunc
}
tagName := w.buff.Bytes()[1:]
+ isSelfClosing := tagName[len(tagName)-1] == '/'
switch {
- case skipInnerElementRe.Match(tagName):
+ case !isSelfClosing && skipInnerElementRe.Match(tagName):
// pre, script etc. We collect classes etc. on the surrounding
// element, but skip the inner content.
w.backup()
@@ -432,10 +433,18 @@ func parseStartTag(s string) string {
})
if spaceIndex == -1 {
- return s[1 : len(s)-1]
+ s = s[1 : len(s)-1]
+ } else {
+ s = s[1:spaceIndex]
}
- return s[1:spaceIndex]
+ if s[len(s)-1] == '/' {
+ // Self closing.
+ s = s[:len(s)-1]
+ }
+
+ return s
+
}
// isClosedByTag reports whether b ends with a closing tag for tagName.
@@ -487,7 +496,7 @@ LOOP:
}
}
- if state != 2 {
+ if state != 2 || lo >= hi {
return false
}