summary refs log tree commit diff stats
path: root/publisher
diff options
context:
space:
mode:
Diffstat (limited to 'publisher')
-rw-r--r-- publisher/htmlElementsCollector.go 268
-rw-r--r-- publisher/htmlElementsCollector_test.go 81
-rw-r--r-- publisher/publisher.go 47
-rw-r--r-- publisher/publisher_test.go 14
4 files changed, 386 insertions, 24 deletions
diff --git a/publisher/htmlElementsCollector.go b/publisher/htmlElementsCollector.go
new file mode 100644
index 000000000..c6e0d3f0f
--- /dev/null
+++ b/publisher/htmlElementsCollector.go
@@ -0,0 +1,268 @@
+// Copyright 2020 The Hugo Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package publisher
+
+import (
+ "github.com/gohugoio/hugo/helpers"
+ "golang.org/x/net/html"
+ yaml "gopkg.in/yaml.v2"
+
+ "bytes"
+ "sort"
+ "strings"
+ "sync"
+)
+
+func newHTMLElementsCollector() *htmlElementsCollector { // Returns a collector whose elementSet map is allocated and empty; elements starts as nil.
+ return &htmlElementsCollector{
+ elementSet: make(map[string]bool),
+ }
+}
+
+func newHTMLElementsCollectorWriter(collector *htmlElementsCollector) *cssClassCollectorWriter { // Returns a writer that records into collector; its buffer is empty and all state flags are false.
+ return &cssClassCollectorWriter{
+ collector: collector,
+ }
+}
+
+// HTMLElements holds lists of tag names, CSS class names and id attribute values.
+type HTMLElements struct {
+ Tags []string `json:"tags"` // Element tag names.
+ Classes []string `json:"classes"` // Individual class names.
+ IDs []string `json:"ids"` // Values of id attributes.
+}
+
+func (h *HTMLElements) Merge(other HTMLElements) { // Merge appends other's tags, classes and ids to h and then de-duplicates each list; it does not call Sort.
+ h.Tags = append(h.Tags, other.Tags...)
+ h.Classes = append(h.Classes, other.Classes...)
+ h.IDs = append(h.IDs, other.IDs...)
+
+ h.Tags = helpers.UniqueStringsReuse(h.Tags) // NOTE(review): presumably de-duplicates reusing the slice's backing array — confirm in helpers.
+ h.Classes = helpers.UniqueStringsReuse(h.Classes)
+ h.IDs = helpers.UniqueStringsReuse(h.IDs)
+
+}
+
+func (h *HTMLElements) Sort() { // Sort sorts Tags, Classes and IDs in place in increasing string order.
+ sort.Strings(h.Tags)
+ sort.Strings(h.Classes)
+ sort.Strings(h.IDs)
+}
+
+type cssClassCollectorWriter struct { // Stateful writer that extracts start tags from the bytes passed to Write.
+ collector *htmlElementsCollector // Receives the parsed elements.
+ buff bytes.Buffer // Bytes of the tag currently being read, starting at '<'.
+
+ isCollecting bool // True from a '<' until the tag ends; survives across Write calls.
+ dropValue bool // True when the tag was ended by an unquoted '/', meaning buff is discarded.
+ inQuote bool // Flipped by every quote character seen while collecting.
+}
+
+func (w *cssClassCollectorWriter) Write(p []byte) (n int, err error) { // Write scans p for HTML start tags and records each new one in w.collector; it always returns len(p) and a nil error.
+ n = len(p)
+ i := 0
+
+ for i < len(p) {
+ if !w.isCollecting {
+ for ; i < len(p); i++ { // Skip everything up to the next '<'.
+ b := p[i]
+ if b == '<' {
+ w.startCollecting()
+ break // i still points at '<', so the loop below buffers it as the first byte of the tag.
+ }
+ }
+ }
+
+ if w.isCollecting { // Also true on entry when the previous Write ended in the middle of a tag.
+ for ; i < len(p); i++ {
+ b := p[i]
+ if !w.inQuote && b == '/' {
+ // End element, we don't care about those. NOTE(review): any unquoted '/' lands here, so self-closing tags and unquoted values containing '/' are dropped as well.
+ w.endCollecting(true)
+ break
+ }
+ w.toggleIfQuote(b) // NOTE(review): both quote kinds flip the same flag, so an apostrophe inside a double-quoted value also flips it.
+ if !w.inQuote && b == '>' {
+ w.endCollecting(false)
+ break // The closing '>' is not buffered; parseHTMLElement appends one when it is missing.
+ }
+ w.buff.WriteByte(b)
+ }
+
+ if !w.isCollecting { // The tag ended inside p; otherwise buff is kept for the next Write.
+ if w.dropValue {
+ w.buff.Reset()
+ } else {
+ // First check if we have processed this element before.
+ w.collector.mu.RLock()
+
+ // See https://github.com/dominikh/go-tools/issues/723
+ //lint:ignore S1030 This construct avoids memory allocation for the string.
+ seen := w.collector.elementSet[string(w.buff.Bytes())]
+ w.collector.mu.RUnlock()
+ if seen {
+ w.buff.Reset()
+ continue // Already recorded: skip parsing and resume scanning p.
+ }
+
+ s := w.buff.String() // Take the tag text as a string before the buffer is reset.
+
+ w.buff.Reset()
+
+ el := parseHTMLElement(s) // Parsing happens without holding the collector lock.
+
+ w.collector.mu.Lock()
+ w.collector.elementSet[s] = true // Remembered even when no tag was extracted, so s is not parsed again.
+ if el.Tag != "" {
+ w.collector.elements = append(w.collector.elements, el)
+ }
+ w.collector.mu.Unlock()
+ }
+ }
+ }
+ }
+
+ return
+}
+
+func (c *cssClassCollectorWriter) endCollecting(drop bool) { // Leaves collecting mode, clears the quote flag and records whether the buffered tag should be discarded.
+ c.isCollecting = false
+ c.inQuote = false
+ c.dropValue = drop
+}
+
+func (c *cssClassCollectorWriter) startCollecting() { // Enters collecting mode and clears the drop flag left by the previous tag.
+ c.isCollecting = true
+ c.dropValue = false
+}
+
+func (c *cssClassCollectorWriter) toggleIfQuote(b byte) { // Flips inQuote when b is a single or double quote; the two kinds are not tracked separately.
+ if isQuote(b) {
+ c.inQuote = !c.inQuote
+ }
+}
+
+type htmlElement struct { // Result of parsing one start tag with parseHTMLElement.
+ Tag string // Tag name; empty when nothing could be extracted.
+ Classes []string // Class names found in class and class-binding attributes.
+ IDs []string // Values of id attributes.
+}
+
+type htmlElementsCollector struct { // Shared store of the elements seen by cssClassCollectorWriter instances.
+ // Contains the raw HTML string. We will get the same element
+ // several times, and want to avoid costly reparsing when this
+ // is used for aggregated data only.
+ elementSet map[string]bool
+
+ elements []htmlElement // One entry per distinct raw tag string that yielded a non-empty Tag.
+
+ mu sync.RWMutex // Held by Write around elementSet and elements; getHTMLElements does not take it.
+}
+
+func (c *htmlElementsCollector) getHTMLElements() HTMLElements { // Flattens every collected element into a single HTMLElements value.
+
+ var (
+ classes []string
+ ids []string
+ tags []string
+ )
+
+ for _, el := range c.elements { // NOTE(review): read without c.mu; presumably only called once all writers are done — confirm.
+ classes = append(classes, el.Classes...)
+ ids = append(ids, el.IDs...)
+ tags = append(tags, el.Tag)
+ }
+
+ classes = helpers.UniqueStringsSorted(classes) // NOTE(review): presumably returns the values de-duplicated and sorted — confirm in helpers.
+ ids = helpers.UniqueStringsSorted(ids)
+ tags = helpers.UniqueStringsSorted(tags)
+
+ els := HTMLElements{
+ Classes: classes,
+ IDs: ids,
+ Tags: tags,
+ }
+
+ return els
+}
+
+func isQuote(b byte) bool { // Reports whether b is a double quote or a single quote.
+ return b == '"' || b == '\''
+}
+
+var htmlJsonFixer = strings.NewReplacer(", ", "\n") // Puts each ", "-separated pair of an inline class binding on its own line before YAML parsing.
+
+func parseHTMLElement(elStr string) (el htmlElement) { // Extracts tag name, class names and ids from a single start tag string; returns the zero htmlElement if parsing fails.
+ elStr = strings.TrimSpace(elStr)
+ if !strings.HasSuffix(elStr, ">") { // The writer passes the tag without its closing '>'.
+ elStr += ">"
+ }
+ n, err := html.Parse(strings.NewReader(elStr))
+ if err != nil {
+ return
+ }
+ var walk func(*html.Node)
+ walk = func(n *html.Node) {
+ if n.Type == html.ElementNode && strings.Contains(elStr, n.Data) { // NOTE(review): the substring test presumably skips elements the parser adds around the fragment — confirm; it is only approximate.
+ el.Tag = n.Data // Overwritten by every matching element; the last one visited wins.
+
+ for _, a := range n.Attr {
+ switch {
+ case strings.EqualFold(a.Key, "id"):
+ // There should be only one, but one never knows...
+ el.IDs = append(el.IDs, a.Val)
+ default:
+ if strings.EqualFold(a.Key, "class") {
+ el.Classes = append(el.Classes, strings.Fields(a.Val)...) // Plain class attribute: split on whitespace.
+ } else {
+ key := strings.ToLower(a.Key)
+ val := strings.TrimSpace(a.Val)
+ if strings.Contains(key, "class") && strings.HasPrefix(val, "{") { // Any other attribute whose name contains "class" and whose value starts with '{'.
+ // This looks like a Vue or AlpineJS class binding.
+ // Try to unmarshal it as YAML and pull the keys.
+ // This may look odd, as the source is (probably) JS (JSON), but the YAML
+ // parser is much more lenient with simple JS input, it seems.
+ m := make(map[string]interface{})
+ val = htmlJsonFixer.Replace(strings.Trim(val, "{}")) // Strip all leading and trailing braces, then put one pair per line.
+ // Remove leading space to make it look like YAML.
+ lines := strings.Split(val, "\n")
+ for i, l := range lines {
+ lines[i] = strings.TrimSpace(l)
+ }
+ val = strings.Join(lines, "\n")
+ err := yaml.Unmarshal([]byte(val), &m)
+ if err == nil {
+ for k := range m { // Map keys are the class names; a key may hold several, separated by whitespace.
+ el.Classes = append(el.Classes, strings.Fields(k)...)
+ }
+ } else {
+ // Just insert the raw values. This is used for CSS class pruning
+ // so, it's important not to leave out values that may be a CSS class.
+ el.Classes = append(el.Classes, strings.Fields(val)...)
+ }
+ }
+ }
+ }
+ }
+ }
+
+ for c := n.FirstChild; c != nil; c = c.NextSibling { // Depth-first over the whole parse tree.
+ walk(c)
+ }
+ }
+
+ walk(n)
+
+ return
+}
diff --git a/publisher/htmlElementsCollector_test.go b/publisher/htmlElementsCollector_test.go
new file mode 100644
index 000000000..3ef159d8b
--- /dev/null
+++ b/publisher/htmlElementsCollector_test.go
@@ -0,0 +1,81 @@
+// Copyright 2020 The Hugo Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package publisher
+
+import (
+ "fmt"
+ "strings"
+ "testing"
+
+ qt "github.com/frankban/quicktest"
+)
+
+func TestClassCollector(t *testing.T) { // Table-driven check of the elements collected from small HTML snippets.
+ c := qt.New((t))
+
+ f := func(tags, classes, ids string) HTMLElements { // Builds the expected value from space-separated lists; an empty string leaves that slice nil.
+ var tagss, classess, idss []string
+ if tags != "" {
+ tagss = strings.Split(tags, " ")
+ }
+ if classes != "" {
+ classess = strings.Split(classes, " ")
+ }
+ if ids != "" {
+ idss = strings.Split(ids, " ")
+ }
+ return HTMLElements{
+ Tags: tagss,
+ Classes: classess,
+ IDs: idss,
+ }
+ }
+
+ for _, test := range []struct {
+ name string
+ html string
+ expect HTMLElements
+ }{
+ {"basic", `<body class="b a"></body>`, f("body", "a b", "")},
+ {"duplicates", `<div class="b a b"></div>`, f("div", "a b", "")},
+ {"single quote", `<body class='b a'></body>`, f("body", "a b", "")},
+ {"no quote", `<body class=b id=myelement></body>`, f("body", "b", "myelement")},
+
+ {"AlpineJS bind 1", `<body>
+ <div x-bind:class="{
+ 'class1': data.open,
+ 'class2 class3': data.foo == 'bar'
+ }">
+ </div>
+ </body>`, f("body div", "class1 class2 class3", "")},
+
+ {"Alpine bind 2", `<div x-bind:class="{ 'bg-black': filter.checked }"
+ class="inline-block mr-1 mb-2 rounded bg-gray-300 px-2 py-2">FOO</div>`,
+ f("div", "bg-black bg-gray-300 inline-block mb-2 mr-1 px-2 py-2 rounded", "")},
+
+ {"Alpine bind 3", `<div x-bind:class="{ 'text-gray-800': !checked, 'text-white': checked }"></div>`, f("div", "text-gray-800 text-white", "")},
+ {"Alpine bind 4", `<div x-bind:class="{ 'text-gray-800': !checked,
+ 'text-white': checked }"></div>`, f("div", "text-gray-800 text-white", "")},
+
+ {"Vue bind", `<div v-bind:class="{ active: isActive }"></div>`, f("div", "active", "")},
+ } {
+ c.Run(test.name, func(c *qt.C) { // Every case uses a fresh collector and writer.
+ w := newHTMLElementsCollectorWriter(newHTMLElementsCollector())
+ fmt.Fprint(w, test.html)
+ got := w.collector.getHTMLElements()
+ c.Assert(got, qt.DeepEquals, test.expect)
+ })
+ }
+
+}
diff --git a/publisher/publisher.go b/publisher/publisher.go
index f30073c08..8b8d2fa63 100644
--- a/publisher/publisher.go
+++ b/publisher/publisher.go
@@ -1,4 +1,4 @@
-// Copyright 2019 The Hugo Authors. All rights reserved.
+// Copyright 2020 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -18,7 +18,8 @@ import (
"io"
"sync/atomic"
- "github.com/gohugoio/hugo/config"
+ "github.com/gohugoio/hugo/resources"
+
"github.com/gohugoio/hugo/media"
"github.com/gohugoio/hugo/minifiers"
@@ -68,17 +69,21 @@ type Descriptor struct {
// DestinationPublisher is the default and currently only publisher in Hugo. This
// publisher prepares and publishes an item to the defined destination, e.g. /public.
type DestinationPublisher struct {
- fs afero.Fs
- min minifiers.Client
+ fs afero.Fs
+ min minifiers.Client
+ htmlElementsCollector *htmlElementsCollector // nil unless the publisher was created with rs.BuildConfig.WriteStats set; Publish and PublishStats check for nil.
}
// NewDestinationPublisher creates a new DestinationPublisher.
-func NewDestinationPublisher(fs afero.Fs, outputFormats output.Formats, mediaTypes media.Types, cfg config.Provider) (pub DestinationPublisher, err error) {
- pub = DestinationPublisher{fs: fs}
- pub.min, err = minifiers.New(mediaTypes, outputFormats, cfg)
- if err != nil {
- return
+func NewDestinationPublisher(rs *resources.Spec, outputFormats output.Formats, mediaTypes media.Types) (pub DestinationPublisher, err error) { // The publish filesystem and the config are now taken from rs.
+ fs := rs.BaseFs.PublishFs
+ cfg := rs.Cfg
+ var classCollector *htmlElementsCollector // Stays nil, which disables element collection, unless WriteStats is set.
+ if rs.BuildConfig.WriteStats {
+ classCollector = newHTMLElementsCollector()
}
+ pub = DestinationPublisher{fs: fs, htmlElementsCollector: classCollector}
+ pub.min, err = minifiers.New(mediaTypes, outputFormats, cfg) // A non-nil err is returned to the caller together with the partially initialised pub.
return
}
@@ -111,16 +116,38 @@ func (p DestinationPublisher) Publish(d Descriptor) error {
}
defer f.Close()
- _, err = io.Copy(f, src)
+ var w io.Writer = f
+
+ if p.htmlElementsCollector != nil && d.OutputFormat.IsHTML {
+ w = io.MultiWriter(w, newHTMLElementsCollectorWriter(p.htmlElementsCollector))
+ }
+
+ _, err = io.Copy(w, src)
if err == nil && d.StatCounter != nil {
atomic.AddUint64(d.StatCounter, uint64(1))
}
+
return err
}
+func (p DestinationPublisher) PublishStats() PublishStats { // PublishStats returns the HTML elements collected so far, or a zero PublishStats when no collector is configured.
+ if p.htmlElementsCollector == nil {
+ return PublishStats{}
+ }
+
+ return PublishStats{
+ HTMLElements: p.htmlElementsCollector.getHTMLElements(),
+ }
+}
+
+type PublishStats struct { // PublishStats is the value returned by Publisher.PublishStats.
+ HTMLElements HTMLElements `json:"htmlElements"` // Tags, classes and ids gathered from published HTML.
+}
+
// Publisher publishes a result file.
type Publisher interface {
Publish(d Descriptor) error
+ PublishStats() PublishStats // Returns the statistics gathered by earlier Publish calls.
}
// XML transformer := transform.New(urlreplacers.NewAbsURLInXMLTransformer(path))
diff --git a/publisher/publisher_test.go b/publisher/publisher_test.go
deleted file mode 100644
index 200accc8b..000000000
--- a/publisher/publisher_test.go
+++ /dev/null
@@ -1,14 +0,0 @@
-// Copyright 2018 The Hugo Authors. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package publisher