summaryrefslogtreecommitdiffstats
path: root/hugolib
diff options
context:
space:
mode:
authorBjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>2017-08-19 13:16:00 +0200
committerBjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>2017-09-06 00:20:02 +0200
commit3b4f17bbc9ff789faa581ac278ad109d1ac5b816 (patch)
tree7b706ad5fce15afa1825b6565bae09bc517cc687 /hugolib
parent16c9127663951ace1a3901cf669c49cc72780ced (diff)
hugolib: Implement "related content"
This closes #98, even if this commit does not do full content text search. We may revisit that problem in the future, but that deserves its own issue. Fixes #98
Diffstat (limited to 'hugolib')
-rw-r--r--hugolib/page.go42
-rw-r--r--hugolib/pageCache.go10
-rw-r--r--hugolib/pageCache_test.go4
-rw-r--r--hugolib/pageGroup.go4
-rw-r--r--hugolib/pageSort_test.go2
-rw-r--r--hugolib/pages_related.go191
-rw-r--r--hugolib/pages_related_test.go75
-rw-r--r--hugolib/site.go21
8 files changed, 339 insertions, 10 deletions
diff --git a/hugolib/page.go b/hugolib/page.go
index c29590802..a723cabb2 100644
--- a/hugolib/page.go
+++ b/hugolib/page.go
@@ -20,6 +20,8 @@ import (
"reflect"
"unicode"
+ "github.com/gohugoio/hugo/related"
+
"github.com/bep/gitmap"
"github.com/gohugoio/hugo/helpers"
@@ -54,6 +56,9 @@ var (
// Assert that it implements the Eqer interface.
_ compare.Eqer = (*Page)(nil)
_ compare.Eqer = (*PageOutput)(nil)
+
+ // Assert that it implements the interface needed for related searches.
+ _ related.Document = (*Page)(nil)
)
const (
@@ -231,6 +236,28 @@ type Page struct {
targetPathDescriptorPrototype *targetPathDescriptor
}
+// SearchKeywords implements the related.Document interface needed for fast page searches.
+func (p *Page) SearchKeywords(cfg related.IndexConfig) ([]related.Keyword, error) {
+
+ v, err := p.Param(cfg.Name)
+ if err != nil {
+ return nil, err
+ }
+
+ return cfg.ToKeywords(v)
+}
+
+// PubDate is when this page was or will be published.
+// NOTE: This is currently used for search only and is not meant to be used
+// directly in templates. We need to consolidate the dates in this struct.
+// TODO(bep) see https://github.com/gohugoio/hugo/issues/3854
+func (p *Page) PubDate() time.Time {
+ if !p.PublishDate.IsZero() {
+ return p.PublishDate
+ }
+ return p.Date
+}
+
func (p *Page) RSSLink() template.URL {
f, found := p.outputFormats.GetByName(output.RSSFormat.Name)
if !found {
@@ -329,6 +356,21 @@ func (ps Pages) findPagePosByFilePath(inPath string) int {
return -1
}
+func (ps Pages) removeFirstIfFound(p *Page) Pages {
+ ii := -1
+ for i, pp := range ps {
+ if pp == p {
+ ii = i
+ break
+ }
+ }
+
+ if ii != -1 {
+ ps = append(ps[:ii], ps[ii+1:]...)
+ }
+ return ps
+}
+
func (ps Pages) findFirstPagePosByFilePathPrefix(prefix string) int {
if prefix == "" {
return -1
diff --git a/hugolib/pageCache.go b/hugolib/pageCache.go
index e0a3a160b..df381c679 100644
--- a/hugolib/pageCache.go
+++ b/hugolib/pageCache.go
@@ -36,7 +36,7 @@ func (c *pageCache) get(key string, p Pages, apply func(p Pages)) (Pages, bool)
c.RLock()
if cached, ok := c.m[key]; ok {
for _, ps := range cached {
- if probablyEqualPages(p, ps[0]) {
+ if fastEqualPages(p, ps[0]) {
c.RUnlock()
return ps[1], true
}
@@ -51,7 +51,7 @@ func (c *pageCache) get(key string, p Pages, apply func(p Pages)) (Pages, bool)
// double-check
if cached, ok := c.m[key]; ok {
for _, ps := range cached {
- if probablyEqualPages(p, ps[0]) {
+ if fastEqualPages(p, ps[0]) {
return ps[1], true
}
}
@@ -73,10 +73,10 @@ func (c *pageCache) get(key string, p Pages, apply func(p Pages)) (Pages, bool)
}
-// "probably" as in: we do not compare every element for big slices, but that is
-// good enough for our use case.
+// "fast" as in: we do not compare every element for big slices, but that is
+// good enough for our use cases.
// TODO(bep) there is a similar method in pagination.go. DRY.
-func probablyEqualPages(p1, p2 Pages) bool {
+func fastEqualPages(p1, p2 Pages) bool {
if p1 == nil && p2 == nil {
return true
}
diff --git a/hugolib/pageCache_test.go b/hugolib/pageCache_test.go
index 62837394f..aa2adf6e8 100644
--- a/hugolib/pageCache_test.go
+++ b/hugolib/pageCache_test.go
@@ -56,8 +56,8 @@ func TestPageCache(t *testing.T) {
l1.Unlock()
p2, c2 := c1.get("k1", p, nil)
assert.True(t, c2)
- assert.True(t, probablyEqualPages(p, p2))
- assert.True(t, probablyEqualPages(p, pages))
+ assert.True(t, fastEqualPages(p, p2))
+ assert.True(t, fastEqualPages(p, pages))
assert.NotNil(t, p)
l2.Lock()
diff --git a/hugolib/pageGroup.go b/hugolib/pageGroup.go
index 343ecf52e..3ccd35a06 100644
--- a/hugolib/pageGroup.go
+++ b/hugolib/pageGroup.go
@@ -24,8 +24,8 @@ import (
// PageGroup represents a group of pages, grouped by the key.
// The key is typically a year or similar.
type PageGroup struct {
- Key interface{}
- Pages Pages
+ Key interface{}
+ Pages
}
type mapKeyValues []reflect.Value
diff --git a/hugolib/pageSort_test.go b/hugolib/pageSort_test.go
index a17f53dc6..6379dccbe 100644
--- a/hugolib/pageSort_test.go
+++ b/hugolib/pageSort_test.go
@@ -115,7 +115,7 @@ func TestPageSortReverse(t *testing.T) {
assert.Equal(t, 9, p2[0].fuzzyWordCount)
assert.Equal(t, 0, p2[9].fuzzyWordCount)
// cached
- assert.True(t, probablyEqualPages(p2, p1.Reverse()))
+ assert.True(t, fastEqualPages(p2, p1.Reverse()))
}
func TestPageSortByParam(t *testing.T) {
diff --git a/hugolib/pages_related.go b/hugolib/pages_related.go
new file mode 100644
index 000000000..858ad0d11
--- /dev/null
+++ b/hugolib/pages_related.go
@@ -0,0 +1,191 @@
+// Copyright 2017-present The Hugo Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package hugolib
+
+import (
+ "sync"
+
+ "github.com/gohugoio/hugo/common/types"
+ "github.com/gohugoio/hugo/related"
+ "github.com/spf13/cast"
+)
+
+var (
+ // Assert that Pages and PageGroup implements the PageGenealogist interface.
+ _ PageGenealogist = (Pages)(nil)
+ _ PageGenealogist = PageGroup{}
+)
+
+// A PageGenealogist finds related pages in a page collection. This interface is implemented
+// by Pages and PageGroup, which makes it available as `{{ .RegularPages.Related . }}` etc.
+type PageGenealogist interface {
+
+ // Template example:
+ // {{ $related := .RegularPages.Related . }}
+ Related(doc related.Document) (Pages, error)
+
+ // Template example:
+ // {{ $related := .RegularPages.RelatedIndices . "tags" "date" }}
+ RelatedIndices(doc related.Document, indices ...interface{}) (Pages, error)
+
+ // Template example:
+ // {{ $related := .RegularPages.RelatedTo ( keyVals "tags" "hugo" "rocks" ) ( keyVals "date" .Date ) }}
+ RelatedTo(args ...types.KeyValues) (Pages, error)
+}
+
+// Related searches all the configured indices with the search keywords from the
+// supplied document.
+func (p Pages) Related(doc related.Document) (Pages, error) {
+ page, err := unwrapPage(doc)
+ if err != nil {
+ return nil, err
+ }
+
+ result, err := p.searchDoc(page)
+ if err != nil {
+ return nil, err
+ }
+
+ return result.removeFirstIfFound(page), nil
+}
+
+// RelatedIndices searches the given indices with the search keywords from the
+// supplied document.
+func (p Pages) RelatedIndices(doc related.Document, indices ...interface{}) (Pages, error) {
+ page, err := unwrapPage(doc)
+ if err != nil {
+ return nil, err
+ }
+
+ indicesStr, err := cast.ToStringSliceE(indices)
+ if err != nil {
+ return nil, err
+ }
+
+ result, err := p.searchDoc(page, indicesStr...)
+ if err != nil {
+ return nil, err
+ }
+
+ return result.removeFirstIfFound(page), nil
+
+}
+
+// RelatedTo searches the given indices with the corresponding values.
+func (p Pages) RelatedTo(args ...types.KeyValues) (Pages, error) {
+ if len(p) == 0 {
+ return nil, nil
+ }
+
+ return p.search(args...)
+
+}
+
+func (p Pages) search(args ...types.KeyValues) (Pages, error) {
+ return p.withInvertedIndex(func(idx *related.InvertedIndex) ([]related.Document, error) {
+ return idx.SearchKeyValues(args...)
+ })
+
+}
+
+func (p Pages) searchDoc(doc related.Document, indices ...string) (Pages, error) {
+ return p.withInvertedIndex(func(idx *related.InvertedIndex) ([]related.Document, error) {
+ return idx.SearchDoc(doc, indices...)
+ })
+}
+
+func (p Pages) withInvertedIndex(search func(idx *related.InvertedIndex) ([]related.Document, error)) (Pages, error) {
+ if len(p) == 0 {
+ return nil, nil
+ }
+
+ cache := p[0].s.relatedDocsHandler
+
+ searchIndex, err := cache.getOrCreateIndex(p)
+ if err != nil {
+ return nil, err
+ }
+
+ result, err := search(searchIndex)
+ if err != nil {
+ return nil, err
+ }
+
+ if len(result) > 0 {
+ mp := make(Pages, len(result))
+ for i, match := range result {
+ mp[i] = match.(*Page)
+ }
+ return mp, nil
+ }
+
+ return nil, nil
+}
+
+type cachedPostingList struct {
+ p Pages
+
+ postingList *related.InvertedIndex
+}
+
+type relatedDocsHandler struct {
+ // This is configured in site or language config.
+ cfg related.Config
+
+ postingLists []*cachedPostingList
+ mu sync.RWMutex
+}
+
+func newSearchIndexHandler(cfg related.Config) *relatedDocsHandler {
+ return &relatedDocsHandler{cfg: cfg}
+}
+
+// This assumes that a lock has been acquired.
+func (s *relatedDocsHandler) getIndex(p Pages) *related.InvertedIndex {
+ for _, ci := range s.postingLists {
+ if fastEqualPages(p, ci.p) {
+ return ci.postingList
+ }
+ }
+ return nil
+}
+
+func (s *relatedDocsHandler) getOrCreateIndex(p Pages) (*related.InvertedIndex, error) {
+ s.mu.RLock()
+ cachedIndex := s.getIndex(p)
+ if cachedIndex != nil {
+ s.mu.RUnlock()
+ return cachedIndex, nil
+ }
+ s.mu.RUnlock()
+
+ s.mu.Lock()
+ defer s.mu.Unlock()
+
+ if cachedIndex := s.getIndex(p); cachedIndex != nil {
+ return cachedIndex, nil
+ }
+
+ searchIndex := related.NewInvertedIndex(s.cfg)
+
+ for _, page := range p {
+ if err := searchIndex.Add(page); err != nil {
+ return nil, err
+ }
+ }
+
+ s.postingLists = append(s.postingLists, &cachedPostingList{p: p, postingList: searchIndex})
+
+ return searchIndex, nil
+}
diff --git a/hugolib/pages_related_test.go b/hugolib/pages_related_test.go
new file mode 100644
index 000000000..cf5da0983
--- /dev/null
+++ b/hugolib/pages_related_test.go
@@ -0,0 +1,75 @@
+// Copyright 2017-present The Hugo Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package hugolib
+
+import (
+ "fmt"
+ "path/filepath"
+ "testing"
+
+ "github.com/gohugoio/hugo/common/types"
+ "github.com/gohugoio/hugo/deps"
+
+ "github.com/stretchr/testify/require"
+)
+
+func TestRelated(t *testing.T) {
+ assert := require.New(t)
+
+ t.Parallel()
+
+ var (
+ cfg, fs = newTestCfg()
+ //th = testHelper{cfg, fs, t}
+ )
+
+ pageTmpl := `---
+title: Page %d
+keywords: [%s]
+date: %s
+---
+
+Content
+`
+
+ writeSource(t, fs, filepath.Join("content", "page1.md"), fmt.Sprintf(pageTmpl, 1, "hugo, says", "2017-01-03"))
+ writeSource(t, fs, filepath.Join("content", "page2.md"), fmt.Sprintf(pageTmpl, 2, "hugo, rocks", "2017-01-02"))
+ writeSource(t, fs, filepath.Join("content", "page3.md"), fmt.Sprintf(pageTmpl, 3, "bep, says", "2017-01-01"))
+
+ s := buildSingleSite(t, deps.DepsCfg{Fs: fs, Cfg: cfg}, BuildCfg{SkipRender: true})
+ assert.Len(s.RegularPages, 3)
+
+ result, err := s.RegularPages.RelatedTo(types.NewKeyValuesStrings("keywords", "hugo", "rocks"))
+
+ assert.NoError(err)
+ assert.Len(result, 2)
+ assert.Equal("Page 2", result[0].Title)
+ assert.Equal("Page 1", result[1].Title)
+
+ result, err = s.RegularPages.Related(s.RegularPages[0])
+ assert.Len(result, 2)
+ assert.Equal("Page 2", result[0].Title)
+ assert.Equal("Page 3", result[1].Title)
+
+ result, err = s.RegularPages.RelatedIndices(s.RegularPages[0], "keywords")
+ assert.Len(result, 2)
+ assert.Equal("Page 2", result[0].Title)
+ assert.Equal("Page 3", result[1].Title)
+
+ result, err = s.RegularPages.RelatedTo(types.NewKeyValuesStrings("keywords", "bep", "rocks"))
+ assert.Len(result, 2)
+ assert.Equal("Page 2", result[0].Title)
+ assert.Equal("Page 3", result[1].Title)
+
+}
diff --git a/hugolib/site.go b/hugolib/site.go
index 13ca7f144..b8898264a 100644
--- a/hugolib/site.go
+++ b/hugolib/site.go
@@ -42,6 +42,7 @@ import (
"github.com/gohugoio/hugo/helpers"
"github.com/gohugoio/hugo/output"
"github.com/gohugoio/hugo/parser"
+ "github.com/gohugoio/hugo/related"
"github.com/gohugoio/hugo/source"
"github.com/gohugoio/hugo/tpl"
"github.com/gohugoio/hugo/transform"
@@ -135,6 +136,8 @@ type Site struct {
// The func used to title case titles.
titleFunc func(s string) string
+ relatedDocsHandler *relatedDocsHandler
+
siteStats *siteStats
}
@@ -176,6 +179,7 @@ func (s *Site) reset() *Site {
layoutHandler: output.NewLayoutHandler(s.PathSpec.ThemeSet()),
disabledKinds: s.disabledKinds,
titleFunc: s.titleFunc,
+ relatedDocsHandler: newSearchIndexHandler(s.relatedDocsHandler.cfg),
outputFormats: s.outputFormats,
outputFormatsConfig: s.outputFormatsConfig,
mediaTypesConfig: s.mediaTypesConfig,
@@ -231,6 +235,21 @@ func newSite(cfg deps.DepsCfg) (*Site, error) {
return nil, err
}
+ var relatedContentConfig related.Config
+
+ if cfg.Language.IsSet("related") {
+ relatedContentConfig, err = related.DecodeConfig(cfg.Language.Get("related"))
+ if err != nil {
+ return nil, err
+ }
+ } else {
+ relatedContentConfig = related.DefaultConfig
+ taxonomies := cfg.Language.GetStringMapString("taxonomies")
+ if _, found := taxonomies["tag"]; found {
+ relatedContentConfig.Add(related.IndexConfig{Name: "tags", Weight: 80})
+ }
+ }
+
titleFunc := helpers.GetTitleFunc(cfg.Language.GetString("titleCaseStyle"))
s := &Site{
@@ -239,6 +258,7 @@ func newSite(cfg deps.DepsCfg) (*Site, error) {
Language: cfg.Language,
disabledKinds: disabledKinds,
titleFunc: titleFunc,
+ relatedDocsHandler: newSearchIndexHandler(relatedContentConfig),
outputFormats: outputFormats,
outputFormatsConfig: siteOutputFormatsConfig,
mediaTypesConfig: siteMediaTypesConfig,
@@ -1607,6 +1627,7 @@ func (s *Site) assembleTaxonomies() {
// Prepare site for a new full build.
func (s *Site) resetBuildState() {
+ s.relatedDocsHandler = newSearchIndexHandler(s.relatedDocsHandler.cfg)
s.PageCollections = newPageCollectionsFromPages(s.rawAllPages)
// TODO(bep) get rid of this double
s.Info.PageCollections = s.PageCollections