diff options
| | | |
|---|---|---|
| author | Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com> | 2017-08-19 13:16:00 +0200 |
| committer | Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com> | 2017-09-06 00:20:02 +0200 |
| commit | 3b4f17bbc9ff789faa581ac278ad109d1ac5b816 (patch) | |
| tree | 7b706ad5fce15afa1825b6565bae09bc517cc687 /hugolib | |
| parent | 16c9127663951ace1a3901cf669c49cc72780ced (diff) | |
hugolib: Implement "related content"
This closes #98, even if this commit does not do full content text search.
We may revisit that problem in the future, but that deserves its own issue.
Fixes #98
Diffstat (limited to 'hugolib')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | hugolib/page.go | 42 |
| -rw-r--r-- | hugolib/pageCache.go | 10 |
| -rw-r--r-- | hugolib/pageCache_test.go | 4 |
| -rw-r--r-- | hugolib/pageGroup.go | 4 |
| -rw-r--r-- | hugolib/pageSort_test.go | 2 |
| -rw-r--r-- | hugolib/pages_related.go | 191 |
| -rw-r--r-- | hugolib/pages_related_test.go | 75 |
| -rw-r--r-- | hugolib/site.go | 21 |
8 files changed, 339 insertions, 10 deletions
diff --git a/hugolib/page.go b/hugolib/page.go index c29590802..a723cabb2 100644 --- a/hugolib/page.go +++ b/hugolib/page.go @@ -20,6 +20,8 @@ import ( "reflect" "unicode" + "github.com/gohugoio/hugo/related" + "github.com/bep/gitmap" "github.com/gohugoio/hugo/helpers" @@ -54,6 +56,9 @@ var ( // Assert that it implements the Eqer interface. _ compare.Eqer = (*Page)(nil) _ compare.Eqer = (*PageOutput)(nil) + + // Assert that it implements the interface needed for related searches. + _ related.Document = (*Page)(nil) ) const ( @@ -231,6 +236,28 @@ type Page struct { targetPathDescriptorPrototype *targetPathDescriptor } +// SearchKeywords implements the related.Document interface needed for fast page searches. +func (p *Page) SearchKeywords(cfg related.IndexConfig) ([]related.Keyword, error) { + + v, err := p.Param(cfg.Name) + if err != nil { + return nil, err + } + + return cfg.ToKeywords(v) +} + +// PubDate is when this page was or will be published. +// NOTE: This is currently used for search only and is not meant to be used +// directly in templates. We need to consolidate the dates in this struct. +// TODO(bep) see https://github.com/gohugoio/hugo/issues/3854 +func (p *Page) PubDate() time.Time { + if !p.PublishDate.IsZero() { + return p.PublishDate + } + return p.Date +} + func (p *Page) RSSLink() template.URL { f, found := p.outputFormats.GetByName(output.RSSFormat.Name) if !found { @@ -329,6 +356,21 @@ func (ps Pages) findPagePosByFilePath(inPath string) int { return -1 } +func (ps Pages) removeFirstIfFound(p *Page) Pages { + ii := -1 + for i, pp := range ps { + if pp == p { + ii = i + break + } + } + + if ii != -1 { + ps = append(ps[:ii], ps[ii+1:]...) 
+ } + return ps +} + func (ps Pages) findFirstPagePosByFilePathPrefix(prefix string) int { if prefix == "" { return -1 diff --git a/hugolib/pageCache.go b/hugolib/pageCache.go index e0a3a160b..df381c679 100644 --- a/hugolib/pageCache.go +++ b/hugolib/pageCache.go @@ -36,7 +36,7 @@ func (c *pageCache) get(key string, p Pages, apply func(p Pages)) (Pages, bool) c.RLock() if cached, ok := c.m[key]; ok { for _, ps := range cached { - if probablyEqualPages(p, ps[0]) { + if fastEqualPages(p, ps[0]) { c.RUnlock() return ps[1], true } @@ -51,7 +51,7 @@ func (c *pageCache) get(key string, p Pages, apply func(p Pages)) (Pages, bool) // double-check if cached, ok := c.m[key]; ok { for _, ps := range cached { - if probablyEqualPages(p, ps[0]) { + if fastEqualPages(p, ps[0]) { return ps[1], true } } @@ -73,10 +73,10 @@ func (c *pageCache) get(key string, p Pages, apply func(p Pages)) (Pages, bool) } -// "probably" as in: we do not compare every element for big slices, but that is -// good enough for our use case. +// "fast" as in: we do not compare every element for big slices, but that is +// good enough for our use cases. // TODO(bep) there is a similar method in pagination.go. DRY. 
-func probablyEqualPages(p1, p2 Pages) bool { +func fastEqualPages(p1, p2 Pages) bool { if p1 == nil && p2 == nil { return true } diff --git a/hugolib/pageCache_test.go b/hugolib/pageCache_test.go index 62837394f..aa2adf6e8 100644 --- a/hugolib/pageCache_test.go +++ b/hugolib/pageCache_test.go @@ -56,8 +56,8 @@ func TestPageCache(t *testing.T) { l1.Unlock() p2, c2 := c1.get("k1", p, nil) assert.True(t, c2) - assert.True(t, probablyEqualPages(p, p2)) - assert.True(t, probablyEqualPages(p, pages)) + assert.True(t, fastEqualPages(p, p2)) + assert.True(t, fastEqualPages(p, pages)) assert.NotNil(t, p) l2.Lock() diff --git a/hugolib/pageGroup.go b/hugolib/pageGroup.go index 343ecf52e..3ccd35a06 100644 --- a/hugolib/pageGroup.go +++ b/hugolib/pageGroup.go @@ -24,8 +24,8 @@ import ( // PageGroup represents a group of pages, grouped by the key. // The key is typically a year or similar. type PageGroup struct { - Key interface{} - Pages Pages + Key interface{} + Pages } type mapKeyValues []reflect.Value diff --git a/hugolib/pageSort_test.go b/hugolib/pageSort_test.go index a17f53dc6..6379dccbe 100644 --- a/hugolib/pageSort_test.go +++ b/hugolib/pageSort_test.go @@ -115,7 +115,7 @@ func TestPageSortReverse(t *testing.T) { assert.Equal(t, 9, p2[0].fuzzyWordCount) assert.Equal(t, 0, p2[9].fuzzyWordCount) // cached - assert.True(t, probablyEqualPages(p2, p1.Reverse())) + assert.True(t, fastEqualPages(p2, p1.Reverse())) } func TestPageSortByParam(t *testing.T) { diff --git a/hugolib/pages_related.go b/hugolib/pages_related.go new file mode 100644 index 000000000..858ad0d11 --- /dev/null +++ b/hugolib/pages_related.go @@ -0,0 +1,191 @@ +// Copyright 2017-present The Hugo Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package hugolib + +import ( + "sync" + + "github.com/gohugoio/hugo/common/types" + "github.com/gohugoio/hugo/related" + "github.com/spf13/cast" +) + +var ( + // Assert that Pages and PageGroup implements the PageGenealogist interface. + _ PageGenealogist = (Pages)(nil) + _ PageGenealogist = PageGroup{} +) + +// A PageGenealogist finds related pages in a page collection. This interface is implemented +// by Pages and PageGroup, which makes it available as `{{ .RegularPages.Related . }}` etc. +type PageGenealogist interface { + + // Template example: + // {{ $related := .RegularPages.Related . }} + Related(doc related.Document) (Pages, error) + + // Template example: + // {{ $related := .RegularPages.RelatedIndices . "tags" "date" }} + RelatedIndices(doc related.Document, indices ...interface{}) (Pages, error) + + // Template example: + // {{ $related := .RegularPages.RelatedTo ( keyVals "tags" "hugo", "rocks") ( keyVals "date" .Date ) }} + RelatedTo(args ...types.KeyValues) (Pages, error) +} + +// Related searches all the configured indices with the search keywords from the +// supplied document. +func (p Pages) Related(doc related.Document) (Pages, error) { + page, err := unwrapPage(doc) + if err != nil { + return nil, err + } + + result, err := p.searchDoc(page) + if err != nil { + return nil, err + } + + return result.removeFirstIfFound(page), nil +} + +// RelatedIndices searches the given indices with the search keywords from the +// supplied document. 
+func (p Pages) RelatedIndices(doc related.Document, indices ...interface{}) (Pages, error) { + page, err := unwrapPage(doc) + if err != nil { + return nil, err + } + + indicesStr, err := cast.ToStringSliceE(indices) + if err != nil { + return nil, err + } + + result, err := p.searchDoc(page, indicesStr...) + if err != nil { + return nil, err + } + + return result.removeFirstIfFound(page), nil + +} + +// RelatedTo searches the given indices with the corresponding values. +func (p Pages) RelatedTo(args ...types.KeyValues) (Pages, error) { + if len(p) == 0 { + return nil, nil + } + + return p.search(args...) + +} + +func (p Pages) search(args ...types.KeyValues) (Pages, error) { + return p.withInvertedIndex(func(idx *related.InvertedIndex) ([]related.Document, error) { + return idx.SearchKeyValues(args...) + }) + +} + +func (p Pages) searchDoc(doc related.Document, indices ...string) (Pages, error) { + return p.withInvertedIndex(func(idx *related.InvertedIndex) ([]related.Document, error) { + return idx.SearchDoc(doc, indices...) + }) +} + +func (p Pages) withInvertedIndex(search func(idx *related.InvertedIndex) ([]related.Document, error)) (Pages, error) { + if len(p) == 0 { + return nil, nil + } + + cache := p[0].s.relatedDocsHandler + + searchIndex, err := cache.getOrCreateIndex(p) + if err != nil { + return nil, err + } + + result, err := search(searchIndex) + if err != nil { + return nil, err + } + + if len(result) > 0 { + mp := make(Pages, len(result)) + for i, match := range result { + mp[i] = match.(*Page) + } + return mp, nil + } + + return nil, nil +} + +type cachedPostingList struct { + p Pages + + postingList *related.InvertedIndex +} + +type relatedDocsHandler struct { + // This is configured in site or langugage config. 
+ cfg related.Config + + postingLists []*cachedPostingList + mu sync.RWMutex +} + +func newSearchIndexHandler(cfg related.Config) *relatedDocsHandler { + return &relatedDocsHandler{cfg: cfg} +} + +// This assumes that a lock has been aquired. +func (s *relatedDocsHandler) getIndex(p Pages) *related.InvertedIndex { + for _, ci := range s.postingLists { + if fastEqualPages(p, ci.p) { + return ci.postingList + } + } + return nil +} + +func (s *relatedDocsHandler) getOrCreateIndex(p Pages) (*related.InvertedIndex, error) { + s.mu.RLock() + cachedIndex := s.getIndex(p) + if cachedIndex != nil { + s.mu.RUnlock() + return cachedIndex, nil + } + s.mu.RUnlock() + + s.mu.Lock() + defer s.mu.Unlock() + + if cachedIndex := s.getIndex(p); cachedIndex != nil { + return cachedIndex, nil + } + + searchIndex := related.NewInvertedIndex(s.cfg) + + for _, page := range p { + if err := searchIndex.Add(page); err != nil { + return nil, err + } + } + + s.postingLists = append(s.postingLists, &cachedPostingList{p: p, postingList: searchIndex}) + + return searchIndex, nil +} diff --git a/hugolib/pages_related_test.go b/hugolib/pages_related_test.go new file mode 100644 index 000000000..cf5da0983 --- /dev/null +++ b/hugolib/pages_related_test.go @@ -0,0 +1,75 @@ +// Copyright 2017-present The Hugo Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package hugolib + +import ( + "fmt" + "path/filepath" + "testing" + + "github.com/gohugoio/hugo/common/types" + "github.com/gohugoio/hugo/deps" + + "github.com/stretchr/testify/require" +) + +func TestRelated(t *testing.T) { + assert := require.New(t) + + t.Parallel() + + var ( + cfg, fs = newTestCfg() + //th = testHelper{cfg, fs, t} + ) + + pageTmpl := `--- +title: Page %d +keywords: [%s] +date: %s +--- + +Content +` + + writeSource(t, fs, filepath.Join("content", "page1.md"), fmt.Sprintf(pageTmpl, 1, "hugo, says", "2017-01-03")) + writeSource(t, fs, filepath.Join("content", "page2.md"), fmt.Sprintf(pageTmpl, 2, "hugo, rocks", "2017-01-02")) + writeSource(t, fs, filepath.Join("content", "page3.md"), fmt.Sprintf(pageTmpl, 3, "bep, says", "2017-01-01")) + + s := buildSingleSite(t, deps.DepsCfg{Fs: fs, Cfg: cfg}, BuildCfg{SkipRender: true}) + assert.Len(s.RegularPages, 3) + + result, err := s.RegularPages.RelatedTo(types.NewKeyValuesStrings("keywords", "hugo", "rocks")) + + assert.NoError(err) + assert.Len(result, 2) + assert.Equal("Page 2", result[0].Title) + assert.Equal("Page 1", result[1].Title) + + result, err = s.RegularPages.Related(s.RegularPages[0]) + assert.Len(result, 2) + assert.Equal("Page 2", result[0].Title) + assert.Equal("Page 3", result[1].Title) + + result, err = s.RegularPages.RelatedIndices(s.RegularPages[0], "keywords") + assert.Len(result, 2) + assert.Equal("Page 2", result[0].Title) + assert.Equal("Page 3", result[1].Title) + + result, err = s.RegularPages.RelatedTo(types.NewKeyValuesStrings("keywords", "bep", "rocks")) + assert.Len(result, 2) + assert.Equal("Page 2", result[0].Title) + assert.Equal("Page 3", result[1].Title) + +} diff --git a/hugolib/site.go b/hugolib/site.go index 13ca7f144..b8898264a 100644 --- a/hugolib/site.go +++ b/hugolib/site.go @@ -42,6 +42,7 @@ import ( "github.com/gohugoio/hugo/helpers" "github.com/gohugoio/hugo/output" "github.com/gohugoio/hugo/parser" + "github.com/gohugoio/hugo/related" 
"github.com/gohugoio/hugo/source" "github.com/gohugoio/hugo/tpl" "github.com/gohugoio/hugo/transform" @@ -135,6 +136,8 @@ type Site struct { // The func used to title case titles. titleFunc func(s string) string + relatedDocsHandler *relatedDocsHandler + siteStats *siteStats } @@ -176,6 +179,7 @@ func (s *Site) reset() *Site { layoutHandler: output.NewLayoutHandler(s.PathSpec.ThemeSet()), disabledKinds: s.disabledKinds, titleFunc: s.titleFunc, + relatedDocsHandler: newSearchIndexHandler(s.relatedDocsHandler.cfg), outputFormats: s.outputFormats, outputFormatsConfig: s.outputFormatsConfig, mediaTypesConfig: s.mediaTypesConfig, @@ -231,6 +235,21 @@ func newSite(cfg deps.DepsCfg) (*Site, error) { return nil, err } + var relatedContentConfig related.Config + + if cfg.Language.IsSet("related") { + relatedContentConfig, err = related.DecodeConfig(cfg.Language.Get("related")) + if err != nil { + return nil, err + } + } else { + relatedContentConfig = related.DefaultConfig + taxonomies := cfg.Language.GetStringMapString("taxonomies") + if _, found := taxonomies["tag"]; found { + relatedContentConfig.Add(related.IndexConfig{Name: "tags", Weight: 80}) + } + } + titleFunc := helpers.GetTitleFunc(cfg.Language.GetString("titleCaseStyle")) s := &Site{ @@ -239,6 +258,7 @@ func newSite(cfg deps.DepsCfg) (*Site, error) { Language: cfg.Language, disabledKinds: disabledKinds, titleFunc: titleFunc, + relatedDocsHandler: newSearchIndexHandler(relatedContentConfig), outputFormats: outputFormats, outputFormatsConfig: siteOutputFormatsConfig, mediaTypesConfig: siteMediaTypesConfig, @@ -1607,6 +1627,7 @@ func (s *Site) assembleTaxonomies() { // Prepare site for a new full build. func (s *Site) resetBuildState() { + s.relatedDocsHandler = newSearchIndexHandler(s.relatedDocsHandler.cfg) s.PageCollections = newPageCollectionsFromPages(s.rawAllPages) // TODO(bep) get rid of this double s.Info.PageCollections = s.PageCollections |