summary refs log tree commit diff stats
path: root/related/inverted_index.go
diff options
context:
space:
mode:
author Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com> 2023-02-11 16:20:24 +0100
committer Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com> 2023-02-21 17:56:41 +0100
commit 90da7664bf1f3a0ca2e18144b5deacf532c6e3cf (patch)
tree 78d8ac72ebb2ccee4ca4bbeeb9add3365c743e90 /related/inverted_index.go
parent 0afec0a9f4aace1f5f4af6822aeda6223ee3e3a9 (diff)
Add page fragments support to Related
The main topic of this commit is that you can now index fragments (content heading identifiers) when calling `.Related`. You can do this by: * Configuring one or more indices with type `fragments` * The name of those index configurations maps to an (optional) front matter slice with fragment references. This allows you to link page<->fragment and page<->page. * This will also index all the fragments (heading identifiers) of the pages. It's also possible to use type `fragments` indices in shortcodes, e.g.: ``` {{ $related := site.RegularPages.Related .Page }} ``` But, and this is important, you need to include the shortcode using the `{{<` delimiter. Not doing so will create infinite loops and timeouts. This commit also: * Adds two new methods to Page: Fragments (can also be used to build ToC) and HeadingsFiltered (this is only used in Related Content with index type `fragments` and `applyFilter` set to true). * Consolidates all `.Related*` methods into one, which takes either a `Page` or an options map as its only argument. * Adds `context.Context` to all of the content-related Page API. Turns out it wasn't strictly needed for this particular feature, but it will soon become useful, e.g. in #9339. Closes #10711 Updates #9339 Updates #10725
Diffstat (limited to 'related/inverted_index.go')
-rw-r--r-- related/inverted_index.go 275
1 files changed, 202 insertions, 73 deletions
diff --git a/related/inverted_index.go b/related/inverted_index.go
index 5502f9f11..eab97098a 100644
--- a/related/inverted_index.go
+++ b/related/inverted_index.go
@@ -15,20 +15,37 @@
package related
import (
+ "context"
"errors"
"fmt"
"math"
"sort"
"strings"
+ "sync"
"time"
+ xmaps "golang.org/x/exp/maps"
+
+ "github.com/gohugoio/hugo/common/collections"
"github.com/gohugoio/hugo/common/maps"
+ "github.com/gohugoio/hugo/compare"
+ "github.com/gohugoio/hugo/markup/tableofcontents"
"github.com/spf13/cast"
"github.com/gohugoio/hugo/common/types"
"github.com/mitchellh/mapstructure"
)
+const (
+ TypeBasic = "basic"
+ TypeFragments = "fragments"
+)
+
+var validTypes = map[string]bool{
+ TypeBasic: true,
+ TypeFragments: true,
+}
+
var (
_ Keyword = (*StringKeyword)(nil)
zeroDate = time.Time{}
@@ -37,8 +54,8 @@ var (
DefaultConfig = Config{
Threshold: 80,
Indices: IndexConfigs{
- IndexConfig{Name: "keywords", Weight: 100},
- IndexConfig{Name: "date", Weight: 10},
+ IndexConfig{Name: "keywords", Weight: 100, Type: TypeBasic},
+ IndexConfig{Name: "date", Weight: 10, Type: TypeBasic},
},
}
)
@@ -84,6 +101,15 @@ func (c *Config) Add(index IndexConfig) {
c.Indices = append(c.Indices, index)
}
+func (c *Config) HasType(s string) bool {
+ for _, i := range c.Indices {
+ if i.Type == s {
+ return true
+ }
+ }
+ return false
+}
+
// IndexConfigs holds a set of index configurations.
type IndexConfigs []IndexConfig
@@ -92,6 +118,13 @@ type IndexConfig struct {
// The index name. This directly maps to a field or Param name.
Name string
+ // The index type.
+ Type string
+
+ // Enable to apply a type specific filter to the results.
+ // This is currently only used for the "fragments" type.
+ ApplyFilter bool
+
// Contextual pattern used to convert the Param value into a string.
// Currently only used for dates. Can be used to, say, bump posts in the same
// time frame when searching for related documents.
@@ -120,6 +153,14 @@ type Document interface {
Name() string
}
+// FragmentProvider is an optional interface that can be implemented by a Document.
+type FragmentProvider interface {
+ Fragments(context.Context) *tableofcontents.Fragments
+
+ // For internal use.
+ ApplyFilterToHeadings(context.Context, func(*tableofcontents.Heading) bool) Document
+}
+
// InvertedIndex holds an inverted index, also sometimes named posting list, which
// lists, for every possible search term, the documents that contain that term.
type InvertedIndex struct {
@@ -160,7 +201,7 @@ func NewInvertedIndex(cfg Config) *InvertedIndex {
// Add documents to the inverted index.
// The value must support == and !=.
-func (idx *InvertedIndex) Add(docs ...Document) error {
+func (idx *InvertedIndex) Add(ctx context.Context, docs ...Document) error {
var err error
for _, config := range idx.cfg.Indices {
if config.Weight == 0 {
@@ -179,6 +220,14 @@ func (idx *InvertedIndex) Add(docs ...Document) error {
for _, keyword := range words {
setm[keyword] = append(setm[keyword], doc)
}
+
+ if config.Type == TypeFragments {
+ if fp, ok := doc.(FragmentProvider); ok {
+ for _, fragment := range fp.Fragments(ctx).Identifiers {
+ setm[FragmentKeyword(fragment)] = append(setm[FragmentKeyword(fragment)], doc)
+ }
+ }
+ }
}
}
@@ -209,8 +258,22 @@ func (r *rank) addWeight(w int) {
r.Matches++
}
-func newRank(doc Document, weight int) *rank {
- return &rank{Doc: doc, Weight: weight, Matches: 1}
+var rankPool = sync.Pool{
+ New: func() interface{} {
+ return &rank{}
+ },
+}
+
+func getRank(doc Document, weight int) *rank {
+ r := rankPool.Get().(*rank)
+ r.Doc = doc
+ r.Weight = weight
+ r.Matches = 1
+ return r
+}
+
+func putRank(r *rank) {
+ rankPool.Put(r)
}
func (r ranks) Len() int { return len(r) }
@@ -225,22 +288,41 @@ func (r ranks) Less(i, j int) bool {
return r[i].Weight > r[j].Weight
}
-// SearchDoc finds the documents matching any of the keywords in the given indices
-// against the given document.
+// SearchOpts holds the options for a related search.
+type SearchOpts struct {
+ // The Document to search for related content for.
+ Document Document
+
+ // The keywords to search for.
+ NamedSlices []types.KeyValues
+
+ // The indices to search in.
+ Indices []string
+
+ // Fragments holds a list of special keywords that is used
+ // for indices configured as type "fragments".
+ // This will match the fragment identifiers of the documents.
+ Fragments []string
+}
+
+// Search finds the documents matching any of the keywords in the given indices
+// against query options in opts.
// The resulting document set will be sorted according to number of matches
// and the index weights, and any matches with a rank below the configured
// threshold (normalize to 0..100) will be removed.
// If an index name is provided, only that index will be queried.
-func (idx *InvertedIndex) SearchDoc(doc Document, indices ...string) ([]Document, error) {
- var q []queryElement
+func (idx *InvertedIndex) Search(ctx context.Context, opts SearchOpts) ([]Document, error) {
- var configs IndexConfigs
+ var (
+ queryElements []queryElement
+ configs IndexConfigs
+ )
- if len(indices) == 0 {
+ if len(opts.Indices) == 0 {
configs = idx.cfg.Indices
} else {
- configs = make(IndexConfigs, len(indices))
- for i, indexName := range indices {
+ configs = make(IndexConfigs, len(opts.Indices))
+ for i, indexName := range opts.Indices {
cfg, found := idx.getIndexCfg(indexName)
if !found {
return nil, fmt.Errorf("index %q not found", indexName)
@@ -250,40 +332,78 @@ func (idx *InvertedIndex) SearchDoc(doc Document, indices ...string) ([]Document
}
for _, cfg := range configs {
- keywords, err := doc.RelatedKeywords(cfg)
- if err != nil {
- return nil, err
+ var keywords []Keyword
+ if opts.Document != nil {
+ k, err := opts.Document.RelatedKeywords(cfg)
+ if err != nil {
+ return nil, err
+ }
+ keywords = append(keywords, k...)
+ }
+ if cfg.Type == TypeFragments {
+ for _, fragment := range opts.Fragments {
+ keywords = append(keywords, FragmentKeyword(fragment))
+ }
+ if opts.Document != nil {
+ if fp, ok := opts.Document.(FragmentProvider); ok {
+ for _, fragment := range fp.Fragments(ctx).Identifiers {
+ keywords = append(keywords, FragmentKeyword(fragment))
+ }
+ }
+ }
+ }
+ queryElements = append(queryElements, newQueryElement(cfg.Name, keywords...))
+ }
+ for _, slice := range opts.NamedSlices {
+ var keywords []Keyword
+ key := slice.KeyString()
+ if key == "" {
+ return nil, fmt.Errorf("index %q not valid", slice.Key)
+ }
+ conf, found := idx.getIndexCfg(key)
+ if !found {
+ return nil, fmt.Errorf("index %q not found", key)
}
- q = append(q, newQueryElement(cfg.Name, keywords...))
+ for _, val := range slice.Values {
+ k, err := conf.ToKeywords(val)
+ if err != nil {
+ return nil, err
+ }
+ keywords = append(keywords, k...)
+ }
+ queryElements = append(queryElements, newQueryElement(conf.Name, keywords...))
+ }
+ if opts.Document != nil {
+ return idx.searchDate(ctx, opts.Document, opts.Document.PublishDate(), queryElements...)
}
+ return idx.search(ctx, queryElements...)
+}
- return idx.searchDate(doc.PublishDate(), q...)
+func (cfg IndexConfig) stringToKeyword(s string) Keyword {
+ if cfg.ToLower {
+ s = strings.ToLower(s)
+ }
+ if cfg.Type == TypeFragments {
+ return FragmentKeyword(s)
+ }
+ return StringKeyword(s)
}
// ToKeywords returns a Keyword slice of the given input.
func (cfg IndexConfig) ToKeywords(v any) ([]Keyword, error) {
- var (
- keywords []Keyword
- toLower = cfg.ToLower
- )
+ var keywords []Keyword
+
switch vv := v.(type) {
case string:
- if toLower {
- vv = strings.ToLower(vv)
- }
- keywords = append(keywords, StringKeyword(vv))
+ keywords = append(keywords, cfg.stringToKeyword(vv))
case []string:
- if toLower {
- vc := make([]string, len(vv))
- copy(vc, vv)
- for i := 0; i < len(vc); i++ {
- vc[i] = strings.ToLower(vc[i])
- }
- vv = vc
+ vvv := make([]Keyword, len(vv))
+ for i := 0; i < len(vvv); i++ {
+ vvv[i] = cfg.stringToKeyword(vv[i])
}
- keywords = append(keywords, StringsToKeywords(vv...)...)
+ keywords = append(keywords, vvv...)
case []any:
return cfg.ToKeywords(cast.ToStringSlice(vv))
case time.Time:
@@ -301,46 +421,20 @@ func (cfg IndexConfig) ToKeywords(v any) ([]Keyword, error) {
return keywords, nil
}
-// SearchKeyValues finds the documents matching any of the keywords in the given indices.
-// The resulting document set will be sorted according to number of matches
-// and the index weights, and any matches with a rank below the configured
-// threshold (normalize to 0..100) will be removed.
-func (idx *InvertedIndex) SearchKeyValues(args ...types.KeyValues) ([]Document, error) {
- q := make([]queryElement, len(args))
-
- for i, arg := range args {
- var keywords []Keyword
- key := arg.KeyString()
- if key == "" {
- return nil, fmt.Errorf("index %q not valid", arg.Key)
- }
- conf, found := idx.getIndexCfg(key)
- if !found {
- return nil, fmt.Errorf("index %q not found", key)
- }
-
- for _, val := range arg.Values {
- k, err := conf.ToKeywords(val)
- if err != nil {
- return nil, err
- }
- keywords = append(keywords, k...)
- }
-
- q[i] = newQueryElement(conf.Name, keywords...)
-
- }
-
- return idx.search(q...)
+func (idx *InvertedIndex) search(ctx context.Context, query ...queryElement) ([]Document, error) {
+ return idx.searchDate(ctx, nil, zeroDate, query...)
}
-func (idx *InvertedIndex) search(query ...queryElement) ([]Document, error) {
- return idx.searchDate(zeroDate, query...)
-}
-
-func (idx *InvertedIndex) searchDate(upperDate time.Time, query ...queryElement) ([]Document, error) {
+func (idx *InvertedIndex) searchDate(ctx context.Context, self Document, upperDate time.Time, query ...queryElement) ([]Document, error) {
matchm := make(map[Document]*rank, 200)
+ defer func() {
+ for _, r := range matchm {
+ putRank(r)
+ }
+ }()
+
applyDateFilter := !idx.cfg.IncludeNewer && !upperDate.IsZero()
+ var fragmentsFilter collections.SortedStringSlice
for _, el := range query {
setm, found := idx.index[el.Index]
@@ -356,15 +450,27 @@ func (idx *InvertedIndex) searchDate(upperDate time.Time, query ...queryElement)
for _, kw := range el.Keywords {
if docs, found := setm[kw]; found {
for _, doc := range docs {
+ if compare.Eq(doc, self) {
+ continue
+ }
+
if applyDateFilter {
// Exclude newer than the limit given
if doc.PublishDate().After(upperDate) {
continue
}
}
+
+ if config.Type == TypeFragments && config.ApplyFilter {
+ if fkw, ok := kw.(FragmentKeyword); ok {
+ fragmentsFilter = append(fragmentsFilter, string(fkw))
+ }
+ }
+
r, found := matchm[doc]
if !found {
- matchm[doc] = newRank(doc, config.Weight)
+ r = getRank(doc, config.Weight)
+ matchm[doc] = r
} else {
r.addWeight(config.Weight)
}
@@ -390,11 +496,19 @@ func (idx *InvertedIndex) searchDate(upperDate time.Time, query ...queryElement)
}
sort.Stable(matches)
+ sort.Strings(fragmentsFilter)
result := make([]Document, len(matches))
for i, m := range matches {
result[i] = m.Doc
+ if len(fragmentsFilter) > 0 {
+ if dp, ok := result[i].(FragmentProvider); ok {
+ result[i] = dp.ApplyFilterToHeadings(ctx, func(h *tableofcontents.Heading) bool {
+ return fragmentsFilter.Contains(h.ID)
+ })
+ }
+ }
}
return result, nil
@@ -433,6 +547,14 @@ func DecodeConfig(m maps.Params) (Config, error) {
c.Indices[i].ToLower = true
}
}
+ for i := range c.Indices {
+ if c.Indices[i].Type == "" {
+ c.Indices[i].Type = TypeBasic
+ }
+ if !validTypes[c.Indices[i].Type] {
+ return c, fmt.Errorf("invalid index type %q. Must be one of %v", c.Indices[i].Type, xmaps.Keys(validTypes))
+ }
+ }
return c, nil
}
@@ -444,17 +566,24 @@ func (s StringKeyword) String() string {
return string(s)
}
+// FragmentKeyword represents a document fragment.
+type FragmentKeyword string
+
+func (f FragmentKeyword) String() string {
+ return string(f)
+}
+
// Keyword is the interface a keyword in the search index must implement.
type Keyword interface {
String() string
}
// StringsToKeywords converts the given slice of strings to a slice of Keyword.
-func StringsToKeywords(s ...string) []Keyword {
+func (cfg IndexConfig) StringsToKeywords(s ...string) []Keyword {
kw := make([]Keyword, len(s))
for i := 0; i < len(s); i++ {
- kw[i] = StringKeyword(s[i])
+ kw[i] = cfg.stringToKeyword(s[i])
}
return kw