diff options
author | Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com> | 2023-02-11 16:20:24 +0100 |
---|---|---|
committer | Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com> | 2023-02-21 17:56:41 +0100 |
commit | 90da7664bf1f3a0ca2e18144b5deacf532c6e3cf (patch) | |
tree | 78d8ac72ebb2ccee4ca4bbeeb9add3365c743e90 /related/inverted_index.go | |
parent | 0afec0a9f4aace1f5f4af6822aeda6223ee3e3a9 (diff) |
Add page fragments support to Related
The main topic of this commit is that you can now index fragments (content heading identifiers) when calling `.Related`.
You can do this by:
* Configuring one or more indices with type `fragments`.
* The name of those index configurations maps to an (optional) front matter slice with fragment references. This allows you to link
page<->fragment and page<->page.
* This will also index all the fragments (heading identifiers) of the pages.
It's also possible to use type `fragments` indices in a shortcode, e.g.:
```
{{ $related := site.RegularPages.Related .Page }}
```
But, and this is important, you need to include the shortcode using the `{{<` delimiter. Not doing so will create infinite loops and timeouts.
This commit also:
* Adds two new methods to Page: Fragments (can also be used to build ToC) and HeadingsFiltered (this is only used in Related Content with
index type `fragments` and `applyFilter` set to true).
* Consolidates all `.Related*` methods into one, which takes either a `Page` or an options map as its only argument.
* Adds `context.Context` to all of the content-related Page API. Turns out it wasn't strictly needed for this particular feature, but it will
soon become useful, e.g. in #9339.
Closes #10711
Updates #9339
Updates #10725
Diffstat (limited to 'related/inverted_index.go')
-rw-r--r-- | related/inverted_index.go | 275 |
1 files changed, 202 insertions, 73 deletions
diff --git a/related/inverted_index.go b/related/inverted_index.go index 5502f9f11..eab97098a 100644 --- a/related/inverted_index.go +++ b/related/inverted_index.go @@ -15,20 +15,37 @@ package related import ( + "context" "errors" "fmt" "math" "sort" "strings" + "sync" "time" + xmaps "golang.org/x/exp/maps" + + "github.com/gohugoio/hugo/common/collections" "github.com/gohugoio/hugo/common/maps" + "github.com/gohugoio/hugo/compare" + "github.com/gohugoio/hugo/markup/tableofcontents" "github.com/spf13/cast" "github.com/gohugoio/hugo/common/types" "github.com/mitchellh/mapstructure" ) +const ( + TypeBasic = "basic" + TypeFragments = "fragments" +) + +var validTypes = map[string]bool{ + TypeBasic: true, + TypeFragments: true, +} + var ( _ Keyword = (*StringKeyword)(nil) zeroDate = time.Time{} @@ -37,8 +54,8 @@ var ( DefaultConfig = Config{ Threshold: 80, Indices: IndexConfigs{ - IndexConfig{Name: "keywords", Weight: 100}, - IndexConfig{Name: "date", Weight: 10}, + IndexConfig{Name: "keywords", Weight: 100, Type: TypeBasic}, + IndexConfig{Name: "date", Weight: 10, Type: TypeBasic}, }, } ) @@ -84,6 +101,15 @@ func (c *Config) Add(index IndexConfig) { c.Indices = append(c.Indices, index) } +func (c *Config) HasType(s string) bool { + for _, i := range c.Indices { + if i.Type == s { + return true + } + } + return false +} + // IndexConfigs holds a set of index configurations. type IndexConfigs []IndexConfig @@ -92,6 +118,13 @@ type IndexConfig struct { // The index name. This directly maps to a field or Param name. Name string + // The index type. + Type string + + // Enable to apply a type specific filter to the results. + // This is currently only used for the "fragments" type. + ApplyFilter bool + // Contextual pattern used to convert the Param value into a string. // Currently only used for dates. Can be used to, say, bump posts in the same // time frame when searching for related documents. 
@@ -120,6 +153,14 @@ type Document interface { Name() string } +// FragmentProvider is an optional interface that can be implemented by a Document. +type FragmentProvider interface { + Fragments(context.Context) *tableofcontents.Fragments + + // For internal use. + ApplyFilterToHeadings(context.Context, func(*tableofcontents.Heading) bool) Document +} + // InvertedIndex holds an inverted index, also sometimes named posting list, which // lists, for every possible search term, the documents that contain that term. type InvertedIndex struct { @@ -160,7 +201,7 @@ func NewInvertedIndex(cfg Config) *InvertedIndex { // Add documents to the inverted index. // The value must support == and !=. -func (idx *InvertedIndex) Add(docs ...Document) error { +func (idx *InvertedIndex) Add(ctx context.Context, docs ...Document) error { var err error for _, config := range idx.cfg.Indices { if config.Weight == 0 { @@ -179,6 +220,14 @@ func (idx *InvertedIndex) Add(docs ...Document) error { for _, keyword := range words { setm[keyword] = append(setm[keyword], doc) } + + if config.Type == TypeFragments { + if fp, ok := doc.(FragmentProvider); ok { + for _, fragment := range fp.Fragments(ctx).Identifiers { + setm[FragmentKeyword(fragment)] = append(setm[FragmentKeyword(fragment)], doc) + } + } + } } } @@ -209,8 +258,22 @@ func (r *rank) addWeight(w int) { r.Matches++ } -func newRank(doc Document, weight int) *rank { - return &rank{Doc: doc, Weight: weight, Matches: 1} +var rankPool = sync.Pool{ + New: func() interface{} { + return &rank{} + }, +} + +func getRank(doc Document, weight int) *rank { + r := rankPool.Get().(*rank) + r.Doc = doc + r.Weight = weight + r.Matches = 1 + return r +} + +func putRank(r *rank) { + rankPool.Put(r) } func (r ranks) Len() int { return len(r) } @@ -225,22 +288,41 @@ func (r ranks) Less(i, j int) bool { return r[i].Weight > r[j].Weight } -// SearchDoc finds the documents matching any of the keywords in the given indices -// against the given document. 
+// SearchOpts holds the options for a related search. +type SearchOpts struct { + // The Document to search for related content for. + Document Document + + // The keywords to search for. + NamedSlices []types.KeyValues + + // The indices to search in. + Indices []string + + // Fragments holds a a list of special keywords that is used + // for indices configured as type "fragments". + // This will match the fragment identifiers of the documents. + Fragments []string +} + +// Search finds the documents matching any of the keywords in the given indices +// against query options in opts. // The resulting document set will be sorted according to number of matches // and the index weights, and any matches with a rank below the configured // threshold (normalize to 0..100) will be removed. // If an index name is provided, only that index will be queried. -func (idx *InvertedIndex) SearchDoc(doc Document, indices ...string) ([]Document, error) { - var q []queryElement +func (idx *InvertedIndex) Search(ctx context.Context, opts SearchOpts) ([]Document, error) { - var configs IndexConfigs + var ( + queryElements []queryElement + configs IndexConfigs + ) - if len(indices) == 0 { + if len(opts.Indices) == 0 { configs = idx.cfg.Indices } else { - configs = make(IndexConfigs, len(indices)) - for i, indexName := range indices { + configs = make(IndexConfigs, len(opts.Indices)) + for i, indexName := range opts.Indices { cfg, found := idx.getIndexCfg(indexName) if !found { return nil, fmt.Errorf("index %q not found", indexName) @@ -250,40 +332,78 @@ func (idx *InvertedIndex) SearchDoc(doc Document, indices ...string) ([]Document } for _, cfg := range configs { - keywords, err := doc.RelatedKeywords(cfg) - if err != nil { - return nil, err + var keywords []Keyword + if opts.Document != nil { + k, err := opts.Document.RelatedKeywords(cfg) + if err != nil { + return nil, err + } + keywords = append(keywords, k...) 
+ } + if cfg.Type == TypeFragments { + for _, fragment := range opts.Fragments { + keywords = append(keywords, FragmentKeyword(fragment)) + } + if opts.Document != nil { + if fp, ok := opts.Document.(FragmentProvider); ok { + for _, fragment := range fp.Fragments(ctx).Identifiers { + keywords = append(keywords, FragmentKeyword(fragment)) + } + } + } + } + queryElements = append(queryElements, newQueryElement(cfg.Name, keywords...)) + } + for _, slice := range opts.NamedSlices { + var keywords []Keyword + key := slice.KeyString() + if key == "" { + return nil, fmt.Errorf("index %q not valid", slice.Key) + } + conf, found := idx.getIndexCfg(key) + if !found { + return nil, fmt.Errorf("index %q not found", key) } - q = append(q, newQueryElement(cfg.Name, keywords...)) + for _, val := range slice.Values { + k, err := conf.ToKeywords(val) + if err != nil { + return nil, err + } + keywords = append(keywords, k...) + } + queryElements = append(queryElements, newQueryElement(conf.Name, keywords...)) + } + if opts.Document != nil { + return idx.searchDate(ctx, opts.Document, opts.Document.PublishDate(), queryElements...) } + return idx.search(ctx, queryElements...) +} - return idx.searchDate(doc.PublishDate(), q...) +func (cfg IndexConfig) stringToKeyword(s string) Keyword { + if cfg.ToLower { + s = strings.ToLower(s) + } + if cfg.Type == TypeFragments { + return FragmentKeyword(s) + } + return StringKeyword(s) } // ToKeywords returns a Keyword slice of the given input. 
func (cfg IndexConfig) ToKeywords(v any) ([]Keyword, error) { - var ( - keywords []Keyword - toLower = cfg.ToLower - ) + var keywords []Keyword + switch vv := v.(type) { case string: - if toLower { - vv = strings.ToLower(vv) - } - keywords = append(keywords, StringKeyword(vv)) + keywords = append(keywords, cfg.stringToKeyword(vv)) case []string: - if toLower { - vc := make([]string, len(vv)) - copy(vc, vv) - for i := 0; i < len(vc); i++ { - vc[i] = strings.ToLower(vc[i]) - } - vv = vc + vvv := make([]Keyword, len(vv)) + for i := 0; i < len(vvv); i++ { + vvv[i] = cfg.stringToKeyword(vv[i]) } - keywords = append(keywords, StringsToKeywords(vv...)...) + keywords = append(keywords, vvv...) case []any: return cfg.ToKeywords(cast.ToStringSlice(vv)) case time.Time: @@ -301,46 +421,20 @@ func (cfg IndexConfig) ToKeywords(v any) ([]Keyword, error) { return keywords, nil } -// SearchKeyValues finds the documents matching any of the keywords in the given indices. -// The resulting document set will be sorted according to number of matches -// and the index weights, and any matches with a rank below the configured -// threshold (normalize to 0..100) will be removed. -func (idx *InvertedIndex) SearchKeyValues(args ...types.KeyValues) ([]Document, error) { - q := make([]queryElement, len(args)) - - for i, arg := range args { - var keywords []Keyword - key := arg.KeyString() - if key == "" { - return nil, fmt.Errorf("index %q not valid", arg.Key) - } - conf, found := idx.getIndexCfg(key) - if !found { - return nil, fmt.Errorf("index %q not found", key) - } - - for _, val := range arg.Values { - k, err := conf.ToKeywords(val) - if err != nil { - return nil, err - } - keywords = append(keywords, k...) - } - - q[i] = newQueryElement(conf.Name, keywords...) - - } - - return idx.search(q...) +func (idx *InvertedIndex) search(ctx context.Context, query ...queryElement) ([]Document, error) { + return idx.searchDate(ctx, nil, zeroDate, query...) 
} -func (idx *InvertedIndex) search(query ...queryElement) ([]Document, error) { - return idx.searchDate(zeroDate, query...) -} - -func (idx *InvertedIndex) searchDate(upperDate time.Time, query ...queryElement) ([]Document, error) { +func (idx *InvertedIndex) searchDate(ctx context.Context, self Document, upperDate time.Time, query ...queryElement) ([]Document, error) { matchm := make(map[Document]*rank, 200) + defer func() { + for _, r := range matchm { + putRank(r) + } + }() + applyDateFilter := !idx.cfg.IncludeNewer && !upperDate.IsZero() + var fragmentsFilter collections.SortedStringSlice for _, el := range query { setm, found := idx.index[el.Index] @@ -356,15 +450,27 @@ func (idx *InvertedIndex) searchDate(upperDate time.Time, query ...queryElement) for _, kw := range el.Keywords { if docs, found := setm[kw]; found { for _, doc := range docs { + if compare.Eq(doc, self) { + continue + } + if applyDateFilter { // Exclude newer than the limit given if doc.PublishDate().After(upperDate) { continue } } + + if config.Type == TypeFragments && config.ApplyFilter { + if fkw, ok := kw.(FragmentKeyword); ok { + fragmentsFilter = append(fragmentsFilter, string(fkw)) + } + } + r, found := matchm[doc] if !found { - matchm[doc] = newRank(doc, config.Weight) + r = getRank(doc, config.Weight) + matchm[doc] = r } else { r.addWeight(config.Weight) } @@ -390,11 +496,19 @@ func (idx *InvertedIndex) searchDate(upperDate time.Time, query ...queryElement) } sort.Stable(matches) + sort.Strings(fragmentsFilter) result := make([]Document, len(matches)) for i, m := range matches { result[i] = m.Doc + if len(fragmentsFilter) > 0 { + if dp, ok := result[i].(FragmentProvider); ok { + result[i] = dp.ApplyFilterToHeadings(ctx, func(h *tableofcontents.Heading) bool { + return fragmentsFilter.Contains(h.ID) + }) + } + } } return result, nil @@ -433,6 +547,14 @@ func DecodeConfig(m maps.Params) (Config, error) { c.Indices[i].ToLower = true } } + for i := range c.Indices { + if 
c.Indices[i].Type == "" { + c.Indices[i].Type = TypeBasic + } + if !validTypes[c.Indices[i].Type] { + return c, fmt.Errorf("invalid index type %q. Must be one of %v", c.Indices[i].Type, xmaps.Keys(validTypes)) + } + } return c, nil } @@ -444,17 +566,24 @@ func (s StringKeyword) String() string { return string(s) } +// FragmentKeyword represents a document fragment. +type FragmentKeyword string + +func (f FragmentKeyword) String() string { + return string(f) +} + // Keyword is the interface a keyword in the search index must implement. type Keyword interface { String() string } // StringsToKeywords converts the given slice of strings to a slice of Keyword. -func StringsToKeywords(s ...string) []Keyword { +func (cfg IndexConfig) StringsToKeywords(s ...string) []Keyword { kw := make([]Keyword, len(s)) for i := 0; i < len(s); i++ { - kw[i] = StringKeyword(s[i]) + kw[i] = cfg.stringToKeyword(s[i]) } return kw |