summaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
authorgreatroar <61184462+greatroar@users.noreply.github.com>2020-04-20 09:02:33 +0200
committerGitHub <noreply@github.com>2020-04-20 09:02:33 +0200
commit44b0f0b456e1c4568af7b7ebbcc0d82f7d38dbcb (patch)
tree6eb616e64183b10df6b8ba0758fbbd73a2fdf019 /lib
parent4aa2199d5b4a2e86fa33f87c1a4f3c4da44ac079 (diff)
lib/db: Switch to faster blobloom Bloom filter pkg (#6537)
Diffstat (limited to 'lib')
-rw-r--r--lib/db/lowlevel.go21
1 files changed, 16 insertions, 5 deletions
diff --git a/lib/db/lowlevel.go b/lib/db/lowlevel.go
index 7ee44f65c..05088ed11 100644
--- a/lib/db/lowlevel.go
+++ b/lib/db/lowlevel.go
@@ -12,12 +12,12 @@ import (
"encoding/binary"
"time"
+ "github.com/greatroar/blobloom"
"github.com/syncthing/syncthing/lib/db/backend"
"github.com/syncthing/syncthing/lib/protocol"
"github.com/syncthing/syncthing/lib/sync"
"github.com/syncthing/syncthing/lib/util"
"github.com/thejerf/suture"
- "github.com/willf/bloom"
)
const (
@@ -27,7 +27,8 @@ const (
// than 100k. For fewer than 100k items we will just get better false
// positive rate instead.
indirectGCBloomCapacity = 100000
- indirectGCBloomFalsePositiveRate = 0.01 // 1%
+ indirectGCBloomFalsePositiveRate = 0.01 // 1%
+ indirectGCBloomMaxBytes = 32 << 20 // Use at most 32MiB memory, which covers our desired FP rate at 27 M items
indirectGCDefaultInterval = 13 * time.Hour
indirectGCTimeKey = "lastIndirectGCTime"
@@ -595,7 +596,11 @@ func (db *Lowlevel) gcIndirect(ctx context.Context) error {
if db.gcKeyCount > capacity {
capacity = db.gcKeyCount
}
- blockFilter := bloom.NewWithEstimates(uint(capacity), indirectGCBloomFalsePositiveRate)
+ blockFilter := blobloom.NewOptimized(blobloom.Config{
+ FPRate: indirectGCBloomFalsePositiveRate,
+ MaxBits: 8 * indirectGCBloomMaxBytes,
+ NKeys: uint64(capacity),
+ })
// Iterate the FileInfos, unmarshal the block and version hashes and
// add them to the filter.
@@ -617,7 +622,7 @@ func (db *Lowlevel) gcIndirect(ctx context.Context) error {
return err
}
if len(bl.BlocksHash) > 0 {
- blockFilter.Add(bl.BlocksHash)
+ blockFilter.Add64(bloomHash(bl.BlocksHash))
}
}
it.Release()
@@ -642,7 +647,7 @@ func (db *Lowlevel) gcIndirect(ctx context.Context) error {
}
key := blockListKey(it.Key())
- if blockFilter.Test(key.BlocksHash()) {
+ if blockFilter.Has64(bloomHash(key)) {
matchedBlocks++
continue
}
@@ -665,6 +670,12 @@ func (db *Lowlevel) gcIndirect(ctx context.Context) error {
return db.Compact()
}
+// Hash function for the bloomfilter: first eight bytes of the SHA-256.
+// Big or little-endian makes no difference, as long as we're consistent.
+func bloomHash(key blockListKey) uint64 {
+ return binary.BigEndian.Uint64(key.BlocksHash())
+}
+
// CheckRepair checks folder metadata and sequences for miscellaneous errors.
func (db *Lowlevel) CheckRepair() {
for _, folder := range db.ListFolders() {