From 44b0f0b456e1c4568af7b7ebbcc0d82f7d38dbcb Mon Sep 17 00:00:00 2001 From: greatroar <61184462+greatroar@users.noreply.github.com> Date: Mon, 20 Apr 2020 09:02:33 +0200 Subject: lib/db: Switch to faster blobloom Bloom filter pkg (#6537) --- lib/db/lowlevel.go | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/db/lowlevel.go b/lib/db/lowlevel.go index 7ee44f65c..05088ed11 100644 --- a/lib/db/lowlevel.go +++ b/lib/db/lowlevel.go @@ -12,12 +12,12 @@ import ( "encoding/binary" "time" + "github.com/greatroar/blobloom" "github.com/syncthing/syncthing/lib/db/backend" "github.com/syncthing/syncthing/lib/protocol" "github.com/syncthing/syncthing/lib/sync" "github.com/syncthing/syncthing/lib/util" "github.com/thejerf/suture" - "github.com/willf/bloom" ) const ( @@ -27,7 +27,8 @@ const ( // than 100k. For fewer than 100k items we will just get better false // positive rate instead. indirectGCBloomCapacity = 100000 - indirectGCBloomFalsePositiveRate = 0.01 // 1% + indirectGCBloomFalsePositiveRate = 0.01 // 1% + indirectGCBloomMaxBytes = 32 << 20 // Use at most 32MiB memory, which covers our desired FP rate at 27 M items indirectGCDefaultInterval = 13 * time.Hour indirectGCTimeKey = "lastIndirectGCTime" @@ -595,7 +596,11 @@ func (db *Lowlevel) gcIndirect(ctx context.Context) error { if db.gcKeyCount > capacity { capacity = db.gcKeyCount } - blockFilter := bloom.NewWithEstimates(uint(capacity), indirectGCBloomFalsePositiveRate) + blockFilter := blobloom.NewOptimized(blobloom.Config{ + FPRate: indirectGCBloomFalsePositiveRate, + MaxBits: 8 * indirectGCBloomMaxBytes, + NKeys: uint64(capacity), + }) // Iterate the FileInfos, unmarshal the block and version hashes and // add them to the filter. @@ -617,7 +622,7 @@ func (db *Lowlevel) gcIndirect(ctx context.Context) error { return err } if len(bl.BlocksHash) > 0 { - blockFilter.Add(bl.BlocksHash) + blockFilter.Add64(bloomHash(bl.BlocksHash)) } } it.Release() @@ -642,7 +647,7 @@ func (db *Lowlevel) gcIndirect(ctx context.Context) error { } key := blockListKey(it.Key()) - if blockFilter.Test(key.BlocksHash()) { + if blockFilter.Has64(bloomHash(key)) { matchedBlocks++ continue } @@ -665,6 +670,12 @@ func (db *Lowlevel) gcIndirect(ctx context.Context) error { return db.Compact() } +// Hash function for the bloomfilter: first eight bytes of the SHA-256. +// Big or little-endian makes no difference, as long as we're consistent. +func bloomHash(key blockListKey) uint64 { + return binary.BigEndian.Uint64(key.BlocksHash()) +} + // CheckRepair checks folder metadata and sequences for miscellaneous errors. func (db *Lowlevel) CheckRepair() { for _, folder := range db.ListFolders() { -- cgit v1.2.3