diff options
author | Audrius Butkevicius <audrius.butkevicius@gmail.com> | 2020-05-11 19:15:11 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-05-11 20:15:11 +0200 |
commit | decb967969d164ef788b036791bcf9caa9209217 (patch) | |
tree | 67fc41c68ce71d870c4c9917580478a632258191 /lib | |
parent | aac3750298b2baefcd8caaa4f37cdf9450d979c9 (diff) |
all: Reorder sequences for better rename detection (#6574)
Diffstat (limited to 'lib')
-rw-r--r-- | lib/db/keyer.go | 28 | ||||
-rw-r--r-- | lib/db/lowlevel.go | 42 | ||||
-rw-r--r-- | lib/db/schemaupdater.go | 54 | ||||
-rw-r--r-- | lib/db/set.go | 7 | ||||
-rw-r--r-- | lib/db/transactions.go | 42 | ||||
-rw-r--r-- | lib/model/folder.go | 49 | ||||
-rw-r--r-- | lib/model/folder_sendrecv.go | 59 | ||||
-rw-r--r-- | lib/model/model.go | 18 | ||||
-rw-r--r-- | lib/model/model_test.go | 152 |
9 files changed, 409 insertions, 42 deletions
diff --git a/lib/db/keyer.go b/lib/db/keyer.go index d60644e44..88103eb07 100644 --- a/lib/db/keyer.go +++ b/lib/db/keyer.go @@ -62,6 +62,9 @@ const ( // KeyTypeBlockList <block list hash> = BlockList KeyTypeBlockList = 13 + + // KeyTypeBlockListMap <int32 folder ID> <block list hash> <file name> = <nothing> + KeyTypeBlockListMap = 14 ) type keyer interface { @@ -79,6 +82,8 @@ type keyer interface { // block map key stuff (former BlockMap) GenerateBlockMapKey(key, folder, hash, name []byte) (blockMapKey, error) NameFromBlockMapKey(key []byte) []byte + GenerateBlockListMapKey(key, folder, hash, name []byte) (blockListMapKey, error) + NameFromBlockListMapKey(key []byte) []byte // file need index GenerateNeedFileKey(key, folder, name []byte) (needFileKey, error) @@ -203,6 +208,29 @@ func (k blockMapKey) WithoutHashAndName() []byte { return k[:keyPrefixLen+keyFolderLen] } +type blockListMapKey []byte + +func (k defaultKeyer) GenerateBlockListMapKey(key, folder, hash, name []byte) (blockListMapKey, error) { + folderID, err := k.folderIdx.ID(folder) + if err != nil { + return nil, err + } + key = resize(key, keyPrefixLen+keyFolderLen+keyHashLen+len(name)) + key[0] = KeyTypeBlockListMap + binary.BigEndian.PutUint32(key[keyPrefixLen:], folderID) + copy(key[keyPrefixLen+keyFolderLen:], hash) + copy(key[keyPrefixLen+keyFolderLen+keyHashLen:], name) + return key, nil +} + +func (k defaultKeyer) NameFromBlockListMapKey(key []byte) []byte { + return key[keyPrefixLen+keyFolderLen+keyHashLen:] +} + +func (k blockListMapKey) WithoutHashAndName() []byte { + return k[:keyPrefixLen+keyFolderLen] +} + type needFileKey []byte func (k needFileKey) WithoutName() []byte { diff --git a/lib/db/lowlevel.go b/lib/db/lowlevel.go index d7fc8d4c8..985aa29eb 100644 --- a/lib/db/lowlevel.go +++ b/lib/db/lowlevel.go @@ -195,6 +195,7 @@ func (db *Lowlevel) updateLocalFiles(folder []byte, fs []protocol.FileInfo, meta if ok && unchanged(f, ef) { continue } + blocksHashSame := ok && bytes.Equal(ef.BlocksHash, f.BlocksHash) if ok { if !ef.IsDirectory() && !ef.IsDeleted() && !ef.IsInvalid() { @@ -207,6 +208,15 @@ func (db *Lowlevel) updateLocalFiles(folder []byte, fs []protocol.FileInfo, meta return err } } + if !blocksHashSame { + keyBuf, err := db.keyer.GenerateBlockListMapKey(keyBuf, folder, ef.BlocksHash, name) + if err != nil { + return err + } + if err = t.Delete(keyBuf); err != nil { + return err + } + } } keyBuf, err = db.keyer.GenerateSequenceKey(keyBuf, folder, ef.SequenceNo()) @@ -260,6 +270,15 @@ func (db *Lowlevel) updateLocalFiles(folder []byte, fs []protocol.FileInfo, meta return err } } + if !blocksHashSame { + keyBuf, err := db.keyer.GenerateBlockListMapKey(keyBuf, folder, f.BlocksHash, name) + if err != nil { + return err + } + if err = t.Put(keyBuf, nil); err != nil { + return err + } + } } if err := t.Checkpoint(func() error { @@ -296,7 +315,7 @@ func (db *Lowlevel) dropFolder(folder []byte) error { } // Remove all sequences related to the folder - k1, err := db.keyer.GenerateSequenceKey(nil, folder, 0) + k1, err := db.keyer.GenerateSequenceKey(k0, folder, 0) if err != nil { return err } @@ -305,7 +324,7 @@ func (db *Lowlevel) dropFolder(folder []byte) error { } // Remove all items related to the given folder from the global bucket - k2, err := db.keyer.GenerateGlobalVersionKey(nil, folder, nil) + k2, err := db.keyer.GenerateGlobalVersionKey(k1, folder, nil) if err != nil { return err } @@ -314,7 +333,7 @@ func (db *Lowlevel) dropFolder(folder []byte) error { } // Remove all needs related to the folder - k3, err := db.keyer.GenerateNeedFileKey(nil, folder, nil) + k3, err := db.keyer.GenerateNeedFileKey(k2, folder, nil) if err != nil { return err } @@ -323,7 +342,7 @@ func (db *Lowlevel) dropFolder(folder []byte) error { } // Remove the blockmap of the folder - k4, err := db.keyer.GenerateBlockMapKey(nil, folder, nil, nil) + k4, err := db.keyer.GenerateBlockMapKey(k3, folder, nil, nil) if err != nil { return err } @@ -331,6 +350,14 @@ func (db *Lowlevel) dropFolder(folder []byte) error { return err } + k5, err := db.keyer.GenerateBlockListMapKey(k4, folder, nil, nil) + if err != nil { + return err + } + if err := t.deleteKeyPrefix(k5.WithoutHashAndName()); err != nil { + return err + } + return t.Commit() } @@ -383,6 +410,13 @@ func (db *Lowlevel) dropDeviceFolder(device, folder []byte, meta *metadataTracke if err := t.deleteKeyPrefix(key.WithoutHashAndName()); err != nil { return err } + key2, err := db.keyer.GenerateBlockListMapKey(key, folder, nil, nil) + if err != nil { + return err + } + if err := t.deleteKeyPrefix(key2.WithoutHashAndName()); err != nil { + return err + } } return t.Commit() } diff --git a/lib/db/schemaupdater.go b/lib/db/schemaupdater.go index e68e733c2..f973b2f9f 100644 --- a/lib/db/schemaupdater.go +++ b/lib/db/schemaupdater.go @@ -22,9 +22,9 @@ import ( // 6: v0.14.50 // 7: v0.14.53 // 8-9: v1.4.0 -// 10: v1.6.0 +// 10-11: v1.6.0 const ( - dbVersion = 10 + dbVersion = 11 dbMinSyncthingVersion = "v1.6.0" ) @@ -85,8 +85,9 @@ func (db *schemaUpdater) updateSchema() error { {5, db.updateSchemaTo5}, {6, db.updateSchema5to6}, {7, db.updateSchema6to7}, - {9, db.updateSchemato9}, - {10, db.updateSchemato10}, + {9, db.updateSchemaTo9}, + {10, db.updateSchemaTo10}, + {11, db.updateSchemaTo11}, } for _, m := range migrations { @@ -450,7 +451,7 @@ func (db *schemaUpdater) updateSchema6to7(_ int) error { return t.Commit() } -func (db *schemaUpdater) updateSchemato9(prev int) error { +func (db *schemaUpdater) updateSchemaTo9(prev int) error { // Loads and rewrites all files with blocks, to deduplicate block lists. // Checks for missing or incorrect sequence entries and rewrites those. @@ -499,7 +500,7 @@ func (db *schemaUpdater) updateSchemato9(prev int) error { return t.Commit() } -func (db *schemaUpdater) updateSchemato10(_ int) error { +func (db *schemaUpdater) updateSchemaTo10(_ int) error { t, err := db.newReadWriteTransaction() if err != nil { return err @@ -568,3 +569,44 @@ func (db *schemaUpdater) updateSchemato10(_ int) error { return t.Commit() } + +func (db *schemaUpdater) updateSchemaTo11(_ int) error { + // Populates block list map for every folder. + + t, err := db.newReadWriteTransaction() + if err != nil { + return err + } + defer t.close() + + var dk []byte + for _, folderStr := range db.ListFolders() { + folder := []byte(folderStr) + var putErr error + err := t.withHave(folder, protocol.LocalDeviceID[:], nil, true, func(fi FileIntf) bool { + f := fi.(FileInfoTruncated) + if f.IsDirectory() || f.IsDeleted() || f.IsInvalid() || f.BlocksHash == nil { + return true + } + + name := []byte(f.FileName()) + dk, putErr = db.keyer.GenerateBlockListMapKey(dk, folder, f.BlocksHash, name) + if putErr != nil { + return false + } + + if putErr = t.Put(dk, nil); putErr != nil { + return false + } + putErr = t.Checkpoint() + return putErr == nil + }) + if putErr != nil { + return putErr + } + if err != nil { + return err + } + } + return t.Commit() +} diff --git a/lib/db/set.go b/lib/db/set.go index 69276b0e6..365a9ecf6 100644 --- a/lib/db/set.go +++ b/lib/db/set.go @@ -365,6 +365,13 @@ func (s *Snapshot) RemoteNeedFolderFiles(device protocol.DeviceID, page, perpage return files } +func (s *Snapshot) WithBlocksHash(hash []byte, fn Iterator) { + l.Debugf(`%s WithBlocksHash("%x")`, s.folder, hash) + if err := s.t.withBlocksHash([]byte(s.folder), hash, nativeFileIterator(fn)); err != nil && !backend.IsClosed(err) { + panic(err) + } +} + func (s *FileSet) Sequence(device protocol.DeviceID) int64 { return s.meta.Sequence(device) } diff --git a/lib/db/transactions.go b/lib/db/transactions.go index 1c40eada6..36459d19e 100644 --- a/lib/db/transactions.go +++ b/lib/db/transactions.go @@ -9,6 +9,7 @@ package db import ( "bytes" "errors" + "github.com/syncthing/syncthing/lib/osutil" "github.com/syncthing/syncthing/lib/db/backend" "github.com/syncthing/syncthing/lib/protocol" @@ -304,6 +305,47 @@ func (t *readOnlyTransaction) withGlobal(folder, prefix []byte, truncate bool, f return dbi.Error() } +func (t *readOnlyTransaction) withBlocksHash(folder, hash []byte, iterator Iterator) error { + key, err := t.keyer.GenerateBlockListMapKey(nil, folder, hash, nil) + if err != nil { + return err + } + + iter, err := t.NewPrefixIterator(key) + if err != nil { + return err + } + defer iter.Release() + + for iter.Next() { + file := string(t.keyer.NameFromBlockListMapKey(iter.Key())) + f, ok, err := t.getFile(folder, protocol.LocalDeviceID[:], []byte(osutil.NormalizedFilename(file))) + if err != nil { + return err + } + if !ok { + continue + } + f.Name = osutil.NativeFilename(f.Name) + + if !bytes.Equal(f.BlocksHash, hash) { + l.Warnf("Mismatching block map list hashes: got %x expected %x", f.BlocksHash, hash) + continue + } + + if f.IsDeleted() || f.IsInvalid() || f.IsDirectory() || f.IsSymlink() { + l.Warnf("Found something of unexpected type in block list map: %s", f) + continue + } + + if !iterator(f) { + break + } + } + + return iter.Error() +} + func (t *readOnlyTransaction) availability(folder, file []byte) ([]protocol.DeviceID, error) { vl, err := t.getGlobalVersions(nil, folder, file) if backend.IsNotFound(err) { diff --git a/lib/model/folder.go b/lib/model/folder.go index bacd58cec..edcb903ff 100644 --- a/lib/model/folder.go +++ b/lib/model/folder.go @@ -464,6 +464,13 @@ func (f *folder) scanSubdirs(subDirs []string) error { batch.append(res.File) changes++ + + if f.localFlags&protocol.FlagLocalReceiveOnly == 0 { + if nf, ok := f.findRename(snap, mtimefs, res.File); ok { + batch.append(nf) + changes++ + } + } } if err := batch.flush(); err != nil { @@ -615,6 +622,48 @@ func (f *folder) scanSubdirs(subDirs []string) error { return nil } +func (f *folder) findRename(snap *db.Snapshot, mtimefs fs.Filesystem, file protocol.FileInfo) (protocol.FileInfo, bool) { + found := false + nf := protocol.FileInfo{} + + snap.WithBlocksHash(file.BlocksHash, func(ifi db.FileIntf) bool { + fi := ifi.(protocol.FileInfo) + + select { + case <-f.ctx.Done(): + return false + default: + } + + if fi.ShouldConflict() { + return true + } + + if f.ignores.Match(fi.Name).IsIgnored() { + return true + } + + // Only check the size. + // No point checking block equality, as that uses BlocksHash comparison if that is set (which it will be). + // No point checking BlocksHash comparison as WithBlocksHash already does that. + if file.Size != fi.Size { + return true + } + + if !osutil.IsDeleted(mtimefs, fi.Name) { + return true + } + + nf = fi + nf.SetDeleted(f.shortID) + nf.LocalFlags = f.localFlags + found = true + return false + }) + + return nf, found +} + func (f *folder) scanTimerFired() { err := f.scanSubdirs(nil) diff --git a/lib/model/folder_sendrecv.go b/lib/model/folder_sendrecv.go index ed46e9e00..54bd7cf0a 100644 --- a/lib/model/folder_sendrecv.go +++ b/lib/model/folder_sendrecv.go @@ -355,7 +355,7 @@ func (f *sendReceiveFolder) processNeeded(snap *db.Snapshot, dbUpdateChan chan<- if ok && !df.IsDeleted() && !df.IsSymlink() && !df.IsDirectory() && !df.IsInvalid() { fileDeletions[file.Name] = file // Put files into buckets per first hash - key := string(df.Blocks[0].Hash) + key := string(df.BlocksHash) buckets[key] = append(buckets[key], df) } else { f.deleteFileWithCurrent(file, df, ok, dbUpdateChan, scanChan) @@ -458,30 +458,28 @@ nextFile: // Check our list of files to be removed for a match, in which case // we can just do a rename instead. - key := string(fi.Blocks[0].Hash) + key := string(fi.BlocksHash) for i, candidate := range buckets[key] { - if candidate.BlocksEqual(fi) { - // Remove the candidate from the bucket - lidx := len(buckets[key]) - 1 - buckets[key][i] = buckets[key][lidx] - buckets[key] = buckets[key][:lidx] - - // candidate is our current state of the file, where as the - // desired state with the delete bit set is in the deletion - // map. - desired := fileDeletions[candidate.Name] - if err := f.renameFile(candidate, desired, fi, snap, dbUpdateChan, scanChan); err != nil { - // Failed to rename, try to handle files as separate - // deletions and updates. - break - } + // Remove the candidate from the bucket + lidx := len(buckets[key]) - 1 + buckets[key][i] = buckets[key][lidx] + buckets[key] = buckets[key][:lidx] + + // candidate is our current state of the file, where as the + // desired state with the delete bit set is in the deletion + // map. + desired := fileDeletions[candidate.Name] + if err := f.renameFile(candidate, desired, fi, snap, dbUpdateChan, scanChan); err != nil { + l.Debugln("rename shortcut for %s failed: %S", fi.Name, err.Error()) + // Failed to rename, try next one. + continue + } - // Remove the pending deletion (as we performed it by renaming) - delete(fileDeletions, candidate.Name) + // Remove the pending deletion (as we performed it by renaming) + delete(fileDeletions, candidate.Name) - f.queue.Done(fileName) - continue nextFile - } + f.queue.Done(fileName) + continue nextFile } devices := snap.Availability(fileName) @@ -1181,6 +1179,16 @@ func (f *sendReceiveFolder) copierRoutine(in <-chan copyBlocksState, pullChan ch protocol.BufferPool.Put(buf) }() + folderFilesystems := make(map[string]fs.Filesystem) + // Hope that it's usually in the same folder, so start with that one. + folders := []string{f.folderID} + for folder, cfg := range f.model.cfg.Folders() { + folderFilesystems[folder] = cfg.Filesystem() + if folder != f.folderID { + folders = append(folders, folder) + } + } + for state := range in { if err := f.CheckAvailableSpace(state.file.Size); err != nil { state.fail(err) @@ -1198,13 +1206,6 @@ func (f *sendReceiveFolder) copierRoutine(in <-chan copyBlocksState, pullChan ch f.model.progressEmitter.Register(state.sharedPullerState) - folderFilesystems := make(map[string]fs.Filesystem) - var folders []string - for folder, cfg := range f.model.cfg.Folders() { - folderFilesystems[folder] = cfg.Filesystem() - folders = append(folders, folder) - } - var file fs.File var weakHashFinder *weakhash.Finder diff --git a/lib/model/model.go b/lib/model/model.go index 7c4e94ed5..27b3180ef 100644 --- a/lib/model/model.go +++ b/lib/model/model.go @@ -1926,9 +1926,15 @@ func (s *indexSender) sendIndexTo(ctx context.Context) error { var f protocol.FileInfo snap := s.fset.Snapshot() defer snap.Release() + previousWasDelete := false snap.WithHaveSequence(s.prevSequence+1, func(fi db.FileIntf) bool { - if err = batch.flushIfFull(); err != nil { - return false + // This is to make sure that renames (which is an add followed by a delete) land in the same batch. + // Even if the batch is full, we allow a last delete to slip in, we do this by making sure that + // the batch ends with a non-delete, or that the last item in the batch is already a delete + if batch.full() && (!fi.IsDeleted() || previousWasDelete) { + if err = batch.flush(); err != nil { + return false + } } if shouldDebug() { @@ -1964,6 +1970,8 @@ func (s *indexSender) sendIndexTo(ctx context.Context) error { } f.LocalFlags = 0 // never sent externally + previousWasDelete = f.IsDeleted() + batch.append(f) return true }) @@ -2607,8 +2615,12 @@ func (b *fileInfoBatch) append(f protocol.FileInfo) { b.size += f.ProtoSize() } +func (b *fileInfoBatch) full() bool { + return len(b.infos) >= maxBatchSizeFiles || b.size >= maxBatchSizeBytes +} + func (b *fileInfoBatch) flushIfFull() error { - if len(b.infos) >= maxBatchSizeFiles || b.size >= maxBatchSizeBytes { + if b.full() { return b.flush() } return nil diff --git a/lib/model/model_test.go b/lib/model/model_test.go index 53ccdf9c5..a773f645b 100644 --- a/lib/model/model_test.go +++ b/lib/model/model_test.go @@ -18,6 +18,7 @@ import ( "path/filepath" "runtime" "runtime/pprof" + "sort" "strconv" "strings" "sync" @@ -3537,3 +3538,154 @@ func TestFolderAPIErrors(t *testing.T) { } } } + +func TestRenameSequenceOrder(t *testing.T) { + wcfg, fcfg := tmpDefaultWrapper() + m := setupModel(wcfg) + defer cleanupModel(m) + + numFiles := 20 + + ffs := fcfg.Filesystem() + for i := 0; i < numFiles; i++ { + v := fmt.Sprintf("%d", i) + must(t, writeFile(ffs, v, []byte(v), 0644)) + } + + m.ScanFolders() + + count := 0 + snap := dbSnapshot(t, m, "default") + snap.WithHave(protocol.LocalDeviceID, func(i db.FileIntf) bool { + count++ + return true + }) + snap.Release() + + if count != numFiles { + t.Errorf("Unexpected count: %d != %d", count, numFiles) + } + + // Modify all the files, other than the ones we expect to rename + for i := 0; i < numFiles; i++ { + if i == 3 || i == 17 || i == 16 || i == 4 { + continue + } + v := fmt.Sprintf("%d", i) + must(t, writeFile(ffs, v, []byte(v+"-new"), 0644)) + } + // Rename + must(t, ffs.Rename("3", "17")) + must(t, ffs.Rename("16", "4")) + + // Scan + m.ScanFolders() + + // Verify sequence of a appearing is followed by c disappearing. + snap = dbSnapshot(t, m, "default") + defer snap.Release() + + var firstExpectedSequence int64 + var secondExpectedSequence int64 + failed := false + snap.WithHaveSequence(0, func(i db.FileIntf) bool { + t.Log(i) + if i.FileName() == "17" { + firstExpectedSequence = i.SequenceNo() + 1 + } + if i.FileName() == "4" { + secondExpectedSequence = i.SequenceNo() + 1 + } + if i.FileName() == "3" { + failed = i.SequenceNo() != firstExpectedSequence || failed + } + if i.FileName() == "16" { + failed = i.SequenceNo() != secondExpectedSequence || failed + } + return true + }) + if failed { + t.Fail() + } +} + +func TestBlockListMap(t *testing.T) { + wcfg, fcfg := tmpDefaultWrapper() + m := setupModel(wcfg) + defer cleanupModel(m) + + ffs := fcfg.Filesystem() + must(t, writeFile(ffs, "one", []byte("content"), 0644)) + must(t, writeFile(ffs, "two", []byte("content"), 0644)) + must(t, writeFile(ffs, "three", []byte("content"), 0644)) + must(t, writeFile(ffs, "four", []byte("content"), 0644)) + must(t, writeFile(ffs, "five", []byte("content"), 0644)) + + m.ScanFolders() + + snap := dbSnapshot(t, m, "default") + defer snap.Release() + fi, ok := snap.Get(protocol.LocalDeviceID, "one") + if !ok { + t.Error("failed to find existing file") + } + var paths []string + + snap.WithBlocksHash(fi.BlocksHash, func(fi db.FileIntf) bool { + paths = append(paths, fi.FileName()) + return true + }) + snap.Release() + + expected := []string{"one", "two", "three", "four", "five"} + if !equalStringsInAnyOrder(paths, expected) { + t.Errorf("expected %q got %q", expected, paths) + } + + // Fudge the files around + // Remove + must(t, ffs.Remove("one")) + + // Modify + must(t, ffs.Remove("two")) + must(t, writeFile(ffs, "two", []byte("mew-content"), 0644)) + + // Rename + must(t, ffs.Rename("three", "new-three")) + + // Change type + must(t, ffs.Remove("four")) + must(t, ffs.Mkdir("four", 0644)) + + m.ScanFolders() + + // Check we're left with 2 of the 5 + snap = dbSnapshot(t, m, "default") + defer snap.Release() + + paths = paths[:0] + snap.WithBlocksHash(fi.BlocksHash, func(fi db.FileIntf) bool { + paths = append(paths, fi.FileName()) + return true + }) + snap.Release() + + expected = []string{"new-three", "five"} + if !equalStringsInAnyOrder(paths, expected) { + t.Errorf("expected %q got %q", expected, paths) + } +} + +func equalStringsInAnyOrder(a, b []string) bool { + if len(a) != len(b) { + return false + } + sort.Strings(a) + sort.Strings(b) + for i := range a { + if a[i] != b[i] { + return false + } + } + return true +} |