summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTW <tw@waldmann-edv.de>2020-12-02 13:58:52 +0100
committerGitHub <noreply@github.com>2020-12-02 13:58:52 +0100
commit7a5306b80c0dc1e554514f5c5b3d10d6f98f7271 (patch)
treeddda8475d8a37dc62be2b86f119b89867719ee5b
parent1b3b52cf5b841e663fed9bab781a5c35eacfc1bd (diff)
parent6d31f25aa4057cde54cdf49c7404a7cc502b5577 (diff)
Merge pull request #5533 from ThomasWaldmann/test-hashindex-corruption-bug-4829-1.01.0-maint
add a test for the hashindex corruption bug, fixes #5531
-rw-r--r--borg/testsuite/hashindex.py42
1 files changed, 42 insertions, 0 deletions
diff --git a/borg/testsuite/hashindex.py b/borg/testsuite/hashindex.py
index 5fd9e304e..78d166a9f 100644
--- a/borg/testsuite/hashindex.py
+++ b/borg/testsuite/hashindex.py
@@ -284,3 +284,45 @@ def test_nsindex_segment_limit():
def test_max_load_factor():
assert NSIndex.MAX_LOAD_FACTOR < 1.0
assert ChunkIndex.MAX_LOAD_FACTOR < 1.0
+
+
+class IndexCorruptionTestCase(BaseTestCase):
+ def test_bug_4829(self):
+
+ from struct import pack
+
+ def HH(x, y):
+        # make a 32-byte long value that depends on x and y.
+ # same x will mean a collision in the hashtable as bucket index is computed from
+ # first 4 bytes. giving a specific x targets bucket index x.
+ # y is to create different keys and does not go into the bucket index calculation.
+ # so, same x + different y --> collision
+ return pack('<IIQQQ', x, y, 0, 0, 0) # 2 * 4 + 3 * 8 == 32
+
+ idx = NSIndex()
+
+ # create lots of colliding entries
+ for y in range(700): # stay below max load to not trigger resize
+ idx[HH(0, y)] = (0, y)
+
+ # assert idx.size() == 1031 * 40 + 18 # 1031 buckets + header
+
+ # delete lots of the collisions, creating lots of tombstones
+ for y in range(400): # stay above min load to not trigger resize
+ del idx[HH(0, y)]
+
+ # create lots of colliding entries, within the not yet used part of the hashtable
+ for y in range(330): # stay below max load to not trigger resize
+ # at y == 259 a resize will happen due to going beyond max EFFECTIVE load
+ # if the bug is present, that element will be inserted at the wrong place.
+            # and because it will be at the wrong place, it cannot be found again.
+ idx[HH(600, y)] = 600, y
+
+        # now check if the hashtable contents are as expected:
+
+ assert [idx.get(HH(0, y)) for y in range(400, 700)] == [(0, y) for y in range(400, 700)]
+
+ assert [HH(0, y) in idx for y in range(400)] == [False for y in range(400)] # deleted entries
+
+ # this will fail at HH(600, 259) if the bug is present.
+ assert [idx.get(HH(600, y)) for y in range(330)] == [(600, y) for y in range(330)]