summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--CHANGES8
-rw-r--r--attic/archiver.py31
-rw-r--r--attic/hashindex.pyx4
-rw-r--r--attic/helpers.py9
-rw-r--r--attic/remote.py3
-rw-r--r--attic/repository.py38
-rw-r--r--attic/testsuite/archiver.py2
-rw-r--r--attic/testsuite/repository.py48
-rwxr-xr-xdocs/update_usage.sh2
-rw-r--r--docs/usage.rst6
-rw-r--r--docs/usage/check.rst.inc28
11 files changed, 177 insertions, 2 deletions
diff --git a/CHANGES b/CHANGES
index 243e02989..d0e6fe8e6 100644
--- a/CHANGES
+++ b/CHANGES
@@ -3,6 +3,14 @@ Attic Changelog
Here you can see the full list of changes between each Attic release.
+Version 0.11
+------------
+
+(feature release, released on X)
+
+- New "check" command for repository consistency checking (#24)
+- Documentation improvements
+
Version 0.10
------------
diff --git a/attic/archiver.py b/attic/archiver.py
index a95a44df3..4946ceb3d 100644
--- a/attic/archiver.py
+++ b/attic/archiver.py
@@ -13,7 +13,7 @@ from attic.cache import Cache
from attic.key import key_creator
from attic.helpers import Error, location_validator, format_time, \
format_file_mode, ExcludePattern, exclude_path, adjust_patterns, to_localtime, \
- get_cache_dir, get_keys_dir, format_timedelta, prune_split, Manifest, remove_surrogates
+ get_cache_dir, get_keys_dir, format_timedelta, prune_split, Manifest, remove_surrogates, is_a_terminal
from attic.remote import RepositoryServer, RemoteRepository
@@ -59,6 +59,17 @@ class Archiver:
repository.commit()
return self.exit_code
+    def do_check(self, args):
+        """Check repository consistency
+        """
+        # NOTE: the docstring above doubles as the argparse description of the
+        # "check" sub-command, so it is intentionally kept short.
+        repository = self.open_repository(args.repository)
+        # Neither --progress nor --no-progress was given: report progress only
+        # when stdout is a terminal or verbose output was requested.
+        if args.progress is None:
+            args.progress = is_a_terminal(sys.stdout) or args.verbose
+        if not repository.check(progress=args.progress):
+            # repository.check() already printed the problems it found; make
+            # sure the command terminates with a non zero exit code.
+            self.exit_code = 1
+        elif args.progress:
+            print('No problems found', file=sys.stderr)
+        return self.exit_code
+
def do_change_passphrase(self, args):
"""Change repository key file passphrase
"""
@@ -360,6 +371,24 @@ class Archiver:
choices=('none', 'passphrase', 'keyfile'), default='none',
help='select encryption method')
+ check_epilog = """
+ Progress status will be reported on the standard output stream by default when
+ it is attached to a terminal. Any problems found are printed to the standard error
+ stream and the command will have a non zero exit code.
+ """
+ subparser = subparsers.add_parser('check', parents=[common_parser],
+ description=self.do_check.__doc__,
+ epilog=check_epilog)
+ subparser.set_defaults(func=self.do_check)
+ subparser.add_argument('repository', metavar='REPOSITORY',
+ type=location_validator(archive=False),
+ help='repository to check consistency of')
+ subparser.add_argument('--progress', dest='progress', action='store_true',
+ default=None,
+ help='Report progress status to standard output stream')
+ subparser.add_argument('--no-progress', dest='progress', action='store_false',
+ help='Disable progress reporting')
+
subparser = subparsers.add_parser('change-passphrase', parents=[common_parser],
description=self.do_change_passphrase.__doc__)
subparser.set_defaults(func=self.do_change_passphrase)
diff --git a/attic/hashindex.pyx b/attic/hashindex.pyx
index 69c185ed4..e5c8aede0 100644
--- a/attic/hashindex.pyx
+++ b/attic/hashindex.pyx
@@ -107,11 +107,13 @@ cdef class NSIndex(IndexBase):
def iteritems(self, marker=None, limit=0):
iter = NSKeyIterator()
+ iter.idx = self
iter.index = self.index
return iter
cdef class NSKeyIterator:
+ cdef NSIndex idx
cdef HashIndex *index
cdef char *key
@@ -156,11 +158,13 @@ cdef class ChunkIndex(IndexBase):
def iteritems(self, marker=None, limit=0):
iter = ChunkKeyIterator()
+ iter.idx = self
iter.index = self.index
return iter
cdef class ChunkKeyIterator:
+ cdef ChunkIndex idx
cdef HashIndex *index
cdef char *key
diff --git a/attic/helpers.py b/attic/helpers.py
index c5a7ca125..427428519 100644
--- a/attic/helpers.py
+++ b/attic/helpers.py
@@ -431,6 +431,15 @@ def daemonize():
os.dup2(fd, 2)
+def is_a_terminal(fd):
+    """Determine if `fd` is associated with a terminal or not
+
+    `fd` is a file-like object; the descriptor returned by its ``fileno()``
+    is handed to ``os.ttyname()``. Returns True when a terminal name can be
+    resolved and False otherwise, including for objects that do not provide
+    a usable file descriptor.
+    """
+    try:
+        os.ttyname(fd.fileno())
+    except (AttributeError, OSError, TypeError, ValueError):
+        # AttributeError: no fileno() (or no os.ttyname on this platform)
+        # OSError: descriptor is not a terminal / fileno() is unsupported
+        # ValueError: file already closed; TypeError: non-integer descriptor
+        # Anything else (e.g. KeyboardInterrupt) is deliberately not swallowed.
+        return False
+    return True
+
if sys.version < '3.3':
# st_mtime_ns attribute only available in 3.3+
def st_mtime_ns(st):
diff --git a/attic/remote.py b/attic/remote.py
index f818563d8..df5c6a990 100644
--- a/attic/remote.py
+++ b/attic/remote.py
@@ -178,6 +178,9 @@ class RemoteRepository(object):
w_fds = []
self.ignore_responses |= set(waiting_for)
+ def check(self, progress=False):
+ """Run the repository consistency check through the remote connection
+
+ Sends a 'check' request together with the `progress` flag via self.call()
+ and returns whatever that call returns (presumably the boolean result of
+ Repository.check() on the serving side -- TODO confirm).
+ """
+ return self.call('check', progress)
+
def commit(self, *args):
return self.call('commit')
diff --git a/attic/repository.py b/attic/repository.py
index 6555e9a8a..37f09929c 100644
--- a/attic/repository.py
+++ b/attic/repository.py
@@ -5,6 +5,7 @@ import os
import re
import shutil
import struct
+import sys
from zlib import crc32
from .hashindex import NSIndex
@@ -198,6 +199,41 @@ class Repository(object):
if self.io.head is not None:
self.write_index()
+    def check(self, progress=False):
+        """Check repository consistency
+
+        Reads back every segment (verifying the segment checksums) and
+        compares each PUT entry with the location recorded in the index.
+        Every problem is printed to stderr; when `progress` is true the
+        segment currently being checked is printed to stdout.
+
+        Returns True if no problem was found, False otherwise. A tag that
+        is neither PUT nor COMMIT raises RepositoryCheckFailed.
+        """
+        problems = []
+
+        def report_error(msg):
+            # Remember that something went wrong and tell the user about it
+            problems.append(msg)
+            print(msg, file=sys.stderr)
+
+        seen = set()
+        for segment, filename in self.io._segment_names():
+            if progress:
+                print('Checking segment {}/{}'.format(segment, self.io.head))
+            try:
+                entries = list(self.io.iter_objects(segment))
+            except (IntegrityError, struct.error):
+                # Unreadable segment: nothing in it can be compared
+                report_error('Error reading segment {}'.format(segment))
+                continue
+            for tag, key, offset in entries:
+                if tag == TAG_COMMIT:
+                    continue
+                if tag != TAG_PUT:
+                    raise self.RepositoryCheckFailed(self.path, 'Unexpected tag {} in segment {}'.format(tag, segment))
+                if key in seen:
+                    report_error('Key found in more than one segment. Segment={}, key={}'.format(segment, hexlify(key)))
+                seen.add(key)
+                # Keys missing from the index compare against (0, 0)
+                if self.index.get(key, (0, 0)) != (segment, offset):
+                    report_error('Index vs segment header mismatch. Segment={}, key={}'.format(segment, hexlify(key)))
+        index_size = len(self.index)
+        if index_size != len(seen):
+            report_error('Index object count mismatch. {} != {}'.format(index_size, len(seen)))
+        return not problems
+
def rollback(self):
"""
"""
@@ -309,6 +345,8 @@ class LoggedIO(object):
"""
self.head = None
self.segment = 0
+ # FIXME: Only delete segments if we're sure there's at least
+ # one complete segment somewhere
for segment, filename in self._segment_names(reverse=True):
if self.is_complete_segment(filename):
self.head = segment
diff --git a/attic/testsuite/archiver.py b/attic/testsuite/archiver.py
index e09747a9c..b3d5ca8aa 100644
--- a/attic/testsuite/archiver.py
+++ b/attic/testsuite/archiver.py
@@ -205,12 +205,14 @@ class ArchiverTestCase(AtticTestCase):
self.attic('init', self.repository_location)
self.create_src_archive('test')
self.attic('verify', self.repository_location + '::test')
+ self.attic('check', self.repository_location)
name = sorted(os.listdir(os.path.join(self.tmpdir, 'repository', 'data', '0')), reverse=True)[0]
fd = open(os.path.join(self.tmpdir, 'repository', 'data', '0', name), 'r+')
fd.seek(100)
fd.write('XXXX')
fd.close()
self.attic('verify', self.repository_location + '::test', exit_code=1)
+ self.attic('check', self.repository_location, exit_code=1)
def test_readonly_repository(self):
self.attic('init', self.repository_location)
diff --git a/attic/testsuite/repository.py b/attic/testsuite/repository.py
index 33d0cde28..46a8420ef 100644
--- a/attic/testsuite/repository.py
+++ b/attic/testsuite/repository.py
@@ -102,7 +102,55 @@ class RepositoryTestCase(AtticTestCase):
self.repository.commit()
+class RepositoryCheckTestCase(AtticTestCase):
+    """Exercise Repository.check() on an intact and on a damaged repository"""
+
+    def open(self, create=False):
+        return Repository(os.path.join(self.tmppath, 'repository'), create=create)
+
+    def setUp(self):
+        self.tmppath = tempfile.mkdtemp()
+        self.repository = self.open(create=True)
+
+    def tearDown(self):
+        self.repository.close()
+        shutil.rmtree(self.tmppath)
+
+    def add_objects(self, ids):
+        """Store one small object per id, then commit"""
+        for id_ in ids:
+            self.repository.put(('%032d' % id_).encode('ascii'), b'data')
+        self.repository.commit()
+
+    def open_index(self):
+        """Open the highest numbered ``index.N`` file of the repository"""
+        repo_path = os.path.join(self.tmppath, 'repository')
+        # n[6:] is the part of the file name following 'index.'; use max()
+        # so the newest index is picked if more than one file is present.
+        head = max(int(n[6:]) for n in os.listdir(repo_path)
+                   if n.startswith('index') and n[6:].isdigit())
+        return NSIndex(os.path.join(repo_path, 'index.{}'.format(head)))
+
+    def corrupt_object(self, id_):
+        """Overwrite the start of the stored entry for `id_` with garbage"""
+        idx = self.open_index()
+        segment, offset = idx[('%032d' % id_).encode('ascii')]
+        with open(os.path.join(self.tmppath, 'repository', 'data', '0', str(segment)), 'r+b') as fd:
+            fd.seek(offset)
+            fd.write(b'BOOM')
+
+    def list_objects(self):
+        """Return the ids of all objects present in the index as a set of ints"""
+        return {int(key) for key, _ in self.open_index().iteritems()}
+
+    def test_check(self):
+        self.add_objects([1, 2, 3])
+        self.add_objects([4, 5, 6])
+        self.assert_equal({1, 2, 3, 4, 5, 6}, self.list_objects())
+        self.assert_equal(True, self.repository.check())
+        self.corrupt_object(5)
+        self.assert_equal(False, self.repository.check())
+        # check() only reports problems, it must not modify the index
+        self.assert_equal({1, 2, 3, 4, 5, 6}, self.list_objects())
+
+
class RemoteRepositoryTestCase(RepositoryTestCase):
def open(self, create=False):
return RemoteRepository(Location('__testsuite__:' + os.path.join(self.tmppath, 'repository')), create=create)
+
+
+class RemoteRepositoryCheckTestCase(RepositoryCheckTestCase):
+    """Run the repository check tests through a RemoteRepository"""
+
+    def open(self, create=False):
+        repo_path = os.path.join(self.tmppath, 'repository')
+        location = Location('__testsuite__:' + repo_path)
+        return RemoteRepository(location, create=create)
diff --git a/docs/update_usage.sh b/docs/update_usage.sh
index 5a5eed31e..307d5ba79 100755
--- a/docs/update_usage.sh
+++ b/docs/update_usage.sh
@@ -2,7 +2,7 @@
if [ ! -d usage ]; then
mkdir usage
fi
-for cmd in change-passphrase create delete extract info init list mount prune verify; do
+for cmd in change-passphrase check create delete extract info init list mount prune verify; do
FILENAME="usage/$cmd.rst.inc"
LINE=`echo -n attic $cmd | tr 'a-z- ' '-'`
echo -e ".. _attic_$cmd:\n" > $FILENAME
diff --git a/docs/usage.rst b/docs/usage.rst
index e7b134afc..9d5a97446 100644
--- a/docs/usage.rst
+++ b/docs/usage.rst
@@ -93,6 +93,12 @@ not corrupt. |project_name| will not compare the the archived files with the
files on disk.
+.. include:: usage/check.rst.inc
+
+The check command verifies the consistency of a repository. Any inconsistencies
+found are reported to the standard error stream, and the command exits with a
+non-zero exit code.
+
.. include:: usage/delete.rst.inc
This command deletes an archive from the repository. Any disk space not
diff --git a/docs/usage/check.rst.inc b/docs/usage/check.rst.inc
new file mode 100644
index 000000000..08fd36193
--- /dev/null
+++ b/docs/usage/check.rst.inc
@@ -0,0 +1,28 @@
+.. _attic_check:
+
+attic check
+-----------
+::
+
+
+ usage: attic check [-h] [-v] [--progress] [--no-progress] REPOSITORY
+
+ Check repository consistency
+
+ positional arguments:
+ REPOSITORY repository to check consistency of
+
+ optional arguments:
+ -h, --help show this help message and exit
+ -v, --verbose verbose output
+ --progress Report progress status to standard output stream
+ --no-progress Disable progress reporting
+
+ Progress status will be reported on the standard output stream by default when
+ it is attached to a terminal. Any problems found are printed to the standard
+ error stream and the command will have a non zero exit code.
+
+Description
+~~~~~~~~~~~
+
+