summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorThomas Waldmann <tw@waldmann-edv.de>2016-08-20 00:04:55 +0200
committerThomas Waldmann <tw@waldmann-edv.de>2016-08-20 00:04:55 +0200
commitc355f3617a4a7bd0312c179b24283289c2d543b4 (patch)
tree3b4a58522ba9701cf53b1866bd307c91775a2948 /src
parent45fd66f6e139d4a2251376dcbd3fe1a51e8f9a9e (diff)
parentf32c8858ad3f6637fca35ef814f6cd584d1cc658 (diff)
Merge branch '1.0-maint'
Diffstat (limited to 'src')
-rw-r--r--src/borg/archive.py9
-rw-r--r--src/borg/archiver.py35
-rw-r--r--src/borg/cache.py2
-rw-r--r--src/borg/locking.py7
-rw-r--r--src/borg/remote.py9
-rw-r--r--src/borg/repository.py1
-rw-r--r--src/borg/testsuite/archiver.py67
-rw-r--r--src/borg/testsuite/locking.py4
8 files changed, 127 insertions, 7 deletions
diff --git a/src/borg/archive.py b/src/borg/archive.py
index 09a3d3f70..8548e3aa3 100644
--- a/src/borg/archive.py
+++ b/src/borg/archive.py
@@ -148,6 +148,15 @@ class DownloadPipeline:
self.key = key
def unpack_many(self, ids, filter=None, preload=False):
+ """
+ Return iterator of items.
+
+ *ids* is a chunk ID list of an item stream. *filter* is a callable
+ to decide whether an item will be yielded. *preload* preloads the data chunks of every yielded item.
+
+ Warning: if *preload* is True then all data chunks of every yielded item have to be retrieved,
+ otherwise preloaded chunks will accumulate in RemoteRepository and create a memory leak.
+ """
unpacker = msgpack.Unpacker(use_list=False)
for _, data in self.fetch_many(ids):
unpacker.feed(data)
diff --git a/src/borg/archiver.py b/src/borg/archiver.py
index e8e1568f1..5c26314e4 100644
--- a/src/borg/archiver.py
+++ b/src/borg/archiver.py
@@ -415,6 +415,18 @@ class Archiver:
status = '-' # dry run, item was not backed up
self.print_file_status(status, path)
+ @staticmethod
+ def build_filter(matcher, is_hardlink_master, strip_components=0):
+ if strip_components:
+ def item_filter(item):
+ return (is_hardlink_master(item) or
+ matcher.match(item.path) and os.sep.join(item.path.split(os.sep)[strip_components:]))
+ else:
+ def item_filter(item):
+ return (is_hardlink_master(item) or
+ matcher.match(item.path))
+ return item_filter
+
@with_repository()
@with_archive
def do_extract(self, args, repository, manifest, key, archive):
@@ -440,8 +452,8 @@ class Archiver:
return (partial_extract and stat.S_ISREG(item.mode) and
item.get('hardlink_master', True) and 'source' not in item)
- for item in archive.iter_items(preload=True,
- filter=lambda item: item_is_hardlink_master(item) or matcher.match(item.path)):
+ filter = self.build_filter(matcher, item_is_hardlink_master, strip_components)
+ for item in archive.iter_items(filter, preload=True):
orig_path = item.path
if item_is_hardlink_master(item):
hardlink_masters[orig_path] = (item.get('chunks'), None)
@@ -449,8 +461,6 @@ class Archiver:
continue
if strip_components:
item.path = os.sep.join(orig_path.split(os.sep)[strip_components:])
- if not item.path:
- continue
if not args.dry_run:
while dirs and not item.path.startswith(dirs[-1].path):
dir_item = dirs.pop(-1)
@@ -1004,6 +1014,11 @@ class Archiver:
finally:
repository.rollback()
+ def do_debug_info(self, args):
+ """display system information for debugging / bug reports"""
+ print(sysinfo())
+ return EXIT_SUCCESS
+
@with_repository()
def do_debug_dump_archive_items(self, args, repository, manifest, key):
"""dump (decrypted, decompressed) archive items metadata (not: data)"""
@@ -2164,6 +2179,18 @@ class Archiver:
subparser.add_argument('topic', metavar='TOPIC', type=str, nargs='?',
help='additional help on TOPIC')
+ debug_info_epilog = textwrap.dedent("""
+ This command displays some system information that might be useful for bug
+ reports and debugging problems. If a traceback happens, this information is
+ already appended at the end of the traceback.
+ """)
+ subparser = subparsers.add_parser('debug-info', parents=[common_parser], add_help=False,
+ description=self.do_debug_info.__doc__,
+ epilog=debug_info_epilog,
+ formatter_class=argparse.RawDescriptionHelpFormatter,
+ help='show system infos for debugging / bug reports (debug)')
+ subparser.set_defaults(func=self.do_debug_info)
+
debug_dump_archive_items_epilog = textwrap.dedent("""
This command dumps raw (but decrypted and decompressed) archive items (only metadata) to files.
""")
diff --git a/src/borg/cache.py b/src/borg/cache.py
index f325ff25d..eb850dc2a 100644
--- a/src/borg/cache.py
+++ b/src/borg/cache.py
@@ -157,9 +157,11 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
cache_version = self.config.getint('cache', 'version')
wanted_version = 1
if cache_version != wanted_version:
+ self.close()
raise Exception('%s has unexpected cache version %d (wanted: %d).' % (
config_path, cache_version, wanted_version))
except configparser.NoSectionError:
+ self.close()
raise Exception('%s does not look like a Borg cache.' % config_path) from None
self.id = self.config.get('cache', 'repository')
self.manifest_id = unhexlify(self.config.get('cache', 'manifest'))
diff --git a/src/borg/locking.py b/src/borg/locking.py
index c74c4fc56..5405c2310 100644
--- a/src/borg/locking.py
+++ b/src/borg/locking.py
@@ -201,6 +201,9 @@ class LockRoster:
roster = self.load()
return set(tuple(e) for e in roster.get(key, []))
+ def empty(self, *keys):
+ return all(not self.get(key) for key in keys)
+
def modify(self, key, op):
roster = self.load()
try:
@@ -293,10 +296,14 @@ class Lock:
def release(self):
if self.is_exclusive:
self._roster.modify(EXCLUSIVE, REMOVE)
+ if self._roster.empty(EXCLUSIVE, SHARED):
+ self._roster.remove()
self._lock.release()
else:
with self._lock:
self._roster.modify(SHARED, REMOVE)
+ if self._roster.empty(EXCLUSIVE, SHARED):
+ self._roster.remove()
def upgrade(self):
# WARNING: if multiple read-lockers want to upgrade, it will deadlock because they
diff --git a/src/borg/remote.py b/src/borg/remote.py
index 18637cae7..c3c3f7282 100644
--- a/src/borg/remote.py
+++ b/src/borg/remote.py
@@ -124,8 +124,13 @@ class RepositoryServer: # pragma: no cover
path = os.path.join(get_home_dir(), path[2:])
path = os.path.realpath(path)
if self.restrict_to_paths:
+ # if --restrict-to-path P is given, we make sure that we only operate in/below path P.
# for the prefix check, it is important that the compared paths both have trailing slashes,
+ # so that a path /foobar will NOT be accepted with --restrict-to-path /foo option.
+ path_with_sep = os.path.join(path, '') # make sure there is a trailing slash (os.sep)
for restrict_to_path in self.restrict_to_paths:
- if path.startswith(os.path.realpath(restrict_to_path)):
+ restrict_to_path_with_sep = os.path.join(os.path.realpath(restrict_to_path), '') # trailing slash
+ if path_with_sep.startswith(restrict_to_path_with_sep):
break
else:
raise PathNotAllowed(path)
@@ -207,6 +212,8 @@ This problem will go away as soon as the server has been upgraded to 1.0.7+.
raise
def __del__(self):
+ if len(self.responses):
+ logging.debug("still %d cached responses left in RemoteRepository" % (len(self.responses),))
if self.p:
self.close()
assert False, "cleanup happened in Repository.__del__"
diff --git a/src/borg/repository.py b/src/borg/repository.py
index a095ca7c8..9ae7e36f8 100644
--- a/src/borg/repository.py
+++ b/src/borg/repository.py
@@ -239,6 +239,7 @@ class Repository:
self.config = ConfigParser(interpolation=None)
self.config.read(os.path.join(self.path, 'config'))
if 'repository' not in self.config.sections() or self.config.getint('repository', 'version') != 1:
+ self.close()
raise self.InvalidRepository(path)
self.max_segment_size = self.config.getint('repository', 'max_segment_size')
self.segments_per_dir = self.config.getint('repository', 'segments_per_dir')
diff --git a/src/borg/testsuite/archiver.py b/src/borg/testsuite/archiver.py
index 12a7f4172..22438da0e 100644
--- a/src/borg/testsuite/archiver.py
+++ b/src/borg/testsuite/archiver.py
@@ -28,9 +28,11 @@ from ..archiver import Archiver
from ..cache import Cache
from ..constants import * # NOQA
from ..crypto import bytes_to_long, num_aes_blocks
+from ..helpers import PatternMatcher, parse_pattern
from ..helpers import Chunk, Manifest
from ..helpers import EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR
from ..helpers import bin_to_hex
+from ..item import Item
from ..key import KeyfileKeyBase
from ..remote import RemoteRepository, PathNotAllowed
from ..repository import Repository
@@ -403,6 +405,9 @@ class ArchiverTestCase(ArchiverTestCaseBase):
self.cmd('extract', self.repository_location + '::test')
assert os.readlink('input/link1') == 'somewhere'
+ # Search for O_NOATIME there: https://www.gnu.org/software/hurd/contributing.html - we just
+ # skip the test on Hurd, it is not critical anyway, just testing a performance optimization.
+ @pytest.mark.skipif(sys.platform == 'gnu0', reason="O_NOATIME is strangely broken on GNU Hurd")
@pytest.mark.skipif(not is_utime_fully_supported(), reason='cannot properly setup and execute test without utime')
def test_atime(self):
def has_noatime(some_file):
@@ -1928,12 +1933,21 @@ class RemoteArchiverTestCase(ArchiverTestCase):
prefix = '__testsuite__:'
def test_remote_repo_restrict_to_path(self):
- self.cmd('init', self.repository_location)
- path_prefix = os.path.dirname(self.repository_path)
+ # restricted to repo directory itself:
+ with patch.object(RemoteRepository, 'extra_test_args', ['--restrict-to-path', self.repository_path]):
+ self.cmd('init', self.repository_location)
+ # restricted to repo directory itself, fail for other directories with same prefix:
+ with patch.object(RemoteRepository, 'extra_test_args', ['--restrict-to-path', self.repository_path]):
+ self.assert_raises(PathNotAllowed, lambda: self.cmd('init', self.repository_location + '_0'))
+
+ # restricted to a completely different path:
with patch.object(RemoteRepository, 'extra_test_args', ['--restrict-to-path', '/foo']):
self.assert_raises(PathNotAllowed, lambda: self.cmd('init', self.repository_location + '_1'))
+ path_prefix = os.path.dirname(self.repository_path)
+ # restrict to repo directory's parent directory:
with patch.object(RemoteRepository, 'extra_test_args', ['--restrict-to-path', path_prefix]):
self.cmd('init', self.repository_location + '_2')
+ # restrict to repo directory's parent directory and another directory:
with patch.object(RemoteRepository, 'extra_test_args', ['--restrict-to-path', '/foo', '--restrict-to-path', path_prefix]):
self.cmd('init', self.repository_location + '_3')
@@ -1950,6 +1964,28 @@ class RemoteArchiverTestCase(ArchiverTestCase):
def test_debug_put_get_delete_obj(self):
pass
+ def test_strip_components_doesnt_leak(self):
+ self.cmd('init', self.repository_location)
+ self.create_regular_file('dir/file', contents=b"test file contents 1")
+ self.create_regular_file('dir/file2', contents=b"test file contents 2")
+ self.create_regular_file('skipped-file1', contents=b"test file contents 3")
+ self.create_regular_file('skipped-file2', contents=b"test file contents 4")
+ self.create_regular_file('skipped-file3', contents=b"test file contents 5")
+ self.cmd('create', self.repository_location + '::test', 'input')
+ marker = 'cached responses left in RemoteRepository'
+ with changedir('output'):
+ res = self.cmd('extract', "--debug", self.repository_location + '::test', '--strip-components', '3')
+ self.assert_true(marker not in res)
+ with self.assert_creates_file('file'):
+ res = self.cmd('extract', "--debug", self.repository_location + '::test', '--strip-components', '2')
+ self.assert_true(marker not in res)
+ with self.assert_creates_file('dir/file'):
+ res = self.cmd('extract', "--debug", self.repository_location + '::test', '--strip-components', '1')
+ self.assert_true(marker not in res)
+ with self.assert_creates_file('input/dir/file'):
+ res = self.cmd('extract', "--debug", self.repository_location + '::test', '--strip-components', '0')
+ self.assert_true(marker not in res)
+
class DiffArchiverTestCase(ArchiverTestCaseBase):
def test_basic_functionality(self):
@@ -2160,3 +2196,30 @@ def test_compare_chunk_contents():
], [
b'1234', b'565'
])
+
+
+class TestBuildFilter:
+ @staticmethod
+ def item_is_hardlink_master(item):
+ return False
+
+ def test_basic(self):
+ matcher = PatternMatcher()
+ matcher.add([parse_pattern('included')], True)
+ filter = Archiver.build_filter(matcher, self.item_is_hardlink_master)
+ assert filter(Item(path='included'))
+ assert filter(Item(path='included/file'))
+ assert not filter(Item(path='something else'))
+
+ def test_empty(self):
+ matcher = PatternMatcher(fallback=True)
+ filter = Archiver.build_filter(matcher, self.item_is_hardlink_master)
+ assert filter(Item(path='anything'))
+
+ def test_strip_components(self):
+ matcher = PatternMatcher(fallback=True)
+ filter = Archiver.build_filter(matcher, self.item_is_hardlink_master, strip_components=1)
+ assert not filter(Item(path='shallow'))
+ assert not filter(Item(path='shallow/')) # can this even happen? paths are normalized...
+ assert filter(Item(path='deep enough/file'))
+ assert filter(Item(path='something/dir/file'))
diff --git a/src/borg/testsuite/locking.py b/src/borg/testsuite/locking.py
index fcb21f1df..850c0ac56 100644
--- a/src/borg/testsuite/locking.py
+++ b/src/borg/testsuite/locking.py
@@ -64,6 +64,8 @@ class TestLock:
lock2 = Lock(lockpath, exclusive=False, id=ID2).acquire()
assert len(lock1._roster.get(SHARED)) == 2
assert len(lock1._roster.get(EXCLUSIVE)) == 0
+ assert not lock1._roster.empty(SHARED, EXCLUSIVE)
+ assert lock1._roster.empty(EXCLUSIVE)
lock1.release()
lock2.release()
@@ -71,6 +73,7 @@ class TestLock:
with Lock(lockpath, exclusive=True, id=ID1) as lock:
assert len(lock._roster.get(SHARED)) == 0
assert len(lock._roster.get(EXCLUSIVE)) == 1
+ assert not lock._roster.empty(SHARED, EXCLUSIVE)
def test_upgrade(self, lockpath):
with Lock(lockpath, exclusive=False) as lock:
@@ -78,6 +81,7 @@ class TestLock:
lock.upgrade() # NOP
assert len(lock._roster.get(SHARED)) == 0
assert len(lock._roster.get(EXCLUSIVE)) == 1
+ assert not lock._roster.empty(SHARED, EXCLUSIVE)
def test_downgrade(self, lockpath):
with Lock(lockpath, exclusive=True) as lock: