summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorThomas Waldmann <tw@waldmann-edv.de>2016-08-15 04:17:41 +0200
committerThomas Waldmann <tw@waldmann-edv.de>2016-08-15 15:03:05 +0200
commit1f056d9e8ab54066a9242cde7d89c83f2fd6c1dd (patch)
tree0c118a0752694be782943e8afa114751ef3bd2e4 /src
parentb6d0eb99a5a24b824e0e38d8e607400bdd1c7b0b (diff)
more safe interface for manifest.archives
Diffstat (limited to 'src')
-rw-r--r--src/borg/archive.py38
-rw-r--r--src/borg/archiver.py8
-rw-r--r--src/borg/cache.py8
-rw-r--r--src/borg/fuse.py6
-rw-r--r--src/borg/helpers.py84
5 files changed, 99 insertions, 45 deletions
diff --git a/src/borg/archive.py b/src/borg/archive.py
index 561d5ef80..09a3d3f70 100644
--- a/src/borg/archive.py
+++ b/src/borg/archive.py
@@ -269,10 +269,10 @@ class Archive:
break
i += 1
else:
- if name not in self.manifest.archives:
+ info = self.manifest.archives.get(name)
+ if info is None:
raise self.DoesNotExist(name)
- info = self.manifest.archives[name]
- self.load(info[b'id'])
+ self.load(info.id)
self.zeros = b'\0' * (1 << chunker_params[1])
def _load_meta(self, id):
@@ -379,7 +379,7 @@ Number of files: {0.stats.nfiles}'''.format(
data = msgpack.packb(metadata.as_dict(), unicode_errors='surrogateescape')
self.id = self.key.id_hash(data)
self.cache.add_chunk(self.id, Chunk(data), self.stats)
- self.manifest.archives[name] = {'id': self.id, 'time': metadata.time}
+ self.manifest.archives[name] = (self.id, metadata.time)
self.manifest.write()
self.repository.commit()
self.cache.commit()
@@ -593,7 +593,7 @@ Number of files: {0.stats.nfiles}'''.format(
data = msgpack.packb(metadata.as_dict(), unicode_errors='surrogateescape')
new_id = self.key.id_hash(data)
self.cache.add_chunk(new_id, Chunk(data), self.stats)
- self.manifest.archives[self.name] = {'id': new_id, 'time': metadata.time}
+ self.manifest.archives[self.name] = (new_id, metadata.time)
self.cache.chunk_decref(self.id, self.stats)
self.id = new_id
@@ -844,7 +844,7 @@ Number of files: {0.stats.nfiles}'''.format(
@staticmethod
def list_archives(repository, key, manifest, cache=None):
# expensive! see also Manifest.list_archive_infos.
- for name, info in manifest.archives.items():
+ for name in manifest.archives:
yield Archive(repository, key, manifest, name, cache=cache)
@staticmethod
@@ -1077,7 +1077,7 @@ class ArchiveChecker:
if valid_archive(archive):
archive = ArchiveItem(internal_dict=archive)
logger.info('Found archive %s', archive.name)
- manifest.archives[archive.name] = {b'id': chunk_id, b'time': archive.time}
+ manifest.archives[archive.name] = (chunk_id, archive.time)
logger.info('Manifest rebuild complete.')
return manifest
@@ -1216,28 +1216,30 @@ class ArchiveChecker:
if archive is None:
# we need last N or all archives
- archive_items = sorted(self.manifest.archives.items(), reverse=True,
- key=lambda name_info: name_info[1][b'time'])
+ archive_infos = self.manifest.archives.list(sort_by='ts', reverse=True)
if prefix is not None:
- archive_items = [item for item in archive_items if item[0].startswith(prefix)]
- num_archives = len(archive_items)
+ archive_infos = [info for info in archive_infos if info.name.startswith(prefix)]
+ num_archives = len(archive_infos)
end = None if last is None else min(num_archives, last)
else:
# we only want one specific archive
- archive_items = [item for item in self.manifest.archives.items() if item[0] == archive]
- if not archive_items:
+ info = self.manifest.archives.get(archive)
+ if info is None:
logger.error("Archive '%s' not found.", archive)
+ archive_infos = []
+ else:
+ archive_infos = [info]
num_archives = 1
end = 1
with cache_if_remote(self.repository) as repository:
- for i, (name, info) in enumerate(archive_items[:end]):
- logger.info('Analyzing archive {} ({}/{})'.format(name, num_archives - i, num_archives))
- archive_id = info[b'id']
+ for i, info in enumerate(archive_infos[:end]):
+ logger.info('Analyzing archive {} ({}/{})'.format(info.name, num_archives - i, num_archives))
+ archive_id = info.id
if archive_id not in self.chunks:
logger.error('Archive metadata block is missing!')
self.error_found = True
- del self.manifest.archives[name]
+ del self.manifest.archives[info.name]
continue
mark_as_possibly_superseded(archive_id)
cdata = self.repository.get(archive_id)
@@ -1260,7 +1262,7 @@ class ArchiveChecker:
new_archive_id = self.key.id_hash(data)
cdata = self.key.encrypt(Chunk(data))
add_reference(new_archive_id, len(data), len(cdata), cdata)
- info[b'id'] = new_archive_id
+ self.manifest.archives[info.name] = (new_archive_id, info.ts)
def orphan_chunks_check(self):
if self.check_all:
diff --git a/src/borg/archiver.py b/src/borg/archiver.py
index 4fc46e42d..78fdfe7a3 100644
--- a/src/borg/archiver.py
+++ b/src/borg/archiver.py
@@ -734,7 +734,7 @@ class Archiver:
msg.append("This repository seems to have no manifest, so we can't tell anything about its contents.")
else:
msg.append("You requested to completely DELETE the repository *including* all archives it contains:")
- for archive_info in manifest.list_archive_infos(sort_by='ts'):
+ for archive_info in manifest.archives.list(sort_by='ts'):
msg.append(format_archive(archive_info))
msg.append("Type 'YES' if you understand this and want to continue: ")
msg = '\n'.join(msg)
@@ -812,7 +812,7 @@ class Archiver:
format = "{archive:<36} {time} [{id}]{NL}"
formatter = ArchiveFormatter(format)
- for archive_info in manifest.list_archive_infos(sort_by='ts'):
+ for archive_info in manifest.archives.list(sort_by='ts'):
if args.prefix and not archive_info.name.startswith(args.prefix):
continue
write(safe_encode(formatter.format_item(archive_info)))
@@ -857,7 +857,7 @@ class Archiver:
'"keep-secondly", "keep-minutely", "keep-hourly", "keep-daily", '
'"keep-weekly", "keep-monthly" or "keep-yearly" settings must be specified.')
return self.exit_code
- archives_checkpoints = manifest.list_archive_infos(sort_by='ts', reverse=True) # just a ArchiveInfo list
+ archives_checkpoints = manifest.archives.list(sort_by='ts', reverse=True) # just a ArchiveInfo list
if args.prefix:
archives_checkpoints = [arch for arch in archives_checkpoints if arch.name.startswith(args.prefix)]
is_checkpoint = re.compile(r'\.checkpoint(\.\d+)?$').search
@@ -974,7 +974,7 @@ class Archiver:
if args.target is not None:
self.print_error('--target: Need to specify single archive')
return self.exit_code
- for archive in manifest.list_archive_infos(sort_by='ts'):
+ for archive in manifest.archives.list(sort_by='ts'):
name = archive.name
if recreater.is_temporary_archive(name):
continue
diff --git a/src/borg/cache.py b/src/borg/cache.py
index 3f685a949..f325ff25d 100644
--- a/src/borg/cache.py
+++ b/src/borg/cache.py
@@ -279,7 +279,7 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
return set()
def repo_archives():
- return set(info[b'id'] for info in self.manifest.archives.values())
+ return set(info.id for info in self.manifest.archives.list())
def cleanup_outdated(ids):
for id in ids:
@@ -318,9 +318,9 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
return chunk_idx
def lookup_name(archive_id):
- for name, info in self.manifest.archives.items():
- if info[b'id'] == archive_id:
- return name
+ for info in self.manifest.archives.list():
+ if info.id == archive_id:
+ return info.name
def create_master_idx(chunk_idx):
logger.info('Synchronizing chunks cache...')
diff --git a/src/borg/fuse.py b/src/borg/fuse.py
index f5375924e..4e7cf10c5 100644
--- a/src/borg/fuse.py
+++ b/src/borg/fuse.py
@@ -73,11 +73,11 @@ class FuseOperations(llfuse.Operations):
if archive:
self.process_archive(archive)
else:
- for archive_name in manifest.archives:
+ for name in manifest.archives:
# Create archive placeholder inode
archive_inode = self._create_dir(parent=1)
- self.contents[1][os.fsencode(archive_name)] = archive_inode
- self.pending_archives[archive_inode] = Archive(repository, key, manifest, archive_name)
+ self.contents[1][os.fsencode(name)] = archive_inode
+ self.pending_archives[archive_inode] = Archive(repository, key, manifest, name)
def mount(self, mountpoint, mount_options, foreground=False):
"""Mount filesystem on *mountpoint* with *mount_options*."""
diff --git a/src/borg/helpers.py b/src/borg/helpers.py
index ef42ee57e..759291442 100644
--- a/src/borg/helpers.py
+++ b/src/borg/helpers.py
@@ -18,7 +18,7 @@ import time
import unicodedata
import uuid
from binascii import hexlify
-from collections import namedtuple, deque
+from collections import namedtuple, deque, abc
from contextlib import contextmanager
from datetime import datetime, timezone, timedelta
from fnmatch import translate
@@ -97,12 +97,76 @@ def check_extension_modules():
raise ExtensionModuleError
+ArchiveInfo = namedtuple('ArchiveInfo', 'name id ts')
+
+
+class Archives(abc.MutableMapping):
+ """
+ Nice wrapper around the archives dict, making sure only valid types/values get in
+ and we can deal with str keys (and it internally encodes to byte keys) and eiter
+ str timestamps or datetime timestamps.
+ """
+ def __init__(self):
+ # key: encoded archive name, value: dict(b'id': bytes_id, b'time': bytes_iso_ts)
+ self._archives = {}
+
+ def __len__(self):
+ return len(self._archives)
+
+ def __iter__(self):
+ return iter(safe_decode(name) for name in self._archives)
+
+ def __getitem__(self, name):
+ assert isinstance(name, str)
+ _name = safe_encode(name)
+ values = self._archives.get(_name)
+ if values is None:
+ raise KeyError
+ ts = parse_timestamp(values[b'time'].decode('utf-8'))
+ return ArchiveInfo(name=name, id=values[b'id'], ts=ts)
+
+ def __setitem__(self, name, info):
+ assert isinstance(name, str)
+ name = safe_encode(name)
+ assert isinstance(info, tuple)
+ id, ts = info
+ assert isinstance(id, bytes)
+ if isinstance(ts, datetime):
+ ts = ts.replace(tzinfo=None).isoformat()
+ assert isinstance(ts, str)
+ ts = ts.encode()
+ self._archives[name] = {b'id': id, b'time': ts}
+
+ def __delitem__(self, name):
+ assert isinstance(name, str)
+ name = safe_encode(name)
+ del self._archives[name]
+
+ def list(self, sort_by=None, reverse=False):
+ # inexpensive Archive.list_archives replacement if we just need .name, .id, .ts
+ archives = self.values() # [self[name] for name in self]
+ if sort_by is not None:
+ archives = sorted(archives, key=attrgetter(sort_by), reverse=reverse)
+ return archives
+
+ def set_raw_dict(self, d):
+ """set the dict we get from the msgpack unpacker"""
+ for k, v in d.items():
+ assert isinstance(k, bytes)
+ assert isinstance(v, dict) and b'id' in v and b'time' in v
+ self._archives[k] = v
+
+ def get_raw_dict(self):
+ """get the dict we can give to the msgpack packer"""
+ return self._archives
+
+
class Manifest:
MANIFEST_ID = b'\0' * 32
def __init__(self, key, repository, item_keys=None):
- self.archives = {}
+ self.archives = Archives()
self.config = {}
self.key = key
self.repository = repository
@@ -129,7 +193,7 @@ class Manifest:
m = ManifestItem(internal_dict=msgpack.unpackb(data))
if m.get('version') != 1:
raise ValueError('Invalid manifest version')
- manifest.archives = {safe_decode(k): v for k, v in m.archives.items()}
+ manifest.archives.set_raw_dict(m.archives)
manifest.timestamp = m.get('timestamp')
manifest.config = m.config
# valid item keys are whatever is known in the repo or every key we know
@@ -141,7 +205,7 @@ class Manifest:
self.timestamp = datetime.utcnow().isoformat()
manifest = ManifestItem(
version=1,
- archives=self.archives,
+ archives=self.archives.get_raw_dict(),
timestamp=self.timestamp,
config=self.config,
item_keys=tuple(self.item_keys),
@@ -150,18 +214,6 @@ class Manifest:
self.id = self.key.id_hash(data)
self.repository.put(self.MANIFEST_ID, self.key.encrypt(Chunk(data)))
- def list_archive_infos(self, sort_by=None, reverse=False):
- # inexpensive Archive.list_archives replacement if we just need .name, .id, .ts
- ArchiveInfo = namedtuple('ArchiveInfo', 'name id ts')
- archives = []
- for name, values in self.archives.items():
- ts = parse_timestamp(values[b'time'].decode('utf-8'))
- id = values[b'id']
- archives.append(ArchiveInfo(name=name, id=id, ts=ts))
- if sort_by is not None:
- archives = sorted(archives, key=attrgetter(sort_by), reverse=reverse)
- return archives
-
def prune_within(archives, within):
multiplier = {'H': 1, 'd': 24, 'w': 24 * 7, 'm': 24 * 31, 'y': 24 * 365}