summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorThomas Waldmann <tw@waldmann-edv.de>2015-05-26 02:04:41 +0200
committerThomas Waldmann <tw@waldmann-edv.de>2015-05-26 02:04:41 +0200
commitd067bc31784b6650135719f8914d6d2e540c2d2c (patch)
tree8687f2f4850610ae537146cdb84ca49070cae9f1
parent74409e4fcb4d588640b7ccdf7e3f5842c7b8b87b (diff)
efficient archive list from manifest
A big speedup for "list <repo>", the archive listing shown by "delete <repo>", and "prune" - especially over slow connections to remote repositories. The previous method used metadata from each archive itself, which is (in total) rather large, so if you had many archives and a slow (remote) connection, it was very slow. But there is a much easier way: just use the archives list from the repository manifest - we already have it anyway, and it also has name, id and timestamp for all archives - and that's all we need. I defined an ArchiveInfo namedtuple whose element names match the attribute names of the Archive object, so as long as name, id and ts are enough, it can be used in its place.
-rw-r--r--borg/archive.py1
-rw-r--r--borg/archiver.py13
-rw-r--r--borg/helpers.py13
3 files changed, 20 insertions, 7 deletions
diff --git a/borg/archive.py b/borg/archive.py
index a2cc59b99..3eccbbeef 100644
--- a/borg/archive.py
+++ b/borg/archive.py
@@ -494,6 +494,7 @@ class Archive:
@staticmethod
def list_archives(repository, key, manifest, cache=None):
+ # expensive! see also Manifest.list_archive_infos.
for name, info in manifest.archives.items():
yield Archive(repository, key, manifest, name, cache=cache)
diff --git a/borg/archiver.py b/borg/archiver.py
index 79bf65f03..4b13e47fd 100644
--- a/borg/archiver.py
+++ b/borg/archiver.py
@@ -284,8 +284,8 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
stats.print_('Deleted data:', cache)
else:
print("You requested to completely DELETE the repository *including* all archives it contains:")
- for archive in sorted(Archive.list_archives(repository, key, manifest), key=attrgetter('ts')):
- print(format_archive(archive))
+ for archive_info in manifest.list_archive_infos(sort_by='ts'):
+ print(format_archive(archive_info))
print("""Type "YES" if you understand this and want to continue.\n""")
if input('Do you want to continue? ') == 'YES':
repository.destroy()
@@ -354,8 +354,8 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
item[b'group'] or item[b'gid'], size, format_time(mtime),
remove_surrogates(item[b'path']), extra))
else:
- for archive in sorted(Archive.list_archives(repository, key, manifest), key=attrgetter('ts')):
- print(format_archive(archive))
+ for archive_info in manifest.list_archive_infos(sort_by='ts'):
+ print(format_archive(archive_info))
return self.exit_code
def do_info(self, args):
@@ -380,8 +380,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
repository = self.open_repository(args.repository, exclusive=True)
manifest, key = Manifest.load(repository)
cache = Cache(repository, key, manifest, do_files=args.cache_files)
- archives = list(sorted(Archive.list_archives(repository, key, manifest, cache),
- key=attrgetter('ts'), reverse=True))
+ archives = manifest.list_archive_infos(sort_by='ts', reverse=True) # just a ArchiveInfo list
if args.hourly + args.daily + args.weekly + args.monthly + args.yearly == 0 and args.within is None:
self.print_error('At least one of the "within", "hourly", "daily", "weekly", "monthly" or "yearly" '
'settings must be specified')
@@ -412,7 +411,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
self.print_verbose('Would prune: %s' % format_archive(archive))
else:
self.print_verbose('Pruning archive: %s' % format_archive(archive))
- archive.delete(stats)
+ Archive(repository, key, manifest, archive.name, cache).delete(stats)
if to_delete and not args.dry_run:
manifest.write()
repository.commit()
diff --git a/borg/helpers.py b/borg/helpers.py
index f96c1bf52..e97c88bf2 100644
--- a/borg/helpers.py
+++ b/borg/helpers.py
@@ -1,5 +1,6 @@
import argparse
import binascii
+from collections import namedtuple
import grp
import msgpack
import os
@@ -119,6 +120,18 @@ class Manifest:
self.id = self.key.id_hash(data)
self.repository.put(self.MANIFEST_ID, self.key.encrypt(data))
+ def list_archive_infos(self, sort_by=None, reverse=False):
+ # inexpensive Archive.list_archives replacement if we just need .name, .id, .ts
+ ArchiveInfo = namedtuple('ArchiveInfo', 'name id ts')
+ archives = []
+ for name, values in self.archives.items():
+ ts = parse_timestamp(values[b'time'].decode('utf-8'))
+ id = values[b'id']
+ archives.append(ArchiveInfo(name=name, id=id, ts=ts))
+ if sort_by is not None:
+ archives = sorted(archives, key=attrgetter(sort_by), reverse=reverse)
+ return archives
+
def prune_within(archives, within):
multiplier = {'H': 1, 'd': 24, 'w': 24*7, 'm': 24*31, 'y': 24*365}