summaryrefslogtreecommitdiffstats
path: root/lib/fileset
diff options
context:
space:
mode:
authorSilvan Mosberger <contact@infinisil.com>2023-11-01 19:40:45 +0100
committerGitHub <noreply@github.com>2023-11-01 19:40:45 +0100
commitfc28c5e5b79ada17ce17b5c83e07a2949b959fcf (patch)
tree8955af520cba3d6f3539d8b182960b7bd376f31b /lib/fileset
parent1c7f17f39502a27885d9b9c9505c612adb41823e (diff)
parent50df7f977548c296e475ad37c1d44afcdc9f8e26 (diff)
Merge pull request #259065 from tweag/fileset.difference
`lib.fileset.difference`: init
Diffstat (limited to 'lib/fileset')
-rw-r--r--lib/fileset/default.nix53
-rw-r--r--lib/fileset/internal.nix80
-rwxr-xr-xlib/fileset/tests.sh98
3 files changed, 231 insertions, 0 deletions
diff --git a/lib/fileset/default.nix b/lib/fileset/default.nix
index 0342be3e0371..4a97633b4a89 100644
--- a/lib/fileset/default.nix
+++ b/lib/fileset/default.nix
@@ -9,6 +9,7 @@ let
_fileFilter
_printFileset
_intersection
+ _difference
;
inherit (builtins)
@@ -369,6 +370,58 @@ If a directory does not recursively contain any file, it is omitted from the sto
(elemAt filesets 1);
/*
+ The file set containing all files from the first file set that are not in the second file set.
+ See also [Difference (set theory)](https://en.wikipedia.org/wiki/Complement_(set_theory)#Relative_complement).
+
+ The given file sets are evaluated as lazily as possible,
+ with the first argument being evaluated first if needed.
+
+ Type:
+ union :: FileSet -> FileSet -> FileSet
+
+ Example:
+ # Create a file set containing all files from the current directory,
+ # except ones under ./tests
+ difference ./. ./tests
+
+ let
+ # A set of Nix-related files
+ nixFiles = unions [ ./default.nix ./nix ./tests/default.nix ];
+ in
+ # Create a file set containing all files under ./tests, except ones in `nixFiles`,
+ # meaning only without ./tests/default.nix
+ difference ./tests nixFiles
+ */
+ difference =
+ # The positive file set.
+ # The result can only contain files that are also in this file set.
+ #
+ # This argument can also be a path,
+ # which gets [implicitly coerced to a file set](#sec-fileset-path-coercion).
+ positive:
+ # The negative file set.
+ # The result will never contain files that are also in this file set.
+ #
+ # This argument can also be a path,
+ # which gets [implicitly coerced to a file set](#sec-fileset-path-coercion).
+ negative:
+ let
+ filesets = _coerceMany "lib.fileset.difference" [
+ {
+ context = "first argument (positive set)";
+ value = positive;
+ }
+ {
+ context = "second argument (negative set)";
+ value = negative;
+ }
+ ];
+ in
+ _difference
+ (elemAt filesets 0)
+ (elemAt filesets 1);
+
+ /*
Incrementally evaluate and trace a file set in a pretty way.
This function is only intended for debugging purposes.
The exact tracing format is unspecified and may change.
diff --git a/lib/fileset/internal.nix b/lib/fileset/internal.nix
index 76b95c6ae471..b919a5de3eef 100644
--- a/lib/fileset/internal.nix
+++ b/lib/fileset/internal.nix
@@ -651,6 +651,86 @@ rec {
# In all other cases it's the rhs
rhs;
+ # Compute the set difference between two file sets.
+ # The filesets must already be coerced and validated to be in the same filesystem root.
+ # Type: Fileset -> Fileset -> Fileset
+ _difference = positive: negative:
+ let
+ # The common base components prefix, e.g.
+ # (/foo/bar, /foo/bar/baz) -> /foo/bar
+ # (/foo/bar, /foo/baz) -> /foo
+ commonBaseComponentsLength =
+ # TODO: Have a `lib.lists.commonPrefixLength` function such that we don't need the list allocation from commonPrefix here
+ length (
+ commonPrefix
+ positive._internalBaseComponents
+ negative._internalBaseComponents
+ );
+
+ # We need filesetTree's with the same base to be able to compute the difference between them
+ # This here is the filesetTree from the negative file set, but for a base path that matches the positive file set.
+ # Examples:
+ # For `difference /foo /foo/bar`, `negativeTreeWithPositiveBase = { bar = "directory"; }`
+ # because under the base path of `/foo`, only `bar` from the negative file set is included
+ # For `difference /foo/bar /foo`, `negativeTreeWithPositiveBase = "directory"`
+ # because under the base path of `/foo/bar`, everything from the negative file set is included
+ # For `difference /foo /bar`, `negativeTreeWithPositiveBase = null`
+ # because under the base path of `/foo`, nothing from the negative file set is included
+ negativeTreeWithPositiveBase =
+ if commonBaseComponentsLength == length positive._internalBaseComponents then
+ # The common prefix is the same as the positive base path, so the second path is equal or longer.
+ # We need to _shorten_ the negative filesetTree to the same base path as the positive one
+ # E.g. for `difference /foo /foo/bar` the common prefix is /foo, equal to the positive file set's base
+ # So we need to shorten the base of the tree for the negative argument from /foo/bar to just /foo
+ _shortenTreeBase positive._internalBaseComponents negative
+ else if commonBaseComponentsLength == length negative._internalBaseComponents then
+ # The common prefix is the same as the negative base path, so the first path is longer.
+ # We need to lengthen the negative filesetTree to the same base path as the positive one.
+ # E.g. for `difference /foo/bar /foo` the common prefix is /foo, equal to the negative file set's base
+ # So we need to lengthen the base of the tree for the negative argument from /foo to /foo/bar
+ _lengthenTreeBase positive._internalBaseComponents negative
+ else
+ # The common prefix is neither the first nor the second path.
+ # This means there's no overlap between the two file sets,
+ # and nothing from the negative argument should get removed from the positive one
+ # E.g for `difference /foo /bar`, we remove nothing to get the same as `/foo`
+ null;
+
+ resultingTree =
+ _differenceTree
+ positive._internalBase
+ positive._internalTree
+ negativeTreeWithPositiveBase;
+ in
+ # If the first file set is empty, we can never have any files in the result
+ if positive._internalIsEmptyWithoutBase then
+ _emptyWithoutBase
+ # If the second file set is empty, nothing gets removed, so the result is just the first file set
+ else if negative._internalIsEmptyWithoutBase then
+ positive
+ else
+ # We use the positive file set base for the result,
+ # because only files from the positive side may be included,
+ # which is what base path is for
+ _create positive._internalBase resultingTree;
+
+ # Computes the set difference of two filesetTree's
+ # Type: Path -> filesetTree -> filesetTree
+ _differenceTree = path: lhs: rhs:
+ # If the lhs doesn't have any files, or the right hand side includes all files
+ if lhs == null || isString rhs then
+ # The result will always be empty
+ null
+ # If the right hand side has no files
+ else if rhs == null then
+ # The result is always the left hand side, because nothing gets removed
+ lhs
+ else
+ # Otherwise we always have two attribute sets to recurse into
+ mapAttrs (name: lhsValue:
+ _differenceTree (path + "/${name}") lhsValue (rhs.${name} or null)
+ ) (_directoryEntries path lhs);
+
_fileFilter = predicate: fileset:
let
recurse = path: tree:
diff --git a/lib/fileset/tests.sh b/lib/fileset/tests.sh
index 5b756b8fc593..2df0727bde38 100755
--- a/lib/fileset/tests.sh
+++ b/lib/fileset/tests.sh
@@ -684,6 +684,104 @@ tree=(
)
checkFileset 'intersection (unions [ ./a/b ./c/d ./c/e ]) (unions [ ./a ./c/d/f ./c/e ])'
+## Difference
+
+# Subtracting something from itself results in nothing
+tree=(
+ [a]=0
+)
+checkFileset 'difference ./. ./.'
+
+# The tree of the second argument should not be evaluated if not needed
+checkFileset 'difference _emptyWithoutBase (_create ./. (abort "This should not be used!"))'
+checkFileset 'difference (_create ./. null) (_create ./. (abort "This should not be used!"))'
+
+# Subtracting nothing gives the same thing back
+tree=(
+ [a]=1
+)
+checkFileset 'difference ./. _emptyWithoutBase'
+checkFileset 'difference ./. (_create ./. null)'
+
+# Subtracting doesn't influence the base path
+mkdir a b
+touch {a,b}/x
+expectEqual 'toSource { root = ./a; fileset = difference ./a ./b; }' 'toSource { root = ./a; fileset = ./a; }'
+rm -rf -- *
+
+# Also not the other way around
+mkdir a
+expectFailure 'toSource { root = ./a; fileset = difference ./. ./a; }' 'lib.fileset.toSource: `fileset` could contain files in '"$work"', which is not under the `root` \('"$work"'/a\). Potential solutions:
+\s*- Set `root` to '"$work"' or any directory higher up. This changes the layout of the resulting store path.
+\s*- Set `fileset` to a file set that cannot contain files outside the `root` \('"$work"'/a\). This could change the files included in the result.'
+rm -rf -- *
+
+# Difference actually works
+# We test all combinations of ./., ./a, ./a/x and ./b
+tree=(
+ [a/x]=0
+ [a/y]=0
+ [b]=0
+ [c]=0
+)
+checkFileset 'difference ./. ./.'
+checkFileset 'difference ./a ./.'
+checkFileset 'difference ./a/x ./.'
+checkFileset 'difference ./b ./.'
+checkFileset 'difference ./a ./a'
+checkFileset 'difference ./a/x ./a'
+checkFileset 'difference ./a/x ./a/x'
+checkFileset 'difference ./b ./b'
+tree=(
+ [a/x]=0
+ [a/y]=0
+ [b]=1
+ [c]=1
+)
+checkFileset 'difference ./. ./a'
+tree=(
+ [a/x]=1
+ [a/y]=1
+ [b]=0
+ [c]=0
+)
+checkFileset 'difference ./a ./b'
+tree=(
+ [a/x]=1
+ [a/y]=0
+ [b]=0
+ [c]=0
+)
+checkFileset 'difference ./a/x ./b'
+tree=(
+ [a/x]=0
+ [a/y]=1
+ [b]=0
+ [c]=0
+)
+checkFileset 'difference ./a ./a/x'
+tree=(
+ [a/x]=0
+ [a/y]=0
+ [b]=1
+ [c]=0
+)
+checkFileset 'difference ./b ./a'
+checkFileset 'difference ./b ./a/x'
+tree=(
+ [a/x]=0
+ [a/y]=1
+ [b]=1
+ [c]=1
+)
+checkFileset 'difference ./. ./a/x'
+tree=(
+ [a/x]=1
+ [a/y]=1
+ [b]=0
+ [c]=1
+)
+checkFileset 'difference ./. ./b'
## File filter