summaryrefslogtreecommitdiffstats
path: root/lib/fileset
diff options
context:
space:
mode:
authorSilvan Mosberger <silvan.mosberger@tweag.io>2023-10-18 00:16:50 +0200
committerSilvan Mosberger <silvan.mosberger@tweag.io>2023-11-08 21:21:30 +0100
commit73493584a74cddb22d785942a2215c3dab12c29e (patch)
tree4ca00ef53322cd9033c7eb6a76f54bcb5578625c /lib/fileset
parentf82a1686e6840c396a141ff122e2f39d64fb6d4b (diff)
lib.fileset.fromSource: init
Diffstat (limited to 'lib/fileset')
-rw-r--r--lib/fileset/default.nix71
-rw-r--r--lib/fileset/internal.nix53
-rwxr-xr-xlib/fileset/tests.sh213
3 files changed, 337 insertions, 0 deletions
diff --git a/lib/fileset/default.nix b/lib/fileset/default.nix
index 7bd701670386..53edc9e77097 100644
--- a/lib/fileset/default.nix
+++ b/lib/fileset/default.nix
@@ -3,8 +3,10 @@ let
inherit (import ./internal.nix { inherit lib; })
_coerce
+ _singleton
_coerceMany
_toSourceFilter
+ _fromSourceFilter
_unionMany
_printFileset
_intersection
@@ -188,6 +190,75 @@ If a directory does not recursively contain any file, it is omitted from the sto
};
/*
+ Create a file set with the same files as a `lib.sources`-based value.
+ This does not import any of the files into the store.
+
+ This can be used to gradually migrate from `lib.sources`-based filtering to `lib.fileset`.
+
+ A file set can be turned back into a source using [`toSource`](#function-library-lib.fileset.toSource).
+
+ :::{.note}
+ File sets cannot represent empty directories.
+ Turning the result of this function back into a source using `toSource` will therefore not preserve empty directories.
+ :::
+
+ Type:
+ fromSource :: SourceLike -> FileSet
+
+ Example:
+ # There's no cleanSource-like function for file sets yet,
+ # but we can just convert cleanSource to a file set and use it that way
+ toSource {
+ root = ./.;
+ fileset = fromSource (lib.sources.cleanSource ./.);
+ }
+
+ # Keeping a previous sourceByRegex (which could be migrated to `lib.fileset.unions`),
+ # but removing a subdirectory using file set functions
+ difference
+ (fromSource (lib.sources.sourceByRegex ./. [
+ "^README\.md$"
+ # This regex includes everything in ./doc
+ "^doc(/.*)?$"
+ ])
+ ./doc/generated
+
+ # Use cleanSource, but limit it to only include ./Makefile and files under ./src
+ intersection
+ (fromSource (lib.sources.cleanSource ./.))
+ (unions [
+ ./Makefile
+ ./src
+ ]);
+ */
+ fromSource = source:
+ let
+ # This function uses `._isLibCleanSourceWith`, `.origSrc` and `.filter`,
+ # which are technically internal to lib.sources,
+ # but we'll allow this since both libraries are in the same code base
+ # and this function is a bridge between them.
+ isFiltered = source ? _isLibCleanSourceWith;
+ path = if isFiltered then source.origSrc else source;
+ in
+ # We can only support sources created from paths
+ if ! isPath path then
+ if isStringLike path then
+ throw ''
+ lib.fileset.fromSource: The source origin of the argument is a string-like value ("${toString path}"), but it should be a path instead.
+ Sources created from paths in strings cannot be turned into file sets, use `lib.sources` or derivations instead.''
+ else
+ throw ''
+ lib.fileset.fromSource: The source origin of the argument is of type ${typeOf path}, but it should be a path instead.''
+ else if ! pathExists path then
+ throw ''
+ lib.fileset.fromSource: The source origin (${toString path}) of the argument does not exist.''
+ else if isFiltered then
+ _fromSourceFilter path source.filter
+ else
+ # If there's no filter, no need to run the expensive conversion, all subpaths will be included
+ _singleton path;
+
+ /*
The file set containing all files that are in either of two given file sets.
This is the same as [`unions`](#function-library-lib.fileset.unions),
but takes just two file sets instead of a list.
diff --git a/lib/fileset/internal.nix b/lib/fileset/internal.nix
index 9892172955c3..031262520785 100644
--- a/lib/fileset/internal.nix
+++ b/lib/fileset/internal.nix
@@ -461,6 +461,59 @@ rec {
else
nonEmpty;
+ # Turn a builtins.filterSource-based source filter on a root path into a file set
+ # containing only files included by the filter.
+ # The filter is lazily called as necessary to determine whether paths are included
+ # Type: Path -> (String -> String -> Bool) -> fileset
+ _fromSourceFilter = root: sourceFilter:
+ let
+ # During the recursion we need to track both:
+ # - The path value such that we can safely call `readDir` on it
+ # - The path string value such that we can correctly call the `filter` with it
+ #
+ # While we could just recurse with the path value,
+ # this would then require converting it to a path string for every path,
+ # which is a fairly expensive operation
+
+ # Create a file set from a directory entry
+ fromDirEntry = path: pathString: type:
+ # The filter needs to run on the path as a string
+ if ! sourceFilter pathString type then
+ null
+ else if type == "directory" then
+ fromDir path pathString
+ else
+ type;
+
+ # Create a file set from a directory
+ fromDir = path: pathString:
+ mapAttrs
+ # This looks a bit funny, but we need both the path-based and the path string-based values
+ (name: fromDirEntry (path + "/${name}") (pathString + "/${name}"))
+ # We need to readDir on the path value, because reading on a path string
+ # would be unspecified if there are multiple filesystem roots
+ (readDir path);
+
+ rootPathType = pathType root;
+
+ # We need to convert the path to a string to imitate what builtins.path calls the filter function with.
+ # We don't want to rely on `toString` for this though because it's not very well defined, see ../path/README.md
+ # So instead we use `lib.path.splitRoot` to safely deconstruct the path into its filesystem root and subpath
+ # We don't need the filesystem root though, builtins.path doesn't expose that in any way to the filter.
+ # So we only need the components, which we then turn into a string as one would expect.
+ rootString = "/" + concatStringsSep "/" (components (splitRoot root).subpath);
+ in
+ if rootPathType == "directory" then
+ # We imitate builtins.path not calling the filter on the root path
+ _create root (fromDir root rootString)
+ else
+ # Direct files are always included by builtins.path without calling the filter
+ # But we need to lift up the base path to its parent to satisfy the base path invariant
+ _create (dirOf root)
+ {
+ ${baseNameOf root} = rootPathType;
+ };
+
# Transforms the filesetTree of a file set to a shorter base path, e.g.
# _shortenTreeBase [ "foo" ] (_create /foo/bar null)
# => { bar = null; }
diff --git a/lib/fileset/tests.sh b/lib/fileset/tests.sh
index ead6d4c49569..13f02bb656b9 100755
--- a/lib/fileset/tests.sh
+++ b/lib/fileset/tests.sh
@@ -1,5 +1,7 @@
#!/usr/bin/env bash
# shellcheck disable=SC2016
+# shellcheck disable=SC2317
+# shellcheck disable=SC2192
# Tests lib.fileset
# Run:
@@ -839,6 +841,217 @@ touch 0 "${filesToCreate[@]}"
expectTrace 'unions (mapAttrsToList (n: _: ./. + "/${n}") (removeAttrs (builtins.readDir ./.) [ "0" ]))' "$expectedTrace"
rm -rf -- *
+## lib.fileset.fromSource
+
+# Check error messages
+expectFailure 'fromSource null' 'lib.fileset.fromSource: The source origin of the argument is of type null, but it should be a path instead.'
+
+expectFailure 'fromSource (lib.cleanSource "")' 'lib.fileset.fromSource: The source origin of the argument is a string-like value \(""\), but it should be a path instead.
+\s*Sources created from paths in strings cannot be turned into file sets, use `lib.sources` or derivations instead.'
+
+expectFailure 'fromSource (lib.cleanSource null)' 'lib.fileset.fromSource: The source origin of the argument is of type null, but it should be a path instead.'
+
+# fromSource on a path works and is the same as coercing that path
+mkdir a
+touch a/b c
+expectEqual 'trace (fromSource ./.) null' 'trace ./. null'
+rm -rf -- *
+
+# Check that converting to a file set doesn't read the included files
+mkdir a
+touch a/b
+run() {
+ expectEqual "trace (fromSource (lib.cleanSourceWith { src = ./a; })) null" "builtins.trace \"$work/a (all files in directory)\" null"
+ rm a/b
+}
+withFileMonitor run a/b
+rm -rf -- *
+
+# Check that converting to a file set doesn't read entries for directories that are filtered out
+mkdir -p a/b
+touch a/b/c
+run() {
+ expectEqual "trace (fromSource (lib.cleanSourceWith {
+ src = ./a;
+ filter = pathString: type: false;
+ })) null" "builtins.trace \"(empty)\" null"
+ rm a/b/c
+ rmdir a/b
+}
+withFileMonitor run a/b
+rm -rf -- *
+
+# The filter is not needed on empty directories
+expectEqual 'trace (fromSource (lib.cleanSourceWith {
+ src = ./.;
+ filter = abort "filter should not be needed";
+})) null' 'trace _emptyWithoutBase null'
+
+# Single files also work
+touch a b
+expectEqual 'trace (fromSource (cleanSourceWith { src = ./a; })) null' 'trace ./a null'
+rm -rf -- *
+
+# For a tree assigning each subpath true/false,
+# check whether a source filter with those results includes the same files
+# as a file set created using fromSource. Usage:
+#
+# tree=(
+# [a]=1 # ./a is a file and the filter should return true for it
+# [b/]=0 # ./b is a directory and the filter should return false for it
+# )
+# checkSource
+checkSource() {
+ createTree
+
+ # Serialise the tree as JSON (there's only minimal savings with jq,
+ # and we don't need to handle escapes)
+ {
+ echo "{"
+ first=1
+ for p in "${!tree[@]}"; do
+ if [[ -z "$first" ]]; then
+ echo ","
+ else
+ first=
+ fi
+ echo "\"$p\":"
+ case "${tree[$p]}" in
+ 1)
+ echo "true"
+ ;;
+ 0)
+ echo "false"
+ ;;
+ *)
+ die "Unsupported tree value: ${tree[$p]}"
+ esac
+ done
+ echo "}"
+ } > "$tmp/tree.json"
+
+ # An expression to create a source value with a filter matching the tree
+ sourceExpr='
+ let
+ tree = importJSON '"$tmp"'/tree.json;
+ in
+ cleanSourceWith {
+ src = ./.;
+ filter =
+ pathString: type:
+ let
+ stripped = removePrefix (toString ./. + "/") pathString;
+ key = stripped + optionalString (type == "directory") "/";
+ in
+ tree.${key} or
+ (throw "tree key ${key} missing");
+ }
+ '
+
+ filesetExpr='
+ toSource {
+ root = ./.;
+ fileset = fromSource ('"$sourceExpr"');
+ }
+ '
+
+ # Turn both into store paths
+ sourceStorePath=$(expectStorePath "$sourceExpr")
+ filesetStorePath=$(expectStorePath "$filesetExpr")
+
+ # Loop through each path in the tree
+ while IFS= read -r -d $'\0' subpath; do
+ if [[ ! -e "$sourceStorePath"/"$subpath" ]]; then
+ # If it's not in the source store path, it's also not in the file set store path
+ if [[ -e "$filesetStorePath"/"$subpath" ]]; then
+ die "The store path $sourceStorePath created by $expr doesn't contain $subpath, but the corresponding store path $filesetStorePath created via fromSource does contain $subpath"
+ fi
+ elif [[ -z "$(find "$sourceStorePath"/"$subpath" -type f)" ]]; then
+ # If it's an empty directory in the source store path, it shouldn't be in the file set store path
+ if [[ -e "$filesetStorePath"/"$subpath" ]]; then
+ die "The store path $sourceStorePath created by $expr contains the path $subpath without any files, but the corresponding store path $filesetStorePath created via fromSource didn't omit it"
+ fi
+ else
+ # If it's non-empty directory or a file, it should be in the file set store path
+ if [[ ! -e "$filesetStorePath"/"$subpath" ]]; then
+ die "The store path $sourceStorePath created by $expr contains the non-empty path $subpath, but the corresponding store path $filesetStorePath created via fromSource doesn't include it"
+ fi
+ fi
+ done < <(find . -mindepth 1 -print0)
+
+ rm -rf -- *
+}
+
+# Check whether the filter is evaluated correctly
+tree=(
+ [a]=
+ [b/]=
+ [b/c]=
+ [b/d]=
+ [e/]=
+ [e/e/]=
+)
+# We fill out the above tree values with all possible combinations of 0 and 1
+# Then check whether a filter based on those return values gets turned into the corresponding file set
+for i in $(seq 0 $((2 ** ${#tree[@]} - 1 ))); do
+ for p in "${!tree[@]}"; do
+ tree[$p]=$(( i % 2 ))
+ (( i /= 2 )) || true
+ done
+ checkSource
+done
+
+# The filter is called with the same arguments in the same order
+mkdir a e
+touch a/b a/c d e
+expectEqual '
+ trace (fromSource (cleanSourceWith {
+ src = ./.;
+ filter = pathString: type: builtins.trace "${pathString} ${toString type}" true;
+ })) null
+' '
+ builtins.seq (cleanSourceWith {
+ src = ./.;
+ filter = pathString: type: builtins.trace "${pathString} ${toString type}" true;
+ }).outPath
+ builtins.trace "'"$work"' (all files in directory)"
+ null
+'
+rm -rf -- *
+
+# Test that if a directory is not included, the filter isn't called on its contents
+mkdir a b
+touch a/c b/d
+expectEqual 'trace (fromSource (cleanSourceWith {
+ src = ./.;
+ filter = pathString: type:
+ if pathString == toString ./a then
+ false
+ else if pathString == toString ./b then
+ true
+ else if pathString == toString ./b/d then
+ true
+ else
+ abort "This filter should not be called with path ${pathString}";
+})) null' 'trace (_create ./. { b = "directory"; }) null'
+rm -rf -- *
+
+# The filter is called lazily:
+# If a later say intersection removes a part of the tree, the filter won't run on it
+mkdir a d
+touch a/{b,c} d/e
+expectEqual 'trace (intersection ./a (fromSource (lib.cleanSourceWith {
+ src = ./.;
+ filter = pathString: type:
+ if pathString == toString ./a || pathString == toString ./a/b then
+ true
+ else if pathString == toString ./a/c then
+ false
+ else
+ abort "filter should not be called on ${pathString}";
+}))) null' 'trace ./a/b null'
+rm -rf -- *
+
# TODO: Once we have combinators and a property testing library, derive property tests from https://en.wikipedia.org/wiki/Algebra_of_sets
echo >&2 tests ok