diff options
author | Matthias Beyer <matthias.beyer@atos.net> | 2021-08-27 19:41:31 +0200 |
---|---|---|
committer | Matthias Beyer <matthias.beyer@atos.net> | 2021-08-27 19:41:31 +0200 |
commit | ce01861e6ef3b652eeafbef5b87c293b3a971d89 (patch) | |
tree | 6f60e5cb5604f00bfe8a29cbc11bd2d754ed220d | |
parent | 5226132f7e43d0b12d8f58aabef7f5785d5f03fe (diff) | |
parent | 5b52e6a0269eb2a0c3d69a1ce3312ba07d4dc997 (diff) |
Merge branch 'fast-repository-loading'
After some completely non-scientific benchmarking, these patches bring down the
loading time for a real-world repository from over 20 sec to about 17 sec on a
debug build of butido, to 5 sec in a release build, and even to 2 sec once the
filesystem cache has warmed up a bit.
So I guess we can safely merge this now and be happy with it (and go back to the
more important issues).
-rw-r--r-- | Cargo.toml | 2 | ||||
-rw-r--r-- | src/repository/fs/element.rs | 33 | ||||
-rw-r--r-- | src/repository/fs/mod.rs | 18 | ||||
-rw-r--r-- | src/repository/fs/path.rs | 74 | ||||
-rw-r--r-- | src/repository/fs/representation.rs | 432 | ||||
-rw-r--r-- | src/repository/mod.rs | 3 | ||||
-rw-r--r-- | src/repository/repository.rs | 275 |
7 files changed, 656 insertions, 181 deletions
@@ -52,10 +52,12 @@ log = "0.4" parse-display = "0.5" pom = "3" ptree = "0.3" +rayon = "1.5" regex = "1" reqwest = { version = "0.11", features = [ "stream" ] } resiter = "0.4" result-inspect = "0.2" +rlimit = "0.6" semver = { version = "1.0", features = [ "serde" ] } serde = "1" serde_json = "1" diff --git a/src/repository/fs/element.rs b/src/repository/fs/element.rs new file mode 100644 index 0000000..73b97f6 --- /dev/null +++ b/src/repository/fs/element.rs @@ -0,0 +1,33 @@ +// +// Copyright (c) 2020-2021 science+computing ag and other contributors +// +// This program and the accompanying materials are made +// available under the terms of the Eclipse Public License 2.0 +// which is available at https://www.eclipse.org/legal/epl-2.0/ +// +// SPDX-License-Identifier: EPL-2.0 +// + +use std::collections::HashMap; + +use crate::repository::fs::path::PathComponent; + +/// One element in the tree inside FileSystemRepresentation +/// +/// This is either a File, or a Directory that contains more (Files or Directories). 
+#[derive(Debug)] +pub enum Element { + File(String), + Dir(HashMap<PathComponent, Element>) +} + +impl Element { + /// Helper fn to get the directory contents of the element, if the element is an Element::Dir + pub fn get_map_mut(&mut self) -> Option<&mut HashMap<PathComponent, Element>> { + match self { + Element::File(_) => None, + Element::Dir(ref mut hm) => Some(hm), + } + } +} + diff --git a/src/repository/fs/mod.rs b/src/repository/fs/mod.rs new file mode 100644 index 0000000..f1fce78 --- /dev/null +++ b/src/repository/fs/mod.rs @@ -0,0 +1,18 @@ +// +// Copyright (c) 2020-2021 science+computing ag and other contributors +// +// This program and the accompanying materials are made +// available under the terms of the Eclipse Public License 2.0 +// which is available at https://www.eclipse.org/legal/epl-2.0/ +// +// SPDX-License-Identifier: EPL-2.0 +// + +#![allow(unused)] // TODO: Remove allow(unused) + +mod representation; +pub use representation::FileSystemRepresentation; + +mod element; +mod path; + diff --git a/src/repository/fs/path.rs b/src/repository/fs/path.rs new file mode 100644 index 0000000..b7fb407 --- /dev/null +++ b/src/repository/fs/path.rs @@ -0,0 +1,74 @@ +// +// Copyright (c) 2020-2021 science+computing ag and other contributors +// +// This program and the accompanying materials are made +// available under the terms of the Eclipse Public License 2.0 +// which is available at https://www.eclipse.org/legal/epl-2.0/ +// +// SPDX-License-Identifier: EPL-2.0 +// + +use std::collections::HashMap; +use std::convert::TryFrom; +use std::path::Component; +use std::path::Path; +use std::path::PathBuf; + +use anyhow::anyhow; +use anyhow::Result; + +/// Helper type for filtering for pathes we need or dont need +/// +/// We either have a directory, which has a name, or we have a pkg.toml file, which is of interest. +/// All other files can be ignored and thus are not represented by this type. 
+/// +/// The PathComponent::DirName(_) represents a _part_ of a Path. Something like +/// +/// ```ignore +/// let p = PathBuf::from("foo/bar/baz") +/// p.components().map(PathComponent::DirName) // does not actually work because of types +/// ``` +/// +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum PathComponent { + PkgToml, + DirName(String), +} + +impl TryFrom<&std::path::Component<'_>> for PathComponent { + type Error = anyhow::Error; + + fn try_from(c: &std::path::Component) -> Result<Self> { + match *c { + Component::Prefix(_) => anyhow::bail!("Unexpected path component: Prefix"), + Component::RootDir => anyhow::bail!("Unexpected path component: RootDir"), + Component::CurDir => anyhow::bail!("Unexpected path component: CurDir"), + Component::ParentDir => anyhow::bail!("Unexpected path component: ParentDir"), + Component::Normal(filename) => { + let filename = filename.to_str().ok_or_else(|| anyhow!("UTF8-error"))?; + if filename == "pkg.toml" { + Ok(PathComponent::PkgToml) + } else { + Ok(PathComponent::DirName(filename.to_string())) + } + }, + } + } +} + +impl PathComponent { + /// Helper fn whether this PathComponent is a PathComponent::PkgToml + pub fn is_pkg_toml(&self) -> bool { + std::matches!(self, PathComponent::PkgToml) + } + + /// Helper fn to get the directory name of this PathComponent if it is a PathComponent::DirName + /// or None if it is not. 
+ pub fn dir_name(&self) -> Option<&str> { + match self { + PathComponent::PkgToml => None, + PathComponent::DirName(dn) => Some(dn) + } + } +} + diff --git a/src/repository/fs/representation.rs b/src/repository/fs/representation.rs new file mode 100644 index 0000000..5f92ba7 --- /dev/null +++ b/src/repository/fs/representation.rs @@ -0,0 +1,432 @@ +// +// Copyright (c) 2020-2021 science+computing ag and other contributors +// +// This program and the accompanying materials are made +// available under the terms of the Eclipse Public License 2.0 +// which is available at https://www.eclipse.org/legal/epl-2.0/ +// +// SPDX-License-Identifier: EPL-2.0 +// + +use std::collections::HashMap; +use std::convert::TryFrom; +use std::convert::TryInto; +use std::path::Component; +use std::path::Path; +use std::path::PathBuf; + +use anyhow::Context; +use anyhow::Error; +use anyhow::Result; +use anyhow::anyhow; +use resiter::AndThen; +use resiter::Filter; +use resiter::Map; +use walkdir::DirEntry; +use walkdir::WalkDir; + +use crate::repository::fs::element::Element; +use crate::repository::fs::path::PathComponent; + +/// A type representing the filesystem +/// +/// This type can be used to load pkg.toml files from the filesystem. As soon as this object is +/// loaded, all filesystem access is done and postprocessing of the loaded data can happen +#[derive(Debug, getset::Getters)] +pub struct FileSystemRepresentation { + #[getset(get = "pub")] + root: PathBuf, + + #[getset(get = "pub")] + files: Vec<PathBuf>, + + elements: HashMap<PathComponent, Element>, +} + +impl FileSystemRepresentation { + /// Load the FileSystemRepresentation object starting a `root`. 
+ pub fn load(root: PathBuf) -> Result<Self> { + let mut fsr = FileSystemRepresentation { + root: root.clone(), + elements: HashMap::new(), + files: vec![], + }; + + // get the number of maximum files open (ulimit -n on linux) + let max_files_open = { + let (soft, _hard) = rlimit::getrlimit(rlimit::Resource::NOFILE)?; + + // use less than the soft limit if the soft limit is above 15 + soft.checked_sub(16) + .unwrap_or(soft) + .try_into() // we need to have a usize + .unwrap_or(usize::MAX) // if usize is smaller than u64, usize::MAX will do + }; + + log::trace!("Loading files from filesystem starting at: {}", root.display()); + log::trace!("Loading with a maximum of {} files open", max_files_open); + WalkDir::new(root) + .follow_links(false) + .max_open(max_files_open) + .same_file_system(true) + .into_iter() + .filter_entry(|e| !is_hidden(e) && (is_pkgtoml(e) || is_dir(e))) + .filter_ok(|e| is_pkgtoml(e)) + .inspect(|el| log::trace!("Loading: {:?}", el)) + .map_err(Error::from) + .and_then_ok(|de| { + let mut curr_hm = &mut fsr.elements; + let de_path = de.path().strip_prefix(&fsr.root)?; + fsr.files.push(de_path.to_path_buf()); + + // traverse the HashMap tree + for cmp in de_path.components() { + match PathComponent::try_from(&cmp)? { + PathComponent::PkgToml => { + curr_hm.entry(PathComponent::PkgToml) + .or_insert(Element::File(load_file(de_path)?)); + }, + dir @ PathComponent::DirName(_) => { + curr_hm.entry(dir.clone()) + .or_insert(Element::Dir(HashMap::new())); + + curr_hm = curr_hm.get_mut(&dir) + .unwrap() // safe, because we just inserted it + .get_map_mut() + .unwrap(); // safe, because we inserted Element::Dir + }, + } + } + + Ok(()) + }) + .collect::<Result<Vec<_>>>()?; + + Ok(fsr) + } + + /// Check the tree whether a `Path` points to a file in a directory that does not contain more + /// directories containing pkg.toml files. 
+ /// + /// # Example + /// + /// / + /// /foo/ + /// /foo/pkg.toml <-- is leaf + /// /bar/ + /// /bar/pkg.toml <-- is not a leaf + /// /bar/baz/pkg.toml <-- is a leaf + /// + /// + pub fn is_leaf_file(&self, path: &Path) -> Result<bool> { + let mut curr_hm = &self.elements; + + // Helper to check whether a tree contains pkg.toml files, recursively + fn toml_files_in_tree(hm: &HashMap<PathComponent, Element>) -> bool { + if let Some(Element::File(_)) = hm.get(&PathComponent::PkgToml) { + return true + } + + for value in hm.values() { + match value { + Element::File(_) => return true, + Element::Dir(hm) => if toml_files_in_tree(hm) { + return true + }, + } + } + false + } + + for elem in path.components() { + let elem = PathComponent::try_from(&elem)?; + + match curr_hm.get(&elem) { + Some(Element::File(_)) => { + // if I have a file now, and the current hashmap only holds either + // * No directory + // * or a directory where all subdirs do not contain a pkg.toml + return Ok(curr_hm.values().count() == 1 || !toml_files_in_tree(curr_hm)) + }, + Some(Element::Dir(hm)) => curr_hm = hm, + None => anyhow::bail!("Path component '{:?}' was not loaded in map, this is most likely a bug", elem), + } + } + + Ok(false) + } + + /// Get a Vec<(PathBuf, &String)> for the `path` + /// + /// The result of this function is the trail of pkg.toml files from `self.root` to `path`, + /// whereas the PathBuf is the actual path to the file and the `&String` is the content of the + /// individual file. + /// + /// Merging all Strings in the returned Vec as Config objects should produce a Package. to + /// `path`, whereas the PathBuf is the actual path to the file and the `&String` is the content + /// of the individual file. + /// + /// Merging all Strings in the returned Vec as Config objects should produce a Package. 
+ pub fn get_files_for<'a>(&'a self, path: &Path) -> Result<Vec<(PathBuf, &'a String)>> { + let mut res = Vec::with_capacity(10); // good enough + + let mut curr_hm = &self.elements; + let mut curr_path = PathBuf::from(""); + for elem in path.components() { + let elem = PathComponent::try_from(&elem)?; + + if !elem.is_pkg_toml() { + if let Some(Element::File(intermediate)) = curr_hm.get(&PathComponent::PkgToml) { + res.push((curr_path.join("pkg.toml"), intermediate)); + } + } + + match curr_hm.get(&elem) { + Some(Element::File(cont)) => res.push((curr_path.join("pkg.toml"), cont)), + Some(Element::Dir(hm)) => { + curr_path = curr_path.join(elem.dir_name().unwrap()); // unwrap safe by above match + curr_hm = hm; + } + None => anyhow::bail!("Path component '{:?}' was not loaded in map, this is most likely a bug", elem), + } + } + + Ok(res) + } +} + +/// Helper to check whether a DirEntry points to a hidden file +fn is_hidden(entry: &DirEntry) -> bool { + log::trace!("Check {:?} is hidden", entry); + entry.file_name().to_str().map(|s| s.starts_with(".")).unwrap_or(false) +} + +/// Helper to check whether a DirEntry points to a directory +fn is_dir(entry: &DirEntry) -> bool { + log::trace!("Check {:?} is directory", entry); + entry.file_type().is_dir() +} + +/// Helper to check whether a DirEntry points to a pkg.toml file +fn is_pkgtoml(entry: &DirEntry) -> bool { + log::trace!("Check {:?} == 'pkg.toml'", entry); + entry.file_name().to_str().map(|s| s == "pkg.toml").unwrap_or(false) +} + +/// Helper fn to load a Path into memory as String +fn load_file(path: &Path) -> Result<String> { + log::trace!("Reading {}", path.display()); + std::fs::read_to_string(path) + .with_context(|| anyhow!("Reading file from filesystem: {}", path.display())) + .map_err(Error::from) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn dir(name: &str, hm: Vec<(PathComponent, Element)>) -> (PathComponent, Element) { + (PathComponent::DirName(name.to_string()), 
Element::Dir(hm.into_iter().collect())) + } + + fn pkgtoml(content: &str) -> (PathComponent, Element) { + (PathComponent::PkgToml, Element::File(content.to_string())) + } + + fn pb(s: &str) -> PathBuf { + PathBuf::from(s) + } + + fn s(s: &str) -> String { + String::from(s) + } + + #[test] + fn test_one_file_in_directory() { + let fsr = FileSystemRepresentation { + root: PathBuf::from("/"), + + // Representing + // / + // /foo + // /foo/pkg.toml + elements: vec![ + dir("foo", vec![ + pkgtoml("content") + ]) + ].into_iter().collect(), + + files: vec![ + PathBuf::from("foo/pkg.toml") + ], + }; + + let path = "foo/pkg.toml".as_ref(); + + assert!(fsr.is_leaf_file(path).unwrap()); + assert_eq!(fsr.get_files_for(path).unwrap(), vec![(pb("foo/pkg.toml"), &s("content"))]); + } + + #[test] + fn test_deep_pkgtoml() { + let fsr = FileSystemRepresentation { + root: PathBuf::from("/"), + + // Representing + // / + // /foo + // /foo/bar + // /foo/baz + // /foo/baz/pkg.toml + elements: vec![ + dir("foo", vec![ + dir("bar", vec![ + dir("baz", vec![ + pkgtoml("content"), + ]) + ]) + ]) + ].into_iter().collect(), + + files: vec![ + PathBuf::from("foo/bar/baz/pkg.toml") + ], + }; + + let path = "foo/bar/baz/pkg.toml".as_ref(); + + assert!(fsr.is_leaf_file(path).unwrap()); + assert_eq!(fsr.get_files_for(path).unwrap(), vec![(pb("foo/bar/baz/pkg.toml"), &s("content"))]); + } + + #[test] + fn test_hierarchy() { + let fsr = FileSystemRepresentation { + root: PathBuf::from("/"), + + // Representing + // / + // /foo + // /foo/bar + // /foo/baz + // /foo/baz/pkg.toml + elements: vec![ + dir("foo", vec![ + pkgtoml("content1"), + dir("bar", vec![ + pkgtoml("content2"), + dir("baz", vec![ + pkgtoml("content3"), + ]) + ]) + ]) + ].into_iter().collect(), + + files: vec![ + PathBuf::from("foo/pkg.toml"), + PathBuf::from("foo/bar/pkg.toml"), + PathBuf::from("foo/bar/baz/pkg.toml") + ], + }; + + { + let path = "foo/pkg.toml".as_ref(); + + assert!(!fsr.is_leaf_file(path).unwrap()); + } + { + let path 
= "foo/bar/pkg.toml".as_ref(); + + assert!(!fsr.is_leaf_file(path).unwrap()); + } + { + let path = "foo/bar/baz/pkg.toml".as_ref(); + + assert!(fsr.is_leaf_file(path).unwrap()); + assert_eq!(fsr.get_files_for(path).unwrap(), vec![ + (pb("foo/pkg.toml"), &s("content1")), + (pb("foo/bar/pkg.toml"), &s("content2")), + (pb("foo/bar/baz/pkg.toml"), &s("content3")), + ]); + } + } + + #[test] + fn test_hierarchy_with_missing_intermediate_files() { + let fsr = FileSystemRepresentation { + root: PathBuf::from("/"), + + // Representing + // / + // /foo + // /foo/bar + // /foo/baz + // /foo/baz/pkg.toml + elements: vec![ + dir("foo", vec![ + pkgtoml("content1"), + dir("bar", vec![ + dir("baz", vec![ + pkgtoml("content3"), + ]) + ]) + ]) + ].into_iter().collect(), + + files: vec![ + PathBuf::from("foo/pkg.toml"), + PathBuf::from("foo/bar/baz/pkg.toml") + ], + }; + + let path = "foo/pkg.toml".as_ref(); + assert!(!fsr.is_leaf_file(path).unwrap()); + + let path = "foo/bar/baz/pkg.toml".as_ref(); + assert!(fsr.is_leaf_file(path).unwrap()); + assert_eq!(fsr.get_files_for(path).unwrap(), vec![ + (pb("foo/pkg.toml"), &s("content1")), + (pb("foo/bar/baz/pkg.toml"), &s("content3")), + ]); + } + + #[test] + fn test_hierarchy_with_toplevel_file() { + let fsr = FileSystemRepresentation { + root: PathBuf::from("/"), + + // Representing + // / + // /foo + // /foo/bar + // /foo/baz + // /foo/baz/pkg.toml + elements: vec![ + pkgtoml("content1"), + dir("foo", vec![ + dir("bar", vec![ + dir("baz", vec![ + pkgtoml("content3"), + ]) + ]) + ]) + ].into_iter().collect(), + + files: vec![ + PathBuf::from("pkg.toml"), + PathBuf::from("foo/bar/baz/pkg.toml") + ], + }; + + let path = "pkg.toml".as_ref(); + assert!(!fsr.is_leaf_file(path).unwrap()); + + let path = "foo/bar/baz/pkg.toml".as_ref(); + assert!(fsr.is_leaf_file(path).unwrap()); + assert_eq!(fsr.get_files_for(path).unwrap(), vec![ + (pb("pkg.toml"), &s("content1")), + (pb("foo/bar/baz/pkg.toml"), &s("content3")), + ]); + } + +} diff --git 
a/src/repository/mod.rs b/src/repository/mod.rs index ca24117..63276e4 100644 --- a/src/repository/mod.rs +++ b/src/repository/mod.rs @@ -11,3 +11,6 @@ #![allow(clippy::module_inception)] mod repository; pub use repository::*; + +mod fs; + diff --git a/src/repository/repository.rs b/src/repository/repository.rs index 7b7b046..ee2e08f 100644 --- a/src/repository/repository.rs +++ b/src/repository/repository.rs @@ -17,7 +17,6 @@ use anyhow::Context; use anyhow::Error; use anyhow::Result; use log::trace; -use resiter::AndThen; use resiter::FilterMap; use resiter::Map; @@ -39,196 +38,110 @@ impl From<BTreeMap<(PackageName, PackageVersion), Package>> for Repository { } impl Repository { + fn new(inner: BTreeMap<(PackageName, PackageVersion), Package>) -> Self { + Repository { inner } + } + pub fn load(path: &Path, progress: &indicatif::ProgressBar) -> Result<Self> { - fn all_subdirs(p: &Path) -> Result<Vec<PathBuf>> { - let mut v = Vec::new(); - for de in p.read_dir()? { - let de = de?; - let is_dir = de.file_type()?.is_dir(); - let is_hidden = de - .path() - .file_name() - .and_then(|s| s.to_str()) - .map(|s| s.starts_with('.')) - .unwrap_or(false); - - if is_dir && !is_hidden { - v.push(de.path()); - } - } + use crate::repository::fs::FileSystemRepresentation; + use config::Config; + use rayon::iter::IntoParallelRefIterator; + use rayon::iter::ParallelIterator; - Ok(v) - } + trace!("Loading files from filesystem"); + let fsr = FileSystemRepresentation::load(path.to_path_buf())?; - fn load_recursive( - root: &Path, - path: &Path, - mut config: config::Config, - progress: &indicatif::ProgressBar, - ) -> Result<Vec<Result<Package>>> { - let pkg_file = path.join("pkg.toml"); - - if pkg_file.is_file() { - let buf = std::fs::read_to_string(&pkg_file) - .with_context(|| anyhow!("Reading {}", pkg_file.display()))?; - - // This function has an issue: It loads packages recursively, but if there are - // patches set for a package, these patches are set _relative_ to the current 
- // pkg.toml file. - // - // E.G.: - // (1) /pkg.toml - // (2) /a/pkg.toml - // (3) /a/1.0/pkg.toml - // (4) /a/2.0/pkg.toml - // - // If (2) defines a patches = ["./1.patch"], the patch exists at /a/1.patch. - // We can fix that by modifying the Config object after loading (2) and fixing the - // path of the patch to be relative to the repostory root. - // - // But if we continue loading the /a/ subdirectory recursively, this value gets - // overwritten by Config::refresh(), which is called by Config::merge, for example. - // - // The trick is, to get the list of patches _before_ the merge, and later - // re-setting them after the merge, if there were no new patches set (which itself - // is tricky to find out, because the `Config` object _looks like_ there is a new - // array set). - // - // If (3), for example, does set a new patches=[] array, the old array is - // invalidated and no longer relevant for that package! - // Thus, we can savely throw it away and continue with the new array, fixing the - // pathes to be relative to repo root again. - // - // If (4) does _not_ set any patches, we must ensure that the patches from the - // loading of (2) are used and not overwritten by the Config::refresh() call - // happening during Config::merge(). - // - - // first of all, we get the patches array. - // This is either the patches array from the last recursion or the newly set one, - // that doesn't matter here. - let patches_before_merge = match config.get_array("patches") { - Ok(v) => { - v.into_iter() - .map(config::Value::into_str) - .map(|r| { - r.map(PathBuf::from) - .with_context(|| anyhow!("patches must be strings")) - .map_err(Error::from) - }) - .collect::<Result<Vec<_>>>()? 
- }, - Err(config::ConfigError::NotFound(_)) => vec![], - Err(e) => return Err(e).map_err(Error::from), - }; - trace!("Patches before merging: {:?}", patches_before_merge); - - // Merge the new pkg.toml file over the already loaded configuration - config - .merge(config::File::from_str(&buf, config::FileFormat::Toml)) - .with_context(|| anyhow!("Loading contents of {}", pkg_file.display()))?; - - let path_relative_to_root = path.strip_prefix(root)?; - - // get the patches that are in the `config` object after the merge - let patches = config - .get_array("patches") - .or_else(|e| match e { - - // if there was none, we simply use an empty array - // This is cheap because Vec::with_capacity(0) does not allocate - config::ConfigError::NotFound(_) => Ok(Vec::with_capacity(0)), - other => Err(other), - })? - .into_iter() - - // Map all `Value`s to String and then join them on the path that is relative to - // the root directory of the repository. + fn get_patches(config: &Config) -> Result<Vec<PathBuf>> { + match config.get_array("patches") { + Ok(v) => v.into_iter() .map(config::Value::into_str) .map_err(Error::from) - .map_ok(|patch| path_relative_to_root.join(patch)) - .inspect(|patch| trace!("Patch relative to root: {:?}", patch.as_ref().map(|p| p.display()))) - - // if the patch file exists, use it (as config::Value). - // - // Otherwise we have an error here, because we're refering to a non-existing file. - .and_then_ok(|patch| if patch.exists() { - trace!("Path to patch exists: {}", patch.display()); - Ok(Some(patch)) - } else if patches_before_merge.iter().any(|pb| pb.file_name() == patch.file_name()) { - // We have a patch already in the array that is named equal to the patch - // we have in the current recursion. - // It seems like this patch was already in the list and we re-found it - // because we loaded a deeper pkg.toml file. 
- Ok(None) - } else { - trace!("Path to patch does not exist: {}", patch.display()); - Err(anyhow!("Patch does not exist: {}", patch.display())) - }) - .filter_map_ok(|o| o) - .collect::<Result<Vec<_>>>()?; - - // If we found any patches, use them. Otherwise use the array from before the merge - // (which already has the correct pathes from the previous recursion). - let patches = if !patches.is_empty() && patches.iter().all(|p| p.exists()) { - patches - } else { - patches_before_merge - }; - - trace!("Patches after postprocessing merge: {:?}", patches); - let patches = patches - .into_iter() - .map(|p| p.display().to_string()) - .map(config::Value::from) - .collect::<Vec<_>>(); - config.set_once("patches", config::Value::from(patches))?; + .map_err(|e| e.context("patches must be strings")) + .map_err(Error::from) + .map_ok(PathBuf::from) + .collect(), + Err(config::ConfigError::NotFound(_)) => Ok(Vec::with_capacity(0)), + Err(e) => Err(e).map_err(Error::from), } + } - let subdirs = all_subdirs(path) - .with_context(|| anyhow!("Finding subdirs for {}", pkg_file.display()))?; - - if subdirs.is_empty() { + fsr.files() + .par_iter() + .inspect(|path| trace!("Checking for leaf file: {}", path.display())) + .filter_map(|path| { + match fsr.is_leaf_file(path) { + Ok(true) => Some(Ok(path)), + Ok(false) => None, + Err(e) => Some(Err(e)), + } + }) + .inspect(|r| trace!("Loading files for {:?}", r)) + .map(|path| { progress.tick(); - if pkg_file.is_file() { - let package = config.try_into() - .with_context(|| anyhow!("Failed to parse {} into package", path.display())) - .and_then(|package: Package| { - if package.name().is_empty() { - Err(anyhow!("Package name cannot be empty: {}", pkg_file.display())) - } else if package.version().is_empty() { - Err(anyhow!("Package version cannot be empty: {}", pkg_file.display())) + let path = path?; + fsr.get_files_for(path)? 
+ .iter() + .inspect(|(path, _)| trace!("Loading layer at {}", path.display())) + .fold(Ok(Config::default()) as Result<_>, |config, (path, ref content)| { + let mut config = config?; + let patches_before_merge = get_patches(&config)?; + + config.merge(config::File::from_str(&content, config::FileFormat::Toml)) + .with_context(|| anyhow!("Loading contents of {}", path.display()))?; + + // get the patches that are in the `config` object after the merge + let patches = get_patches(&config)? + .into_iter() + .map(|p| { + if let Some(current_dir) = path.parent() { + fsr.root().join(current_dir).join(p) + } else { + unimplemented!() + } + }) + .inspect(|patch| trace!("Patch: {:?}", patch)) + + // if the patch file exists, use it (as config::Value). + // + // Otherwise we have an error here, because we're refering to a non-existing file. + .map(|patch| if patch.exists() { + trace!("Path to patch exists: {}", patch.display()); + Ok(Some(patch)) + } else if patches_before_merge.iter().any(|pb| pb.file_name() == patch.file_name()) { + // We have a patch already in the array that is named equal to the patch + // we have in the fold iteration. + // It seems like this patch was already in the list and we re-found it + // because we loaded a "deeper" pkg.toml file. + Ok(None) } else { - Ok(package) - } - }); - - Ok(vec![package]) - } else { - Ok(vec![]) - } - } else { - subdirs.into_iter().fold(Ok(Vec::new()), |vec, dir| { - vec.and_then(|mut v| { - trace!("Recursing into {}", dir.display()); - let mut loaded = load_recursive(root, &dir, config.clone(), progress) - .with_context(|| anyhow!("Reading package from {}", pkg_file.display()))?; - - v.append(&mut loaded); - Ok(v) + trace!("Path to patch does not exist: {}", patch.display()); + Err(anyhow!("Patch does not exist: {}", patch.display())) + }) + .filter_map_ok(|o| o) + .collect::<Result<Vec<_>>>()?; + + // If we found any patches, use them. 
Otherwise use the array from before the merge + // (which already has the correct pathes from the previous recursion). + let patches = if !patches.is_empty() && patches.iter().all(|p| p.exists()) { + patches + } else { + patches_before_merge + }; + + trace!("Patches after postprocessing merge: {:?}", patches); + let patches = patches + .into_iter() + .map(|p| p.display().to_string()) + .map(config::Value::from) + .collect::<Vec<_>>(); + config.set_once("patches", config::Value::from(patches))?; + Ok(config) }) - }) - } - } - - let inner = load_recursive(path, path, config::Config::default(), progress) - .with_context(|| anyhow!("Recursing for {}", path.display()))? - .into_iter() - .inspect(|p| trace!("Loading into repository: {:?}", p)) - .map_ok(|p| ((p.name().clone(), p.version().clone()), p)) |