summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMatthias Beyer <matthias.beyer@atos.net>2021-08-27 19:41:31 +0200
committerMatthias Beyer <matthias.beyer@atos.net>2021-08-27 19:41:31 +0200
commitce01861e6ef3b652eeafbef5b87c293b3a971d89 (patch)
tree6f60e5cb5604f00bfe8a29cbc11bd2d754ed220d
parent5226132f7e43d0b12d8f58aabef7f5785d5f03fe (diff)
parent5b52e6a0269eb2a0c3d69a1ce3312ba07d4dc997 (diff)
Merge branch 'fast-repository-loading'
After some completely non-scientific benchmarking, these patches bring down the loading time for a real-world repository from over 20 sec to about 17 sec in a debug build of butido, to 5 sec in a release build, and even to 2 sec once the filesystem cache got a bit warm. So I guess we can safely merge this now and be happy with it (and go back to the more important issues).
-rw-r--r--Cargo.toml2
-rw-r--r--src/repository/fs/element.rs33
-rw-r--r--src/repository/fs/mod.rs18
-rw-r--r--src/repository/fs/path.rs74
-rw-r--r--src/repository/fs/representation.rs432
-rw-r--r--src/repository/mod.rs3
-rw-r--r--src/repository/repository.rs275
7 files changed, 656 insertions, 181 deletions
diff --git a/Cargo.toml b/Cargo.toml
index 1222869..f70a9e0 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -52,10 +52,12 @@ log = "0.4"
parse-display = "0.5"
pom = "3"
ptree = "0.3"
+rayon = "1.5"
regex = "1"
reqwest = { version = "0.11", features = [ "stream" ] }
resiter = "0.4"
result-inspect = "0.2"
+rlimit = "0.6"
semver = { version = "1.0", features = [ "serde" ] }
serde = "1"
serde_json = "1"
diff --git a/src/repository/fs/element.rs b/src/repository/fs/element.rs
new file mode 100644
index 0000000..73b97f6
--- /dev/null
+++ b/src/repository/fs/element.rs
@@ -0,0 +1,33 @@
+//
+// Copyright (c) 2020-2021 science+computing ag and other contributors
+//
+// This program and the accompanying materials are made
+// available under the terms of the Eclipse Public License 2.0
+// which is available at https://www.eclipse.org/legal/epl-2.0/
+//
+// SPDX-License-Identifier: EPL-2.0
+//
+
+use std::collections::HashMap;
+
+use crate::repository::fs::path::PathComponent;
+
+/// One element in the tree inside FileSystemRepresentation
+///
+/// This is either a File, or a Directory that contains more (Files or Directories).
+#[derive(Debug)]
+pub enum Element {
+ File(String),
+ Dir(HashMap<PathComponent, Element>)
+}
+
+impl Element {
+ /// Helper fn to get the directory contents of the element, if the element is an Element::Dir
+ pub fn get_map_mut(&mut self) -> Option<&mut HashMap<PathComponent, Element>> {
+ match self {
+ Element::File(_) => None,
+ Element::Dir(ref mut hm) => Some(hm),
+ }
+ }
+}
+
diff --git a/src/repository/fs/mod.rs b/src/repository/fs/mod.rs
new file mode 100644
index 0000000..f1fce78
--- /dev/null
+++ b/src/repository/fs/mod.rs
@@ -0,0 +1,18 @@
+//
+// Copyright (c) 2020-2021 science+computing ag and other contributors
+//
+// This program and the accompanying materials are made
+// available under the terms of the Eclipse Public License 2.0
+// which is available at https://www.eclipse.org/legal/epl-2.0/
+//
+// SPDX-License-Identifier: EPL-2.0
+//
+
+#![allow(unused)] // TODO: Remove allow(unused)
+
+mod representation;
+pub use representation::FileSystemRepresentation;
+
+mod element;
+mod path;
+
diff --git a/src/repository/fs/path.rs b/src/repository/fs/path.rs
new file mode 100644
index 0000000..b7fb407
--- /dev/null
+++ b/src/repository/fs/path.rs
@@ -0,0 +1,74 @@
+//
+// Copyright (c) 2020-2021 science+computing ag and other contributors
+//
+// This program and the accompanying materials are made
+// available under the terms of the Eclipse Public License 2.0
+// which is available at https://www.eclipse.org/legal/epl-2.0/
+//
+// SPDX-License-Identifier: EPL-2.0
+//
+
+use std::collections::HashMap;
+use std::convert::TryFrom;
+use std::path::Component;
+use std::path::Path;
+use std::path::PathBuf;
+
+use anyhow::anyhow;
+use anyhow::Result;
+
+/// Helper type for filtering for paths we need or don't need
+///
+/// We either have a directory, which has a name, or we have a pkg.toml file, which is of interest.
+/// All other files can be ignored and thus are not represented by this type.
+///
+/// The PathComponent::DirName(_) represents a _part_ of a Path. Something like
+///
+/// ```ignore
+/// let p = PathBuf::from("foo/bar/baz")
+/// p.components().map(PathComponent::DirName) // does not actually work because of types
+/// ```
+///
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub enum PathComponent {
+ PkgToml,
+ DirName(String),
+}
+
+impl TryFrom<&std::path::Component<'_>> for PathComponent {
+ type Error = anyhow::Error;
+
+ fn try_from(c: &std::path::Component) -> Result<Self> {
+ match *c {
+ Component::Prefix(_) => anyhow::bail!("Unexpected path component: Prefix"),
+ Component::RootDir => anyhow::bail!("Unexpected path component: RootDir"),
+ Component::CurDir => anyhow::bail!("Unexpected path component: CurDir"),
+ Component::ParentDir => anyhow::bail!("Unexpected path component: ParentDir"),
+ Component::Normal(filename) => {
+ let filename = filename.to_str().ok_or_else(|| anyhow!("UTF8-error"))?;
+ if filename == "pkg.toml" {
+ Ok(PathComponent::PkgToml)
+ } else {
+ Ok(PathComponent::DirName(filename.to_string()))
+ }
+ },
+ }
+ }
+}
+
+impl PathComponent {
+ /// Helper fn whether this PathComponent is a PathComponent::PkgToml
+ pub fn is_pkg_toml(&self) -> bool {
+ std::matches!(self, PathComponent::PkgToml)
+ }
+
+ /// Helper fn to get the directory name of this PathComponent if it is a PathComponent::DirName
+ /// or None if it is not.
+ pub fn dir_name(&self) -> Option<&str> {
+ match self {
+ PathComponent::PkgToml => None,
+ PathComponent::DirName(dn) => Some(dn)
+ }
+ }
+}
+
diff --git a/src/repository/fs/representation.rs b/src/repository/fs/representation.rs
new file mode 100644
index 0000000..5f92ba7
--- /dev/null
+++ b/src/repository/fs/representation.rs
@@ -0,0 +1,432 @@
+//
+// Copyright (c) 2020-2021 science+computing ag and other contributors
+//
+// This program and the accompanying materials are made
+// available under the terms of the Eclipse Public License 2.0
+// which is available at https://www.eclipse.org/legal/epl-2.0/
+//
+// SPDX-License-Identifier: EPL-2.0
+//
+
+use std::collections::HashMap;
+use std::convert::TryFrom;
+use std::convert::TryInto;
+use std::path::Component;
+use std::path::Path;
+use std::path::PathBuf;
+
+use anyhow::Context;
+use anyhow::Error;
+use anyhow::Result;
+use anyhow::anyhow;
+use resiter::AndThen;
+use resiter::Filter;
+use resiter::Map;
+use walkdir::DirEntry;
+use walkdir::WalkDir;
+
+use crate::repository::fs::element::Element;
+use crate::repository::fs::path::PathComponent;
+
+/// A type representing the filesystem
+///
+/// This type can be used to load pkg.toml files from the filesystem. As soon as this object is
+/// loaded, all filesystem access is done and postprocessing of the loaded data can happen
+#[derive(Debug, getset::Getters)]
+pub struct FileSystemRepresentation {
+ #[getset(get = "pub")]
+ root: PathBuf,
+
+ #[getset(get = "pub")]
+ files: Vec<PathBuf>,
+
+ elements: HashMap<PathComponent, Element>,
+}
+
+impl FileSystemRepresentation {
+ /// Load the FileSystemRepresentation object starting a `root`.
+ pub fn load(root: PathBuf) -> Result<Self> {
+ let mut fsr = FileSystemRepresentation {
+ root: root.clone(),
+ elements: HashMap::new(),
+ files: vec![],
+ };
+
+ // get the number of maximum files open (ulimit -n on linux)
+ let max_files_open = {
+ let (soft, _hard) = rlimit::getrlimit(rlimit::Resource::NOFILE)?;
+
+ // use less than the soft limit if the soft limit is above 15
+ soft.checked_sub(16)
+ .unwrap_or(soft)
+ .try_into() // we need to have a usize
+ .unwrap_or(usize::MAX) // if usize is smaller than u64, usize::MAX will do
+ };
+
+ log::trace!("Loading files from filesystem starting at: {}", root.display());
+ log::trace!("Loading with a maximum of {} files open", max_files_open);
+ WalkDir::new(root)
+ .follow_links(false)
+ .max_open(max_files_open)
+ .same_file_system(true)
+ .into_iter()
+ .filter_entry(|e| !is_hidden(e) && (is_pkgtoml(e) || is_dir(e)))
+ .filter_ok(|e| is_pkgtoml(e))
+ .inspect(|el| log::trace!("Loading: {:?}", el))
+ .map_err(Error::from)
+ .and_then_ok(|de| {
+ let mut curr_hm = &mut fsr.elements;
+ let de_path = de.path().strip_prefix(&fsr.root)?;
+ fsr.files.push(de_path.to_path_buf());
+
+ // traverse the HashMap tree
+ for cmp in de_path.components() {
+ match PathComponent::try_from(&cmp)? {
+ PathComponent::PkgToml => {
+ curr_hm.entry(PathComponent::PkgToml)
+ .or_insert(Element::File(load_file(de_path)?));
+ },
+ dir @ PathComponent::DirName(_) => {
+ curr_hm.entry(dir.clone())
+ .or_insert(Element::Dir(HashMap::new()));
+
+ curr_hm = curr_hm.get_mut(&dir)
+ .unwrap() // safe, because we just inserted it
+ .get_map_mut()
+ .unwrap(); // safe, because we inserted Element::Dir
+ },
+ }
+ }
+
+ Ok(())
+ })
+ .collect::<Result<Vec<_>>>()?;
+
+ Ok(fsr)
+ }
+
+ /// Check the tree whether a `Path` points to a file in a directory that does not contain more
+ /// directories containing pkg.toml files.
+ ///
+ /// # Example
+ ///
+ /// /
+ /// /foo/
+ /// /foo/pkg.toml <-- is leaf
+ /// /bar/
+ /// /bar/pkg.toml <-- is not a leaf
+ /// /bar/baz/pkg.toml <-- is a leaf
+ ///
+ ///
+ pub fn is_leaf_file(&self, path: &Path) -> Result<bool> {
+ let mut curr_hm = &self.elements;
+
+ // Helper to check whether a tree contains pkg.toml files, recursively
+ fn toml_files_in_tree(hm: &HashMap<PathComponent, Element>) -> bool {
+ if let Some(Element::File(_)) = hm.get(&PathComponent::PkgToml) {
+ return true
+ }
+
+ for value in hm.values() {
+ match value {
+ Element::File(_) => return true,
+ Element::Dir(hm) => if toml_files_in_tree(hm) {
+ return true
+ },
+ }
+ }
+ false
+ }
+
+ for elem in path.components() {
+ let elem = PathComponent::try_from(&elem)?;
+
+ match curr_hm.get(&elem) {
+ Some(Element::File(_)) => {
+ // if I have a file now, and the current hashmap only holds either
+ // * No directory
+ // * or a directory where all subdirs do not contain a pkg.toml
+ return Ok(curr_hm.values().count() == 1 || !toml_files_in_tree(curr_hm))
+ },
+ Some(Element::Dir(hm)) => curr_hm = hm,
+ None => anyhow::bail!("Path component '{:?}' was not loaded in map, this is most likely a bug", elem),
+ }
+ }
+
+ Ok(false)
+ }
+
+ /// Get a Vec<(PathBuf, &String)> for the `path`
+ ///
+ /// The result of this function is the trail of pkg.toml files from `self.root` to `path`,
+ /// whereas the PathBuf is the actual path to the file and the `&String` is the content of the
+ /// individual file.
+ ///
+    /// Merging all Strings in the returned Vec as Config objects should produce a Package.
+ pub fn get_files_for<'a>(&'a self, path: &Path) -> Result<Vec<(PathBuf, &'a String)>> {
+ let mut res = Vec::with_capacity(10); // good enough
+
+ let mut curr_hm = &self.elements;
+ let mut curr_path = PathBuf::from("");
+ for elem in path.components() {
+ let elem = PathComponent::try_from(&elem)?;
+
+ if !elem.is_pkg_toml() {
+ if let Some(Element::File(intermediate)) = curr_hm.get(&PathComponent::PkgToml) {
+ res.push((curr_path.join("pkg.toml"), intermediate));
+ }
+ }
+
+ match curr_hm.get(&elem) {
+ Some(Element::File(cont)) => res.push((curr_path.join("pkg.toml"), cont)),
+ Some(Element::Dir(hm)) => {
+ curr_path = curr_path.join(elem.dir_name().unwrap()); // unwrap safe by above match
+ curr_hm = hm;
+ }
+ None => anyhow::bail!("Path component '{:?}' was not loaded in map, this is most likely a bug", elem),
+ }
+ }
+
+ Ok(res)
+ }
+}
+
+/// Helper to check whether a DirEntry points to a hidden file
+fn is_hidden(entry: &DirEntry) -> bool {
+ log::trace!("Check {:?} is hidden", entry);
+ entry.file_name().to_str().map(|s| s.starts_with(".")).unwrap_or(false)
+}
+
+/// Helper to check whether a DirEntry points to a directory
+fn is_dir(entry: &DirEntry) -> bool {
+ log::trace!("Check {:?} is directory", entry);
+ entry.file_type().is_dir()
+}
+
+/// Helper to check whether a DirEntry points to a pkg.toml file
+fn is_pkgtoml(entry: &DirEntry) -> bool {
+ log::trace!("Check {:?} == 'pkg.toml'", entry);
+ entry.file_name().to_str().map(|s| s == "pkg.toml").unwrap_or(false)
+}
+
+/// Helper fn to load a Path into memory as String
+fn load_file(path: &Path) -> Result<String> {
+ log::trace!("Reading {}", path.display());
+ std::fs::read_to_string(path)
+ .with_context(|| anyhow!("Reading file from filesystem: {}", path.display()))
+ .map_err(Error::from)
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ fn dir(name: &str, hm: Vec<(PathComponent, Element)>) -> (PathComponent, Element) {
+ (PathComponent::DirName(name.to_string()), Element::Dir(hm.into_iter().collect()))
+ }
+
+ fn pkgtoml(content: &str) -> (PathComponent, Element) {
+ (PathComponent::PkgToml, Element::File(content.to_string()))
+ }
+
+ fn pb(s: &str) -> PathBuf {
+ PathBuf::from(s)
+ }
+
+ fn s(s: &str) -> String {
+ String::from(s)
+ }
+
+ #[test]
+ fn test_one_file_in_directory() {
+ let fsr = FileSystemRepresentation {
+ root: PathBuf::from("/"),
+
+ // Representing
+ // /
+ // /foo
+ // /foo/pkg.toml
+ elements: vec![
+ dir("foo", vec![
+ pkgtoml("content")
+ ])
+ ].into_iter().collect(),
+
+ files: vec![
+ PathBuf::from("foo/pkg.toml")
+ ],
+ };
+
+ let path = "foo/pkg.toml".as_ref();
+
+ assert!(fsr.is_leaf_file(path).unwrap());
+ assert_eq!(fsr.get_files_for(path).unwrap(), vec![(pb("foo/pkg.toml"), &s("content"))]);
+ }
+
+ #[test]
+ fn test_deep_pkgtoml() {
+ let fsr = FileSystemRepresentation {
+ root: PathBuf::from("/"),
+
+ // Representing
+ // /
+ // /foo
+ // /foo/bar
+ // /foo/baz
+ // /foo/baz/pkg.toml
+ elements: vec![
+ dir("foo", vec![
+ dir("bar", vec![
+ dir("baz", vec![
+ pkgtoml("content"),
+ ])
+ ])
+ ])
+ ].into_iter().collect(),
+
+ files: vec![
+ PathBuf::from("foo/bar/baz/pkg.toml")
+ ],
+ };
+
+ let path = "foo/bar/baz/pkg.toml".as_ref();
+
+ assert!(fsr.is_leaf_file(path).unwrap());
+ assert_eq!(fsr.get_files_for(path).unwrap(), vec![(pb("foo/bar/baz/pkg.toml"), &s("content"))]);
+ }
+
+ #[test]
+ fn test_hierarchy() {
+ let fsr = FileSystemRepresentation {
+ root: PathBuf::from("/"),
+
+ // Representing
+ // /
+ // /foo
+ // /foo/bar
+ // /foo/baz
+ // /foo/baz/pkg.toml
+ elements: vec![
+ dir("foo", vec![
+ pkgtoml("content1"),
+ dir("bar", vec![
+ pkgtoml("content2"),
+ dir("baz", vec![
+ pkgtoml("content3"),
+ ])
+ ])
+ ])
+ ].into_iter().collect(),
+
+ files: vec![
+ PathBuf::from("foo/pkg.toml"),
+ PathBuf::from("foo/bar/pkg.toml"),
+ PathBuf::from("foo/bar/baz/pkg.toml")
+ ],
+ };
+
+ {
+ let path = "foo/pkg.toml".as_ref();
+
+ assert!(!fsr.is_leaf_file(path).unwrap());
+ }
+ {
+ let path = "foo/bar/pkg.toml".as_ref();
+
+ assert!(!fsr.is_leaf_file(path).unwrap());
+ }
+ {
+ let path = "foo/bar/baz/pkg.toml".as_ref();
+
+ assert!(fsr.is_leaf_file(path).unwrap());
+ assert_eq!(fsr.get_files_for(path).unwrap(), vec![
+ (pb("foo/pkg.toml"), &s("content1")),
+ (pb("foo/bar/pkg.toml"), &s("content2")),
+ (pb("foo/bar/baz/pkg.toml"), &s("content3")),
+ ]);
+ }
+ }
+
+ #[test]
+ fn test_hierarchy_with_missing_intermediate_files() {
+ let fsr = FileSystemRepresentation {
+ root: PathBuf::from("/"),
+
+ // Representing
+ // /
+ // /foo
+ // /foo/bar
+ // /foo/baz
+ // /foo/baz/pkg.toml
+ elements: vec![
+ dir("foo", vec![
+ pkgtoml("content1"),
+ dir("bar", vec![
+ dir("baz", vec![
+ pkgtoml("content3"),
+ ])
+ ])
+ ])
+ ].into_iter().collect(),
+
+ files: vec![
+ PathBuf::from("foo/pkg.toml"),
+ PathBuf::from("foo/bar/baz/pkg.toml")
+ ],
+ };
+
+ let path = "foo/pkg.toml".as_ref();
+ assert!(!fsr.is_leaf_file(path).unwrap());
+
+ let path = "foo/bar/baz/pkg.toml".as_ref();
+ assert!(fsr.is_leaf_file(path).unwrap());
+ assert_eq!(fsr.get_files_for(path).unwrap(), vec![
+ (pb("foo/pkg.toml"), &s("content1")),
+ (pb("foo/bar/baz/pkg.toml"), &s("content3")),
+ ]);
+ }
+
+ #[test]
+ fn test_hierarchy_with_toplevel_file() {
+ let fsr = FileSystemRepresentation {
+ root: PathBuf::from("/"),
+
+ // Representing
+ // /
+ // /foo
+ // /foo/bar
+ // /foo/baz
+ // /foo/baz/pkg.toml
+ elements: vec![
+ pkgtoml("content1"),
+ dir("foo", vec![
+ dir("bar", vec![
+ dir("baz", vec![
+ pkgtoml("content3"),
+ ])
+ ])
+ ])
+ ].into_iter().collect(),
+
+ files: vec![
+ PathBuf::from("pkg.toml"),
+ PathBuf::from("foo/bar/baz/pkg.toml")
+ ],
+ };
+
+ let path = "pkg.toml".as_ref();
+ assert!(!fsr.is_leaf_file(path).unwrap());
+
+ let path = "foo/bar/baz/pkg.toml".as_ref();
+ assert!(fsr.is_leaf_file(path).unwrap());
+ assert_eq!(fsr.get_files_for(path).unwrap(), vec![
+ (pb("pkg.toml"), &s("content1")),
+ (pb("foo/bar/baz/pkg.toml"), &s("content3")),
+ ]);
+ }
+
+}
diff --git a/src/repository/mod.rs b/src/repository/mod.rs
index ca24117..63276e4 100644
--- a/src/repository/mod.rs
+++ b/src/repository/mod.rs
@@ -11,3 +11,6 @@
#![allow(clippy::module_inception)]
mod repository;
pub use repository::*;
+
+mod fs;
+
diff --git a/src/repository/repository.rs b/src/repository/repository.rs
index 7b7b046..ee2e08f 100644
--- a/src/repository/repository.rs
+++ b/src/repository/repository.rs
@@ -17,7 +17,6 @@ use anyhow::Context;
use anyhow::Error;
use anyhow::Result;
use log::trace;
-use resiter::AndThen;
use resiter::FilterMap;
use resiter::Map;
@@ -39,196 +38,110 @@ impl From<BTreeMap<(PackageName, PackageVersion), Package>> for Repository {
}
impl Repository {
+ fn new(inner: BTreeMap<(PackageName, PackageVersion), Package>) -> Self {
+ Repository { inner }
+ }
+
pub fn load(path: &Path, progress: &indicatif::ProgressBar) -> Result<Self> {
- fn all_subdirs(p: &Path) -> Result<Vec<PathBuf>> {
- let mut v = Vec::new();
- for de in p.read_dir()? {
- let de = de?;
- let is_dir = de.file_type()?.is_dir();
- let is_hidden = de
- .path()
- .file_name()
- .and_then(|s| s.to_str())
- .map(|s| s.starts_with('.'))
- .unwrap_or(false);
-
- if is_dir && !is_hidden {
- v.push(de.path());
- }
- }
+ use crate::repository::fs::FileSystemRepresentation;
+ use config::Config;
+ use rayon::iter::IntoParallelRefIterator;
+ use rayon::iter::ParallelIterator;
- Ok(v)
- }
+ trace!("Loading files from filesystem");
+ let fsr = FileSystemRepresentation::load(path.to_path_buf())?;
- fn load_recursive(
- root: &Path,
- path: &Path,
- mut config: config::Config,
- progress: &indicatif::ProgressBar,
- ) -> Result<Vec<Result<Package>>> {
- let pkg_file = path.join("pkg.toml");
-
- if pkg_file.is_file() {
- let buf = std::fs::read_to_string(&pkg_file)
- .with_context(|| anyhow!("Reading {}", pkg_file.display()))?;
-
- // This function has an issue: It loads packages recursively, but if there are
- // patches set for a package, these patches are set _relative_ to the current
- // pkg.toml file.
- //
- // E.G.:
- // (1) /pkg.toml
- // (2) /a/pkg.toml
- // (3) /a/1.0/pkg.toml
- // (4) /a/2.0/pkg.toml
- //
- // If (2) defines a patches = ["./1.patch"], the patch exists at /a/1.patch.
- // We can fix that by modifying the Config object after loading (2) and fixing the
- // path of the patch to be relative to the repostory root.
- //
- // But if we continue loading the /a/ subdirectory recursively, this value gets
- // overwritten by Config::refresh(), which is called by Config::merge, for example.
- //
- // The trick is, to get the list of patches _before_ the merge, and later
- // re-setting them after the merge, if there were no new patches set (which itself
- // is tricky to find out, because the `Config` object _looks like_ there is a new
- // array set).
- //
- // If (3), for example, does set a new patches=[] array, the old array is
- // invalidated and no longer relevant for that package!
- // Thus, we can savely throw it away and continue with the new array, fixing the
- // pathes to be relative to repo root again.
- //
- // If (4) does _not_ set any patches, we must ensure that the patches from the
- // loading of (2) are used and not overwritten by the Config::refresh() call
- // happening during Config::merge().
- //
-
- // first of all, we get the patches array.
- // This is either the patches array from the last recursion or the newly set one,
- // that doesn't matter here.
- let patches_before_merge = match config.get_array("patches") {
- Ok(v) => {
- v.into_iter()
- .map(config::Value::into_str)
- .map(|r| {
- r.map(PathBuf::from)
- .with_context(|| anyhow!("patches must be strings"))
- .map_err(Error::from)
- })
- .collect::<Result<Vec<_>>>()?
- },
- Err(config::ConfigError::NotFound(_)) => vec![],
- Err(e) => return Err(e).map_err(Error::from),
- };
- trace!("Patches before merging: {:?}", patches_before_merge);
-
- // Merge the new pkg.toml file over the already loaded configuration
- config
- .merge(config::File::from_str(&buf, config::FileFormat::Toml))
- .with_context(|| anyhow!("Loading contents of {}", pkg_file.display()))?;
-
- let path_relative_to_root = path.strip_prefix(root)?;
-
- // get the patches that are in the `config` object after the merge
- let patches = config
- .get_array("patches")
- .or_else(|e| match e {
-
- // if there was none, we simply use an empty array
- // This is cheap because Vec::with_capacity(0) does not allocate
- config::ConfigError::NotFound(_) => Ok(Vec::with_capacity(0)),
- other => Err(other),
- })?
- .into_iter()
-
- // Map all `Value`s to String and then join them on the path that is relative to
- // the root directory of the repository.
+ fn get_patches(config: &Config) -> Result<Vec<PathBuf>> {
+ match config.get_array("patches") {
+ Ok(v) => v.into_iter()
.map(config::Value::into_str)
.map_err(Error::from)
- .map_ok(|patch| path_relative_to_root.join(patch))
- .inspect(|patch| trace!("Patch relative to root: {:?}", patch.as_ref().map(|p| p.display())))
-
- // if the patch file exists, use it (as config::Value).
- //
- // Otherwise we have an error here, because we're refering to a non-existing file.
- .and_then_ok(|patch| if patch.exists() {
- trace!("Path to patch exists: {}", patch.display());
- Ok(Some(patch))
- } else if patches_before_merge.iter().any(|pb| pb.file_name() == patch.file_name()) {
- // We have a patch already in the array that is named equal to the patch
- // we have in the current recursion.
- // It seems like this patch was already in the list and we re-found it
- // because we loaded a deeper pkg.toml file.
- Ok(None)
- } else {
- trace!("Path to patch does not exist: {}", patch.display());
- Err(anyhow!("Patch does not exist: {}", patch.display()))
- })
- .filter_map_ok(|o| o)
- .collect::<Result<Vec<_>>>()?;
-
- // If we found any patches, use them. Otherwise use the array from before the merge
- // (which already has the correct pathes from the previous recursion).
- let patches = if !patches.is_empty() && patches.iter().all(|p| p.exists()) {
- patches
- } else {
- patches_before_merge
- };
-
- trace!("Patches after postprocessing merge: {:?}", patches);
- let patches = patches
- .into_iter()
- .map(|p| p.display().to_string())
- .map(config::Value::from)
- .collect::<Vec<_>>();
- config.set_once("patches", config::Value::from(patches))?;
+ .map_err(|e| e.context("patches must be strings"))
+ .map_err(Error::from)
+ .map_ok(PathBuf::from)
+ .collect(),
+ Err(config::ConfigError::NotFound(_)) => Ok(Vec::with_capacity(0)),
+ Err(e) => Err(e).map_err(Error::from),
}
+ }
- let subdirs = all_subdirs(path)
- .with_context(|| anyhow!("Finding subdirs for {}", pkg_file.display()))?;
-
- if subdirs.is_empty() {
+ fsr.files()
+ .par_iter()
+ .inspect(|path| trace!("Checking for leaf file: {}", path.display()))
+ .filter_map(|path| {
+ match fsr.is_leaf_file(path) {
+ Ok(true) => Some(Ok(path)),
+ Ok(false) => None,
+ Err(e) => Some(Err(e)),
+ }
+ })
+ .inspect(|r| trace!("Loading files for {:?}", r))
+ .map(|path| {
progress.tick();
- if pkg_file.is_file() {
- let package = config.try_into()
- .with_context(|| anyhow!("Failed to parse {} into package", path.display()))
- .and_then(|package: Package| {
- if package.name().is_empty() {
- Err(anyhow!("Package name cannot be empty: {}", pkg_file.display()))
- } else if package.version().is_empty() {
- Err(anyhow!("Package version cannot be empty: {}", pkg_file.display()))
+ let path = path?;
+ fsr.get_files_for(path)?
+ .iter()
+ .inspect(|(path, _)| trace!("Loading layer at {}", path.display()))
+ .fold(Ok(Config::default()) as Result<_>, |config, (path, ref content)| {
+ let mut config = config?;
+ let patches_before_merge = get_patches(&config)?;
+
+ config.merge(config::File::from_str(&content, config::FileFormat::Toml))
+ .with_context(|| anyhow!("Loading contents of {}", path.display()))?;
+
+ // get the patches that are in the `config` object after the merge
+ let patches = get_patches(&config)?
+ .into_iter()
+ .map(|p| {
+ if let Some(current_dir) = path.parent() {
+ fsr.root().join(current_dir).join(p)
+ } else {
+ unimplemented!()
+ }
+ })
+ .inspect(|patch| trace!("Patch: {:?}", patch))
+
+ // if the patch file exists, use it (as config::Value).
+ //
+                            // Otherwise we have an error here, because we're referring to a non-existing file.
+ .map(|patch| if patch.exists() {
+ trace!("Path to patch exists: {}", patch.display());
+ Ok(Some(patch))
+ } else if patches_before_merge.iter().any(|pb| pb.file_name() == patch.file_name()) {
+ // We have a patch already in the array that is named equal to the patch
+ // we have in the fold iteration.
+ // It seems like this patch was already in the list and we re-found it
+ // because we loaded a "deeper" pkg.toml file.
+ Ok(None)
} else {
- Ok(package)
- }
- });
-
- Ok(vec![package])
- } else {
- Ok(vec![])
- }
- } else {
- subdirs.into_iter().fold(Ok(Vec::new()), |vec, dir| {
- vec.and_then(|mut v| {
- trace!("Recursing into {}", dir.display());
- let mut loaded = load_recursive(root, &dir, config.clone(), progress)
- .with_context(|| anyhow!("Reading package from {}", pkg_file.display()))?;
-
- v.append(&mut loaded);
- Ok(v)
+ trace!("Path to patch does not exist: {}", patch.display());
+ Err(anyhow!("Patch does not exist: {}", patch.display()))
+ })
+ .filter_map_ok(|o| o)
+ .collect::<Result<Vec<_>>>()?;
+
+ // If we found any patches, use them. Otherwise use the array from before the merge
+                            // (which already has the correct paths from the previous recursion).
+ let patches = if !patches.is_empty() && patches.iter().all(|p| p.exists()) {
+ patches
+ } else {
+ patches_before_merge
+ };
+
+ trace!("Patches after postprocessing merge: {:?}", patches);
+ let patches = patches
+ .into_iter()
+ .map(|p| p.display().to_string())
+ .map(config::Value::from)
+ .collect::<Vec<_>>();
+ config.set_once("patches", config::Value::from(patches))?;
+ Ok(config)
})
- })
- }
- }
-
- let inner = load_recursive(path, path, config::Config::default(), progress)
- .with_context(|| anyhow!("Recursing for {}", path.display()))?
- .into_iter()
- .inspect(|p| trace!("Loading into repository: {:?}", p))
- .map_ok(|p| ((p.name().clone(), p.version().clone()), p))