diff options
Diffstat (limited to 'src/repository/fs/representation.rs')
-rw-r--r-- | src/repository/fs/representation.rs | 432 |
1 files changed, 432 insertions, 0 deletions
diff --git a/src/repository/fs/representation.rs b/src/repository/fs/representation.rs new file mode 100644 index 0000000..5f92ba7 --- /dev/null +++ b/src/repository/fs/representation.rs @@ -0,0 +1,432 @@ +// +// Copyright (c) 2020-2021 science+computing ag and other contributors +// +// This program and the accompanying materials are made +// available under the terms of the Eclipse Public License 2.0 +// which is available at https://www.eclipse.org/legal/epl-2.0/ +// +// SPDX-License-Identifier: EPL-2.0 +// + +use std::collections::HashMap; +use std::convert::TryFrom; +use std::convert::TryInto; +use std::path::Component; +use std::path::Path; +use std::path::PathBuf; + +use anyhow::Context; +use anyhow::Error; +use anyhow::Result; +use anyhow::anyhow; +use resiter::AndThen; +use resiter::Filter; +use resiter::Map; +use walkdir::DirEntry; +use walkdir::WalkDir; + +use crate::repository::fs::element::Element; +use crate::repository::fs::path::PathComponent; + +/// A type representing the filesystem +/// +/// This type can be used to load pkg.toml files from the filesystem. As soon as this object is +/// loaded, all filesystem access is done and postprocessing of the loaded data can happen +#[derive(Debug, getset::Getters)] +pub struct FileSystemRepresentation { + #[getset(get = "pub")] + root: PathBuf, + + #[getset(get = "pub")] + files: Vec<PathBuf>, + + elements: HashMap<PathComponent, Element>, +} + +impl FileSystemRepresentation { + /// Load the FileSystemRepresentation object starting a `root`. + pub fn load(root: PathBuf) -> Result<Self> { + let mut fsr = FileSystemRepresentation { + root: root.clone(), + elements: HashMap::new(), + files: vec![], + }; + + // get the number of maximum files open (ulimit -n on linux) + let max_files_open = { + let (soft, _hard) = rlimit::getrlimit(rlimit::Resource::NOFILE)?; + + // use less than the soft limit if the soft limit is above 15 + soft.checked_sub(16) + .unwrap_or(soft) + .try_into() // we need to have a usize + .unwrap_or(usize::MAX) // if usize is smaller than u64, usize::MAX will do + }; + + log::trace!("Loading files from filesystem starting at: {}", root.display()); + log::trace!("Loading with a maximum of {} files open", max_files_open); + WalkDir::new(root) + .follow_links(false) + .max_open(max_files_open) + .same_file_system(true) + .into_iter() + .filter_entry(|e| !is_hidden(e) && (is_pkgtoml(e) || is_dir(e))) + .filter_ok(|e| is_pkgtoml(e)) + .inspect(|el| log::trace!("Loading: {:?}", el)) + .map_err(Error::from) + .and_then_ok(|de| { + let mut curr_hm = &mut fsr.elements; + let de_path = de.path().strip_prefix(&fsr.root)?; + fsr.files.push(de_path.to_path_buf()); + + // traverse the HashMap tree + for cmp in de_path.components() { + match PathComponent::try_from(&cmp)? { + PathComponent::PkgToml => { + curr_hm.entry(PathComponent::PkgToml) + .or_insert(Element::File(load_file(de_path)?)); + }, + dir @ PathComponent::DirName(_) => { + curr_hm.entry(dir.clone()) + .or_insert(Element::Dir(HashMap::new())); + + curr_hm = curr_hm.get_mut(&dir) + .unwrap() // safe, because we just inserted it + .get_map_mut() + .unwrap(); // safe, because we inserted Element::Dir + }, + } + } + + Ok(()) + }) + .collect::<Result<Vec<_>>>()?; + + Ok(fsr) + } + + /// Check the tree whether a `Path` points to a file in a directory that does not contain more + /// directories containing pkg.toml files. + /// + /// # Example + /// + /// / + /// /foo/ + /// /foo/pkg.toml <-- is leaf + /// /bar/ + /// /bar/pkg.toml <-- is not a leaf + /// /bar/baz/pkg.toml <-- is a leaf + /// + /// + pub fn is_leaf_file(&self, path: &Path) -> Result<bool> { + let mut curr_hm = &self.elements; + + // Helper to check whether a tree contains pkg.toml files, recursively + fn toml_files_in_tree(hm: &HashMap<PathComponent, Element>) -> bool { + if let Some(Element::File(_)) = hm.get(&PathComponent::PkgToml) { + return true + } + + for value in hm.values() { + match value { + Element::File(_) => return true, + Element::Dir(hm) => if toml_files_in_tree(hm) { + return true + }, + } + } + false + } + + for elem in path.components() { + let elem = PathComponent::try_from(&elem)?; + + match curr_hm.get(&elem) { + Some(Element::File(_)) => { + // if I have a file now, and the current hashmap only holds either + // * No directory + // * or a directory where all subdirs do not contain a pkg.toml + return Ok(curr_hm.values().count() == 1 || !toml_files_in_tree(curr_hm)) + }, + Some(Element::Dir(hm)) => curr_hm = hm, + None => anyhow::bail!("Path component '{:?}' was not loaded in map, this is most likely a bug", elem), + } + } + + Ok(false) + } + + /// Get a Vec<(PathBuf, &String)> for the `path` + /// + /// The result of this function is the trail of pkg.toml files from `self.root` to `path`, + /// whereas the PathBuf is the actual path to the file and the `&String` is the content of the + /// individual file. + /// + /// Merging all Strings in the returned Vec as Config objects should produce a Package. to + /// `path`, whereas the PathBuf is the actual path to the file and the `&String` is the content + /// of the individual file. + /// + /// Merging all Strings in the returned Vec as Config objects should produce a Package. + pub fn get_files_for<'a>(&'a self, path: &Path) -> Result<Vec<(PathBuf, &'a String)>> { + let mut res = Vec::with_capacity(10); // good enough + + let mut curr_hm = &self.elements; + let mut curr_path = PathBuf::from(""); + for elem in path.components() { + let elem = PathComponent::try_from(&elem)?; + + if !elem.is_pkg_toml() { + if let Some(Element::File(intermediate)) = curr_hm.get(&PathComponent::PkgToml) { + res.push((curr_path.join("pkg.toml"), intermediate)); + } + } + + match curr_hm.get(&elem) { + Some(Element::File(cont)) => res.push((curr_path.join("pkg.toml"), cont)), + Some(Element::Dir(hm)) => { + curr_path = curr_path.join(elem.dir_name().unwrap()); // unwrap safe by above match + curr_hm = hm; + } + None => anyhow::bail!("Path component '{:?}' was not loaded in map, this is most likely a bug", elem), + } + } + + Ok(res) + } +} + +/// Helper to check whether a DirEntry points to a hidden file +fn is_hidden(entry: &DirEntry) -> bool { + log::trace!("Check {:?} is hidden", entry); + entry.file_name().to_str().map(|s| s.starts_with(".")).unwrap_or(false) +} + +/// Helper to check whether a DirEntry points to a directory +fn is_dir(entry: &DirEntry) -> bool { + log::trace!("Check {:?} is directory", entry); + entry.file_type().is_dir() +} + +/// Helper to check whether a DirEntry points to a pkg.toml file +fn is_pkgtoml(entry: &DirEntry) -> bool { + log::trace!("Check {:?} == 'pkg.toml'", entry); + entry.file_name().to_str().map(|s| s == "pkg.toml").unwrap_or(false) +} + +/// Helper fn to load a Path into memory as String +fn load_file(path: &Path) -> Result<String> { + log::trace!("Reading {}", path.display()); + std::fs::read_to_string(path) + .with_context(|| anyhow!("Reading file from filesystem: {}", path.display())) + .map_err(Error::from) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn dir(name: &str, hm: Vec<(PathComponent, Element)>) -> (PathComponent, Element) { + (PathComponent::DirName(name.to_string()), Element::Dir(hm.into_iter().collect())) + } + + fn pkgtoml(content: &str) -> (PathComponent, Element) { + (PathComponent::PkgToml, Element::File(content.to_string())) + } + + fn pb(s: &str) -> PathBuf { + PathBuf::from(s) + } + + fn s(s: &str) -> String { + String::from(s) + } + + #[test] + fn test_one_file_in_directory() { + let fsr = FileSystemRepresentation { + root: PathBuf::from("/"), + + // Representing + // / + // /foo + // /foo/pkg.toml + elements: vec![ + dir("foo", vec![ + pkgtoml("content") + ]) + ].into_iter().collect(), + + files: vec![ + PathBuf::from("foo/pkg.toml") + ], + }; + + let path = "foo/pkg.toml".as_ref(); + + assert!(fsr.is_leaf_file(path).unwrap()); + assert_eq!(fsr.get_files_for(path).unwrap(), vec![(pb("foo/pkg.toml"), &s("content"))]); + } + + #[test] + fn test_deep_pkgtoml() { + let fsr = FileSystemRepresentation { + root: PathBuf::from("/"), + + // Representing + // / + // /foo + // /foo/bar + // /foo/baz + // /foo/baz/pkg.toml + elements: vec![ + dir("foo", vec![ + dir("bar", vec![ + dir("baz", vec![ + pkgtoml("content"), + ]) + ]) + ]) + ].into_iter().collect(), + + files: vec![ + PathBuf::from("foo/bar/baz/pkg.toml") + ], + }; + + let path = "foo/bar/baz/pkg.toml".as_ref(); + + assert!(fsr.is_leaf_file(path).unwrap()); + assert_eq!(fsr.get_files_for(path).unwrap(), vec![(pb("foo/bar/baz/pkg.toml"), &s("content"))]); + } + + #[test] + fn test_hierarchy() { + let fsr = FileSystemRepresentation { + root: PathBuf::from("/"), + + // Representing + // / + // /foo + // /foo/bar + // /foo/baz + // /foo/baz/pkg.toml + elements: vec![ + dir("foo", vec![ + pkgtoml("content1"), + dir("bar", vec![ + pkgtoml("content2"), + dir("baz", vec![ + pkgtoml("content3"), + ]) + ]) + ]) + ].into_iter().collect(), + + files: vec![ + PathBuf::from("foo/pkg.toml"), + PathBuf::from("foo/bar/pkg.toml"), + PathBuf::from("foo/bar/baz/pkg.toml") + ], + }; + + { + let path = "foo/pkg.toml".as_ref(); + + assert!(!fsr.is_leaf_file(path).unwrap()); + } + { + let path = "foo/bar/pkg.toml".as_ref(); + + assert!(!fsr.is_leaf_file(path).unwrap()); + } + { + let path = "foo/bar/baz/pkg.toml".as_ref(); + + assert!(fsr.is_leaf_file(path).unwrap()); + assert_eq!(fsr.get_files_for(path).unwrap(), vec![ + (pb("foo/pkg.toml"), &s("content1")), + (pb("foo/bar/pkg.toml"), &s("content2")), + (pb("foo/bar/baz/pkg.toml"), &s("content3")), + ]); + } + } + + #[test] + fn test_hierarchy_with_missing_intermediate_files() { + let fsr = FileSystemRepresentation { + root: PathBuf::from("/"), + + // Representing + // / + // /foo + // /foo/bar + // /foo/baz + // /foo/baz/pkg.toml + elements: vec![ + dir("foo", vec![ + pkgtoml("content1"), + dir("bar", vec![ + dir("baz", vec![ + pkgtoml("content3"), + ]) + ]) + ]) + ].into_iter().collect(), + + files: vec![ + PathBuf::from("foo/pkg.toml"), + PathBuf::from("foo/bar/baz/pkg.toml") + ], + }; + + let path = "foo/pkg.toml".as_ref(); + assert!(!fsr.is_leaf_file(path).unwrap()); + + let path = "foo/bar/baz/pkg.toml".as_ref(); + assert!(fsr.is_leaf_file(path).unwrap()); + assert_eq!(fsr.get_files_for(path).unwrap(), vec![ + (pb("foo/pkg.toml"), &s("content1")), + (pb("foo/bar/baz/pkg.toml"), &s("content3")), + ]); + } + + #[test] + fn test_hierarchy_with_toplevel_file() { + let fsr = FileSystemRepresentation { + root: PathBuf::from("/"), + + // Representing + // / + // /foo + // /foo/bar + // /foo/baz + // /foo/baz/pkg.toml + elements: vec![ + pkgtoml("content1"), + dir("foo", vec![ + dir("bar", vec![ + dir("baz", vec![ + pkgtoml("content3"), + ]) + ]) + ]) + ].into_iter().collect(), + + files: vec![ + PathBuf::from("pkg.toml"), + PathBuf::from("foo/bar/baz/pkg.toml") + ], + }; + + let path = "pkg.toml".as_ref(); + assert!(!fsr.is_leaf_file(path).unwrap()); + + let path = "foo/bar/baz/pkg.toml".as_ref(); + assert!(fsr.is_leaf_file(path).unwrap()); + assert_eq!(fsr.get_files_for(path).unwrap(), vec![ + (pb("pkg.toml"), &s("content1")), + (pb("foo/bar/baz/pkg.toml"), &s("content3")), + ]); + } + +} |