summaryrefslogtreecommitdiffstats
path: root/src/package/dag.rs
diff options
context:
space:
mode:
authorMatthias Beyer <mail@beyermatthias.de>2021-02-04 15:52:02 +0100
committerMatthias Beyer <mail@beyermatthias.de>2021-02-06 11:54:23 +0100
commitefe07be74cf1ae704cb73b0f20c28b33aa46c217 (patch)
tree2c7b1686fc2878b79d82708bd2335c713326299d /src/package/dag.rs
parent8a4ef5e6c0126037412794699d5f80540ffd4802 (diff)
Rewrite package organizational structure using DAG
This patch reimplements the package orchestration functionality to rely on a DAG rather than a tree. A / \ B E / \ \ C D F Before this change, the structure the packages were organized in for a build was a tree. That did work reasonable well for initial development of butido, because this is a simple case and the implementation is rather simple, too. But, packages and their dependencies are not always organized in a tree. Most of the time, they are organized in a DAG: .-> C -, / \ D > A \ / `-> B -ยด This is a real-world example: A could be a common crypto-library that I do not want to name here. B and C could be libraries that use the said crypto-library and D could be a program that use B and C. Because said crypto-library builds rather long, building it twice and throwing one result away is a no-go. A DAG as organizational structure makes that issue go away entirely. Also, we can later implement checks whether the DAG contains multiple versions of the same library, if that is undesireable. The change itself is rather big, frankly because it is a non-trivial change the replace the whole data structure and its handling in the orchestrator code. First of all, we introduce the "daggy" library, which provides the DAG implementation on top of the popular "petgraph" library. The package `Tree` datastructure was replaced by a package `Dag` datastructure. This type implements the heavy-lifting that is needed to load a package and all its dependencies from the `Repository` object. The `JobTree` was also reimplemented, but as `daggy::Dag` provides a convenient `map()` function, its implementation which transforms the package `Dag` into a job `Dag` is rather trivial. `crate::job::Dag` then provides the convenience `iter()` function to iterate over all elements in the DAG and providing a `JobDefinition` object for each node. The topology in which we traverse the DAG is not an issue, as we need to create tasks for all `JobDefinition`s anyways, so we do not care about traversal topology at all. The `crate::package::Package` type got an `Hash` implementation, which is necessary to keep track of the mappings while reading the DAG from the repository. The implementation does not create the edges between the nodes in the DAG right when inserting, but afterwards. To keep track of the `daggy::NodeIndex`es, it keeps a mapping Package -> NodeIndex in a Hashmap. Thus, `Package` must implement `std::hash::Hash` Signed-off-by: Matthias Beyer <mail@beyermatthias.de> Tested-by: Matthias Beyer <mail@beyermatthias.de> squash! Reimplement as DAG
Diffstat (limited to 'src/package/dag.rs')
-rw-r--r--src/package/dag.rs154
1 files changed, 154 insertions, 0 deletions
diff --git a/src/package/dag.rs b/src/package/dag.rs
new file mode 100644
index 0000000..a4596aa
--- /dev/null
+++ b/src/package/dag.rs
@@ -0,0 +1,154 @@
+//
+// Copyright (c) 2020-2021 science+computing ag and other contributors
+//
+// This program and the accompanying materials are made
+// available under the terms of the Eclipse Public License 2.0
+// which is available at https://www.eclipse.org/legal/epl-2.0/
+//
+// SPDX-License-Identifier: EPL-2.0
+//
+
+use std::borrow::Cow;
+use std::collections::HashMap;
+use std::io::Result as IoResult;
+use std::io::Write;
+
+use anyhow::Error;
+use anyhow::Result;
+use anyhow::anyhow;
+use daggy::Walker;
+use indicatif::ProgressBar;
+use log::trace;
+use ptree::Style;
+use ptree::TreeItem;
+use resiter::AndThen;
+use getset::Getters;
+
+use crate::package::Package;
+use crate::repository::Repository;
+
+#[derive(Debug, Getters)]
+pub struct Dag {
+ #[getset(get = "pub")]
+ dag: daggy::Dag<Package, i8>,
+
+ #[getset(get = "pub")]
+ root_idx: daggy::NodeIndex,
+}
+
+impl Dag {
+ pub fn for_root_package(
+ p: Package,
+ repo: &Repository,
+ progress: ProgressBar,
+ ) -> Result<Self> {
+ fn add_sub_packages<'a>(
+ repo: &'a Repository,
+ mappings: &mut HashMap<&'a Package, daggy::NodeIndex>,
+ dag: &mut daggy::Dag<&'a Package, i8>,
+ p: &'a Package,
+ progress: &ProgressBar
+ ) -> Result<()> {
+ p.get_self_packaged_dependencies()
+ .and_then_ok(|(name, constr)| {
+ trace!("Dependency: {:?}", name);
+ let packs = repo.find_with_version(&name, &constr);
+ trace!("Found: {:?}", packs);
+
+ if mappings.keys().any(|p| packs.iter().any(|pk| pk.name() == p.name() && pk.version() == p.version())) {
+ return Err(anyhow!(
+ "Duplicate version of some package in {:?} found",
+ packs
+ ));
+ }
+ trace!("All dependecies available...");
+
+ packs.into_iter()
+ .map(|p| {
+ progress.tick();
+ trace!("Following dependecy: {:?}", p);
+
+ let idx = dag.add_node(p);
+ mappings.insert(p, idx);
+ add_sub_packages(repo, mappings, dag, p, progress)
+ })
+ .collect()
+ })
+ .collect::<Result<()>>()
+ }
+
+ fn add_edges(mappings: &HashMap<&Package, daggy::NodeIndex>, dag: &mut daggy::Dag<&Package, i8>) -> Result<()> {
+ for (package, idx) in mappings {
+ package.get_self_packaged_dependencies()
+ .and_then_ok(|(name, constr)| {
+ mappings
+ .iter()
+ .filter(|(package, _)| *package.name() == name && constr.matches(package.version()))
+ .try_for_each(|(_, dep_idx)| {
+ dag.add_edge(*idx, *dep_idx, 0)
+ .map(|_| ())
+ .map_err(Error::from)
+ })
+ })
+ .collect::<Result<()>>()?
+ }
+
+ Ok(())
+ }
+
+ let mut dag: daggy::Dag<&Package, i8> = daggy::Dag::new();
+ let mut mappings = HashMap::new();
+
+ trace!("Making package Tree for {:?}", p);
+ let root_idx = dag.add_node(&p);
+ mappings.insert(&p, root_idx);
+ add_sub_packages(repo, &mut mappings, &mut dag, &p, &progress)?;
+ add_edges(&mappings, &mut dag)?;
+ trace!("Finished makeing package Tree");
+
+ Ok(Dag {
+ dag: dag.map(|_, p: &&Package| -> Package { (*p).clone() }, |_, e| *e),
+ root_idx
+ })
+ }
+
+ /// Get all packages in the tree by reference
+ ///
+ /// # Warning
+ ///
+ /// The order of the packages is _NOT_ guaranteed by the implementation
+ pub fn all_packages(&self) -> Vec<&Package> {
+ self.dag
+ .graph()
+ .node_indices()
+ .filter_map(|idx| self.dag.graph().node_weight(idx))
+ .collect()
+ }
+
+ pub fn display(&self) -> DagDisplay {
+ DagDisplay(self, self.root_idx)
+ }
+}
+
+#[derive(Clone)]
+pub struct DagDisplay<'a>(&'a Dag, daggy::NodeIndex);
+
+impl<'a> TreeItem for DagDisplay<'a> {
+ type Child = Self;
+
+ fn write_self<W: Write>(&self, f: &mut W, _: &Style) -> IoResult<()> {
+ let p = self.0.dag.graph().node_weight(self.1)
+ .ok_or_else(|| anyhow!("Error finding node: {:?}", self.1))
+ .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
+ write!(f, "{} {}", p.name(), p.version())
+ }
+
+ fn children(&self) -> Cow<[Self::Child]> {
+ let c = self.0.dag.children(self.1);
+ Cow::from(c.iter(&self.0.dag)
+ .map(|(_, idx)| DagDisplay(self.0, idx))
+ .collect::<Vec<_>>()
+ )
+ }
+}
+