diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/orchestrator/orchestrator.rs | 390 |
1 files changed, 289 insertions, 101 deletions
diff --git a/src/orchestrator/orchestrator.rs b/src/orchestrator/orchestrator.rs index e637f36..cbb783c 100644 --- a/src/orchestrator/orchestrator.rs +++ b/src/orchestrator/orchestrator.rs @@ -8,17 +8,21 @@ // SPDX-License-Identifier: EPL-2.0 // +#![allow(unused)] + use std::path::PathBuf; use std::sync::Arc; -use anyhow::anyhow; use anyhow::Error; use anyhow::Result; +use anyhow::anyhow; use diesel::PgConnection; use indicatif::ProgressBar; use log::trace; -use tokio::sync::RwLock; use tokio::stream::StreamExt; +use tokio::sync::RwLock; +use tokio::sync::mpsc::Receiver; +use tokio::sync::mpsc::Sender; use typed_builder::TypedBuilder; use uuid::Uuid; @@ -90,7 +94,7 @@ impl<'a> OrchestratorSetup<'a> { /// It is either a list of artifacts (with their respective database artifact objects) /// or a UUID and an Error object, where the UUID is the job UUID and the error is the /// anyhow::Error that was issued. -type JobResult = std::result::Result<Vec<(Artifact, dbmodels::Artifact)>, (Uuid, Error)>; +type JobResult = std::result::Result<(Uuid, Vec<(Artifact, dbmodels::Artifact)>), Vec<(Uuid, Error)>>; impl<'a> Orchestrator<'a> { pub async fn run(self, output: &mut Vec<dbmodels::Artifact>) -> Result<Vec<(Uuid, Error)>> { @@ -100,128 +104,311 @@ impl<'a> Orchestrator<'a> { } async fn run_tree(self) -> Result<(Vec<(Artifact, dbmodels::Artifact)>, Vec<(Uuid, Error)>)> { - use futures::FutureExt; - - let mut already_built = vec![]; - let mut artifacts = vec![]; - let mut errors = vec![]; - - loop { - // loop{} - // until for all elements of self.jobtree, the uuid exists in already_built - // - // for each element in jobtree - // where dependencies(element) all in already_built - // run_job_for(element) - // - // for results from run_job_for calls - // remember UUID in already_built - // put built artifacts in artifacts - // if error, abort everything - // - // - let multibar = Arc::new(indicatif::MultiProgress::new()); - let build_results = self.jobtree - .inner() - .iter() - .filter(|(uuid, jobdef)| { // select all jobs where all dependencies are in `already_built` - trace!("Filtering job definition: {:?}", jobdef); - jobdef.dependencies.iter().all(|d| already_built.contains(d)) && !already_built.contains(uuid) - }) - .map(|(uuid, jobdef)| { - trace!("Running job {}", uuid); - let bar = multibar.add(self.progress_generator.bar()); - self.run_job(jobdef, bar).map(move |r| (*uuid, r)) - }) - .collect::<futures::stream::FuturesUnordered<_>>() - .collect::<Vec<(_, Result<JobResult>)>>(); - - let multibar_block = tokio::task::spawn_blocking(move || multibar.join()); - let (_, build_results) = tokio::join!(multibar_block, build_results); - - for (uuid, artifact_result) in build_results.into_iter() { - already_built.push(uuid); - - match artifact_result { - Ok(Ok(mut arts)) => artifacts.append(&mut arts), - Ok(Err((uuid, e))) => { // error during job running - log::error!("Error for job {} = {}", uuid, e); - errors.push((uuid, e)); - }, - - Err(e) => return Err(e), // error during container execution + let multibar = Arc::new(indicatif::MultiProgress::new()); + + // For each job in the jobtree, built a tuple with + // + // 1. The receiver that is used by the task to receive results from dependency tasks from + // 2. The task itself (as a TaskPreparation object) + // 3. The sender, that can be used to send results to this task + // 4. An Option<Sender> that this tasks uses to send its results with + // This is an Option<> because we need to set it later and the root of the tree needs a + // special handling, as this very function will wait on a receiver that gets the results + // of the root task + let jobs: Vec<(Receiver<JobResult>, TaskPreparation, Sender<JobResult>, _)> = self.jobtree + .inner() + .iter() + .map(|(uuid, jobdef)| { + // We initialize the channel with 100 elements here, as there is unlikely a task + // that depends on 100 other tasks. + // Either way, this might be increased in future. + let (sender, receiver) = tokio::sync::mpsc::channel(100); + + trace!("Creating TaskPreparation object for job {}", uuid); + let tp = TaskPreparation { + uuid: *uuid, + jobdef, + + bar: multibar.add(self.progress_generator.bar()), + config: self.config, + source_cache: &self.source_cache, + scheduler: &self.scheduler, + merged_stores: &self.merged_stores, + database: self.database.clone(), + }; + + (receiver, tp, sender, std::cell::RefCell::new(None as Option<Sender<JobResult>>)) + }) + .collect(); + + // Associate tasks with their appropriate sender + // + // Right now, the tuple yielded from above contains (rx, task, tx, _), where rx and tx belong + // to eachother. + // But what we need is the tx (sender) that the task should send its result to, of course. + // + // So this algorithm in plain text is: + // for each job + // find the job that depends on this job + // use the sender of the found job and set it as sender for this job + for job in jobs.iter() { + *job.3.borrow_mut() = jobs.iter() + .find(|j| j.1.jobdef.dependencies.contains(&job.1.uuid)) + .map(|j| j.2.clone()); + } + + // Find the id of the root task + // + // By now, all tasks should be associated with their respective sender. + // Only one has None sender: The task that is the "root" of the tree. + // By that property, we can find the root task. + // + // Here, we copy its uuid, because we need it later. + let root_job_id = jobs.iter() + .find(|j| j.3.borrow().is_none()) + .map(|j| j.1.uuid) + .ok_or_else(|| anyhow!("Failed to find root task"))?; + trace!("Root job id = {}", root_job_id); + + // Create a sender and a receiver for the root of the tree + let (root_sender, mut root_receiver) = tokio::sync::mpsc::channel(100); + + // Make all prepared jobs into real jobs and run them + // + // This maps each TaskPreparation with its sender and receiver to a JobTask and calls the + // async fn JobTask::run() to run the task. + // + // The JobTask::run implementation handles the rest, we just have to wait for all futures + // to succeed. + let running_jobs = jobs + .into_iter() + .map(|prep| { + trace!("Creating JobTask for = {}", prep.1.uuid); + let root_sender = root_sender.clone(); + JobTask { + uuid: prep.1.uuid, + jobdef: prep.1.jobdef, + + bar: prep.1.bar.clone(), + + config: prep.1.config, + source_cache: prep.1.source_cache, + scheduler: prep.1.scheduler, + merged_stores: prep.1.merged_stores, + database: prep.1.database.clone(), + + receiver: prep.0, + + // the sender is set or we need to use the root sender + sender: prep.3.into_inner().unwrap_or(root_sender), } - } + }) + .map(|task| task.run()) + .collect::<futures::stream::FuturesUnordered<_>>() + .collect::<Result<()>>(); + + let root_recv = root_receiver.recv(); + let multibar_block = tokio::task::spawn_blocking(move || multibar.join()); + + let (root_recv, _, jobs_result) = tokio::join!(root_recv, multibar_block, running_jobs); + let _ = jobs_result?; + match root_recv { + None => Err(anyhow!("No result received...")), + Some(Ok((_, artifacts))) => Ok((artifacts, vec![])), + Some(Err(errors)) => Ok((vec![], errors)), + } + } +} + +/// Helper type: A task with all things attached, but not sender and receivers +/// +/// This is the preparation of the JobTask, but without the associated sender and receiver, because +/// it is not mapped to the task yet. +/// +/// This simply holds data and does not contain any more functionality +struct TaskPreparation<'a> { + /// The UUID of this job + uuid: Uuid, + jobdef: &'a JobDefinition, + + bar: ProgressBar, - if !errors.is_empty() { - break + config: &'a Configuration, + source_cache: &'a SourceCache, + scheduler: &'a EndpointScheduler, + merged_stores: &'a MergedStores, + database: Arc<PgConnection>, +} + +/// Helper type for executing one job task +/// +/// This type represents a task for a job that can immediately be executed (see `JobTask::run()`). +struct JobTask<'a> { + /// The UUID of this job + uuid: Uuid, + jobdef: &'a JobDefinition, + + bar: ProgressBar, + + config: &'a Configuration, + source_cache: &'a SourceCache, + scheduler: &'a EndpointScheduler, + merged_stores: &'a MergedStores, + database: Arc<PgConnection>, + + /// Channel where the dependencies arrive + receiver: Receiver<JobResult>, + + /// Channel to send the own build outputs to + sender: Sender<JobResult>, +} + +impl<'a> JobTask<'a> { + + /// Run the job + /// + /// This function runs the job from this object on the scheduler as soon as all dependend jobs + /// returned successfully. + async fn run(mut self) -> Result<()> { + trace!("[{}]: Running", self.uuid); + + // A list of job run results from dependencies that were received from the tasks for the + // dependencies + let mut received_dependencies: Vec<(Uuid, Vec<(Artifact, dbmodels::Artifact)>)> = vec![]; + + // A list of errors that were received from the tasks for the dependencies + let mut received_errors: Vec<(Uuid, Error)> = vec![]; + + // Helper function to check whether all UUIDs are in a list of UUIDs + let all_dependencies_are_in = |dependency_uuids: &[Uuid], list: &[(Uuid, Vec<_>)]| { + dependency_uuids.iter().all(|dependency_uuid| { + list.iter().map(|tpl| tpl.0).any(|id| id == *dependency_uuid) + }) + }; + + // as long as the job definition lists dependencies that are not in the received_dependencies list... + while !all_dependencies_are_in(&self.jobdef.dependencies, &received_dependencies) { + // Update the status bar message + self.bar.set_message(&format!("Waiting ({}/{})...", received_dependencies.len(), self.jobdef.dependencies.len())); + trace!("[{}]: Updated bar", self.uuid); + + trace!("[{}]: receiving...", self.uuid); + // receive from the receiver + match self.receiver.recv().await { + Some(Ok(v)) => { + // The task we depend on succeeded and returned an + // (uuid of the job, [Artifact]) + trace!("[{}]: Received: {:?}", self.uuid, v); + received_dependencies.push(v) + }, + Some(Err(mut e)) => { + // The task we depend on failed + // we log that error for now + trace!("[{}]: Received: {:?}", self.uuid, e); + received_errors.append(&mut e); + }, + None => { + // The task we depend on finished... we must check what we have now... + trace!("[{}]: Received nothing, channel seems to be empty", self.uuid); + + // Find all dependencies that we need but which are not received + let received = received_dependencies.iter().map(|tpl| tpl.0).collect::<Vec<_>>(); + let missing_deps: Vec<_> = self.jobdef + .dependencies + .iter() + .filter(|d| !received.contains(d)) + .collect(); + trace!("[{}]: Missing dependencies = {:?}", self.uuid, missing_deps); + + // ... if there are any, error + if !missing_deps.is_empty() { + return Err(anyhow!("Childs finished, but dependencies still missing: {:?}", missing_deps)) + } else { + // all dependencies are received + break; + } + }, } - // already_built.sort(); // TODO: optimization for binary search in - // above and below contains() clause + trace!("[{}]: Received errors = {:?}", self.uuid, received_errors); + // if there are any errors from child tasks + if !received_errors.is_empty() { + // send them to the parent,... + self.sender.send(Err(received_errors)).await; - if self.jobtree.inner().iter().all(|(uuid, _)| already_built.contains(uuid)) { - break + // ... and stop operation, because the whole tree will fail anyways. + return Ok(()) } } - Ok((artifacts, errors)) - } - - async fn run_job(&self, jobdef: &JobDefinition, bar: ProgressBar) -> Result<JobResult> { - let dependency_artifacts = self.get_dependency_artifacts_for_jobs(&jobdef.dependencies).await?; - bar.set_message("Preparing..."); + // Map the list of received dependencies from + // Vec<(Uuid, Vec<(Artifact)>)> + // to + // Vec<Artifact> + let dependency_artifacts = received_dependencies + .iter() + .map(|tpl| tpl.1.iter()) + .flatten() + .map(|tpl| tpl.0.clone()) + .collect(); + trace!("[{}]: Dependency artifacts = {:?}", self.uuid, dependency_artifacts); + self.bar.set_message("Preparing..."); + // Create a RunnableJob object let runnable = RunnableJob::build_from_job( - &jobdef.job, - &self.source_cache, - &self.config, + &self.jobdef.job, + self.source_cache, + self.config, dependency_artifacts) .await?; - bar.set_message("Scheduling..."); - let job_uuid = *jobdef.job.uuid(); - match self.scheduler.schedule_job(runnable, bar).await?.run().await { - Err(e) => return Ok(Err((job_uuid, e))), + self.bar.set_message("Scheduling..."); + let job_uuid = *self.jobdef.job.uuid(); + + // Schedule the job on the scheduler + match self.scheduler.schedule_job(runnable, self.bar).await?.run().await { + // if the scheduler run reports an error, + // that is an error from the actual execution of the job ... + Err(e) => { + trace!("[{}]: Scheduler returned error = {:?}", self.uuid, e); + // ... and we send that to our parent + self.sender.send(Err(vec![(job_uuid, e)])).await?; + }, + + // if the scheduler run reports success, + // it returns the database artifact objects it created! Ok(db_artifacts) => { - db_artifacts.into_iter() + trace!("[{}]: Scheduler returned artifacts = {:?}", self.uuid, db_artifacts); + // we take these artifacts and + let results: JobResult = db_artifacts.into_iter() .map(|db_artifact| async { trace!("Getting store Artifact for db Artifact: {:?}", db_artifact); + + // get the appropriate filesystem artifact for it let art = self.get_store_artifact_for(&db_artifact).await?; trace!("Store Artifact: {:?}", art); Ok(Ok((art, db_artifact))) }) .collect::<futures::stream::FuturesUnordered<_>>() - .collect::<Result<JobResult>>() - .await + .collect::<Result<std::result::Result<Vec<(Artifact, dbmodels::Artifact)>, _>>>() + .await? + .map(|mut v| { + // Also send out the artifact of our dependencies, because we need to + // propagate them upwards through the tree + v.extend(received_dependencies.into_iter().map(|tpl| tpl.1.into_iter()).flatten()); + (self.uuid, v) + }); // and we add the UUID of the job of this task to it + + trace!("[{}]: sending artifacts to parent", self.uuid); + + self.sender + .send(results) + .await?; }, } - } - /// Get all dependency artifacts for the job from the database - /// - /// Use the JobDefinition object and find all dependency outputs in the database - async fn get_dependency_artifacts_for_jobs(&self, uuids: &[Uuid]) -> Result<Vec<Artifact>> { - use crate::schema; - use crate::diesel::ExpressionMethods; - use crate::diesel::QueryDsl; - use crate::diesel::RunQueryDsl; - - // Pseudo code: - // - // * return for uuid in uuids: - // self.database.get(job).get_artifacts() - - schema::artifacts::table - .left_outer_join(schema::jobs::table) - .filter(schema::jobs::uuid.eq_any(uuids)) - .select(schema::artifacts::all_columns) - .load::<dbmodels::Artifact>(&*self.database)? - .iter() - .map(|dbart| self.get_store_artifact_for(dbart)) - .collect::<futures::stream::FuturesUnordered<_>>() - .collect() - .await + trace!("[{}]: Finished successfully", self.uuid); + Ok(()) } async fn get_store_artifact_for(&self, db_artifact: &dbmodels::Artifact) -> Result<Artifact> { @@ -234,3 +421,4 @@ impl<'a> Orchestrator<'a> { }) } } + |