diff options
-rw-r--r-- | src/cli.rs | 46 | ||||
-rw-r--r-- | src/commands/find_artifact.rs | 136 | ||||
-rw-r--r-- | src/commands/mod.rs | 3 | ||||
-rw-r--r-- | src/db/find_artifacts.rs | 239 | ||||
-rw-r--r-- | src/db/mod.rs | 3 | ||||
-rw-r--r-- | src/db/models/envvar.rs | 2 | ||||
-rw-r--r-- | src/filestore/path.rs | 2 | ||||
-rw-r--r-- | src/filestore/release.rs | 9 | ||||
-rw-r--r-- | src/filestore/util.rs | 5 | ||||
-rw-r--r-- | src/main.rs | 6 | ||||
-rw-r--r-- | src/package/name.rs | 6 | ||||
-rw-r--r-- | src/package/version.rs | 6 |
12 files changed, 460 insertions, 3 deletions
@@ -425,6 +425,52 @@ pub fn cli<'a>() -> App<'a> { .about("A version constraint to search for (optional), E.G. '=1.0.0'") ) ) + + .subcommand(App::new("find-artifact") + .about("Find artifacts for packages") + .arg(Arg::new("package_name_regex") + .required(true) + .multiple(false) + .index(1) + .value_name("REGEX") + .about("The regex to match the package name against") + ) + .arg(Arg::new("package_version_constraint") + .required(false) + .multiple(false) + .index(2) + .value_name("VERSION_CONSTRAINT") + .about("A version constraint to search for (optional), E.G. '=1.0.0'") + ) + .arg(Arg::new("no_script_filter") + .long("no-script-filter") + .short('S') + .required(false) + .multiple(false) + .takes_value(false) + .about("Don't check for script equality. Can cause unexact results.") + ) + .arg(Arg::new("staging_dir") + .required(false) + .multiple(false) + .long("staging-dir") + .takes_value(true) + .value_name("PATH") + .validator(dir_exists_validator) + .about("Also consider this staging dir when searching for artifacts") + ) + .arg(Arg::new("env_filter") + .required(false) + .multiple(true) + .long("env") + .short('E') + .takes_value(true) + .value_name("KV") + .validator(env_pass_validator) + .about("Filter for this \"key=value\" environment variable") + ) + ) + .subcommand(App::new("find-pkg") .about("Find a package by regex") .arg(Arg::new("package_name_regex") diff --git a/src/commands/find_artifact.rs b/src/commands/find_artifact.rs new file mode 100644 index 0000000..4f053f8 --- /dev/null +++ b/src/commands/find_artifact.rs @@ -0,0 +1,136 @@ +// +// Copyright (c) 2020-2021 science+computing ag and other contributors +// +// This program and the accompanying materials are made +// available under the terms of the Eclipse Public License 2.0 +// which is available at https://www.eclipse.org/legal/epl-2.0/ +// +// SPDX-License-Identifier: EPL-2.0 +// + +use std::path::PathBuf; +use std::io::Write; +use std::sync::Arc; + +use anyhow::Context; +use anyhow::Error; +use anyhow::Result; +use clap::ArgMatches; +use diesel::PgConnection; +use itertools::Itertools; +use log::debug; +use log::trace; + +use crate::config::Configuration; +use crate::filestore::ReleaseStore; +use crate::filestore::StagingStore; +use crate::filestore::path::StoreRoot; +use crate::package::PackageVersionConstraint; +use crate::repository::Repository; +use crate::util::progress::ProgressBars; + +/// Implementation of the "find_artifact" subcommand +pub async fn find_artifact(matches: &ArgMatches, config: &Configuration, progressbars: ProgressBars, repo: Repository, database_connection: PgConnection, max_packages: u64) -> Result<()> { + let package_name_regex = crate::commands::util::mk_package_name_regex({ + matches.value_of("package_name_regex").unwrap() // safe by clap + })?; + + let package_version_constraint = matches + .value_of("package_version_constraint") + .map(String::from) + .map(PackageVersionConstraint::new) + .transpose() + .context("Parsing package version constraint") + .context("A valid package version constraint looks like this: '=1.0.0'")?; + + let env_filter = matches.values_of("env_filter") + .map(|vals| vals.map(crate::util::env::parse_to_env).collect::<Result<Vec<_>>>()) + .transpose()? + .unwrap_or_default(); + + log::debug!("Finding artifacts for '{:?}' '{:?}'", package_name_regex, package_version_constraint); + + let release_store = { + let bar_release_loading = progressbars.bar(); + bar_release_loading.set_length(max_packages); + + let p = config.releases_directory(); + debug!("Loading release directory: {}", p.display()); + let r = ReleaseStore::load(StoreRoot::new(p.clone())?, bar_release_loading.clone()); + if r.is_ok() { + bar_release_loading.finish_with_message("Loaded releases successfully"); + } else { + bar_release_loading.finish_with_message("Failed to load releases"); + } + r? + }; + let staging_store = if let Some(p) = matches.value_of("staging_dir").map(PathBuf::from) { + let bar_staging_loading = progressbars.bar(); + bar_staging_loading.set_length(max_packages); + + if !p.is_dir() { + let _ = tokio::fs::create_dir_all(&p).await?; + } + + debug!("Loading staging directory: {}", p.display()); + let r = StagingStore::load(StoreRoot::new(p.clone())?, bar_staging_loading.clone()); + if r.is_ok() { + bar_staging_loading.finish_with_message("Loaded staging successfully"); + } else { + bar_staging_loading.finish_with_message("Failed to load staging"); + } + Some(r?) + } else { + None + }; + + let database = Arc::new(database_connection); + repo.packages() + .filter(|p| package_name_regex.captures(p.name()).is_some()) + .filter(|p| { + package_version_constraint + .as_ref() + .map(|v| v.matches(p.version())) + .unwrap_or(true) + }) + .inspect(|pkg| trace!("Found package: {:?}", pkg)) + .map(|pkg| { + let script_filter = !matches.is_present("no_script_filter"); + let pathes = crate::db::find_artifacts(database.clone(), config, &pkg, &release_store, staging_store.as_ref(), &env_filter, script_filter)?; + + pathes.iter() + .map(|tpl| (tpl.0.joined(), tpl.1)) + .sorted_by(|tpla, tplb| { + use std::cmp::Ordering; + + // Sort the iterator elements, so that if there is a release date, we always + // prefer the entry with the release date AS LONG AS the path is equal. + match (tpla, tplb) { + ((a, Some(ta)), (b, Some(tb))) => match a.cmp(b) { + Ordering::Equal => ta.cmp(tb), + other => other, + }, + + ((a, Some(_)), (b, None)) => match a.cmp(b) { + Ordering::Equal => Ordering::Greater, + other => other, + }, + ((a, None), (b, Some(_))) => match a.cmp(b) { + Ordering::Equal => Ordering::Less, + other => other, + }, + ((a, None), (b, None)) => a.cmp(b), + } + }) + .unique_by(|tpl| tpl.0.clone()) // TODO: Dont clone() + .try_for_each(|(path, releasetime)| { + if let Some(time) = releasetime { + writeln!(std::io::stdout(), "[{}] {}", time, path.display()) + } else { + writeln!(std::io::stdout(), "[unknown] {}", path.display()) + }.map_err(Error::from) + }) + }) + .inspect(|r| trace!("Query resulted in: {:?}", r)) + .collect::<Result<()>>() +} diff --git a/src/commands/mod.rs b/src/commands/mod.rs index 9696573..473596a 100644 --- a/src/commands/mod.rs +++ b/src/commands/mod.rs @@ -17,6 +17,9 @@ pub use db::db; mod env_of; pub use env_of::env_of; +mod find_artifact; +pub use find_artifact::find_artifact; + mod find_pkg; pub use find_pkg::find_pkg; diff --git a/src/db/find_artifacts.rs b/src/db/find_artifacts.rs new file mode 100644 index 0000000..05722b2 --- /dev/null +++ b/src/db/find_artifacts.rs @@ -0,0 +1,239 @@ +// +// Copyright (c) 2020-2021 science+computing ag and other contributors +// +// This program and the accompanying materials are made +// available under the terms of the Eclipse Public License 2.0 +// which is available at https://www.eclipse.org/legal/epl-2.0/ +// +// SPDX-License-Identifier: EPL-2.0 +// + +use std::path::PathBuf; +use std::sync::Arc; + +use anyhow::anyhow; +use anyhow::Error; +use anyhow::Result; +use chrono::NaiveDateTime; +use diesel::BoolExpressionMethods; +use diesel::ExpressionMethods; +use diesel::JoinOnDsl; +use diesel::NullableExpressionMethods; +use diesel::PgConnection; +use diesel::QueryDsl; +use diesel::RunQueryDsl; +use log::trace; +use resiter::AndThen; +use resiter::Map; + +use crate::config::Configuration; +use crate::db::models as dbmodels; +use crate::filestore::path::ArtifactPath; +use crate::filestore::path::FullArtifactPath; +use crate::filestore::ReleaseStore; +use crate::filestore::StagingStore; +use crate::package::Package; +use crate::package::ParseDependency; +use crate::package::ScriptBuilder; +use crate::package::Shebang; +use crate::schema; +use crate::util::EnvironmentVariableName; + +/// Find an artifact by a job description +/// +/// This function finds artifacts for a job description and environment that is equal to the passed +/// one. +/// The package is not the only parameter that influences a build, so this function gets all the +/// things: The Package, the Release store, the Staging store (optionally), additional environment +/// variables,... +/// to find artifacts for a job that looks the very same. +/// +/// If the artifact was released, the return value contains a Some(NaiveDateTime), marking the date +/// of the release. +/// Releases are returned prefferably, if multiple equal pathes for an artifact are found. +pub fn find_artifacts<'a>( + database_connection: Arc<PgConnection>, + config: &Configuration, + pkg: &Package, + release_store: &'a ReleaseStore, + staging_store: Option<&'a StagingStore>, + additional_env: &[(EnvironmentVariableName, String)], + script_filter: bool, +) -> Result<Vec<(FullArtifactPath<'a>, Option<NaiveDateTime>)>> { + let shebang = Shebang::from(config.shebang().clone()); + let script = if script_filter { + let script = ScriptBuilder::new(&shebang).build( + pkg, + config.available_phases(), + *config.strict_script_interpolation(), + )?; + Some(script) + } else { + None + }; + + let package_environment = pkg.environment(); + let build_dependencies_names = pkg + .dependencies() + .build() + .iter() + .map(|d| d.parse_as_name_and_version()) + .map_ok(|tpl| tpl.0) // TODO: We only filter by dependency NAME right now, not by version constraint + .collect::<Result<Vec<_>>>()?; + + let runtime_dependencies_names = pkg + .dependencies() + .runtime() + .iter() + .map(|d| d.parse_as_name_and_version()) + .map_ok(|tpl| tpl.0) // TODO: We only filter by dependency NAME right now, not by version constraint + .collect::<Result<Vec<_>>>()?; + + trace!("Build dependency names: {:?}", build_dependencies_names); + trace!("Runtime dependency names: {:?}", runtime_dependencies_names); + + let mut query = schema::submits::table + .inner_join(schema::jobs::table) + .inner_join(schema::packages::table) + .filter({ + // The package with pkg.name() and pkg.version() + let package_name_filter = schema::packages::name.eq(pkg.name().as_ref() as &str); + let package_version_filter = + schema::packages::version.eq(pkg.version().as_ref() as &str); + + let dependency_filter = { + // Filter for dependencies + // + // We select only packages where the submit contained a job for the + // dependencies (by name for now). + let build_refs = build_dependencies_names + .iter() + .map(AsRef::<str>::as_ref) + .collect::<Vec<_>>(); + let runtime_refs = runtime_dependencies_names + .iter() + .map(AsRef::<str>::as_ref) + .collect::<Vec<_>>(); + schema::packages::name + .eq_any(build_refs) + .or(schema::packages::name.eq_any(runtime_refs)) + }; + + package_name_filter + .and(package_version_filter) + .or(dependency_filter) + }) + .inner_join(schema::artifacts::table.on(schema::jobs::id.eq(schema::artifacts::job_id))) + .left_join( + schema::releases::table.on(schema::releases::artifact_id.eq(schema::artifacts::id)), + ) + .inner_join({ + schema::job_envs::table + .inner_join(schema::envvars::table) + .on(schema::jobs::id.eq(schema::job_envs::job_id)) + }) + .inner_join(schema::images::table) + .into_boxed(); + + if let Some(allowed_images) = pkg.allowed_images() { + trace!("Filtering with allowed_images = {:?}", allowed_images); + let imgs = allowed_images + .iter() + .map(AsRef::<str>::as_ref) + .collect::<Vec<_>>(); + query = query.filter(schema::images::name.eq_any(imgs)); + } + + if let Some(denied_images) = pkg.denied_images() { + trace!("Filtering with denied_images = {:?}", denied_images); + let imgs = denied_images + .iter() + .map(AsRef::<str>::as_ref) + .collect::<Vec<_>>(); + query = query.filter(schema::images::name.ne_all(imgs)); + } + + if let Some(script_text) = script.as_ref() { + query = query.filter(schema::jobs::script_text.eq(script_text.as_ref())); + } + + trace!("Query = {}", diesel::debug_query(&query)); + + query + .select({ + let arts = schema::artifacts::all_columns; + let jobs = schema::jobs::all_columns; + let rels = schema::releases::release_date.nullable(); + + (arts, jobs, rels) + }) + .load::<(dbmodels::Artifact, dbmodels::Job, Option<NaiveDateTime>)>( + &*database_connection, + ) + .map_err(Error::from) + .and_then(|results: Vec<_>| { + results + .into_iter() + // + // Filter by environment variables + // All environment variables of the package must be present in the loaded + // package, so that we can be sure that the loaded package was built with + // the same ENV. + // + // TODO: + // Doing this in the database query would be way nicer, but I was not able + // to implement it. + // + .map(|tpl| -> Result<(_, _, _)> { + // This is a Iterator::filter() but because our condition here might fail, we + // map() and do the actual filtering later. + + let job = tpl.1; + let job_env: Vec<(String, String)> = job + .env(&*database_connection)? + .into_iter() + .map(|var: dbmodels::EnvVar| (var.name, var.value)) + .collect(); + + trace!("The job we found had env: {:?}", job_env); + if let Some(pkg_env) = package_environment.as_ref() { + let filter_result = job_env.iter() + .all(|(k, v)| { + pkg_env + .iter() + .chain(additional_env.iter().map(|tpl| (&tpl.0, &tpl.1))) + .any(|(key, value)| k == key.as_ref() && v == value) + }); + + Ok((tpl.0, filter_result, tpl.2)) + } else { + Ok((tpl.0, true, tpl.2)) + } + }) + .filter(|r| match r { // the actual filtering from above + Err(_) => true, + Ok((_, bl, _)) => *bl, + }) + .and_then_ok(|(p, _, ndt)| ArtifactPath::new(PathBuf::from(p.path)).map(|a| (a, ndt))) + .and_then_ok(|(artpath, ndt)| { + if let Some(staging) = staging_store.as_ref() { + trace!( + "Searching in staging: {:?} for {:?}", + staging.root_path(), + artpath + ); + if let Some(art) = staging.get(&artpath) { + trace!("Found in staging: {:?}", art); + return staging.root_path().join(art).map(|p| (p, ndt)); + } + } + + let art = release_store + .get(&artpath) + .ok_or_else(|| anyhow!("Failed to find artifact for: {:?}", artpath))?; + trace!("Found in release: {:?}", art); + release_store.root_path().join(art).map(|p| (p, ndt)) + }) + .collect::<Result<Vec<(FullArtifactPath<'a>, Option<NaiveDateTime>)>>>() + }) +} diff --git a/src/db/mod.rs b/src/db/mod.rs index 0917929..8f29c0e 100644 --- a/src/db/mod.rs +++ b/src/db/mod.rs @@ -11,4 +11,7 @@ mod connection; pub use connection::*; +mod find_artifacts; +pub use find_artifacts::find_artifacts; + pub mod models; diff --git a/src/db/models/envvar.rs b/src/db/models/envvar.rs index f334969..b5fcc4f 100644 --- a/src/db/models/envvar.rs +++ b/src/db/models/envvar.rs @@ -17,7 +17,7 @@ use crate::schema::envvars; use crate::schema::envvars::*; use crate::util::EnvironmentVariableName; -#[derive(Identifiable, Queryable)] +#[derive(Debug, Identifiable, Queryable)] #[table_name = "envvars"] pub struct EnvVar { pub id: i32, diff --git a/src/filestore/path.rs b/src/filestore/path.rs index c26b9c6..eeb946d 100644 --- a/src/filestore/path.rs +++ b/src/filestore/path.rs @@ -98,7 +98,7 @@ impl StoreRoot { pub struct ArtifactPath(PathBuf); impl ArtifactPath { - pub(in crate::filestore) fn new(p: PathBuf) -> Result<Self> { + pub fn new(p: PathBuf) -> Result<Self> { if p.is_relative() { Ok(ArtifactPath(p)) } else { diff --git a/src/filestore/release.rs b/src/filestore/release.rs index d7a7232..f19f69a 100644 --- a/src/filestore/release.rs +++ b/src/filestore/release.rs @@ -13,6 +13,7 @@ use std::fmt::Debug; use anyhow::Result; use indicatif::ProgressBar; +use crate::filestore::path::ArtifactPath; use crate::filestore::path::StoreRoot; use crate::filestore::util::FileStoreImpl; @@ -29,4 +30,12 @@ impl ReleaseStore { pub fn load(root: StoreRoot, progress: ProgressBar) -> Result<Self> { FileStoreImpl::load(root, progress).map(ReleaseStore) } + + pub fn root_path(&self) -> &StoreRoot { + self.0.root_path() + } + + pub fn get(&self, p: &ArtifactPath) -> Option<&ArtifactPath> { + self.0.get(p) + } } diff --git a/src/filestore/util.rs b/src/filestore/util.rs index 0d98404..53f5919 100644 --- a/src/filestore/util.rs +++ b/src/filestore/util.rs @@ -35,7 +35,10 @@ impl FileStoreImpl { pub fn load(root: StoreRoot, progress: ProgressBar) -> Result<Self> { let store = root .find_artifacts_recursive() - .inspect(|_| progress.tick()) + .inspect(|path| { + log::trace!("Found artifact path: {:?}", path); + progress.tick(); + }) .collect::<Result<HashSet<ArtifactPath>>>()?; Ok(FileStoreImpl { root, store }) diff --git a/src/main.rs b/src/main.rs index a75a9d3..606a951 100644 --- a/src/main.rs +++ b/src/main.rs @@ -161,6 +161,12 @@ async fn main() -> Result<()> { crate::commands::env_of(matches, repo).await? } + Some(("find-artifact", matches)) => { + let repo = load_repo()?; + let conn = crate::db::establish_connection(db_connection_config)?; + crate::commands::find_artifact(matches, &config, progressbars, repo, conn, max_packages).await? + } + Some(("find-pkg", matches)) => { let repo = load_repo()?; crate::commands::find_pkg(matches, &config, repo).await? diff --git a/src/package/name.rs b/src/package/name.rs index f036d2d..2d2509b 100644 --- a/src/package/name.rs +++ b/src/package/name.rs @@ -37,6 +37,12 @@ impl Deref for PackageName { } } +impl AsRef<str> for PackageName { + fn as_ref(&self) -> &str { + &self.0 + } +} + impl From<String> for PackageName { fn from(s: String) -> Self { PackageName(s) diff --git a/src/package/version.rs b/src/package/version.rs index b110d0b..c27bf92 100644 --- a/src/package/version.rs +++ b/src/package/version.rs @@ -76,6 +76,12 @@ impl Deref for PackageVersion { } } +impl AsRef<str> for PackageVersion { + fn as_ref(&self) -> &str { + &self.0 + } +} + impl From<String> for PackageVersion { fn from(s: String) -> Self { PackageVersion(s) |