From 2b804658727c2c78d6ffdc2836924b18d2efe792 Mon Sep 17 00:00:00 2001 From: Matthias Beyer Date: Thu, 2 Dec 2021 14:04:23 +0100 Subject: Outsource "source download" subcommand impl Signed-off-by: Matthias Beyer --- src/commands/source/download.rs | 244 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 244 insertions(+) create mode 100644 src/commands/source/download.rs (limited to 'src/commands/source/download.rs') diff --git a/src/commands/source/download.rs b/src/commands/source/download.rs new file mode 100644 index 0000000..f8d5a95 --- /dev/null +++ b/src/commands/source/download.rs @@ -0,0 +1,244 @@ +// +// Copyright (c) 2020-2021 science+computing ag and other contributors +// +// This program and the accompanying materials are made +// available under the terms of the Eclipse Public License 2.0 +// which is available at https://www.eclipse.org/legal/epl-2.0/ +// +// SPDX-License-Identifier: EPL-2.0 +// + +use std::convert::TryFrom; +use std::path::PathBuf; +use std::str::FromStr; +use std::sync::Arc; + +use anyhow::Context; +use anyhow::Error; +use anyhow::Result; +use anyhow::anyhow; +use clap::ArgMatches; +use log::{debug, trace}; +use tokio::io::AsyncWriteExt; +use tokio::sync::Mutex; +use tokio_stream::StreamExt; + +use crate::config::*; +use crate::package::PackageName; +use crate::package::PackageVersionConstraint; +use crate::repository::Repository; +use crate::source::*; +use crate::util::progress::ProgressBars; + + +#[derive(Clone)] +struct ProgressWrapper { + download_count: u64, + finished_downloads: u64, + current_bytes: usize, + sum_bytes: u64, + bar: Arc>, +} + +impl ProgressWrapper { + fn new(bar: indicatif::ProgressBar) -> Self { + Self { + download_count: 0, + finished_downloads: 0, + current_bytes: 0, + sum_bytes: 0, + bar: Arc::new(Mutex::new(bar)) + } + } + + async fn inc_download_count(&mut self) { + self.download_count += 1; + self.set_message().await; + } + + async fn inc_download_bytes(&mut self, bytes: u64) { + self.sum_bytes 
+= bytes; + self.set_message().await; + } + + async fn finish_one_download(&mut self) { + self.finished_downloads += 1; + self.bar.lock().await.inc(1); + self.set_message().await; + } + + async fn add_bytes(&mut self, len: usize) { + self.current_bytes += len; + self.set_message().await; + } + + async fn set_message(&self) { + let bar = self.bar.lock().await; + bar.set_message(format!("Downloading ({current_bytes}/{sum_bytes} bytes, {dlfinished}/{dlsum} downloads finished", + current_bytes = self.current_bytes, + sum_bytes = self.sum_bytes, + dlfinished = self.finished_downloads, + dlsum = self.download_count)); + } + + async fn success(&self) { + let bar = self.bar.lock().await; + bar.finish_with_message(format!("Succeeded {}/{} downloads", self.finished_downloads, self.download_count)); + } + + async fn error(&self) { + let bar = self.bar.lock().await; + bar.finish_with_message(format!("At least one download of {} failed", self.download_count)); + } +} + +async fn perform_download(source: &SourceEntry, progress: Arc>, timeout: Option) -> Result<()> { + trace!("Creating: {:?}", source); + let file = source.create().await.with_context(|| { + anyhow!( + "Creating source file destination: {}", + source.path().display() + ) + })?; + + let mut file = tokio::io::BufWriter::new(file); + let client_builder = reqwest::Client::builder() + .redirect(reqwest::redirect::Policy::limited(10)); + + let client_builder = if let Some(to) = timeout { + client_builder.timeout(std::time::Duration::from_secs(to)) + } else { + client_builder + }; + + let client = client_builder.build().context("Building HTTP client failed")?; + + let request = client.get(source.url().as_ref()) + .build() + .with_context(|| anyhow!("Building request for {} failed", source.url().as_ref()))?; + + let response = match client.execute(request).await { + Ok(resp) => resp, + Err(e) => { + return Err(e).with_context(|| anyhow!("Downloading '{}'", source.url())) + } + }; + + progress.lock() + .await + 
.inc_download_bytes(response.content_length().unwrap_or(0)) + .await; + + let mut stream = response.bytes_stream(); + while let Some(bytes) = stream.next().await { + let bytes = bytes?; + file.write_all(bytes.as_ref()).await?; + progress.lock() + .await + .add_bytes(bytes.len()) + .await + } + + progress.lock().await.finish_one_download().await; + file.flush() + .await + .map_err(Error::from) + .map(|_| ()) +} + + +// Implementation of the 'source download' subcommand +pub async fn download( + matches: &ArgMatches, + config: &Configuration, + repo: Repository, + progressbars: ProgressBars, +) -> Result<()> { + let force = matches.is_present("force"); + let timeout = matches.value_of("timeout") + .map(u64::from_str) + .transpose() + .context("Parsing timeout argument to integer")?; + let cache = PathBuf::from(config.source_cache_root()); + let sc = SourceCache::new(cache); + let pname = matches + .value_of("package_name") + .map(String::from) + .map(PackageName::from); + let pvers = matches + .value_of("package_version") + .map(PackageVersionConstraint::try_from) + .transpose()?; + + let matching_regexp = matches.value_of("matching") + .map(crate::commands::util::mk_package_name_regex) + .transpose()?; + + let progressbar = Arc::new(Mutex::new(ProgressWrapper::new(progressbars.bar()))); + + let r = repo.packages() + .filter(|p| { + match (pname.as_ref(), pvers.as_ref(), matching_regexp.as_ref()) { + (None, None, None) => true, + (Some(pname), None, None) => p.name() == pname, + (Some(pname), Some(vers), None) => p.name() == pname && vers.matches(p.version()), + (None, None, Some(regex)) => regex.is_match(p.name()), + + (_, _, _) => { + panic!("This should not be possible, either we select packages by name and (optionally) version, or by regex.") + }, + } + }) + .map(|p| { + sc.sources_for(p).into_iter().map(|source| { + let progressbar = progressbar.clone(); + async move { + let source_path_exists = source.path().exists(); + if !source_path_exists && 
source.download_manually() { + return Err(anyhow!( + "Cannot download source that is marked for manual download" + )) + .context(anyhow!("Creating source: {}", source.path().display())) + .context(anyhow!("Downloading source: {}", source.url())) + .map_err(Error::from); + } + + if source_path_exists && !force { + Err(anyhow!("Source exists: {}", source.path().display())) + } else { + progressbar.lock() + .await + .inc_download_count() + .await; + + if source_path_exists /* && force is implied by 'if' above*/ { + if let Err(e) = source.remove_file().await { + progressbar.lock().await.finish_one_download().await; + return Err(e) + } + } + + perform_download(&source, progressbar.clone(), timeout).await?; + progressbar.lock().await.finish_one_download().await; + Ok(()) + } + } + }) + }) + .flatten() + .collect::>() + .collect::>>() + .await + .into_iter() + .collect::>(); + + if r.is_err() { + progressbar.lock().await.error().await; + } else { + progressbar.lock().await.success().await; + } + + debug!("r = {:?}", r); + r +} + -- cgit v1.2.3 From d70fc03ca8e6763bddb0af9e854a6d1734a9f7b5 Mon Sep 17 00:00:00 2001 From: Matthias Beyer Date: Thu, 2 Dec 2021 14:06:58 +0100 Subject: Add doc for helper type Signed-off-by: Matthias Beyer --- src/commands/source/download.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'src/commands/source/download.rs') diff --git a/src/commands/source/download.rs b/src/commands/source/download.rs index f8d5a95..b5223cf 100644 --- a/src/commands/source/download.rs +++ b/src/commands/source/download.rs @@ -30,7 +30,15 @@ use crate::repository::Repository; use crate::source::*; use crate::util::progress::ProgressBars; - +/// A wrapper around the indicatif::ProgressBar +/// +/// A wrapper around the indicatif::ProgressBar that is used to synchronize status information from +/// the individual download jobs to the progress bar that is used to display download progress to +/// the user. 
+/// +/// The problem this helper solves is that we only have one status bar for all downloads, and all +/// download tasks must be able to update it (for example, to increase the number of bytes received +/// that is displayed in the status message) in a synchronized way. #[derive(Clone)] struct ProgressWrapper { download_count: u64, -- cgit v1.2.3 From 781fae3dba04b3e0e97db0a7f28f5d132d356754 Mon Sep 17 00:00:00 2001 From: Matthias Beyer Date: Thu, 2 Dec 2021 14:07:40 +0100 Subject: Fix: Do not finish download twice The calling function does this already. Signed-off-by: Matthias Beyer --- src/commands/source/download.rs | 1 - 1 file changed, 1 deletion(-) (limited to 'src/commands/source/download.rs') diff --git a/src/commands/source/download.rs b/src/commands/source/download.rs index b5223cf..67a0cad 100644 --- a/src/commands/source/download.rs +++ b/src/commands/source/download.rs @@ -147,7 +147,6 @@ async fn perform_download(source: &SourceEntry, progress: Arc Date: Thu, 2 Dec 2021 14:10:19 +0100 Subject: Optimize: Write bytes to disk and increase statusbar in parallel Signed-off-by: Matthias Beyer --- src/commands/source/download.rs | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'src/commands/source/download.rs') diff --git a/src/commands/source/download.rs b/src/commands/source/download.rs index 67a0cad..b73a074 100644 --- a/src/commands/source/download.rs +++ b/src/commands/source/download.rs @@ -140,11 +140,16 @@ async fn perform_download(source: &SourceEntry, progress: Arc Date: Thu, 2 Dec 2021 14:10:36 +0100 Subject: Optimize: Do not increase the download counter too soon Because if the check fails, we don't have to do this work actually. 
Signed-off-by: Matthias Beyer --- src/commands/source/download.rs | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'src/commands/source/download.rs') diff --git a/src/commands/source/download.rs b/src/commands/source/download.rs index b73a074..ce467e4 100644 --- a/src/commands/source/download.rs +++ b/src/commands/source/download.rs @@ -218,18 +218,13 @@ pub async fn download( if source_path_exists && !force { Err(anyhow!("Source exists: {}", source.path().display())) } else { - progressbar.lock() - .await - .inc_download_count() - .await; - if source_path_exists /* && force is implied by 'if' above*/ { if let Err(e) = source.remove_file().await { - progressbar.lock().await.finish_one_download().await; return Err(e) } } + progressbar.lock().await.inc_download_count().await; perform_download(&source, progressbar.clone(), timeout).await?; progressbar.lock().await.finish_one_download().await; Ok(()) -- cgit v1.2.3 From 0775fe01a924386d8a990bf5f53721686904cf08 Mon Sep 17 00:00:00 2001 From: Matthias Beyer Date: Thu, 2 Dec 2021 14:17:59 +0100 Subject: Introduce semaphore for max of 100 concurrent downloads Signed-off-by: Matthias Beyer --- src/commands/source/download.rs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'src/commands/source/download.rs') diff --git a/src/commands/source/download.rs b/src/commands/source/download.rs index ce467e4..a49d71f 100644 --- a/src/commands/source/download.rs +++ b/src/commands/source/download.rs @@ -30,6 +30,8 @@ use crate::repository::Repository; use crate::source::*; use crate::util::progress::ProgressBars; +const NUMBER_OF_MAX_CONCURRENT_DOWNLOADS: usize = 100; + /// A wrapper around the indicatif::ProgressBar /// /// A wrapper around the indicatif::ProgressBar that is used to synchronize status information from @@ -188,6 +190,8 @@ pub async fn download( let progressbar = Arc::new(Mutex::new(ProgressWrapper::new(progressbars.bar()))); + let download_sema = 
Arc::new(tokio::sync::Semaphore::new(NUMBER_OF_MAX_CONCURRENT_DOWNLOADS)); + let r = repo.packages() .filter(|p| { match (pname.as_ref(), pvers.as_ref(), matching_regexp.as_ref()) { @@ -203,6 +207,7 @@ pub async fn download( }) .map(|p| { sc.sources_for(p).into_iter().map(|source| { + let download_sema = download_sema.clone(); let progressbar = progressbar.clone(); async move { let source_path_exists = source.path().exists(); @@ -225,7 +230,11 @@ pub async fn download( } progressbar.lock().await.inc_download_count().await; - perform_download(&source, progressbar.clone(), timeout).await?; + { + let permit = download_sema.acquire_owned().await?; + perform_download(&source, progressbar.clone(), timeout).await?; + drop(permit); + } progressbar.lock().await.finish_one_download().await; Ok(()) } -- cgit v1.2.3 From 585f07d9026c9ba60d8a27f9d64ab0c38e8f26c6 Mon Sep 17 00:00:00 2001 From: Matthias Beyer Date: Thu, 2 Dec 2021 14:18:37 +0100 Subject: Fix: Add missing closing paren Signed-off-by: Matthias Beyer --- src/commands/source/download.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/commands/source/download.rs') diff --git a/src/commands/source/download.rs b/src/commands/source/download.rs index a49d71f..f9ebd42 100644 --- a/src/commands/source/download.rs +++ b/src/commands/source/download.rs @@ -84,7 +84,7 @@ impl ProgressWrapper { async fn set_message(&self) { let bar = self.bar.lock().await; - bar.set_message(format!("Downloading ({current_bytes}/{sum_bytes} bytes, {dlfinished}/{dlsum} downloads finished", + bar.set_message(format!("Downloading ({current_bytes}/{sum_bytes} bytes, {dlfinished}/{dlsum} downloads finished)", current_bytes = self.current_bytes, sum_bytes = self.sum_bytes, dlfinished = self.finished_downloads, -- cgit v1.2.3 From 55bc26a59974eb3ba5afaa86a87ef189d868c6a7 Mon Sep 17 00:00:00 2001 From: Matthias Beyer Date: Thu, 2 Dec 2021 14:19:22 +0100 Subject: Fix: Increase bar length when adding download Signed-off-by: Matthias 
Beyer --- src/commands/source/download.rs | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/commands/source/download.rs') diff --git a/src/commands/source/download.rs b/src/commands/source/download.rs index f9ebd42..11843fa 100644 --- a/src/commands/source/download.rs +++ b/src/commands/source/download.rs @@ -64,6 +64,8 @@ impl ProgressWrapper { async fn inc_download_count(&mut self) { self.download_count += 1; self.set_message().await; + let bar = self.bar.lock().await; + bar.set_length(bar.length() + 1); } async fn inc_download_bytes(&mut self, bytes: u64) { -- cgit v1.2.3