diff options
author | Matthias Beyer <matthias.beyer@atos.net> | 2021-03-04 14:08:10 +0100 |
---|---|---|
committer | Matthias Beyer <mail@beyermatthias.de> | 2021-03-04 14:30:25 +0100 |
commit | 60a3fa633a33e315c1439a9f2436fcdb48da62ae (patch) | |
tree | aa10ed05e61781d0ef25d098ad5567f613bdeeab | |
parent | 248c28c0b882930908493af94f714ce4de3706ac (diff) |
Remove relative speed setting, select endpoint by utilization instead
This patch removes the "speed" setting from the configuration, which was
introduced to set a relative speed for each endpoint, with the idea that the
scheduler would then prefer faster nodes.
Instead, the utilization of an endpoint is now calculated (the ratio of running
jobs to the maximum number of jobs allowed on the endpoint), and the endpoint
with the lowest utilization is selected.
Signed-off-by: Matthias Beyer <matthias.beyer@atos.net>
-rw-r--r-- | config.toml | 4 | ||||
-rw-r--r-- | src/config/endpoint_config.rs | 8 | ||||
-rw-r--r-- | src/endpoint/configured.rs | 7 | ||||
-rw-r--r-- | src/endpoint/scheduler.rs | 4 |
4 files changed, 13 insertions, 10 deletions
diff --git a/config.toml b/config.toml index 9536234..15843de 100644 --- a/config.toml +++ b/config.toml @@ -174,12 +174,14 @@ verify_images_present = true name = "testhostname" uri = "http://0.0.0.0:8095" # the URI of the endpoint. Either http or socket path endpoint_type = "http" # either "http" or "socket" -speed = 1 # currently ignored, but required to be present # maximum number of jobs running on this endpoint. # Set this to a reasonable high number to be able to run a lot of small jobs. # For example, if you're compiling with `make -j 1`, this should at least be the # number of CPU cores, maybe a bit more (eg. (ncpu * 1.1)) +# +# Also, if two nodes have the same number of running jobs, and a new job comes +# in, the node with more "free slots" will be considered first. maxjobs = 1 diff --git a/src/config/endpoint_config.rs b/src/config/endpoint_config.rs index 12a588a..13db137 100644 --- a/src/config/endpoint_config.rs +++ b/src/config/endpoint_config.rs @@ -26,14 +26,6 @@ pub struct Endpoint { #[getset(get = "pub")] endpoint_type: EndpointType, - /// Relative speed to other endpoints - /// - /// So if you have two servers, one with 12 cores and one with 24, you want to set "1" for the - /// first and "2" for the second (or "12" for the first and "24" for the second - the ratio is - /// the thing here)! 
- #[getset(get_copy = "pub")] - speed: usize, - /// Maximum number of jobs which are allowed on this endpoint #[getset(get_copy = "pub")] maxjobs: usize, diff --git a/src/endpoint/configured.rs b/src/endpoint/configured.rs index bbaaec6..ba1c54e 100644 --- a/src/endpoint/configured.rs +++ b/src/endpoint/configured.rs @@ -234,6 +234,13 @@ impl Endpoint { pub fn running_jobs(&self) -> usize { self.running_jobs.load(std::sync::atomic::Ordering::Relaxed) } + + /// Super non-scientific utilization calculation for the endpoint + pub fn utilization(&self) -> f64 { + let max_jobs = self.num_max_jobs() as f64; + let run_jobs = self.running_jobs() as f64; + 100.0 / max_jobs * run_jobs + } } pub struct EndpointHandle(Arc<Endpoint>); diff --git a/src/endpoint/scheduler.rs b/src/endpoint/scheduler.rs index 4f49034..34a79d7 100644 --- a/src/endpoint/scheduler.rs +++ b/src/endpoint/scheduler.rs @@ -110,7 +110,9 @@ impl EndpointScheduler { trace!("Endpoint {} considered for scheduling job: {}", ep.name(), r); r }) - .sorted_by(|ep1, ep2| ep1.running_jobs().cmp(&ep2.running_jobs())) + .sorted_by(|ep1, ep2| { + ep1.utilization().partial_cmp(&ep2.utilization()).unwrap_or(std::cmp::Ordering::Equal) + }) .next(); if let Some(endpoint) = ep { |