summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Clement Tsang <34804052+ClementTsang@users.noreply.github.com> 2024-01-05 01:08:10 -0500
committer Clement Tsang <34804052+ClementTsang@users.noreply.github.com> 2024-01-05 01:09:37 -0500
commit 426ea091cb8a5180f95b9d00e1811a0130ecd3aa (patch)
tree a6c474fecd52761d87e9dfcd0bf0f4f05e2e42b5
parent d7c614b75b4d72598b01db893ed1b48e4d3f1a8a (diff)
possible nvml fix (gpu_fix_pray)
-rw-r--r-- src/data_collection/nvidia.rs 53
-rw-r--r-- src/data_collection/temperature.rs 17
-rw-r--r-- src/data_collection/temperature/linux.rs 35
3 files changed, 66 insertions, 39 deletions
diff --git a/src/data_collection/nvidia.rs b/src/data_collection/nvidia.rs
index d463d2c4..c833b711 100644
--- a/src/data_collection/nvidia.rs
+++ b/src/data_collection/nvidia.rs
@@ -2,14 +2,17 @@ use std::sync::OnceLock;
use hashbrown::HashMap;
use nvml_wrapper::{
- enum_wrappers::device::TemperatureSensor, enums::device::UsedGpuMemory, error::NvmlError, Nvml,
+ enum_wrappers::device::{PerformanceState, TemperatureSensor},
+ enums::device::UsedGpuMemory,
+ error::NvmlError,
+ Nvml,
};
use crate::{
app::{filter::Filter, layout_manager::UsedWidgets},
data_collection::{
memory::MemHarvest,
- temperature::{is_temp_filtered, TempHarvest, TemperatureType},
+ temperature::{TempHarvest, TemperatureType},
},
};
@@ -53,17 +56,42 @@ pub fn get_nvidia_vecs(
));
}
}
- if widgets_to_harvest.use_temp && is_temp_filtered(filter, &name) {
- if let Ok(temperature) = device.temperature(TemperatureSensor::Gpu) {
- let temperature = temp_type.convert_temp_unit(temperature as f32);
- temp_vec.push(TempHarvest {
- name: name.clone(),
- temperature: TemperatureReading::Value(temperature),
- });
+ if widgets_to_harvest.use_temp
+ && filter
+ .as_ref()
+ .map(|filter| filter.keep_entry(&name))
+ .unwrap_or(true)
+ {
+ // Following https://docs.nvidia.com/gameworks/content/gameworkslibrary/coresdk/nvapi/group__gpupstate.html,
+ // it seems like performance state 12 and lower are "minimum idle power consumption".
+ match device.performance_state() {
+ Ok(PerformanceState::Fifteen)
+ | Ok(PerformanceState::Fourteen)
+ | Ok(PerformanceState::Thirteen)
+ | Ok(PerformanceState::Twelve) => {
+ temp_vec.push(TempHarvest {
+ name,
+ temperature: TemperatureReading::Off,
+ });
+ }
+ _ => {
+ if let Ok(temperature) =
+ device.temperature(TemperatureSensor::Gpu)
+ {
+ let temperature =
+ temp_type.convert_temp_unit(temperature as f32);
+
+ temp_vec.push(TempHarvest {
+ name,
+ temperature: TemperatureReading::Value(temperature),
+ });
+ }
+ }
}
}
}
+
if widgets_to_harvest.use_proc {
let mut procs = HashMap::new();
if let Ok(gpu_procs) = device.process_utilization_stats(None) {
@@ -73,6 +101,7 @@ pub fn get_nvidia_vecs(
procs.insert(pid, (0, gpu_util));
}
}
+
if let Ok(compute_procs) = device.running_compute_processes() {
for proc in compute_procs {
let pid = proc.pid;
@@ -87,7 +116,8 @@ pub fn get_nvidia_vecs(
}
}
}
- // Use the legacy API too but prefer newer API results
+
+ // Use the legacy API too, but prefer newer API results
if let Ok(graphics_procs) = device.running_graphics_processes_v2() {
for proc in graphics_procs {
let pid = proc.pid;
@@ -102,6 +132,7 @@ pub fn get_nvidia_vecs(
}
}
}
+
if let Ok(graphics_procs) = device.running_graphics_processes() {
for proc in graphics_procs {
let pid = proc.pid;
@@ -116,9 +147,11 @@ pub fn get_nvidia_vecs(
}
}
}
+
if !procs.is_empty() {
proc_vec.push(procs);
}
+
// running total for proc %
if let Ok(mem) = device.memory_info() {
total_mem += mem.total;
diff --git a/src/data_collection/temperature.rs b/src/data_collection/temperature.rs
index 2726ab65..b7413244 100644
--- a/src/data_collection/temperature.rs
+++ b/src/data_collection/temperature.rs
@@ -13,8 +13,6 @@ cfg_if::cfg_if! {
}
}
-use crate::app::filter::Filter;
-
#[derive(Default, Debug, Clone)]
pub enum TemperatureReading {
Value(f32),
@@ -56,21 +54,6 @@ impl TemperatureType {
}
}
-pub fn is_temp_filtered(filter: &Option<Filter>, text: &str) -> bool {
- if let Some(filter) = filter {
- let mut ret = filter.is_list_ignored;
- for r in &filter.list {
- if r.is_match(text) {
- ret = !filter.is_list_ignored;
- break;
- }
- }
- ret
- } else {
- true
- }
-}
-
#[cfg(test)]
mod test {
use crate::data_collection::temperature::TemperatureType;
diff --git a/src/data_collection/temperature/linux.rs b/src/data_collection/temperature/linux.rs
index c7608539..125fb0ca 100644
--- a/src/data_collection/temperature/linux.rs
+++ b/src/data_collection/temperature/linux.rs
@@ -8,7 +8,7 @@ use std::{
use anyhow::Result;
use hashbrown::{HashMap, HashSet};
-use super::{is_temp_filtered, TempHarvest, TemperatureReading, TemperatureType};
+use super::{TempHarvest, TemperatureReading, TemperatureType};
use crate::{app::filter::Filter, utils::error::BottomError};
const EMPTY_NAME: &str = "Unknown";
@@ -184,8 +184,12 @@ fn finalize_name(
/// If neither are found, it will always return true and be treated as "awake".
#[inline]
fn is_device_awake(path: &Path) -> bool {
+ // XXX: Should we initialize all devices that support runtime_status_path here,
+ // in a map, and take `power/autosuspend_delay_ms` into account?
+
// Try checking `power/runtime_status` if it exists! For more information, see
- // https://www.kernel.org/doc/Documentation/ABI/testing/sysfs-devices-power
+ // https://www.kernel.org/doc/Documentation/ABI/testing/sysfs-devices-power and
+ // https://gitlab.com/mission-center-devs/mission-center/-/issues/30#note_1697130114
let runtime_status_path = path.join("power/runtime_status");
if runtime_status_path.exists() {
if let Ok(status) = fs::read_to_string(runtime_status_path) {
@@ -337,15 +341,18 @@ fn hwmon_temperatures(temp_type: &TemperatureType, filter: &Option<Filter>) -> H
let name = finalize_name(hwmon_name, sensor_label, &sensor_name, &mut seen_names);
// TODO: It's possible we may want to move the filter check further up to avoid probing hwmon if not needed?
- if is_temp_filtered(filter, &name) {
- if let Ok(temp_celsius) = parse_temp(&temp_path) {
- temperatures.push(TempHarvest {
- name,
- temperature: TemperatureReading::Value(
- temp_type.convert_temp_unit(temp_celsius),
- ),
- });
- }
+ if filter
+ .as_ref()
+ .map(|filter| filter.keep_entry(&name))
+ .unwrap_or(true)
+ {
+ let temperature = if let Ok(temp_celsius) = parse_temp(&temp_path) {
+ TemperatureReading::Value(temp_type.convert_temp_unit(temp_celsius))
+ } else {
+ TemperatureReading::Unavailable
+ };
+
+ temperatures.push(TempHarvest { name, temperature });
}
}
}
@@ -388,7 +395,11 @@ fn add_thermal_zone_temperatures(
name
};
- if is_temp_filtered(filter, &name) {
+ if filter
+ .as_ref()
+ .map(|filter| filter.keep_entry(&name))
+ .unwrap_or(true)
+ {
let temp_path = file_path.join("temp");
if let Ok(temp_celsius) = parse_temp(&temp_path) {
let name = counted_name(&mut seen_names, name);