summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Brian Chen <brianc118@meta.com> 2023-12-06 10:17:58 -0800
committer: Facebook GitHub Bot <facebook-github-bot@users.noreply.github.com> 2023-12-06 10:17:58 -0800
commit: 3a855015d5a96dfa0711e540eb907db03b022703 (patch)
tree: 5df0e338881904dc81301ef33a9a8834ac48a29d
parent: 5151c5a2dc07e3fcf9b3bf2a4416c67a631c1c08 (diff)
Add resctrl data collection support
Summary: Add support for collecting data from resctrl filesystem (see https://www.kernel.org/doc/html/v6.4/arch/x86/resctrl.html).
Reviewed By: dschatzberg
Differential Revision: D51767787
fbshipit-source-id: 385024d71d5ceccfe43f03e69e81359b35667b74
-rw-r--r-- below/config/src/lib.rs 2
-rw-r--r-- below/dump/src/test.rs 1
-rw-r--r-- below/model/Cargo.toml 1
-rw-r--r-- below/model/src/collector.rs 20
-rw-r--r-- below/model/src/common_field_ids.rs 36
-rw-r--r-- below/model/src/lib.rs 30
-rw-r--r-- below/model/src/resctrl.rs 316
-rw-r--r-- below/model/src/sample.rs 1
-rw-r--r-- below/src/main.rs 2
9 files changed, 408 insertions, 1 deletions
diff --git a/below/config/src/lib.rs b/below/config/src/lib.rs
index a713cd25..3298c597 100644
--- a/below/config/src/lib.rs
+++ b/below/config/src/lib.rs
@@ -46,6 +46,7 @@ pub struct BelowConfig {
pub btrfs_samples: u64,
pub btrfs_min_pct: f64,
pub enable_ethtool_stats: bool,
+ pub enable_resctrl_stats: bool,
}
impl Default for BelowConfig {
@@ -61,6 +62,7 @@ impl Default for BelowConfig {
btrfs_samples: btrfs::DEFAULT_SAMPLES,
btrfs_min_pct: btrfs::DEFAULT_MIN_PCT,
enable_ethtool_stats: false,
+ enable_resctrl_stats: false,
}
}
}
diff --git a/below/dump/src/test.rs b/below/dump/src/test.rs
index 36ec8542..21126d82 100644
--- a/below/dump/src/test.rs
+++ b/below/dump/src/test.rs
@@ -986,6 +986,7 @@ fn test_dump_queue_content() {
process: model::ProcessModel::default(),
network,
gpu: None,
+ resctrl: None,
};
let mut opts: GeneralOpt = Default::default();
diff --git a/below/model/Cargo.toml b/below/model/Cargo.toml
index d4412d1c..f0b24973 100644
--- a/below/model/Cargo.toml
+++ b/below/model/Cargo.toml
@@ -23,6 +23,7 @@ hostname = "0.3"
os_info = "3.0.7"
procfs = { package = "fb_procfs", version = "0.7.1", path = "../procfs" }
regex = "1.9.2"
+resctrlfs = { version = "0.7.1", path = "../resctrlfs" }
serde = { version = "1.0.185", features = ["derive", "rc"] }
serde_json = { version = "1.0.100", features = ["float_roundtrip", "unbounded_depth"] }
slog = { version = "2.7", features = ["max_level_trace", "nested-values"] }
diff --git a/below/model/src/collector.rs b/below/model/src/collector.rs
index 08645d30..dd7ab9b5 100644
--- a/below/model/src/collector.rs
+++ b/below/model/src/collector.rs
@@ -30,6 +30,7 @@ pub struct CollectorOptions {
pub disable_disk_stat: bool,
pub enable_btrfs_stats: bool,
pub enable_ethtool_stats: bool,
+ pub enable_resctrl_stats: bool,
pub btrfs_samples: u64,
pub btrfs_min_pct: f64,
pub cgroup_re: Option<Regex>,
@@ -46,6 +47,7 @@ impl Default for CollectorOptions {
disable_disk_stat: false,
enable_btrfs_stats: false,
enable_ethtool_stats: false,
+ enable_resctrl_stats: false,
btrfs_samples: btrfs::DEFAULT_SAMPLES,
btrfs_min_pct: btrfs::DEFAULT_MIN_PCT,
cgroup_re: None,
@@ -287,6 +289,24 @@ fn collect_sample(logger: &slog::Logger, options: &CollectorOptions) -> Result<S
}
}
},
+ resctrl: if !options.enable_resctrl_stats {
+ None
+ } else {
+ match resctrlfs::ResctrlReader::root() {
+ Ok(resctrl_reader) => match resctrl_reader.read_all() {
+ Ok(resctrl) => Some(resctrl),
+ Err(e) => {
+ error!(logger, "{:#}", e);
+ None
+ }
+ },
+ Err(_e) => {
+ // ResctrlReader only fails to initialize if resctrlfs is
+ // not mounted. In this case we ignore.
+ None
+ }
+ }
+ },
})
}
diff --git a/below/model/src/common_field_ids.rs b/below/model/src/common_field_ids.rs
index e8b59586..12b8e9e0 100644
--- a/below/model/src/common_field_ids.rs
+++ b/below/model/src/common_field_ids.rs
@@ -23,7 +23,7 @@
///
/// This list also servers as documentation for available field ids that could
/// be used in other below crates. A test ensures that this list is up-to-date.
-pub const COMMON_MODEL_FIELD_IDS: [&str; 374] = [
+pub const COMMON_MODEL_FIELD_IDS: [&str; 408] = [
"system.hostname",
"system.kernel_version",
"system.os_release",
@@ -254,6 +254,40 @@ pub const COMMON_MODEL_FIELD_IDS: [&str; 374] = [
"cgroup.[path:/<cgroup_path>/.]mem_numa.<key>.workingset_restore_anon",
"cgroup.[path:/<cgroup_path>/.]mem_numa.<key>.workingset_restore_file",
"cgroup.[path:/<cgroup_path>/.]mem_numa.<key>.workingset_nodereclaim",
+ "resctrl.cpuset",
+ "resctrl.ctrl_mon_groups.<key>.cpuset",
+ "resctrl.ctrl_mon_groups.<key>.full_path",
+ "resctrl.ctrl_mon_groups.<key>.mode",
+ "resctrl.ctrl_mon_groups.<key>.mon.per_l3.<key>.llc_occupancy_bytes",
+ "resctrl.ctrl_mon_groups.<key>.mon.per_l3.<key>.mbm_local_bytes_per_sec",
+ "resctrl.ctrl_mon_groups.<key>.mon.per_l3.<key>.mbm_total_bytes_per_sec",
+ "resctrl.ctrl_mon_groups.<key>.mon.total.llc_occupancy_bytes",
+ "resctrl.ctrl_mon_groups.<key>.mon.total.mbm_local_bytes_per_sec",
+ "resctrl.ctrl_mon_groups.<key>.mon.total.mbm_total_bytes_per_sec",
+ "resctrl.ctrl_mon_groups.<key>.mon_groups.<key>.full_path",
+ "resctrl.ctrl_mon_groups.<key>.mon_groups.<key>.mon.per_l3.<key>.llc_occupancy_bytes",
+ "resctrl.ctrl_mon_groups.<key>.mon_groups.<key>.mon.per_l3.<key>.mbm_local_bytes_per_sec",
+ "resctrl.ctrl_mon_groups.<key>.mon_groups.<key>.mon.per_l3.<key>.mbm_total_bytes_per_sec",
+ "resctrl.ctrl_mon_groups.<key>.mon_groups.<key>.mon.total.llc_occupancy_bytes",
+ "resctrl.ctrl_mon_groups.<key>.mon_groups.<key>.mon.total.mbm_local_bytes_per_sec",
+ "resctrl.ctrl_mon_groups.<key>.mon_groups.<key>.mon.total.mbm_total_bytes_per_sec",
+ "resctrl.ctrl_mon_groups.<key>.mon_groups.<key>.name",
+ "resctrl.ctrl_mon_groups.<key>.name",
+ "resctrl.mode",
+ "resctrl.mon.per_l3.<key>.llc_occupancy_bytes",
+ "resctrl.mon.per_l3.<key>.mbm_local_bytes_per_sec",
+ "resctrl.mon.per_l3.<key>.mbm_total_bytes_per_sec",
+ "resctrl.mon.total.llc_occupancy_bytes",
+ "resctrl.mon.total.mbm_local_bytes_per_sec",
+ "resctrl.mon.total.mbm_total_bytes_per_sec",
+ "resctrl.mon_groups.<key>.full_path",
+ "resctrl.mon_groups.<key>.mon.per_l3.<key>.llc_occupancy_bytes",
+ "resctrl.mon_groups.<key>.mon.per_l3.<key>.mbm_local_bytes_per_sec",
+ "resctrl.mon_groups.<key>.mon.per_l3.<key>.mbm_total_bytes_per_sec",
+ "resctrl.mon_groups.<key>.mon.total.llc_occupancy_bytes",
+ "resctrl.mon_groups.<key>.mon.total.mbm_local_bytes_per_sec",
+ "resctrl.mon_groups.<key>.mon.total.mbm_total_bytes_per_sec",
+ "resctrl.mon_groups.<key>.name",
"process.processes.<key>.pid",
"process.processes.<key>.ppid",
"process.processes.<key>.ns_tgid",
diff --git a/below/model/src/lib.rs b/below/model/src/lib.rs
index 80cce294..001fe49c 100644
--- a/below/model/src/lib.rs
+++ b/below/model/src/lib.rs
@@ -36,6 +36,7 @@ pub mod collector_plugin;
mod common_field_ids;
pub mod network;
pub mod process;
+pub mod resctrl;
pub mod sample;
mod sample_model;
pub mod system;
@@ -46,6 +47,7 @@ pub use cgroup::*;
pub use collector::*;
pub use network::*;
pub use process::*;
+pub use resctrl::*;
pub use sample::*;
pub use system::*;
@@ -66,6 +68,8 @@ pub enum Field {
StrU64Map(BTreeMap<String, u64>),
Cpuset(cgroupfs::Cpuset),
MemNodes(cgroupfs::MemNodes),
+ ResctrlCpuset(resctrlfs::Cpuset),
+ ResctrlGroupMode(resctrlfs::GroupMode),
}
impl From<Field> for u64 {
@@ -196,6 +200,18 @@ impl From<cgroupfs::MemNodes> for Field {
}
}
+impl From<resctrlfs::Cpuset> for Field {
+ fn from(v: resctrlfs::Cpuset) -> Self {
+ Field::ResctrlCpuset(v)
+ }
+}
+
+impl From<resctrlfs::GroupMode> for Field {
+ fn from(v: resctrlfs::GroupMode) -> Self {
+ Field::ResctrlGroupMode(v)
+ }
+}
+
impl<T: Into<Field> + Clone> From<&T> for Field {
fn from(v: &T) -> Self {
v.clone().into()
@@ -284,6 +300,8 @@ impl fmt::Display for Field {
)),
Field::Cpuset(v) => v.fmt(f),
Field::MemNodes(v) => v.fmt(f),
+ Field::ResctrlCpuset(v) => v.fmt(f),
+ Field::ResctrlGroupMode(v) => v.fmt(f),
}
}
}
@@ -515,6 +533,8 @@ pub struct Model {
pub network: NetworkModel,
#[queriable(subquery)]
pub gpu: Option<GpuModel>,
+ #[queriable(subquery)]
+ pub resctrl: Option<ResctrlModel>,
}
impl Model {
@@ -563,6 +583,16 @@ impl Model {
}
})
}),
+ resctrl: sample.resctrl.as_ref().map(|r| {
+ ResctrlModel::new(
+ r,
+ if let Some((s, d)) = last {
+ s.resctrl.as_ref().map(|r| (r, d))
+ } else {
+ None
+ },
+ )
+ }),
}
}
}
diff --git a/below/model/src/resctrl.rs b/below/model/src/resctrl.rs
new file mode 100644
index 00000000..4c14c06a
--- /dev/null
+++ b/below/model/src/resctrl.rs
@@ -0,0 +1,316 @@
+// Copyright (c) Facebook, Inc. and its affiliates.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use super::*;
+
+#[derive(Clone, Debug, Default, Serialize, Deserialize, below_derive::Queriable)]
+pub struct ResctrlL3MonModel {
+ pub llc_occupancy_bytes: Option<u64>,
+ pub mbm_total_bytes_per_sec: Option<u64>,
+ pub mbm_local_bytes_per_sec: Option<u64>,
+}
+
+/// Model for mon data
+#[derive(Clone, Debug, Default, Serialize, Deserialize, below_derive::Queriable)]
+pub struct ResctrlMonModel {
+ #[queriable(subquery)]
+ pub total: ResctrlL3MonModel,
+ #[queriable(subquery)]
+ pub per_l3: BTreeMap<u64, ResctrlL3MonModel>,
+}
+
+/// Collection of all data about a single MON group
+#[derive(Clone, Debug, Default, Serialize, Deserialize, below_derive::Queriable)]
+pub struct ResctrlMonGroupModel {
+ pub name: String,
+ pub full_path: String,
+ #[queriable(subquery)]
+ pub mon: ResctrlMonModel,
+}
+
+/// Collection of all data about a single CTRL_MON group and descendents
+#[derive(Clone, Debug, Default, Serialize, Deserialize, below_derive::Queriable)]
+pub struct ResctrlCtrlMonGroupModel {
+ pub name: String,
+ pub full_path: String,
+ pub cpuset: Option<resctrlfs::Cpuset>,
+ pub mode: Option<resctrlfs::GroupMode>,
+ #[queriable(subquery)]
+ pub mon: ResctrlMonModel,
+ #[queriable(subquery)]
+ pub mon_groups: BTreeMap<String, ResctrlMonGroupModel>,
+}
+
+/// All data about the entire resctrl filesystem
+#[derive(Clone, Debug, Default, Serialize, Deserialize, below_derive::Queriable)]
+pub struct ResctrlModel {
+ pub cpuset: Option<resctrlfs::Cpuset>,
+ pub mode: Option<resctrlfs::GroupMode>,
+ #[queriable(subquery)]
+ pub mon: Option<ResctrlMonModel>,
+ #[queriable(subquery)]
+ pub mon_groups: BTreeMap<String, ResctrlMonGroupModel>,
+ #[queriable(subquery)]
+ pub ctrl_mon_groups: BTreeMap<String, ResctrlCtrlMonGroupModel>,
+}
+
+fn rmid_bytes_to_opt(rmid_bytes: &Option<resctrlfs::RmidBytes>) -> Option<u64> {
+ match rmid_bytes {
+ Some(resctrlfs::RmidBytes::Bytes(b)) => Some(*b),
+ Some(resctrlfs::RmidBytes::Unavailable) => None,
+ None => None,
+ }
+}
+
+impl std::ops::Add<&ResctrlL3MonModel> for ResctrlL3MonModel {
+ type Output = Self;
+
+ fn add(self, other: &Self) -> Self {
+ Self {
+ llc_occupancy_bytes: opt_add(self.llc_occupancy_bytes, other.llc_occupancy_bytes),
+ mbm_total_bytes_per_sec: opt_add(
+ self.mbm_total_bytes_per_sec,
+ other.mbm_total_bytes_per_sec,
+ ),
+ mbm_local_bytes_per_sec: opt_add(
+ self.mbm_local_bytes_per_sec,
+ other.mbm_local_bytes_per_sec,
+ ),
+ }
+ }
+}
+
+impl ResctrlModel {
+ pub fn new(
+ sample: &resctrlfs::ResctrlSample,
+ last: Option<(&resctrlfs::ResctrlSample, Duration)>,
+ ) -> ResctrlModel {
+ ResctrlModel {
+ cpuset: sample.cpuset.clone(),
+ mode: sample.mode.clone(),
+ mon: sample.mon_stat.as_ref().map(|mon_stat| {
+ ResctrlMonModel::new(
+ mon_stat,
+ last.and_then(|(s, d)| s.mon_stat.as_ref().map(|v| (v, d))),
+ )
+ }),
+ mon_groups: sample
+ .mon_groups
+ .as_ref()
+ .unwrap_or(&Default::default())
+ .iter()
+ .map(|(name, stat)| {
+ (
+ name.clone(),
+ ResctrlMonGroupModel::new(
+ name.clone(),
+ name.to_string(),
+ stat,
+ last.and_then(|(s, d)| {
+ s.mon_groups
+ .as_ref()
+ .and_then(|v| v.get(name))
+ .map(|v| (v, d))
+ }),
+ ),
+ )
+ })
+ .collect(),
+ ctrl_mon_groups: sample
+ .ctrl_mon_groups
+ .as_ref()
+ .unwrap_or(&Default::default())
+ .iter()
+ .map(|(name, stat)| {
+ (
+ name.clone(),
+ ResctrlCtrlMonGroupModel::new(
+ name.clone(),
+ name.to_string(),
+ stat,
+ last.and_then(|(s, d)| {
+ s.ctrl_mon_groups
+ .as_ref()
+ .and_then(|v| v.get(name))
+ .map(|v| (v, d))
+ }),
+ ),
+ )
+ })
+ .collect(),
+ }
+ }
+}
+
+impl ResctrlCtrlMonGroupModel {
+ pub fn new(
+ name: String,
+ full_path: String,
+ sample: &resctrlfs::CtrlMonGroupStat,
+ last: Option<(&resctrlfs::CtrlMonGroupStat, Duration)>,
+ ) -> ResctrlCtrlMonGroupModel {
+ let last_if_inode_matches =
+ last.and_then(|(s, d)| match (s.inode_number, sample.inode_number) {
+ (Some(prev_inode), Some(current_inode)) if prev_inode == current_inode => {
+ Some((s, d))
+ }
+ (None, None) => Some((s, d)),
+ _ => None,
+ });
+ ResctrlCtrlMonGroupModel {
+ name,
+ full_path: full_path.clone(),
+ cpuset: sample.cpuset.clone(),
+ mode: sample.mode.clone(),
+ mon: sample
+ .mon_stat
+ .as_ref()
+ .map(|mon_stat| {
+ if let Some((last, delta)) = last_if_inode_matches {
+ if let Some(last_mon_stat) = last.mon_stat.as_ref() {
+ ResctrlMonModel::new(mon_stat, Some((last_mon_stat, delta)))
+ } else {
+ ResctrlMonModel::new(mon_stat, None)
+ }
+ } else {
+ ResctrlMonModel::new(mon_stat, None)
+ }
+ })
+ .unwrap_or_default(),
+ mon_groups: sample
+ .mon_groups
+ .as_ref()
+ .unwrap_or(&Default::default())
+ .iter()
+ .map(|(name, stat)| {
+ (
+ name.clone(),
+ ResctrlMonGroupModel::new(
+ name.clone(),
+ format!("{}/{}", full_path, name),
+ stat,
+ last_if_inode_matches.and_then(|(s, d)| {
+ s.mon_groups
+ .as_ref()
+ .and_then(|v| v.get(name))
+ .map(|v| (v, d))
+ }),
+ ),
+ )
+ })
+ .collect(),
+ }
+ }
+}
+
+impl ResctrlMonGroupModel {
+ pub fn new(
+ name: String,
+ full_path: String,
+ sample: &resctrlfs::MonGroupStat,
+ last: Option<(&resctrlfs::MonGroupStat, Duration)>,
+ ) -> ResctrlMonGroupModel {
+ let last_if_inode_matches =
+ last.and_then(|(s, d)| match (s.inode_number, sample.inode_number) {
+ (Some(prev_inode), Some(current_inode)) if prev_inode == current_inode => {
+ Some((s, d))
+ }
+ (None, None) => Some((s, d)),
+ _ => None,
+ });
+ ResctrlMonGroupModel {
+ name,
+ full_path,
+ mon: sample
+ .mon_stat
+ .as_ref()
+ .map(|mon_stat| {
+ if let Some((last, delta)) = last_if_inode_matches {
+ if let Some(last_mon_stat) = last.mon_stat.as_ref() {
+ ResctrlMonModel::new(mon_stat, Some((last_mon_stat, delta)))
+ } else {
+ ResctrlMonModel::new(mon_stat, None)
+ }
+ } else {
+ ResctrlMonModel::new(mon_stat, None)
+ }
+ })
+ .unwrap_or_default(),
+ }
+ }
+}
+
+impl ResctrlMonModel {
+ pub fn new(
+ sample: &resctrlfs::MonStat,
+ last: Option<(&resctrlfs::MonStat, Duration)>,
+ ) -> ResctrlMonModel {
+ let mut model = ResctrlMonModel::default();
+ for (l3, end_l3_sample) in sample
+ .l3_mon_stat
+ .as_ref()
+ .unwrap_or(&Default::default())
+ .iter()
+ {
+ let last_l3 = if let Some((last_l3_sample, delta)) = &last {
+ last_l3_sample
+ .l3_mon_stat
+ .as_ref()
+ .and_then(|v| v.get(l3))
+ .map(|v| (v, delta.clone()))
+ } else {
+ None
+ };
+ model
+ .per_l3
+ .insert(*l3, ResctrlL3MonModel::new(end_l3_sample, last_l3));
+ }
+ model.total = model
+ .per_l3
+ .values()
+ .fold(ResctrlL3MonModel::default(), |acc, model| acc + model);
+ model
+ }
+}
+
+impl ResctrlL3MonModel {
+ pub fn new(
+ sample: &resctrlfs::L3MonStat,
+ last: Option<(&resctrlfs::L3MonStat, Duration)>,
+ ) -> ResctrlL3MonModel {
+ if let Some((begin, delta)) = last {
+ ResctrlL3MonModel {
+ llc_occupancy_bytes: rmid_bytes_to_opt(&sample.llc_occupancy_bytes),
+ mbm_total_bytes_per_sec: count_per_sec!(
+ rmid_bytes_to_opt(&begin.mbm_total_bytes),
+ rmid_bytes_to_opt(&sample.mbm_total_bytes),
+ delta,
+ u64
+ ),
+ mbm_local_bytes_per_sec: count_per_sec!(
+ rmid_bytes_to_opt(&begin.mbm_local_bytes),
+ rmid_bytes_to_opt(&sample.mbm_local_bytes),
+ delta,
+ u64
+ ),
+ }
+ } else {
+ ResctrlL3MonModel {
+ llc_occupancy_bytes: rmid_bytes_to_opt(&sample.llc_occupancy_bytes),
+ mbm_total_bytes_per_sec: None,
+ mbm_local_bytes_per_sec: None,
+ }
+ }
+ }
+}
diff --git a/below/model/src/sample.rs b/below/model/src/sample.rs
index 2a59b659..5788f9e2 100644
--- a/below/model/src/sample.rs
+++ b/below/model/src/sample.rs
@@ -22,6 +22,7 @@ pub struct Sample {
pub netstats: procfs::NetStat,
pub gpus: Option<gpu_stats::GpuSample>,
pub ethtool: Option<ethtool::EthtoolStats>,
+ pub resctrl: Option<resctrlfs::ResctrlSample>,
}
#[derive(Default, Clone, PartialEq, Debug, Serialize, Deserialize)]
diff --git a/below/src/main.rs b/below/src/main.rs
index 253d8148..a3dbd345 100644
--- a/below/src/main.rs
+++ b/below/src/main.rs
@@ -1006,6 +1006,7 @@ fn record(
disable_disk_stat,
enable_btrfs_stats: below_config.enable_btrfs_stats,
enable_ethtool_stats: below_config.enable_ethtool_stats,
+ enable_resctrl_stats: below_config.enable_resctrl_stats,
btrfs_samples: below_config.btrfs_samples,
btrfs_min_pct: below_config.btrfs_min_pct,
cgroup_re,
@@ -1130,6 +1131,7 @@ fn live_local(
exit_data: exit_buffer,
enable_btrfs_stats: below_config.enable_btrfs_stats,
enable_ethtool_stats: below_config.enable_ethtool_stats,
+ enable_resctrl_stats: below_config.enable_resctrl_stats,
btrfs_samples: below_config.btrfs_samples,
btrfs_min_pct: below_config.btrfs_min_pct,
gpu_stats_receiver,