feat: guest metrics support (#46)

* feat: initial support for idm send in daemon

* feat: implement IdmClient backend support

* feat: daemon idm now uses IdmClient

* fix: implement channel destruction propagation

* feat: implement request response idm system

* feat: implement metrics support

* proto: move metrics into GuestMetrics for reusability

* fix: log level of guest agent was trace

* feat: metrics tree with process information
This commit is contained in:
Alex Zenla
2024-04-12 00:34:46 -07:00
committed by GitHub
parent ec9060d872
commit 5e16f3149f
27 changed files with 1211 additions and 278 deletions

View File

@ -25,8 +25,8 @@ rtnetlink = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
sys-mount = { workspace = true }
sysinfo = { workspace = true }
tokio = { workspace = true }
walkdir = { workspace = true }
[lib]
name = "krataguest"

View File

@ -23,6 +23,8 @@ async fn main() -> Result<()> {
if let Err(error) = guest.init().await {
error!("failed to initialize guest: {}", error);
death(127).await?;
return Ok(());
}
death(1).await?;
Ok(())
}

View File

@ -1,16 +1,20 @@
use crate::{
childwait::{ChildEvent, ChildWait},
death,
metrics::MetricsCollector,
};
use anyhow::Result;
use cgroups_rs::Cgroup;
use krata::idm::{
client::IdmClient,
protocol::{idm_event::Event, IdmEvent, IdmExitEvent, IdmPacket},
protocol::{
idm_event::Event, idm_request::Request, idm_response::Response, IdmEvent, IdmExitEvent,
IdmMetricsResponse, IdmPingResponse, IdmRequest,
},
};
use log::error;
use log::debug;
use nix::unistd::Pid;
use tokio::select;
use tokio::{select, sync::broadcast};
pub struct GuestBackground {
idm: IdmClient,
@ -30,16 +34,37 @@ impl GuestBackground {
}
pub async fn run(&mut self) -> Result<()> {
let mut event_subscription = self.idm.subscribe().await?;
let mut requests_subscription = self.idm.requests().await?;
loop {
select! {
x = self.idm.receiver.recv() => match x {
Some(_packet) => {
x = event_subscription.recv() => match x {
Ok(_event) => {
},
None => {
error!("idm packet channel closed");
Err(broadcast::error::RecvError::Closed) => {
debug!("idm packet channel closed");
break;
},
_ => {
continue;
}
},
x = requests_subscription.recv() => match x {
Ok(request) => {
self.handle_idm_request(request).await?;
},
Err(broadcast::error::RecvError::Closed) => {
debug!("idm packet channel closed");
break;
},
_ => {
continue;
}
},
@ -54,14 +79,34 @@ impl GuestBackground {
Ok(())
}
/// Answers a single IDM request received from the requests subscription.
///
/// `Ping` is answered with an empty `IdmPingResponse`; `Metrics` is answered
/// with a freshly collected metrics tree. A request that carries no payload
/// is ignored and no response is sent for it.
///
/// Returns an error if metrics collection fails or if the response cannot be
/// handed to the IDM client.
async fn handle_idm_request(&mut self, packet: IdmRequest) -> Result<()> {
    let id = packet.id;
    // Work out what to send first, so the reply goes out from one call site.
    let response = match packet.request {
        Some(Request::Ping(_)) => Response::Ping(IdmPingResponse {}),
        Some(Request::Metrics(_)) => {
            let collector = MetricsCollector::new()?;
            let root = collector.collect()?;
            Response::Metrics(IdmMetricsResponse { root: Some(root) })
        }
        // Nothing to answer when the request payload is absent.
        None => return Ok(()),
    };
    self.idm.respond(id, response).await?;
    Ok(())
}
async fn child_event(&mut self, event: ChildEvent) -> Result<()> {
if event.pid == self.child {
self.idm
.sender
.send(IdmPacket {
event: Some(IdmEvent {
event: Some(Event::Exit(IdmExitEvent { code: event.status })),
}),
.emit(IdmEvent {
event: Some(Event::Exit(IdmExitEvent { code: event.status })),
})
.await?;
death(event.status).await?;

View File

@ -17,14 +17,12 @@ use std::fs::{File, OpenOptions, Permissions};
use std::io;
use std::net::{Ipv4Addr, Ipv6Addr};
use std::os::fd::AsRawFd;
use std::os::linux::fs::MetadataExt;
use std::os::unix::ffi::OsStrExt;
use std::os::unix::fs::{chroot, PermissionsExt};
use std::path::{Path, PathBuf};
use std::str::FromStr;
use sys_mount::{FilesystemType, Mount, MountFlags};
use tokio::fs;
use walkdir::WalkDir;
use crate::background::GuestBackground;
@ -88,7 +86,6 @@ impl GuestInit {
let launch = self.parse_launch_config().await?;
self.mount_new_root().await?;
self.nuke_initrd().await?;
self.bind_new_root().await?;
if let Some(hostname) = launch.hostname.clone() {
@ -271,40 +268,6 @@ impl GuestInit {
Ok(serde_json::from_str(&content)?)
}
/// Deletes everything that lives on the same device as `/`.
///
/// The walk stays on one filesystem, does not follow symlinks, and yields a
/// directory's contents before the directory itself, so directories are
/// already emptied by the time `remove_dir` is attempted on them. Entries
/// that cannot be read are skipped and deletion failures are ignored: the
/// whole pass is best-effort.
///
/// Returns an error only if the metadata of `/` cannot be read.
async fn nuke_initrd(&mut self) -> Result<()> {
    trace!("nuking initrd");
    let initrd_dev = fs::metadata("/").await?.st_dev();
    let walker = WalkDir::new("/")
        .same_file_system(true)
        .follow_links(false)
        .contents_first(true);

    // Entries the walker failed to produce are dropped silently.
    for entry in walker.into_iter().filter_map(|entry| entry.ok()) {
        let Ok(metadata) = entry.metadata() else {
            continue;
        };
        // Only touch entries that are on the device `/` is on.
        if metadata.st_dev() != initrd_dev {
            continue;
        }

        let path = entry.path();
        if metadata.is_symlink() || metadata.is_file() {
            let _ = fs::remove_file(path).await;
            trace!("deleting file {:?}", path);
        } else if metadata.is_dir() {
            let _ = fs::remove_dir(path).await;
            trace!("deleting directory {:?}", path);
        }
    }

    trace!("nuked initrd");
    Ok(())
}
async fn bind_new_root(&mut self) -> Result<()> {
self.mount_move_subtree(Path::new(SYS_PATH), Path::new(NEW_ROOT_SYS_PATH))
.await?;

View File

@ -7,6 +7,7 @@ use xenstore::{XsdClient, XsdInterface};
pub mod background;
pub mod childwait;
pub mod init;
pub mod metrics;
pub async fn death(code: c_int) -> Result<()> {
let store = XsdClient::open().await?;

121
crates/guest/src/metrics.rs Normal file
View File

@ -0,0 +1,121 @@
use std::{ops::Add, path::Path};
use anyhow::Result;
use krata::idm::protocol::{IdmMetricFormat, IdmMetricNode};
use sysinfo::Process;
/// Builds a snapshot of guest metrics as a tree of [`IdmMetricNode`]s.
///
/// The collector keeps no state of its own: each call to
/// [`MetricsCollector::collect`] creates a new `sysinfo::System` and
/// refreshes only the data it reports.
pub struct MetricsCollector {}

impl MetricsCollector {
    /// Creates a collector. No work happens here, so this currently always
    /// returns `Ok`.
    pub fn new() -> Result<Self> {
        Ok(Self {})
    }

    /// Produces the `guest` root node, holding the `system` subtree followed
    /// by the `process` subtree.
    pub fn collect(&self) -> Result<IdmMetricNode> {
        let mut system = sysinfo::System::new();
        let system_node = self.collect_system(&mut system)?;
        let process_node = self.collect_processes(&mut system)?;
        Ok(IdmMetricNode::structural(
            "guest",
            vec![system_node, process_node],
        ))
    }

    /// Refreshes memory statistics and reports them as `system/memory` with
    /// `total`, `used` and `free` values formatted as bytes.
    fn collect_system(&self, sysinfo: &mut sysinfo::System) -> Result<IdmMetricNode> {
        sysinfo.refresh_memory();
        let memory = IdmMetricNode::structural(
            "memory",
            vec![
                IdmMetricNode::value("total", sysinfo.total_memory(), IdmMetricFormat::Bytes),
                IdmMetricNode::value("used", sysinfo.used_memory(), IdmMetricFormat::Bytes),
                IdmMetricNode::value("free", sysinfo.free_memory(), IdmMetricFormat::Bytes),
            ],
        );
        Ok(IdmMetricNode::structural("system", vec![memory]))
    }

    /// Refreshes the process list and reports one child node per entry under
    /// `process`, ordered by pid.
    ///
    /// Entries for which `thread_kind()` is set are left out (presumably
    /// these are threads rather than processes; confirm against sysinfo).
    fn collect_processes(&self, sysinfo: &mut sysinfo::System) -> Result<IdmMetricNode> {
        sysinfo.refresh_processes();
        let mut by_pid = sysinfo.processes().values().collect::<Vec<_>>();
        by_pid.sort_by_key(|entry| entry.pid());

        let nodes = by_pid
            .into_iter()
            .filter(|entry| entry.thread_kind().is_none())
            .map(Self::process_node)
            .collect::<Result<Vec<_>>>()?;
        Ok(IdmMetricNode::structural("process", nodes))
    }

    /// Describes a single process as a structural node named after its pid.
    ///
    /// `parent`, `executable` and `cwd` appear only when sysinfo provides
    /// them (and, for the two paths, when they are valid UTF-8). The four id
    /// values fall back to `0` when sysinfo has no value for them.
    fn process_node(process: &Process) -> Result<IdmMetricNode> {
        let mut metrics: Vec<IdmMetricNode> = Vec::new();

        // Optional leading entries; each `extend` adds zero or one node.
        metrics.extend(process.parent().map(|parent| {
            IdmMetricNode::value("parent", parent.as_u32() as u64, IdmMetricFormat::Integer)
        }));
        metrics.extend(
            process
                .exe()
                .and_then(path_as_str)
                .map(|exe| IdmMetricNode::raw_value("executable", exe)),
        );
        metrics.extend(
            process
                .cwd()
                .and_then(path_as_str)
                .map(|working_directory| IdmMetricNode::raw_value("cwd", working_directory)),
        );

        // `.add(0)` reaches the numeric value behind sysinfo's id wrappers.
        let uid = process.user_id().map(|x| (*x).add(0)).unwrap_or(0) as f64;
        let gid = process.group_id().map(|x| (*x).add(0)).unwrap_or(0) as f64;
        let euid = process
            .effective_user_id()
            .map(|x| (*x).add(0))
            .unwrap_or(0) as f64;
        let egid = process.effective_group_id().map(|x| x.add(0)).unwrap_or(0) as f64;

        metrics.extend([
            IdmMetricNode::raw_value("cmdline", process.cmd().to_vec()),
            IdmMetricNode::structural(
                "memory",
                vec![
                    IdmMetricNode::value("resident", process.memory(), IdmMetricFormat::Bytes),
                    IdmMetricNode::value(
                        "virtual",
                        process.virtual_memory(),
                        IdmMetricFormat::Bytes,
                    ),
                ],
            ),
            IdmMetricNode::value(
                "lifetime",
                process.run_time(),
                IdmMetricFormat::DurationSeconds,
            ),
            IdmMetricNode::value("uid", uid, IdmMetricFormat::Integer),
            IdmMetricNode::value("gid", gid, IdmMetricFormat::Integer),
            IdmMetricNode::value("euid", euid, IdmMetricFormat::Integer),
            IdmMetricNode::value("egid", egid, IdmMetricFormat::Integer),
        ]);

        Ok(IdmMetricNode::structural(
            process.pid().to_string(),
            metrics,
        ))
    }
}
/// Converts `path` into an owned `String`.
///
/// Returns `None` when the path is not valid UTF-8, so callers can simply
/// omit the value instead of reporting a lossy rendering of it.
fn path_as_str(path: &Path) -> Option<String> {
    // `Path::to_str` validates in place; bytes are copied only on success.
    path.to_str().map(str::to_owned)
}