feat: guest metrics support (#46)

* feat: initial support for idm send in daemon

* feat: implement IdmClient backend support

* feat: daemon idm now uses IdmClient

* fix: implement channel destruction propagation

* feat: implement request response idm system

* feat: implement metrics support

* proto: move metrics into GuestMetrics for reusability

* fix: log level of guest agent was trace

* feat: metrics tree with process information
This commit is contained in:
Alex Zenla
2024-04-12 00:34:46 -07:00
committed by GitHub
parent ec9060d872
commit 5e16f3149f
27 changed files with 1211 additions and 278 deletions

View File

@ -79,7 +79,7 @@ impl Drop for DaemonConsoleHandle {
pub struct DaemonConsole {
listeners: ListenerMap,
buffers: BufferMap,
receiver: Receiver<(u32, Vec<u8>)>,
receiver: Receiver<(u32, Option<Vec<u8>>)>,
sender: Sender<(u32, Vec<u8>)>,
task: JoinHandle<()>,
}
@ -124,16 +124,22 @@ impl DaemonConsole {
};
let mut buffers = self.buffers.lock().await;
let buffer = buffers
.entry(domid)
.or_insert_with_key(|_| RawConsoleBuffer::boxed());
buffer.extend_from_slice(&data);
drop(buffers);
let mut listeners = self.listeners.lock().await;
if let Some(senders) = listeners.get_mut(&domid) {
senders.retain(|sender| {
!matches!(sender.try_send(data.to_vec()), Err(TrySendError::Closed(_)))
});
if let Some(data) = data {
let buffer = buffers
.entry(domid)
.or_insert_with_key(|_| RawConsoleBuffer::boxed());
buffer.extend_from_slice(&data);
drop(buffers);
let mut listeners = self.listeners.lock().await;
if let Some(senders) = listeners.get_mut(&domid) {
senders.retain(|sender| {
!matches!(sender.try_send(data.to_vec()), Err(TrySendError::Closed(_)))
});
}
} else {
buffers.remove(&domid);
let mut listeners = self.listeners.lock().await;
listeners.remove(&domid);
}
}
Ok(())

View File

@ -2,13 +2,19 @@ use std::{pin::Pin, str::FromStr};
use async_stream::try_stream;
use futures::Stream;
use krata::v1::{
common::{Guest, GuestState, GuestStatus},
control::{
control_service_server::ControlService, ConsoleDataReply, ConsoleDataRequest,
CreateGuestReply, CreateGuestRequest, DestroyGuestReply, DestroyGuestRequest,
ListGuestsReply, ListGuestsRequest, ResolveGuestReply, ResolveGuestRequest,
WatchEventsReply, WatchEventsRequest,
use krata::{
idm::protocol::{
idm_request::Request as IdmRequestType, idm_response::Response as IdmResponseType,
IdmMetricsRequest,
},
v1::{
common::{Guest, GuestState, GuestStatus},
control::{
control_service_server::ControlService, ConsoleDataReply, ConsoleDataRequest,
CreateGuestReply, CreateGuestRequest, DestroyGuestReply, DestroyGuestRequest,
ListGuestsReply, ListGuestsRequest, ReadGuestMetricsReply, ReadGuestMetricsRequest,
ResolveGuestReply, ResolveGuestRequest, WatchEventsReply, WatchEventsRequest,
},
},
};
use tokio::{
@ -19,7 +25,10 @@ use tokio_stream::StreamExt;
use tonic::{Request, Response, Status, Streaming};
use uuid::Uuid;
use crate::{console::DaemonConsoleHandle, db::GuestStore, event::DaemonEventContext};
use crate::{
console::DaemonConsoleHandle, db::GuestStore, event::DaemonEventContext, idm::DaemonIdmHandle,
metrics::idm_metric_to_api,
};
pub struct ApiError {
message: String,
@ -43,6 +52,7 @@ impl From<ApiError> for Status {
/// State backing the `ControlService` implementation for the daemon.
pub struct RuntimeControlService {
events: DaemonEventContext,
console: DaemonConsoleHandle,
// Used by `read_guest_metrics` to obtain the IDM client for a guest's domid.
idm: DaemonIdmHandle,
// Queried by `read_guest_metrics` to look a guest up by its UUID.
guests: GuestStore,
guest_reconciler_notify: Sender<Uuid>,
}
@ -51,12 +61,14 @@ impl RuntimeControlService {
pub fn new(
events: DaemonEventContext,
console: DaemonConsoleHandle,
idm: DaemonIdmHandle,
guests: GuestStore,
guest_reconciler_notify: Sender<Uuid>,
) -> Self {
Self {
events,
console,
idm,
guests,
guest_reconciler_notify,
}
@ -269,6 +281,58 @@ impl ControlService for RuntimeControlService {
Ok(Response::new(Box::pin(output) as Self::ConsoleDataStream))
}
async fn read_guest_metrics(
&self,
request: Request<ReadGuestMetricsRequest>,
) -> Result<Response<ReadGuestMetricsReply>, Status> {
let request = request.into_inner();
let uuid = Uuid::from_str(&request.guest_id).map_err(|error| ApiError {
message: error.to_string(),
})?;
let guest = self
.guests
.read(uuid)
.await
.map_err(|error| ApiError {
message: error.to_string(),
})?
.ok_or_else(|| ApiError {
message: "guest did not exist in the database".to_string(),
})?;
let Some(ref state) = guest.state else {
return Err(ApiError {
message: "guest did not have state".to_string(),
}
.into());
};
let domid = state.domid;
if domid == 0 {
return Err(ApiError {
message: "invalid domid on the guest".to_string(),
}
.into());
}
let client = self.idm.client(domid).await.map_err(|error| ApiError {
message: error.to_string(),
})?;
let response = client
.send(IdmRequestType::Metrics(IdmMetricsRequest {}))
.await
.map_err(|error| ApiError {
message: error.to_string(),
})?;
let mut reply = ReadGuestMetricsReply::default();
if let IdmResponseType::Metrics(metrics) = response {
reply.root = metrics.root.map(idm_metric_to_api);
}
Ok(Response::new(reply))
}
async fn watch_events(
&self,
request: Request<WatchEventsRequest>,

View File

@ -6,10 +6,10 @@ use std::{
use anyhow::Result;
use krata::{
idm::protocol::{idm_event::Event, IdmPacket},
idm::protocol::{idm_event::Event, IdmEvent},
v1::common::{GuestExitInfo, GuestState, GuestStatus},
};
use log::error;
use log::{error, warn};
use tokio::{
select,
sync::{
@ -21,15 +21,12 @@ use tokio::{
};
use uuid::Uuid;
use crate::{
db::GuestStore,
idm::{DaemonIdmHandle, DaemonIdmSubscribeHandle},
};
use crate::{db::GuestStore, idm::DaemonIdmHandle};
pub type DaemonEvent = krata::v1::control::watch_events_reply::Event;
const EVENT_CHANNEL_QUEUE_LEN: usize = 1000;
const IDM_CHANNEL_QUEUE_LEN: usize = 1000;
const IDM_EVENT_CHANNEL_QUEUE_LEN: usize = 1000;
#[derive(Clone)]
pub struct DaemonEventContext {
@ -52,9 +49,9 @@ pub struct DaemonEventGenerator {
guest_reconciler_notify: Sender<Uuid>,
feed: broadcast::Receiver<DaemonEvent>,
idm: DaemonIdmHandle,
idms: HashMap<u32, (Uuid, DaemonIdmSubscribeHandle)>,
idm_sender: Sender<(u32, IdmPacket)>,
idm_receiver: Receiver<(u32, IdmPacket)>,
idms: HashMap<u32, (Uuid, JoinHandle<()>)>,
idm_sender: Sender<(u32, IdmEvent)>,
idm_receiver: Receiver<(u32, IdmEvent)>,
_event_sender: broadcast::Sender<DaemonEvent>,
}
@ -65,7 +62,7 @@ impl DaemonEventGenerator {
idm: DaemonIdmHandle,
) -> Result<(DaemonEventContext, DaemonEventGenerator)> {
let (sender, _) = broadcast::channel(EVENT_CHANNEL_QUEUE_LEN);
let (idm_sender, idm_receiver) = channel(IDM_CHANNEL_QUEUE_LEN);
let (idm_sender, idm_receiver) = channel(IDM_EVENT_CHANNEL_QUEUE_LEN);
let generator = DaemonEventGenerator {
guests,
guest_reconciler_notify,
@ -97,15 +94,27 @@ impl DaemonEventGenerator {
match status {
GuestStatus::Started => {
if let Entry::Vacant(e) = self.idms.entry(domid) {
let subscribe =
self.idm.subscribe(domid, self.idm_sender.clone()).await?;
e.insert((id, subscribe));
let client = self.idm.client(domid).await?;
let mut receiver = client.subscribe().await?;
let sender = self.idm_sender.clone();
let task = tokio::task::spawn(async move {
loop {
let Ok(event) = receiver.recv().await else {
break;
};
if let Err(error) = sender.send((domid, event)).await {
warn!("unable to deliver idm event: {}", error);
}
}
});
e.insert((id, task));
}
}
GuestStatus::Destroyed => {
if let Some((_, handle)) = self.idms.remove(&domid) {
handle.unsubscribe().await?;
handle.abort();
}
}
@ -116,11 +125,11 @@ impl DaemonEventGenerator {
Ok(())
}
async fn handle_idm_packet(&mut self, id: Uuid, packet: IdmPacket) -> Result<()> {
if let Some(Event::Exit(exit)) = packet.event.and_then(|x| x.event) {
self.handle_exit_code(id, exit.code).await?;
async fn handle_idm_event(&mut self, id: Uuid, event: IdmEvent) -> Result<()> {
match event.event {
Some(Event::Exit(exit)) => self.handle_exit_code(id, exit.code).await,
None => Ok(()),
}
Ok(())
}
async fn handle_exit_code(&mut self, id: Uuid, code: i32) -> Result<()> {
@ -142,9 +151,9 @@ impl DaemonEventGenerator {
async fn evaluate(&mut self) -> Result<()> {
select! {
x = self.idm_receiver.recv() => match x {
Some((domid, packet)) => {
Some((domid, event)) => {
if let Some((id, _)) = self.idms.get(&domid) {
self.handle_idm_packet(*id, packet).await?;
self.handle_idm_event(*id, event).await?;
}
Ok(())
},

View File

@ -1,53 +1,40 @@
use std::{collections::HashMap, sync::Arc};
use std::{
collections::{hash_map::Entry, HashMap},
sync::Arc,
};
use anyhow::Result;
use anyhow::{anyhow, Result};
use bytes::{Buf, BytesMut};
use krata::idm::protocol::IdmPacket;
use krata::idm::{
client::{IdmBackend, IdmClient},
protocol::IdmPacket,
};
use kratart::channel::ChannelService;
use log::{error, warn};
use prost::Message;
use tokio::{
select,
sync::{
mpsc::{Receiver, Sender},
mpsc::{channel, Receiver, Sender},
Mutex,
},
task::JoinHandle,
};
type ListenerMap = Arc<Mutex<HashMap<u32, Sender<(u32, IdmPacket)>>>>;
type BackendFeedMap = Arc<Mutex<HashMap<u32, Sender<IdmPacket>>>>;
type ClientMap = Arc<Mutex<HashMap<u32, IdmClient>>>;
#[derive(Clone)]
pub struct DaemonIdmHandle {
listeners: ListenerMap,
clients: ClientMap,
feeds: BackendFeedMap,
tx_sender: Sender<(u32, IdmPacket)>,
task: Arc<JoinHandle<()>>,
}
#[derive(Clone)]
pub struct DaemonIdmSubscribeHandle {
domid: u32,
listeners: ListenerMap,
}
impl DaemonIdmSubscribeHandle {
pub async fn unsubscribe(&self) -> Result<()> {
let mut guard = self.listeners.lock().await;
let _ = guard.remove(&self.domid);
Ok(())
}
}
impl DaemonIdmHandle {
pub async fn subscribe(
&self,
domid: u32,
sender: Sender<(u32, IdmPacket)>,
) -> Result<DaemonIdmSubscribeHandle> {
let mut guard = self.listeners.lock().await;
guard.insert(domid, sender);
Ok(DaemonIdmSubscribeHandle {
domid,
listeners: self.listeners.clone(),
})
/// Returns the IDM client for `domid` by delegating to `client_or_create`
/// with this handle's packet sender, client map, and feed map.
pub async fn client(&self, domid: u32) -> Result<IdmClient> {
client_or_create(domid, &self.tx_sender, &self.clients, &self.feeds).await
}
}
@ -60,25 +47,38 @@ impl Drop for DaemonIdmHandle {
}
pub struct DaemonIdm {
listeners: ListenerMap,
receiver: Receiver<(u32, Vec<u8>)>,
clients: ClientMap,
feeds: BackendFeedMap,
tx_sender: Sender<(u32, IdmPacket)>,
tx_raw_sender: Sender<(u32, Vec<u8>)>,
tx_receiver: Receiver<(u32, IdmPacket)>,
rx_receiver: Receiver<(u32, Option<Vec<u8>>)>,
task: JoinHandle<()>,
}
impl DaemonIdm {
pub async fn new() -> Result<DaemonIdm> {
let (service, _, receiver) = ChannelService::new("krata-channel".to_string(), None).await?;
let (service, tx_raw_sender, rx_receiver) =
ChannelService::new("krata-channel".to_string(), None).await?;
let (tx_sender, tx_receiver) = channel(100);
let task = service.launch().await?;
let listeners = Arc::new(Mutex::new(HashMap::new()));
let clients = Arc::new(Mutex::new(HashMap::new()));
let feeds = Arc::new(Mutex::new(HashMap::new()));
Ok(DaemonIdm {
receiver,
rx_receiver,
tx_receiver,
tx_sender,
tx_raw_sender,
task,
listeners,
clients,
feeds,
})
}
pub async fn launch(mut self) -> Result<DaemonIdmHandle> {
let listeners = self.listeners.clone();
let clients = self.clients.clone();
let feeds = self.feeds.clone();
let tx_sender = self.tx_sender.clone();
let task = tokio::task::spawn(async move {
let mut buffers: HashMap<u32, BytesMut> = HashMap::new();
if let Err(error) = self.process(&mut buffers).await {
@ -86,43 +86,74 @@ impl DaemonIdm {
}
});
Ok(DaemonIdmHandle {
listeners,
clients,
feeds,
tx_sender,
task: Arc::new(task),
})
}
async fn process(&mut self, buffers: &mut HashMap<u32, BytesMut>) -> Result<()> {
loop {
let Some((domid, data)) = self.receiver.recv().await else {
break;
};
select! {
x = self.rx_receiver.recv() => match x {
Some((domid, data)) => {
if let Some(data) = data {
let buffer = buffers.entry(domid).or_insert_with_key(|_| BytesMut::new());
buffer.extend_from_slice(&data);
if buffer.len() < 4 {
continue;
}
let size = (buffer[0] as u32 | (buffer[1] as u32) << 8 | (buffer[2] as u32) << 16 | (buffer[3] as u32) << 24) as usize;
let needed = size + 4;
if buffer.len() < needed {
continue;
}
let mut packet = buffer.split_to(needed);
packet.advance(4);
match IdmPacket::decode(packet) {
Ok(packet) => {
let _ = client_or_create(domid, &self.tx_sender, &self.clients, &self.feeds).await?;
let guard = self.feeds.lock().await;
if let Some(feed) = guard.get(&domid) {
let _ = feed.try_send(packet);
}
}
let buffer = buffers.entry(domid).or_insert_with_key(|_| BytesMut::new());
buffer.extend_from_slice(&data);
if buffer.len() < 2 {
continue;
}
let size = (buffer[0] as u16 | (buffer[1] as u16) << 8) as usize;
let needed = size + 2;
if buffer.len() < needed {
continue;
}
let mut packet = buffer.split_to(needed);
packet.advance(2);
match IdmPacket::decode(packet) {
Ok(packet) => {
let guard = self.listeners.lock().await;
if let Some(sender) = guard.get(&domid) {
if let Err(error) = sender.try_send((domid, packet)) {
warn!("dropped idm packet from domain {}: {}", domid, error);
Err(packet) => {
warn!("received invalid packet from domain {}: {}", domid, packet);
}
}
} else {
let mut clients = self.clients.lock().await;
let mut feeds = self.feeds.lock().await;
clients.remove(&domid);
feeds.remove(&domid);
}
},
None => {
break;
}
},
x = self.tx_receiver.recv() => match x {
Some((domid, packet)) => {
// Frame the outgoing packet as a 4-byte little-endian length prefix followed
// by the encoded payload. This matches the receive path, which rebuilds the
// length as b0 | b1 << 8 | b2 << 16 | b3 << 24. The previous code shifted the
// length left instead of right, so bytes 1..=3 of the prefix were always
// zero and any payload of 256 bytes or more was framed with a wrong length.
let data = packet.encode_to_vec();
let length = data.len() as u32;
let mut buffer = Vec::with_capacity(4 + data.len());
buffer.extend_from_slice(&length.to_le_bytes());
buffer.extend_from_slice(&data);
self.tx_raw_sender.send((domid, buffer)).await?;
},
None => {
break;
}
}
Err(packet) => {
warn!("received invalid packet from domain {}: {}", domid, packet);
}
}
};
}
Ok(())
}
@ -133,3 +164,50 @@ impl Drop for DaemonIdm {
self.task.abort();
}
}
/// Returns the `IdmClient` registered for `domid`, creating and registering
/// one (together with its packet feed) when none exists yet.
///
/// * `domid` - domain the client talks to.
/// * `tx_sender` - sender cloned into the new backend for outgoing packets.
/// * `clients` - shared map of existing clients, keyed by domid.
/// * `feeds` - shared map of senders that feed received packets to backends.
///
/// # Errors
///
/// Propagates the error from `IdmClient::new`. In that case neither map is
/// modified, so no feed whose receiver was already dropped is left behind.
async fn client_or_create(
    domid: u32,
    tx_sender: &Sender<(u32, IdmPacket)>,
    clients: &ClientMap,
    feeds: &BackendFeedMap,
) -> Result<IdmClient> {
    // Both maps stay locked for the whole call so the client and its feed are
    // registered together.
    let mut clients = clients.lock().await;
    let mut feeds = feeds.lock().await;
    match clients.entry(domid) {
        Entry::Occupied(entry) => Ok(entry.get().clone()),
        Entry::Vacant(entry) => {
            let (rx_sender, rx_receiver) = channel(100);
            let backend = IdmDaemonBackend {
                domid,
                rx_receiver,
                tx_sender: tx_sender.clone(),
            };
            let client = IdmClient::new(Box::new(backend) as Box<dyn IdmBackend>).await?;
            // Register the feed only once the client exists; registering it
            // earlier left a dead sender in the map when construction failed.
            feeds.insert(domid, rx_sender);
            entry.insert(client.clone());
            Ok(client)
        }
    }
}
/// `IdmBackend` implementation handed to the clients built by `client_or_create`.
pub struct IdmDaemonBackend {
// Domain id paired with every packet passed to `send`.
domid: u32,
// Source of the packets returned by `recv`.
rx_receiver: Receiver<IdmPacket>,
// Destination for outgoing `(domid, packet)` pairs.
tx_sender: Sender<(u32, IdmPacket)>,
}
#[async_trait::async_trait]
impl IdmBackend for IdmDaemonBackend {
    /// Waits for the next packet from the receive channel.
    ///
    /// Fails once the channel has been closed and yields no more packets.
    async fn recv(&mut self) -> Result<IdmPacket> {
        self.rx_receiver
            .recv()
            .await
            .ok_or_else(|| anyhow!("idm receive channel closed"))
    }

    /// Queues `packet` for transmission, tagged with this backend's domid.
    async fn send(&mut self, packet: IdmPacket) -> Result<()> {
        let outgoing = (self.domid, packet);
        self.tx_sender.send(outgoing).await?;
        Ok(())
    }
}

View File

@ -24,6 +24,7 @@ pub mod control;
pub mod db;
pub mod event;
pub mod idm;
pub mod metrics;
pub mod reconcile;
pub struct Daemon {
@ -33,7 +34,7 @@ pub struct Daemon {
guest_reconciler_task: JoinHandle<()>,
guest_reconciler_notify: Sender<Uuid>,
generator_task: JoinHandle<()>,
_idm: DaemonIdmHandle,
idm: DaemonIdmHandle,
console: DaemonConsoleHandle,
}
@ -69,7 +70,7 @@ impl Daemon {
guest_reconciler_task,
guest_reconciler_notify,
generator_task,
_idm: idm,
idm,
console,
})
}
@ -78,6 +79,7 @@ impl Daemon {
let control_service = RuntimeControlService::new(
self.events.clone(),
self.console.clone(),
self.idm.clone(),
self.guests.clone(),
self.guest_reconciler_notify.clone(),
);

View File

@ -0,0 +1,27 @@
use krata::{
idm::protocol::{IdmMetricFormat, IdmMetricNode},
v1::common::{GuestMetricFormat, GuestMetricNode},
};
fn idm_metric_format_to_api(format: IdmMetricFormat) -> GuestMetricFormat {
match format {
IdmMetricFormat::Unknown => GuestMetricFormat::Unknown,
IdmMetricFormat::Bytes => GuestMetricFormat::Bytes,
IdmMetricFormat::Integer => GuestMetricFormat::Integer,
IdmMetricFormat::DurationSeconds => GuestMetricFormat::DurationSeconds,
}
}
/// Converts an IDM metric node, including all of its descendants, into the
/// API metric node representation.
pub fn idm_metric_to_api(node: IdmMetricNode) -> GuestMetricNode {
    // Read the format through its accessor before the node's fields are moved out.
    let format = idm_metric_format_to_api(node.format()).into();
    let children = node
        .children
        .into_iter()
        .map(idm_metric_to_api)
        .collect();
    GuestMetricNode {
        name: node.name,
        value: node.value,
        format,
        children,
    }
}