From 6bf3741ec9fbcb7f6392760d858d5dc746a8bacd Mon Sep 17 00:00:00 2001 From: Khionu Sybiern Date: Thu, 8 Aug 2024 12:11:51 -0700 Subject: [PATCH] chore(o11y): add more debug logs to daemon & runtime (#318) This change adds debug log lines to make it easier to tell where issues are occuring during startup. --- crates/daemon/src/idm.rs | 7 ++++++- crates/daemon/src/lib.rs | 20 ++++++++++++++++++-- crates/runtime/src/channel.rs | 14 +++++++++++--- crates/runtime/src/ip.rs | 8 +++++++- crates/runtime/src/lib.rs | 16 ++++++++++++---- 5 files changed, 54 insertions(+), 11 deletions(-) diff --git a/crates/daemon/src/idm.rs b/crates/daemon/src/idm.rs index 3758147..f5d9016 100644 --- a/crates/daemon/src/idm.rs +++ b/crates/daemon/src/idm.rs @@ -11,7 +11,7 @@ use krata::idm::{ transport::IdmTransportPacket, }; use kratart::channel::ChannelService; -use log::{error, warn}; +use log::{debug, error, warn}; use prost::Message; use tokio::{ select, @@ -85,13 +85,18 @@ pub struct DaemonIdm { impl DaemonIdm { pub async fn new(glt: ZoneLookupTable) -> Result { + debug!("allocating channel for IDM"); let (service, tx_raw_sender, rx_receiver) = ChannelService::new("krata-channel".to_string(), None).await?; let (tx_sender, tx_receiver) = channel(100); let (snoop_sender, _) = broadcast::channel(100); + + debug!("starting channel service"); let task = service.launch().await?; + let clients = Arc::new(Mutex::new(HashMap::new())); let feeds = Arc::new(Mutex::new(HashMap::new())); + Ok(DaemonIdm { glt, rx_receiver, diff --git a/crates/daemon/src/lib.rs b/crates/daemon/src/lib.rs index 6e75aea..1e9f85b 100644 --- a/crates/daemon/src/lib.rs +++ b/crates/daemon/src/lib.rs @@ -11,7 +11,7 @@ use idm::{DaemonIdm, DaemonIdmHandle}; use krata::{dial::ControlDialAddress, v1::control::control_service_server::ControlServiceServer}; use krataoci::{packer::service::OciPackerService, registry::OciPlatform}; use kratart::Runtime; -use log::info; +use log::{debug, info}; use reconcile::zone::ZoneReconciler; use tokio::{ fs, @@ -58,18 +58,22 @@ const ZONE_RECONCILER_QUEUE_LEN: usize = 1000; impl Daemon { pub async fn new(store: String) -> Result { let store_dir = PathBuf::from(store.clone()); + debug!("loading configuration"); let mut config_path = store_dir.clone(); config_path.push("config.toml"); let config = DaemonConfig::load(&config_path).await?; let config = Arc::new(config); + debug!("initializing device manager"); let devices = DaemonDeviceManager::new(config.clone()); + debug!("validating image cache directory"); let mut image_cache_dir = store_dir.clone(); image_cache_dir.push("cache"); image_cache_dir.push("image"); fs::create_dir_all(&image_cache_dir).await?; + debug!("loading zone0 uuid"); let mut host_uuid_path = store_dir.clone(); host_uuid_path.push("host.uuid"); let host_uuid = if host_uuid_path.is_file() { @@ -89,22 +93,32 @@ impl Daemon { generated }; + debug!("validating zone asset directories"); let initrd_path = detect_zone_path(&store, "initrd")?; let kernel_path = detect_zone_path(&store, "kernel")?; let addons_path = detect_zone_path(&store, "addons.squashfs")?; + debug!("initializing caches and hydrating zone state"); let seed = config.oci.seed.clone().map(PathBuf::from); let packer = OciPackerService::new(seed, &image_cache_dir, OciPlatform::current()).await?; - let runtime = Runtime::new(host_uuid).await?; let glt = ZoneLookupTable::new(0, host_uuid); let zones_db_path = format!("{}/zones.db", store); let zones = ZoneStore::open(&PathBuf::from(zones_db_path))?; let (zone_reconciler_notify, zone_reconciler_receiver) = channel::(ZONE_RECONCILER_QUEUE_LEN); + + debug!("initializing core runtime"); + let runtime = Runtime::new(host_uuid).await?; + + debug!("starting IDM service"); let idm = DaemonIdm::new(glt.clone()).await?; let idm = idm.launch().await?; + + debug!("initializing console interfaces"); let console = DaemonConsole::new(glt.clone()).await?; let console = console.launch().await?; + + debug!("initializing zone reconciler"); let (events, generator) = DaemonEventGenerator::new(zones.clone(), zone_reconciler_notify.clone(), idm.clone()) .await?; @@ -134,6 +148,7 @@ impl Daemon { // .set_scheduler_policy("performance".to_string()) // .await?; + info!("krata daemon initialized"); Ok(Self { store, _config: config, @@ -152,6 +167,7 @@ impl Daemon { } pub async fn listen(&mut self, addr: ControlDialAddress) -> Result<()> { + debug!("starting API service"); let control_service = DaemonControlService::new( self.glt.clone(), self.devices.clone(), diff --git a/crates/runtime/src/channel.rs b/crates/runtime/src/channel.rs index 0c5ee43..f4a815d 100644 --- a/crates/runtime/src/channel.rs +++ b/crates/runtime/src/channel.rs @@ -62,14 +62,22 @@ impl ChannelService { )> { let (input_sender, input_receiver) = channel(GROUPED_CHANNEL_QUEUE_LEN); let (output_sender, output_receiver) = channel(GROUPED_CHANNEL_QUEUE_LEN); + + debug!("opening Xen event channel"); + let evtchn = EventChannel::open().await?; + debug!("opening XenStore"); + let store = XsdClient::open().await?; + debug!("opening GrantTab"); + let gnttab = GrantTab::open()?; + Ok(( ChannelService { typ, use_reserved_ref, backends: HashMap::new(), - evtchn: EventChannel::open().await?, - store: XsdClient::open().await?, - gnttab: GrantTab::open()?, + evtchn, + store, + gnttab, input_sender: input_sender.clone(), input_receiver, output_sender, diff --git a/crates/runtime/src/ip.rs b/crates/runtime/src/ip.rs index 9ffccdb..ce69a15 100644 --- a/crates/runtime/src/ip.rs +++ b/crates/runtime/src/ip.rs @@ -7,7 +7,7 @@ use std::{ use anyhow::{anyhow, Result}; use ipnetwork::{Ipv4Network, Ipv6Network}; -use log::error; +use log::{debug, error, trace}; use tokio::sync::RwLock; use uuid::Uuid; use xenstore::{XsdClient, XsdInterface}; @@ -72,7 +72,9 @@ impl IpVendor { ipv4_network: Ipv4Network, ipv6_network: Ipv6Network, ) -> Result { + debug!("fetching state from xenstore"); let mut state = IpVendor::fetch_stored_state(&store).await?; + debug!("allocating IP set"); let (gateway_ipv4, gateway_ipv6) = IpVendor::allocate_ipset(&mut state, host_uuid, ipv4_network, ipv6_network)?; let vend = IpVendor { @@ -84,11 +86,14 @@ impl IpVendor { gateway_ipv6, state: Arc::new(RwLock::new(state)), }; + debug!("IP vendor initialized!"); Ok(vend) } async fn fetch_stored_state(store: &XsdClient) -> Result { + debug!("initializing default IP vendor state"); let mut state = IpVendorState::default(); + debug!("iterating over xen domains"); for domid_candidate in store.list("/local/domain").await? { let dom_path = format!("/local/domain/{}", domid_candidate); let Some(uuid) = store @@ -119,6 +124,7 @@ impl IpVendor { } } } + debug!("IP state hydrated"); Ok(state) } diff --git a/crates/runtime/src/lib.rs b/crates/runtime/src/lib.rs index 85468f4..92dd589 100644 --- a/crates/runtime/src/lib.rs +++ b/crates/runtime/src/lib.rs @@ -4,7 +4,7 @@ use anyhow::{anyhow, Result}; use ip::IpVendor; use ipnetwork::{IpNetwork, Ipv4Network, Ipv6Network}; use krataloopdev::LoopControl; -use log::error; +use log::{debug, error}; use tokio::sync::Semaphore; use uuid::Uuid; use xenclient::XenClient; @@ -66,15 +66,23 @@ pub struct RuntimeContext { impl RuntimeContext { pub async fn new(host_uuid: Uuid) -> Result { + debug!("initializing XenClient"); let xen = XenClient::new(0, RuntimePlatform::new()).await?; + + debug!("initializing ip allocation vendor"); let ipv4_network = Ipv4Network::new(Ipv4Addr::new(10, 75, 80, 0), 24)?; let ipv6_network = Ipv6Network::from_str("fdd4:1476:6c7e::/48")?; - let ipvend = + let ipvendor = IpVendor::new(xen.store.clone(), host_uuid, ipv4_network, ipv6_network).await?; + + debug!("initializing loop devices"); + let autoloop = AutoLoop::new(LoopControl::open()?); + + debug!("krata runtime initialized!"); Ok(RuntimeContext { - autoloop: AutoLoop::new(LoopControl::open()?), + autoloop, xen, - ipvendor: ipvend, + ipvendor, }) }