chore(o11y): add more debug logs to daemon & runtime

This change adds debug log lines to make it easier to tell where issues
are occuring during startup.
This commit is contained in:
Khionu Sybiern 2024-08-08 12:03:50 -07:00
parent b7d41ee9f4
commit 5a8752448a
No known key found for this signature in database
5 changed files with 54 additions and 11 deletions

View File

@ -11,7 +11,7 @@ use krata::idm::{
transport::IdmTransportPacket, transport::IdmTransportPacket,
}; };
use kratart::channel::ChannelService; use kratart::channel::ChannelService;
use log::{error, warn}; use log::{debug, error, warn};
use prost::Message; use prost::Message;
use tokio::{ use tokio::{
select, select,
@ -85,13 +85,18 @@ pub struct DaemonIdm {
impl DaemonIdm { impl DaemonIdm {
pub async fn new(glt: ZoneLookupTable) -> Result<DaemonIdm> { pub async fn new(glt: ZoneLookupTable) -> Result<DaemonIdm> {
debug!("allocating channel for IDM");
let (service, tx_raw_sender, rx_receiver) = let (service, tx_raw_sender, rx_receiver) =
ChannelService::new("krata-channel".to_string(), None).await?; ChannelService::new("krata-channel".to_string(), None).await?;
let (tx_sender, tx_receiver) = channel(100); let (tx_sender, tx_receiver) = channel(100);
let (snoop_sender, _) = broadcast::channel(100); let (snoop_sender, _) = broadcast::channel(100);
debug!("starting channel service");
let task = service.launch().await?; let task = service.launch().await?;
let clients = Arc::new(Mutex::new(HashMap::new())); let clients = Arc::new(Mutex::new(HashMap::new()));
let feeds = Arc::new(Mutex::new(HashMap::new())); let feeds = Arc::new(Mutex::new(HashMap::new()));
Ok(DaemonIdm { Ok(DaemonIdm {
glt, glt,
rx_receiver, rx_receiver,

View File

@ -11,7 +11,7 @@ use idm::{DaemonIdm, DaemonIdmHandle};
use krata::{dial::ControlDialAddress, v1::control::control_service_server::ControlServiceServer}; use krata::{dial::ControlDialAddress, v1::control::control_service_server::ControlServiceServer};
use krataoci::{packer::service::OciPackerService, registry::OciPlatform}; use krataoci::{packer::service::OciPackerService, registry::OciPlatform};
use kratart::Runtime; use kratart::Runtime;
use log::info; use log::{debug, info};
use reconcile::zone::ZoneReconciler; use reconcile::zone::ZoneReconciler;
use tokio::{ use tokio::{
fs, fs,
@ -58,18 +58,22 @@ const ZONE_RECONCILER_QUEUE_LEN: usize = 1000;
impl Daemon { impl Daemon {
pub async fn new(store: String) -> Result<Self> { pub async fn new(store: String) -> Result<Self> {
let store_dir = PathBuf::from(store.clone()); let store_dir = PathBuf::from(store.clone());
debug!("loading configuration");
let mut config_path = store_dir.clone(); let mut config_path = store_dir.clone();
config_path.push("config.toml"); config_path.push("config.toml");
let config = DaemonConfig::load(&config_path).await?; let config = DaemonConfig::load(&config_path).await?;
let config = Arc::new(config); let config = Arc::new(config);
debug!("initializing device manager");
let devices = DaemonDeviceManager::new(config.clone()); let devices = DaemonDeviceManager::new(config.clone());
debug!("validating image cache directory");
let mut image_cache_dir = store_dir.clone(); let mut image_cache_dir = store_dir.clone();
image_cache_dir.push("cache"); image_cache_dir.push("cache");
image_cache_dir.push("image"); image_cache_dir.push("image");
fs::create_dir_all(&image_cache_dir).await?; fs::create_dir_all(&image_cache_dir).await?;
debug!("loading zone0 uuid");
let mut host_uuid_path = store_dir.clone(); let mut host_uuid_path = store_dir.clone();
host_uuid_path.push("host.uuid"); host_uuid_path.push("host.uuid");
let host_uuid = if host_uuid_path.is_file() { let host_uuid = if host_uuid_path.is_file() {
@ -89,22 +93,32 @@ impl Daemon {
generated generated
}; };
debug!("validating zone asset directories");
let initrd_path = detect_zone_path(&store, "initrd")?; let initrd_path = detect_zone_path(&store, "initrd")?;
let kernel_path = detect_zone_path(&store, "kernel")?; let kernel_path = detect_zone_path(&store, "kernel")?;
let addons_path = detect_zone_path(&store, "addons.squashfs")?; let addons_path = detect_zone_path(&store, "addons.squashfs")?;
debug!("initializing caches and hydrating zone state");
let seed = config.oci.seed.clone().map(PathBuf::from); let seed = config.oci.seed.clone().map(PathBuf::from);
let packer = OciPackerService::new(seed, &image_cache_dir, OciPlatform::current()).await?; let packer = OciPackerService::new(seed, &image_cache_dir, OciPlatform::current()).await?;
let runtime = Runtime::new(host_uuid).await?;
let glt = ZoneLookupTable::new(0, host_uuid); let glt = ZoneLookupTable::new(0, host_uuid);
let zones_db_path = format!("{}/zones.db", store); let zones_db_path = format!("{}/zones.db", store);
let zones = ZoneStore::open(&PathBuf::from(zones_db_path))?; let zones = ZoneStore::open(&PathBuf::from(zones_db_path))?;
let (zone_reconciler_notify, zone_reconciler_receiver) = let (zone_reconciler_notify, zone_reconciler_receiver) =
channel::<Uuid>(ZONE_RECONCILER_QUEUE_LEN); channel::<Uuid>(ZONE_RECONCILER_QUEUE_LEN);
debug!("initializing core runtime");
let runtime = Runtime::new(host_uuid).await?;
debug!("starting IDM service");
let idm = DaemonIdm::new(glt.clone()).await?; let idm = DaemonIdm::new(glt.clone()).await?;
let idm = idm.launch().await?; let idm = idm.launch().await?;
debug!("initializing console interfaces");
let console = DaemonConsole::new(glt.clone()).await?; let console = DaemonConsole::new(glt.clone()).await?;
let console = console.launch().await?; let console = console.launch().await?;
debug!("initializing zone reconciler");
let (events, generator) = let (events, generator) =
DaemonEventGenerator::new(zones.clone(), zone_reconciler_notify.clone(), idm.clone()) DaemonEventGenerator::new(zones.clone(), zone_reconciler_notify.clone(), idm.clone())
.await?; .await?;
@ -134,6 +148,7 @@ impl Daemon {
// .set_scheduler_policy("performance".to_string()) // .set_scheduler_policy("performance".to_string())
// .await?; // .await?;
info!("krata daemon initialized");
Ok(Self { Ok(Self {
store, store,
_config: config, _config: config,
@ -152,6 +167,7 @@ impl Daemon {
} }
pub async fn listen(&mut self, addr: ControlDialAddress) -> Result<()> { pub async fn listen(&mut self, addr: ControlDialAddress) -> Result<()> {
debug!("starting API service");
let control_service = DaemonControlService::new( let control_service = DaemonControlService::new(
self.glt.clone(), self.glt.clone(),
self.devices.clone(), self.devices.clone(),

View File

@ -62,14 +62,22 @@ impl ChannelService {
)> { )> {
let (input_sender, input_receiver) = channel(GROUPED_CHANNEL_QUEUE_LEN); let (input_sender, input_receiver) = channel(GROUPED_CHANNEL_QUEUE_LEN);
let (output_sender, output_receiver) = channel(GROUPED_CHANNEL_QUEUE_LEN); let (output_sender, output_receiver) = channel(GROUPED_CHANNEL_QUEUE_LEN);
debug!("opening Xen event channel");
let evtchn = EventChannel::open().await?;
debug!("opening XenStore");
let store = XsdClient::open().await?;
debug!("opening GrantTab");
let gnttab = GrantTab::open()?;
Ok(( Ok((
ChannelService { ChannelService {
typ, typ,
use_reserved_ref, use_reserved_ref,
backends: HashMap::new(), backends: HashMap::new(),
evtchn: EventChannel::open().await?, evtchn,
store: XsdClient::open().await?, store,
gnttab: GrantTab::open()?, gnttab,
input_sender: input_sender.clone(), input_sender: input_sender.clone(),
input_receiver, input_receiver,
output_sender, output_sender,

View File

@ -7,7 +7,7 @@ use std::{
use anyhow::{anyhow, Result}; use anyhow::{anyhow, Result};
use ipnetwork::{Ipv4Network, Ipv6Network}; use ipnetwork::{Ipv4Network, Ipv6Network};
use log::error; use log::{debug, error, trace};
use tokio::sync::RwLock; use tokio::sync::RwLock;
use uuid::Uuid; use uuid::Uuid;
use xenstore::{XsdClient, XsdInterface}; use xenstore::{XsdClient, XsdInterface};
@ -72,7 +72,9 @@ impl IpVendor {
ipv4_network: Ipv4Network, ipv4_network: Ipv4Network,
ipv6_network: Ipv6Network, ipv6_network: Ipv6Network,
) -> Result<Self> { ) -> Result<Self> {
debug!("fetching state from xenstore");
let mut state = IpVendor::fetch_stored_state(&store).await?; let mut state = IpVendor::fetch_stored_state(&store).await?;
debug!("allocating IP set");
let (gateway_ipv4, gateway_ipv6) = let (gateway_ipv4, gateway_ipv6) =
IpVendor::allocate_ipset(&mut state, host_uuid, ipv4_network, ipv6_network)?; IpVendor::allocate_ipset(&mut state, host_uuid, ipv4_network, ipv6_network)?;
let vend = IpVendor { let vend = IpVendor {
@ -84,11 +86,14 @@ impl IpVendor {
gateway_ipv6, gateway_ipv6,
state: Arc::new(RwLock::new(state)), state: Arc::new(RwLock::new(state)),
}; };
debug!("IP vendor initialized!");
Ok(vend) Ok(vend)
} }
async fn fetch_stored_state(store: &XsdClient) -> Result<IpVendorState> { async fn fetch_stored_state(store: &XsdClient) -> Result<IpVendorState> {
debug!("initializing default IP vendor state");
let mut state = IpVendorState::default(); let mut state = IpVendorState::default();
debug!("iterating over xen domains");
for domid_candidate in store.list("/local/domain").await? { for domid_candidate in store.list("/local/domain").await? {
let dom_path = format!("/local/domain/{}", domid_candidate); let dom_path = format!("/local/domain/{}", domid_candidate);
let Some(uuid) = store let Some(uuid) = store
@ -119,6 +124,7 @@ impl IpVendor {
} }
} }
} }
debug!("IP state hydrated");
Ok(state) Ok(state)
} }

View File

@ -4,7 +4,7 @@ use anyhow::{anyhow, Result};
use ip::IpVendor; use ip::IpVendor;
use ipnetwork::{IpNetwork, Ipv4Network, Ipv6Network}; use ipnetwork::{IpNetwork, Ipv4Network, Ipv6Network};
use krataloopdev::LoopControl; use krataloopdev::LoopControl;
use log::error; use log::{debug, error};
use tokio::sync::Semaphore; use tokio::sync::Semaphore;
use uuid::Uuid; use uuid::Uuid;
use xenclient::XenClient; use xenclient::XenClient;
@ -66,15 +66,23 @@ pub struct RuntimeContext {
impl RuntimeContext { impl RuntimeContext {
pub async fn new(host_uuid: Uuid) -> Result<Self> { pub async fn new(host_uuid: Uuid) -> Result<Self> {
debug!("initializing XenClient");
let xen = XenClient::new(0, RuntimePlatform::new()).await?; let xen = XenClient::new(0, RuntimePlatform::new()).await?;
debug!("initializing ip allocation vendor");
let ipv4_network = Ipv4Network::new(Ipv4Addr::new(10, 75, 80, 0), 24)?; let ipv4_network = Ipv4Network::new(Ipv4Addr::new(10, 75, 80, 0), 24)?;
let ipv6_network = Ipv6Network::from_str("fdd4:1476:6c7e::/48")?; let ipv6_network = Ipv6Network::from_str("fdd4:1476:6c7e::/48")?;
let ipvend = let ipvendor =
IpVendor::new(xen.store.clone(), host_uuid, ipv4_network, ipv6_network).await?; IpVendor::new(xen.store.clone(), host_uuid, ipv4_network, ipv6_network).await?;
debug!("initializing loop devices");
let autoloop = AutoLoop::new(LoopControl::open()?);
debug!("krata runtime initialized!");
Ok(RuntimeContext { Ok(RuntimeContext {
autoloop: AutoLoop::new(LoopControl::open()?), autoloop,
xen, xen,
ipvendor: ipvend, ipvendor,
}) })
} }