diff --git a/Cargo.lock b/Cargo.lock index e5be36a..e6d5a9d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1456,9 +1456,11 @@ dependencies = [ "prost", "redb", "scopeguard", + "serde", "signal-hook", "tokio", "tokio-stream", + "toml", "tonic", "uuid", ] @@ -2097,7 +2099,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f4c021e1093a56626774e81216a4ce732a735e5bad4868a03f3ed65ca0c3919" dependencies = [ "once_cell", - "toml_edit", + "toml_edit 0.19.15", ] [[package]] @@ -2571,6 +2573,15 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_spanned" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb3622f419d1296904700073ea6cc23ad690adbd66f13ea683df73298736f0c1" +dependencies = [ + "serde", +] + [[package]] name = "serde_urlencoded" version = "0.7.1" @@ -2985,11 +2996,26 @@ dependencies = [ "tracing", ] +[[package]] +name = "toml" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9dd1545e8208b4a5af1aa9bbd0b4cf7e9ea08fabc5d0a5c67fcaafa17433aa3" +dependencies = [ + "serde", + "serde_spanned", + "toml_datetime", + "toml_edit 0.22.12", +] + [[package]] name = "toml_datetime" version = "0.6.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3550f4e9685620ac18a50ed434eb3aec30db8ba93b0287467bca5826ea25baf1" +dependencies = [ + "serde", +] [[package]] name = "toml_edit" @@ -2999,7 +3025,20 @@ checksum = "1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421" dependencies = [ "indexmap 2.2.6", "toml_datetime", - "winnow", + "winnow 0.5.40", +] + +[[package]] +name = "toml_edit" +version = "0.22.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3328d4f68a705b2a4498da1d580585d39a6510f98318a2cec3018a7ec61ddef" +dependencies = [ + "indexmap 2.2.6", + "serde", + "serde_spanned", + "toml_datetime", + "winnow 0.6.6", ] [[package]] @@ -3510,6 +3549,15 @@ dependencies = [ "memchr", ] +[[package]] +name = "winnow" +version = "0.6.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0c976aaaa0e1f90dbb21e9587cdaf1d9679a1cde8875c0d6bd83ab96a208352" +dependencies = [ + "memchr", +] + [[package]] name = "winreg" version = "0.52.0" diff --git a/Cargo.toml b/Cargo.toml index 4fabbd4..76e0ccc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -77,6 +77,7 @@ sysinfo = "0.30.11" termtree = "0.4.1" thiserror = "1.0" tokio-tun = "0.11.4" +toml = "0.8.12" tonic-build = "0.11.0" tower = "0.4.13" udp-stream = "0.0.11" diff --git a/crates/ctl/src/cli/launch.rs b/crates/ctl/src/cli/launch.rs index aa52141..64ab4fe 100644 --- a/crates/ctl/src/cli/launch.rs +++ b/crates/ctl/src/cli/launch.rs @@ -6,8 +6,8 @@ use krata::{ events::EventStream, v1::{ common::{ - guest_image_spec::Image, GuestImageSpec, GuestOciImageSpec, GuestSpec, GuestStatus, - GuestTaskSpec, GuestTaskSpecEnvVar, OciImageFormat, + guest_image_spec::Image, GuestImageSpec, GuestOciImageSpec, GuestSpec, GuestSpecDevice, + GuestStatus, GuestTaskSpec, GuestTaskSpecEnvVar, OciImageFormat, }, control::{ control_service_client::ControlServiceClient, watch_events_reply::Event, @@ -50,6 +50,8 @@ pub struct LaunchCommand { help = "Memory available to the guest, in megabytes" )] mem: u64, + #[arg[short = 'D', long = "device", help = "Devices to request for the guest"]] + device: Vec, #[arg[short, long, help = "Environment variables set in the guest"]] env: Option>, #[arg( @@ -135,6 +137,11 @@ impl LaunchCommand { working_directory: self.working_directory.unwrap_or_default(), }), annotations: vec![], + devices: self + .device + .iter() + .map(|name| GuestSpecDevice { name: name.clone() }) + .collect(), }), }; let response = client diff --git a/crates/ctl/src/cli/list_devices.rs b/crates/ctl/src/cli/list_devices.rs new file mode 100644 index 0000000..b00e5e0 --- /dev/null +++ b/crates/ctl/src/cli/list_devices.rs @@ -0,0 +1,128 @@ +use anyhow::Result; +use clap::{Parser, ValueEnum}; +use comfy_table::{presets::UTF8_FULL_CONDENSED, Cell, Color, Table}; +use krata::{ + events::EventStream, + v1::control::{control_service_client::ControlServiceClient, DeviceInfo, ListDevicesRequest}, +}; + +use serde_json::Value; +use tonic::transport::Channel; + +use crate::format::{kv2line, proto2dynamic, proto2kv}; + +#[derive(ValueEnum, Clone, Debug, PartialEq, Eq)] +enum ListDevicesFormat { + Table, + Json, + JsonPretty, + Jsonl, + Yaml, + KeyValue, + Simple, +} + +#[derive(Parser)] +#[command(about = "List the devices on the hypervisor")] +pub struct ListDevicesCommand { + #[arg(short, long, default_value = "table", help = "Output format")] + format: ListDevicesFormat, +} + +impl ListDevicesCommand { + pub async fn run( + self, + mut client: ControlServiceClient, + _events: EventStream, + ) -> Result<()> { + let reply = client + .list_devices(ListDevicesRequest {}) + .await? + .into_inner(); + let mut devices = reply.devices; + + devices.sort_by(|a, b| a.name.cmp(&b.name)); + + match self.format { + ListDevicesFormat::Table => { + self.print_devices_table(devices)?; + } + + ListDevicesFormat::Simple => { + for device in devices { + println!("{}\t{}\t{}", device.name, device.claimed, device.owner); + } + } + + ListDevicesFormat::Json | ListDevicesFormat::JsonPretty | ListDevicesFormat::Yaml => { + let mut values = Vec::new(); + for device in devices { + let message = proto2dynamic(device)?; + values.push(serde_json::to_value(message)?); + } + let value = Value::Array(values); + let encoded = if self.format == ListDevicesFormat::JsonPretty { + serde_json::to_string_pretty(&value)? + } else if self.format == ListDevicesFormat::Yaml { + serde_yaml::to_string(&value)? + } else { + serde_json::to_string(&value)? + }; + println!("{}", encoded.trim()); + } + + ListDevicesFormat::Jsonl => { + for device in devices { + let message = proto2dynamic(device)?; + println!("{}", serde_json::to_string(&message)?); + } + } + + ListDevicesFormat::KeyValue => { + self.print_key_value(devices)?; + } + } + + Ok(()) + } + + fn print_devices_table(&self, devices: Vec) -> Result<()> { + let mut table = Table::new(); + table.load_preset(UTF8_FULL_CONDENSED); + table.set_content_arrangement(comfy_table::ContentArrangement::Dynamic); + table.set_header(vec!["name", "status", "owner"]); + for device in devices { + let status_text = if device.claimed { + "claimed" + } else { + "available" + }; + + let status_color = if device.claimed { + Color::Blue + } else { + Color::Green + }; + + table.add_row(vec![ + Cell::new(device.name), + Cell::new(status_text).fg(status_color), + Cell::new(device.owner), + ]); + } + if table.is_empty() { + println!("no devices configured"); + } else { + println!("{}", table); + } + Ok(()) + } + + fn print_key_value(&self, devices: Vec) -> Result<()> { + for device in devices { + let kvs = proto2kv(device)?; + println!("{}", kv2line(kvs)); + } + Ok(()) + } +} diff --git a/crates/ctl/src/cli/mod.rs b/crates/ctl/src/cli/mod.rs index cc549cf..257539c 100644 --- a/crates/ctl/src/cli/mod.rs +++ b/crates/ctl/src/cli/mod.rs @@ -5,6 +5,7 @@ pub mod identify_host; pub mod idm_snoop; pub mod launch; pub mod list; +pub mod list_devices; pub mod logs; pub mod metrics; pub mod pull; @@ -24,8 +25,9 @@ use tonic::{transport::Channel, Request}; use self::{ attach::AttachCommand, destroy::DestroyCommand, exec::ExecCommand, identify_host::IdentifyHostCommand, idm_snoop::IdmSnoopCommand, launch::LaunchCommand, - list::ListCommand, logs::LogsCommand, metrics::MetricsCommand, pull::PullCommand, - resolve::ResolveCommand, top::TopCommand, watch::WatchCommand, + list::ListCommand, list_devices::ListDevicesCommand, logs::LogsCommand, + metrics::MetricsCommand, pull::PullCommand, resolve::ResolveCommand, top::TopCommand, + watch::WatchCommand, }; #[derive(Parser)] @@ -51,6 +53,7 @@ pub enum Commands { Launch(LaunchCommand), Destroy(DestroyCommand), List(ListCommand), + ListDevices(ListDevicesCommand), Attach(AttachCommand), Pull(PullCommand), Logs(LogsCommand), @@ -120,6 +123,10 @@ impl ControlCommand { Commands::Exec(exec) => { exec.run(client).await?; } + + Commands::ListDevices(list) => { + list.run(client, events).await?; + } } Ok(()) } diff --git a/crates/daemon/Cargo.toml b/crates/daemon/Cargo.toml index 77a9ad6..7a08dd2 100644 --- a/crates/daemon/Cargo.toml +++ b/crates/daemon/Cargo.toml @@ -24,9 +24,11 @@ log = { workspace = true } prost = { workspace = true } redb = { workspace = true } scopeguard = { workspace = true } +serde = { workspace = true } signal-hook = { workspace = true } tokio = { workspace = true } tokio-stream = { workspace = true } +toml = { workspace = true } krata-tokio-tar = { workspace = true } tonic = { workspace = true, features = ["tls"] } uuid = { workspace = true } diff --git a/crates/daemon/src/config.rs b/crates/daemon/src/config.rs new file mode 100644 index 0000000..f277015 --- /dev/null +++ b/crates/daemon/src/config.rs @@ -0,0 +1,55 @@ +use std::{collections::HashMap, path::Path}; + +use anyhow::Result; +use serde::{Deserialize, Serialize}; +use tokio::fs; + +#[derive(Serialize, Deserialize, Clone, Debug, Default)] +pub struct DaemonConfig { + #[serde(default)] + pub pci: DaemonPciConfig, +} + +#[derive(Serialize, Deserialize, Clone, Debug, Default)] +pub struct DaemonPciConfig { + #[serde(default)] + pub devices: HashMap, +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +pub struct DaemonPciDeviceConfig { + pub locations: Vec, + #[serde(default)] + pub permissive: bool, + #[serde(default)] + #[serde(rename = "msi-translate")] + pub msi_translate: bool, + #[serde(default)] + #[serde(rename = "power-management")] + pub power_management: bool, + #[serde(default)] + #[serde(rename = "rdm-reserve-policy")] + pub rdm_reserve_policy: DaemonPciDeviceRdmReservePolicy, +} + +#[derive(Serialize, Deserialize, Clone, Debug, Default)] +pub enum DaemonPciDeviceRdmReservePolicy { + #[default] + #[serde(rename = "strict")] + Strict, + #[serde(rename = "relaxed")] + Relaxed, +} + +impl DaemonConfig { + pub async fn load(path: &Path) -> Result { + if path.exists() { + let content = fs::read_to_string(path).await?; + let config: DaemonConfig = toml::from_str(&content)?; + Ok(config) + } else { + fs::write(&path, "").await?; + Ok(DaemonConfig::default()) + } + } +} diff --git a/crates/daemon/src/control.rs b/crates/daemon/src/control.rs index 486adfd..e24a2aa 100644 --- a/crates/daemon/src/control.rs +++ b/crates/daemon/src/control.rs @@ -11,10 +11,11 @@ use krata::{ control::{ control_service_server::ControlService, ConsoleDataReply, ConsoleDataRequest, CreateGuestReply, CreateGuestRequest, DestroyGuestReply, DestroyGuestRequest, - ExecGuestReply, ExecGuestRequest, IdentifyHostReply, IdentifyHostRequest, - ListGuestsReply, ListGuestsRequest, PullImageReply, PullImageRequest, - ReadGuestMetricsReply, ReadGuestMetricsRequest, ResolveGuestReply, ResolveGuestRequest, - SnoopIdmReply, SnoopIdmRequest, WatchEventsReply, WatchEventsRequest, + DeviceInfo, ExecGuestReply, ExecGuestRequest, IdentifyHostReply, IdentifyHostRequest, + ListDevicesReply, ListDevicesRequest, ListGuestsReply, ListGuestsRequest, + PullImageReply, PullImageRequest, ReadGuestMetricsReply, ReadGuestMetricsRequest, + ResolveGuestReply, ResolveGuestRequest, SnoopIdmReply, SnoopIdmRequest, + WatchEventsReply, WatchEventsRequest, }, }, }; @@ -35,8 +36,8 @@ use uuid::Uuid; use crate::{ command::DaemonCommand, console::DaemonConsoleHandle, db::GuestStore, - event::DaemonEventContext, glt::GuestLookupTable, idm::DaemonIdmHandle, - metrics::idm_metric_to_api, oci::convert_oci_progress, + devices::DaemonDeviceManager, event::DaemonEventContext, glt::GuestLookupTable, + idm::DaemonIdmHandle, metrics::idm_metric_to_api, oci::convert_oci_progress, }; pub struct ApiError { @@ -60,6 +61,7 @@ impl From for Status { #[derive(Clone)] pub struct DaemonControlService { glt: GuestLookupTable, + devices: DaemonDeviceManager, events: DaemonEventContext, console: DaemonConsoleHandle, idm: DaemonIdmHandle, @@ -69,8 +71,10 @@ pub struct DaemonControlService { } impl DaemonControlService { + #[allow(clippy::too_many_arguments)] pub fn new( glt: GuestLookupTable, + devices: DaemonDeviceManager, events: DaemonEventContext, console: DaemonConsoleHandle, idm: DaemonIdmHandle, @@ -80,6 +84,7 @@ impl DaemonControlService { ) -> Self { Self { glt, + devices, events, console, idm, @@ -524,4 +529,23 @@ impl ControlService for DaemonControlService { }; Ok(Response::new(Box::pin(output) as Self::SnoopIdmStream)) } + + async fn list_devices( + &self, + request: Request, + ) -> Result, Status> { + let _ = request.into_inner(); + let mut devices = Vec::new(); + let state = self.devices.copy().await.map_err(|error| ApiError { + message: error.to_string(), + })?; + for (name, state) in state { + devices.push(DeviceInfo { + name, + claimed: state.owner.is_some(), + owner: state.owner.map(|x| x.to_string()).unwrap_or_default(), + }); + } + Ok(Response::new(ListDevicesReply { devices })) + } } diff --git a/crates/daemon/src/devices.rs b/crates/daemon/src/devices.rs new file mode 100644 index 0000000..fe63073 --- /dev/null +++ b/crates/daemon/src/devices.rs @@ -0,0 +1,106 @@ +use std::{collections::HashMap, sync::Arc}; + +use anyhow::{anyhow, Result}; +use log::warn; +use tokio::sync::RwLock; +use uuid::Uuid; + +use crate::config::{DaemonConfig, DaemonPciDeviceConfig}; + +#[derive(Clone)] +pub struct DaemonDeviceState { + pub pci: Option, + pub owner: Option, +} + +#[derive(Clone)] +pub struct DaemonDeviceManager { + config: Arc, + devices: Arc>>, +} + +impl DaemonDeviceManager { + pub fn new(config: Arc) -> Self { + Self { + config, + devices: Arc::new(RwLock::new(HashMap::new())), + } + } + + pub async fn claim(&self, device: &str, uuid: Uuid) -> Result { + let mut devices = self.devices.write().await; + let Some(state) = devices.get_mut(device) else { + return Err(anyhow!( + "unable to claim unknown device '{}' for guest {}", + device, + uuid + )); + }; + + if let Some(owner) = state.owner { + return Err(anyhow!( + "unable to claim device '{}' for guest {}: already claimed by {}", + device, + uuid, + owner + )); + } + + state.owner = Some(uuid); + Ok(state.clone()) + } + + pub async fn release_all(&self, uuid: Uuid) -> Result<()> { + let mut devices = self.devices.write().await; + for state in (*devices).values_mut() { + if state.owner == Some(uuid) { + state.owner = None; + } + } + Ok(()) + } + + pub async fn release(&self, device: &str, uuid: Uuid) -> Result<()> { + let mut devices = self.devices.write().await; + let Some(state) = devices.get_mut(device) else { + return Ok(()); + }; + + if let Some(owner) = state.owner { + if owner != uuid { + return Ok(()); + } + } + + state.owner = None; + Ok(()) + } + + pub async fn update_claims(&self, claims: HashMap) -> Result<()> { + let mut devices = self.devices.write().await; + devices.clear(); + for (name, pci) in &self.config.pci.devices { + let owner = claims.get(name).cloned(); + devices.insert( + name.clone(), + DaemonDeviceState { + owner, + pci: Some(pci.clone()), + }, + ); + } + + for (name, uuid) in &claims { + if !devices.contains_key(name) { + warn!("unknown device '{}' assigned to guest {}", name, uuid); + } + } + + Ok(()) + } + + pub async fn copy(&self) -> Result> { + let devices = self.devices.read().await; + Ok(devices.clone()) + } +} diff --git a/crates/daemon/src/lib.rs b/crates/daemon/src/lib.rs index 7df39a2..98e3fd1 100644 --- a/crates/daemon/src/lib.rs +++ b/crates/daemon/src/lib.rs @@ -1,9 +1,11 @@ -use std::{net::SocketAddr, path::PathBuf, str::FromStr}; +use std::{net::SocketAddr, path::PathBuf, str::FromStr, sync::Arc}; use anyhow::{anyhow, Result}; +use config::DaemonConfig; use console::{DaemonConsole, DaemonConsoleHandle}; use control::DaemonControlService; use db::GuestStore; +use devices::DaemonDeviceManager; use event::{DaemonEventContext, DaemonEventGenerator}; use glt::GuestLookupTable; use idm::{DaemonIdm, DaemonIdmHandle}; @@ -23,9 +25,11 @@ use tonic::transport::{Identity, Server, ServerTlsConfig}; use uuid::Uuid; pub mod command; +pub mod config; pub mod console; pub mod control; pub mod db; +pub mod devices; pub mod event; pub mod glt; pub mod idm; @@ -35,7 +39,9 @@ pub mod reconcile; pub struct Daemon { store: String, + _config: Arc, glt: GuestLookupTable, + devices: DaemonDeviceManager, guests: GuestStore, events: DaemonEventContext, guest_reconciler_task: JoinHandle<()>, @@ -50,12 +56,20 @@ const GUEST_RECONCILER_QUEUE_LEN: usize = 1000; impl Daemon { pub async fn new(store: String) -> Result { - let mut image_cache_dir = PathBuf::from(store.clone()); + let store_dir = PathBuf::from(store.clone()); + let mut config_path = store_dir.clone(); + config_path.push("config.toml"); + + let config = DaemonConfig::load(&config_path).await?; + let config = Arc::new(config); + let devices = DaemonDeviceManager::new(config.clone()); + + let mut image_cache_dir = store_dir.clone(); image_cache_dir.push("cache"); image_cache_dir.push("image"); fs::create_dir_all(&image_cache_dir).await?; - let mut host_uuid_path = PathBuf::from(store.clone()); + let mut host_uuid_path = store_dir.clone(); host_uuid_path.push("host.uuid"); let host_uuid = if host_uuid_path.is_file() { let content = fs::read_to_string(&host_uuid_path).await?; @@ -93,6 +107,7 @@ impl Daemon { .await?; let runtime_for_reconciler = runtime.dupe().await?; let guest_reconciler = GuestReconciler::new( + devices.clone(), glt.clone(), guests.clone(), events.clone(), @@ -108,7 +123,9 @@ impl Daemon { Ok(Self { store, + _config: config, glt, + devices, guests, events, guest_reconciler_task, @@ -123,6 +140,7 @@ impl Daemon { pub async fn listen(&mut self, addr: ControlDialAddress) -> Result<()> { let control_service = DaemonControlService::new( self.glt.clone(), + self.devices.clone(), self.events.clone(), self.console.clone(), self.idm.clone(), diff --git a/crates/daemon/src/reconcile/guest/mod.rs b/crates/daemon/src/reconcile/guest/mod.rs index 64fdc67..e4ac7b3 100644 --- a/crates/daemon/src/reconcile/guest/mod.rs +++ b/crates/daemon/src/reconcile/guest/mod.rs @@ -26,6 +26,7 @@ use uuid::Uuid; use crate::{ db::GuestStore, + devices::DaemonDeviceManager, event::{DaemonEvent, DaemonEventContext}, glt::GuestLookupTable, }; @@ -55,6 +56,7 @@ impl Drop for GuestReconcilerEntry { #[derive(Clone)] pub struct GuestReconciler { + devices: DaemonDeviceManager, glt: GuestLookupTable, guests: GuestStore, events: DaemonEventContext, @@ -70,6 +72,7 @@ pub struct GuestReconciler { impl GuestReconciler { #[allow(clippy::too_many_arguments)] pub fn new( + devices: DaemonDeviceManager, glt: GuestLookupTable, guests: GuestStore, events: DaemonEventContext, @@ -80,6 +83,7 @@ impl GuestReconciler { initrd_path: PathBuf, ) -> Result { Ok(Self { + devices, glt, guests, events, @@ -152,6 +156,8 @@ impl GuestReconciler { self.guests.remove(guest.uuid).await?; } + let mut device_claims = HashMap::new(); + for (uuid, mut stored_guest) in stored_guests { let previous_guest = stored_guest.clone(); let runtime_guest = runtime_guests.iter().find(|x| x.uuid == uuid); @@ -173,6 +179,17 @@ impl GuestReconciler { } else { state.status = GuestStatus::Started.into(); } + + for device in &stored_guest + .spec + .as_ref() + .cloned() + .unwrap_or_default() + .devices + { + device_claims.insert(device.name.clone(), uuid); + } + state.network = Some(guestinfo_to_networkstate(runtime)); stored_guest.state = Some(state); } @@ -185,6 +202,9 @@ impl GuestReconciler { let _ = self.guest_reconciler_notify.try_send(uuid); } } + + self.devices.update_claims(device_claims).await?; + Ok(()) } @@ -255,6 +275,7 @@ impl GuestReconciler { async fn start(&self, uuid: Uuid, guest: &mut Guest) -> Result { let starter = GuestStarter { + devices: &self.devices, kernel_path: &self.kernel_path, initrd_path: &self.initrd_path, packer: &self.packer, @@ -293,6 +314,7 @@ impl GuestReconciler { host: self.glt.host_uuid().to_string(), domid: domid.unwrap_or(u32::MAX), }); + self.devices.release_all(uuid).await?; Ok(GuestReconcilerResult::Changed { rerun: false }) } diff --git a/crates/daemon/src/reconcile/guest/start.rs b/crates/daemon/src/reconcile/guest/start.rs index 639f058..bd362b8 100644 --- a/crates/daemon/src/reconcile/guest/start.rs +++ b/crates/daemon/src/reconcile/guest/start.rs @@ -1,5 +1,7 @@ use std::collections::HashMap; use std::path::{Path, PathBuf}; +use std::str::FromStr; +use std::sync::atomic::{AtomicBool, Ordering}; use anyhow::{anyhow, Result}; use futures::StreamExt; @@ -7,6 +9,7 @@ use krata::launchcfg::LaunchPackedFormat; use krata::v1::common::GuestOciImageSpec; use krata::v1::common::{guest_image_spec::Image, Guest, GuestState, GuestStatus, OciImageFormat}; use krataoci::packer::{service::OciPackerService, OciPackedFormat}; +use kratart::launch::{PciBdf, PciDevice, PciRdmReservePolicy}; use kratart::{launch::GuestLaunchRequest, Runtime}; use log::info; @@ -15,6 +18,8 @@ use tokio::io::AsyncReadExt; use tokio_tar::Archive; use uuid::Uuid; +use crate::config::DaemonPciDeviceRdmReservePolicy; +use crate::devices::DaemonDeviceManager; use crate::{ glt::GuestLookupTable, reconcile::guest::{guestinfo_to_networkstate, GuestReconcilerResult}, @@ -24,6 +29,7 @@ use crate::{ const OCI_SPEC_TAR_FILE_MAX_SIZE: usize = 100 * 1024 * 1024; pub struct GuestStarter<'a> { + pub devices: &'a DaemonDeviceManager, pub kernel_path: &'a Path, pub initrd_path: &'a Path, pub packer: &'a OciPackerService, @@ -135,6 +141,48 @@ impl GuestStarter<'_> { fs::read(&self.initrd_path).await? }; + let success = AtomicBool::new(false); + + let _device_release_guard = scopeguard::guard( + (spec.devices.clone(), self.devices.clone()), + |(devices, manager)| { + if !success.load(Ordering::Acquire) { + tokio::task::spawn(async move { + for device in devices { + let _ = manager.release(&device.name, uuid).await; + } + }); + } + }, + ); + + let mut pcis = Vec::new(); + for device in &spec.devices { + let state = self.devices.claim(&device.name, uuid).await?; + if let Some(cfg) = state.pci { + for location in cfg.locations { + let pci = PciDevice { + bdf: PciBdf::from_str(&location)?.with_domain(0), + permissive: cfg.permissive, + msi_translate: cfg.msi_translate, + power_management: cfg.power_management, + rdm_reserve_policy: match cfg.rdm_reserve_policy { + DaemonPciDeviceRdmReservePolicy::Strict => PciRdmReservePolicy::Strict, + DaemonPciDeviceRdmReservePolicy::Relaxed => { + PciRdmReservePolicy::Relaxed + } + }, + }; + pcis.push(pci); + } + } else { + return Err(anyhow!( + "device '{}' isn't a known device type", + device.name + )); + } + } + let info = self .runtime .launch(GuestLaunchRequest { @@ -150,7 +198,7 @@ impl GuestStarter<'_> { initrd, vcpus: spec.vcpus, mem: spec.mem, - pcis: vec![], + pcis, env: task .environment .iter() @@ -170,6 +218,7 @@ impl GuestStarter<'_> { host: self.glt.host_uuid().to_string(), domid: info.domid, }); + success.store(true, Ordering::Release); Ok(GuestReconcilerResult::Changed { rerun: false }) } } diff --git a/crates/krata/proto/krata/v1/common.proto b/crates/krata/proto/krata/v1/common.proto index d5e18c8..75f8b1e 100644 --- a/crates/krata/proto/krata/v1/common.proto +++ b/crates/krata/proto/krata/v1/common.proto @@ -25,6 +25,7 @@ message GuestSpec { uint64 mem = 6; GuestTaskSpec task = 7; repeated GuestSpecAnnotation annotations = 8; + repeated GuestSpecDevice devices = 9; } message GuestImageSpec { @@ -62,6 +63,10 @@ message GuestSpecAnnotation { string value = 2; } +message GuestSpecDevice { + string name = 1; +} + message GuestState { GuestStatus status = 1; GuestNetworkState network = 2; diff --git a/crates/krata/proto/krata/v1/control.proto b/crates/krata/proto/krata/v1/control.proto index ca49f5b..a91f038 100644 --- a/crates/krata/proto/krata/v1/control.proto +++ b/crates/krata/proto/krata/v1/control.proto @@ -16,6 +16,7 @@ service ControlService { rpc DestroyGuest(DestroyGuestRequest) returns (DestroyGuestReply); rpc ResolveGuest(ResolveGuestRequest) returns (ResolveGuestReply); rpc ListGuests(ListGuestsRequest) returns (ListGuestsReply); + rpc ListDevices(ListDevicesRequest) returns (ListDevicesReply); rpc ExecGuest(stream ExecGuestRequest) returns (stream ExecGuestReply); @@ -187,3 +188,15 @@ message PullImageReply { string digest = 2; krata.v1.common.OciImageFormat format = 3; } + +message DeviceInfo { + string name = 1; + bool claimed = 2; + string owner = 3; +} + +message ListDevicesRequest {} + +message ListDevicesReply { + repeated DeviceInfo devices = 1; +} diff --git a/crates/runtime/src/launch.rs b/crates/runtime/src/launch.rs index 1b247bb..0bd49fa 100644 --- a/crates/runtime/src/launch.rs +++ b/crates/runtime/src/launch.rs @@ -13,8 +13,7 @@ use krata::launchcfg::{ use krataoci::packer::OciPackedImage; use tokio::sync::Semaphore; use uuid::Uuid; -pub use xenclient::pci::PciBdf; -use xenclient::{DomainChannel, DomainConfig, DomainDisk, DomainNetworkInterface, DomainPciDevice}; +use xenclient::{DomainChannel, DomainConfig, DomainDisk, DomainNetworkInterface}; use xenstore::XsdInterface; use crate::cfgblk::ConfigBlock; @@ -22,6 +21,10 @@ use crate::RuntimeContext; use super::{GuestInfo, GuestState}; +pub use xenclient::{ + pci::PciBdf, DomainPciDevice as PciDevice, DomainPciRdmReservePolicy as PciRdmReservePolicy, +}; + pub struct GuestLaunchRequest { pub format: LaunchPackedFormat, pub kernel: Vec, @@ -32,7 +35,7 @@ pub struct GuestLaunchRequest { pub mem: u64, pub env: HashMap, pub run: Option>, - pub pcis: Vec, + pub pcis: Vec, pub debug: bool, pub image: OciPackedImage, } @@ -206,12 +209,7 @@ impl GuestLauncher { bridge: None, script: None, }], - pcis: request - .pcis - .clone() - .into_iter() - .map(|bdf| DomainPciDevice { bdf }) - .collect::>(), + pcis: request.pcis.clone(), filesystems: vec![], event_channels: vec![], extra_keys, diff --git a/crates/xen/xenclient/src/error.rs b/crates/xen/xenclient/src/error.rs index d5a473a..7f6a059 100644 --- a/crates/xen/xenclient/src/error.rs +++ b/crates/xen/xenclient/src/error.rs @@ -1,5 +1,7 @@ use std::io; +use crate::pci::PciBdf; + #[derive(thiserror::Error, Debug)] pub enum Error { #[error("io issue encountered: {0}")] @@ -42,6 +44,8 @@ pub enum Error { ParseIntError(#[from] std::num::ParseIntError), #[error("invalid pci bdf string")] InvalidPciBdfString, + #[error("pci device {0} is not assignable")] + PciDeviceNotAssignable(PciBdf), } pub type Result = std::result::Result; diff --git a/crates/xen/xenclient/src/lib.rs b/crates/xen/xenclient/src/lib.rs index df02ae3..1ac2450 100644 --- a/crates/xen/xenclient/src/lib.rs +++ b/crates/xen/xenclient/src/lib.rs @@ -25,6 +25,7 @@ use pci::{PciBdf, XenPciBackend}; use sys::XEN_PAGE_SHIFT; use tokio::time::timeout; +use std::collections::HashMap; use std::path::PathBuf; use std::str::FromStr; use std::time::Duration; @@ -82,9 +83,31 @@ pub struct DomainEventChannel { pub name: String, } +#[derive(Clone, Debug, Default)] +pub enum DomainPciRdmReservePolicy { + Invalid, + #[default] + Strict, + Relaxed, +} + +impl DomainPciRdmReservePolicy { + pub fn to_option_str(&self) -> &str { + match self { + DomainPciRdmReservePolicy::Invalid => "-1", + DomainPciRdmReservePolicy::Strict => "0", + DomainPciRdmReservePolicy::Relaxed => "1", + } + } +} + #[derive(Clone, Debug)] pub struct DomainPciDevice { pub bdf: PciBdf, + pub permissive: bool, + pub msi_translate: bool, + pub power_management: bool, + pub rdm_reserve_policy: DomainPciRdmReservePolicy, } #[derive(Clone, Debug)] @@ -680,6 +703,9 @@ impl XenClient { device: &DomainPciDevice, ) -> Result<()> { let backend = XenPciBackend::new(); + if !backend.is_assigned(&device.bdf).await? { + return Err(Error::PciDeviceNotAssignable(device.bdf)); + } let resources = backend.read_resources(&device.bdf).await?; for resource in resources { if resource.is_bar_io() { @@ -748,11 +774,23 @@ impl XenClient { ) .await?; } + + let mut options = HashMap::new(); + options.insert("permissive", if device.permissive { "1" } else { "0" }); + options.insert("rdm_policy", device.rdm_reserve_policy.to_option_str()); + options.insert("msitranslate", if device.msi_translate { "1" } else { "0" }); + options.insert( + "power_mgmt", + if device.power_management { "1" } else { "0" }, + ); + let options = options + .into_iter() + .map(|(key, value)| format!("{}={}", key, value)) + .collect::>() + .join(","); + self.store - .write_string( - format!("{}/opts-{}", backend_path, index), - "msitranslate=0,power_mgmt=0,permissive=0,rdm_policy=0", - ) + .write_string(format!("{}/opts-{}", backend_path, index), &options) .await?; Ok(()) @@ -922,21 +960,4 @@ impl XenClient { tx.commit().await?; Ok(()) } - - pub async fn get_console_path(&self, domid: u32) -> Result { - let dom_path = self.store.get_domain_path(domid).await?; - let console_tty_path = format!("{}/console/tty", dom_path); - let mut tty: Option = None; - for _ in 0..5 { - tty = self.store.read_string(&console_tty_path).await?; - if tty.is_some() { - break; - } - tokio::time::sleep(Duration::from_millis(200)).await; - } - let Some(tty) = tty else { - return Err(Error::TtyNotFound); - }; - Ok(tty) - } } diff --git a/crates/xen/xenclient/src/pci.rs b/crates/xen/xenclient/src/pci.rs index 08bfb60..3036ac8 100644 --- a/crates/xen/xenclient/src/pci.rs +++ b/crates/xen/xenclient/src/pci.rs @@ -139,6 +139,16 @@ impl PciBdf { vdefn, } } + + pub fn with_domain(&self, domain: u32) -> PciBdf { + PciBdf { + domain: Some(domain), + bus: self.bus, + device: self.device, + function: self.function, + vdefn: self.vdefn, + } + } } impl FromStr for PciBdf {