chore(xen): move device creation into transaction interface (#196)

* chore(xen): move domain creation to xenplatform

* chore(xen): move device transactions into separate interface
This commit is contained in:
Alex Zenla 2024-06-21 10:38:19 -07:00 committed by GitHub
parent 6f39f115b7
commit ef068e790c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 756 additions and 747 deletions

View File

@ -15,6 +15,7 @@ use krataoci::packer::OciPackedImage;
use tokio::sync::Semaphore;
use uuid::Uuid;
use xenclient::{DomainChannel, DomainConfig, DomainDisk, DomainNetworkInterface};
use xenplatform::domain::BaseDomainConfig;
use crate::cfgblk::ConfigBlock;
use crate::RuntimeContext;
@ -220,13 +221,18 @@ impl GuestLauncher {
}
let config = DomainConfig {
backend_domid: 0,
name: xen_name,
base: BaseDomainConfig {
max_vcpus: request.vcpus,
mem_mb: request.mem,
kernel: request.kernel,
initrd: request.initrd,
cmdline,
uuid,
owner_domid: 0,
enable_iommu: true,
},
backend_domid: 0,
name: xen_name,
swap_console_backend: Some("krata-console".to_string()),
disks,
channels: vec![DomainChannel {
@ -241,7 +247,6 @@ impl GuestLauncher {
}],
pcis: request.pcis.clone(),
filesystems: vec![],
event_channels: vec![],
extra_keys,
extra_rw_paths: vec!["krata/guest".to_string()],
};

View File

@ -1,7 +1,6 @@
/// Handwritten hypercall bindings.
use nix::ioctl_readwrite_bad;
use std::ffi::{c_char, c_int, c_uint, c_ulong};
use uuid::Uuid;
#[repr(C)]
#[derive(Copy, Clone, Debug)]
@ -269,7 +268,7 @@ impl Default for CreateDomain {
fn default() -> Self {
CreateDomain {
ssidref: SECINITSID_DOMU,
handle: Uuid::new_v4().into_bytes(),
handle: [0; 16],
flags: 0,
iommu_opts: 0,
max_vcpus: 1,

View File

@ -1,7 +1,9 @@
use std::{env, process};
use tokio::fs;
use uuid::Uuid;
use xenclient::error::Result;
use xenclient::{DomainConfig, XenClient};
use xenplatform::domain::BaseDomainConfig;
#[cfg(target_arch = "x86_64")]
type RuntimePlatform = xenplatform::x86pv::X86PvPlatform;
@ -22,13 +24,18 @@ async fn main() -> Result<()> {
let initrd_path = args.get(2).expect("argument not specified");
let client = XenClient::new(0, RuntimePlatform::new()).await?;
let config = DomainConfig {
backend_domid: 0,
name: "xenclient-test".to_string(),
base: BaseDomainConfig {
uuid: Uuid::new_v4(),
max_vcpus: 1,
mem_mb: 512,
enable_iommu: true,
kernel: fs::read(&kernel_image_path).await?,
initrd: fs::read(&initrd_path).await?,
cmdline: "earlyprintk=xen earlycon=xen console=hvc0 init=/init".to_string(),
owner_domid: 0,
},
backend_domid: 0,
name: "xenclient-test".to_string(),
swap_console_backend: None,
disks: vec![],
channels: vec![],
@ -37,7 +44,6 @@ async fn main() -> Result<()> {
filesystems: vec![],
extra_keys: vec![],
extra_rw_paths: vec![],
event_channels: vec![],
};
let created = client.create(&config).await?;
println!("created domain {}", created.domid);

View File

@ -1,33 +1,28 @@
pub mod error;
use crate::error::{Error, Result};
use indexmap::IndexMap;
use log::{debug, trace, warn};
use pci::{PciBdf, XenPciBackend};
use log::{debug, trace};
use pci::PciBdf;
use tokio::time::timeout;
use xenplatform::boot::{BootDomain, BootSetup, BootSetupPlatform};
use xenplatform::elfloader::ElfImageLoader;
use xenplatform::sys::XEN_PAGE_SHIFT;
use tx::ClientTransaction;
use xenplatform::boot::BootSetupPlatform;
use xenplatform::domain::{BaseDomainConfig, BaseDomainManager, CreatedDomain};
use std::path::PathBuf;
use std::str::FromStr;
use std::sync::Arc;
use std::time::Duration;
use uuid::Uuid;
use xencall::sys::{CreateDomain, DOMCTL_DEV_RDM_RELAXED};
use xencall::XenCall;
use xenstore::{
XsPermission, XsdClient, XsdInterface, XsdTransaction, XS_PERM_NONE, XS_PERM_READ,
XS_PERM_READ_WRITE,
};
use xenstore::{XsdClient, XsdInterface};
pub mod pci;
pub mod tx;
#[derive(Clone)]
pub struct XenClient<P: BootSetupPlatform> {
pub store: XsdClient,
call: XenCall,
platform: Arc<P>,
domain_manager: Arc<BaseDomainManager<P>>,
}
#[derive(Clone, Debug)]
@ -98,19 +93,14 @@ pub struct DomainPciDevice {
#[derive(Clone, Debug)]
pub struct DomainConfig {
pub base: BaseDomainConfig,
pub backend_domid: u32,
pub name: String,
pub max_vcpus: u32,
pub mem_mb: u64,
pub kernel: Vec<u8>,
pub initrd: Vec<u8>,
pub cmdline: String,
pub disks: Vec<DomainDisk>,
pub swap_console_backend: Option<String>,
pub channels: Vec<DomainChannel>,
pub vifs: Vec<DomainNetworkInterface>,
pub filesystems: Vec<DomainFilesystem>,
pub event_channels: Vec<DomainEventChannel>,
pub pcis: Vec<DomainPciDevice>,
pub extra_keys: Vec<(String, String)>,
pub extra_rw_paths: Vec<String>,
@ -122,228 +112,55 @@ pub struct CreatedChannel {
pub evtchn: u32,
}
#[derive(Debug)]
pub struct CreatedDomain {
pub domid: u32,
pub channels: Vec<CreatedChannel>,
}
#[allow(clippy::too_many_arguments)]
impl<P: BootSetupPlatform> XenClient<P> {
pub async fn new(current_domid: u32, platform: P) -> Result<XenClient<P>> {
let store = XsdClient::open().await?;
let call = XenCall::open(current_domid)?;
let call: XenCall = XenCall::open(current_domid)?;
let domain_manager = BaseDomainManager::new(call.clone(), platform).await?;
Ok(XenClient {
store,
call,
platform: Arc::new(platform),
domain_manager: Arc::new(domain_manager),
})
}
pub async fn create(&self, config: &DomainConfig) -> Result<CreatedDomain> {
let mut domain = self.platform.create_domain(!config.pcis.is_empty());
domain.max_vcpus = config.max_vcpus;
let domid = self.call.create_domain(domain).await?;
match self.init(domid, &domain, config).await {
Ok(created) => Ok(created),
let created = self.domain_manager.create(config.base.clone()).await?;
match self.init(created.domid, config, &created).await {
Ok(_) => Ok(created),
Err(err) => {
// ignore since destroying a domain is best
// effort when an error occurs
let _ = self.destroy(domid).await;
let _ = self.domain_manager.destroy(created.domid).await;
Err(err)
}
}
}
async fn init(
&self,
domid: u32,
created: &CreateDomain,
config: &DomainConfig,
) -> Result<CreatedDomain> {
trace!("XenClient init domid={} domain={:?}", domid, created,);
let backend_dom_path = self.store.get_domain_path(0).await?;
let dom_path = self.store.get_domain_path(domid).await?;
let uuid_string = Uuid::from_bytes(created.handle).to_string();
let vm_path = format!("/vm/{}", uuid_string);
let ro_perm = &[
XsPermission {
id: 0,
perms: XS_PERM_NONE,
},
XsPermission {
id: domid,
perms: XS_PERM_READ,
},
];
let rw_perm = &[XsPermission {
id: domid,
perms: XS_PERM_READ_WRITE,
}];
let no_perm = &[XsPermission {
id: 0,
perms: XS_PERM_NONE,
}];
{
let tx = self.store.transaction().await?;
tx.rm(dom_path.as_str()).await?;
tx.mknod(dom_path.as_str(), ro_perm).await?;
tx.rm(vm_path.as_str()).await?;
tx.mknod(vm_path.as_str(), ro_perm).await?;
tx.mknod(vm_path.as_str(), no_perm).await?;
tx.mknod(format!("{}/device", vm_path).as_str(), no_perm)
.await?;
tx.write_string(format!("{}/vm", dom_path).as_str(), &vm_path)
.await?;
tx.mknod(format!("{}/cpu", dom_path).as_str(), ro_perm)
.await?;
tx.mknod(format!("{}/memory", dom_path).as_str(), ro_perm)
.await?;
tx.mknod(format!("{}/control", dom_path).as_str(), ro_perm)
.await?;
tx.mknod(format!("{}/control/shutdown", dom_path).as_str(), rw_perm)
.await?;
tx.mknod(
format!("{}/control/feature-poweroff", dom_path).as_str(),
rw_perm,
)
.await?;
tx.mknod(
format!("{}/control/feature-reboot", dom_path).as_str(),
rw_perm,
)
.await?;
tx.mknod(
format!("{}/control/feature-suspend", dom_path).as_str(),
rw_perm,
)
.await?;
tx.mknod(format!("{}/control/sysrq", dom_path).as_str(), rw_perm)
.await?;
tx.mknod(format!("{}/data", dom_path).as_str(), rw_perm)
.await?;
tx.mknod(format!("{}/drivers", dom_path).as_str(), rw_perm)
.await?;
tx.mknod(format!("{}/feature", dom_path).as_str(), rw_perm)
.await?;
tx.mknod(format!("{}/attr", dom_path).as_str(), rw_perm)
.await?;
tx.mknod(format!("{}/error", dom_path).as_str(), rw_perm)
.await?;
tx.write_string(
format!("{}/uuid", vm_path).as_str(),
&Uuid::from_bytes(created.handle).to_string(),
)
.await?;
tx.write_string(format!("{}/name", dom_path).as_str(), &config.name)
.await?;
tx.write_string(format!("{}/name", vm_path).as_str(), &config.name)
.await?;
for (key, value) in &config.extra_keys {
tx.write_string(format!("{}/{}", dom_path, key).as_str(), value)
.await?;
pub async fn transaction(&self, domid: u32, backend_domid: u32) -> Result<ClientTransaction> {
ClientTransaction::new(&self.store, domid, backend_domid).await
}
for path in &config.extra_rw_paths {
tx.mknod(format!("{}/{}", dom_path, path).as_str(), rw_perm)
async fn init(&self, domid: u32, config: &DomainConfig, created: &CreatedDomain) -> Result<()> {
trace!("xenclient init domid={} domain={:?}", domid, created);
let transaction = self.transaction(domid, config.backend_domid).await?;
transaction
.add_domain_declaration(&config.name, &config.base, created)
.await?;
}
tx.commit().await?;
}
self.call.set_max_vcpus(domid, config.max_vcpus).await?;
self.call
.set_max_mem(domid, (config.mem_mb * 1024) + 2048)
.await?;
let mut domain: BootDomain;
{
let loader = ElfImageLoader::load_file_kernel(&config.kernel)?;
let platform = (*self.platform).clone();
let mut boot = BootSetup::new(self.call.clone(), domid, platform, loader, None);
domain = boot
.initialize(
&config.initrd,
config.mem_mb,
config.max_vcpus,
&config.cmdline,
)
.await?;
boot.boot(&mut domain).await?;
}
{
let tx = self.store.transaction().await?;
tx.write_string(format!("{}/image/os_type", vm_path).as_str(), "linux")
.await?;
tx.write_string(
format!("{}/image/cmdline", vm_path).as_str(),
&config.cmdline,
)
.await?;
tx.write_string(
format!("{}/memory/static-max", dom_path).as_str(),
&(config.mem_mb * 1024).to_string(),
)
.await?;
tx.write_string(
format!("{}/memory/target", dom_path).as_str(),
&(config.mem_mb * 1024).to_string(),
)
.await?;
tx.write_string(format!("{}/memory/videoram", dom_path).as_str(), "0")
.await?;
tx.write_string(format!("{}/domid", dom_path).as_str(), &domid.to_string())
.await?;
tx.write_string(format!("{}/type", dom_path).as_str(), "PV")
.await?;
tx.write_string(
format!("{}/store/port", dom_path).as_str(),
&domain.store_evtchn.to_string(),
)
.await?;
tx.write_string(
format!("{}/store/ring-ref", dom_path).as_str(),
&domain.store_mfn.to_string(),
)
.await?;
for i in 0..config.max_vcpus {
let path = format!("{}/cpu/{}", dom_path, i);
tx.mkdir(&path).await?;
tx.set_perms(&path, ro_perm).await?;
let path = format!("{}/cpu/{}/availability", dom_path, i);
tx.write_string(&path, "online").await?;
tx.set_perms(&path, ro_perm).await?;
}
tx.commit().await?;
}
transaction.commit().await?;
if !self
.store
.introduce_domain(domid, domain.store_mfn, domain.store_evtchn)
.introduce_domain(domid, created.store_mfn, created.store_evtchn)
.await?
{
return Err(Error::IntroduceDomainFailed);
}
let tx = self.store.transaction().await?;
self.console_device_add(
&tx,
&mut domain,
let transaction = self.transaction(domid, config.backend_domid).await?;
transaction
.add_channel_device(
created,
0,
&DomainChannel {
typ: config
.swap_console_backend
@ -352,521 +169,49 @@ impl<P: BootSetupPlatform> XenClient<P> {
.to_string(),
initialized: true,
},
&dom_path,
&backend_dom_path,
config.backend_domid,
domid,
0,
)
.await?;
let mut channels: Vec<CreatedChannel> = Vec::new();
for (index, channel) in config.channels.iter().enumerate() {
let (Some(ring_ref), Some(evtchn)) = self
.console_device_add(
&tx,
&mut domain,
channel,
&dom_path,
&backend_dom_path,
config.backend_domid,
domid,
index + 1,
)
.await?
else {
continue;
};
channels.push(CreatedChannel { ring_ref, evtchn });
transaction
.add_channel_device(created, index + 1, channel)
.await?;
}
for (index, disk) in config.disks.iter().enumerate() {
self.disk_device_add(
&tx,
&dom_path,
&backend_dom_path,
config.backend_domid,
domid,
index,
disk,
)
.await?;
transaction.add_vbd_device(index, disk).await?;
}
for (index, filesystem) in config.filesystems.iter().enumerate() {
self.fs_9p_device_add(
&tx,
&dom_path,
&backend_dom_path,
config.backend_domid,
domid,
index,
filesystem,
)
.await?;
transaction.add_9pfs_device(index, filesystem).await?;
}
for (index, vif) in config.vifs.iter().enumerate() {
self.vif_device_add(
&tx,
&dom_path,
&backend_dom_path,
config.backend_domid,
domid,
index,
vif,
)
.await?;
transaction.add_vif_device(index, vif).await?;
}
for (index, pci) in config.pcis.iter().enumerate() {
self.pci_device_add(
&tx,
&dom_path,
&backend_dom_path,
config.backend_domid,
domid,
index,
config.pcis.len(),
pci,
)
transaction
.add_pci_device(&self.call, index, config.pcis.len(), pci)
.await?;
}
for channel in &config.event_channels {
let id = self
.call
.evtchn_alloc_unbound(domid, config.backend_domid)
.await?;
let channel_path = format!("{}/evtchn/{}", dom_path, channel.name);
tx.write_string(&format!("{}/name", channel_path), &channel.name)
.await?;
tx.write_string(&format!("{}/channel", channel_path), &id.to_string())
.await?;
for (key, value) in &config.extra_keys {
transaction.write_key(key, value).await?;
}
tx.commit().await?;
for key in &config.extra_rw_paths {
transaction.add_rw_path(key).await?;
}
transaction.commit().await?;
self.call.unpause_domain(domid).await?;
Ok(CreatedDomain { domid, channels })
}
async fn disk_device_add(
&self,
tx: &XsdTransaction,
dom_path: &str,
backend_dom_path: &str,
backend_domid: u32,
domid: u32,
index: usize,
disk: &DomainDisk,
) -> Result<()> {
let id = (202 << 8) | (index << 4) as u64;
let backend_items: Vec<(&str, String)> = vec![
("frontend-id", domid.to_string()),
("online", "1".to_string()),
("removable", "0".to_string()),
("bootable", "1".to_string()),
("state", "1".to_string()),
("dev", disk.vdev.to_string()),
("type", "phy".to_string()),
("mode", if disk.writable { "w" } else { "r" }.to_string()),
("device-type", "disk".to_string()),
("discard-enable", "0".to_string()),
("specification", "xen".to_string()),
("physical-device-path", disk.block.path.to_string()),
(
"physical-device",
format!("{:02x}:{:02x}", disk.block.major, disk.block.minor),
),
];
let frontend_items: Vec<(&str, String)> = vec![
("backend-id", backend_domid.to_string()),
("state", "1".to_string()),
("virtual-device", id.to_string()),
("device-type", "disk".to_string()),
("trusted", "1".to_string()),
("protocol", "x86_64-abi".to_string()),
];
self.device_add(
tx,
"vbd",
id,
dom_path,
backend_dom_path,
backend_domid,
domid,
frontend_items,
backend_items,
)
.await?;
Ok(())
}
#[allow(clippy::unnecessary_unwrap)]
async fn console_device_add(
&self,
tx: &XsdTransaction,
domain: &mut BootDomain,
channel: &DomainChannel,
dom_path: &str,
backend_dom_path: &str,
backend_domid: u32,
domid: u32,
index: usize,
) -> Result<(Option<u64>, Option<u32>)> {
let console = domain.consoles.get(index);
let port = console.map(|x| x.0);
let ring = console.map(|x| x.1);
let mut backend_entries = vec![
("frontend-id", domid.to_string()),
("online", "1".to_string()),
("protocol", "vt100".to_string()),
];
let mut frontend_entries = vec![
("backend-id", backend_domid.to_string()),
("limit", "1048576".to_string()),
("output", "pty".to_string()),
("tty", "".to_string()),
];
frontend_entries.push(("type", channel.typ.clone()));
backend_entries.push(("type", channel.typ.clone()));
if port.is_some() && ring.is_some() {
if channel.typ != "xenconsoled" {
frontend_entries.push(("state", "1".to_string()));
}
frontend_entries.extend_from_slice(&[
("port", port.unwrap().to_string()),
("ring-ref", ring.unwrap().to_string()),
]);
} else {
frontend_entries.extend_from_slice(&[
("state", "1".to_string()),
("protocol", "vt100".to_string()),
]);
}
if channel.initialized {
backend_entries.push(("state", "4".to_string()));
} else {
backend_entries.push(("state", "1".to_string()));
}
self.device_add(
tx,
"console",
index as u64,
dom_path,
backend_dom_path,
backend_domid,
domid,
frontend_entries,
backend_entries,
)
.await?;
Ok((ring, port))
}
async fn fs_9p_device_add(
&self,
tx: &XsdTransaction,
dom_path: &str,
backend_dom_path: &str,
backend_domid: u32,
domid: u32,
index: usize,
filesystem: &DomainFilesystem,
) -> Result<()> {
let id = 90 + index as u64;
let backend_items: Vec<(&str, String)> = vec![
("frontend-id", domid.to_string()),
("online", "1".to_string()),
("state", "1".to_string()),
("path", filesystem.path.to_string()),
("security-model", "none".to_string()),
];
let frontend_items: Vec<(&str, String)> = vec![
("backend-id", backend_domid.to_string()),
("state", "1".to_string()),
("tag", filesystem.tag.to_string()),
];
self.device_add(
tx,
"9pfs",
id,
dom_path,
backend_dom_path,
backend_domid,
domid,
frontend_items,
backend_items,
)
.await?;
Ok(())
}
async fn vif_device_add(
&self,
tx: &XsdTransaction,
dom_path: &str,
backend_dom_path: &str,
backend_domid: u32,
domid: u32,
index: usize,
vif: &DomainNetworkInterface,
) -> Result<()> {
let id = 20 + index as u64;
let mut backend_items: Vec<(&str, String)> = vec![
("frontend-id", domid.to_string()),
("online", "1".to_string()),
("state", "1".to_string()),
("mac", vif.mac.to_string()),
("mtu", vif.mtu.to_string()),
("type", "vif".to_string()),
("handle", id.to_string()),
];
if vif.bridge.is_some() {
backend_items.extend_from_slice(&[("bridge", vif.bridge.clone().unwrap())]);
}
if vif.script.is_some() {
backend_items.extend_from_slice(&[
("script", vif.script.clone().unwrap()),
("hotplug-status", "".to_string()),
]);
} else {
backend_items.extend_from_slice(&[
("script", "".to_string()),
("hotplug-status", "connected".to_string()),
]);
}
let frontend_items: Vec<(&str, String)> = vec![
("backend-id", backend_domid.to_string()),
("state", "1".to_string()),
("mac", vif.mac.to_string()),
("trusted", "1".to_string()),
("mtu", vif.mtu.to_string()),
];
self.device_add(
tx,
"vif",
id,
dom_path,
backend_dom_path,
backend_domid,
domid,
frontend_items,
backend_items,
)
.await?;
Ok(())
}
async fn pci_device_add(
&self,
tx: &XsdTransaction,
dom_path: &str,
backend_dom_path: &str,
backend_domid: u32,
domid: u32,
index: usize,
device_count: usize,
device: &DomainPciDevice,
) -> Result<()> {
let backend = XenPciBackend::new();
if !backend.is_assigned(&device.bdf).await? {
return Err(Error::PciDeviceNotAssignable(device.bdf));
}
let resources = backend.read_resources(&device.bdf).await?;
for resource in resources {
if resource.is_bar_io() {
self.call
.ioport_permission(domid, resource.start as u32, resource.size() as u32, true)
.await?;
} else {
self.call
.iomem_permission(
domid,
resource.start >> XEN_PAGE_SHIFT,
(resource.size() + (XEN_PAGE_SHIFT - 1)) >> XEN_PAGE_SHIFT,
true,
)
.await?;
}
}
if let Some(irq) = backend.read_irq(&device.bdf).await? {
let irq = self.call.map_pirq(domid, irq as isize, None).await?;
self.call.irq_permission(domid, irq, true).await?;
}
backend.reset(&device.bdf).await?;
self.call
.assign_device(
domid,
device.bdf.encode(),
if device.rdm_reserve_policy == DomainPciRdmReservePolicy::Relaxed {
DOMCTL_DEV_RDM_RELAXED
} else {
0
},
)
.await?;
if device.permissive {
backend.enable_permissive(&device.bdf).await?;
}
let id = 60;
if index == 0 {
let backend_items: Vec<(&str, String)> = vec![
("frontend-id", domid.to_string()),
("online", "1".to_string()),
("state", "1".to_string()),
("num_devs", device_count.to_string()),
];
let frontend_items: Vec<(&str, String)> = vec![
("backend-id", backend_domid.to_string()),
("state", "1".to_string()),
];
self.device_add(
tx,
"pci",
id,
dom_path,
backend_dom_path,
backend_domid,
domid,
frontend_items,
backend_items,
)
.await?;
}
let backend_path = format!("{}/backend/{}/{}/{}", backend_dom_path, "pci", domid, id);
tx.write_string(
format!("{}/key-{}", backend_path, index),
&device.bdf.to_string(),
)
.await?;
tx.write_string(
format!("{}/dev-{}", backend_path, index),
&device.bdf.to_string(),
)
.await?;
if let Some(vdefn) = device.bdf.vdefn {
tx.write_string(
format!("{}/vdefn-{}", backend_path, index),
&format!("{:#x}", vdefn),
)
.await?;
}
let mut options = IndexMap::new();
options.insert("permissive", if device.permissive { "1" } else { "0" });
options.insert("rdm_policy", device.rdm_reserve_policy.to_option_str());
options.insert("msitranslate", if device.msi_translate { "1" } else { "0" });
options.insert(
"power_mgmt",
if device.power_management { "1" } else { "0" },
);
let options = options
.into_iter()
.map(|(key, value)| format!("{}={}", key, value))
.collect::<Vec<_>>()
.join(",");
tx.write_string(format!("{}/opts-{}", backend_path, index), &options)
.await?;
Ok(())
}
async fn device_add(
&self,
tx: &XsdTransaction,
typ: &str,
id: u64,
dom_path: &str,
backend_dom_path: &str,
backend_domid: u32,
domid: u32,
frontend_items: Vec<(&str, String)>,
backend_items: Vec<(&str, String)>,
) -> Result<()> {
let console_zero = typ == "console" && id == 0;
let frontend_path = if console_zero {
format!("{}/console", dom_path)
} else {
format!("{}/device/{}/{}", dom_path, typ, id)
};
let backend_path = format!("{}/backend/{}/{}/{}", backend_dom_path, typ, domid, id);
let mut backend_items: Vec<(&str, String)> = backend_items.clone();
let mut frontend_items: Vec<(&str, String)> = frontend_items.clone();
backend_items.push(("frontend", frontend_path.clone()));
frontend_items.push(("backend", backend_path.clone()));
let frontend_perms = &[
XsPermission {
id: domid,
perms: XS_PERM_NONE,
},
XsPermission {
id: backend_domid,
perms: XS_PERM_READ,
},
];
let backend_perms = &[
XsPermission {
id: backend_domid,
perms: XS_PERM_NONE,
},
XsPermission {
id: domid,
perms: XS_PERM_READ,
},
];
tx.mknod(&frontend_path, frontend_perms).await?;
for (p, value) in &frontend_items {
let path = format!("{}/{}", frontend_path, *p);
tx.write_string(&path, value).await?;
if !console_zero {
tx.set_perms(&path, frontend_perms).await?;
}
}
tx.mknod(&backend_path, backend_perms).await?;
for (p, value) in &backend_items {
let path = format!("{}/{}", backend_path, *p);
tx.write_string(&path, value).await?;
}
Ok(())
}
pub async fn destroy(&self, domid: u32) -> Result<()> {
if let Err(err) = self.destroy_store(domid).await {
warn!("failed to destroy store for domain {}: {}", domid, err);
}
self.call.destroy_domain(domid).await?;
let _ = self.destroy_store(domid).await;
self.domain_manager.destroy(domid).await?;
Ok(())
}

View File

@ -0,0 +1,573 @@
use indexmap::IndexMap;
use xencall::{sys::DOMCTL_DEV_RDM_RELAXED, XenCall};
use xenplatform::{
domain::{BaseDomainConfig, CreatedDomain},
sys::XEN_PAGE_SHIFT,
};
use xenstore::{
XsPermission, XsdClient, XsdInterface, XsdTransaction, XS_PERM_NONE, XS_PERM_READ,
XS_PERM_READ_WRITE,
};
use crate::{
error::{Error, Result},
pci::XenPciBackend,
DomainChannel, DomainDisk, DomainFilesystem, DomainNetworkInterface, DomainPciDevice,
DomainPciRdmReservePolicy,
};
pub struct ClientTransaction {
tx: XsdTransaction,
abort: bool,
domid: u32,
dom_path: String,
backend_domid: u32,
backend_dom_path: String,
}
impl ClientTransaction {
pub async fn new(store: &XsdClient, domid: u32, backend_domid: u32) -> Result<Self> {
let backend_dom_path = store.get_domain_path(0).await?;
let dom_path = store.get_domain_path(domid).await?;
Ok(ClientTransaction {
tx: store.transaction().await?,
abort: true,
domid,
dom_path,
backend_domid,
backend_dom_path,
})
}
pub async fn add_domain_declaration(
&self,
name: impl AsRef<str>,
base: &BaseDomainConfig,
created: &CreatedDomain,
) -> Result<()> {
let vm_path = format!("/vm/{}", base.uuid);
let ro_perm = &[
XsPermission {
id: 0,
perms: XS_PERM_NONE,
},
XsPermission {
id: self.domid,
perms: XS_PERM_READ,
},
];
let no_perm = &[XsPermission {
id: 0,
perms: XS_PERM_NONE,
}];
let rw_perm = &[XsPermission {
id: self.domid,
perms: XS_PERM_READ_WRITE,
}];
self.tx.rm(&self.dom_path).await?;
self.tx.mknod(&self.dom_path, ro_perm).await?;
self.tx.rm(&vm_path).await?;
self.tx.mknod(&vm_path, ro_perm).await?;
self.tx.mknod(&vm_path, no_perm).await?;
self.tx
.mknod(format!("{}/device", vm_path).as_str(), no_perm)
.await?;
self.tx
.write_string(format!("{}/vm", self.dom_path).as_str(), &vm_path)
.await?;
self.tx
.mknod(format!("{}/cpu", self.dom_path).as_str(), ro_perm)
.await?;
self.tx
.mknod(format!("{}/memory", self.dom_path).as_str(), ro_perm)
.await?;
self.tx
.mknod(format!("{}/control", self.dom_path).as_str(), ro_perm)
.await?;
self.tx
.mknod(
format!("{}/control/shutdown", self.dom_path).as_str(),
rw_perm,
)
.await?;
self.tx
.mknod(
format!("{}/control/feature-poweroff", self.dom_path).as_str(),
rw_perm,
)
.await?;
self.tx
.mknod(
format!("{}/control/feature-reboot", self.dom_path).as_str(),
rw_perm,
)
.await?;
self.tx
.mknod(
format!("{}/control/feature-suspend", self.dom_path).as_str(),
rw_perm,
)
.await?;
self.tx
.mknod(format!("{}/control/sysrq", self.dom_path).as_str(), rw_perm)
.await?;
self.tx
.mknod(format!("{}/data", self.dom_path).as_str(), rw_perm)
.await?;
self.tx
.mknod(format!("{}/drivers", self.dom_path).as_str(), rw_perm)
.await?;
self.tx
.mknod(format!("{}/feature", self.dom_path).as_str(), rw_perm)
.await?;
self.tx
.mknod(format!("{}/attr", self.dom_path).as_str(), rw_perm)
.await?;
self.tx
.mknod(format!("{}/error", self.dom_path).as_str(), rw_perm)
.await?;
self.tx
.write_string(format!("{}/uuid", vm_path).as_str(), &base.uuid.to_string())
.await?;
self.tx
.write_string(format!("{}/name", self.dom_path).as_str(), name.as_ref())
.await?;
self.tx
.write_string(format!("{}/name", vm_path).as_str(), name.as_ref())
.await?;
self.tx
.write_string(format!("{}/image/os_type", vm_path).as_str(), "linux")
.await?;
self.tx
.write_string(format!("{}/image/cmdline", vm_path).as_str(), &base.cmdline)
.await?;
self.tx
.write_string(
format!("{}/memory/static-max", self.dom_path).as_str(),
&(base.mem_mb * 1024).to_string(),
)
.await?;
self.tx
.write_string(
format!("{}/memory/target", self.dom_path).as_str(),
&(base.mem_mb * 1024).to_string(),
)
.await?;
self.tx
.write_string(format!("{}/memory/videoram", self.dom_path).as_str(), "0")
.await?;
self.tx
.write_string(
format!("{}/domid", self.dom_path).as_str(),
&created.domid.to_string(),
)
.await?;
self.tx
.write_string(format!("{}/type", self.dom_path).as_str(), "PV")
.await?;
self.tx
.write_string(
format!("{}/store/port", self.dom_path).as_str(),
&created.store_evtchn.to_string(),
)
.await?;
self.tx
.write_string(
format!("{}/store/ring-ref", self.dom_path).as_str(),
&created.store_mfn.to_string(),
)
.await?;
for i in 0..base.max_vcpus {
let path = format!("{}/cpu/{}", self.dom_path, i);
self.tx.mkdir(&path).await?;
self.tx.set_perms(&path, ro_perm).await?;
let path = format!("{}/cpu/{}/availability", self.dom_path, i);
self.tx.write_string(&path, "online").await?;
self.tx.set_perms(&path, ro_perm).await?;
}
Ok(())
}
pub async fn write_key(&self, key: impl AsRef<str>, value: impl AsRef<str>) -> Result<()> {
self.tx
.write_string(
&format!("{}/{}", self.dom_path, key.as_ref()),
value.as_ref(),
)
.await?;
Ok(())
}
pub async fn add_rw_path(&self, key: impl AsRef<str>) -> Result<()> {
let rw_perm = &[XsPermission {
id: self.domid,
perms: XS_PERM_READ_WRITE,
}];
self.tx
.mknod(&format!("{}/{}", self.dom_path, key.as_ref()), rw_perm)
.await?;
Ok(())
}
pub async fn add_device(
&self,
typ: impl AsRef<str>,
id: u64,
frontend_items: Vec<(&str, String)>,
backend_items: Vec<(&str, String)>,
) -> Result<()> {
let console_zero = typ.as_ref() == "console" && id == 0;
let frontend_path = if console_zero {
format!("{}/console", self.dom_path)
} else {
format!("{}/device/{}/{}", self.dom_path, typ.as_ref(), id)
};
let backend_path = format!(
"{}/backend/{}/{}/{}",
self.backend_dom_path,
typ.as_ref(),
self.domid,
id
);
let mut backend_items: Vec<(&str, String)> = backend_items.clone();
let mut frontend_items: Vec<(&str, String)> = frontend_items.clone();
backend_items.push(("frontend", frontend_path.clone()));
frontend_items.push(("backend", backend_path.clone()));
let frontend_perms = &[
XsPermission {
id: self.domid,
perms: XS_PERM_NONE,
},
XsPermission {
id: self.backend_domid,
perms: XS_PERM_READ,
},
];
let backend_perms = &[
XsPermission {
id: self.backend_domid,
perms: XS_PERM_NONE,
},
XsPermission {
id: self.domid,
perms: XS_PERM_READ,
},
];
self.tx.mknod(&frontend_path, frontend_perms).await?;
for (p, value) in &frontend_items {
let path = format!("{}/{}", frontend_path, *p);
self.tx.write_string(&path, value).await?;
if !console_zero {
self.tx.set_perms(&path, frontend_perms).await?;
}
}
self.tx.mknod(&backend_path, backend_perms).await?;
for (p, value) in &backend_items {
let path = format!("{}/{}", backend_path, *p);
self.tx.write_string(&path, value).await?;
}
Ok(())
}
pub async fn add_vbd_device(&self, index: usize, disk: &DomainDisk) -> Result<()> {
let id = (202 << 8) | (index << 4) as u64;
let backend_items: Vec<(&str, String)> = vec![
("frontend-id", self.domid.to_string()),
("online", "1".to_string()),
("removable", "0".to_string()),
("bootable", "1".to_string()),
("state", "1".to_string()),
("dev", disk.vdev.to_string()),
("type", "phy".to_string()),
("mode", if disk.writable { "w" } else { "r" }.to_string()),
("device-type", "disk".to_string()),
("discard-enable", "0".to_string()),
("specification", "xen".to_string()),
("physical-device-path", disk.block.path.to_string()),
(
"physical-device",
format!("{:02x}:{:02x}", disk.block.major, disk.block.minor),
),
];
let frontend_items: Vec<(&str, String)> = vec![
("backend-id", self.backend_domid.to_string()),
("state", "1".to_string()),
("virtual-device", id.to_string()),
("device-type", "disk".to_string()),
("trusted", "1".to_string()),
("protocol", "x86_64-abi".to_string()),
];
self.add_device("vbd", id, frontend_items, backend_items)
.await?;
Ok(())
}
pub async fn add_vif_device(&self, index: usize, vif: &DomainNetworkInterface) -> Result<()> {
let id = 20 + index as u64;
let mut backend_items: Vec<(&str, String)> = vec![
("frontend-id", self.domid.to_string()),
("online", "1".to_string()),
("state", "1".to_string()),
("mac", vif.mac.to_string()),
("mtu", vif.mtu.to_string()),
("type", "vif".to_string()),
("handle", id.to_string()),
];
if vif.bridge.is_some() {
backend_items.extend_from_slice(&[("bridge", vif.bridge.clone().unwrap())]);
}
if vif.script.is_some() {
backend_items.extend_from_slice(&[
("script", vif.script.clone().unwrap()),
("hotplug-status", "".to_string()),
]);
} else {
backend_items.extend_from_slice(&[
("script", "".to_string()),
("hotplug-status", "connected".to_string()),
]);
}
let frontend_items: Vec<(&str, String)> = vec![
("backend-id", self.backend_domid.to_string()),
("state", "1".to_string()),
("mac", vif.mac.to_string()),
("trusted", "1".to_string()),
("mtu", vif.mtu.to_string()),
];
self.add_device("vif", id, frontend_items, backend_items)
.await?;
Ok(())
}
pub async fn add_9pfs_device(&self, index: usize, filesystem: &DomainFilesystem) -> Result<()> {
let id = 90 + index as u64;
let backend_items: Vec<(&str, String)> = vec![
("frontend-id", self.domid.to_string()),
("online", "1".to_string()),
("state", "1".to_string()),
("path", filesystem.path.to_string()),
("security-model", "none".to_string()),
];
let frontend_items: Vec<(&str, String)> = vec![
("backend-id", self.backend_domid.to_string()),
("state", "1".to_string()),
("tag", filesystem.tag.to_string()),
];
self.add_device("9pfs", id, frontend_items, backend_items)
.await?;
Ok(())
}
pub async fn add_channel_device(
&self,
domain: &CreatedDomain,
index: usize,
channel: &DomainChannel,
) -> Result<()> {
let port = domain.console_evtchn;
let ring = domain.console_mfn;
let mut backend_items = vec![
("frontend-id", self.domid.to_string()),
("online", "1".to_string()),
("protocol", "vt100".to_string()),
];
let mut frontend_items = vec![
("backend-id", self.backend_domid.to_string()),
("limit", "1048576".to_string()),
("output", "pty".to_string()),
("tty", "".to_string()),
];
frontend_items.push(("type", channel.typ.clone()));
backend_items.push(("type", channel.typ.clone()));
if index == 0 {
if channel.typ != "xenconsoled" {
frontend_items.push(("state", "1".to_string()));
}
frontend_items
.extend_from_slice(&[("port", port.to_string()), ("ring-ref", ring.to_string())]);
} else {
frontend_items.extend_from_slice(&[
("state", "1".to_string()),
("protocol", "vt100".to_string()),
]);
}
if channel.initialized {
backend_items.push(("state", "4".to_string()));
} else {
backend_items.push(("state", "1".to_string()));
}
self.add_device("console", index as u64, frontend_items, backend_items)
.await?;
Ok(())
}
pub async fn add_pci_device(
&self,
call: &XenCall,
index: usize,
device_count: usize,
device: &DomainPciDevice,
) -> Result<()> {
let backend = XenPciBackend::new();
if !backend.is_assigned(&device.bdf).await? {
return Err(Error::PciDeviceNotAssignable(device.bdf));
}
let resources = backend.read_resources(&device.bdf).await?;
for resource in resources {
if resource.is_bar_io() {
call.ioport_permission(
self.domid,
resource.start as u32,
resource.size() as u32,
true,
)
.await?;
} else {
call.iomem_permission(
self.domid,
resource.start >> XEN_PAGE_SHIFT,
(resource.size() + (XEN_PAGE_SHIFT - 1)) >> XEN_PAGE_SHIFT,
true,
)
.await?;
}
}
if let Some(irq) = backend.read_irq(&device.bdf).await? {
let irq = call.map_pirq(self.domid, irq as isize, None).await?;
call.irq_permission(self.domid, irq, true).await?;
}
backend.reset(&device.bdf).await?;
call.assign_device(
self.domid,
device.bdf.encode(),
if device.rdm_reserve_policy == DomainPciRdmReservePolicy::Relaxed {
DOMCTL_DEV_RDM_RELAXED
} else {
0
},
)
.await?;
if device.permissive {
backend.enable_permissive(&device.bdf).await?;
}
let id = 60;
if index == 0 {
let backend_items: Vec<(&str, String)> = vec![
("frontend-id", self.domid.to_string()),
("online", "1".to_string()),
("state", "1".to_string()),
("num_devs", device_count.to_string()),
];
let frontend_items: Vec<(&str, String)> = vec![
("backend-id", self.backend_domid.to_string()),
("state", "1".to_string()),
];
self.add_device("pci", id, frontend_items, backend_items)
.await?;
}
let backend_path = format!(
"{}/backend/{}/{}/{}",
self.backend_dom_path, "pci", self.domid, id
);
self.tx
.write_string(
format!("{}/key-{}", backend_path, index),
&device.bdf.to_string(),
)
.await?;
self.tx
.write_string(
format!("{}/dev-{}", backend_path, index),
&device.bdf.to_string(),
)
.await?;
if let Some(vdefn) = device.bdf.vdefn {
self.tx
.write_string(
format!("{}/vdefn-{}", backend_path, index),
&format!("{:#x}", vdefn),
)
.await?;
}
let mut options = IndexMap::new();
options.insert("permissive", if device.permissive { "1" } else { "0" });
options.insert("rdm_policy", device.rdm_reserve_policy.to_option_str());
options.insert("msitranslate", if device.msi_translate { "1" } else { "0" });
options.insert(
"power_mgmt",
if device.power_management { "1" } else { "0" },
);
let options = options
.into_iter()
.map(|(key, value)| format!("{}={}", key, value))
.collect::<Vec<_>>()
.join(",");
self.tx
.write_string(format!("{}/opts-{}", backend_path, index), &options)
.await?;
Ok(())
}
pub async fn commit(mut self) -> Result<()> {
self.abort = false;
self.tx.commit().await?;
Ok(())
}
}
impl Drop for ClientTransaction {
fn drop(&mut self) {
if !self.abort {
return;
}
let tx = self.tx.clone();
tokio::task::spawn(async move {
let _ = tx.abort().await;
});
}
}

View File

@ -43,7 +43,8 @@ pub struct BootDomain {
pub store_evtchn: u32,
pub store_mfn: u64,
pub initrd_segment: DomainSegment,
pub consoles: Vec<(u32, u64)>,
pub console_evtchn: u32,
pub console_mfn: u64,
pub cmdline: String,
}
@ -177,7 +178,8 @@ impl<I: BootImageLoader, P: BootSetupPlatform> BootSetup<I, P> {
target_pages: total_pages,
page_size: self.platform.page_size(),
image_info,
consoles: Vec::new(),
console_evtchn: 0,
console_mfn: 0,
max_vcpus,
phys: PhysicalPages::new(self.call.clone(), self.domid, self.platform.page_shift()),
initrd_segment: DomainSegment::default(),
@ -261,7 +263,7 @@ impl<I: BootImageLoader, P: BootSetupPlatform> BootSetup<I, P> {
#[async_trait::async_trait]
pub trait BootSetupPlatform: Clone {
fn create_domain(&self, needs_passthrough: bool) -> CreateDomain;
fn create_domain(&self, enable_iommu: bool) -> CreateDomain;
fn page_size(&self) -> u64;
fn page_shift(&self) -> u64;
fn needs_early_kernel(&self) -> bool;

View File

@ -0,0 +1,80 @@
use std::sync::Arc;
use crate::{
boot::{BootSetup, BootSetupPlatform},
elfloader::ElfImageLoader,
};
use uuid::Uuid;
use xencall::XenCall;
use crate::error::Result;
pub struct BaseDomainManager<P: BootSetupPlatform> {
call: XenCall,
pub platform: Arc<P>,
}
impl<P: BootSetupPlatform> BaseDomainManager<P> {
pub async fn new(call: XenCall, platform: P) -> Result<BaseDomainManager<P>> {
Ok(BaseDomainManager {
call,
platform: Arc::new(platform),
})
}
pub async fn create(&self, config: BaseDomainConfig) -> Result<CreatedDomain> {
let mut domain = self.platform.create_domain(config.enable_iommu);
domain.handle = config.uuid.into_bytes();
domain.max_vcpus = config.max_vcpus;
let domid = self.call.create_domain(domain).await?;
self.call.set_max_vcpus(domid, config.max_vcpus).await?;
self.call
.set_max_mem(domid, (config.mem_mb * 1024) + 2048)
.await?;
let loader = ElfImageLoader::load_file_kernel(&config.kernel)?;
let platform = (*self.platform).clone();
let mut boot = BootSetup::new(self.call.clone(), domid, platform, loader, None);
let mut domain = boot
.initialize(
&config.initrd,
config.mem_mb,
config.max_vcpus,
&config.cmdline,
)
.await?;
boot.boot(&mut domain).await?;
Ok(CreatedDomain {
domid,
store_evtchn: domain.store_evtchn,
store_mfn: domain.store_mfn,
console_evtchn: domain.console_evtchn,
console_mfn: domain.console_mfn,
})
}
pub async fn destroy(&self, domid: u32) -> Result<()> {
self.call.destroy_domain(domid).await?;
Ok(())
}
}
#[derive(Clone, Debug)]
pub struct BaseDomainConfig {
pub uuid: Uuid,
pub owner_domid: u32,
pub max_vcpus: u32,
pub mem_mb: u64,
pub kernel: Vec<u8>,
pub initrd: Vec<u8>,
pub cmdline: String,
pub enable_iommu: bool,
}
#[derive(Clone, Debug)]
pub struct CreatedDomain {
pub domid: u32,
pub store_evtchn: u32,
pub store_mfn: u64,
pub console_evtchn: u32,
pub console_mfn: u64,
}

View File

@ -6,6 +6,7 @@ pub mod sys;
use crate::error::Error;
pub mod domain;
pub mod unsupported;
#[cfg(target_arch = "x86_64")]
pub mod x86pv;

View File

@ -434,9 +434,9 @@ impl X86PvPlatform {
#[async_trait::async_trait]
impl BootSetupPlatform for X86PvPlatform {
fn create_domain(&self, needs_passthrough: bool) -> CreateDomain {
fn create_domain(&self, enable_iommu: bool) -> CreateDomain {
CreateDomain {
flags: if needs_passthrough {
flags: if enable_iommu {
XEN_DOMCTL_CDF_IOMMU
} else {
0
@ -718,9 +718,8 @@ impl BootSetupPlatform for X86PvPlatform {
domain.store_mfn = domain.phys.p2m[self.xenstore_segment.as_ref().unwrap().pfn as usize];
let evtchn = domain.call.evtchn_alloc_unbound(domain.domid, 0).await?;
let page = domain.alloc_page()?;
domain
.consoles
.push((evtchn, domain.phys.p2m[page.pfn as usize]));
domain.console_evtchn = evtchn;
domain.console_mfn = domain.phys.p2m[page.pfn as usize];
self.page_table_segment = self.alloc_page_tables(domain).await?;
self.boot_stack_segment = Some(domain.alloc_page()?);
@ -802,9 +801,8 @@ impl BootSetupPlatform for X86PvPlatform {
(*info).flags = 0;
(*info).store_evtchn = domain.store_evtchn;
(*info).store_mfn = domain.phys.p2m[xenstore_segment.pfn as usize];
let console = domain.consoles.first().unwrap();
(*info).console.mfn = console.1;
(*info).console.evtchn = console.0;
(*info).console.mfn = domain.console_mfn;
(*info).console.evtchn = domain.console_evtchn;
(*info).mod_start = domain.initrd_segment.vstart;
(*info).mod_len = domain.initrd_segment.size;
for (i, c) in domain.cmdline.chars().enumerate() {
@ -893,7 +891,7 @@ impl BootSetupPlatform for X86PvPlatform {
.as_ref()
.ok_or(Error::MemorySetupFailed("xenstore_segment missing"))?;
let console_gfn = domain.consoles.first().map(|x| x.1).unwrap_or(0) as usize;
let console_gfn = domain.console_mfn as usize;
let xenstore_gfn = domain.phys.p2m[xenstore_segment.pfn as usize];
let addr = domain
.call