From 97c4880605b5bd09db2afae800eadd739bcdb183 Mon Sep 17 00:00:00 2001 From: Alex Zenla Date: Thu, 25 Apr 2024 23:36:52 -0700 Subject: [PATCH] feat: more pci work --- crates/xen/xencall/src/lib.rs | 135 ++++++++++++++++++++++----- crates/xen/xencall/src/sys.rs | 79 +++++++++++++++- crates/xen/xenclient/src/lib.rs | 37 ++++++-- crates/xen/xenclient/src/pci.rs | 30 +++++- crates/xen/xenclient/src/x86.rs | 159 +++++++++++++++++++++++++++++++- 5 files changed, 409 insertions(+), 31 deletions(-) diff --git a/crates/xen/xencall/src/lib.rs b/crates/xen/xencall/src/lib.rs index a556e81..5699df6 100644 --- a/crates/xen/xencall/src/lib.rs +++ b/crates/xen/xencall/src/lib.rs @@ -3,15 +3,17 @@ pub mod sys; use crate::error::{Error, Result}; use crate::sys::{ - AddressSize, CreateDomain, DomCtl, DomCtlValue, DomCtlVcpuContext, EvtChnAllocUnbound, - GetDomainInfo, GetPageFrameInfo3, Hypercall, HypercallInit, IoMemPermission, IoPortPermission, - MaxMem, MaxVcpus, MemoryMap, MemoryReservation, MmapBatch, MmapResource, MmuExtOp, - MultiCallEntry, VcpuGuestContext, VcpuGuestContextAny, XenCapabilitiesInfo, HYPERVISOR_DOMCTL, + AddressSize, AssignDevice, CreateDomain, DomCtl, DomCtlValue, DomCtlVcpuContext, + EvtChnAllocUnbound, GetDomainInfo, GetPageFrameInfo3, Hypercall, HypercallInit, + IoMemPermission, IoPortPermission, IrqPermission, MaxMem, MaxVcpus, MemoryMap, + MemoryReservation, MmapBatch, MmapResource, MmuExtOp, MultiCallEntry, PciAssignDevice, + VcpuGuestContext, VcpuGuestContextAny, XenCapabilitiesInfo, DOMCTL_DEV_PCI, HYPERVISOR_DOMCTL, HYPERVISOR_EVENT_CHANNEL_OP, HYPERVISOR_MEMORY_OP, HYPERVISOR_MMUEXT_OP, HYPERVISOR_MULTICALL, - HYPERVISOR_XEN_VERSION, XENVER_CAPABILITIES, XEN_DOMCTL_CREATEDOMAIN, XEN_DOMCTL_DESTROYDOMAIN, - XEN_DOMCTL_GETDOMAININFO, XEN_DOMCTL_GETPAGEFRAMEINFO3, XEN_DOMCTL_GETVCPUCONTEXT, - XEN_DOMCTL_HYPERCALL_INIT, XEN_DOMCTL_IOMEM_PERMISSION, XEN_DOMCTL_IOPORT_PERMISSION, - XEN_DOMCTL_MAX_MEM, XEN_DOMCTL_MAX_VCPUS, XEN_DOMCTL_PAUSEDOMAIN, XEN_DOMCTL_SETVCPUCONTEXT, + HYPERVISOR_XEN_VERSION, XENVER_CAPABILITIES, XEN_DOMCTL_ASSIGN_DEVICE, XEN_DOMCTL_CREATEDOMAIN, + XEN_DOMCTL_DESTROYDOMAIN, XEN_DOMCTL_GETDOMAININFO, XEN_DOMCTL_GETPAGEFRAMEINFO3, + XEN_DOMCTL_GETVCPUCONTEXT, XEN_DOMCTL_HYPERCALL_INIT, XEN_DOMCTL_IOMEM_PERMISSION, + XEN_DOMCTL_IOPORT_PERMISSION, XEN_DOMCTL_IRQ_PERMISSION, XEN_DOMCTL_MAX_MEM, + XEN_DOMCTL_MAX_VCPUS, XEN_DOMCTL_PAUSEDOMAIN, XEN_DOMCTL_SETVCPUCONTEXT, XEN_DOMCTL_SET_ADDRESS_SIZE, XEN_DOMCTL_UNPAUSEDOMAIN, XEN_MEM_CLAIM_PAGES, XEN_MEM_MEMORY_MAP, XEN_MEM_POPULATE_PHYSMAP, }; @@ -20,7 +22,10 @@ use log::trace; use nix::errno::Errno; use std::ffi::{c_long, c_uint, c_ulong, c_void}; use std::sync::Arc; -use sys::{XEN_DOMCTL_MAX_INTERFACE_VERSION, XEN_DOMCTL_MIN_INTERFACE_VERSION}; +use sys::{ + E820Entry, ForeignMemoryMap, PhysdevMapPirq, HYPERVISOR_PHYSDEV_OP, PHYSDEVOP_MAP_PIRQ, + XEN_DOMCTL_MAX_INTERFACE_VERSION, XEN_DOMCTL_MIN_INTERFACE_VERSION, XEN_MEM_SET_MEMORY_MAP, +}; use tokio::sync::Semaphore; use std::fs::{File, OpenOptions}; @@ -570,26 +575,42 @@ impl XenCall { Ok(()) } - pub async fn get_memory_map(&self, size_of_entry: usize) -> Result> { + pub async fn get_memory_map(&self, max_entries: u32) -> Result> { let mut memory_map = MemoryMap { - count: 0, + count: max_entries, buffer: 0, }; + let mut entries = vec![E820Entry::default(); max_entries as usize]; + memory_map.buffer = entries.as_mut_ptr() as c_ulong; + self.hypercall2( + HYPERVISOR_MEMORY_OP, + XEN_MEM_MEMORY_MAP as c_ulong, + addr_of_mut!(memory_map) as c_ulong, + ) + .await?; + entries.truncate(memory_map.count as usize); + Ok(entries) + } + + pub async fn set_memory_map( + &self, + domid: u32, + entries: Vec, + ) -> Result> { + let mut memory_map = ForeignMemoryMap { + domid: domid as u16, + map: MemoryMap { + count: entries.len() as u32, + buffer: entries.as_ptr() as u64, + }, + }; self.hypercall2( HYPERVISOR_MEMORY_OP, - XEN_MEM_MEMORY_MAP as c_ulong, + XEN_MEM_SET_MEMORY_MAP as c_ulong, addr_of_mut!(memory_map) as c_ulong, ) .await?; - let mut buffer = vec![0u8; memory_map.count as usize * size_of_entry]; - memory_map.buffer = buffer.as_mut_ptr() as c_ulong; - self.hypercall2( - HYPERVISOR_MEMORY_OP, - XEN_MEM_MEMORY_MAP as c_ulong, - addr_of_mut!(memory_map) as c_ulong, - ) - .await?; - Ok(buffer) + Ok(entries) } pub async fn populate_physmap( @@ -736,4 +757,76 @@ impl XenCall { .await?; Ok(()) } + + pub async fn irq_permission(&self, domid: u32, irq: u32, allow: bool) -> Result<()> { + trace!( + "domctl fd={} irq_permission domid={} irq={} allow={}", + self.handle.as_raw_fd(), + domid, + irq, + allow, + ); + let mut domctl = DomCtl { + cmd: XEN_DOMCTL_IRQ_PERMISSION, + interface_version: self.domctl_interface_version, + domid, + value: DomCtlValue { + irq_permission: IrqPermission { + pirq: irq, + allow: if allow { 1 } else { 0 }, + pad: [0; 3], + }, + }, + }; + self.hypercall1(HYPERVISOR_DOMCTL, addr_of_mut!(domctl) as c_ulong) + .await?; + Ok(()) + } + + pub async fn map_pirq(&self, domid: u32, index: isize, pirq: Option) -> Result { + trace!( + "physdev fd={} map_pirq domid={} index={} pirq={:?}", + self.handle.as_raw_fd(), + domid, + index, + pirq, + ); + let mut physdev = PhysdevMapPirq::default(); + physdev.domid = domid as u16; + physdev.typ = 0x1; + physdev.index = index as c_int; + physdev.pirq = pirq.map(|x| x as c_int).unwrap_or(index as c_int); + self.hypercall2( + HYPERVISOR_PHYSDEV_OP, + PHYSDEVOP_MAP_PIRQ, + addr_of_mut!(physdev) as c_ulong, + ) + .await?; + Ok(physdev.pirq as u32) + } + + pub async fn assign_device(&self, domid: u32, sbdf: u32, flags: u32) -> Result<()> { + trace!( + "domctl fd={} assign_device domid={} sbdf={} flags={}", + self.handle.as_raw_fd(), + domid, + sbdf, + flags, + ); + let mut domctl = DomCtl { + cmd: XEN_DOMCTL_ASSIGN_DEVICE, + interface_version: self.domctl_interface_version, + domid, + value: DomCtlValue { + assign_device: AssignDevice { + device: DOMCTL_DEV_PCI, + flags, + pci_assign_device: PciAssignDevice { sbdf, padding: 0 }, + }, + }, + }; + self.hypercall1(HYPERVISOR_DOMCTL, addr_of_mut!(domctl) as c_ulong) + .await?; + Ok(()) + } } diff --git a/crates/xen/xencall/src/sys.rs b/crates/xen/xencall/src/sys.rs index 03ba669..84d06cc 100644 --- a/crates/xen/xencall/src/sys.rs +++ b/crates/xen/xencall/src/sys.rs @@ -104,6 +104,7 @@ pub const XEN_DOMCTL_CDF_HAP: u32 = 1u32 << 1; pub const XEN_DOMCTL_CDF_S3_INTEGRITY: u32 = 1u32 << 2; pub const XEN_DOMCTL_CDF_OOS_OFF: u32 = 1u32 << 3; pub const XEN_DOMCTL_CDF_XS_DOMAIN: u32 = 1u32 << 4; +pub const XEN_DOMCTL_CDF_IOMMU: u32 = 1u32 << 5; pub const XEN_X86_EMU_LAPIC: u32 = 1 << 0; pub const XEN_X86_EMU_HPET: u32 = 1 << 1; @@ -239,6 +240,8 @@ pub union DomCtlValue { pub get_page_frame_info: GetPageFrameInfo3, pub ioport_permission: IoPortPermission, pub iomem_permission: IoMemPermission, + pub irq_permission: IrqPermission, + pub assign_device: AssignDevice, pub pad: [u8; 128], } @@ -327,6 +330,14 @@ pub struct IoMemPermission { pub allow: u8, } +#[repr(C)] +#[derive(Copy, Clone, Debug)] +pub struct IrqPermission { + pub pirq: u32, + pub allow: u8, + pub pad: [u8; 3], +} + #[repr(C)] #[derive(Copy, Clone, Debug, Default)] #[cfg(target_arch = "x86_64")] @@ -396,7 +407,8 @@ pub struct MultiCallEntry { } pub const XEN_MEM_POPULATE_PHYSMAP: u32 = 6; -pub const XEN_MEM_MEMORY_MAP: u32 = 9; +pub const XEN_MEM_MEMORY_MAP: u32 = 10; +pub const XEN_MEM_SET_MEMORY_MAP: u32 = 13; pub const XEN_MEM_CLAIM_PAGES: u32 = 24; #[repr(C)] @@ -406,6 +418,13 @@ pub struct MemoryMap { pub buffer: c_ulong, } +#[repr(C)] +#[derive(Copy, Clone, Debug)] +pub struct ForeignMemoryMap { + pub domid: u16, + pub map: MemoryMap, +} + #[repr(C)] #[derive(Copy, Clone, Debug)] pub struct VcpuGuestContextFpuCtx { @@ -600,3 +619,61 @@ pub struct EvtChnAllocUnbound { pub remote_dom: u16, pub port: u32, } + +#[cfg(target_arch = "x86_64")] +#[repr(C, packed)] +#[derive(Debug, Copy, Clone, Default)] +pub struct E820Entry { + pub addr: u64, + pub size: u64, + pub typ: u32, +} + +#[cfg(target_arch = "x86_64")] +pub const E820_MAX: u32 = 1024; +#[cfg(target_arch = "x86_64")] +pub const E820_RAM: u32 = 1; +#[cfg(target_arch = "x86_64")] +pub const E820_RESERVED: u32 = 2; +#[cfg(target_arch = "x86_64")] +pub const E820_ACPI: u32 = 3; +#[cfg(target_arch = "x86_64")] +pub const E820_NVS: u32 = 4; +#[cfg(target_arch = "x86_64")] +pub const E820_UNUSABLE: u32 = 5; + +pub const PHYSDEVOP_MAP_PIRQ: u64 = 13; + +#[repr(C)] +#[derive(Default, Clone, Copy, Debug)] +pub struct PhysdevMapPirq { + pub domid: u16, + pub typ: c_int, + pub index: c_int, + pub pirq: c_int, + pub bus: c_int, + pub devfn: c_int, + pub entry_nr: u16, + pub table_base: u64, +} + +pub const DOMCTL_DEV_RDM_RELAXED: u32 = 1; +pub const DOMCTL_DEV_PCI: u32 = 0; +pub const DOMCTL_DEV_DT: u32 = 1; + +#[repr(C)] +#[derive(Default, Clone, Copy, Debug)] +pub struct PciAssignDevice { + pub sbdf: u32, + pub padding: u64, +} + +#[repr(C)] +#[derive(Default, Clone, Copy, Debug)] +pub struct AssignDevice { + pub device: u32, + pub flags: u32, + pub pci_assign_device: PciAssignDevice, +} + +pub const DOMID_IO: u32 = 0x7FF1; diff --git a/crates/xen/xenclient/src/lib.rs b/crates/xen/xenclient/src/lib.rs index 1ac2450..218983d 100644 --- a/crates/xen/xenclient/src/lib.rs +++ b/crates/xen/xenclient/src/lib.rs @@ -30,7 +30,10 @@ use std::path::PathBuf; use std::str::FromStr; use std::time::Duration; use uuid::Uuid; -use xencall::sys::{CreateDomain, XEN_DOMCTL_CDF_HAP, XEN_DOMCTL_CDF_HVM_GUEST}; +use xencall::sys::{ + CreateDomain, DOMCTL_DEV_RDM_RELAXED, XEN_DOMCTL_CDF_HAP, XEN_DOMCTL_CDF_HVM_GUEST, + XEN_DOMCTL_CDF_IOMMU, +}; use xencall::XenCall; use xenstore::{ XsPermission, XsdClient, XsdInterface, XS_PERM_NONE, XS_PERM_READ, XS_PERM_READ_WRITE, @@ -83,7 +86,7 @@ pub struct DomainEventChannel { pub name: String, } -#[derive(Clone, Debug, Default)] +#[derive(Clone, Debug, Default, Eq, PartialEq)] pub enum DomainPciRdmReservePolicy { Invalid, #[default] @@ -151,12 +154,14 @@ impl XenClient { pub async fn create(&self, config: &DomainConfig) -> Result { let mut domain = CreateDomain { - max_vcpus: config.max_vcpus, ..Default::default() }; + domain.max_vcpus = config.max_vcpus; if cfg!(target_arch = "aarch64") { domain.flags = XEN_DOMCTL_CDF_HVM_GUEST | XEN_DOMCTL_CDF_HAP; + } else { + domain.flags = XEN_DOMCTL_CDF_IOMMU; } let domid = self.call.create_domain(domain).await?; @@ -724,6 +729,20 @@ impl XenClient { } } + // backend.reset(&device.bdf).await?; + + self.call + .assign_device( + domid, + device.bdf.encode(), + if device.rdm_reserve_policy == DomainPciRdmReservePolicy::Relaxed { + DOMCTL_DEV_RDM_RELAXED + } else { + 0 + }, + ) + .await?; + let id = 60; if index == 0 { @@ -753,13 +772,16 @@ impl XenClient { } let backend_path = format!("{}/backend/{}/{}/{}", backend_dom_path, "pci", domid, id); - self.store + + let transaction = self.store.transaction().await?; + + transaction .write_string( format!("{}/key-{}", backend_path, index), &device.bdf.to_string(), ) .await?; - self.store + transaction .write_string( format!("{}/dev-{}", backend_path, index), &device.bdf.to_string(), @@ -767,7 +789,7 @@ impl XenClient { .await?; if let Some(vdefn) = device.bdf.vdefn { - self.store + transaction .write_string( format!("{}/vdefn-{}", backend_path, index), &format!("{:#x}", vdefn), @@ -789,10 +811,11 @@ impl XenClient { .collect::>() .join(","); - self.store + transaction .write_string(format!("{}/opts-{}", backend_path, index), &options) .await?; + transaction.commit().await?; Ok(()) } diff --git a/crates/xen/xenclient/src/pci.rs b/crates/xen/xenclient/src/pci.rs index 3036ac8..6482bf8 100644 --- a/crates/xen/xenclient/src/pci.rs +++ b/crates/xen/xenclient/src/pci.rs @@ -52,9 +52,22 @@ impl XenPciBackend { Ok(fs::try_exists(path).await?) } + pub async fn read_irq(&self, bdf: &PciBdf) -> Result> { + let mut path: PathBuf = self.path.clone(); + path.push(bdf.to_string()); + path.push("irq"); + + if !path.exists() { + return Ok(None); + } + + let content = fs::read_to_string(&path).await?; + Ok(u32::from_str(content.trim()).ok()) + } + pub async fn read_resources(&self, bdf: &PciBdf) -> Result> { let mut resources = Vec::new(); - let mut path = self.path.clone(); + let mut path: PathBuf = self.path.clone(); path.push(bdf.to_string()); path.push("resource"); let content = fs::read_to_string(&path).await?; @@ -112,6 +125,13 @@ impl XenPciBackend { } Ok(false) } + + pub async fn reset(&self, bdf: &PciBdf) -> Result<()> { + let mut path: PathBuf = self.path.clone(); + path.push("do_flr"); + fs::write(&path, bdf.to_string()).await?; + Ok(()) + } } #[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)] @@ -149,6 +169,14 @@ impl PciBdf { vdefn: self.vdefn, } } + + pub fn encode(&self) -> u32 { + let mut value = self.domain.unwrap_or(0) << 16u32; + value |= ((self.bus & 0xff) << 8u32) as u32; + value |= ((self.device & 0x1f) << 3u32) as u32; + value |= (self.function & 0x7) as u32; + value + } } impl FromStr for PciBdf { diff --git a/crates/xen/xenclient/src/x86.rs b/crates/xen/xenclient/src/x86.rs index 0a4999f..bd84104 100644 --- a/crates/xen/xenclient/src/x86.rs +++ b/crates/xen/xenclient/src/x86.rs @@ -11,7 +11,9 @@ use slice_copy::copy; use std::cmp::{max, min}; use std::mem::size_of; use std::slice; -use xencall::sys::{VcpuGuestContext, MMUEXT_PIN_L4_TABLE}; +use xencall::sys::{ + E820Entry, VcpuGuestContext, E820_MAX, E820_RAM, E820_UNUSABLE, MMUEXT_PIN_L4_TABLE, +}; pub const X86_PAGE_SHIFT: u64 = 12; pub const X86_PAGE_SIZE: u64 = 1 << X86_PAGE_SHIFT; @@ -273,6 +275,154 @@ impl X86BootSetup { self.table.mappings[m] = map; Ok(m) } + + fn e820_sanitize( + &self, + mut source: Vec, + map_limit_kb: u64, + balloon_kb: u64, + ) -> Result> { + let mut e820 = vec![E820Entry::default(); E820_MAX as usize]; + + for entry in &mut source { + if entry.addr > 0x100000 { + continue; + } + + // entries under 1MB should be removed. + entry.typ = 0; + entry.size = 0; + entry.addr = u64::MAX; + } + + let mut lowest = u64::MAX; + let mut highest = 0; + + for entry in &source { + if entry.typ == E820_RAM || entry.typ == E820_UNUSABLE || entry.typ == 0 { + continue; + } + + lowest = if entry.addr < lowest { + entry.addr + } else { + lowest + }; + + highest = if entry.addr + entry.size > highest { + entry.addr + entry.size + } else { + highest + } + } + + let start_kb = if lowest > 1024 { lowest >> 10 } else { 0 }; + + let mut idx: usize = 0; + + e820[idx].addr = 0; + e820[idx].size = map_limit_kb << 10; + e820[idx].typ = E820_RAM; + + let mut delta_kb = 0u64; + + if start_kb > 0 && map_limit_kb > start_kb { + delta_kb = map_limit_kb - start_kb; + if delta_kb > 0 { + e820[idx].size -= delta_kb << 10; + } + } + + let ram_end = source[0].addr + source[0].size; + idx += 1; + + for src in &mut source { + let end = src.addr + src.size; + if src.typ == E820_UNUSABLE || end < ram_end { + src.typ = 0; + continue; + } + + if src.typ != E820_RAM { + continue; + } + + if src.addr >= (1 << 32) { + continue; + } + + if src.addr < ram_end { + let delta = ram_end - src.addr; + src.typ = E820_UNUSABLE; + + if src.size < delta { + src.typ = 0; + } else { + src.size -= delta; + src.addr = ram_end; + } + + if src.addr + src.size != end { + src.typ = 0; + } + } + + if end > ram_end { + src.typ = E820_UNUSABLE; + } + } + + if lowest > ram_end { + let mut add_unusable = true; + + for src in &mut source { + if !add_unusable { + break; + } + + if src.typ != E820_UNUSABLE { + continue; + } + + if ram_end != src.addr { + continue; + } + + if lowest != src.addr + src.size { + src.size = lowest - src.addr; + } + add_unusable = false; + } + + if add_unusable { + e820[1].typ = E820_UNUSABLE; + e820[1].addr = ram_end; + e820[1].size = lowest - ram_end; + } + } + + for src in &source { + if src.typ == E820_RAM || src.typ == 0 { + continue; + } + + e820[idx].typ = src.typ; + e820[idx].addr = src.addr; + e820[idx].size = src.size; + idx += 1; + } + + if balloon_kb > 0 || delta_kb > 0 { + e820[idx].typ = E820_RAM; + e820[idx].addr = if (1u64 << 32u64) > highest { + 1u64 << 32u64 + } else { + highest + }; + e820[idx].size = (delta_kb << 10) + (balloon_kb << 10); + } + Ok(e820) + } } #[async_trait::async_trait] @@ -615,6 +765,13 @@ impl ArchBootSetup for X86BootSetup { let pg_mfn = setup.phys.p2m[pg_pfn as usize]; setup.phys.unmap(pg_pfn)?; setup.phys.unmap(p2m_segment.pfn)?; + + let map = setup.call.get_memory_map(E820_MAX).await?; + let mem_mb = setup.total_pages >> (20 - self.page_shift()); + let mem_kb = mem_mb * 1024; + let e820 = self.e820_sanitize(map, mem_kb, 0)?; + setup.call.set_memory_map(setup.domid, e820).await?; + setup .call .mmuext(setup.domid, MMUEXT_PIN_L4_TABLE, pg_mfn, 0)