From 8c59be195643c4af173279b94000511588924c18 Mon Sep 17 00:00:00 2001 From: Alex Zenla Date: Mon, 29 Apr 2024 04:31:56 -0700 Subject: [PATCH] hvm work --- crates/xen/xencall/examples/vcpu_context.rs | 12 - crates/xen/xencall/src/lib.rs | 39 +- crates/xen/xencall/src/sys.rs | 34 +- crates/xen/xenclient/src/boot.rs | 2 + crates/xen/xenclient/src/lib.rs | 9 +- crates/xen/xenclient/src/mem.rs | 6 +- crates/xen/xenclient/src/sys.rs | 2 + crates/xen/xenclient/src/x86pv.rs | 8 +- crates/xen/xenclient/src/x86pvh.rs | 634 ++++++++++++++++++++ 9 files changed, 677 insertions(+), 69 deletions(-) delete mode 100644 crates/xen/xencall/examples/vcpu_context.rs create mode 100644 crates/xen/xenclient/src/x86pvh.rs diff --git a/crates/xen/xencall/examples/vcpu_context.rs b/crates/xen/xencall/examples/vcpu_context.rs deleted file mode 100644 index cb07269..0000000 --- a/crates/xen/xencall/examples/vcpu_context.rs +++ /dev/null @@ -1,12 +0,0 @@ -use xencall::error::Result; -use xencall::XenCall; - -#[tokio::main] -async fn main() -> Result<()> { - env_logger::init(); - - let call = XenCall::open(0)?; - let context = call.get_vcpu_context(224, 0).await?; - println!("{:?}", context); - Ok(()) -} diff --git a/crates/xen/xencall/src/lib.rs b/crates/xen/xencall/src/lib.rs index b359949..f6684ba 100644 --- a/crates/xen/xencall/src/lib.rs +++ b/crates/xen/xencall/src/lib.rs @@ -7,11 +7,11 @@ use crate::sys::{ EvtChnAllocUnbound, GetDomainInfo, GetPageFrameInfo3, Hypercall, HypercallInit, IoMemPermission, IoPortPermission, IrqPermission, MaxMem, MaxVcpus, MemoryMap, MemoryReservation, MmapBatch, MmapResource, MmuExtOp, MultiCallEntry, PciAssignDevice, - VcpuGuestContext, VcpuGuestContextAny, XenCapabilitiesInfo, DOMCTL_DEV_PCI, HYPERVISOR_DOMCTL, + XenCapabilitiesInfo, DOMCTL_DEV_PCI, HYPERVISOR_DOMCTL, HYPERVISOR_EVENT_CHANNEL_OP, HYPERVISOR_MEMORY_OP, HYPERVISOR_MMUEXT_OP, HYPERVISOR_MULTICALL, HYPERVISOR_XEN_VERSION, XENVER_CAPABILITIES, XEN_DOMCTL_ASSIGN_DEVICE, XEN_DOMCTL_CREATEDOMAIN, XEN_DOMCTL_DESTROYDOMAIN, XEN_DOMCTL_GETDOMAININFO, XEN_DOMCTL_GETPAGEFRAMEINFO3, - XEN_DOMCTL_GETVCPUCONTEXT, XEN_DOMCTL_HYPERCALL_INIT, XEN_DOMCTL_IOMEM_PERMISSION, + XEN_DOMCTL_HYPERCALL_INIT, XEN_DOMCTL_IOMEM_PERMISSION, XEN_DOMCTL_IOPORT_PERMISSION, XEN_DOMCTL_IRQ_PERMISSION, XEN_DOMCTL_MAX_MEM, XEN_DOMCTL_MAX_VCPUS, XEN_DOMCTL_PAUSEDOMAIN, XEN_DOMCTL_SETVCPUCONTEXT, XEN_DOMCTL_SET_ADDRESS_SIZE, XEN_DOMCTL_UNPAUSEDOMAIN, XEN_MEM_CLAIM_PAGES, XEN_MEM_MEMORY_MAP, @@ -23,8 +23,7 @@ use nix::errno::Errno; use std::ffi::{c_long, c_uint, c_ulong, c_void}; use std::sync::Arc; use sys::{ - E820Entry, ForeignMemoryMap, PhysdevMapPirq, HYPERVISOR_PHYSDEV_OP, PHYSDEVOP_MAP_PIRQ, - XEN_DOMCTL_MAX_INTERFACE_VERSION, XEN_DOMCTL_MIN_INTERFACE_VERSION, XEN_MEM_SET_MEMORY_MAP, + E820Entry, ForeignMemoryMap, PhysdevMapPirq, VcpuGuestContextAny, HYPERVISOR_PHYSDEV_OP, PHYSDEVOP_MAP_PIRQ, XEN_DOMCTL_MAX_INTERFACE_VERSION, XEN_DOMCTL_MIN_INTERFACE_VERSION, XEN_MEM_SET_MEMORY_MAP }; use tokio::sync::Semaphore; @@ -459,45 +458,19 @@ impl XenCall { Ok(()) } - pub async fn get_vcpu_context(&self, domid: u32, vcpu: u32) -> Result { - trace!( - "domctl fd={} get_vcpu_context domid={}", - self.handle.as_raw_fd(), - domid, - ); - let mut wrapper = VcpuGuestContextAny { - value: VcpuGuestContext::default(), - }; - let mut domctl = DomCtl { - cmd: XEN_DOMCTL_GETVCPUCONTEXT, - interface_version: self.domctl_interface_version, - domid, - value: DomCtlValue { - vcpu_context: DomCtlVcpuContext { - vcpu, - ctx: addr_of_mut!(wrapper) as c_ulong, - }, - }, - 
}; - self.hypercall1(HYPERVISOR_DOMCTL, addr_of_mut!(domctl) as c_ulong) - .await?; - Ok(unsafe { wrapper.value }) - } - pub async fn set_vcpu_context( &self, domid: u32, vcpu: u32, - context: &VcpuGuestContext, + mut context: VcpuGuestContextAny, ) -> Result<()> { trace!( "domctl fd={} set_vcpu_context domid={} context={:?}", self.handle.as_raw_fd(), domid, - context, + unsafe { context.value } ); - let mut value = VcpuGuestContextAny { value: *context }; let mut domctl = DomCtl { cmd: XEN_DOMCTL_SETVCPUCONTEXT, interface_version: self.domctl_interface_version, @@ -505,7 +478,7 @@ impl XenCall { value: DomCtlValue { vcpu_context: DomCtlVcpuContext { vcpu, - ctx: addr_of_mut!(value) as c_ulong, + ctx: addr_of_mut!(context) as c_ulong, }, }, }; diff --git a/crates/xen/xencall/src/sys.rs b/crates/xen/xencall/src/sys.rs index 7793b9b..02a93cd 100644 --- a/crates/xen/xencall/src/sys.rs +++ b/crates/xen/xencall/src/sys.rs @@ -491,8 +491,7 @@ pub struct TrapInfo { #[repr(C)] #[derive(Copy, Clone, Debug)] -#[cfg(target_arch = "x86_64")] -pub struct VcpuGuestContext { +pub struct x8664VcpuGuestContext { pub fpu_ctx: VcpuGuestContextFpuCtx, pub flags: u64, pub user_regs: CpuUserRegs, @@ -514,10 +513,9 @@ pub struct VcpuGuestContext { pub gs_base_user: u64, } -#[cfg(target_arch = "x86_64")] -impl Default for VcpuGuestContext { +impl Default for x8664VcpuGuestContext { fn default() -> Self { - VcpuGuestContext { + Self { fpu_ctx: Default::default(), flags: 0, user_regs: Default::default(), @@ -543,8 +541,7 @@ impl Default for VcpuGuestContext { #[repr(C)] #[derive(Copy, Clone, Debug, Default)] -#[cfg(target_arch = "aarch64")] -pub struct CpuUserRegs { +pub struct Arm64CpuUserRegs { pub x0: u64, pub x1: u64, pub x2: u64, @@ -590,8 +587,7 @@ pub struct CpuUserRegs { #[repr(C)] #[derive(Copy, Clone, Debug, Default)] -#[cfg(target_arch = "aarch64")] -pub struct VcpuGuestContext { +pub struct Arm64VcpuGuestContext { pub flags: u32, pub user_regs: CpuUserRegs, pub sctlr: u64, @@ -601,7 +597,10 @@ pub struct VcpuGuestContext { } pub union VcpuGuestContextAny { - pub value: VcpuGuestContext, + #[cfg(target_arch = "aarch64")] + pub value: Arm64VcpuGuestContext, + #[cfg(target_arch = "x86_64")] + pub value: x8664VcpuGuestContext, } #[repr(C)] @@ -630,17 +629,11 @@ pub struct E820Entry { pub typ: u32, } -#[cfg(target_arch = "x86_64")] pub const E820_MAX: u32 = 1024; -#[cfg(target_arch = "x86_64")] pub const E820_RAM: u32 = 1; -#[cfg(target_arch = "x86_64")] pub const E820_RESERVED: u32 = 2; -#[cfg(target_arch = "x86_64")] pub const E820_ACPI: u32 = 3; -#[cfg(target_arch = "x86_64")] pub const E820_NVS: u32 = 4; -#[cfg(target_arch = "x86_64")] pub const E820_UNUSABLE: u32 = 5; pub const PHYSDEVOP_MAP_PIRQ: u64 = 13; @@ -678,3 +671,12 @@ pub struct AssignDevice { } pub const DOMID_IO: u32 = 0x7FF1; +pub const MEMFLAGS_POPULATE_ON_DEMAND: u32 = 1 << 16; + +pub struct PodTarget { + pub target_pages: u64, + pub total_pages: u64, + pub pod_cache_pages: u64, + pub pod_entries: u64, + pub domid: u16, +} diff --git a/crates/xen/xenclient/src/boot.rs b/crates/xen/xenclient/src/boot.rs index 2c81faa..a7a9c3b 100644 --- a/crates/xen/xenclient/src/boot.rs +++ b/crates/xen/xenclient/src/boot.rs @@ -36,6 +36,7 @@ pub struct BootDomain { pub pfn_alloc_end: u64, pub virt_pgtab_end: u64, pub total_pages: u64, + pub target_pages: u64, pub image_info: BootImageInfo, pub phys: PhysicalPages, pub store_evtchn: u32, @@ -166,6 +167,7 @@ impl BootSetup { virt_pgtab_end: 0, pfn_alloc_end: 0, total_pages, + target_pages: total_pages, 
page_size: self.platform.page_size(), image_info, consoles: Vec::new(), diff --git a/crates/xen/xenclient/src/lib.rs b/crates/xen/xenclient/src/lib.rs index 000eca8..7f60f86 100644 --- a/crates/xen/xenclient/src/lib.rs +++ b/crates/xen/xenclient/src/lib.rs @@ -20,7 +20,7 @@ use std::time::Duration; use uuid::Uuid; use xencall::sys::{ CreateDomain, DOMCTL_DEV_RDM_RELAXED, XEN_DOMCTL_CDF_HAP, XEN_DOMCTL_CDF_HVM_GUEST, - XEN_DOMCTL_CDF_IOMMU, + XEN_DOMCTL_CDF_IOMMU, XEN_X86_EMU_LAPIC, }; use xencall::XenCall; use xenstore::{ @@ -30,6 +30,7 @@ use xenstore::{ pub mod pci; pub mod x86pv; +pub mod x86pvh; #[derive(Clone)] pub struct XenClient { @@ -153,8 +154,8 @@ impl XenClient { if cfg!(target_arch = "aarch64") { domain.flags = XEN_DOMCTL_CDF_HVM_GUEST | XEN_DOMCTL_CDF_HAP; } else { - domain.flags = XEN_DOMCTL_CDF_IOMMU; - domain.arch_domain_config.emulation_flags = 0; + domain.flags = XEN_DOMCTL_CDF_HVM_GUEST | XEN_DOMCTL_CDF_HAP | XEN_DOMCTL_CDF_IOMMU; + domain.arch_domain_config.emulation_flags = XEN_X86_EMU_LAPIC; } let domid = self.call.create_domain(domain).await?; @@ -294,7 +295,7 @@ impl XenClient { { let loader = ElfImageLoader::load_file_kernel(&config.kernel)?; let mut boot = - BootSetup::new(self.call.clone(), domid, X86PvPlatform::new(), loader, None); + BootSetup::new(self.call.clone(), domid, X86PvhPlatform::new(), loader, None); domain = boot.initialize(&config.initrd, config.mem_mb).await?; boot.boot(&mut domain, &config.cmdline).await?; xenstore_evtchn = domain.store_evtchn; diff --git a/crates/xen/xenclient/src/mem.rs b/crates/xen/xenclient/src/mem.rs index 691c9b7..c044ca7 100644 --- a/crates/xen/xenclient/src/mem.rs +++ b/crates/xen/xenclient/src/mem.rs @@ -80,7 +80,11 @@ impl PhysicalPages { async fn pfn_alloc(&mut self, pfn: u64, count: u64) -> Result { let mut entries = vec![MmapEntry::default(); count as usize]; for (i, entry) in entries.iter_mut().enumerate() { - entry.mfn = self.p2m[pfn as usize + i]; + if !self.p2m.is_empty() { + entry.mfn = self.p2m[pfn as usize + i]; + } else { + entry.mfn = pfn + i as u64; + } } let chunk_size = 1 << XEN_PAGE_SHIFT; let num_per_entry = chunk_size >> XEN_PAGE_SHIFT; diff --git a/crates/xen/xenclient/src/sys.rs b/crates/xen/xenclient/src/sys.rs index 9014de3..9936264 100644 --- a/crates/xen/xenclient/src/sys.rs +++ b/crates/xen/xenclient/src/sys.rs @@ -119,6 +119,8 @@ pub const XEN_PAGE_MASK: u64 = !(XEN_PAGE_SIZE - 1); pub const SUPERPAGE_BATCH_SIZE: u64 = 512; pub const SUPERPAGE_2MB_SHIFT: u64 = 9; pub const SUPERPAGE_2MB_NR_PFNS: u64 = 1u64 << SUPERPAGE_2MB_SHIFT; +pub const SUPERPAGE_1GB_SHIFT: u64 = 18; +pub const SUPERPAGE_1GB_NR_PFNS: u64 = 1u64 << SUPERPAGE_1GB_SHIFT; pub const VGCF_IN_KERNEL: u64 = 1 << 2; pub const VGCF_ONLINE: u64 = 1 << 5; diff --git a/crates/xen/xenclient/src/x86pv.rs b/crates/xen/xenclient/src/x86pv.rs index abb2143..f2c7fb5 100644 --- a/crates/xen/xenclient/src/x86pv.rs +++ b/crates/xen/xenclient/src/x86pv.rs @@ -9,7 +9,7 @@ use log::{debug, trace}; use nix::errno::Errno; use slice_copy::copy; use xencall::sys::{ - E820Entry, VcpuGuestContext, E820_MAX, E820_RAM, E820_UNUSABLE, MMUEXT_PIN_L4_TABLE, + x8664VcpuGuestContext, E820Entry, VcpuGuestContextAny, E820_MAX, E820_RAM, E820_UNUSABLE, MMUEXT_PIN_L4_TABLE }; use crate::{ @@ -839,7 +839,7 @@ impl BootSetupPlatform for X86PvPlatform { .ok_or(Error::MemorySetupFailed("start_info_segment missing"))?; let pg_pfn = page_table_segment.pfn; let pg_mfn = domain.phys.p2m[pg_pfn as usize]; - let mut vcpu = VcpuGuestContext::default(); + let mut vcpu = 
x8664VcpuGuestContext::default(); vcpu.user_regs.rip = domain.image_info.virt_entry; vcpu.user_regs.rsp = domain.image_info.virt_base + (boot_stack_segment.pfn + 1) * self.page_size(); @@ -861,7 +861,9 @@ impl BootSetupPlatform for X86PvPlatform { vcpu.kernel_ss = vcpu.user_regs.ss as u64; vcpu.kernel_sp = vcpu.user_regs.rsp; trace!("vcpu context: {:?}", vcpu); - domain.call.set_vcpu_context(domain.domid, 0, &vcpu).await?; + domain.call.set_vcpu_context(domain.domid, 0, VcpuGuestContextAny { + value: vcpu, + }).await?; Ok(()) } diff --git a/crates/xen/xenclient/src/x86pvh.rs b/crates/xen/xenclient/src/x86pvh.rs new file mode 100644 index 0000000..ae329d0 --- /dev/null +++ b/crates/xen/xenclient/src/x86pvh.rs @@ -0,0 +1,634 @@ +use std::{ + mem::size_of, + os::raw::{c_char, c_void}, + slice, +}; + +use libc::munmap; +use log::{debug, trace}; +use nix::errno::Errno; +use slice_copy::copy; +use xencall::sys::{ + x8664VcpuGuestContext, E820Entry, E820_MAX, E820_RAM, E820_UNUSABLE, MEMFLAGS_POPULATE_ON_DEMAND +}; + +use crate::{ + boot::{BootDomain, BootSetupPlatform, DomainSegment}, + error::{Error, Result}, + sys::{ + GrantEntry, SUPERPAGE_1GB_NR_PFNS, SUPERPAGE_1GB_SHIFT, SUPERPAGE_2MB_NR_PFNS, SUPERPAGE_2MB_SHIFT, SUPERPAGE_BATCH_SIZE, VGCF_IN_KERNEL, VGCF_ONLINE, XEN_PAGE_SHIFT + }, +}; + +pub const X86_PAGE_SHIFT: u64 = 12; +pub const X86_PAGE_SIZE: u64 = 1 << X86_PAGE_SHIFT; +pub const X86_VIRT_BITS: u64 = 48; +pub const X86_VIRT_MASK: u64 = (1 << X86_VIRT_BITS) - 1; +pub const X86_PGTABLE_LEVELS: u64 = 4; +pub const X86_PGTABLE_LEVEL_SHIFT: u64 = 9; + +#[repr(C)] +#[derive(Debug, Clone, Default)] +pub struct PageTableMappingLevel { + pub from: u64, + pub to: u64, + pub pfn: u64, + pub pgtables: usize, +} + +#[repr(C)] +#[derive(Debug, Clone, Default)] +pub struct PageTableMapping { + pub area: PageTableMappingLevel, + pub levels: [PageTableMappingLevel; X86_PGTABLE_LEVELS as usize], +} + +pub const X86_PAGE_TABLE_MAX_MAPPINGS: usize = 2; + +#[repr(C)] +#[derive(Debug, Clone, Default)] +pub struct PageTable { + pub mappings_count: usize, + pub mappings: [PageTableMapping; X86_PAGE_TABLE_MAX_MAPPINGS], +} + +#[repr(C)] +#[derive(Debug)] +pub struct StartInfoConsole { + pub mfn: u64, + pub evtchn: u32, +} + +pub const MAX_GUEST_CMDLINE: usize = 1024; + +#[repr(C)] +#[derive(Debug)] +pub struct StartInfo { + pub magic: [c_char; 32], + pub nr_pages: u64, + pub shared_info: u64, + pub flags: u32, + pub store_mfn: u64, + pub store_evtchn: u32, + pub console: StartInfoConsole, + pub pt_base: u64, + pub nr_pt_frames: u64, + pub mfn_list: u64, + pub mod_start: u64, + pub mod_len: u64, + pub cmdline: [c_char; MAX_GUEST_CMDLINE], + pub first_p2m_pfn: u64, + pub nr_p2m_frames: u64, +} + +pub const X86_GUEST_MAGIC: &str = "xen-3.0-x86_64"; + +#[repr(C)] +#[derive(Debug)] +pub struct ArchVcpuInfo { + pub cr2: u64, + pub pad: u64, +} + +#[repr(C)] +#[derive(Debug)] +pub struct VcpuInfoTime { + pub version: u32, + pub pad0: u32, + pub tsc_timestamp: u64, + pub system_time: u64, + pub tsc_to_system_mul: u32, + pub tsc_shift: i8, + pub flags: u8, + pub pad1: [u8; 2], +} + +#[repr(C)] +#[derive(Debug)] +pub struct VcpuInfo { + pub evtchn_upcall_pending: u8, + pub evtchn_upcall_mask: u8, + pub evtchn_pending_sel: u64, + pub arch_vcpu_info: ArchVcpuInfo, + pub vcpu_info_time: VcpuInfoTime, +} + +#[repr(C)] +#[derive(Debug)] +pub struct SharedInfo { + pub vcpu_info: [VcpuInfo; 32], + pub evtchn_pending: [u64; u64::BITS as usize], + pub evtchn_mask: [u64; u64::BITS as usize], + pub wc_version: u32, + pub 
wc_sec: u32,
+    pub wc_nsec: u32,
+    pub wc_sec_hi: u32,
+    // arch shared info
+    pub max_pfn: u64,
+    pub pfn_to_mfn_frame_list_list: u64,
+    pub nmi_reason: u64,
+    pub p2m_cr3: u64,
+    pub p2m_vaddr: u64,
+    pub p2m_generation: u64,
+}
+
+#[derive(Debug)]
+struct VmemRange {
+    start: u64,
+    end: u64,
+    _flags: u32,
+    nid: u32,
+}
+
+#[derive(Default)]
+pub struct X86PvhPlatform {
+    table: PageTable,
+    p2m_segment: Option<DomainSegment>,
+    page_table_segment: Option<DomainSegment>,
+    start_info_segment: Option<DomainSegment>,
+    boot_stack_segment: Option<DomainSegment>,
+    xenstore_segment: Option<DomainSegment>,
+}
+
+impl X86PvhPlatform {
+    pub fn new() -> Self {
+        Self {
+            ..Default::default()
+        }
+    }
+
+    const PAGE_PRESENT: u64 = 0x001;
+    const PAGE_RW: u64 = 0x002;
+    const PAGE_USER: u64 = 0x004;
+    const PAGE_ACCESSED: u64 = 0x020;
+    const PAGE_DIRTY: u64 = 0x040;
+    fn get_pg_prot(&mut self, l: usize, pfn: u64) -> u64 {
+        let prot = [
+            X86PvhPlatform::PAGE_PRESENT | X86PvhPlatform::PAGE_RW | X86PvhPlatform::PAGE_ACCESSED,
+            X86PvhPlatform::PAGE_PRESENT
+                | X86PvhPlatform::PAGE_RW
+                | X86PvhPlatform::PAGE_ACCESSED
+                | X86PvhPlatform::PAGE_DIRTY
+                | X86PvhPlatform::PAGE_USER,
+            X86PvhPlatform::PAGE_PRESENT
+                | X86PvhPlatform::PAGE_RW
+                | X86PvhPlatform::PAGE_ACCESSED
+                | X86PvhPlatform::PAGE_DIRTY
+                | X86PvhPlatform::PAGE_USER,
+            X86PvhPlatform::PAGE_PRESENT
+                | X86PvhPlatform::PAGE_RW
+                | X86PvhPlatform::PAGE_ACCESSED
+                | X86PvhPlatform::PAGE_DIRTY
+                | X86PvhPlatform::PAGE_USER,
+        ];
+
+        let prot = prot[l];
+        if l > 0 {
+            return prot;
+        }
+
+        for m in 0..self.table.mappings_count {
+            let map = &self.table.mappings[m];
+            let pfn_s = map.levels[(X86_PGTABLE_LEVELS - 1) as usize].pfn;
+            let pfn_e = map.area.pgtables as u64 + pfn_s;
+            if pfn >= pfn_s && pfn < pfn_e {
+                return prot & !X86PvhPlatform::PAGE_RW;
+            }
+        }
+        prot
+    }
+
+    fn count_page_tables(
+        &mut self,
+        domain: &mut BootDomain,
+        from: u64,
+        to: u64,
+        pfn: u64,
+    ) -> Result<usize> {
+        debug!("counting pgtables from={} to={} pfn={}", from, to, pfn);
+        if self.table.mappings_count == X86_PAGE_TABLE_MAX_MAPPINGS {
+            return Err(Error::MemorySetupFailed("max page table count reached"));
+        }
+
+        let m = self.table.mappings_count;
+
+        let pfn_end = pfn + ((to - from) >> X86_PAGE_SHIFT);
+        if pfn_end >= domain.phys.p2m_size() {
+            return Err(Error::MemorySetupFailed("pfn_end greater than p2m size"));
+        }
+
+        for idx in 0..self.table.mappings_count {
+            if from < self.table.mappings[idx].area.to && to > self.table.mappings[idx].area.from {
+                return Err(Error::MemorySetupFailed("page table calculation failed"));
+            }
+        }
+        let mut map = PageTableMapping::default();
+        map.area.from = from & X86_VIRT_MASK;
+        map.area.to = to & X86_VIRT_MASK;
+
+        for l in (0usize..X86_PGTABLE_LEVELS as usize).rev() {
+            map.levels[l].pfn = domain.pfn_alloc_end + map.area.pgtables as u64;
+            if l as u64 == X86_PGTABLE_LEVELS - 1 {
+                if self.table.mappings_count == 0 {
+                    map.levels[l].from = 0;
+                    map.levels[l].to = X86_VIRT_MASK;
+                    map.levels[l].pgtables = 1;
+                    map.area.pgtables += 1;
+                }
+                continue;
+            }
+
+            let bits = X86_PAGE_SHIFT + (l + 1) as u64 * X86_PGTABLE_LEVEL_SHIFT;
+            let mask = BootDomain::bits_to_mask(bits);
+            map.levels[l].from = map.area.from & !mask;
+            map.levels[l].to = map.area.to | mask;
+
+            for cmp in &mut self.table.mappings[0..self.table.mappings_count] {
+                if cmp.levels[l].from == cmp.levels[l].to {
+                    continue;
+                }
+
+                if map.levels[l].from >= cmp.levels[l].from && map.levels[l].to <= cmp.levels[l].to
+                {
+                    map.levels[l].from = 0;
+                    map.levels[l].to = 0;
+                    break;
+                }
+
+                if map.levels[l].from >= cmp.levels[l].from
+                    && 
map.levels[l].from <= cmp.levels[l].to
+                {
+                    map.levels[l].from = cmp.levels[l].to + 1;
+                }
+
+                if map.levels[l].to >= cmp.levels[l].from && map.levels[l].to <= cmp.levels[l].to {
+                    map.levels[l].to = cmp.levels[l].from - 1;
+                }
+            }
+
+            if map.levels[l].from < map.levels[l].to {
+                map.levels[l].pgtables =
+                    (((map.levels[l].to - map.levels[l].from) >> bits) + 1) as usize;
+            }
+
+            debug!(
+                "count_pgtables {:#x}/{}: {:#x} -> {:#x}, {} tables",
+                mask, bits, map.levels[l].from, map.levels[l].to, map.levels[l].pgtables
+            );
+            map.area.pgtables += map.levels[l].pgtables;
+        }
+        self.table.mappings[m] = map;
+        Ok(m)
+    }
+
+    fn e820_sanitize(
+        &self,
+        mut source: Vec<E820Entry>,
+        map_limit_kb: u64,
+        balloon_kb: u64,
+    ) -> Result<Vec<E820Entry>> {
+        let mut e820 = vec![E820Entry::default(); E820_MAX as usize];
+
+        for entry in &mut source {
+            if entry.addr > 0x100000 {
+                continue;
+            }
+
+            // entries under 1MB should be removed.
+            entry.typ = 0;
+            entry.size = 0;
+            entry.addr = u64::MAX;
+        }
+
+        let mut lowest = u64::MAX;
+        let mut highest = 0;
+
+        for entry in &source {
+            if entry.typ == E820_RAM || entry.typ == E820_UNUSABLE || entry.typ == 0 {
+                continue;
+            }
+
+            lowest = if entry.addr < lowest {
+                entry.addr
+            } else {
+                lowest
+            };
+
+            highest = if entry.addr + entry.size > highest {
+                entry.addr + entry.size
+            } else {
+                highest
+            };
+        }
+
+        let start_kb = if lowest > 1024 { lowest >> 10 } else { 0 };
+
+        let mut idx: usize = 0;
+
+        e820[idx].addr = 0;
+        e820[idx].size = map_limit_kb << 10;
+        e820[idx].typ = E820_RAM;
+
+        let mut delta_kb = 0u64;
+
+        if start_kb > 0 && map_limit_kb > start_kb {
+            delta_kb = map_limit_kb - start_kb;
+            if delta_kb > 0 {
+                e820[idx].size -= delta_kb << 10;
+            }
+        }
+
+        let ram_end = source[0].addr + source[0].size;
+        idx += 1;
+
+        for src in &mut source {
+            let end = src.addr + src.size;
+            if src.typ == E820_UNUSABLE || end < ram_end {
+                src.typ = 0;
+                continue;
+            }
+
+            if src.typ != E820_RAM {
+                continue;
+            }
+
+            if src.addr >= (1 << 32) {
+                continue;
+            }
+
+            if src.addr < ram_end {
+                let delta = ram_end - src.addr;
+                src.typ = E820_UNUSABLE;
+
+                if src.size < delta {
+                    src.typ = 0;
+                } else {
+                    src.size -= delta;
+                    src.addr = ram_end;
+                }
+
+                if src.addr + src.size != end {
+                    src.typ = 0;
+                }
+            }
+
+            if end > ram_end {
+                src.typ = E820_UNUSABLE;
+            }
+        }
+
+        if lowest > ram_end {
+            let mut add_unusable = true;
+
+            for src in &mut source {
+                if !add_unusable {
+                    break;
+                }
+
+                if src.typ != E820_UNUSABLE {
+                    continue;
+                }
+
+                if ram_end != src.addr {
+                    continue;
+                }
+
+                if lowest != src.addr + src.size {
+                    src.size = lowest - src.addr;
+                }
+                add_unusable = false;
+            }
+
+            if add_unusable {
+                e820[1].typ = E820_UNUSABLE;
+                e820[1].addr = ram_end;
+                e820[1].size = lowest - ram_end;
+            }
+        }
+
+        for src in &source {
+            if src.typ == E820_RAM || src.typ == 0 {
+                continue;
+            }
+
+            e820[idx].typ = src.typ;
+            e820[idx].addr = src.addr;
+            e820[idx].size = src.size;
+            idx += 1;
+        }
+
+        if balloon_kb > 0 || delta_kb > 0 {
+            e820[idx].typ = E820_RAM;
+            e820[idx].addr = if (1u64 << 32u64) > highest {
+                1u64 << 32u64
+            } else {
+                highest
+            };
+            e820[idx].size = (delta_kb << 10) + (balloon_kb << 10);
+        }
+        Ok(e820)
+    }
+}
+
+#[async_trait::async_trait]
+impl BootSetupPlatform for X86PvhPlatform {
+    fn page_size(&self) -> u64 {
+        X86_PAGE_SIZE
+    }
+
+    fn page_shift(&self) -> u64 {
+        X86_PAGE_SHIFT
+    }
+
+    fn needs_early_kernel(&self) -> bool {
+        false
+    }
+
+    async fn initialize_memory(&self, domain: &mut BootDomain) -> Result<()> {
+        let memflags = if domain.target_pages > domain.total_pages {
+            
MEMFLAGS_POPULATE_ON_DEMAND
+        } else {
+            0
+        };
+
+        // build a single vmemrange covering all of guest memory.
+        let mut vmemranges: Vec<VmemRange> = Vec::new();
+        let stub = VmemRange {
+            start: 0,
+            end: domain.total_pages << self.page_shift(),
+            _flags: 0,
+            nid: 0,
+        };
+        vmemranges.push(stub);
+
+        let mut p2m_size: u64 = 0;
+        let mut total: u64 = 0;
+        for range in &vmemranges {
+            total += (range.end - range.start) >> XEN_PAGE_SHIFT;
+            p2m_size = p2m_size.max(range.end >> XEN_PAGE_SHIFT);
+        }
+
+        if total != domain.total_pages {
+            return Err(Error::MemorySetupFailed("total pages mismatch"));
+        }
+
+        for range in &vmemranges {
+            let end_pages = range.end >> self.page_shift();
+            let mut cur_pages = range.start >> self.page_shift();
+
+            while end_pages > cur_pages {
+                let count = end_pages - cur_pages;
+                if count != 0 {
+                    let mut extents = vec![0u64; count as usize];
+
+                    for i in 0..count {
+                        extents[i as usize] = cur_pages + i;
+                    }
+
+                    let _ = domain
+                        .call
+                        .populate_physmap(domain.domid, count, 0, memflags, &extents)
+                        .await?;
+                    cur_pages += count;
+                }
+            }
+        }
+
+        Ok(())
+    }
+
+    async fn alloc_p2m_segment(
+        &mut self,
+        _: &mut BootDomain,
+    ) -> Result<Option<DomainSegment>> {
+        Ok(None)
+    }
+
+    async fn alloc_page_tables(
+        &mut self,
+        _: &mut BootDomain,
+    ) -> Result<Option<DomainSegment>> {
+        Ok(None)
+    }
+
+    async fn setup_page_tables(&mut self, _: &mut BootDomain) -> Result<()> {
+        Ok(())
+    }
+
+    async fn setup_hypercall_page(&mut self, _: &mut BootDomain) -> Result<()> {
+        Ok(())
+    }
+
+    async fn alloc_magic_pages(&mut self, domain: &mut BootDomain) -> Result<()> {
+        // populate the fixed set of HVM special pages just below the end of the special region.
+        let mut special_array = vec![0u64; X86_HVM_NR_SPECIAL_PAGES as usize];
+        for i in 0..X86_HVM_NR_SPECIAL_PAGES {
+            special_array[i as usize] = special_pfn(i);
+        }
+        let _pages = domain
+            .call
+            .populate_physmap(domain.domid, X86_HVM_NR_SPECIAL_PAGES, 0, 0, &special_array)
+            .await?;
+        Ok(())
+    }
+
+    async fn setup_shared_info(
+        &mut self,
+        _: &mut BootDomain,
+        _: u64,
+    ) -> Result<()> {
+        Ok(())
+    }
+
+    async fn setup_start_info(
+        &mut self,
+        _: &mut BootDomain,
+        _: &str,
+        _: u64,
+    ) -> Result<()> {
+        Ok(())
+    }
+
+    async fn bootlate(&mut self, domain: &mut BootDomain) -> Result<()> {
+        let map = domain.call.get_memory_map(E820_MAX).await?;
+        let mem_mb = domain.total_pages >> (20 - self.page_shift());
+        let mem_kb = mem_mb * 1024;
+        let e820 = self.e820_sanitize(map, mem_kb, 0)?;
+        domain.call.set_memory_map(domain.domid, e820).await?;
+        Ok(())
+    }
+
+    async fn vcpu(&mut self, domain: &mut BootDomain) -> Result<()> {
+        let boot_stack_segment = self
+            .boot_stack_segment
+            .as_ref()
+            .ok_or(Error::MemorySetupFailed("boot_stack_segment missing"))?;
+        let start_info_segment = self
+            .start_info_segment
+            .as_ref()
+            .ok_or(Error::MemorySetupFailed("start_info_segment missing"))?;
+        let page_table_segment = self
+            .page_table_segment
+            .as_ref()
+            .ok_or(Error::MemorySetupFailed("page_table_segment missing"))?;
+        let pg_pfn = page_table_segment.pfn;
+        let pg_mfn = domain.phys.p2m[pg_pfn as usize];
+        let mut vcpu = x8664VcpuGuestContext::default();
+        vcpu.user_regs.rip = domain.image_info.virt_entry;
+        vcpu.user_regs.rsp =
+            domain.image_info.virt_base + (boot_stack_segment.pfn + 1) * self.page_size();
+        vcpu.user_regs.rsi =
+            domain.image_info.virt_base + (start_info_segment.pfn) * self.page_size();
+        vcpu.user_regs.rflags = 1 << 9;
+        vcpu.debugreg[6] = 0xffff0ff0;
+        vcpu.debugreg[7] = 0x00000400;
+        vcpu.flags = VGCF_IN_KERNEL | VGCF_ONLINE;
+        let cr3_pfn = pg_mfn;
+        debug!("cr3: pfn {:#x} mfn {:#x}", page_table_segment.pfn, cr3_pfn);
+        vcpu.ctrlreg[3] = cr3_pfn << 12;
+        vcpu.user_regs.ds = 0x0;
+        vcpu.user_regs.es = 0x0;
+        vcpu.user_regs.fs = 0x0;
+        vcpu.user_regs.gs = 0x0;
+        vcpu.user_regs.ss = 0xe02b;
+        vcpu.user_regs.cs = 0xe033;
+        vcpu.kernel_ss = vcpu.user_regs.ss as u64;
+        vcpu.kernel_sp = vcpu.user_regs.rsp;
+        trace!("vcpu context: {:?}", vcpu);
+        domain
+            .call
+            .set_vcpu_context(
+                domain.domid,
+                0,
+                xencall::sys::VcpuGuestContextAny { value: vcpu },
+            )
+            .await?;
+        Ok(())
+    }
+
+    async fn gnttab_seed(&mut self, domain: &mut BootDomain) -> Result<()> {
+        let xenstore_segment = self
+            .xenstore_segment
+            .as_ref()
+            .ok_or(Error::MemorySetupFailed("xenstore_segment missing"))?;
+
+        let console_gfn = domain.consoles.first().map(|x| x.1).unwrap_or(0) as usize;
+        let xenstore_gfn = domain.phys.p2m[xenstore_segment.pfn as usize];
+        let addr = domain
+            .call
+            .mmap(0, 1 << XEN_PAGE_SHIFT)
+            .await
+            .ok_or(Error::MmapFailed)?;
+        domain
+            .call
+            .map_resource(domain.domid, 1, 0, 0, 1, addr)
+            .await?;
+        let entries = unsafe { slice::from_raw_parts_mut(addr as *mut GrantEntry, 2) };
+        entries[0].flags = 1 << 0;
+        entries[0].domid = 0;
+        entries[0].frame = console_gfn as u32;
+        entries[1].flags = 1 << 0;
+        entries[1].domid = 0;
+        entries[1].frame = xenstore_gfn as u32;
+        unsafe {
+            let result = munmap(addr as *mut c_void, 1 << XEN_PAGE_SHIFT);
+            if result != 0 {
+                return Err(Error::UnmapFailed(Errno::from_raw(result)));
+            }
+        }
+        Ok(())
+    }
+}
+
+fn special_pfn(x: u64) -> u64 {
+    X86_HVM_END_SPECIAL_REGION - X86_HVM_NR_SPECIAL_PAGES + x
+}
+
+const X86_HVM_NR_SPECIAL_PAGES: u64 = 8;
+const X86_HVM_END_SPECIAL_REGION: u64 = 0xff000;