From 4839ee26792a778aef355555ad191aa55bf317aa Mon Sep 17 00:00:00 2001
From: Alex Zenla
Date: Mon, 29 Apr 2024 02:59:49 -0700
Subject: [PATCH] feat: move platform-specific boot code into its own module

Replace the per-architecture ArchBootSetup trait with a BootSetupPlatform
trait that owns its own segment and page-table state, and split the
per-domain boot state out of BootSetup into a new BootDomain type. The
x86 PV implementation moves from x86.rs to x86pv.rs, and the arm64 boot
path is removed for now.
---
 crates/xen/xencall/src/lib.rs                 |   1 +
 crates/xen/xenclient/src/arm64.rs             | 288 --------
 crates/xen/xenclient/src/boot.rs              | 561 ++++++---------
 crates/xen/xenclient/src/elfloader.rs         |   9 +-
 crates/xen/xenclient/src/lib.rs               |  68 +-
 crates/xen/xenclient/src/mem.rs               |  25 +-
 crates/xen/xenclient/src/sys.rs               |   2 +
 crates/xen/xenclient/src/{x86.rs => x86pv.rs} | 661 ++++++++++--------
 8 files changed, 635 insertions(+), 980 deletions(-)
 delete mode 100644 crates/xen/xenclient/src/arm64.rs
 rename crates/xen/xenclient/src/{x86.rs => x86pv.rs} (68%)

diff --git a/crates/xen/xencall/src/lib.rs b/crates/xen/xencall/src/lib.rs
index f9b8cd0..b359949 100644
--- a/crates/xen/xencall/src/lib.rs
+++ b/crates/xen/xencall/src/lib.rs
@@ -783,6 +783,7 @@ impl XenCall {
         Ok(())
     }
 
+    #[allow(clippy::field_reassign_with_default)]
     pub async fn map_pirq(&self, domid: u32, index: isize, pirq: Option<u32>) -> Result<u32> {
         trace!(
             "physdev fd={} map_pirq domid={} index={} pirq={:?}",
diff --git a/crates/xen/xenclient/src/arm64.rs b/crates/xen/xenclient/src/arm64.rs
deleted file mode 100644
index bf393b3..0000000
--- a/crates/xen/xenclient/src/arm64.rs
+++ /dev/null
@@ -1,288 +0,0 @@
-use crate::boot::{ArchBootSetup, BootImageInfo, BootSetup, BootState, DomainSegment};
-use crate::error::Result;
-use crate::sys::XEN_PAGE_SHIFT;
-use crate::Error;
-use log::trace;
-use xencall::sys::VcpuGuestContext;
-
-pub const ARM_PAGE_SHIFT: u64 = 12;
-const ARM_PAGE_SIZE: u64 = 1 << ARM_PAGE_SHIFT;
-
-const GUEST_RAM0_BASE: u64 = 0x40000000;
-const GUEST_RAM0_SIZE: u64 = 0xc0000000;
-const GUEST_RAM1_BASE: u64 = 0x0200000000;
-const GUEST_RAM1_SIZE: u64 = 0xfe00000000;
-
-const GUEST_RAM_BANK_BASES: [u64; 2] = [GUEST_RAM0_BASE, GUEST_RAM1_BASE];
-const GUEST_RAM_BANK_SIZES: [u64; 2] = [GUEST_RAM0_SIZE, GUEST_RAM1_SIZE];
-
-const LPAE_SHIFT: u64 = 9;
-const PFN_4K_SHIFT: u64 = 0;
-const PFN_2M_SHIFT: u64 = PFN_4K_SHIFT + LPAE_SHIFT;
-const PFN_1G_SHIFT: u64 = PFN_2M_SHIFT + LPAE_SHIFT;
-const PFN_512G_SHIFT: u64 = PFN_1G_SHIFT + LPAE_SHIFT;
-
-const PSR_FIQ_MASK: u64 = 1 << 6; /* Fast Interrupt mask */
-const PSR_IRQ_MASK: u64 = 1 << 7; /* Interrupt mask */
-const PSR_ABT_MASK: u64 = 1 << 8; /* Asynchronous Abort mask */
-const PSR_MODE_EL1H: u64 = 0x05;
-const PSR_GUEST64_INIT: u64 = PSR_ABT_MASK | PSR_FIQ_MASK | PSR_IRQ_MASK | PSR_MODE_EL1H;
-
-pub struct Arm64BootSetup {}
-
-impl Default for Arm64BootSetup {
-    fn default() -> Self {
-        Self::new()
-    }
-}
-
-impl Arm64BootSetup {
-    pub fn new() -> Arm64BootSetup {
-        Arm64BootSetup {}
-    }
-
-    async fn populate_one_size(
-        &self,
-        setup: &mut BootSetup<'_>,
-        pfn_shift: u64,
-        base_pfn: u64,
-        pfn_count: u64,
-        extents: &mut [u64],
-    ) -> Result<u64> {
-        let mask = (1u64 << pfn_shift) - 1;
-        let next_shift = pfn_shift + LPAE_SHIFT;
-        let next_mask = (1u64 << next_shift) - 1;
-        let next_boundary = (base_pfn + (1 << next_shift)) - 1;
-
-        let mut end_pfn = base_pfn + pfn_count;
-
-        if pfn_shift == PFN_512G_SHIFT {
-            return Ok(0);
-        }
-
-        if (base_pfn & next_mask) != 0 && end_pfn > next_boundary {
-            end_pfn = next_boundary;
-        }
-
-        if (mask & base_pfn) != 0 {
-            return Ok(0);
-        }
-
-        let count = (end_pfn - base_pfn) >> pfn_shift;
-
-        if count == 0 {
-            return Ok(0);
-        }
-
-        for i in 0..count {
-            extents[i as usize] = base_pfn + (i << pfn_shift);
-        }
-
-        let result_extents = setup
-            .call
-            .populate_physmap(
setup.domid, - count, - pfn_shift as u32, - 0, - &extents[0usize..count as usize], - ) - .await?; - slice_copy::copy(extents, &result_extents); - Ok((result_extents.len() as u64) << pfn_shift) - } - - async fn populate_guest_memory( - &mut self, - setup: &mut BootSetup<'_>, - base_pfn: u64, - pfn_count: u64, - ) -> Result<()> { - let mut extents = vec![0u64; 1024 * 1024]; - - for pfn in 0..extents.len() { - let mut allocsz = (1024 * 1024).min(pfn_count - pfn as u64); - allocsz = self - .populate_one_size( - setup, - PFN_512G_SHIFT, - base_pfn + pfn as u64, - allocsz, - &mut extents, - ) - .await?; - if allocsz > 0 { - continue; - } - allocsz = self - .populate_one_size( - setup, - PFN_1G_SHIFT, - base_pfn + pfn as u64, - allocsz, - &mut extents, - ) - .await?; - if allocsz > 0 { - continue; - } - allocsz = self - .populate_one_size( - setup, - PFN_2M_SHIFT, - base_pfn + pfn as u64, - allocsz, - &mut extents, - ) - .await?; - if allocsz > 0 { - continue; - } - allocsz = self - .populate_one_size( - setup, - PFN_4K_SHIFT, - base_pfn + pfn as u64, - allocsz, - &mut extents, - ) - .await?; - if allocsz == 0 { - return Err(Error::MemorySetupFailed("allocsz is zero")); - } - } - - Ok(()) - } -} - -#[async_trait::async_trait] -impl ArchBootSetup for Arm64BootSetup { - fn page_size(&mut self) -> u64 { - ARM_PAGE_SIZE - } - - fn page_shift(&mut self) -> u64 { - ARM_PAGE_SHIFT - } - - fn needs_early_kernel(&mut self) -> bool { - true - } - - async fn setup_shared_info(&mut self, _: &mut BootSetup, _: u64) -> Result<()> { - Ok(()) - } - - async fn setup_start_info(&mut self, _: &mut BootSetup, _: &BootState, _: &str) -> Result<()> { - Ok(()) - } - - async fn meminit( - &mut self, - setup: &mut BootSetup, - total_pages: u64, - kernel_segment: &Option, - initrd_segment: &Option, - ) -> Result<()> { - let kernel_segment = kernel_segment - .as_ref() - .ok_or(Error::MemorySetupFailed("kernel_segment missing"))?; - setup.call.claim_pages(setup.domid, total_pages).await?; - let mut ramsize = total_pages << XEN_PAGE_SHIFT; - - let bankbase = GUEST_RAM_BANK_BASES; - let bankmax = GUEST_RAM_BANK_SIZES; - - let kernbase = kernel_segment.vstart; - let kernend = BootSetup::round_up(kernel_segment.size, 21); - let dtb = setup.dtb.as_ref(); - let dtb_size = dtb.map(|blob| BootSetup::round_up(blob.len() as u64, XEN_PAGE_SHIFT)); - let ramdisk_size = initrd_segment - .as_ref() - .map(|segment| BootSetup::round_up(segment.size, XEN_PAGE_SHIFT)); - let modsize = dtb_size.unwrap_or(0) + ramdisk_size.unwrap_or(0); - let ram128mb = bankbase[0] + (128 << 20); - - let mut rambank_size: [u64; 2] = [0, 0]; - for i in 0..2 { - let size = if ramsize > bankmax[i] { - bankmax[i] - } else { - ramsize - }; - ramsize -= size; - rambank_size[i] = size >> XEN_PAGE_SHIFT; - } - - for i in 0..2 { - let size = if ramsize > bankmax[i] { - bankmax[i] - } else { - ramsize - }; - ramsize -= size; - rambank_size[i] = size >> XEN_PAGE_SHIFT; - } - - for i in 0..2 { - self.populate_guest_memory(setup, bankbase[i] >> XEN_PAGE_SHIFT, rambank_size[i]) - .await?; - } - - let bank0end = bankbase[0] + (rambank_size[0] << XEN_PAGE_SHIFT); - let _modbase = if bank0end >= ram128mb + modsize && kernend < ram128mb { - ram128mb - } else if bank0end - modsize > kernend { - bank0end - modsize - } else if kernbase - bankbase[0] > modsize { - kernbase - modsize - } else { - return Err(Error::MemorySetupFailed("unable to determine modbase")); - }; - setup.call.claim_pages(setup.domid, 0).await?; - Ok(()) - } - - async fn bootlate(&mut self, _: &mut BootSetup, 
_: &mut BootState) -> Result<()> { - Ok(()) - } - - async fn vcpu(&mut self, setup: &mut BootSetup, state: &mut BootState) -> Result<()> { - let mut vcpu = VcpuGuestContext::default(); - vcpu.user_regs.pc = state.image_info.virt_entry; - vcpu.user_regs.x0 = 0xffffffff; - vcpu.user_regs.x1 = 0; - vcpu.user_regs.x2 = 0; - vcpu.user_regs.x3 = 0; - vcpu.sctlr = 0x00c50078; - vcpu.ttbr0 = 0; - vcpu.ttbr1 = 0; - vcpu.ttbcr = 0; - vcpu.user_regs.cpsr = PSR_GUEST64_INIT; - vcpu.flags = 1 << 0; // VGCF_ONLINE - trace!("vcpu context: {:?}", vcpu); - setup.call.set_vcpu_context(setup.domid, 0, &vcpu).await?; - Ok(()) - } - - async fn alloc_p2m_segment( - &mut self, - _: &mut BootSetup, - _: &BootImageInfo, - ) -> Result> { - Ok(None) - } - - async fn alloc_page_tables( - &mut self, - _: &mut BootSetup, - _: &BootImageInfo, - ) -> Result> { - Ok(None) - } - - async fn setup_page_tables(&mut self, _: &mut BootSetup, _: &mut BootState) -> Result<()> { - Ok(()) - } -} diff --git a/crates/xen/xenclient/src/boot.rs b/crates/xen/xenclient/src/boot.rs index 3da8453..2c81faa 100644 --- a/crates/xen/xenclient/src/boot.rs +++ b/crates/xen/xenclient/src/boot.rs @@ -1,291 +1,61 @@ -use crate::error::Result; -use crate::mem::PhysicalPages; -use crate::sys::{GrantEntry, XEN_PAGE_SHIFT}; -use crate::Error; -use libc::munmap; -use log::debug; -use nix::errno::Errno; -use slice_copy::copy; - -use crate::mem::ARCH_PAGE_SHIFT; -use std::ffi::c_void; use std::slice; + +use log::debug; +use slice_copy::copy; use xencall::XenCall; -pub trait BootImageLoader { - fn parse(&self) -> Result; - fn load(&self, image_info: &BootImageInfo, dst: &mut [u8]) -> Result<()>; +use crate::{ + error::{Error, Result}, + mem::PhysicalPages, + sys::XEN_PAGE_SHIFT, +}; + +pub struct BootSetup { + pub call: XenCall, + pub domid: u32, + pub platform: P, + pub image_loader: I, + pub dtb: Option>, } -pub const XEN_UNSET_ADDR: u64 = -1i64 as u64; - -#[derive(Debug)] -pub struct BootImageInfo { - pub start: u64, - pub virt_base: u64, - pub virt_kstart: u64, - pub virt_kend: u64, - pub virt_hypercall: u64, - pub virt_entry: u64, - pub virt_p2m_base: u64, - pub unmapped_initrd: bool, -} - -pub struct BootSetup<'a> { - pub(crate) call: &'a XenCall, - pub phys: PhysicalPages<'a>, - pub(crate) domid: u32, - pub(crate) virt_alloc_end: u64, - pub(crate) pfn_alloc_end: u64, - pub(crate) virt_pgtab_end: u64, - pub(crate) total_pages: u64, - #[cfg(target_arch = "aarch64")] - pub(crate) dtb: Option>, -} - -#[derive(Debug)] +#[derive(Debug, Default)] pub struct DomainSegment { - pub(crate) vstart: u64, - vend: u64, + pub vstart: u64, + pub vend: u64, pub pfn: u64, - pub(crate) addr: u64, - pub(crate) size: u64, - #[cfg(target_arch = "x86_64")] - pub(crate) pages: u64, + pub addr: u64, + pub size: u64, + pub pages: u64, } -#[derive(Debug)] -pub struct BootState { - pub kernel_segment: DomainSegment, - pub start_info_segment: DomainSegment, - pub xenstore_segment: DomainSegment, - pub boot_stack_segment: DomainSegment, - pub p2m_segment: Option, - pub page_table_segment: Option, +pub struct BootDomain { + pub domid: u32, + pub call: XenCall, + pub page_size: u64, + pub virt_alloc_end: u64, + pub pfn_alloc_end: u64, + pub virt_pgtab_end: u64, + pub total_pages: u64, pub image_info: BootImageInfo, - pub shared_info_frame: u64, - pub initrd_segment: DomainSegment, + pub phys: PhysicalPages, pub store_evtchn: u32, - pub consoles: Vec<(u32, DomainSegment)>, + pub xenstore_mfn: u64, + pub initrd_segment: DomainSegment, + pub consoles: Vec<(u32, u64)>, } -impl 
BootSetup<'_> { - pub fn new(call: &XenCall, domid: u32) -> BootSetup { - BootSetup { - call, - phys: PhysicalPages::new(call, domid), - domid, - virt_alloc_end: 0, - pfn_alloc_end: 0, - virt_pgtab_end: 0, - total_pages: 0, - #[cfg(target_arch = "aarch64")] - dtb: None, - } +impl BootDomain { + pub async fn alloc_module(&mut self, buffer: &[u8]) -> Result { + let segment = self.alloc_segment(0, buffer.len() as u64).await?; + let slice = unsafe { slice::from_raw_parts_mut(segment.addr as *mut u8, buffer.len()) }; + copy(slice, buffer); + Ok(segment) } - async fn initialize_memory( - &mut self, - arch: &mut Box, - total_pages: u64, - kernel_segment: &Option, - initrd_segment: &Option, - ) -> Result<()> { - arch.meminit(self, total_pages, kernel_segment, initrd_segment) - .await?; - Ok(()) - } - - async fn setup_hypercall_page(&mut self, image_info: &BootImageInfo) -> Result<()> { - if image_info.virt_hypercall == XEN_UNSET_ADDR { - return Ok(()); - } - - let pfn = (image_info.virt_hypercall - image_info.virt_base) >> ARCH_PAGE_SHIFT; - let mfn = self.phys.p2m[pfn as usize]; - self.call.hypercall_init(self.domid, mfn).await?; - Ok(()) - } - - pub async fn initialize( - &mut self, - arch: &mut Box, - image_loader: &I, - initrd: &[u8], - max_vcpus: u32, - mem_mb: u64, - console_count: usize, - ) -> Result { - debug!("initialize max_vcpus={:?} mem_mb={:?}", max_vcpus, mem_mb); - - let page_size = arch.page_size(); - let image_info = image_loader.parse()?; - debug!("initialize image_info={:?}", image_info); - let mut kernel_segment: Option = None; - let mut initrd_segment: Option = None; - if !image_info.unmapped_initrd { - initrd_segment = Some(self.alloc_module(page_size, initrd).await?); - } - - if arch.needs_early_kernel() { - kernel_segment = Some( - self.load_kernel_segment(page_size, image_loader, &image_info) - .await?, - ); - } - - let total_pages = mem_mb << (20 - arch.page_shift()); - self.initialize_memory(arch, total_pages, &kernel_segment, &initrd_segment) - .await?; - self.virt_alloc_end = image_info.virt_base; - - if kernel_segment.is_none() { - kernel_segment = Some( - self.load_kernel_segment(page_size, image_loader, &image_info) - .await?, - ); - } - - let mut p2m_segment: Option = None; - if image_info.virt_p2m_base >= image_info.virt_base - || (image_info.virt_p2m_base & ((1 << arch.page_shift()) - 1)) != 0 - { - p2m_segment = arch.alloc_p2m_segment(self, &image_info).await?; - } - let start_info_segment = self.alloc_page(page_size)?; - let xenstore_segment = self.alloc_page(page_size)?; - let mut consoles: Vec<(u32, DomainSegment)> = Vec::new(); - for _ in 0..console_count { - let evtchn = self.call.evtchn_alloc_unbound(self.domid, 0).await?; - let page = self.alloc_page(page_size)?; - consoles.push((evtchn, page)); - } - let page_table_segment = arch.alloc_page_tables(self, &image_info).await?; - let boot_stack_segment = self.alloc_page(page_size)?; - - if self.virt_pgtab_end > 0 { - self.alloc_padding_pages(page_size, self.virt_pgtab_end)?; - } - - if p2m_segment.is_none() { - if let Some(mut segment) = arch.alloc_p2m_segment(self, &image_info).await? 
{ - segment.vstart = image_info.virt_p2m_base; - p2m_segment = Some(segment); - } - } - - if image_info.unmapped_initrd { - initrd_segment = Some(self.alloc_module(page_size, initrd).await?); - } - - let initrd_segment = initrd_segment.unwrap(); - let store_evtchn = self.call.evtchn_alloc_unbound(self.domid, 0).await?; - - let kernel_segment = - kernel_segment.ok_or(Error::MemorySetupFailed("kernel_segment missing"))?; - - let state = BootState { - kernel_segment, - start_info_segment, - xenstore_segment, - consoles, - boot_stack_segment, - p2m_segment, - page_table_segment, - image_info, - initrd_segment, - store_evtchn, - shared_info_frame: 0, - }; - debug!("initialize state={:?}", state); - Ok(state) - } - - pub async fn boot( - &mut self, - arch: &mut Box, - state: &mut BootState, - cmdline: &str, - ) -> Result<()> { - let domain_info = self.call.get_domain_info(self.domid).await?; - let shared_info_frame = domain_info.shared_info_frame; - state.shared_info_frame = shared_info_frame; - arch.setup_page_tables(self, state).await?; - arch.setup_start_info(self, state, cmdline).await?; - self.setup_hypercall_page(&state.image_info).await?; - arch.bootlate(self, state).await?; - arch.setup_shared_info(self, state.shared_info_frame) - .await?; - arch.vcpu(self, state).await?; - self.phys.unmap_all()?; - self.gnttab_seed(state).await?; - Ok(()) - } - - async fn gnttab_seed(&mut self, state: &mut BootState) -> Result<()> { - let console_gfn = - self.phys.p2m[state.consoles.first().map(|x| x.1.pfn).unwrap_or(0) as usize]; - let xenstore_gfn = self.phys.p2m[state.xenstore_segment.pfn as usize]; - let addr = self - .call - .mmap(0, 1 << XEN_PAGE_SHIFT) - .await - .ok_or(Error::MmapFailed)?; - self.call.map_resource(self.domid, 1, 0, 0, 1, addr).await?; - let entries = unsafe { slice::from_raw_parts_mut(addr as *mut GrantEntry, 2) }; - entries[0].flags = 1 << 0; - entries[0].domid = 0; - entries[0].frame = console_gfn as u32; - entries[1].flags = 1 << 0; - entries[1].domid = 0; - entries[1].frame = xenstore_gfn as u32; - unsafe { - let result = munmap(addr as *mut c_void, 1 << XEN_PAGE_SHIFT); - if result != 0 { - return Err(Error::UnmapFailed(Errno::from_raw(result))); - } - } - Ok(()) - } - - async fn load_kernel_segment( - &mut self, - page_size: u64, - image_loader: &I, - image_info: &BootImageInfo, - ) -> Result { - let kernel_segment = self - .alloc_segment( - page_size, - image_info.virt_kstart, - image_info.virt_kend - image_info.virt_kstart, - ) - .await?; - let kernel_segment_ptr = kernel_segment.addr as *mut u8; - let kernel_segment_slice = - unsafe { slice::from_raw_parts_mut(kernel_segment_ptr, kernel_segment.size as usize) }; - image_loader.load(image_info, kernel_segment_slice)?; - Ok(kernel_segment) - } - - pub(crate) fn round_up(addr: u64, mask: u64) -> u64 { - addr | mask - } - - #[cfg(target_arch = "x86_64")] - pub(crate) fn bits_to_mask(bits: u64) -> u64 { - (1 << bits) - 1 - } - - pub(crate) async fn alloc_segment( - &mut self, - page_size: u64, - start: u64, - size: u64, - ) -> Result { + pub async fn alloc_segment(&mut self, start: u64, size: u64) -> Result { debug!("alloc_segment {:#x} {:#x}", start, size); if start > 0 { - self.alloc_padding_pages(page_size, start)?; + self.alloc_padding_pages(start)?; } let local_page_size: u32 = (1i64 << XEN_PAGE_SHIFT) as u32; @@ -302,7 +72,7 @@ impl BootSetup<'_> { pages, }; - self.chk_alloc_pages(page_size, pages)?; + self.chk_alloc_pages(pages)?; let ptr = self.phys.pfn_to_ptr(segment.pfn, pages).await?; segment.addr = ptr; @@ 
-318,46 +88,20 @@ impl BootSetup<'_> { Ok(segment) } - fn alloc_page(&mut self, page_size: u64) -> Result { - let start = self.virt_alloc_end; - let pfn = self.pfn_alloc_end; - - self.chk_alloc_pages(page_size, 1)?; - debug!("alloc_page {:#x} (pfn {:#x})", start, pfn); - Ok(DomainSegment { - vstart: start, - vend: (start + page_size) - 1, - pfn, - addr: 0, - size: 0, - #[cfg(target_arch = "x86_64")] - pages: 1, - }) - } - - async fn alloc_module(&mut self, page_size: u64, buffer: &[u8]) -> Result { - let segment = self - .alloc_segment(page_size, 0, buffer.len() as u64) - .await?; - let slice = unsafe { slice::from_raw_parts_mut(segment.addr as *mut u8, buffer.len()) }; - copy(slice, buffer); - Ok(segment) - } - - fn alloc_padding_pages(&mut self, page_size: u64, boundary: u64) -> Result<()> { - if (boundary & (page_size - 1)) != 0 { + pub fn alloc_padding_pages(&mut self, boundary: u64) -> Result<()> { + if (boundary & (self.page_size - 1)) != 0 { return Err(Error::MemorySetupFailed("boundary is incorrect")); } if boundary < self.virt_alloc_end { return Err(Error::MemorySetupFailed("boundary is below allocation end")); } - let pages = (boundary - self.virt_alloc_end) / page_size; - self.chk_alloc_pages(page_size, pages)?; + let pages = (boundary - self.virt_alloc_end) / self.page_size; + self.chk_alloc_pages(pages)?; Ok(()) } - fn chk_alloc_pages(&mut self, page_size: u64, pages: u64) -> Result<()> { + pub fn chk_alloc_pages(&mut self, pages: u64) -> Result<()> { if pages > self.total_pages || self.pfn_alloc_end > self.total_pages || pages > self.total_pages - self.pfn_alloc_end @@ -366,56 +110,195 @@ impl BootSetup<'_> { } self.pfn_alloc_end += pages; - self.virt_alloc_end += pages * page_size; + self.virt_alloc_end += pages * self.page_size; Ok(()) } + + pub fn alloc_page(&mut self) -> Result { + let start = self.virt_alloc_end; + let pfn = self.pfn_alloc_end; + + self.chk_alloc_pages(1)?; + debug!("alloc_page {:#x} (pfn {:#x})", start, pfn); + Ok(DomainSegment { + vstart: start, + vend: (start + self.page_size) - 1, + pfn, + addr: 0, + size: 0, + pages: 1, + }) + } + + pub fn round_up(addr: u64, mask: u64) -> u64 { + addr | mask + } + + pub fn bits_to_mask(bits: u64) -> u64 { + (1 << bits) - 1 + } +} + +impl BootSetup { + pub fn new( + call: XenCall, + domid: u32, + platform: P, + image_loader: I, + dtb: Option>, + ) -> BootSetup { + BootSetup { + call, + domid, + platform, + image_loader, + dtb, + } + } + + pub async fn initialize(&mut self, initrd: &[u8], mem_mb: u64) -> Result { + let total_pages = mem_mb << (20 - self.platform.page_shift()); + let image_info = self.image_loader.parse().await?; + let mut domain = BootDomain { + domid: self.domid, + call: self.call.clone(), + virt_alloc_end: 0, + virt_pgtab_end: 0, + pfn_alloc_end: 0, + total_pages, + page_size: self.platform.page_size(), + image_info, + consoles: Vec::new(), + phys: PhysicalPages::new(self.call.clone(), self.domid, self.platform.page_shift()), + initrd_segment: DomainSegment::default(), + store_evtchn: 0, + xenstore_mfn: 0, + }; + + let mut initrd_segment = if !domain.image_info.unmapped_initrd { + Some(domain.alloc_module(initrd).await?) + } else { + None + }; + + let mut kernel_segment = if self.platform.needs_early_kernel() { + Some(self.load_kernel_segment(&mut domain).await?) 
+        } else {
+            None
+        };
+
+        self.platform.initialize_memory(&mut domain).await?;
+        domain.virt_alloc_end = domain.image_info.virt_base;
+
+        if kernel_segment.is_none() {
+            kernel_segment = Some(self.load_kernel_segment(&mut domain).await?);
+        }
+
+        self.platform.alloc_magic_pages(&mut domain).await?;
+
+        if domain.image_info.unmapped_initrd {
+            initrd_segment = Some(domain.alloc_module(initrd).await?);
+        }
+
+        domain.initrd_segment =
+            initrd_segment.ok_or(Error::MemorySetupFailed("initrd_segment missing"))?;
+        domain.store_evtchn = self.call.evtchn_alloc_unbound(self.domid, 0).await?;
+
+        let _kernel_segment =
+            kernel_segment.ok_or(Error::MemorySetupFailed("kernel_segment missing"))?;
+
+        Ok(domain)
+    }
+
+    pub async fn boot(&mut self, domain: &mut BootDomain, cmdline: &str) -> Result<()> {
+        let domain_info = self.call.get_domain_info(self.domid).await?;
+        let shared_info_frame = domain_info.shared_info_frame;
+        self.platform.setup_page_tables(domain).await?;
+        self.platform
+            .setup_start_info(domain, cmdline, shared_info_frame)
+            .await?;
+        self.platform.setup_hypercall_page(domain).await?;
+        self.platform.bootlate(domain).await?;
+        self.platform
+            .setup_shared_info(domain, shared_info_frame)
+            .await?;
+        self.platform.vcpu(domain).await?;
+        domain.phys.unmap_all()?;
+        self.platform.gnttab_seed(domain).await?;
+        Ok(())
+    }
+
+    async fn load_kernel_segment(&mut self, domain: &mut BootDomain) -> Result<DomainSegment> {
+        let kernel_segment = domain
+            .alloc_segment(
+                domain.image_info.virt_kstart,
+                domain.image_info.virt_kend - domain.image_info.virt_kstart,
+            )
+            .await?;
+        let kernel_segment_ptr = kernel_segment.addr as *mut u8;
+        let kernel_segment_slice =
+            unsafe { slice::from_raw_parts_mut(kernel_segment_ptr, kernel_segment.size as usize) };
+        self.image_loader
+            .load(&domain.image_info, kernel_segment_slice)
+            .await?;
+        Ok(kernel_segment)
+    }
 }
 
 #[async_trait::async_trait]
-pub trait ArchBootSetup {
-    fn page_size(&mut self) -> u64;
-    fn page_shift(&mut self) -> u64;
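+/// Platform-specific boot behavior, implemented once per architecture and
+/// virtualization mode; the only implementation left in this tree is the
+/// x86 PV platform in x86pv.rs. BootSetup::initialize drives
+/// initialize_memory and alloc_magic_pages, and BootSetup::boot then runs
+/// the page-table, start-info, shared-info, grant-table, and vCPU hooks.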
+pub trait BootSetupPlatform {
+    fn page_size(&self) -> u64;
+    fn page_shift(&self) -> u64;
+    fn needs_early_kernel(&self) -> bool;
 
-    fn needs_early_kernel(&mut self) -> bool;
+    async fn initialize_memory(&self, domain: &mut BootDomain) -> Result<()>;
 
-    async fn alloc_p2m_segment(
+    async fn alloc_page_tables(&mut self, domain: &mut BootDomain)
+        -> Result<Option<DomainSegment>>;
+
+    async fn alloc_p2m_segment(&mut self, domain: &mut BootDomain)
+        -> Result<Option<DomainSegment>>;
+
+    async fn alloc_magic_pages(&mut self, domain: &mut BootDomain) -> Result<()>;
+
+    async fn setup_page_tables(&mut self, domain: &mut BootDomain) -> Result<()>;
+
+    async fn setup_shared_info(
         &mut self,
-        setup: &mut BootSetup,
-        image_info: &BootImageInfo,
-    ) -> Result<Option<DomainSegment>>;
-
-    async fn alloc_page_tables(
-        &mut self,
-        setup: &mut BootSetup,
-        image_info: &BootImageInfo,
-    ) -> Result<Option<DomainSegment>>;
-
-    async fn setup_page_tables(
-        &mut self,
-        setup: &mut BootSetup,
-        state: &mut BootState,
+        domain: &mut BootDomain,
+        shared_info_frame: u64,
     ) -> Result<()>;
 
     async fn setup_start_info(
         &mut self,
-        setup: &mut BootSetup,
-        state: &BootState,
+        domain: &mut BootDomain,
         cmdline: &str,
-    ) -> Result<()>;
-
-    async fn setup_shared_info(
-        &mut self,
-        setup: &mut BootSetup,
         shared_info_frame: u64,
     ) -> Result<()>;
 
-    async fn meminit(
-        &mut self,
-        setup: &mut BootSetup,
-        total_pages: u64,
-        kernel_segment: &Option<DomainSegment>,
-        initrd_segment: &Option<DomainSegment>,
-    ) -> Result<()>;
-    async fn bootlate(&mut self, setup: &mut BootSetup, state: &mut BootState) -> Result<()>;
-    async fn vcpu(&mut self, setup: &mut BootSetup, state: &mut BootState) -> Result<()>;
+    async fn bootlate(&mut self, domain: &mut BootDomain) -> Result<()>;
+
+    async fn gnttab_seed(&mut self, domain: &mut BootDomain) -> Result<()>;
+
+    async fn vcpu(&mut self, domain: &mut BootDomain) -> Result<()>;
+
+    async fn setup_hypercall_page(&mut self, domain: &mut BootDomain) -> Result<()>;
+}
+
+#[async_trait::async_trait]
+pub trait BootImageLoader {
+    async fn parse(&self) -> Result<BootImageInfo>;
+    async fn load(&self, image_info: &BootImageInfo, dst: &mut [u8]) -> Result<()>;
+}
+
+#[derive(Debug)]
+pub struct BootImageInfo {
+    pub start: u64,
+    pub virt_base: u64,
+    pub virt_kstart: u64,
+    pub virt_kend: u64,
+    pub virt_hypercall: u64,
+    pub virt_entry: u64,
+    pub virt_p2m_base: u64,
+    pub unmapped_initrd: bool,
 }
diff --git a/crates/xen/xenclient/src/elfloader.rs b/crates/xen/xenclient/src/elfloader.rs
index 42d3194..b64bb65 100644
--- a/crates/xen/xenclient/src/elfloader.rs
+++ b/crates/xen/xenclient/src/elfloader.rs
@@ -1,4 +1,4 @@
-use crate::boot::{BootImageInfo, BootImageLoader, XEN_UNSET_ADDR};
+use crate::boot::{BootImageInfo, BootImageLoader};
 use crate::error::Result;
 use crate::sys::{
     XEN_ELFNOTE_ENTRY, XEN_ELFNOTE_HYPERCALL_PAGE, XEN_ELFNOTE_INIT_P2M, XEN_ELFNOTE_MOD_START_PFN,
@@ -128,8 +128,9 @@ struct ElfNoteValue {
     value: u64,
 }
 
+#[async_trait::async_trait]
 impl BootImageLoader for ElfImageLoader {
-    fn parse(&self) -> Result<BootImageInfo> {
+    async fn parse(&self) -> Result<BootImageInfo> {
         let elf = ElfBytes::<AnyEndian>::minimal_parse(self.data.as_slice())?;
         let headers = elf.section_headers().ok_or(Error::ElfInvalidImage)?;
         let mut linux_notes: HashMap<u64, Vec<u8>> = HashMap::new();
@@ -220,7 +221,7 @@ impl BootImageLoader for ElfImageLoader {
             }
         }
 
-        if paddr_offset != XEN_UNSET_ADDR && virt_base == XEN_UNSET_ADDR {
+        if paddr_offset != u64::MAX && virt_base == u64::MAX {
             return Err(Error::ElfInvalidImage);
         }
 
@@ -242,7 +243,7 @@ impl BootImageLoader for ElfImageLoader {
         Ok(image_info)
     }
 
-    fn load(&self, image_info: &BootImageInfo, dst: &mut [u8]) -> Result<()> {
+    async fn load(&self, image_info: &BootImageInfo, dst: &mut [u8]) -> Result<()> {
         let elf = ElfBytes::<AnyEndian>::minimal_parse(self.data.as_slice())?;
         let segments = elf.segments().ok_or(Error::ElfInvalidImage)?;
 
diff --git a/crates/xen/xenclient/src/lib.rs b/crates/xen/xenclient/src/lib.rs
index bb039cc..000eca8 100644
--- a/crates/xen/xenclient/src/lib.rs
+++ b/crates/xen/xenclient/src/lib.rs
@@ -4,19 +4,7 @@ pub mod error;
 pub mod mem;
 pub mod sys;
 
-#[cfg(target_arch = "x86_64")]
-pub mod x86;
-
-#[cfg(target_arch = "x86_64")]
-use crate::x86::X86BootSetup;
-
-#[cfg(target_arch = "aarch64")]
-pub mod arm64;
-
-#[cfg(target_arch = "aarch64")]
-use crate::arm64::Arm64BootSetup;
-
-use crate::boot::{ArchBootSetup, BootSetup};
+use crate::boot::{BootDomain, BootSetup};
 use crate::elfloader::ElfImageLoader;
 use crate::error::{Error, Result};
-use boot::BootState;
+use crate::x86pv::X86PvPlatform;
@@ -31,7 +19,8 @@ use std::str::FromStr;
 use std::time::Duration;
 use uuid::Uuid;
 use xencall::sys::{
-    CreateDomain, DOMCTL_DEV_RDM_RELAXED, X86_EMU_LAPIC, XEN_DOMCTL_CDF_HAP, XEN_DOMCTL_CDF_HVM_GUEST, XEN_DOMCTL_CDF_IOMMU
+    CreateDomain, DOMCTL_DEV_RDM_RELAXED, XEN_DOMCTL_CDF_HAP, XEN_DOMCTL_CDF_HVM_GUEST,
+    XEN_DOMCTL_CDF_IOMMU,
 };
 use xencall::XenCall;
 use xenstore::{
@@ -40,6 +29,7 @@ use xenstore::{
 };
 
 pub mod pci;
+pub mod x86pv;
 
 #[derive(Clone)]
 pub struct XenClient {
@@ -154,14 +144,16 @@ impl XenClient {
     }
 
     pub async fn create(&self, config: &DomainConfig) -> Result {
-        let mut domain = CreateDomain::default();
+        let mut domain = CreateDomain {
+            max_vcpus: config.max_vcpus,
+            ..Default::default()
+        };
 
-        domain.max_vcpus = config.max_vcpus;
         if cfg!(target_arch = "aarch64") {
             domain.flags = XEN_DOMCTL_CDF_HVM_GUEST | XEN_DOMCTL_CDF_HAP;
         } else {
-            domain.flags = XEN_DOMCTL_CDF_HVM_GUEST | XEN_DOMCTL_CDF_HAP | XEN_DOMCTL_CDF_IOMMU;
-            domain.arch_domain_config.emulation_flags = X86_EMU_LAPIC;
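+            // x86 guests are created as PV domains from here on: the HVM and
+            // HAP flags are dropped and LAPIC emulation is disabled, matching
+            // the new X86PvPlatform boot path.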
+            domain.flags = XEN_DOMCTL_CDF_IOMMU;
+            domain.arch_domain_config.emulation_flags = 0;
         }
 
         let domid = self.call.create_domain(domain).await?;
@@ -294,33 +287,18 @@ impl XenClient {
         self.call.set_max_vcpus(domid, config.max_vcpus).await?;
         self.call.set_max_mem(domid, config.mem_mb * 1024).await?;
 
-        let image_loader = ElfImageLoader::load_file_kernel(&config.kernel)?;
-
         let xenstore_evtchn: u32;
         let xenstore_mfn: u64;
-        let p2m: Vec<u64>;
-        let mut state: BootState;
+        let mut domain: BootDomain;
 
         {
-            let mut boot = BootSetup::new(&self.call, domid);
-            #[cfg(target_arch = "x86_64")]
-            let mut arch = Box::new(X86BootSetup::new()) as Box<dyn ArchBootSetup>;
-            #[cfg(target_arch = "aarch64")]
-            let mut arch = Box::new(Arm64BootSetup::new()) as Box<dyn ArchBootSetup>;
-            state = boot
-                .initialize(
-                    &mut arch,
-                    &image_loader,
-                    &config.initrd,
-                    config.max_vcpus,
-                    config.mem_mb,
-                    1,
-                )
-                .await?;
-            boot.boot(&mut arch, &mut state, &config.cmdline).await?;
-            xenstore_evtchn = state.store_evtchn;
-            xenstore_mfn = boot.phys.p2m[state.xenstore_segment.pfn as usize];
-            p2m = boot.phys.p2m;
+            let loader = ElfImageLoader::load_file_kernel(&config.kernel)?;
+            let mut boot =
+                BootSetup::new(self.call.clone(), domid, X86PvPlatform::new(), loader, None);
+            domain = boot.initialize(&config.initrd, config.mem_mb).await?;
+            boot.boot(&mut domain, &config.cmdline).await?;
+            xenstore_evtchn = domain.store_evtchn;
+            xenstore_mfn = domain.xenstore_mfn;
         }
 
         {
@@ -386,8 +364,7 @@ impl XenClient {
                     .to_string(),
                 initialized: true,
             },
-            &p2m,
-            &state,
+            &domain,
             &dom_path,
             &backend_dom_path,
             config.backend_domid,
@@ -402,8 +378,7 @@ impl XenClient {
             .console_device_add(
                 &tx,
                 channel,
-                &p2m,
-                &state,
+                &domain,
                 &dom_path,
                 &backend_dom_path,
                 config.backend_domid,
@@ -547,17 +521,16 @@ impl XenClient {
         &self,
         tx: &XsdTransaction,
         channel: &DomainChannel,
-        p2m: &[u64],
-        state: &BootState,
+        domain: &BootDomain,
         dom_path: &str,
         backend_dom_path: &str,
         backend_domid: u32,
         domid: u32,
         index: usize,
     ) -> Result<(Option<u32>, Option<u64>)> {
-        let console = state.consoles.get(index);
+        let console = domain.consoles.get(index);
         let port = console.map(|x| x.0);
-        let ring = console.map(|x| p2m[x.1.pfn as usize]);
+        let ring = console.map(|x| x.1);
 
         let mut backend_entries = vec![
             ("frontend-id", domid.to_string()),
diff --git a/crates/xen/xenclient/src/mem.rs b/crates/xen/xenclient/src/mem.rs
index 84d520d..691c9b7 100644
--- a/crates/xen/xenclient/src/mem.rs
+++ b/crates/xen/xenclient/src/mem.rs
@@ -6,11 +6,6 @@ use log::debug;
 use nix::errno::Errno;
 use std::ffi::c_void;
 
-#[cfg(target_arch = "aarch64")]
-pub(crate) use crate::arm64::ARM_PAGE_SHIFT as ARCH_PAGE_SHIFT;
-#[cfg(target_arch = "x86_64")]
-pub(crate) use crate::x86::X86_PAGE_SHIFT as ARCH_PAGE_SHIFT;
-
 use xencall::sys::MmapEntry;
 use xencall::XenCall;
 
@@ -21,16 +16,18 @@ pub struct PhysicalPage {
     count: u64,
 }
 
-pub struct PhysicalPages<'a> {
+pub struct PhysicalPages {
+    page_shift: u64,
     domid: u32,
-    pub(crate) p2m: Vec<u64>,
-    call: &'a XenCall,
+    pub p2m: Vec<u64>,
+    call: XenCall,
     pages: Vec<PhysicalPage>,
 }
 
-impl PhysicalPages<'_> {
-    pub fn new(call: &XenCall, domid: u32) -> PhysicalPages {
+impl PhysicalPages {
+    pub fn new(call: XenCall, domid: u32, page_shift: u64) -> PhysicalPages {
         PhysicalPages {
+            page_shift,
             domid,
             p2m: Vec::new(),
             call,
@@ -70,7 +67,7 @@ impl PhysicalPages<'_> { } } - return Ok(page.ptr + ((pfn - page.pfn) << ARCH_PAGE_SHIFT)); + return Ok(page.ptr + ((pfn - page.pfn) << self.page_shift)); } if count == 0 { @@ -161,7 +158,7 @@ impl PhysicalPages<'_> { unsafe { let err = munmap( page.ptr as *mut c_void, - (page.count << ARCH_PAGE_SHIFT) as usize, + (page.count << self.page_shift) as usize, ); if err != 0 { return Err(Error::UnmapFailed(Errno::from_raw(err))); @@ -182,11 +179,11 @@ impl PhysicalPages<'_> { unsafe { let err = munmap( page.ptr as *mut c_void, - (page.count << ARCH_PAGE_SHIFT) as usize, + (page.count << self.page_shift) as usize, ); debug!( "unmapped {:#x} foreign bytes at {:#x}", - (page.count << ARCH_PAGE_SHIFT) as usize, + (page.count << self.page_shift) as usize, page.ptr ); if err != 0 { diff --git a/crates/xen/xenclient/src/sys.rs b/crates/xen/xenclient/src/sys.rs index a05bbc5..9014de3 100644 --- a/crates/xen/xenclient/src/sys.rs +++ b/crates/xen/xenclient/src/sys.rs @@ -128,3 +128,5 @@ pub struct GrantEntry { pub domid: u16, pub frame: u32, } + +pub const XEN_HVM_START_MAGIC_VALUE: u64 = 0x336ec578; diff --git a/crates/xen/xenclient/src/x86.rs b/crates/xen/xenclient/src/x86pv.rs similarity index 68% rename from crates/xen/xenclient/src/x86.rs rename to crates/xen/xenclient/src/x86pv.rs index 9f19387..abb2143 100644 --- a/crates/xen/xenclient/src/x86.rs +++ b/crates/xen/xenclient/src/x86pv.rs @@ -1,20 +1,26 @@ -use crate::boot::{ArchBootSetup, BootImageInfo, BootSetup, BootState, DomainSegment}; -use crate::error::Result; -use crate::sys::{ - SUPERPAGE_2MB_NR_PFNS, SUPERPAGE_2MB_SHIFT, SUPERPAGE_BATCH_SIZE, VGCF_IN_KERNEL, VGCF_ONLINE, - XEN_PAGE_SHIFT, +use std::{ + mem::size_of, + os::raw::{c_char, c_void}, + slice, }; -use crate::Error; -use libc::c_char; + +use libc::munmap; use log::{debug, trace}; +use nix::errno::Errno; use slice_copy::copy; -use std::cmp::{max, min}; -use std::mem::size_of; -use std::slice; use xencall::sys::{ E820Entry, VcpuGuestContext, E820_MAX, E820_RAM, E820_UNUSABLE, MMUEXT_PIN_L4_TABLE, }; +use crate::{ + boot::{BootDomain, BootSetupPlatform, DomainSegment}, + error::{Error, Result}, + sys::{ + GrantEntry, SUPERPAGE_2MB_NR_PFNS, SUPERPAGE_2MB_SHIFT, SUPERPAGE_BATCH_SIZE, + VGCF_IN_KERNEL, VGCF_ONLINE, XEN_PAGE_SHIFT, + }, +}; + pub const X86_PAGE_SHIFT: u64 = 12; pub const X86_PAGE_SIZE: u64 = 1 << X86_PAGE_SHIFT; pub const X86_VIRT_BITS: u64 = 48; @@ -127,10 +133,6 @@ pub struct SharedInfo { pub p2m_generation: u64, } -pub struct X86BootSetup { - table: PageTable, -} - #[derive(Debug)] struct VmemRange { start: u64, @@ -139,16 +141,20 @@ struct VmemRange { _nid: u32, } -impl Default for X86BootSetup { - fn default() -> Self { - Self::new() - } +#[derive(Default)] +pub struct X86PvPlatform { + table: PageTable, + p2m_segment: Option, + page_table_segment: Option, + start_info_segment: Option, + boot_stack_segment: Option, + xenstore_segment: Option, } -impl X86BootSetup { - pub fn new() -> X86BootSetup { - X86BootSetup { - table: PageTable::default(), +impl X86PvPlatform { + pub fn new() -> Self { + Self { + ..Default::default() } } @@ -159,22 +165,22 @@ impl X86BootSetup { const PAGE_DIRTY: u64 = 0x040; fn get_pg_prot(&mut self, l: usize, pfn: u64) -> u64 { let prot = [ - X86BootSetup::PAGE_PRESENT | X86BootSetup::PAGE_RW | X86BootSetup::PAGE_ACCESSED, - X86BootSetup::PAGE_PRESENT - | X86BootSetup::PAGE_RW - | X86BootSetup::PAGE_ACCESSED - | X86BootSetup::PAGE_DIRTY - | X86BootSetup::PAGE_USER, - X86BootSetup::PAGE_PRESENT - | X86BootSetup::PAGE_RW - | 
X86BootSetup::PAGE_ACCESSED - | X86BootSetup::PAGE_DIRTY - | X86BootSetup::PAGE_USER, - X86BootSetup::PAGE_PRESENT - | X86BootSetup::PAGE_RW - | X86BootSetup::PAGE_ACCESSED - | X86BootSetup::PAGE_DIRTY - | X86BootSetup::PAGE_USER, + X86PvPlatform::PAGE_PRESENT | X86PvPlatform::PAGE_RW | X86PvPlatform::PAGE_ACCESSED, + X86PvPlatform::PAGE_PRESENT + | X86PvPlatform::PAGE_RW + | X86PvPlatform::PAGE_ACCESSED + | X86PvPlatform::PAGE_DIRTY + | X86PvPlatform::PAGE_USER, + X86PvPlatform::PAGE_PRESENT + | X86PvPlatform::PAGE_RW + | X86PvPlatform::PAGE_ACCESSED + | X86PvPlatform::PAGE_DIRTY + | X86PvPlatform::PAGE_USER, + X86PvPlatform::PAGE_PRESENT + | X86PvPlatform::PAGE_RW + | X86PvPlatform::PAGE_ACCESSED + | X86PvPlatform::PAGE_DIRTY + | X86PvPlatform::PAGE_USER, ]; let prot = prot[l]; @@ -187,7 +193,7 @@ impl X86BootSetup { let pfn_s = map.levels[(X86_PGTABLE_LEVELS - 1) as usize].pfn; let pfn_e = map.area.pgtables as u64 + pfn_s; if pfn >= pfn_s && pfn < pfn_e { - return prot & !X86BootSetup::PAGE_RW; + return prot & !X86PvPlatform::PAGE_RW; } } prot @@ -195,7 +201,7 @@ impl X86BootSetup { fn count_page_tables( &mut self, - setup: &mut BootSetup, + domain: &mut BootDomain, from: u64, to: u64, pfn: u64, @@ -208,7 +214,7 @@ impl X86BootSetup { let m = self.table.mappings_count; let pfn_end = pfn + ((to - from) >> X86_PAGE_SHIFT); - if pfn_end >= setup.phys.p2m_size() { + if pfn_end >= domain.phys.p2m_size() { return Err(Error::MemorySetupFailed("pfn_end greater than p2m size")); } @@ -222,7 +228,7 @@ impl X86BootSetup { map.area.to = to & X86_VIRT_MASK; for l in (0usize..X86_PGTABLE_LEVELS as usize).rev() { - map.levels[l].pfn = setup.pfn_alloc_end + map.area.pgtables as u64; + map.levels[l].pfn = domain.pfn_alloc_end + map.area.pgtables as u64; if l as u64 == X86_PGTABLE_LEVELS - 1 { if self.table.mappings_count == 0 { map.levels[l].from = 0; @@ -234,7 +240,7 @@ impl X86BootSetup { } let bits = X86_PAGE_SHIFT + (l + 1) as u64 * X86_PGTABLE_LEVEL_SHIFT; - let mask = BootSetup::bits_to_mask(bits); + let mask = BootDomain::bits_to_mask(bits); map.levels[l].from = map.area.from & !mask; map.levels[l].to = map.area.to | mask; @@ -426,229 +432,29 @@ impl X86BootSetup { } #[async_trait::async_trait] -impl ArchBootSetup for X86BootSetup { - fn page_size(&mut self) -> u64 { +impl BootSetupPlatform for X86PvPlatform { + fn page_size(&self) -> u64 { X86_PAGE_SIZE } - fn page_shift(&mut self) -> u64 { + fn page_shift(&self) -> u64 { X86_PAGE_SHIFT } - fn needs_early_kernel(&mut self) -> bool { + fn needs_early_kernel(&self) -> bool { false } - async fn alloc_p2m_segment( - &mut self, - setup: &mut BootSetup, - image_info: &BootImageInfo, - ) -> Result> { - let mut p2m_alloc_size = - ((setup.phys.p2m_size() * 8) + X86_PAGE_SIZE - 1) & !(X86_PAGE_SIZE - 1); - let from = image_info.virt_p2m_base; - let to = from + p2m_alloc_size - 1; - let m = self.count_page_tables(setup, from, to, setup.pfn_alloc_end)?; - - let pgtables: usize; - { - let map = &mut self.table.mappings[m]; - map.area.pfn = setup.pfn_alloc_end; - for lvl_idx in 0..4 { - map.levels[lvl_idx].pfn += p2m_alloc_size >> X86_PAGE_SHIFT; - } - pgtables = map.area.pgtables; - } - self.table.mappings_count += 1; - p2m_alloc_size += (pgtables << X86_PAGE_SHIFT) as u64; - let p2m_segment = setup - .alloc_segment(self.page_size(), 0, p2m_alloc_size) + async fn initialize_memory(&self, domain: &mut BootDomain) -> Result<()> { + domain.call.set_address_size(domain.domid, 64).await?; + domain + .call + .claim_pages(domain.domid, domain.total_pages) .await?; 
- Ok(Some(p2m_segment)) - } - - async fn alloc_page_tables( - &mut self, - setup: &mut BootSetup, - image_info: &BootImageInfo, - ) -> Result> { - let mut extra_pages = 1; - extra_pages += (512 * 1024) / X86_PAGE_SIZE; - let mut pages = extra_pages; - - let mut try_virt_end: u64; - let mut m: usize; - loop { - try_virt_end = BootSetup::round_up( - setup.virt_alloc_end + pages * X86_PAGE_SIZE, - BootSetup::bits_to_mask(22), - ); - m = self.count_page_tables(setup, image_info.virt_base, try_virt_end, 0)?; - pages = self.table.mappings[m].area.pgtables as u64 + extra_pages; - if setup.virt_alloc_end + pages * X86_PAGE_SIZE <= try_virt_end + 1 { - break; - } - } - - self.table.mappings[m].area.pfn = 0; - self.table.mappings_count += 1; - setup.virt_pgtab_end = try_virt_end + 1; - let size = self.table.mappings[m].area.pgtables as u64 * X86_PAGE_SIZE; - let segment = setup.alloc_segment(self.page_size(), 0, size).await?; - debug!( - "alloc_page_tables table={:?} segment={:?}", - self.table, segment - ); - Ok(Some(segment)) - } - - async fn setup_page_tables( - &mut self, - setup: &mut BootSetup, - state: &mut BootState, - ) -> Result<()> { - let p2m_segment = state - .p2m_segment - .as_ref() - .ok_or(Error::MemorySetupFailed("p2m_segment missing"))?; - let p2m_guest = unsafe { - slice::from_raw_parts_mut(p2m_segment.addr as *mut u64, setup.phys.p2m_size() as usize) - }; - copy(p2m_guest, &setup.phys.p2m); - - for l in (0usize..X86_PGTABLE_LEVELS as usize).rev() { - for m1 in 0usize..self.table.mappings_count { - let map1 = &self.table.mappings[m1]; - let from = map1.levels[l].from; - let to = map1.levels[l].to; - let pg_ptr = setup.phys.pfn_to_ptr(map1.levels[l].pfn, 0).await? as *mut u64; - for m2 in 0usize..self.table.mappings_count { - let map2 = &self.table.mappings[m2]; - let lvl = if l > 0 { - &map2.levels[l - 1] - } else { - &map2.area - }; - - if l > 0 && lvl.pgtables == 0 { - continue; - } - - if lvl.from >= to || lvl.to <= from { - continue; - } - - let p_s = (max(from, lvl.from) - from) - >> (X86_PAGE_SHIFT + l as u64 * X86_PGTABLE_LEVEL_SHIFT); - let p_e = (min(to, lvl.to) - from) - >> (X86_PAGE_SHIFT + l as u64 * X86_PGTABLE_LEVEL_SHIFT); - let rhs = X86_PAGE_SHIFT as usize + l * X86_PGTABLE_LEVEL_SHIFT as usize; - let mut pfn = ((max(from, lvl.from) - lvl.from) >> rhs) + lvl.pfn; - - debug!( - "setup_page_tables lvl={} map_1={} map_2={} pfn={:#x} p_s={:#x} p_e={:#x}", - l, m1, m2, pfn, p_s, p_e - ); - - let pg = unsafe { slice::from_raw_parts_mut(pg_ptr, (p_e + 1) as usize) }; - for p in p_s..p_e + 1 { - let prot = self.get_pg_prot(l, pfn); - let pfn_paddr = setup.phys.p2m[pfn as usize] << X86_PAGE_SHIFT; - let value = pfn_paddr | prot; - pg[p as usize] = value; - pfn += 1; - } - } - } - } - Ok(()) - } - - async fn setup_start_info( - &mut self, - setup: &mut BootSetup, - state: &BootState, - cmdline: &str, - ) -> Result<()> { - let ptr = setup - .phys - .pfn_to_ptr(state.start_info_segment.pfn, 1) - .await?; - let byte_slice = - unsafe { slice::from_raw_parts_mut(ptr as *mut u8, X86_PAGE_SIZE as usize) }; - byte_slice.fill(0); - let info = ptr as *mut StartInfo; - - let page_table_segment = state - .page_table_segment - .as_ref() - .ok_or(Error::MemorySetupFailed("page_table_segment missing"))?; - let p2m_segment = state - .p2m_segment - .as_ref() - .ok_or(Error::MemorySetupFailed("p2m_segment missing"))?; - unsafe { - for (i, c) in X86_GUEST_MAGIC.chars().enumerate() { - (*info).magic[i] = c as c_char; - } - (*info).magic[X86_GUEST_MAGIC.len()] = 0 as c_char; - (*info).nr_pages 
= setup.total_pages; - (*info).shared_info = state.shared_info_frame << X86_PAGE_SHIFT; - (*info).pt_base = page_table_segment.vstart; - (*info).nr_pt_frames = self.table.mappings[0].area.pgtables as u64; - (*info).mfn_list = p2m_segment.vstart; - (*info).first_p2m_pfn = p2m_segment.pfn; - (*info).nr_p2m_frames = p2m_segment.pages; - (*info).flags = 0; - (*info).store_evtchn = state.store_evtchn; - (*info).store_mfn = setup.phys.p2m[state.xenstore_segment.pfn as usize]; - let console = state.consoles.first().unwrap(); - (*info).console.mfn = setup.phys.p2m[console.1.pfn as usize]; - (*info).console.evtchn = console.0; - (*info).mod_start = state.initrd_segment.vstart; - (*info).mod_len = state.initrd_segment.size; - for (i, c) in cmdline.chars().enumerate() { - (*info).cmdline[i] = c as c_char; - } - (*info).cmdline[MAX_GUEST_CMDLINE - 1] = 0; - trace!("setup_start_info start_info={:?}", *info); - } - Ok(()) - } - - async fn setup_shared_info( - &mut self, - setup: &mut BootSetup, - shared_info_frame: u64, - ) -> Result<()> { - let info = setup - .phys - .map_foreign_pages(shared_info_frame, X86_PAGE_SIZE) - .await? as *mut SharedInfo; - unsafe { - let size = size_of::(); - let info_as_buff = slice::from_raw_parts_mut(info as *mut u8, size); - info_as_buff.fill(0); - for i in 0..32 { - (*info).vcpu_info[i].evtchn_upcall_mask = 1; - } - trace!("setup_shared_info shared_info={:?}", *info); - } - Ok(()) - } - - async fn meminit( - &mut self, - setup: &mut BootSetup, - total_pages: u64, - _: &Option, - _: &Option, - ) -> Result<()> { - setup.call.claim_pages(setup.domid, total_pages).await?; let mut vmemranges: Vec = Vec::new(); let stub = VmemRange { start: 0, - end: total_pages << XEN_PAGE_SHIFT, + end: domain.total_pages << XEN_PAGE_SHIFT, _flags: 0, _nid: 0, }; @@ -660,12 +466,10 @@ impl ArchBootSetup for X86BootSetup { p2m_size = p2m_size.max(range.end >> XEN_PAGE_SHIFT); } - if total != total_pages { + if total != domain.total_pages { return Err(Error::MemorySetupFailed("total pages mismatch")); } - setup.total_pages = total; - let mut p2m = vec![u64::MAX; p2m_size as usize]; for range in &vmemranges { let mut extents_init = vec![0u64; SUPERPAGE_BATCH_SIZE as usize]; @@ -694,10 +498,10 @@ impl ArchBootSetup for X86BootSetup { } let extents_init_slice = extents_init.as_slice(); - let extents = setup + let extents = domain .call .populate_physmap( - setup.domid, + domain.domid, count, SUPERPAGE_2MB_SHIFT as u32, 0, @@ -725,9 +529,9 @@ impl ArchBootSetup for X86BootSetup { let p2m_idx = (pfn_base + j) as usize; let p2m_end_idx = p2m_idx + allocsz as usize; let input_extent_starts = &p2m[p2m_idx..p2m_end_idx]; - let result = setup + let result = domain .call - .populate_physmap(setup.domid, allocsz, 0, 0, input_extent_starts) + .populate_physmap(domain.domid, allocsz, 0, 0, input_extent_starts) .await?; if result.len() != allocsz as usize { @@ -747,50 +551,300 @@ impl ArchBootSetup for X86BootSetup { } } - setup.phys.load_p2m(p2m); - setup.call.claim_pages(setup.domid, 0).await?; + domain.phys.load_p2m(p2m); + domain.call.claim_pages(domain.domid, 0).await?; Ok(()) } - async fn bootlate(&mut self, setup: &mut BootSetup, state: &mut BootState) -> Result<()> { - let p2m_segment = state + async fn alloc_p2m_segment( + &mut self, + domain: &mut BootDomain, + ) -> Result> { + let mut p2m_alloc_size = + ((domain.phys.p2m_size() * 8) + X86_PAGE_SIZE - 1) & !(X86_PAGE_SIZE - 1); + let from = domain.image_info.virt_p2m_base; + let to = from + p2m_alloc_size - 1; + let m = 
self.count_page_tables(domain, from, to, domain.pfn_alloc_end)?; + + let pgtables: usize; + { + let map = &mut self.table.mappings[m]; + map.area.pfn = domain.pfn_alloc_end; + for lvl_idx in 0..4 { + map.levels[lvl_idx].pfn += p2m_alloc_size >> X86_PAGE_SHIFT; + } + pgtables = map.area.pgtables; + } + self.table.mappings_count += 1; + p2m_alloc_size += (pgtables << X86_PAGE_SHIFT) as u64; + let p2m_segment = domain.alloc_segment(0, p2m_alloc_size).await?; + Ok(Some(p2m_segment)) + } + + async fn alloc_page_tables( + &mut self, + domain: &mut BootDomain, + ) -> Result> { + let mut extra_pages = 1; + extra_pages += (512 * 1024) / X86_PAGE_SIZE; + let mut pages = extra_pages; + + let mut try_virt_end: u64; + let mut m: usize; + loop { + try_virt_end = BootDomain::round_up( + domain.virt_alloc_end + pages * X86_PAGE_SIZE, + BootDomain::bits_to_mask(22), + ); + m = self.count_page_tables(domain, domain.image_info.virt_base, try_virt_end, 0)?; + pages = self.table.mappings[m].area.pgtables as u64 + extra_pages; + if domain.virt_alloc_end + pages * X86_PAGE_SIZE <= try_virt_end + 1 { + break; + } + } + + self.table.mappings[m].area.pfn = 0; + self.table.mappings_count += 1; + domain.virt_pgtab_end = try_virt_end + 1; + let size = self.table.mappings[m].area.pgtables as u64 * X86_PAGE_SIZE; + let segment = domain.alloc_segment(0, size).await?; + debug!( + "alloc_page_tables table={:?} segment={:?}", + self.table, segment + ); + Ok(Some(segment)) + } + + async fn setup_page_tables(&mut self, domain: &mut BootDomain) -> Result<()> { + let p2m_segment = self .p2m_segment .as_ref() .ok_or(Error::MemorySetupFailed("p2m_segment missing"))?; - let page_table_segment = state - .page_table_segment - .as_ref() - .ok_or(Error::MemorySetupFailed("page_table_segment missing"))?; - let pg_pfn = page_table_segment.pfn; - setup.phys.unmap(pg_pfn)?; - setup.phys.unmap(p2m_segment.pfn)?; + let p2m_guest = unsafe { + slice::from_raw_parts_mut( + p2m_segment.addr as *mut u64, + domain.phys.p2m_size() as usize, + ) + }; + copy(p2m_guest, &domain.phys.p2m); - let map = setup.call.get_memory_map(E820_MAX).await?; - let mem_mb = setup.total_pages >> (20 - self.page_shift()); - let mem_kb = mem_mb * 1024; - let e820 = self.e820_sanitize(map, mem_kb, 0)?; - setup.call.set_memory_map(setup.domid, e820).await?; + for l in (0usize..X86_PGTABLE_LEVELS as usize).rev() { + for m1 in 0usize..self.table.mappings_count { + let map1 = &self.table.mappings[m1]; + let from = map1.levels[l].from; + let to = map1.levels[l].to; + let pg_ptr = domain.phys.pfn_to_ptr(map1.levels[l].pfn, 0).await? 
as *mut u64; + for m2 in 0usize..self.table.mappings_count { + let map2 = &self.table.mappings[m2]; + let lvl = if l > 0 { + &map2.levels[l - 1] + } else { + &map2.area + }; - // setup - // .call - // .mmuext(setup.domid, MMUEXT_PIN_L4_TABLE, pg_mfn, 0) - // .await?; + if l > 0 && lvl.pgtables == 0 { + continue; + } + + if lvl.from >= to || lvl.to <= from { + continue; + } + + let p_s = (std::cmp::max(from, lvl.from) - from) + >> (X86_PAGE_SHIFT + l as u64 * X86_PGTABLE_LEVEL_SHIFT); + let p_e = (std::cmp::min(to, lvl.to) - from) + >> (X86_PAGE_SHIFT + l as u64 * X86_PGTABLE_LEVEL_SHIFT); + let rhs = X86_PAGE_SHIFT as usize + l * X86_PGTABLE_LEVEL_SHIFT as usize; + let mut pfn = ((std::cmp::max(from, lvl.from) - lvl.from) >> rhs) + lvl.pfn; + + debug!( + "setup_page_tables lvl={} map_1={} map_2={} pfn={:#x} p_s={:#x} p_e={:#x}", + l, m1, m2, pfn, p_s, p_e + ); + + let pg = unsafe { slice::from_raw_parts_mut(pg_ptr, (p_e + 1) as usize) }; + for p in p_s..p_e + 1 { + let prot = self.get_pg_prot(l, pfn); + let pfn_paddr = domain.phys.p2m[pfn as usize] << X86_PAGE_SHIFT; + let value = pfn_paddr | prot; + pg[p as usize] = value; + pfn += 1; + } + } + } + } Ok(()) } - async fn vcpu(&mut self, setup: &mut BootSetup, state: &mut BootState) -> Result<()> { - let page_table_segment = state + async fn setup_hypercall_page(&mut self, domain: &mut BootDomain) -> Result<()> { + if domain.image_info.virt_hypercall == u64::MAX { + return Ok(()); + } + let pfn = + (domain.image_info.virt_hypercall - domain.image_info.virt_base) >> self.page_shift(); + let mfn = domain.phys.p2m[pfn as usize]; + domain.call.hypercall_init(domain.domid, mfn).await?; + Ok(()) + } + + async fn alloc_magic_pages(&mut self, domain: &mut BootDomain) -> Result<()> { + if domain.image_info.virt_p2m_base >= domain.image_info.virt_base + || (domain.image_info.virt_p2m_base & ((1 << self.page_shift()) - 1)) != 0 + { + self.p2m_segment = self.alloc_p2m_segment(domain).await?; + } + self.start_info_segment = Some(domain.alloc_page()?); + self.xenstore_segment = Some(domain.alloc_page()?); + domain.xenstore_mfn = domain.phys.p2m[self.xenstore_segment.as_ref().unwrap().pfn as usize]; + let evtchn = domain.call.evtchn_alloc_unbound(domain.domid, 0).await?; + let page = domain.alloc_page()?; + domain + .consoles + .push((evtchn, domain.phys.p2m[page.pfn as usize])); + self.page_table_segment = self.alloc_page_tables(domain).await?; + self.boot_stack_segment = Some(domain.alloc_page()?); + + if domain.virt_pgtab_end > 0 { + domain.alloc_padding_pages(domain.virt_pgtab_end)?; + } + + if self.p2m_segment.is_none() { + if let Some(mut p2m_segment) = self.alloc_p2m_segment(domain).await? { + p2m_segment.vstart = domain.image_info.virt_p2m_base; + self.p2m_segment = Some(p2m_segment); + } + } + + Ok(()) + } + + async fn setup_shared_info( + &mut self, + domain: &mut BootDomain, + shared_info_frame: u64, + ) -> Result<()> { + let info = domain + .phys + .map_foreign_pages(shared_info_frame, X86_PAGE_SIZE) + .await? 
as *mut SharedInfo; + unsafe { + let size = size_of::(); + let info_as_buff = slice::from_raw_parts_mut(info as *mut u8, size); + info_as_buff.fill(0); + for i in 0..32 { + (*info).vcpu_info[i].evtchn_upcall_mask = 1; + } + trace!("setup_shared_info shared_info={:?}", *info); + } + Ok(()) + } + + async fn setup_start_info( + &mut self, + domain: &mut BootDomain, + cmdline: &str, + shared_info_frame: u64, + ) -> Result<()> { + let start_info_segment = self + .start_info_segment + .as_ref() + .ok_or(Error::MemorySetupFailed("start_info_segment missing"))?; + + let ptr = domain.phys.pfn_to_ptr(start_info_segment.pfn, 1).await?; + let byte_slice = + unsafe { slice::from_raw_parts_mut(ptr as *mut u8, X86_PAGE_SIZE as usize) }; + byte_slice.fill(0); + let info = ptr as *mut StartInfo; + + let page_table_segment = self + .page_table_segment + .as_ref() + .ok_or(Error::MemorySetupFailed("page_table_segment missing"))?; + let p2m_segment = self + .p2m_segment + .as_ref() + .ok_or(Error::MemorySetupFailed("p2m_segment missing"))?; + let xenstore_segment = self + .xenstore_segment + .as_ref() + .ok_or(Error::MemorySetupFailed("xenstore_segment missing"))?; + unsafe { + for (i, c) in X86_GUEST_MAGIC.chars().enumerate() { + (*info).magic[i] = c as c_char; + } + (*info).magic[X86_GUEST_MAGIC.len()] = 0 as c_char; + (*info).nr_pages = domain.total_pages; + (*info).shared_info = shared_info_frame << X86_PAGE_SHIFT; + (*info).pt_base = page_table_segment.vstart; + (*info).nr_pt_frames = self.table.mappings[0].area.pgtables as u64; + (*info).mfn_list = p2m_segment.vstart; + (*info).first_p2m_pfn = p2m_segment.pfn; + (*info).nr_p2m_frames = p2m_segment.pages; + (*info).flags = 0; + (*info).store_evtchn = domain.store_evtchn; + (*info).store_mfn = domain.phys.p2m[xenstore_segment.pfn as usize]; + let console = domain.consoles.first().unwrap(); + (*info).console.mfn = console.1; + (*info).console.evtchn = console.0; + (*info).mod_start = domain.initrd_segment.vstart; + (*info).mod_len = domain.initrd_segment.size; + for (i, c) in cmdline.chars().enumerate() { + (*info).cmdline[i] = c as c_char; + } + (*info).cmdline[MAX_GUEST_CMDLINE - 1] = 0; + trace!("setup_start_info start_info={:?}", *info); + } + Ok(()) + } + + async fn bootlate(&mut self, domain: &mut BootDomain) -> Result<()> { + let p2m_segment = self + .p2m_segment + .as_ref() + .ok_or(Error::MemorySetupFailed("p2m_segment missing"))?; + let page_table_segment = self .page_table_segment .as_ref() .ok_or(Error::MemorySetupFailed("page_table_segment missing"))?; let pg_pfn = page_table_segment.pfn; - let pg_mfn = setup.phys.p2m[pg_pfn as usize]; + let pg_mfn = domain.phys.p2m[pg_pfn as usize]; + domain.phys.unmap(pg_pfn)?; + domain.phys.unmap(p2m_segment.pfn)?; + + let map = domain.call.get_memory_map(E820_MAX).await?; + let mem_mb = domain.total_pages >> (20 - self.page_shift()); + let mem_kb = mem_mb * 1024; + let e820 = self.e820_sanitize(map, mem_kb, 0)?; + domain.call.set_memory_map(domain.domid, e820).await?; + + domain + .call + .mmuext(domain.domid, MMUEXT_PIN_L4_TABLE, pg_mfn, 0) + .await?; + Ok(()) + } + + async fn vcpu(&mut self, domain: &mut BootDomain) -> Result<()> { + let page_table_segment = self + .page_table_segment + .as_ref() + .ok_or(Error::MemorySetupFailed("page_table_segment missing"))?; + let boot_stack_segment = self + .boot_stack_segment + .as_ref() + .ok_or(Error::MemorySetupFailed("boot_stack_segment missing"))?; + let start_info_segment = self + .start_info_segment + .as_ref() + 
.ok_or(Error::MemorySetupFailed("start_info_segment missing"))?; + let pg_pfn = page_table_segment.pfn; + let pg_mfn = domain.phys.p2m[pg_pfn as usize]; let mut vcpu = VcpuGuestContext::default(); - vcpu.user_regs.rip = state.image_info.virt_entry; + vcpu.user_regs.rip = domain.image_info.virt_entry; vcpu.user_regs.rsp = - state.image_info.virt_base + (state.boot_stack_segment.pfn + 1) * self.page_size(); + domain.image_info.virt_base + (boot_stack_segment.pfn + 1) * self.page_size(); vcpu.user_regs.rsi = - state.image_info.virt_base + (state.start_info_segment.pfn) * self.page_size(); + domain.image_info.virt_base + (start_info_segment.pfn) * self.page_size(); vcpu.user_regs.rflags = 1 << 9; vcpu.debugreg[6] = 0xffff0ff0; vcpu.debugreg[7] = 0x00000400; @@ -807,7 +861,40 @@ impl ArchBootSetup for X86BootSetup { vcpu.kernel_ss = vcpu.user_regs.ss as u64; vcpu.kernel_sp = vcpu.user_regs.rsp; trace!("vcpu context: {:?}", vcpu); - setup.call.set_vcpu_context(setup.domid, 0, &vcpu).await?; + domain.call.set_vcpu_context(domain.domid, 0, &vcpu).await?; + Ok(()) + } + + async fn gnttab_seed(&mut self, domain: &mut BootDomain) -> Result<()> { + let xenstore_segment = self + .xenstore_segment + .as_ref() + .ok_or(Error::MemorySetupFailed("xenstore_segment missing"))?; + + let console_gfn = domain.consoles.first().map(|x| x.1).unwrap_or(0) as usize; + let xenstore_gfn = domain.phys.p2m[xenstore_segment.pfn as usize]; + let addr = domain + .call + .mmap(0, 1 << XEN_PAGE_SHIFT) + .await + .ok_or(Error::MmapFailed)?; + domain + .call + .map_resource(domain.domid, 1, 0, 0, 1, addr) + .await?; + let entries = unsafe { slice::from_raw_parts_mut(addr as *mut GrantEntry, 2) }; + entries[0].flags = 1 << 0; + entries[0].domid = 0; + entries[0].frame = console_gfn as u32; + entries[1].flags = 1 << 0; + entries[1].domid = 0; + entries[1].frame = xenstore_gfn as u32; + unsafe { + let result = munmap(addr as *mut c_void, 1 << XEN_PAGE_SHIFT); + if result != 0 { + return Err(Error::UnmapFailed(Errno::from_raw(result))); + } + } Ok(()) } }