feat: move platform stuff all into it's own thing

This commit is contained in:
Alex Zenla 2024-04-29 02:59:49 -07:00
parent f479963cd5
commit 4839ee2679
No known key found for this signature in database
GPG Key ID: 067B238899B51269
8 changed files with 635 additions and 980 deletions

View File

@ -783,6 +783,7 @@ impl XenCall {
Ok(())
}
#[allow(clippy::field_reassign_with_default)]
pub async fn map_pirq(&self, domid: u32, index: isize, pirq: Option<u32>) -> Result<u32> {
trace!(
"physdev fd={} map_pirq domid={} index={} pirq={:?}",

View File

@ -1,288 +0,0 @@
use crate::boot::{ArchBootSetup, BootImageInfo, BootSetup, BootState, DomainSegment};
use crate::error::Result;
use crate::sys::XEN_PAGE_SHIFT;
use crate::Error;
use log::trace;
use xencall::sys::VcpuGuestContext;
pub const ARM_PAGE_SHIFT: u64 = 12;
const ARM_PAGE_SIZE: u64 = 1 << ARM_PAGE_SHIFT;
const GUEST_RAM0_BASE: u64 = 0x40000000;
const GUEST_RAM0_SIZE: u64 = 0xc0000000;
const GUEST_RAM1_BASE: u64 = 0x0200000000;
const GUEST_RAM1_SIZE: u64 = 0xfe00000000;
const GUEST_RAM_BANK_BASES: [u64; 2] = [GUEST_RAM0_BASE, GUEST_RAM1_BASE];
const GUEST_RAM_BANK_SIZES: [u64; 2] = [GUEST_RAM0_SIZE, GUEST_RAM1_SIZE];
const LPAE_SHIFT: u64 = 9;
const PFN_4K_SHIFT: u64 = 0;
const PFN_2M_SHIFT: u64 = PFN_4K_SHIFT + LPAE_SHIFT;
const PFN_1G_SHIFT: u64 = PFN_2M_SHIFT + LPAE_SHIFT;
const PFN_512G_SHIFT: u64 = PFN_1G_SHIFT + LPAE_SHIFT;
const PSR_FIQ_MASK: u64 = 1 << 6; /* Fast Interrupt mask */
const PSR_IRQ_MASK: u64 = 1 << 7; /* Interrupt mask */
const PSR_ABT_MASK: u64 = 1 << 8; /* Asynchronous Abort mask */
const PSR_MODE_EL1H: u64 = 0x05;
const PSR_GUEST64_INIT: u64 = PSR_ABT_MASK | PSR_FIQ_MASK | PSR_IRQ_MASK | PSR_MODE_EL1H;
pub struct Arm64BootSetup {}
impl Default for Arm64BootSetup {
fn default() -> Self {
Self::new()
}
}
impl Arm64BootSetup {
pub fn new() -> Arm64BootSetup {
Arm64BootSetup {}
}
async fn populate_one_size(
&self,
setup: &mut BootSetup<'_>,
pfn_shift: u64,
base_pfn: u64,
pfn_count: u64,
extents: &mut [u64],
) -> Result<u64> {
let mask = (1u64 << pfn_shift) - 1;
let next_shift = pfn_shift + LPAE_SHIFT;
let next_mask = (1u64 << next_shift) - 1;
let next_boundary = (base_pfn + (1 << next_shift)) - 1;
let mut end_pfn = base_pfn + pfn_count;
if pfn_shift == PFN_512G_SHIFT {
return Ok(0);
}
if (base_pfn & next_mask) != 0 && end_pfn > next_boundary {
end_pfn = next_boundary;
}
if (mask & base_pfn) != 0 {
return Ok(0);
}
let count = (end_pfn - base_pfn) >> pfn_shift;
if count == 0 {
return Ok(0);
}
for i in 0..count {
extents[i as usize] = base_pfn + (i << pfn_shift);
}
let result_extents = setup
.call
.populate_physmap(
setup.domid,
count,
pfn_shift as u32,
0,
&extents[0usize..count as usize],
)
.await?;
slice_copy::copy(extents, &result_extents);
Ok((result_extents.len() as u64) << pfn_shift)
}
async fn populate_guest_memory(
&mut self,
setup: &mut BootSetup<'_>,
base_pfn: u64,
pfn_count: u64,
) -> Result<()> {
let mut extents = vec![0u64; 1024 * 1024];
for pfn in 0..extents.len() {
let mut allocsz = (1024 * 1024).min(pfn_count - pfn as u64);
allocsz = self
.populate_one_size(
setup,
PFN_512G_SHIFT,
base_pfn + pfn as u64,
allocsz,
&mut extents,
)
.await?;
if allocsz > 0 {
continue;
}
allocsz = self
.populate_one_size(
setup,
PFN_1G_SHIFT,
base_pfn + pfn as u64,
allocsz,
&mut extents,
)
.await?;
if allocsz > 0 {
continue;
}
allocsz = self
.populate_one_size(
setup,
PFN_2M_SHIFT,
base_pfn + pfn as u64,
allocsz,
&mut extents,
)
.await?;
if allocsz > 0 {
continue;
}
allocsz = self
.populate_one_size(
setup,
PFN_4K_SHIFT,
base_pfn + pfn as u64,
allocsz,
&mut extents,
)
.await?;
if allocsz == 0 {
return Err(Error::MemorySetupFailed("allocsz is zero"));
}
}
Ok(())
}
}
#[async_trait::async_trait]
impl ArchBootSetup for Arm64BootSetup {
fn page_size(&mut self) -> u64 {
ARM_PAGE_SIZE
}
fn page_shift(&mut self) -> u64 {
ARM_PAGE_SHIFT
}
fn needs_early_kernel(&mut self) -> bool {
true
}
async fn setup_shared_info(&mut self, _: &mut BootSetup, _: u64) -> Result<()> {
Ok(())
}
async fn setup_start_info(&mut self, _: &mut BootSetup, _: &BootState, _: &str) -> Result<()> {
Ok(())
}
async fn meminit(
&mut self,
setup: &mut BootSetup,
total_pages: u64,
kernel_segment: &Option<DomainSegment>,
initrd_segment: &Option<DomainSegment>,
) -> Result<()> {
let kernel_segment = kernel_segment
.as_ref()
.ok_or(Error::MemorySetupFailed("kernel_segment missing"))?;
setup.call.claim_pages(setup.domid, total_pages).await?;
let mut ramsize = total_pages << XEN_PAGE_SHIFT;
let bankbase = GUEST_RAM_BANK_BASES;
let bankmax = GUEST_RAM_BANK_SIZES;
let kernbase = kernel_segment.vstart;
let kernend = BootSetup::round_up(kernel_segment.size, 21);
let dtb = setup.dtb.as_ref();
let dtb_size = dtb.map(|blob| BootSetup::round_up(blob.len() as u64, XEN_PAGE_SHIFT));
let ramdisk_size = initrd_segment
.as_ref()
.map(|segment| BootSetup::round_up(segment.size, XEN_PAGE_SHIFT));
let modsize = dtb_size.unwrap_or(0) + ramdisk_size.unwrap_or(0);
let ram128mb = bankbase[0] + (128 << 20);
let mut rambank_size: [u64; 2] = [0, 0];
for i in 0..2 {
let size = if ramsize > bankmax[i] {
bankmax[i]
} else {
ramsize
};
ramsize -= size;
rambank_size[i] = size >> XEN_PAGE_SHIFT;
}
for i in 0..2 {
let size = if ramsize > bankmax[i] {
bankmax[i]
} else {
ramsize
};
ramsize -= size;
rambank_size[i] = size >> XEN_PAGE_SHIFT;
}
for i in 0..2 {
self.populate_guest_memory(setup, bankbase[i] >> XEN_PAGE_SHIFT, rambank_size[i])
.await?;
}
let bank0end = bankbase[0] + (rambank_size[0] << XEN_PAGE_SHIFT);
let _modbase = if bank0end >= ram128mb + modsize && kernend < ram128mb {
ram128mb
} else if bank0end - modsize > kernend {
bank0end - modsize
} else if kernbase - bankbase[0] > modsize {
kernbase - modsize
} else {
return Err(Error::MemorySetupFailed("unable to determine modbase"));
};
setup.call.claim_pages(setup.domid, 0).await?;
Ok(())
}
async fn bootlate(&mut self, _: &mut BootSetup, _: &mut BootState) -> Result<()> {
Ok(())
}
async fn vcpu(&mut self, setup: &mut BootSetup, state: &mut BootState) -> Result<()> {
let mut vcpu = VcpuGuestContext::default();
vcpu.user_regs.pc = state.image_info.virt_entry;
vcpu.user_regs.x0 = 0xffffffff;
vcpu.user_regs.x1 = 0;
vcpu.user_regs.x2 = 0;
vcpu.user_regs.x3 = 0;
vcpu.sctlr = 0x00c50078;
vcpu.ttbr0 = 0;
vcpu.ttbr1 = 0;
vcpu.ttbcr = 0;
vcpu.user_regs.cpsr = PSR_GUEST64_INIT;
vcpu.flags = 1 << 0; // VGCF_ONLINE
trace!("vcpu context: {:?}", vcpu);
setup.call.set_vcpu_context(setup.domid, 0, &vcpu).await?;
Ok(())
}
async fn alloc_p2m_segment(
&mut self,
_: &mut BootSetup,
_: &BootImageInfo,
) -> Result<Option<DomainSegment>> {
Ok(None)
}
async fn alloc_page_tables(
&mut self,
_: &mut BootSetup,
_: &BootImageInfo,
) -> Result<Option<DomainSegment>> {
Ok(None)
}
async fn setup_page_tables(&mut self, _: &mut BootSetup, _: &mut BootState) -> Result<()> {
Ok(())
}
}

View File

@ -1,291 +1,61 @@
use crate::error::Result;
use crate::mem::PhysicalPages;
use crate::sys::{GrantEntry, XEN_PAGE_SHIFT};
use crate::Error;
use libc::munmap;
use log::debug;
use nix::errno::Errno;
use slice_copy::copy;
use crate::mem::ARCH_PAGE_SHIFT;
use std::ffi::c_void;
use std::slice;
use log::debug;
use slice_copy::copy;
use xencall::XenCall;
pub trait BootImageLoader {
fn parse(&self) -> Result<BootImageInfo>;
fn load(&self, image_info: &BootImageInfo, dst: &mut [u8]) -> Result<()>;
use crate::{
error::{Error, Result},
mem::PhysicalPages,
sys::XEN_PAGE_SHIFT,
};
pub struct BootSetup<I: BootImageLoader, P: BootSetupPlatform> {
pub call: XenCall,
pub domid: u32,
pub platform: P,
pub image_loader: I,
pub dtb: Option<Vec<u8>>,
}
pub const XEN_UNSET_ADDR: u64 = -1i64 as u64;
#[derive(Debug)]
pub struct BootImageInfo {
pub start: u64,
pub virt_base: u64,
pub virt_kstart: u64,
pub virt_kend: u64,
pub virt_hypercall: u64,
pub virt_entry: u64,
pub virt_p2m_base: u64,
pub unmapped_initrd: bool,
}
pub struct BootSetup<'a> {
pub(crate) call: &'a XenCall,
pub phys: PhysicalPages<'a>,
pub(crate) domid: u32,
pub(crate) virt_alloc_end: u64,
pub(crate) pfn_alloc_end: u64,
pub(crate) virt_pgtab_end: u64,
pub(crate) total_pages: u64,
#[cfg(target_arch = "aarch64")]
pub(crate) dtb: Option<Vec<u8>>,
}
#[derive(Debug)]
#[derive(Debug, Default)]
pub struct DomainSegment {
pub(crate) vstart: u64,
vend: u64,
pub vstart: u64,
pub vend: u64,
pub pfn: u64,
pub(crate) addr: u64,
pub(crate) size: u64,
#[cfg(target_arch = "x86_64")]
pub(crate) pages: u64,
pub addr: u64,
pub size: u64,
pub pages: u64,
}
#[derive(Debug)]
pub struct BootState {
pub kernel_segment: DomainSegment,
pub start_info_segment: DomainSegment,
pub xenstore_segment: DomainSegment,
pub boot_stack_segment: DomainSegment,
pub p2m_segment: Option<DomainSegment>,
pub page_table_segment: Option<DomainSegment>,
pub struct BootDomain {
pub domid: u32,
pub call: XenCall,
pub page_size: u64,
pub virt_alloc_end: u64,
pub pfn_alloc_end: u64,
pub virt_pgtab_end: u64,
pub total_pages: u64,
pub image_info: BootImageInfo,
pub shared_info_frame: u64,
pub initrd_segment: DomainSegment,
pub phys: PhysicalPages,
pub store_evtchn: u32,
pub consoles: Vec<(u32, DomainSegment)>,
pub xenstore_mfn: u64,
pub initrd_segment: DomainSegment,
pub consoles: Vec<(u32, u64)>,
}
impl BootSetup<'_> {
pub fn new(call: &XenCall, domid: u32) -> BootSetup {
BootSetup {
call,
phys: PhysicalPages::new(call, domid),
domid,
virt_alloc_end: 0,
pfn_alloc_end: 0,
virt_pgtab_end: 0,
total_pages: 0,
#[cfg(target_arch = "aarch64")]
dtb: None,
}
impl BootDomain {
pub async fn alloc_module(&mut self, buffer: &[u8]) -> Result<DomainSegment> {
let segment = self.alloc_segment(0, buffer.len() as u64).await?;
let slice = unsafe { slice::from_raw_parts_mut(segment.addr as *mut u8, buffer.len()) };
copy(slice, buffer);
Ok(segment)
}
async fn initialize_memory(
&mut self,
arch: &mut Box<dyn ArchBootSetup + Send + Sync>,
total_pages: u64,
kernel_segment: &Option<DomainSegment>,
initrd_segment: &Option<DomainSegment>,
) -> Result<()> {
arch.meminit(self, total_pages, kernel_segment, initrd_segment)
.await?;
Ok(())
}
async fn setup_hypercall_page(&mut self, image_info: &BootImageInfo) -> Result<()> {
if image_info.virt_hypercall == XEN_UNSET_ADDR {
return Ok(());
}
let pfn = (image_info.virt_hypercall - image_info.virt_base) >> ARCH_PAGE_SHIFT;
let mfn = self.phys.p2m[pfn as usize];
self.call.hypercall_init(self.domid, mfn).await?;
Ok(())
}
pub async fn initialize<I: BootImageLoader + Send + Sync>(
&mut self,
arch: &mut Box<dyn ArchBootSetup + Send + Sync>,
image_loader: &I,
initrd: &[u8],
max_vcpus: u32,
mem_mb: u64,
console_count: usize,
) -> Result<BootState> {
debug!("initialize max_vcpus={:?} mem_mb={:?}", max_vcpus, mem_mb);
let page_size = arch.page_size();
let image_info = image_loader.parse()?;
debug!("initialize image_info={:?}", image_info);
let mut kernel_segment: Option<DomainSegment> = None;
let mut initrd_segment: Option<DomainSegment> = None;
if !image_info.unmapped_initrd {
initrd_segment = Some(self.alloc_module(page_size, initrd).await?);
}
if arch.needs_early_kernel() {
kernel_segment = Some(
self.load_kernel_segment(page_size, image_loader, &image_info)
.await?,
);
}
let total_pages = mem_mb << (20 - arch.page_shift());
self.initialize_memory(arch, total_pages, &kernel_segment, &initrd_segment)
.await?;
self.virt_alloc_end = image_info.virt_base;
if kernel_segment.is_none() {
kernel_segment = Some(
self.load_kernel_segment(page_size, image_loader, &image_info)
.await?,
);
}
let mut p2m_segment: Option<DomainSegment> = None;
if image_info.virt_p2m_base >= image_info.virt_base
|| (image_info.virt_p2m_base & ((1 << arch.page_shift()) - 1)) != 0
{
p2m_segment = arch.alloc_p2m_segment(self, &image_info).await?;
}
let start_info_segment = self.alloc_page(page_size)?;
let xenstore_segment = self.alloc_page(page_size)?;
let mut consoles: Vec<(u32, DomainSegment)> = Vec::new();
for _ in 0..console_count {
let evtchn = self.call.evtchn_alloc_unbound(self.domid, 0).await?;
let page = self.alloc_page(page_size)?;
consoles.push((evtchn, page));
}
let page_table_segment = arch.alloc_page_tables(self, &image_info).await?;
let boot_stack_segment = self.alloc_page(page_size)?;
if self.virt_pgtab_end > 0 {
self.alloc_padding_pages(page_size, self.virt_pgtab_end)?;
}
if p2m_segment.is_none() {
if let Some(mut segment) = arch.alloc_p2m_segment(self, &image_info).await? {
segment.vstart = image_info.virt_p2m_base;
p2m_segment = Some(segment);
}
}
if image_info.unmapped_initrd {
initrd_segment = Some(self.alloc_module(page_size, initrd).await?);
}
let initrd_segment = initrd_segment.unwrap();
let store_evtchn = self.call.evtchn_alloc_unbound(self.domid, 0).await?;
let kernel_segment =
kernel_segment.ok_or(Error::MemorySetupFailed("kernel_segment missing"))?;
let state = BootState {
kernel_segment,
start_info_segment,
xenstore_segment,
consoles,
boot_stack_segment,
p2m_segment,
page_table_segment,
image_info,
initrd_segment,
store_evtchn,
shared_info_frame: 0,
};
debug!("initialize state={:?}", state);
Ok(state)
}
pub async fn boot(
&mut self,
arch: &mut Box<dyn ArchBootSetup + Send + Sync>,
state: &mut BootState,
cmdline: &str,
) -> Result<()> {
let domain_info = self.call.get_domain_info(self.domid).await?;
let shared_info_frame = domain_info.shared_info_frame;
state.shared_info_frame = shared_info_frame;
arch.setup_page_tables(self, state).await?;
arch.setup_start_info(self, state, cmdline).await?;
self.setup_hypercall_page(&state.image_info).await?;
arch.bootlate(self, state).await?;
arch.setup_shared_info(self, state.shared_info_frame)
.await?;
arch.vcpu(self, state).await?;
self.phys.unmap_all()?;
self.gnttab_seed(state).await?;
Ok(())
}
async fn gnttab_seed(&mut self, state: &mut BootState) -> Result<()> {
let console_gfn =
self.phys.p2m[state.consoles.first().map(|x| x.1.pfn).unwrap_or(0) as usize];
let xenstore_gfn = self.phys.p2m[state.xenstore_segment.pfn as usize];
let addr = self
.call
.mmap(0, 1 << XEN_PAGE_SHIFT)
.await
.ok_or(Error::MmapFailed)?;
self.call.map_resource(self.domid, 1, 0, 0, 1, addr).await?;
let entries = unsafe { slice::from_raw_parts_mut(addr as *mut GrantEntry, 2) };
entries[0].flags = 1 << 0;
entries[0].domid = 0;
entries[0].frame = console_gfn as u32;
entries[1].flags = 1 << 0;
entries[1].domid = 0;
entries[1].frame = xenstore_gfn as u32;
unsafe {
let result = munmap(addr as *mut c_void, 1 << XEN_PAGE_SHIFT);
if result != 0 {
return Err(Error::UnmapFailed(Errno::from_raw(result)));
}
}
Ok(())
}
async fn load_kernel_segment<I: BootImageLoader + Send + Sync>(
&mut self,
page_size: u64,
image_loader: &I,
image_info: &BootImageInfo,
) -> Result<DomainSegment> {
let kernel_segment = self
.alloc_segment(
page_size,
image_info.virt_kstart,
image_info.virt_kend - image_info.virt_kstart,
)
.await?;
let kernel_segment_ptr = kernel_segment.addr as *mut u8;
let kernel_segment_slice =
unsafe { slice::from_raw_parts_mut(kernel_segment_ptr, kernel_segment.size as usize) };
image_loader.load(image_info, kernel_segment_slice)?;
Ok(kernel_segment)
}
pub(crate) fn round_up(addr: u64, mask: u64) -> u64 {
addr | mask
}
#[cfg(target_arch = "x86_64")]
pub(crate) fn bits_to_mask(bits: u64) -> u64 {
(1 << bits) - 1
}
pub(crate) async fn alloc_segment(
&mut self,
page_size: u64,
start: u64,
size: u64,
) -> Result<DomainSegment> {
pub async fn alloc_segment(&mut self, start: u64, size: u64) -> Result<DomainSegment> {
debug!("alloc_segment {:#x} {:#x}", start, size);
if start > 0 {
self.alloc_padding_pages(page_size, start)?;
self.alloc_padding_pages(start)?;
}
let local_page_size: u32 = (1i64 << XEN_PAGE_SHIFT) as u32;
@ -302,7 +72,7 @@ impl BootSetup<'_> {
pages,
};
self.chk_alloc_pages(page_size, pages)?;
self.chk_alloc_pages(pages)?;
let ptr = self.phys.pfn_to_ptr(segment.pfn, pages).await?;
segment.addr = ptr;
@ -318,46 +88,20 @@ impl BootSetup<'_> {
Ok(segment)
}
fn alloc_page(&mut self, page_size: u64) -> Result<DomainSegment> {
let start = self.virt_alloc_end;
let pfn = self.pfn_alloc_end;
self.chk_alloc_pages(page_size, 1)?;
debug!("alloc_page {:#x} (pfn {:#x})", start, pfn);
Ok(DomainSegment {
vstart: start,
vend: (start + page_size) - 1,
pfn,
addr: 0,
size: 0,
#[cfg(target_arch = "x86_64")]
pages: 1,
})
}
async fn alloc_module(&mut self, page_size: u64, buffer: &[u8]) -> Result<DomainSegment> {
let segment = self
.alloc_segment(page_size, 0, buffer.len() as u64)
.await?;
let slice = unsafe { slice::from_raw_parts_mut(segment.addr as *mut u8, buffer.len()) };
copy(slice, buffer);
Ok(segment)
}
fn alloc_padding_pages(&mut self, page_size: u64, boundary: u64) -> Result<()> {
if (boundary & (page_size - 1)) != 0 {
pub fn alloc_padding_pages(&mut self, boundary: u64) -> Result<()> {
if (boundary & (self.page_size - 1)) != 0 {
return Err(Error::MemorySetupFailed("boundary is incorrect"));
}
if boundary < self.virt_alloc_end {
return Err(Error::MemorySetupFailed("boundary is below allocation end"));
}
let pages = (boundary - self.virt_alloc_end) / page_size;
self.chk_alloc_pages(page_size, pages)?;
let pages = (boundary - self.virt_alloc_end) / self.page_size;
self.chk_alloc_pages(pages)?;
Ok(())
}
fn chk_alloc_pages(&mut self, page_size: u64, pages: u64) -> Result<()> {
pub fn chk_alloc_pages(&mut self, pages: u64) -> Result<()> {
if pages > self.total_pages
|| self.pfn_alloc_end > self.total_pages
|| pages > self.total_pages - self.pfn_alloc_end
@ -366,56 +110,195 @@ impl BootSetup<'_> {
}
self.pfn_alloc_end += pages;
self.virt_alloc_end += pages * page_size;
self.virt_alloc_end += pages * self.page_size;
Ok(())
}
pub fn alloc_page(&mut self) -> Result<DomainSegment> {
let start = self.virt_alloc_end;
let pfn = self.pfn_alloc_end;
self.chk_alloc_pages(1)?;
debug!("alloc_page {:#x} (pfn {:#x})", start, pfn);
Ok(DomainSegment {
vstart: start,
vend: (start + self.page_size) - 1,
pfn,
addr: 0,
size: 0,
pages: 1,
})
}
pub fn round_up(addr: u64, mask: u64) -> u64 {
addr | mask
}
pub fn bits_to_mask(bits: u64) -> u64 {
(1 << bits) - 1
}
}
impl<I: BootImageLoader, P: BootSetupPlatform> BootSetup<I, P> {
pub fn new(
call: XenCall,
domid: u32,
platform: P,
image_loader: I,
dtb: Option<Vec<u8>>,
) -> BootSetup<I, P> {
BootSetup {
call,
domid,
platform,
image_loader,
dtb,
}
}
pub async fn initialize(&mut self, initrd: &[u8], mem_mb: u64) -> Result<BootDomain> {
let total_pages = mem_mb << (20 - self.platform.page_shift());
let image_info = self.image_loader.parse().await?;
let mut domain = BootDomain {
domid: self.domid,
call: self.call.clone(),
virt_alloc_end: 0,
virt_pgtab_end: 0,
pfn_alloc_end: 0,
total_pages,
page_size: self.platform.page_size(),
image_info,
consoles: Vec::new(),
phys: PhysicalPages::new(self.call.clone(), self.domid, self.platform.page_shift()),
initrd_segment: DomainSegment::default(),
store_evtchn: 0,
xenstore_mfn: 0,
};
let mut initrd_segment = if !domain.image_info.unmapped_initrd {
Some(domain.alloc_module(initrd).await?)
} else {
None
};
let mut kernel_segment = if self.platform.needs_early_kernel() {
Some(self.load_kernel_segment(&mut domain).await?)
} else {
None
};
self.platform.initialize_memory(&mut domain).await?;
domain.virt_alloc_end = domain.image_info.virt_base;
if kernel_segment.is_none() {
kernel_segment = Some(self.load_kernel_segment(&mut domain).await?);
}
self.platform.alloc_magic_pages(&mut domain).await?;
if domain.image_info.unmapped_initrd {
initrd_segment = Some(domain.alloc_module(initrd).await?);
}
domain.initrd_segment =
initrd_segment.ok_or(Error::MemorySetupFailed("initd_segment missing"))?;
domain.store_evtchn = self.call.evtchn_alloc_unbound(self.domid, 0).await?;
let _kernel_segment =
kernel_segment.ok_or(Error::MemorySetupFailed("kernel_segment missing"))?;
Ok(domain)
}
pub async fn boot(&mut self, domain: &mut BootDomain, cmdline: &str) -> Result<()> {
let domain_info = self.call.get_domain_info(self.domid).await?;
let shared_info_frame = domain_info.shared_info_frame;
self.platform.setup_page_tables(domain).await?;
self.platform
.setup_start_info(domain, cmdline, shared_info_frame)
.await?;
self.platform.setup_hypercall_page(domain).await?;
self.platform.bootlate(domain).await?;
self.platform
.setup_shared_info(domain, shared_info_frame)
.await?;
self.platform.vcpu(domain).await?;
domain.phys.unmap_all()?;
self.platform.gnttab_seed(domain).await?;
Ok(())
}
async fn load_kernel_segment(&mut self, domain: &mut BootDomain) -> Result<DomainSegment> {
let kernel_segment = domain
.alloc_segment(
domain.image_info.virt_kstart,
domain.image_info.virt_kend - domain.image_info.virt_kstart,
)
.await?;
let kernel_segment_ptr = kernel_segment.addr as *mut u8;
let kernel_segment_slice =
unsafe { slice::from_raw_parts_mut(kernel_segment_ptr, kernel_segment.size as usize) };
self.image_loader
.load(&domain.image_info, kernel_segment_slice)
.await?;
Ok(kernel_segment)
}
}
#[async_trait::async_trait]
pub trait ArchBootSetup {
fn page_size(&mut self) -> u64;
fn page_shift(&mut self) -> u64;
pub trait BootSetupPlatform {
fn page_size(&self) -> u64;
fn page_shift(&self) -> u64;
fn needs_early_kernel(&self) -> bool;
fn needs_early_kernel(&mut self) -> bool;
async fn initialize_memory(&self, domain: &mut BootDomain) -> Result<()>;
async fn alloc_p2m_segment(
async fn alloc_page_tables(&mut self, domain: &mut BootDomain)
-> Result<Option<DomainSegment>>;
async fn alloc_p2m_segment(&mut self, domain: &mut BootDomain)
-> Result<Option<DomainSegment>>;
async fn alloc_magic_pages(&mut self, domain: &mut BootDomain) -> Result<()>;
async fn setup_page_tables(&mut self, domain: &mut BootDomain) -> Result<()>;
async fn setup_shared_info(
&mut self,
setup: &mut BootSetup,
image_info: &BootImageInfo,
) -> Result<Option<DomainSegment>>;
async fn alloc_page_tables(
&mut self,
setup: &mut BootSetup,
image_info: &BootImageInfo,
) -> Result<Option<DomainSegment>>;
async fn setup_page_tables(
&mut self,
setup: &mut BootSetup,
state: &mut BootState,
domain: &mut BootDomain,
shared_info_frame: u64,
) -> Result<()>;
async fn setup_start_info(
&mut self,
setup: &mut BootSetup,
state: &BootState,
domain: &mut BootDomain,
cmdline: &str,
) -> Result<()>;
async fn setup_shared_info(
&mut self,
setup: &mut BootSetup,
shared_info_frame: u64,
) -> Result<()>;
async fn meminit(
&mut self,
setup: &mut BootSetup,
total_pages: u64,
kernel_segment: &Option<DomainSegment>,
initrd_segment: &Option<DomainSegment>,
) -> Result<()>;
async fn bootlate(&mut self, setup: &mut BootSetup, state: &mut BootState) -> Result<()>;
async fn vcpu(&mut self, setup: &mut BootSetup, state: &mut BootState) -> Result<()>;
async fn bootlate(&mut self, domain: &mut BootDomain) -> Result<()>;
async fn gnttab_seed(&mut self, domain: &mut BootDomain) -> Result<()>;
async fn vcpu(&mut self, domain: &mut BootDomain) -> Result<()>;
async fn setup_hypercall_page(&mut self, domain: &mut BootDomain) -> Result<()>;
}
#[async_trait::async_trait]
pub trait BootImageLoader {
async fn parse(&self) -> Result<BootImageInfo>;
async fn load(&self, image_info: &BootImageInfo, dst: &mut [u8]) -> Result<()>;
}
#[derive(Debug)]
pub struct BootImageInfo {
pub start: u64,
pub virt_base: u64,
pub virt_kstart: u64,
pub virt_kend: u64,
pub virt_hypercall: u64,
pub virt_entry: u64,
pub virt_p2m_base: u64,
pub unmapped_initrd: bool,
}

View File

@ -1,4 +1,4 @@
use crate::boot::{BootImageInfo, BootImageLoader, XEN_UNSET_ADDR};
use crate::boot::{BootImageInfo, BootImageLoader};
use crate::error::Result;
use crate::sys::{
XEN_ELFNOTE_ENTRY, XEN_ELFNOTE_HYPERCALL_PAGE, XEN_ELFNOTE_INIT_P2M, XEN_ELFNOTE_MOD_START_PFN,
@ -128,8 +128,9 @@ struct ElfNoteValue {
value: u64,
}
#[async_trait::async_trait]
impl BootImageLoader for ElfImageLoader {
fn parse(&self) -> Result<BootImageInfo> {
async fn parse(&self) -> Result<BootImageInfo> {
let elf = ElfBytes::<AnyEndian>::minimal_parse(self.data.as_slice())?;
let headers = elf.section_headers().ok_or(Error::ElfInvalidImage)?;
let mut linux_notes: HashMap<u64, Vec<u8>> = HashMap::new();
@ -220,7 +221,7 @@ impl BootImageLoader for ElfImageLoader {
}
}
if paddr_offset != XEN_UNSET_ADDR && virt_base == XEN_UNSET_ADDR {
if paddr_offset != u64::MAX && virt_base == u64::MAX {
return Err(Error::ElfInvalidImage);
}
@ -242,7 +243,7 @@ impl BootImageLoader for ElfImageLoader {
Ok(image_info)
}
fn load(&self, image_info: &BootImageInfo, dst: &mut [u8]) -> Result<()> {
async fn load(&self, image_info: &BootImageInfo, dst: &mut [u8]) -> Result<()> {
let elf = ElfBytes::<AnyEndian>::minimal_parse(self.data.as_slice())?;
let segments = elf.segments().ok_or(Error::ElfInvalidImage)?;

View File

@ -4,19 +4,7 @@ pub mod error;
pub mod mem;
pub mod sys;
#[cfg(target_arch = "x86_64")]
pub mod x86;
#[cfg(target_arch = "x86_64")]
use crate::x86::X86BootSetup;
#[cfg(target_arch = "aarch64")]
pub mod arm64;
#[cfg(target_arch = "aarch64")]
use crate::arm64::Arm64BootSetup;
use crate::boot::{ArchBootSetup, BootSetup};
use crate::boot::{BootDomain, BootSetup};
use crate::elfloader::ElfImageLoader;
use crate::error::{Error, Result};
use boot::BootState;
@ -31,7 +19,8 @@ use std::str::FromStr;
use std::time::Duration;
use uuid::Uuid;
use xencall::sys::{
CreateDomain, DOMCTL_DEV_RDM_RELAXED, X86_EMU_LAPIC, XEN_DOMCTL_CDF_HAP, XEN_DOMCTL_CDF_HVM_GUEST, XEN_DOMCTL_CDF_IOMMU
CreateDomain, DOMCTL_DEV_RDM_RELAXED, XEN_DOMCTL_CDF_HAP, XEN_DOMCTL_CDF_HVM_GUEST,
XEN_DOMCTL_CDF_IOMMU,
};
use xencall::XenCall;
use xenstore::{
@ -40,6 +29,7 @@ use xenstore::{
};
pub mod pci;
pub mod x86pv;
#[derive(Clone)]
pub struct XenClient {
@ -154,14 +144,17 @@ impl XenClient {
}
pub async fn create(&self, config: &DomainConfig) -> Result<CreatedDomain> {
let mut domain = CreateDomain::default();
let mut domain = CreateDomain {
max_vcpus: config.max_vcpus,
..Default::default()
};
domain.max_vcpus = config.max_vcpus;
if cfg!(target_arch = "aarch64") {
domain.flags = XEN_DOMCTL_CDF_HVM_GUEST | XEN_DOMCTL_CDF_HAP;
} else {
domain.flags = XEN_DOMCTL_CDF_HVM_GUEST | XEN_DOMCTL_CDF_HAP | XEN_DOMCTL_CDF_IOMMU;
domain.arch_domain_config.emulation_flags = X86_EMU_LAPIC;
domain.flags = XEN_DOMCTL_CDF_IOMMU;
domain.arch_domain_config.emulation_flags = 0;
}
let domid = self.call.create_domain(domain).await?;
@ -294,33 +287,18 @@ impl XenClient {
self.call.set_max_vcpus(domid, config.max_vcpus).await?;
self.call.set_max_mem(domid, config.mem_mb * 1024).await?;
let image_loader = ElfImageLoader::load_file_kernel(&config.kernel)?;
let xenstore_evtchn: u32;
let xenstore_mfn: u64;
let p2m: Vec<u64>;
let mut state: BootState;
let mut domain: BootDomain;
{
let mut boot = BootSetup::new(&self.call, domid);
#[cfg(target_arch = "x86_64")]
let mut arch = Box::new(X86BootSetup::new()) as Box<dyn ArchBootSetup + Send + Sync>;
#[cfg(target_arch = "aarch64")]
let mut arch = Box::new(Arm64BootSetup::new()) as Box<dyn ArchBootSetup + Send + Sync>;
state = boot
.initialize(
&mut arch,
&image_loader,
&config.initrd,
config.max_vcpus,
config.mem_mb,
1,
)
.await?;
boot.boot(&mut arch, &mut state, &config.cmdline).await?;
xenstore_evtchn = state.store_evtchn;
xenstore_mfn = boot.phys.p2m[state.xenstore_segment.pfn as usize];
p2m = boot.phys.p2m;
let loader = ElfImageLoader::load_file_kernel(&config.kernel)?;
let mut boot =
BootSetup::new(self.call.clone(), domid, X86PvPlatform::new(), loader, None);
domain = boot.initialize(&config.initrd, config.mem_mb).await?;
boot.boot(&mut domain, &config.cmdline).await?;
xenstore_evtchn = domain.store_evtchn;
xenstore_mfn = domain.xenstore_mfn;
}
{
@ -386,8 +364,6 @@ impl XenClient {
.to_string(),
initialized: true,
},
&p2m,
&state,
&dom_path,
&backend_dom_path,
config.backend_domid,
@ -402,8 +378,6 @@ impl XenClient {
.console_device_add(
&tx,
channel,
&p2m,
&state,
&dom_path,
&backend_dom_path,
config.backend_domid,
@ -547,17 +521,15 @@ impl XenClient {
&self,
tx: &XsdTransaction,
channel: &DomainChannel,
p2m: &[u64],
state: &BootState,
dom_path: &str,
backend_dom_path: &str,
backend_domid: u32,
domid: u32,
index: usize,
) -> Result<(Option<u64>, Option<u32>)> {
let console = state.consoles.get(index);
let console = domain.consoles.get(index);
let port = console.map(|x| x.0);
let ring = console.map(|x| p2m[x.1.pfn as usize]);
let ring = console.map(|x| x.1);
let mut backend_entries = vec![
("frontend-id", domid.to_string()),

View File

@ -6,11 +6,6 @@ use log::debug;
use nix::errno::Errno;
use std::ffi::c_void;
#[cfg(target_arch = "aarch64")]
pub(crate) use crate::arm64::ARM_PAGE_SHIFT as ARCH_PAGE_SHIFT;
#[cfg(target_arch = "x86_64")]
pub(crate) use crate::x86::X86_PAGE_SHIFT as ARCH_PAGE_SHIFT;
use xencall::sys::MmapEntry;
use xencall::XenCall;
@ -21,16 +16,18 @@ pub struct PhysicalPage {
count: u64,
}
pub struct PhysicalPages<'a> {
pub struct PhysicalPages {
page_shift: u64,
domid: u32,
pub(crate) p2m: Vec<u64>,
call: &'a XenCall,
pub p2m: Vec<u64>,
call: XenCall,
pages: Vec<PhysicalPage>,
}
impl PhysicalPages<'_> {
pub fn new(call: &XenCall, domid: u32) -> PhysicalPages {
impl PhysicalPages {
pub fn new(call: XenCall, domid: u32, page_shift: u64) -> PhysicalPages {
PhysicalPages {
page_shift,
domid,
p2m: Vec::new(),
call,
@ -70,7 +67,7 @@ impl PhysicalPages<'_> {
}
}
return Ok(page.ptr + ((pfn - page.pfn) << ARCH_PAGE_SHIFT));
return Ok(page.ptr + ((pfn - page.pfn) << self.page_shift));
}
if count == 0 {
@ -161,7 +158,7 @@ impl PhysicalPages<'_> {
unsafe {
let err = munmap(
page.ptr as *mut c_void,
(page.count << ARCH_PAGE_SHIFT) as usize,
(page.count << self.page_shift) as usize,
);
if err != 0 {
return Err(Error::UnmapFailed(Errno::from_raw(err)));
@ -182,11 +179,11 @@ impl PhysicalPages<'_> {
unsafe {
let err = munmap(
page.ptr as *mut c_void,
(page.count << ARCH_PAGE_SHIFT) as usize,
(page.count << self.page_shift) as usize,
);
debug!(
"unmapped {:#x} foreign bytes at {:#x}",
(page.count << ARCH_PAGE_SHIFT) as usize,
(page.count << self.page_shift) as usize,
page.ptr
);
if err != 0 {

View File

@ -128,3 +128,5 @@ pub struct GrantEntry {
pub domid: u16,
pub frame: u32,
}
pub const XEN_HVM_START_MAGIC_VALUE: u64 = 0x336ec578;

View File

@ -1,20 +1,26 @@
use crate::boot::{ArchBootSetup, BootImageInfo, BootSetup, BootState, DomainSegment};
use crate::error::Result;
use crate::sys::{
SUPERPAGE_2MB_NR_PFNS, SUPERPAGE_2MB_SHIFT, SUPERPAGE_BATCH_SIZE, VGCF_IN_KERNEL, VGCF_ONLINE,
XEN_PAGE_SHIFT,
use std::{
mem::size_of,
os::raw::{c_char, c_void},
slice,
};
use crate::Error;
use libc::c_char;
use libc::munmap;
use log::{debug, trace};
use nix::errno::Errno;
use slice_copy::copy;
use std::cmp::{max, min};
use std::mem::size_of;
use std::slice;
use xencall::sys::{
E820Entry, VcpuGuestContext, E820_MAX, E820_RAM, E820_UNUSABLE, MMUEXT_PIN_L4_TABLE,
};
use crate::{
boot::{BootDomain, BootSetupPlatform, DomainSegment},
error::{Error, Result},
sys::{
GrantEntry, SUPERPAGE_2MB_NR_PFNS, SUPERPAGE_2MB_SHIFT, SUPERPAGE_BATCH_SIZE,
VGCF_IN_KERNEL, VGCF_ONLINE, XEN_PAGE_SHIFT,
},
};
pub const X86_PAGE_SHIFT: u64 = 12;
pub const X86_PAGE_SIZE: u64 = 1 << X86_PAGE_SHIFT;
pub const X86_VIRT_BITS: u64 = 48;
@ -127,10 +133,6 @@ pub struct SharedInfo {
pub p2m_generation: u64,
}
pub struct X86BootSetup {
table: PageTable,
}
#[derive(Debug)]
struct VmemRange {
start: u64,
@ -139,16 +141,20 @@ struct VmemRange {
_nid: u32,
}
impl Default for X86BootSetup {
fn default() -> Self {
Self::new()
}
#[derive(Default)]
pub struct X86PvPlatform {
table: PageTable,
p2m_segment: Option<DomainSegment>,
page_table_segment: Option<DomainSegment>,
start_info_segment: Option<DomainSegment>,
boot_stack_segment: Option<DomainSegment>,
xenstore_segment: Option<DomainSegment>,
}
impl X86BootSetup {
pub fn new() -> X86BootSetup {
X86BootSetup {
table: PageTable::default(),
impl X86PvPlatform {
pub fn new() -> Self {
Self {
..Default::default()
}
}
@ -159,22 +165,22 @@ impl X86BootSetup {
const PAGE_DIRTY: u64 = 0x040;
fn get_pg_prot(&mut self, l: usize, pfn: u64) -> u64 {
let prot = [
X86BootSetup::PAGE_PRESENT | X86BootSetup::PAGE_RW | X86BootSetup::PAGE_ACCESSED,
X86BootSetup::PAGE_PRESENT
| X86BootSetup::PAGE_RW
| X86BootSetup::PAGE_ACCESSED
| X86BootSetup::PAGE_DIRTY
| X86BootSetup::PAGE_USER,
X86BootSetup::PAGE_PRESENT
| X86BootSetup::PAGE_RW
| X86BootSetup::PAGE_ACCESSED
| X86BootSetup::PAGE_DIRTY
| X86BootSetup::PAGE_USER,
X86BootSetup::PAGE_PRESENT
| X86BootSetup::PAGE_RW
| X86BootSetup::PAGE_ACCESSED
| X86BootSetup::PAGE_DIRTY
| X86BootSetup::PAGE_USER,
X86PvPlatform::PAGE_PRESENT | X86PvPlatform::PAGE_RW | X86PvPlatform::PAGE_ACCESSED,
X86PvPlatform::PAGE_PRESENT
| X86PvPlatform::PAGE_RW
| X86PvPlatform::PAGE_ACCESSED
| X86PvPlatform::PAGE_DIRTY
| X86PvPlatform::PAGE_USER,
X86PvPlatform::PAGE_PRESENT
| X86PvPlatform::PAGE_RW
| X86PvPlatform::PAGE_ACCESSED
| X86PvPlatform::PAGE_DIRTY
| X86PvPlatform::PAGE_USER,
X86PvPlatform::PAGE_PRESENT
| X86PvPlatform::PAGE_RW
| X86PvPlatform::PAGE_ACCESSED
| X86PvPlatform::PAGE_DIRTY
| X86PvPlatform::PAGE_USER,
];
let prot = prot[l];
@ -187,7 +193,7 @@ impl X86BootSetup {
let pfn_s = map.levels[(X86_PGTABLE_LEVELS - 1) as usize].pfn;
let pfn_e = map.area.pgtables as u64 + pfn_s;
if pfn >= pfn_s && pfn < pfn_e {
return prot & !X86BootSetup::PAGE_RW;
return prot & !X86PvPlatform::PAGE_RW;
}
}
prot
@ -195,7 +201,7 @@ impl X86BootSetup {
fn count_page_tables(
&mut self,
setup: &mut BootSetup,
domain: &mut BootDomain,
from: u64,
to: u64,
pfn: u64,
@ -208,7 +214,7 @@ impl X86BootSetup {
let m = self.table.mappings_count;
let pfn_end = pfn + ((to - from) >> X86_PAGE_SHIFT);
if pfn_end >= setup.phys.p2m_size() {
if pfn_end >= domain.phys.p2m_size() {
return Err(Error::MemorySetupFailed("pfn_end greater than p2m size"));
}
@ -222,7 +228,7 @@ impl X86BootSetup {
map.area.to = to & X86_VIRT_MASK;
for l in (0usize..X86_PGTABLE_LEVELS as usize).rev() {
map.levels[l].pfn = setup.pfn_alloc_end + map.area.pgtables as u64;
map.levels[l].pfn = domain.pfn_alloc_end + map.area.pgtables as u64;
if l as u64 == X86_PGTABLE_LEVELS - 1 {
if self.table.mappings_count == 0 {
map.levels[l].from = 0;
@ -234,7 +240,7 @@ impl X86BootSetup {
}
let bits = X86_PAGE_SHIFT + (l + 1) as u64 * X86_PGTABLE_LEVEL_SHIFT;
let mask = BootSetup::bits_to_mask(bits);
let mask = BootDomain::bits_to_mask(bits);
map.levels[l].from = map.area.from & !mask;
map.levels[l].to = map.area.to | mask;
@ -426,229 +432,29 @@ impl X86BootSetup {
}
#[async_trait::async_trait]
impl ArchBootSetup for X86BootSetup {
fn page_size(&mut self) -> u64 {
impl BootSetupPlatform for X86PvPlatform {
fn page_size(&self) -> u64 {
X86_PAGE_SIZE
}
fn page_shift(&mut self) -> u64 {
fn page_shift(&self) -> u64 {
X86_PAGE_SHIFT
}
fn needs_early_kernel(&mut self) -> bool {
fn needs_early_kernel(&self) -> bool {
false
}
async fn alloc_p2m_segment(
&mut self,
setup: &mut BootSetup,
image_info: &BootImageInfo,
) -> Result<Option<DomainSegment>> {
let mut p2m_alloc_size =
((setup.phys.p2m_size() * 8) + X86_PAGE_SIZE - 1) & !(X86_PAGE_SIZE - 1);
let from = image_info.virt_p2m_base;
let to = from + p2m_alloc_size - 1;
let m = self.count_page_tables(setup, from, to, setup.pfn_alloc_end)?;
let pgtables: usize;
{
let map = &mut self.table.mappings[m];
map.area.pfn = setup.pfn_alloc_end;
for lvl_idx in 0..4 {
map.levels[lvl_idx].pfn += p2m_alloc_size >> X86_PAGE_SHIFT;
}
pgtables = map.area.pgtables;
}
self.table.mappings_count += 1;
p2m_alloc_size += (pgtables << X86_PAGE_SHIFT) as u64;
let p2m_segment = setup
.alloc_segment(self.page_size(), 0, p2m_alloc_size)
async fn initialize_memory(&self, domain: &mut BootDomain) -> Result<()> {
domain.call.set_address_size(domain.domid, 64).await?;
domain
.call
.claim_pages(domain.domid, domain.total_pages)
.await?;
Ok(Some(p2m_segment))
}
async fn alloc_page_tables(
&mut self,
setup: &mut BootSetup,
image_info: &BootImageInfo,
) -> Result<Option<DomainSegment>> {
let mut extra_pages = 1;
extra_pages += (512 * 1024) / X86_PAGE_SIZE;
let mut pages = extra_pages;
let mut try_virt_end: u64;
let mut m: usize;
loop {
try_virt_end = BootSetup::round_up(
setup.virt_alloc_end + pages * X86_PAGE_SIZE,
BootSetup::bits_to_mask(22),
);
m = self.count_page_tables(setup, image_info.virt_base, try_virt_end, 0)?;
pages = self.table.mappings[m].area.pgtables as u64 + extra_pages;
if setup.virt_alloc_end + pages * X86_PAGE_SIZE <= try_virt_end + 1 {
break;
}
}
self.table.mappings[m].area.pfn = 0;
self.table.mappings_count += 1;
setup.virt_pgtab_end = try_virt_end + 1;
let size = self.table.mappings[m].area.pgtables as u64 * X86_PAGE_SIZE;
let segment = setup.alloc_segment(self.page_size(), 0, size).await?;
debug!(
"alloc_page_tables table={:?} segment={:?}",
self.table, segment
);
Ok(Some(segment))
}
async fn setup_page_tables(
&mut self,
setup: &mut BootSetup,
state: &mut BootState,
) -> Result<()> {
let p2m_segment = state
.p2m_segment
.as_ref()
.ok_or(Error::MemorySetupFailed("p2m_segment missing"))?;
let p2m_guest = unsafe {
slice::from_raw_parts_mut(p2m_segment.addr as *mut u64, setup.phys.p2m_size() as usize)
};
copy(p2m_guest, &setup.phys.p2m);
for l in (0usize..X86_PGTABLE_LEVELS as usize).rev() {
for m1 in 0usize..self.table.mappings_count {
let map1 = &self.table.mappings[m1];
let from = map1.levels[l].from;
let to = map1.levels[l].to;
let pg_ptr = setup.phys.pfn_to_ptr(map1.levels[l].pfn, 0).await? as *mut u64;
for m2 in 0usize..self.table.mappings_count {
let map2 = &self.table.mappings[m2];
let lvl = if l > 0 {
&map2.levels[l - 1]
} else {
&map2.area
};
if l > 0 && lvl.pgtables == 0 {
continue;
}
if lvl.from >= to || lvl.to <= from {
continue;
}
let p_s = (max(from, lvl.from) - from)
>> (X86_PAGE_SHIFT + l as u64 * X86_PGTABLE_LEVEL_SHIFT);
let p_e = (min(to, lvl.to) - from)
>> (X86_PAGE_SHIFT + l as u64 * X86_PGTABLE_LEVEL_SHIFT);
let rhs = X86_PAGE_SHIFT as usize + l * X86_PGTABLE_LEVEL_SHIFT as usize;
let mut pfn = ((max(from, lvl.from) - lvl.from) >> rhs) + lvl.pfn;
debug!(
"setup_page_tables lvl={} map_1={} map_2={} pfn={:#x} p_s={:#x} p_e={:#x}",
l, m1, m2, pfn, p_s, p_e
);
let pg = unsafe { slice::from_raw_parts_mut(pg_ptr, (p_e + 1) as usize) };
for p in p_s..p_e + 1 {
let prot = self.get_pg_prot(l, pfn);
let pfn_paddr = setup.phys.p2m[pfn as usize] << X86_PAGE_SHIFT;
let value = pfn_paddr | prot;
pg[p as usize] = value;
pfn += 1;
}
}
}
}
Ok(())
}
async fn setup_start_info(
&mut self,
setup: &mut BootSetup,
state: &BootState,
cmdline: &str,
) -> Result<()> {
let ptr = setup
.phys
.pfn_to_ptr(state.start_info_segment.pfn, 1)
.await?;
let byte_slice =
unsafe { slice::from_raw_parts_mut(ptr as *mut u8, X86_PAGE_SIZE as usize) };
byte_slice.fill(0);
let info = ptr as *mut StartInfo;
let page_table_segment = state
.page_table_segment
.as_ref()
.ok_or(Error::MemorySetupFailed("page_table_segment missing"))?;
let p2m_segment = state
.p2m_segment
.as_ref()
.ok_or(Error::MemorySetupFailed("p2m_segment missing"))?;
unsafe {
for (i, c) in X86_GUEST_MAGIC.chars().enumerate() {
(*info).magic[i] = c as c_char;
}
(*info).magic[X86_GUEST_MAGIC.len()] = 0 as c_char;
(*info).nr_pages = setup.total_pages;
(*info).shared_info = state.shared_info_frame << X86_PAGE_SHIFT;
(*info).pt_base = page_table_segment.vstart;
(*info).nr_pt_frames = self.table.mappings[0].area.pgtables as u64;
(*info).mfn_list = p2m_segment.vstart;
(*info).first_p2m_pfn = p2m_segment.pfn;
(*info).nr_p2m_frames = p2m_segment.pages;
(*info).flags = 0;
(*info).store_evtchn = state.store_evtchn;
(*info).store_mfn = setup.phys.p2m[state.xenstore_segment.pfn as usize];
let console = state.consoles.first().unwrap();
(*info).console.mfn = setup.phys.p2m[console.1.pfn as usize];
(*info).console.evtchn = console.0;
(*info).mod_start = state.initrd_segment.vstart;
(*info).mod_len = state.initrd_segment.size;
for (i, c) in cmdline.chars().enumerate() {
(*info).cmdline[i] = c as c_char;
}
(*info).cmdline[MAX_GUEST_CMDLINE - 1] = 0;
trace!("setup_start_info start_info={:?}", *info);
}
Ok(())
}
async fn setup_shared_info(
&mut self,
setup: &mut BootSetup,
shared_info_frame: u64,
) -> Result<()> {
let info = setup
.phys
.map_foreign_pages(shared_info_frame, X86_PAGE_SIZE)
.await? as *mut SharedInfo;
unsafe {
let size = size_of::<SharedInfo>();
let info_as_buff = slice::from_raw_parts_mut(info as *mut u8, size);
info_as_buff.fill(0);
for i in 0..32 {
(*info).vcpu_info[i].evtchn_upcall_mask = 1;
}
trace!("setup_shared_info shared_info={:?}", *info);
}
Ok(())
}
async fn meminit(
&mut self,
setup: &mut BootSetup,
total_pages: u64,
_: &Option<DomainSegment>,
_: &Option<DomainSegment>,
) -> Result<()> {
setup.call.claim_pages(setup.domid, total_pages).await?;
let mut vmemranges: Vec<VmemRange> = Vec::new();
let stub = VmemRange {
start: 0,
end: total_pages << XEN_PAGE_SHIFT,
end: domain.total_pages << XEN_PAGE_SHIFT,
_flags: 0,
_nid: 0,
};
@ -660,12 +466,10 @@ impl ArchBootSetup for X86BootSetup {
p2m_size = p2m_size.max(range.end >> XEN_PAGE_SHIFT);
}
if total != total_pages {
if total != domain.total_pages {
return Err(Error::MemorySetupFailed("total pages mismatch"));
}
setup.total_pages = total;
let mut p2m = vec![u64::MAX; p2m_size as usize];
for range in &vmemranges {
let mut extents_init = vec![0u64; SUPERPAGE_BATCH_SIZE as usize];
@ -694,10 +498,10 @@ impl ArchBootSetup for X86BootSetup {
}
let extents_init_slice = extents_init.as_slice();
let extents = setup
let extents = domain
.call
.populate_physmap(
setup.domid,
domain.domid,
count,
SUPERPAGE_2MB_SHIFT as u32,
0,
@ -725,9 +529,9 @@ impl ArchBootSetup for X86BootSetup {
let p2m_idx = (pfn_base + j) as usize;
let p2m_end_idx = p2m_idx + allocsz as usize;
let input_extent_starts = &p2m[p2m_idx..p2m_end_idx];
let result = setup
let result = domain
.call
.populate_physmap(setup.domid, allocsz, 0, 0, input_extent_starts)
.populate_physmap(domain.domid, allocsz, 0, 0, input_extent_starts)
.await?;
if result.len() != allocsz as usize {
@ -747,50 +551,300 @@ impl ArchBootSetup for X86BootSetup {
}
}
setup.phys.load_p2m(p2m);
setup.call.claim_pages(setup.domid, 0).await?;
domain.phys.load_p2m(p2m);
domain.call.claim_pages(domain.domid, 0).await?;
Ok(())
}
async fn bootlate(&mut self, setup: &mut BootSetup, state: &mut BootState) -> Result<()> {
let p2m_segment = state
async fn alloc_p2m_segment(
&mut self,
domain: &mut BootDomain,
) -> Result<Option<DomainSegment>> {
let mut p2m_alloc_size =
((domain.phys.p2m_size() * 8) + X86_PAGE_SIZE - 1) & !(X86_PAGE_SIZE - 1);
let from = domain.image_info.virt_p2m_base;
let to = from + p2m_alloc_size - 1;
let m = self.count_page_tables(domain, from, to, domain.pfn_alloc_end)?;
let pgtables: usize;
{
let map = &mut self.table.mappings[m];
map.area.pfn = domain.pfn_alloc_end;
for lvl_idx in 0..4 {
map.levels[lvl_idx].pfn += p2m_alloc_size >> X86_PAGE_SHIFT;
}
pgtables = map.area.pgtables;
}
self.table.mappings_count += 1;
p2m_alloc_size += (pgtables << X86_PAGE_SHIFT) as u64;
let p2m_segment = domain.alloc_segment(0, p2m_alloc_size).await?;
Ok(Some(p2m_segment))
}
async fn alloc_page_tables(
&mut self,
domain: &mut BootDomain,
) -> Result<Option<DomainSegment>> {
let mut extra_pages = 1;
extra_pages += (512 * 1024) / X86_PAGE_SIZE;
let mut pages = extra_pages;
let mut try_virt_end: u64;
let mut m: usize;
loop {
try_virt_end = BootDomain::round_up(
domain.virt_alloc_end + pages * X86_PAGE_SIZE,
BootDomain::bits_to_mask(22),
);
m = self.count_page_tables(domain, domain.image_info.virt_base, try_virt_end, 0)?;
pages = self.table.mappings[m].area.pgtables as u64 + extra_pages;
if domain.virt_alloc_end + pages * X86_PAGE_SIZE <= try_virt_end + 1 {
break;
}
}
self.table.mappings[m].area.pfn = 0;
self.table.mappings_count += 1;
domain.virt_pgtab_end = try_virt_end + 1;
let size = self.table.mappings[m].area.pgtables as u64 * X86_PAGE_SIZE;
let segment = domain.alloc_segment(0, size).await?;
debug!(
"alloc_page_tables table={:?} segment={:?}",
self.table, segment
);
Ok(Some(segment))
}
async fn setup_page_tables(&mut self, domain: &mut BootDomain) -> Result<()> {
let p2m_segment = self
.p2m_segment
.as_ref()
.ok_or(Error::MemorySetupFailed("p2m_segment missing"))?;
let page_table_segment = state
.page_table_segment
.as_ref()
.ok_or(Error::MemorySetupFailed("page_table_segment missing"))?;
let pg_pfn = page_table_segment.pfn;
setup.phys.unmap(pg_pfn)?;
setup.phys.unmap(p2m_segment.pfn)?;
let p2m_guest = unsafe {
slice::from_raw_parts_mut(
p2m_segment.addr as *mut u64,
domain.phys.p2m_size() as usize,
)
};
copy(p2m_guest, &domain.phys.p2m);
let map = setup.call.get_memory_map(E820_MAX).await?;
let mem_mb = setup.total_pages >> (20 - self.page_shift());
let mem_kb = mem_mb * 1024;
let e820 = self.e820_sanitize(map, mem_kb, 0)?;
setup.call.set_memory_map(setup.domid, e820).await?;
for l in (0usize..X86_PGTABLE_LEVELS as usize).rev() {
for m1 in 0usize..self.table.mappings_count {
let map1 = &self.table.mappings[m1];
let from = map1.levels[l].from;
let to = map1.levels[l].to;
let pg_ptr = domain.phys.pfn_to_ptr(map1.levels[l].pfn, 0).await? as *mut u64;
for m2 in 0usize..self.table.mappings_count {
let map2 = &self.table.mappings[m2];
let lvl = if l > 0 {
&map2.levels[l - 1]
} else {
&map2.area
};
// setup
// .call
// .mmuext(setup.domid, MMUEXT_PIN_L4_TABLE, pg_mfn, 0)
// .await?;
if l > 0 && lvl.pgtables == 0 {
continue;
}
if lvl.from >= to || lvl.to <= from {
continue;
}
let p_s = (std::cmp::max(from, lvl.from) - from)
>> (X86_PAGE_SHIFT + l as u64 * X86_PGTABLE_LEVEL_SHIFT);
let p_e = (std::cmp::min(to, lvl.to) - from)
>> (X86_PAGE_SHIFT + l as u64 * X86_PGTABLE_LEVEL_SHIFT);
let rhs = X86_PAGE_SHIFT as usize + l * X86_PGTABLE_LEVEL_SHIFT as usize;
let mut pfn = ((std::cmp::max(from, lvl.from) - lvl.from) >> rhs) + lvl.pfn;
debug!(
"setup_page_tables lvl={} map_1={} map_2={} pfn={:#x} p_s={:#x} p_e={:#x}",
l, m1, m2, pfn, p_s, p_e
);
let pg = unsafe { slice::from_raw_parts_mut(pg_ptr, (p_e + 1) as usize) };
for p in p_s..p_e + 1 {
let prot = self.get_pg_prot(l, pfn);
let pfn_paddr = domain.phys.p2m[pfn as usize] << X86_PAGE_SHIFT;
let value = pfn_paddr | prot;
pg[p as usize] = value;
pfn += 1;
}
}
}
}
Ok(())
}
async fn vcpu(&mut self, setup: &mut BootSetup, state: &mut BootState) -> Result<()> {
let page_table_segment = state
async fn setup_hypercall_page(&mut self, domain: &mut BootDomain) -> Result<()> {
if domain.image_info.virt_hypercall == u64::MAX {
return Ok(());
}
let pfn =
(domain.image_info.virt_hypercall - domain.image_info.virt_base) >> self.page_shift();
let mfn = domain.phys.p2m[pfn as usize];
domain.call.hypercall_init(domain.domid, mfn).await?;
Ok(())
}
async fn alloc_magic_pages(&mut self, domain: &mut BootDomain) -> Result<()> {
if domain.image_info.virt_p2m_base >= domain.image_info.virt_base
|| (domain.image_info.virt_p2m_base & ((1 << self.page_shift()) - 1)) != 0
{
self.p2m_segment = self.alloc_p2m_segment(domain).await?;
}
self.start_info_segment = Some(domain.alloc_page()?);
self.xenstore_segment = Some(domain.alloc_page()?);
domain.xenstore_mfn = domain.phys.p2m[self.xenstore_segment.as_ref().unwrap().pfn as usize];
let evtchn = domain.call.evtchn_alloc_unbound(domain.domid, 0).await?;
let page = domain.alloc_page()?;
domain
.consoles
.push((evtchn, domain.phys.p2m[page.pfn as usize]));
self.page_table_segment = self.alloc_page_tables(domain).await?;
self.boot_stack_segment = Some(domain.alloc_page()?);
if domain.virt_pgtab_end > 0 {
domain.alloc_padding_pages(domain.virt_pgtab_end)?;
}
if self.p2m_segment.is_none() {
if let Some(mut p2m_segment) = self.alloc_p2m_segment(domain).await? {
p2m_segment.vstart = domain.image_info.virt_p2m_base;
self.p2m_segment = Some(p2m_segment);
}
}
Ok(())
}
async fn setup_shared_info(
&mut self,
domain: &mut BootDomain,
shared_info_frame: u64,
) -> Result<()> {
let info = domain
.phys
.map_foreign_pages(shared_info_frame, X86_PAGE_SIZE)
.await? as *mut SharedInfo;
unsafe {
let size = size_of::<SharedInfo>();
let info_as_buff = slice::from_raw_parts_mut(info as *mut u8, size);
info_as_buff.fill(0);
for i in 0..32 {
(*info).vcpu_info[i].evtchn_upcall_mask = 1;
}
trace!("setup_shared_info shared_info={:?}", *info);
}
Ok(())
}
async fn setup_start_info(
&mut self,
domain: &mut BootDomain,
cmdline: &str,
shared_info_frame: u64,
) -> Result<()> {
let start_info_segment = self
.start_info_segment
.as_ref()
.ok_or(Error::MemorySetupFailed("start_info_segment missing"))?;
let ptr = domain.phys.pfn_to_ptr(start_info_segment.pfn, 1).await?;
let byte_slice =
unsafe { slice::from_raw_parts_mut(ptr as *mut u8, X86_PAGE_SIZE as usize) };
byte_slice.fill(0);
let info = ptr as *mut StartInfo;
let page_table_segment = self
.page_table_segment
.as_ref()
.ok_or(Error::MemorySetupFailed("page_table_segment missing"))?;
let p2m_segment = self
.p2m_segment
.as_ref()
.ok_or(Error::MemorySetupFailed("p2m_segment missing"))?;
let xenstore_segment = self
.xenstore_segment
.as_ref()
.ok_or(Error::MemorySetupFailed("xenstore_segment missing"))?;
unsafe {
for (i, c) in X86_GUEST_MAGIC.chars().enumerate() {
(*info).magic[i] = c as c_char;
}
(*info).magic[X86_GUEST_MAGIC.len()] = 0 as c_char;
(*info).nr_pages = domain.total_pages;
(*info).shared_info = shared_info_frame << X86_PAGE_SHIFT;
(*info).pt_base = page_table_segment.vstart;
(*info).nr_pt_frames = self.table.mappings[0].area.pgtables as u64;
(*info).mfn_list = p2m_segment.vstart;
(*info).first_p2m_pfn = p2m_segment.pfn;
(*info).nr_p2m_frames = p2m_segment.pages;
(*info).flags = 0;
(*info).store_evtchn = domain.store_evtchn;
(*info).store_mfn = domain.phys.p2m[xenstore_segment.pfn as usize];
let console = domain.consoles.first().unwrap();
(*info).console.mfn = console.1;
(*info).console.evtchn = console.0;
(*info).mod_start = domain.initrd_segment.vstart;
(*info).mod_len = domain.initrd_segment.size;
for (i, c) in cmdline.chars().enumerate() {
(*info).cmdline[i] = c as c_char;
}
(*info).cmdline[MAX_GUEST_CMDLINE - 1] = 0;
trace!("setup_start_info start_info={:?}", *info);
}
Ok(())
}
async fn bootlate(&mut self, domain: &mut BootDomain) -> Result<()> {
let p2m_segment = self
.p2m_segment
.as_ref()
.ok_or(Error::MemorySetupFailed("p2m_segment missing"))?;
let page_table_segment = self
.page_table_segment
.as_ref()
.ok_or(Error::MemorySetupFailed("page_table_segment missing"))?;
let pg_pfn = page_table_segment.pfn;
let pg_mfn = setup.phys.p2m[pg_pfn as usize];
let pg_mfn = domain.phys.p2m[pg_pfn as usize];
domain.phys.unmap(pg_pfn)?;
domain.phys.unmap(p2m_segment.pfn)?;
let map = domain.call.get_memory_map(E820_MAX).await?;
let mem_mb = domain.total_pages >> (20 - self.page_shift());
let mem_kb = mem_mb * 1024;
let e820 = self.e820_sanitize(map, mem_kb, 0)?;
domain.call.set_memory_map(domain.domid, e820).await?;
domain
.call
.mmuext(domain.domid, MMUEXT_PIN_L4_TABLE, pg_mfn, 0)
.await?;
Ok(())
}
async fn vcpu(&mut self, domain: &mut BootDomain) -> Result<()> {
let page_table_segment = self
.page_table_segment
.as_ref()
.ok_or(Error::MemorySetupFailed("page_table_segment missing"))?;
let boot_stack_segment = self
.boot_stack_segment
.as_ref()
.ok_or(Error::MemorySetupFailed("boot_stack_segment missing"))?;
let start_info_segment = self
.start_info_segment
.as_ref()
.ok_or(Error::MemorySetupFailed("start_info_segment missing"))?;
let pg_pfn = page_table_segment.pfn;
let pg_mfn = domain.phys.p2m[pg_pfn as usize];
let mut vcpu = VcpuGuestContext::default();
vcpu.user_regs.rip = state.image_info.virt_entry;
vcpu.user_regs.rip = domain.image_info.virt_entry;
vcpu.user_regs.rsp =
state.image_info.virt_base + (state.boot_stack_segment.pfn + 1) * self.page_size();
domain.image_info.virt_base + (boot_stack_segment.pfn + 1) * self.page_size();
vcpu.user_regs.rsi =
state.image_info.virt_base + (state.start_info_segment.pfn) * self.page_size();
domain.image_info.virt_base + (start_info_segment.pfn) * self.page_size();
vcpu.user_regs.rflags = 1 << 9;
vcpu.debugreg[6] = 0xffff0ff0;
vcpu.debugreg[7] = 0x00000400;
@ -807,7 +861,40 @@ impl ArchBootSetup for X86BootSetup {
vcpu.kernel_ss = vcpu.user_regs.ss as u64;
vcpu.kernel_sp = vcpu.user_regs.rsp;
trace!("vcpu context: {:?}", vcpu);
setup.call.set_vcpu_context(setup.domid, 0, &vcpu).await?;
domain.call.set_vcpu_context(domain.domid, 0, &vcpu).await?;
Ok(())
}
async fn gnttab_seed(&mut self, domain: &mut BootDomain) -> Result<()> {
let xenstore_segment = self
.xenstore_segment
.as_ref()
.ok_or(Error::MemorySetupFailed("xenstore_segment missing"))?;
let console_gfn = domain.consoles.first().map(|x| x.1).unwrap_or(0) as usize;
let xenstore_gfn = domain.phys.p2m[xenstore_segment.pfn as usize];
let addr = domain
.call
.mmap(0, 1 << XEN_PAGE_SHIFT)
.await
.ok_or(Error::MmapFailed)?;
domain
.call
.map_resource(domain.domid, 1, 0, 0, 1, addr)
.await?;
let entries = unsafe { slice::from_raw_parts_mut(addr as *mut GrantEntry, 2) };
entries[0].flags = 1 << 0;
entries[0].domid = 0;
entries[0].frame = console_gfn as u32;
entries[1].flags = 1 << 0;
entries[1].domid = 0;
entries[1].frame = xenstore_gfn as u32;
unsafe {
let result = munmap(addr as *mut c_void, 1 << XEN_PAGE_SHIFT);
if result != 0 {
return Err(Error::UnmapFailed(Errno::from_raw(result)));
}
}
Ok(())
}
}