feature(xen): dynamic platform architecture (#194)

* wip hvm

* feat: move all platform support into its own module

* hvm work

* more hvm work

* more hvm work

* feat: rework to support multiple platforms

* hvm nonredist

* more hvm work

* more hvm work

* pvh work

* work on loading cmdline

* implement initrd loading for pvh

* partially working pvh support

* fix merge issues

* pvh works!

* swap over to pv support

* remove old kernel stuff

* fix support for pv

* pvh is gone for now

* fix(runtime): the debug flag should be respected

* fix(xen): arm64 is currently unsupported, treat it as such at runtime

* fix(examples): use architecture cfg for boot example

* fix(x86): use IOMMU only when needed for passthrough

* chore(build): print kernel architecture during fetch
Alex Zenla
2024-06-20 19:42:45 -07:00
committed by GitHub
parent 2c7210d85e
commit e219f3adf1
25 changed files with 1124 additions and 11293 deletions
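
Taken together, the series replaces compile-time architecture branching inside XenClient with a platform value chosen once by the caller. The shape of the new call site, assembled from the runtime and example diffs below (a sketch, not additional changed code):

#[cfg(target_arch = "x86_64")]
type RuntimePlatform = xenclient::x86pv::X86PvPlatform;
#[cfg(not(target_arch = "x86_64"))]
type RuntimePlatform = xenclient::unsupported::UnsupportedPlatform;

// XenClient is now generic over BootSetupPlatform.
let client = XenClient::new(0, RuntimePlatform::new()).await?;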

View File

@ -26,6 +26,7 @@ async fn main() -> Result<()> {
fs::create_dir_all("target/kernel").await?;
let arch = env::var("TARGET_ARCH").map_err(|_| anyhow!("missing TARGET_ARCH env var"))?;
println!("kernel architecture: {}", arch);
let platform = OciPlatform::new(
Os::Linux,
match arch.as_str() {

View File

@ -375,7 +375,10 @@ impl KrataChannelBackendProcessor {
};
ring_ref = self.use_reserved_ref.unwrap_or(ring_ref);
debug!(
"channel backend for domain {} channel {}: ring-ref={} port={}",
self.domid, self.id, ring_ref, port,
);
break (ring_ref, port);
}
}
@ -389,14 +392,24 @@ impl KrataChannelBackendProcessor {
self.store
.write_string(format!("{}/state", self.backend), "4")
.await?;
let memory = self.gnttab.map_grant_refs(
vec![GrantRef {
domid: self.domid,
reference: ring_ref as u32,
}],
true,
true,
)?;
let memory = self
.gnttab
.map_grant_refs(
vec![GrantRef {
domid: self.domid,
reference: ring_ref as u32,
}],
true,
true,
)
.map_err(|e| {
anyhow!(
"failed to map grant ref {} for domid {}: {}",
ring_ref,
self.domid,
e
)
})?;
let mut channel = self.evtchn.bind(self.domid, port).await?;
unsafe {
let buffer = self.read_output_buffer(channel.local_port, &memory).await?;

View File

@ -138,10 +138,10 @@ impl GuestLauncher {
} else {
None
};
let cmdline_options = [
if request.debug { "debug" } else { "quiet" },
"elevator=noop",
];
let mut cmdline_options = ["console=hvc0"].to_vec();
if !request.debug {
cmdline_options.push("quiet");
}
let cmdline = cmdline_options.join(" ");
let guest_mac_string = container_mac.to_string().replace('-', ":");
@ -227,7 +227,7 @@ impl GuestLauncher {
kernel: request.kernel,
initrd: request.initrd,
cmdline,
use_console_backend: Some("krata-console".to_string()),
swap_console_backend: Some("krata-console".to_string()),
disks,
channels: vec![DomainChannel {
typ: "krata-channel".to_string(),
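
For reference, the rebuilt command line always pins the console to hvc0, drops the old elevator=noop option, and only adds quiet when debug is off; a minimal sketch of the logic (build_cmdline is a hypothetical helper, the launcher inlines this):

fn build_cmdline(debug: bool) -> String {
    let mut options = vec!["console=hvc0"];
    if !debug {
        options.push("quiet");
    }
    options.join(" ")
}

assert_eq!(build_cmdline(true), "console=hvc0");
assert_eq!(build_cmdline(false), "console=hvc0 quiet");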

View File

@ -21,6 +21,12 @@ pub mod channel;
pub mod ip;
pub mod launch;
#[cfg(target_arch = "x86_64")]
type RuntimePlatform = xenclient::x86pv::X86PvPlatform;
#[cfg(not(target_arch = "x86_64"))]
type RuntimePlatform = xenclient::unsupported::UnsupportedPlatform;
pub struct GuestLoopInfo {
pub device: String,
pub file: String,
@ -49,13 +55,13 @@ pub struct GuestInfo {
#[derive(Clone)]
pub struct RuntimeContext {
pub autoloop: AutoLoop,
pub xen: XenClient,
pub xen: XenClient<RuntimePlatform>,
pub ipvendor: IpVendor,
}
impl RuntimeContext {
pub async fn new(host_uuid: Uuid) -> Result<Self> {
let xen = XenClient::open(0).await?;
let xen = XenClient::new(0, RuntimePlatform::new()).await?;
let ipv4_network = Ipv4Network::new(Ipv4Addr::new(10, 75, 80, 0), 24)?;
let ipv6_network = Ipv6Network::from_str("fdd4:1476:6c7e::/48")?;
let ipvend =

View File

@ -33,7 +33,3 @@ path = "examples/domain_create.rs"
[[example]]
name = "xencall-version-capabilities"
path = "examples/version_capabilities.rs"
[[example]]
name = "xencall-vcpu-context"
path = "examples/vcpu_context.rs"

View File

@ -1,12 +0,0 @@
use xencall::error::Result;
use xencall::XenCall;
#[tokio::main]
async fn main() -> Result<()> {
env_logger::init();
let call = XenCall::open(0)?;
let context = call.get_vcpu_context(224, 0).await?;
println!("{:?}", context);
Ok(())
}

View File

@ -3,34 +3,38 @@ pub mod sys;
use crate::error::{Error, Result};
use crate::sys::{
AddressSize, AssignDevice, CreateDomain, DomCtl, DomCtlValue, DomCtlVcpuContext,
EvtChnAllocUnbound, GetDomainInfo, GetPageFrameInfo3, Hypercall, HypercallInit,
IoMemPermission, IoPortPermission, IrqPermission, MaxMem, MaxVcpus, MemoryMap,
MemoryReservation, MmapBatch, MmapResource, MmuExtOp, MultiCallEntry, PciAssignDevice,
VcpuGuestContext, VcpuGuestContextAny, XenCapabilitiesInfo, DOMCTL_DEV_PCI, HYPERVISOR_DOMCTL,
HYPERVISOR_EVENT_CHANNEL_OP, HYPERVISOR_MEMORY_OP, HYPERVISOR_MMUEXT_OP, HYPERVISOR_MULTICALL,
HYPERVISOR_XEN_VERSION, XENVER_CAPABILITIES, XEN_DOMCTL_ASSIGN_DEVICE, XEN_DOMCTL_CREATEDOMAIN,
XEN_DOMCTL_DESTROYDOMAIN, XEN_DOMCTL_GETDOMAININFO, XEN_DOMCTL_GETPAGEFRAMEINFO3,
XEN_DOMCTL_GETVCPUCONTEXT, XEN_DOMCTL_HYPERCALL_INIT, XEN_DOMCTL_IOMEM_PERMISSION,
XEN_DOMCTL_IOPORT_PERMISSION, XEN_DOMCTL_IRQ_PERMISSION, XEN_DOMCTL_MAX_MEM,
XEN_DOMCTL_MAX_VCPUS, XEN_DOMCTL_PAUSEDOMAIN, XEN_DOMCTL_SETVCPUCONTEXT,
XEN_DOMCTL_SET_ADDRESS_SIZE, XEN_DOMCTL_UNPAUSEDOMAIN, XEN_MEM_CLAIM_PAGES, XEN_MEM_MEMORY_MAP,
AddToPhysmap, AddressSize, AssignDevice, CreateDomain, DomCtl, DomCtlValue, DomCtlVcpuContext,
EvtChnAllocUnbound, GetDomainInfo, GetPageFrameInfo3, HvmContext, HvmParam, Hypercall,
HypercallInit, IoMemPermission, IoPortPermission, IrqPermission, MaxMem, MaxVcpus, MemoryMap,
MemoryReservation, MmapBatch, MmapResource, MmuExtOp, MultiCallEntry, PagingMempool,
PciAssignDevice, XenCapabilitiesInfo, DOMCTL_DEV_PCI, HYPERVISOR_DOMCTL,
HYPERVISOR_EVENT_CHANNEL_OP, HYPERVISOR_HVM_OP, HYPERVISOR_MEMORY_OP, HYPERVISOR_MMUEXT_OP,
HYPERVISOR_MULTICALL, HYPERVISOR_XEN_VERSION, XENVER_CAPABILITIES, XEN_DOMCTL_ASSIGN_DEVICE,
XEN_DOMCTL_CREATEDOMAIN, XEN_DOMCTL_DESTROYDOMAIN, XEN_DOMCTL_GETDOMAININFO,
XEN_DOMCTL_GETHVMCONTEXT, XEN_DOMCTL_GETPAGEFRAMEINFO3, XEN_DOMCTL_HYPERCALL_INIT,
XEN_DOMCTL_IOMEM_PERMISSION, XEN_DOMCTL_IOPORT_PERMISSION, XEN_DOMCTL_IRQ_PERMISSION,
XEN_DOMCTL_MAX_MEM, XEN_DOMCTL_MAX_VCPUS, XEN_DOMCTL_PAUSEDOMAIN, XEN_DOMCTL_SETHVMCONTEXT,
XEN_DOMCTL_SETVCPUCONTEXT, XEN_DOMCTL_SET_ADDRESS_SIZE, XEN_DOMCTL_SET_PAGING_MEMPOOL_SIZE,
XEN_DOMCTL_UNPAUSEDOMAIN, XEN_MEM_ADD_TO_PHYSMAP, XEN_MEM_CLAIM_PAGES, XEN_MEM_MEMORY_MAP,
XEN_MEM_POPULATE_PHYSMAP,
};
use libc::{c_int, mmap, usleep, MAP_FAILED, MAP_SHARED, PROT_READ, PROT_WRITE};
use libc::{c_int, mmap, MAP_FAILED, MAP_SHARED, PROT_READ, PROT_WRITE};
use log::trace;
use nix::errno::Errno;
use std::ffi::{c_long, c_uint, c_ulong, c_void};
use std::sync::Arc;
use std::time::Duration;
use sys::{
E820Entry, ForeignMemoryMap, PhysdevMapPirq, HYPERVISOR_PHYSDEV_OP, PHYSDEVOP_MAP_PIRQ,
XEN_DOMCTL_MAX_INTERFACE_VERSION, XEN_DOMCTL_MIN_INTERFACE_VERSION, XEN_MEM_SET_MEMORY_MAP,
E820Entry, ForeignMemoryMap, PhysdevMapPirq, VcpuGuestContextAny, HYPERVISOR_PHYSDEV_OP,
PHYSDEVOP_MAP_PIRQ, XEN_DOMCTL_MAX_INTERFACE_VERSION, XEN_DOMCTL_MIN_INTERFACE_VERSION,
XEN_MEM_SET_MEMORY_MAP,
};
use tokio::sync::Semaphore;
use tokio::time::sleep;
use std::fs::{File, OpenOptions};
use std::os::fd::AsRawFd;
use std::ptr::addr_of_mut;
use std::ptr::{addr_of_mut, null_mut};
use std::slice;
#[derive(Clone)]
@ -233,8 +237,8 @@ impl XenCall {
num: num as u32,
domid: domid as u16,
addr,
mfns: mfns.as_mut_ptr(),
errors: errors.as_mut_ptr(),
mfns: mfns.as_mut_ptr() as u64,
errors: errors.as_mut_ptr() as u64,
};
let result = sys::mmapbatch(self.handle.as_raw_fd(), &mut batch);
@ -243,7 +247,7 @@ impl XenCall {
return Err(Error::MmapBatchFailed(errno))?;
}
usleep(100);
sleep(Duration::from_micros(100)).await;
let mut i: usize = 0;
let mut paged: usize = 0;
@ -258,8 +262,8 @@ impl XenCall {
num: 1,
domid: domid as u16,
addr: addr + ((i as u64) << 12),
mfns: mfns.as_mut_ptr().add(i),
errors: errors.as_mut_ptr().add(i),
mfns: mfns.as_mut_ptr().add(i) as u64,
errors: errors.as_mut_ptr().add(i) as u64,
};
loop {
@ -459,45 +463,19 @@ impl XenCall {
Ok(())
}
pub async fn get_vcpu_context(&self, domid: u32, vcpu: u32) -> Result<VcpuGuestContext> {
trace!(
"domctl fd={} get_vcpu_context domid={}",
self.handle.as_raw_fd(),
domid,
);
let mut wrapper = VcpuGuestContextAny {
value: VcpuGuestContext::default(),
};
let mut domctl = DomCtl {
cmd: XEN_DOMCTL_GETVCPUCONTEXT,
interface_version: self.domctl_interface_version,
domid,
value: DomCtlValue {
vcpu_context: DomCtlVcpuContext {
vcpu,
ctx: addr_of_mut!(wrapper) as c_ulong,
},
},
};
self.hypercall1(HYPERVISOR_DOMCTL, addr_of_mut!(domctl) as c_ulong)
.await?;
Ok(unsafe { wrapper.value })
}
pub async fn set_vcpu_context(
&self,
domid: u32,
vcpu: u32,
context: &VcpuGuestContext,
mut context: VcpuGuestContextAny,
) -> Result<()> {
trace!(
"domctl fd={} set_vcpu_context domid={} context={:?}",
self.handle.as_raw_fd(),
domid,
context,
unsafe { context.value }
);
let mut value = VcpuGuestContextAny { value: *context };
let mut domctl = DomCtl {
cmd: XEN_DOMCTL_SETVCPUCONTEXT,
interface_version: self.domctl_interface_version,
@ -505,7 +483,7 @@ impl XenCall {
value: DomCtlValue {
vcpu_context: DomCtlVcpuContext {
vcpu,
ctx: addr_of_mut!(value) as c_ulong,
ctx: addr_of_mut!(context) as c_ulong,
},
},
};
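
Callers now wrap the architecture-specific context in the union before the call; a sketch matching the x86 PV platform later in this diff (register setup elided):

let mut vcpu = x8664VcpuGuestContext::default();
vcpu.user_regs.rip = virt_entry; // illustrative; see the x86pv platform for the full setup
call.set_vcpu_context(domid, 0, VcpuGuestContextAny { value: vcpu })
    .await?;
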
@ -597,6 +575,12 @@ impl XenCall {
domid: u32,
entries: Vec<E820Entry>,
) -> Result<Vec<E820Entry>> {
trace!(
"fd={} set_memory_map domid={} entries={:?}",
self.handle.as_raw_fd(),
domid,
entries
);
let mut memory_map = ForeignMemoryMap {
domid: domid as u16,
map: MemoryMap {
@ -633,24 +617,14 @@ impl XenCall {
domid: domid as u16,
};
let calls = &mut [MultiCallEntry {
op: HYPERVISOR_MEMORY_OP,
result: 0,
args: [
let code = self
.hypercall2(
HYPERVISOR_MEMORY_OP,
XEN_MEM_POPULATE_PHYSMAP as c_ulong,
addr_of_mut!(reservation) as c_ulong,
0,
0,
0,
0,
],
}];
self.multicall(calls).await?;
let code = calls[0].result;
if code > !0xfff {
return Err(Error::PopulatePhysmapFailed);
}
if code as usize > extent_starts.len() {
)
.await?;
if code as usize != extent_starts.len() {
return Err(Error::PopulatePhysmapFailed);
}
let extents = extent_starts[0..code as usize].to_vec();
@ -680,6 +654,31 @@ impl XenCall {
Ok(())
}
pub async fn add_to_physmap(&self, domid: u32, space: u32, idx: u64, pfn: u64) -> Result<()> {
trace!(
"memory fd={} add_to_physmap domid={} space={} idx={} pfn={}",
self.handle.as_raw_fd(),
domid,
space,
idx,
pfn,
);
let mut add = AddToPhysmap {
domid: domid as u16,
size: 0,
space,
idx,
gpfn: pfn,
};
self.hypercall2(
HYPERVISOR_MEMORY_OP,
XEN_MEM_ADD_TO_PHYSMAP as c_ulong,
addr_of_mut!(add) as c_ulong,
)
.await?;
Ok(())
}
pub async fn mmuext(&self, domid: u32, cmd: c_uint, arg1: u64, arg2: u64) -> Result<()> {
let mut ops = MmuExtOp { cmd, arg1, arg2 };
@ -783,6 +782,7 @@ impl XenCall {
Ok(())
}
#[allow(clippy::field_reassign_with_default)]
pub async fn map_pirq(&self, domid: u32, index: isize, pirq: Option<u32>) -> Result<u32> {
trace!(
"physdev fd={} map_pirq domid={} index={} pirq={:?}",
@ -835,4 +835,86 @@ impl XenCall {
.await?;
Ok(())
}
#[allow(clippy::field_reassign_with_default)]
pub async fn set_hvm_param(&self, domid: u32, index: u32, value: u64) -> Result<()> {
trace!(
"set_hvm_param fd={} domid={} index={} value={:?}",
self.handle.as_raw_fd(),
domid,
index,
value,
);
let mut param = HvmParam::default();
param.domid = domid as u16;
param.index = index;
param.value = value;
self.hypercall2(HYPERVISOR_HVM_OP, 0, addr_of_mut!(param) as c_ulong)
.await?;
Ok(())
}
pub async fn get_hvm_context(&self, domid: u32, buffer: Option<&mut [u8]>) -> Result<u32> {
trace!(
"domctl fd={} get_hvm_context domid={}",
self.handle.as_raw_fd(),
domid,
);
let mut domctl = DomCtl {
cmd: XEN_DOMCTL_GETHVMCONTEXT,
interface_version: self.domctl_interface_version,
domid,
value: DomCtlValue {
hvm_context: HvmContext {
size: buffer.as_ref().map(|x| x.len()).unwrap_or(0) as u32,
buffer: buffer.map(|x| x.as_mut_ptr()).unwrap_or(null_mut()) as u64,
},
},
};
self.hypercall1(HYPERVISOR_DOMCTL, addr_of_mut!(domctl) as c_ulong)
.await?;
Ok(unsafe { domctl.value.hvm_context.size })
}
pub async fn set_hvm_context(&self, domid: u32, buffer: &mut [u8]) -> Result<u32> {
trace!(
"domctl fd={} set_hvm_context domid={}",
self.handle.as_raw_fd(),
domid,
);
let mut domctl = DomCtl {
cmd: XEN_DOMCTL_SETHVMCONTEXT,
interface_version: self.domctl_interface_version,
domid,
value: DomCtlValue {
hvm_context: HvmContext {
size: buffer.len() as u32,
buffer: buffer.as_ptr() as u64,
},
},
};
self.hypercall1(HYPERVISOR_DOMCTL, addr_of_mut!(domctl) as c_ulong)
.await?;
Ok(unsafe { domctl.value.hvm_context.size })
}
pub async fn set_paging_mempool_size(&self, domid: u32, size: u64) -> Result<()> {
trace!(
"domctl fd={} set_paging_mempool_size domid={} size={}",
self.handle.as_raw_fd(),
domid,
size,
);
let mut domctl = DomCtl {
cmd: XEN_DOMCTL_SET_PAGING_MEMPOOL_SIZE,
interface_version: self.domctl_interface_version,
domid,
value: DomCtlValue {
paging_mempool: PagingMempool { size },
},
};
self.hypercall1(HYPERVISOR_DOMCTL, addr_of_mut!(domctl) as c_ulong)
.await?;
Ok(())
}
}
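
A sketch of how a caller might drive the new HVM context API (illustrative only; the size query relies on the null-buffer path supported above):

// Ask for the required size, then fetch the serialized context.
let size = call.get_hvm_context(domid, None).await?;
let mut buffer = vec![0u8; size as usize];
call.get_hvm_context(domid, Some(&mut buffer)).await?;
// ... inspect or adjust the serialized HVM context ...
call.set_hvm_context(domid, &mut buffer).await?;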

View File

@ -35,8 +35,8 @@ pub struct MmapBatch {
pub num: u32,
pub domid: u16,
pub addr: u64,
pub mfns: *mut u64,
pub errors: *mut c_int,
pub mfns: u64,
pub errors: u64,
}
#[repr(C)]
@ -200,6 +200,7 @@ pub const XEN_DOMCTL_PSR_CAT_OP: u32 = 78;
pub const XEN_DOMCTL_SOFT_RESET: u32 = 79;
pub const XEN_DOMCTL_SET_GNTTAB_LIMITS: u32 = 80;
pub const XEN_DOMCTL_VUART_OP: u32 = 81;
pub const XEN_DOMCTL_SET_PAGING_MEMPOOL_SIZE: u32 = 86;
pub const XEN_DOMCTL_GDBSX_GUESTMEMIO: u32 = 1000;
pub const XEN_DOMCTL_GDBSX_PAUSEVCPU: u32 = 1001;
pub const XEN_DOMCTL_GDBSX_UNPAUSEVCPU: u32 = 1002;
@ -242,6 +243,8 @@ pub union DomCtlValue {
pub iomem_permission: IoMemPermission,
pub irq_permission: IrqPermission,
pub assign_device: AssignDevice,
pub hvm_context: HvmContext,
pub paging_mempool: PagingMempool,
pub pad: [u8; 128],
}
@ -267,10 +270,7 @@ impl Default for CreateDomain {
CreateDomain {
ssidref: SECINITSID_DOMU,
handle: Uuid::new_v4().into_bytes(),
#[cfg(target_arch = "x86_64")]
flags: 0,
#[cfg(target_arch = "aarch64")]
flags: 1 << XEN_DOMCTL_CDF_HVM_GUEST,
iommu_opts: 0,
max_vcpus: 1,
max_evtchn_port: 1023,
@ -346,6 +346,8 @@ pub struct ArchDomainConfig {
pub misc_flags: u32,
}
pub const X86_EMU_LAPIC: u32 = 1 << 0;
#[repr(C)]
#[derive(Copy, Clone, Debug, Default)]
#[cfg(target_arch = "aarch64")]
@ -398,6 +400,16 @@ pub struct MemoryReservation {
pub domid: u16,
}
#[repr(C)]
#[derive(Copy, Clone, Debug)]
pub struct AddToPhysmap {
pub domid: u16,
pub size: u16,
pub space: u32,
pub idx: u64,
pub gpfn: u64,
}
#[repr(C)]
#[derive(Copy, Clone, Debug)]
pub struct MultiCallEntry {
@ -410,6 +422,7 @@ pub const XEN_MEM_POPULATE_PHYSMAP: u32 = 6;
pub const XEN_MEM_MEMORY_MAP: u32 = 10;
pub const XEN_MEM_SET_MEMORY_MAP: u32 = 13;
pub const XEN_MEM_CLAIM_PAGES: u32 = 24;
pub const XEN_MEM_ADD_TO_PHYSMAP: u32 = 7;
#[repr(C)]
#[derive(Copy, Clone, Debug)]
@ -439,8 +452,8 @@ impl Default for VcpuGuestContextFpuCtx {
#[repr(C)]
#[derive(Copy, Clone, Debug, Default)]
#[cfg(target_arch = "x86_64")]
pub struct CpuUserRegs {
#[allow(non_camel_case_types)]
pub struct x8664CpuUserRegs {
pub r15: u64,
pub r14: u64,
pub r13: u64,
@ -479,7 +492,6 @@ pub struct CpuUserRegs {
#[repr(C)]
#[derive(Copy, Clone, Debug, Default)]
#[cfg(target_arch = "x86_64")]
pub struct TrapInfo {
pub vector: u8,
pub flags: u8,
@ -489,11 +501,11 @@ pub struct TrapInfo {
#[repr(C)]
#[derive(Copy, Clone, Debug)]
#[cfg(target_arch = "x86_64")]
pub struct VcpuGuestContext {
#[allow(non_camel_case_types)]
pub struct x8664VcpuGuestContext {
pub fpu_ctx: VcpuGuestContextFpuCtx,
pub flags: u64,
pub user_regs: CpuUserRegs,
pub user_regs: x8664CpuUserRegs,
pub trap_ctx: [TrapInfo; 256],
pub ldt_base: u64,
pub ldt_ents: u64,
@ -512,10 +524,9 @@ pub struct VcpuGuestContext {
pub gs_base_user: u64,
}
#[cfg(target_arch = "x86_64")]
impl Default for VcpuGuestContext {
impl Default for x8664VcpuGuestContext {
fn default() -> Self {
VcpuGuestContext {
Self {
fpu_ctx: Default::default(),
flags: 0,
user_regs: Default::default(),
@ -541,8 +552,7 @@ impl Default for VcpuGuestContext {
#[repr(C)]
#[derive(Copy, Clone, Debug, Default)]
#[cfg(target_arch = "aarch64")]
pub struct CpuUserRegs {
pub struct Arm64CpuUserRegs {
pub x0: u64,
pub x1: u64,
pub x2: u64,
@ -588,10 +598,9 @@ pub struct CpuUserRegs {
#[repr(C)]
#[derive(Copy, Clone, Debug, Default)]
#[cfg(target_arch = "aarch64")]
pub struct VcpuGuestContext {
pub struct Arm64VcpuGuestContext {
pub flags: u32,
pub user_regs: CpuUserRegs,
pub user_regs: Arm64CpuUserRegs,
pub sctlr: u64,
pub ttbcr: u64,
pub ttbr0: u64,
@ -599,7 +608,10 @@ pub struct VcpuGuestContext {
}
pub union VcpuGuestContextAny {
pub value: VcpuGuestContext,
#[cfg(target_arch = "aarch64")]
pub value: Arm64VcpuGuestContext,
#[cfg(target_arch = "x86_64")]
pub value: x8664VcpuGuestContext,
}
#[repr(C)]
@ -628,17 +640,11 @@ pub struct E820Entry {
pub typ: u32,
}
#[cfg(target_arch = "x86_64")]
pub const E820_MAX: u32 = 1024;
#[cfg(target_arch = "x86_64")]
pub const E820_RAM: u32 = 1;
#[cfg(target_arch = "x86_64")]
pub const E820_RESERVED: u32 = 2;
#[cfg(target_arch = "x86_64")]
pub const E820_ACPI: u32 = 3;
#[cfg(target_arch = "x86_64")]
pub const E820_NVS: u32 = 4;
#[cfg(target_arch = "x86_64")]
pub const E820_UNUSABLE: u32 = 5;
pub const PHYSDEVOP_MAP_PIRQ: u64 = 13;
@ -676,3 +682,34 @@ pub struct AssignDevice {
}
pub const DOMID_IO: u32 = 0x7FF1;
pub const MEMFLAGS_POPULATE_ON_DEMAND: u32 = 1 << 16;
pub struct PodTarget {
pub target_pages: u64,
pub total_pages: u64,
pub pod_cache_pages: u64,
pub pod_entries: u64,
pub domid: u16,
}
#[repr(C)]
#[derive(Default, Clone, Copy, Debug)]
pub struct HvmParam {
pub domid: u16,
pub pad: u8,
pub index: u32,
pub value: u64,
}
#[repr(C)]
#[derive(Clone, Copy, Debug)]
pub struct HvmContext {
pub size: u32,
pub buffer: u64,
}
#[repr(C)]
#[derive(Clone, Copy, Debug)]
pub struct PagingMempool {
pub size: u64,
}
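
These are repr(C) ABI structs, so their layouts can be sanity-checked; a quick sketch of such a check (sizes assume the usual x86_64 alignment rules):

use std::mem::size_of;

assert_eq!(size_of::<HvmParam>(), 16); // u16 + u8 + pad + u32 + u64
assert_eq!(size_of::<HvmContext>(), 16); // u32 + pad + u64
assert_eq!(size_of::<AddToPhysmap>(), 24); // u16 + u16 + u32 + u64 + u64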

View File

@ -10,6 +10,7 @@ resolver = "2"
[dependencies]
async-trait = { workspace = true }
c2rust-bitfields = "0.18.0"
elf = { workspace = true }
flate2 = { workspace = true }
indexmap = { workspace = true }

View File

@ -3,6 +3,12 @@ use tokio::fs;
use xenclient::error::Result;
use xenclient::{DomainConfig, XenClient};
#[cfg(target_arch = "x86_64")]
type RuntimePlatform = xenclient::x86pv::X86PvPlatform;
#[cfg(not(target_arch = "x86_64"))]
type RuntimePlatform = xenclient::unsupported::UnsupportedPlatform;
#[tokio::main]
async fn main() -> Result<()> {
env_logger::init();
@ -14,7 +20,7 @@ async fn main() -> Result<()> {
}
let kernel_image_path = args.get(1).expect("argument not specified");
let initrd_path = args.get(2).expect("argument not specified");
let client = XenClient::open(0).await?;
let client = XenClient::new(0, RuntimePlatform::new()).await?;
let config = DomainConfig {
backend_domid: 0,
name: "xenclient-test".to_string(),
@ -22,8 +28,8 @@ async fn main() -> Result<()> {
mem_mb: 512,
kernel: fs::read(&kernel_image_path).await?,
initrd: fs::read(&initrd_path).await?,
cmdline: "debug elevator=noop".to_string(),
use_console_backend: None,
cmdline: "earlyprintk=xen earlycon=xen console=hvc0 init=/init".to_string(),
swap_console_backend: None,
disks: vec![],
channels: vec![],
vifs: vec![],

View File

@ -1,288 +0,0 @@
use crate::boot::{ArchBootSetup, BootImageInfo, BootSetup, BootState, DomainSegment};
use crate::error::Result;
use crate::sys::XEN_PAGE_SHIFT;
use crate::Error;
use log::trace;
use xencall::sys::VcpuGuestContext;
pub const ARM_PAGE_SHIFT: u64 = 12;
const ARM_PAGE_SIZE: u64 = 1 << ARM_PAGE_SHIFT;
const GUEST_RAM0_BASE: u64 = 0x40000000;
const GUEST_RAM0_SIZE: u64 = 0xc0000000;
const GUEST_RAM1_BASE: u64 = 0x0200000000;
const GUEST_RAM1_SIZE: u64 = 0xfe00000000;
const GUEST_RAM_BANK_BASES: [u64; 2] = [GUEST_RAM0_BASE, GUEST_RAM1_BASE];
const GUEST_RAM_BANK_SIZES: [u64; 2] = [GUEST_RAM0_SIZE, GUEST_RAM1_SIZE];
const LPAE_SHIFT: u64 = 9;
const PFN_4K_SHIFT: u64 = 0;
const PFN_2M_SHIFT: u64 = PFN_4K_SHIFT + LPAE_SHIFT;
const PFN_1G_SHIFT: u64 = PFN_2M_SHIFT + LPAE_SHIFT;
const PFN_512G_SHIFT: u64 = PFN_1G_SHIFT + LPAE_SHIFT;
const PSR_FIQ_MASK: u64 = 1 << 6; /* Fast Interrupt mask */
const PSR_IRQ_MASK: u64 = 1 << 7; /* Interrupt mask */
const PSR_ABT_MASK: u64 = 1 << 8; /* Asynchronous Abort mask */
const PSR_MODE_EL1H: u64 = 0x05;
const PSR_GUEST64_INIT: u64 = PSR_ABT_MASK | PSR_FIQ_MASK | PSR_IRQ_MASK | PSR_MODE_EL1H;
pub struct Arm64BootSetup {}
impl Default for Arm64BootSetup {
fn default() -> Self {
Self::new()
}
}
impl Arm64BootSetup {
pub fn new() -> Arm64BootSetup {
Arm64BootSetup {}
}
async fn populate_one_size(
&self,
setup: &mut BootSetup<'_>,
pfn_shift: u64,
base_pfn: u64,
pfn_count: u64,
extents: &mut [u64],
) -> Result<u64> {
let mask = (1u64 << pfn_shift) - 1;
let next_shift = pfn_shift + LPAE_SHIFT;
let next_mask = (1u64 << next_shift) - 1;
let next_boundary = (base_pfn + (1 << next_shift)) - 1;
let mut end_pfn = base_pfn + pfn_count;
if pfn_shift == PFN_512G_SHIFT {
return Ok(0);
}
if (base_pfn & next_mask) != 0 && end_pfn > next_boundary {
end_pfn = next_boundary;
}
if (mask & base_pfn) != 0 {
return Ok(0);
}
let count = (end_pfn - base_pfn) >> pfn_shift;
if count == 0 {
return Ok(0);
}
for i in 0..count {
extents[i as usize] = base_pfn + (i << pfn_shift);
}
let result_extents = setup
.call
.populate_physmap(
setup.domid,
count,
pfn_shift as u32,
0,
&extents[0usize..count as usize],
)
.await?;
slice_copy::copy(extents, &result_extents);
Ok((result_extents.len() as u64) << pfn_shift)
}
async fn populate_guest_memory(
&mut self,
setup: &mut BootSetup<'_>,
base_pfn: u64,
pfn_count: u64,
) -> Result<()> {
let mut extents = vec![0u64; 1024 * 1024];
for pfn in 0..extents.len() {
let mut allocsz = (1024 * 1024).min(pfn_count - pfn as u64);
allocsz = self
.populate_one_size(
setup,
PFN_512G_SHIFT,
base_pfn + pfn as u64,
allocsz,
&mut extents,
)
.await?;
if allocsz > 0 {
continue;
}
allocsz = self
.populate_one_size(
setup,
PFN_1G_SHIFT,
base_pfn + pfn as u64,
allocsz,
&mut extents,
)
.await?;
if allocsz > 0 {
continue;
}
allocsz = self
.populate_one_size(
setup,
PFN_2M_SHIFT,
base_pfn + pfn as u64,
allocsz,
&mut extents,
)
.await?;
if allocsz > 0 {
continue;
}
allocsz = self
.populate_one_size(
setup,
PFN_4K_SHIFT,
base_pfn + pfn as u64,
allocsz,
&mut extents,
)
.await?;
if allocsz == 0 {
return Err(Error::MemorySetupFailed("allocsz is zero"));
}
}
Ok(())
}
}
#[async_trait::async_trait]
impl ArchBootSetup for Arm64BootSetup {
fn page_size(&mut self) -> u64 {
ARM_PAGE_SIZE
}
fn page_shift(&mut self) -> u64 {
ARM_PAGE_SHIFT
}
fn needs_early_kernel(&mut self) -> bool {
true
}
async fn setup_shared_info(&mut self, _: &mut BootSetup, _: u64) -> Result<()> {
Ok(())
}
async fn setup_start_info(&mut self, _: &mut BootSetup, _: &BootState, _: &str) -> Result<()> {
Ok(())
}
async fn meminit(
&mut self,
setup: &mut BootSetup,
total_pages: u64,
kernel_segment: &Option<DomainSegment>,
initrd_segment: &Option<DomainSegment>,
) -> Result<()> {
let kernel_segment = kernel_segment
.as_ref()
.ok_or(Error::MemorySetupFailed("kernel_segment missing"))?;
setup.call.claim_pages(setup.domid, total_pages).await?;
let mut ramsize = total_pages << XEN_PAGE_SHIFT;
let bankbase = GUEST_RAM_BANK_BASES;
let bankmax = GUEST_RAM_BANK_SIZES;
let kernbase = kernel_segment.vstart;
let kernend = BootSetup::round_up(kernel_segment.size, 21);
let dtb = setup.dtb.as_ref();
let dtb_size = dtb.map(|blob| BootSetup::round_up(blob.len() as u64, XEN_PAGE_SHIFT));
let ramdisk_size = initrd_segment
.as_ref()
.map(|segment| BootSetup::round_up(segment.size, XEN_PAGE_SHIFT));
let modsize = dtb_size.unwrap_or(0) + ramdisk_size.unwrap_or(0);
let ram128mb = bankbase[0] + (128 << 20);
let mut rambank_size: [u64; 2] = [0, 0];
for i in 0..2 {
let size = if ramsize > bankmax[i] {
bankmax[i]
} else {
ramsize
};
ramsize -= size;
rambank_size[i] = size >> XEN_PAGE_SHIFT;
}
for i in 0..2 {
let size = if ramsize > bankmax[i] {
bankmax[i]
} else {
ramsize
};
ramsize -= size;
rambank_size[i] = size >> XEN_PAGE_SHIFT;
}
for i in 0..2 {
self.populate_guest_memory(setup, bankbase[i] >> XEN_PAGE_SHIFT, rambank_size[i])
.await?;
}
let bank0end = bankbase[0] + (rambank_size[0] << XEN_PAGE_SHIFT);
let _modbase = if bank0end >= ram128mb + modsize && kernend < ram128mb {
ram128mb
} else if bank0end - modsize > kernend {
bank0end - modsize
} else if kernbase - bankbase[0] > modsize {
kernbase - modsize
} else {
return Err(Error::MemorySetupFailed("unable to determine modbase"));
};
setup.call.claim_pages(setup.domid, 0).await?;
Ok(())
}
async fn bootlate(&mut self, _: &mut BootSetup, _: &mut BootState) -> Result<()> {
Ok(())
}
async fn vcpu(&mut self, setup: &mut BootSetup, state: &mut BootState) -> Result<()> {
let mut vcpu = VcpuGuestContext::default();
vcpu.user_regs.pc = state.image_info.virt_entry;
vcpu.user_regs.x0 = 0xffffffff;
vcpu.user_regs.x1 = 0;
vcpu.user_regs.x2 = 0;
vcpu.user_regs.x3 = 0;
vcpu.sctlr = 0x00c50078;
vcpu.ttbr0 = 0;
vcpu.ttbr1 = 0;
vcpu.ttbcr = 0;
vcpu.user_regs.cpsr = PSR_GUEST64_INIT;
vcpu.flags = 1 << 0; // VGCF_ONLINE
trace!("vcpu context: {:?}", vcpu);
setup.call.set_vcpu_context(setup.domid, 0, &vcpu).await?;
Ok(())
}
async fn alloc_p2m_segment(
&mut self,
_: &mut BootSetup,
_: &BootImageInfo,
) -> Result<Option<DomainSegment>> {
Ok(None)
}
async fn alloc_page_tables(
&mut self,
_: &mut BootSetup,
_: &BootImageInfo,
) -> Result<Option<DomainSegment>> {
Ok(None)
}
async fn setup_page_tables(&mut self, _: &mut BootSetup, _: &mut BootState) -> Result<()> {
Ok(())
}
}

View File

@ -1,292 +1,64 @@
use crate::error::Result;
use crate::mem::PhysicalPages;
use crate::sys::{GrantEntry, XEN_PAGE_SHIFT};
use crate::Error;
use libc::munmap;
use log::debug;
use nix::errno::Errno;
use slice_copy::copy;
use crate::mem::ARCH_PAGE_SHIFT;
use std::ffi::c_void;
use std::slice;
use xencall::XenCall;
pub trait BootImageLoader {
fn parse(&self) -> Result<BootImageInfo>;
fn load(&self, image_info: &BootImageInfo, dst: &mut [u8]) -> Result<()>;
use log::debug;
use slice_copy::copy;
use xencall::{sys::CreateDomain, XenCall};
use crate::{
error::{Error, Result},
mem::PhysicalPages,
sys::XEN_PAGE_SHIFT,
};
pub struct BootSetup<I: BootImageLoader, P: BootSetupPlatform> {
pub call: XenCall,
pub domid: u32,
pub platform: P,
pub image_loader: I,
pub dtb: Option<Vec<u8>>,
}
pub const XEN_UNSET_ADDR: u64 = -1i64 as u64;
#[derive(Debug)]
pub struct BootImageInfo {
pub start: u64,
pub virt_base: u64,
pub virt_kstart: u64,
pub virt_kend: u64,
pub virt_hypercall: u64,
pub virt_entry: u64,
pub virt_p2m_base: u64,
pub unmapped_initrd: bool,
}
pub struct BootSetup<'a> {
pub(crate) call: &'a XenCall,
pub phys: PhysicalPages<'a>,
pub(crate) domid: u32,
pub(crate) virt_alloc_end: u64,
pub(crate) pfn_alloc_end: u64,
pub(crate) virt_pgtab_end: u64,
pub(crate) total_pages: u64,
#[cfg(target_arch = "aarch64")]
pub(crate) dtb: Option<Vec<u8>>,
}
#[derive(Debug)]
#[derive(Debug, Default, Clone)]
pub struct DomainSegment {
pub(crate) vstart: u64,
vend: u64,
pub vstart: u64,
pub vend: u64,
pub pfn: u64,
pub(crate) addr: u64,
pub(crate) size: u64,
#[cfg(target_arch = "x86_64")]
pub(crate) pages: u64,
pub addr: u64,
pub size: u64,
pub pages: u64,
}
#[derive(Debug)]
pub struct BootState {
pub kernel_segment: DomainSegment,
pub start_info_segment: DomainSegment,
pub xenstore_segment: DomainSegment,
pub boot_stack_segment: DomainSegment,
pub p2m_segment: Option<DomainSegment>,
pub page_table_segment: Option<DomainSegment>,
pub struct BootDomain {
pub domid: u32,
pub call: XenCall,
pub page_size: u64,
pub virt_alloc_end: u64,
pub pfn_alloc_end: u64,
pub virt_pgtab_end: u64,
pub total_pages: u64,
pub target_pages: u64,
pub max_vcpus: u32,
pub image_info: BootImageInfo,
pub shared_info_frame: u64,
pub initrd_segment: DomainSegment,
pub phys: PhysicalPages,
pub store_evtchn: u32,
pub consoles: Vec<(u32, DomainSegment)>,
pub store_mfn: u64,
pub initrd_segment: DomainSegment,
pub consoles: Vec<(u32, u64)>,
pub cmdline: String,
}
impl BootSetup<'_> {
pub fn new(call: &XenCall, domid: u32) -> BootSetup {
BootSetup {
call,
phys: PhysicalPages::new(call, domid),
domid,
virt_alloc_end: 0,
pfn_alloc_end: 0,
virt_pgtab_end: 0,
total_pages: 0,
#[cfg(target_arch = "aarch64")]
dtb: None,
}
impl BootDomain {
pub async fn alloc_module(&mut self, buffer: &[u8]) -> Result<DomainSegment> {
let segment = self.alloc_segment(0, buffer.len() as u64).await?;
let slice = unsafe { slice::from_raw_parts_mut(segment.addr as *mut u8, buffer.len()) };
copy(slice, buffer);
Ok(segment)
}
async fn initialize_memory(
&mut self,
arch: &mut Box<dyn ArchBootSetup + Send + Sync>,
total_pages: u64,
kernel_segment: &Option<DomainSegment>,
initrd_segment: &Option<DomainSegment>,
) -> Result<()> {
self.call.set_address_size(self.domid, 64).await?;
arch.meminit(self, total_pages, kernel_segment, initrd_segment)
.await?;
Ok(())
}
async fn setup_hypercall_page(&mut self, image_info: &BootImageInfo) -> Result<()> {
if image_info.virt_hypercall == XEN_UNSET_ADDR {
return Ok(());
}
let pfn = (image_info.virt_hypercall - image_info.virt_base) >> ARCH_PAGE_SHIFT;
let mfn = self.phys.p2m[pfn as usize];
self.call.hypercall_init(self.domid, mfn).await?;
Ok(())
}
pub async fn initialize<I: BootImageLoader + Send + Sync>(
&mut self,
arch: &mut Box<dyn ArchBootSetup + Send + Sync>,
image_loader: &I,
initrd: &[u8],
max_vcpus: u32,
mem_mb: u64,
console_count: usize,
) -> Result<BootState> {
debug!("initialize max_vcpus={:?} mem_mb={:?}", max_vcpus, mem_mb);
let page_size = arch.page_size();
let image_info = image_loader.parse()?;
debug!("initialize image_info={:?}", image_info);
let mut kernel_segment: Option<DomainSegment> = None;
let mut initrd_segment: Option<DomainSegment> = None;
if !image_info.unmapped_initrd {
initrd_segment = Some(self.alloc_module(page_size, initrd).await?);
}
if arch.needs_early_kernel() {
kernel_segment = Some(
self.load_kernel_segment(page_size, image_loader, &image_info)
.await?,
);
}
let total_pages = mem_mb << (20 - arch.page_shift());
self.initialize_memory(arch, total_pages, &kernel_segment, &initrd_segment)
.await?;
self.virt_alloc_end = image_info.virt_base;
if kernel_segment.is_none() {
kernel_segment = Some(
self.load_kernel_segment(page_size, image_loader, &image_info)
.await?,
);
}
let mut p2m_segment: Option<DomainSegment> = None;
if image_info.virt_p2m_base >= image_info.virt_base
|| (image_info.virt_p2m_base & ((1 << arch.page_shift()) - 1)) != 0
{
p2m_segment = arch.alloc_p2m_segment(self, &image_info).await?;
}
let start_info_segment = self.alloc_page(page_size)?;
let xenstore_segment = self.alloc_page(page_size)?;
let mut consoles: Vec<(u32, DomainSegment)> = Vec::new();
for _ in 0..console_count {
let evtchn = self.call.evtchn_alloc_unbound(self.domid, 0).await?;
let page = self.alloc_page(page_size)?;
consoles.push((evtchn, page));
}
let page_table_segment = arch.alloc_page_tables(self, &image_info).await?;
let boot_stack_segment = self.alloc_page(page_size)?;
if self.virt_pgtab_end > 0 {
self.alloc_padding_pages(page_size, self.virt_pgtab_end)?;
}
if p2m_segment.is_none() {
if let Some(mut segment) = arch.alloc_p2m_segment(self, &image_info).await? {
segment.vstart = image_info.virt_p2m_base;
p2m_segment = Some(segment);
}
}
if image_info.unmapped_initrd {
initrd_segment = Some(self.alloc_module(page_size, initrd).await?);
}
let initrd_segment = initrd_segment.unwrap();
let store_evtchn = self.call.evtchn_alloc_unbound(self.domid, 0).await?;
let kernel_segment =
kernel_segment.ok_or(Error::MemorySetupFailed("kernel_segment missing"))?;
let state = BootState {
kernel_segment,
start_info_segment,
xenstore_segment,
consoles,
boot_stack_segment,
p2m_segment,
page_table_segment,
image_info,
initrd_segment,
store_evtchn,
shared_info_frame: 0,
};
debug!("initialize state={:?}", state);
Ok(state)
}
pub async fn boot(
&mut self,
arch: &mut Box<dyn ArchBootSetup + Send + Sync>,
state: &mut BootState,
cmdline: &str,
) -> Result<()> {
let domain_info = self.call.get_domain_info(self.domid).await?;
let shared_info_frame = domain_info.shared_info_frame;
state.shared_info_frame = shared_info_frame;
arch.setup_page_tables(self, state).await?;
arch.setup_start_info(self, state, cmdline).await?;
self.setup_hypercall_page(&state.image_info).await?;
arch.bootlate(self, state).await?;
arch.setup_shared_info(self, state.shared_info_frame)
.await?;
arch.vcpu(self, state).await?;
self.phys.unmap_all()?;
self.gnttab_seed(state).await?;
Ok(())
}
async fn gnttab_seed(&mut self, state: &mut BootState) -> Result<()> {
let console_gfn =
self.phys.p2m[state.consoles.first().map(|x| x.1.pfn).unwrap_or(0) as usize];
let xenstore_gfn = self.phys.p2m[state.xenstore_segment.pfn as usize];
let addr = self
.call
.mmap(0, 1 << XEN_PAGE_SHIFT)
.await
.ok_or(Error::MmapFailed)?;
self.call.map_resource(self.domid, 1, 0, 0, 1, addr).await?;
let entries = unsafe { slice::from_raw_parts_mut(addr as *mut GrantEntry, 2) };
entries[0].flags = 1 << 0;
entries[0].domid = 0;
entries[0].frame = console_gfn as u32;
entries[1].flags = 1 << 0;
entries[1].domid = 0;
entries[1].frame = xenstore_gfn as u32;
unsafe {
let result = munmap(addr as *mut c_void, 1 << XEN_PAGE_SHIFT);
if result != 0 {
return Err(Error::UnmapFailed(Errno::from_raw(result)));
}
}
Ok(())
}
async fn load_kernel_segment<I: BootImageLoader + Send + Sync>(
&mut self,
page_size: u64,
image_loader: &I,
image_info: &BootImageInfo,
) -> Result<DomainSegment> {
let kernel_segment = self
.alloc_segment(
page_size,
image_info.virt_kstart,
image_info.virt_kend - image_info.virt_kstart,
)
.await?;
let kernel_segment_ptr = kernel_segment.addr as *mut u8;
let kernel_segment_slice =
unsafe { slice::from_raw_parts_mut(kernel_segment_ptr, kernel_segment.size as usize) };
image_loader.load(image_info, kernel_segment_slice)?;
Ok(kernel_segment)
}
pub(crate) fn round_up(addr: u64, mask: u64) -> u64 {
addr | mask
}
#[cfg(target_arch = "x86_64")]
pub(crate) fn bits_to_mask(bits: u64) -> u64 {
(1 << bits) - 1
}
pub(crate) async fn alloc_segment(
&mut self,
page_size: u64,
start: u64,
size: u64,
) -> Result<DomainSegment> {
pub async fn alloc_segment(&mut self, start: u64, size: u64) -> Result<DomainSegment> {
debug!("alloc_segment {:#x} {:#x}", start, size);
if start > 0 {
self.alloc_padding_pages(page_size, start)?;
self.alloc_padding_pages(start)?;
}
let local_page_size: u32 = (1i64 << XEN_PAGE_SHIFT) as u32;
@ -299,11 +71,10 @@ impl BootSetup<'_> {
pfn: self.pfn_alloc_end,
addr: 0,
size,
#[cfg(target_arch = "x86_64")]
pages,
};
self.chk_alloc_pages(page_size, pages)?;
self.chk_alloc_pages(pages)?;
let ptr = self.phys.pfn_to_ptr(segment.pfn, pages).await?;
segment.addr = ptr;
@ -319,46 +90,20 @@ impl BootSetup<'_> {
Ok(segment)
}
fn alloc_page(&mut self, page_size: u64) -> Result<DomainSegment> {
let start = self.virt_alloc_end;
let pfn = self.pfn_alloc_end;
self.chk_alloc_pages(page_size, 1)?;
debug!("alloc_page {:#x} (pfn {:#x})", start, pfn);
Ok(DomainSegment {
vstart: start,
vend: (start + page_size) - 1,
pfn,
addr: 0,
size: 0,
#[cfg(target_arch = "x86_64")]
pages: 1,
})
}
async fn alloc_module(&mut self, page_size: u64, buffer: &[u8]) -> Result<DomainSegment> {
let segment = self
.alloc_segment(page_size, 0, buffer.len() as u64)
.await?;
let slice = unsafe { slice::from_raw_parts_mut(segment.addr as *mut u8, buffer.len()) };
copy(slice, buffer);
Ok(segment)
}
fn alloc_padding_pages(&mut self, page_size: u64, boundary: u64) -> Result<()> {
if (boundary & (page_size - 1)) != 0 {
pub fn alloc_padding_pages(&mut self, boundary: u64) -> Result<()> {
if (boundary & (self.page_size - 1)) != 0 {
return Err(Error::MemorySetupFailed("boundary is incorrect"));
}
if boundary < self.virt_alloc_end {
return Err(Error::MemorySetupFailed("boundary is below allocation end"));
}
let pages = (boundary - self.virt_alloc_end) / page_size;
self.chk_alloc_pages(page_size, pages)?;
let pages = (boundary - self.virt_alloc_end) / self.page_size;
self.chk_alloc_pages(pages)?;
Ok(())
}
fn chk_alloc_pages(&mut self, page_size: u64, pages: u64) -> Result<()> {
pub fn chk_alloc_pages(&mut self, pages: u64) -> Result<()> {
if pages > self.total_pages
|| self.pfn_alloc_end > self.total_pages
|| pages > self.total_pages - self.pfn_alloc_end
@ -367,56 +112,210 @@ impl BootSetup<'_> {
}
self.pfn_alloc_end += pages;
self.virt_alloc_end += pages * page_size;
self.virt_alloc_end += pages * self.page_size;
Ok(())
}
pub fn alloc_page(&mut self) -> Result<DomainSegment> {
let start = self.virt_alloc_end;
let pfn = self.pfn_alloc_end;
self.chk_alloc_pages(1)?;
debug!("alloc_page {:#x} (pfn {:#x})", start, pfn);
Ok(DomainSegment {
vstart: start,
vend: (start + self.page_size) - 1,
pfn,
addr: 0,
size: 0,
pages: 1,
})
}
pub fn round_up(addr: u64, mask: u64) -> u64 {
addr | mask
}
pub fn bits_to_mask(bits: u64) -> u64 {
(1 << bits) - 1
}
}
impl<I: BootImageLoader, P: BootSetupPlatform> BootSetup<I, P> {
pub fn new(
call: XenCall,
domid: u32,
platform: P,
image_loader: I,
dtb: Option<Vec<u8>>,
) -> BootSetup<I, P> {
BootSetup {
call,
domid,
platform,
image_loader,
dtb,
}
}
pub async fn initialize(
&mut self,
initrd: &[u8],
mem_mb: u64,
max_vcpus: u32,
cmdline: &str,
) -> Result<BootDomain> {
let total_pages = mem_mb << (20 - self.platform.page_shift());
let image_info = self.image_loader.parse(self.platform.hvm()).await?;
let mut domain = BootDomain {
domid: self.domid,
call: self.call.clone(),
virt_alloc_end: 0,
virt_pgtab_end: 0,
pfn_alloc_end: 0,
total_pages,
target_pages: total_pages,
page_size: self.platform.page_size(),
image_info,
consoles: Vec::new(),
max_vcpus,
phys: PhysicalPages::new(self.call.clone(), self.domid, self.platform.page_shift()),
initrd_segment: DomainSegment::default(),
store_evtchn: 0,
store_mfn: 0,
cmdline: cmdline.to_string(),
};
self.platform.initialize_early(&mut domain).await?;
let mut initrd_segment = if !domain.image_info.unmapped_initrd {
Some(domain.alloc_module(initrd).await?)
} else {
None
};
let mut kernel_segment = if self.platform.needs_early_kernel() {
Some(self.load_kernel_segment(&mut domain).await?)
} else {
None
};
self.platform.initialize_memory(&mut domain).await?;
domain.virt_alloc_end = domain.image_info.virt_base;
if kernel_segment.is_none() {
kernel_segment = Some(self.load_kernel_segment(&mut domain).await?);
}
if domain.image_info.unmapped_initrd {
initrd_segment = Some(domain.alloc_module(initrd).await?);
}
domain.initrd_segment =
initrd_segment.ok_or(Error::MemorySetupFailed("initrd_segment missing"))?;
self.platform.alloc_magic_pages(&mut domain).await?;
domain.store_evtchn = self.call.evtchn_alloc_unbound(self.domid, 0).await?;
let _kernel_segment =
kernel_segment.ok_or(Error::MemorySetupFailed("kernel_segment missing"))?;
Ok(domain)
}
pub async fn boot(&mut self, domain: &mut BootDomain) -> Result<()> {
let domain_info = self.call.get_domain_info(self.domid).await?;
let shared_info_frame = domain_info.shared_info_frame;
self.platform.setup_page_tables(domain).await?;
self.platform
.setup_start_info(domain, shared_info_frame)
.await?;
self.platform.setup_hypercall_page(domain).await?;
self.platform.bootlate(domain).await?;
self.platform
.setup_shared_info(domain, shared_info_frame)
.await?;
self.platform.vcpu(domain).await?;
domain.phys.unmap_all()?;
self.platform.gnttab_seed(domain).await?;
Ok(())
}
async fn load_kernel_segment(&mut self, domain: &mut BootDomain) -> Result<DomainSegment> {
let kernel_segment = domain
.alloc_segment(
domain.image_info.virt_kstart,
domain.image_info.virt_kend - domain.image_info.virt_kstart,
)
.await?;
let kernel_segment_ptr = kernel_segment.addr as *mut u8;
let kernel_segment_slice =
unsafe { slice::from_raw_parts_mut(kernel_segment_ptr, kernel_segment.size as usize) };
self.image_loader
.load(&domain.image_info, kernel_segment_slice)
.await?;
Ok(kernel_segment)
}
}
#[async_trait::async_trait]
pub trait ArchBootSetup {
fn page_size(&mut self) -> u64;
fn page_shift(&mut self) -> u64;
pub trait BootSetupPlatform: Clone {
fn create_domain(&self, needs_passthrough: bool) -> CreateDomain;
fn page_size(&self) -> u64;
fn page_shift(&self) -> u64;
fn needs_early_kernel(&self) -> bool;
fn hvm(&self) -> bool;
fn needs_early_kernel(&mut self) -> bool;
async fn initialize_early(&mut self, domain: &mut BootDomain) -> Result<()>;
async fn alloc_p2m_segment(
async fn initialize_memory(&mut self, domain: &mut BootDomain) -> Result<()>;
async fn alloc_page_tables(&mut self, domain: &mut BootDomain)
-> Result<Option<DomainSegment>>;
async fn alloc_p2m_segment(&mut self, domain: &mut BootDomain)
-> Result<Option<DomainSegment>>;
async fn alloc_magic_pages(&mut self, domain: &mut BootDomain) -> Result<()>;
async fn setup_page_tables(&mut self, domain: &mut BootDomain) -> Result<()>;
async fn setup_shared_info(
&mut self,
setup: &mut BootSetup,
image_info: &BootImageInfo,
) -> Result<Option<DomainSegment>>;
async fn alloc_page_tables(
&mut self,
setup: &mut BootSetup,
image_info: &BootImageInfo,
) -> Result<Option<DomainSegment>>;
async fn setup_page_tables(
&mut self,
setup: &mut BootSetup,
state: &mut BootState,
domain: &mut BootDomain,
shared_info_frame: u64,
) -> Result<()>;
async fn setup_start_info(
&mut self,
setup: &mut BootSetup,
state: &BootState,
cmdline: &str,
) -> Result<()>;
async fn setup_shared_info(
&mut self,
setup: &mut BootSetup,
domain: &mut BootDomain,
shared_info_frame: u64,
) -> Result<()>;
async fn meminit(
&mut self,
setup: &mut BootSetup,
total_pages: u64,
kernel_segment: &Option<DomainSegment>,
initrd_segment: &Option<DomainSegment>,
) -> Result<()>;
async fn bootlate(&mut self, setup: &mut BootSetup, state: &mut BootState) -> Result<()>;
async fn vcpu(&mut self, setup: &mut BootSetup, state: &mut BootState) -> Result<()>;
async fn bootlate(&mut self, domain: &mut BootDomain) -> Result<()>;
async fn gnttab_seed(&mut self, domain: &mut BootDomain) -> Result<()>;
async fn vcpu(&mut self, domain: &mut BootDomain) -> Result<()>;
async fn setup_hypercall_page(&mut self, domain: &mut BootDomain) -> Result<()>;
}
#[async_trait::async_trait]
pub trait BootImageLoader {
async fn parse(&self, hvm: bool) -> Result<BootImageInfo>;
async fn load(&self, image_info: &BootImageInfo, dst: &mut [u8]) -> Result<()>;
}
#[derive(Debug)]
pub struct BootImageInfo {
pub start: u64,
pub virt_base: u64,
pub virt_kstart: u64,
pub virt_kend: u64,
pub virt_hypercall: u64,
pub virt_entry: u64,
pub virt_p2m_base: u64,
pub unmapped_initrd: bool,
}
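
Putting the pieces together, domain construction against the new trait pair reads roughly as follows (a sketch assuming a loader and platform implementing BootImageLoader and BootSetupPlatform, matching the xenclient call site later in this diff):

let loader = ElfImageLoader::load_file_kernel(&kernel)?;
let mut boot = BootSetup::new(call.clone(), domid, platform, loader, None);
let mut domain = boot.initialize(&initrd, mem_mb, max_vcpus, &cmdline).await?;
boot.boot(&mut domain).await?;
// domain.store_mfn and domain.store_evtchn are then published to xenstore.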

View File

@ -1,8 +1,8 @@
use crate::boot::{BootImageInfo, BootImageLoader, XEN_UNSET_ADDR};
use crate::boot::{BootImageInfo, BootImageLoader};
use crate::error::Result;
use crate::sys::{
XEN_ELFNOTE_ENTRY, XEN_ELFNOTE_HYPERCALL_PAGE, XEN_ELFNOTE_INIT_P2M, XEN_ELFNOTE_MOD_START_PFN,
XEN_ELFNOTE_PADDR_OFFSET, XEN_ELFNOTE_TYPES, XEN_ELFNOTE_VIRT_BASE,
XEN_ELFNOTE_PADDR_OFFSET, XEN_ELFNOTE_PHYS32_ENTRY, XEN_ELFNOTE_TYPES, XEN_ELFNOTE_VIRT_BASE,
};
use crate::Error;
use elf::abi::{PF_R, PF_W, PF_X, PT_LOAD, SHT_NOTE};
@ -128,8 +128,9 @@ struct ElfNoteValue {
value: u64,
}
#[async_trait::async_trait]
impl BootImageLoader for ElfImageLoader {
fn parse(&self) -> Result<BootImageInfo> {
async fn parse(&self, hvm: bool) -> Result<BootImageInfo> {
let elf = ElfBytes::<AnyEndian>::minimal_parse(self.data.as_slice())?;
let headers = elf.section_headers().ok_or(Error::ElfInvalidImage)?;
let mut linux_notes: HashMap<u64, Vec<u8>> = HashMap::new();
@ -200,6 +201,8 @@ impl BootImageLoader for ElfImageLoader {
.ok_or(Error::ElfInvalidImage)?
.value;
let phys32_entry = xen_notes.get(&XEN_ELFNOTE_PHYS32_ENTRY).map(|x| x.value);
let mut start: u64 = u64::MAX;
let mut end: u64 = 0;
@ -220,15 +223,21 @@ impl BootImageLoader for ElfImageLoader {
}
}
if paddr_offset != XEN_UNSET_ADDR && virt_base == XEN_UNSET_ADDR {
if paddr_offset != u64::MAX && virt_base == u64::MAX {
return Err(Error::ElfInvalidImage);
}
let virt_offset = virt_base - paddr_offset;
let virt_kstart = start + virt_offset;
let virt_kend = end + virt_offset;
let virt_entry = entry;
let mut virt_entry = entry;
if hvm {
if let Some(entry) = phys32_entry {
virt_entry = entry;
} else {
virt_entry = elf.ehdr.e_entry;
}
}
let image_info = BootImageInfo {
start,
virt_base,
@ -242,7 +251,7 @@ impl BootImageLoader for ElfImageLoader {
Ok(image_info)
}
fn load(&self, image_info: &BootImageInfo, dst: &mut [u8]) -> Result<()> {
async fn load(&self, image_info: &BootImageInfo, dst: &mut [u8]) -> Result<()> {
let elf = ElfBytes::<AnyEndian>::minimal_parse(self.data.as_slice())?;
let segments = elf.segments().ok_or(Error::ElfInvalidImage)?;
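
The HVM entry-point selection above condenses to a small rule (sketch):

let virt_entry = if hvm {
    // Prefer the PHYS32_ENTRY note; fall back to the ELF header entry point.
    phys32_entry.unwrap_or(elf.ehdr.e_entry)
} else {
    entry // PV keeps the XEN_ELFNOTE_ENTRY value
};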

View File

@ -4,22 +4,10 @@ pub mod error;
pub mod mem;
pub mod sys;
#[cfg(target_arch = "x86_64")]
pub mod x86;
#[cfg(target_arch = "x86_64")]
use crate::x86::X86BootSetup;
#[cfg(target_arch = "aarch64")]
pub mod arm64;
#[cfg(target_arch = "aarch64")]
use crate::arm64::Arm64BootSetup;
use crate::boot::{ArchBootSetup, BootSetup};
use crate::boot::{BootDomain, BootSetup};
use crate::elfloader::ElfImageLoader;
use crate::error::{Error, Result};
use boot::BootState;
use boot::BootSetupPlatform;
use indexmap::IndexMap;
use log::{debug, trace, warn};
use pci::{PciBdf, XenPciBackend};
@ -28,12 +16,10 @@ use tokio::time::timeout;
use std::path::PathBuf;
use std::str::FromStr;
use std::sync::Arc;
use std::time::Duration;
use uuid::Uuid;
use xencall::sys::{
CreateDomain, DOMCTL_DEV_RDM_RELAXED, XEN_DOMCTL_CDF_HAP, XEN_DOMCTL_CDF_HVM_GUEST,
XEN_DOMCTL_CDF_IOMMU,
};
use xencall::sys::{CreateDomain, DOMCTL_DEV_RDM_RELAXED};
use xencall::XenCall;
use xenstore::{
XsPermission, XsdClient, XsdInterface, XsdTransaction, XS_PERM_NONE, XS_PERM_READ,
@ -42,10 +28,15 @@ use xenstore::{
pub mod pci;
pub mod unsupported;
#[cfg(target_arch = "x86_64")]
pub mod x86pv;
#[derive(Clone)]
pub struct XenClient {
pub struct XenClient<P: BootSetupPlatform> {
pub store: XsdClient,
call: XenCall,
platform: Arc<P>,
}
#[derive(Clone, Debug)]
@ -124,7 +115,7 @@ pub struct DomainConfig {
pub initrd: Vec<u8>,
pub cmdline: String,
pub disks: Vec<DomainDisk>,
pub use_console_backend: Option<String>,
pub swap_console_backend: Option<String>,
pub channels: Vec<DomainChannel>,
pub vifs: Vec<DomainNetworkInterface>,
pub filesystems: Vec<DomainFilesystem>,
@ -147,25 +138,20 @@ pub struct CreatedDomain {
}
#[allow(clippy::too_many_arguments)]
impl XenClient {
pub async fn open(current_domid: u32) -> Result<XenClient> {
impl<P: BootSetupPlatform> XenClient<P> {
pub async fn new(current_domid: u32, platform: P) -> Result<XenClient<P>> {
let store = XsdClient::open().await?;
let call = XenCall::open(current_domid)?;
Ok(XenClient { store, call })
Ok(XenClient {
store,
call,
platform: Arc::new(platform),
})
}
pub async fn create(&self, config: &DomainConfig) -> Result<CreatedDomain> {
let mut domain = CreateDomain {
..Default::default()
};
let mut domain = self.platform.create_domain(!config.pcis.is_empty());
domain.max_vcpus = config.max_vcpus;
if cfg!(target_arch = "aarch64") {
domain.flags = XEN_DOMCTL_CDF_HVM_GUEST | XEN_DOMCTL_CDF_HAP;
} else {
domain.flags = XEN_DOMCTL_CDF_IOMMU;
}
let domid = self.call.create_domain(domain).await?;
match self.init(domid, &domain, config).await {
Ok(created) => Ok(created),
@ -181,18 +167,13 @@ impl XenClient {
async fn init(
&self,
domid: u32,
domain: &CreateDomain,
created: &CreateDomain,
config: &DomainConfig,
) -> Result<CreatedDomain> {
trace!(
"XenClient init domid={} domain={:?} config={:?}",
domid,
domain,
config
);
trace!("XenClient init domid={} domain={:?}", domid, created,);
let backend_dom_path = self.store.get_domain_path(0).await?;
let dom_path = self.store.get_domain_path(domid).await?;
let uuid_string = Uuid::from_bytes(domain.handle).to_string();
let uuid_string = Uuid::from_bytes(created.handle).to_string();
let vm_path = format!("/vm/{}", uuid_string);
let ro_perm = &[
@ -273,7 +254,7 @@ impl XenClient {
tx.write_string(
format!("{}/uuid", vm_path).as_str(),
&Uuid::from_bytes(domain.handle).to_string(),
&Uuid::from_bytes(created.handle).to_string(),
)
.await?;
tx.write_string(format!("{}/name", dom_path).as_str(), &config.name)
@ -295,34 +276,23 @@ impl XenClient {
}
self.call.set_max_vcpus(domid, config.max_vcpus).await?;
self.call.set_max_mem(domid, config.mem_mb * 1024).await?;
let image_loader = ElfImageLoader::load_file_kernel(&config.kernel)?;
let xenstore_evtchn: u32;
let xenstore_mfn: u64;
let p2m: Vec<u64>;
let mut state: BootState;
self.call
.set_max_mem(domid, (config.mem_mb * 1024) + 2048)
.await?;
let mut domain: BootDomain;
{
let mut boot = BootSetup::new(&self.call, domid);
#[cfg(target_arch = "x86_64")]
let mut arch = Box::new(X86BootSetup::new()) as Box<dyn ArchBootSetup + Send + Sync>;
#[cfg(target_arch = "aarch64")]
let mut arch = Box::new(Arm64BootSetup::new()) as Box<dyn ArchBootSetup + Send + Sync>;
state = boot
let loader = ElfImageLoader::load_file_kernel(&config.kernel)?;
let platform = (*self.platform).clone();
let mut boot = BootSetup::new(self.call.clone(), domid, platform, loader, None);
domain = boot
.initialize(
&mut arch,
&image_loader,
&config.initrd,
config.max_vcpus,
config.mem_mb,
1,
config.max_vcpus,
&config.cmdline,
)
.await?;
boot.boot(&mut arch, &mut state, &config.cmdline).await?;
xenstore_evtchn = state.store_evtchn;
xenstore_mfn = boot.phys.p2m[state.xenstore_segment.pfn as usize];
p2m = boot.phys.p2m;
boot.boot(&mut domain).await?;
}
{
@ -349,14 +319,16 @@ impl XenClient {
.await?;
tx.write_string(format!("{}/domid", dom_path).as_str(), &domid.to_string())
.await?;
tx.write_string(format!("{}/type", dom_path).as_str(), "PV")
.await?;
tx.write_string(
format!("{}/store/port", dom_path).as_str(),
&xenstore_evtchn.to_string(),
&domain.store_evtchn.to_string(),
)
.await?;
tx.write_string(
format!("{}/store/ring-ref", dom_path).as_str(),
&xenstore_mfn.to_string(),
&domain.store_mfn.to_string(),
)
.await?;
for i in 0..config.max_vcpus {
@ -371,7 +343,7 @@ impl XenClient {
}
if !self
.store
.introduce_domain(domid, xenstore_mfn, xenstore_evtchn)
.introduce_domain(domid, domain.store_mfn, domain.store_evtchn)
.await?
{
return Err(Error::IntroduceDomainFailed);
@ -380,16 +352,15 @@ impl XenClient {
let tx = self.store.transaction().await?;
self.console_device_add(
&tx,
&mut domain,
&DomainChannel {
typ: config
.use_console_backend
.swap_console_backend
.clone()
.unwrap_or("xenconsoled".to_string())
.to_string(),
initialized: true,
},
&p2m,
&state,
&dom_path,
&backend_dom_path,
config.backend_domid,
@ -403,9 +374,8 @@ impl XenClient {
let (Some(ring_ref), Some(evtchn)) = self
.console_device_add(
&tx,
&mut domain,
channel,
&p2m,
&state,
&dom_path,
&backend_dom_path,
config.backend_domid,
@ -548,18 +518,17 @@ impl XenClient {
async fn console_device_add(
&self,
tx: &XsdTransaction,
domain: &mut BootDomain,
channel: &DomainChannel,
p2m: &[u64],
state: &BootState,
dom_path: &str,
backend_dom_path: &str,
backend_domid: u32,
domid: u32,
index: usize,
) -> Result<(Option<u64>, Option<u32>)> {
let console = state.consoles.get(index);
let console = domain.consoles.get(index);
let port = console.map(|x| x.0);
let ring = console.map(|x| p2m[x.1.pfn as usize]);
let ring = console.map(|x| x.1);
let mut backend_entries = vec![
("frontend-id", domid.to_string()),

View File

@ -1,36 +1,34 @@
use crate::error::Result;
use crate::sys::{XEN_PAGE_SHIFT, XEN_PAGE_SIZE};
use crate::sys::XEN_PAGE_SHIFT;
use crate::Error;
use libc::munmap;
use log::debug;
use nix::errno::Errno;
use std::ffi::c_void;
#[cfg(target_arch = "aarch64")]
pub(crate) use crate::arm64::ARM_PAGE_SHIFT as ARCH_PAGE_SHIFT;
#[cfg(target_arch = "x86_64")]
pub(crate) use crate::x86::X86_PAGE_SHIFT as ARCH_PAGE_SHIFT;
use std::slice;
use xencall::sys::MmapEntry;
use xencall::XenCall;
#[derive(Debug)]
#[derive(Debug, Clone)]
pub struct PhysicalPage {
pfn: u64,
ptr: u64,
pub ptr: u64,
count: u64,
}
pub struct PhysicalPages<'a> {
pub struct PhysicalPages {
page_shift: u64,
domid: u32,
pub(crate) p2m: Vec<u64>,
call: &'a XenCall,
pub p2m: Vec<u64>,
call: XenCall,
pages: Vec<PhysicalPage>,
}
impl PhysicalPages<'_> {
pub fn new(call: &XenCall, domid: u32) -> PhysicalPages {
impl PhysicalPages {
pub fn new(call: XenCall, domid: u32, page_shift: u64) -> PhysicalPages {
PhysicalPages {
page_shift,
domid,
p2m: Vec::new(),
call,
@ -70,7 +68,7 @@ impl PhysicalPages<'_> {
}
}
return Ok(page.ptr + ((pfn - page.pfn) << ARCH_PAGE_SHIFT));
return Ok(page.ptr + ((pfn - page.pfn) << self.page_shift));
}
if count == 0 {
@ -83,7 +81,11 @@ impl PhysicalPages<'_> {
async fn pfn_alloc(&mut self, pfn: u64, count: u64) -> Result<u64> {
let mut entries = vec![MmapEntry::default(); count as usize];
for (i, entry) in entries.iter_mut().enumerate() {
entry.mfn = self.p2m[pfn as usize + i];
if !self.p2m.is_empty() {
entry.mfn = self.p2m[pfn as usize + i];
} else {
entry.mfn = pfn + i as u64;
}
}
let chunk_size = 1 << XEN_PAGE_SHIFT;
let num_per_entry = chunk_size >> XEN_PAGE_SHIFT;
@ -123,10 +125,19 @@ impl PhysicalPages<'_> {
}
pub async fn map_foreign_pages(&mut self, mfn: u64, size: u64) -> Result<u64> {
let num = ((size + XEN_PAGE_SIZE - 1) >> XEN_PAGE_SHIFT) as usize;
let count = (size >> XEN_PAGE_SHIFT) as usize;
let mut entries = vec![MmapEntry::default(); count];
for (i, entry) in entries.iter_mut().enumerate() {
entry.mfn = mfn + i as u64;
}
let chunk_size = 1 << XEN_PAGE_SHIFT;
let num_per_entry = chunk_size >> XEN_PAGE_SHIFT;
let num = num_per_entry * count;
let mut pfns = vec![u64::MAX; num];
for (i, item) in pfns.iter_mut().enumerate().take(num) {
*item = mfn + i as u64;
for i in 0..count {
for j in 0..num_per_entry {
pfns[i * num_per_entry + j] = entries[i].mfn + j as u64;
}
}
let actual_mmap_len = (num as u64) << XEN_PAGE_SHIFT;
@ -144,9 +155,9 @@ impl PhysicalPages<'_> {
return Err(Error::MmapFailed);
}
let page = PhysicalPage {
pfn: u64::MAX,
pfn: mfn,
ptr: addr,
count: num as u64,
count: count as u64,
};
debug!(
"alloc_mfn {:#x}+{:#x} at {:#x}",
@ -156,12 +167,21 @@ impl PhysicalPages<'_> {
Ok(addr)
}
pub async fn clear_pages(&mut self, pfn: u64, count: u64) -> Result<()> {
let ptr = self.pfn_to_ptr(pfn, count).await?;
let slice = unsafe {
slice::from_raw_parts_mut(ptr as *mut u8, (count * (1 << self.page_shift)) as usize)
};
slice.fill(0);
Ok(())
}
pub fn unmap_all(&mut self) -> Result<()> {
for page in &self.pages {
unsafe {
let err = munmap(
page.ptr as *mut c_void,
(page.count << ARCH_PAGE_SHIFT) as usize,
(page.count << self.page_shift) as usize,
);
if err != 0 {
return Err(Error::UnmapFailed(Errno::from_raw(err)));
@ -182,11 +202,11 @@ impl PhysicalPages<'_> {
unsafe {
let err = munmap(
page.ptr as *mut c_void,
(page.count << ARCH_PAGE_SHIFT) as usize,
(page.count << self.page_shift) as usize,
);
debug!(
"unmapped {:#x} foreign bytes at {:#x}",
(page.count << ARCH_PAGE_SHIFT) as usize,
(page.count << self.page_shift) as usize,
page.ptr
);
if err != 0 {
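
With the page shift carried by PhysicalPages instead of a compile-time ARCH_PAGE_SHIFT, the pfn-to-pointer arithmetic is uniform across platforms; the mapping rule in isolation (pfn_to_offset is a hypothetical helper):

fn pfn_to_offset(base_ptr: u64, base_pfn: u64, pfn: u64, page_shift: u64) -> u64 {
    // A mapped run starting at base_pfn lives at base_ptr; later pfns are page-sized strides away.
    base_ptr + ((pfn - base_pfn) << page_shift)
}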

View File

@ -119,6 +119,8 @@ pub const XEN_PAGE_MASK: u64 = !(XEN_PAGE_SIZE - 1);
pub const SUPERPAGE_BATCH_SIZE: u64 = 512;
pub const SUPERPAGE_2MB_SHIFT: u64 = 9;
pub const SUPERPAGE_2MB_NR_PFNS: u64 = 1u64 << SUPERPAGE_2MB_SHIFT;
pub const SUPERPAGE_1GB_SHIFT: u64 = 18;
pub const SUPERPAGE_1GB_NR_PFNS: u64 = 1u64 << SUPERPAGE_1GB_SHIFT;
pub const VGCF_IN_KERNEL: u64 = 1 << 2;
pub const VGCF_ONLINE: u64 = 1 << 5;
@ -128,3 +130,18 @@ pub struct GrantEntry {
pub domid: u16,
pub frame: u32,
}
pub const XEN_HVM_START_MAGIC_VALUE: u32 = 0x336ec578;
pub const HVM_PARAM_STORE_PFN: u32 = 1;
pub const HVM_PARAM_STORE_EVTCHN: u32 = 2;
pub const HVM_PARAM_IOREQ_PFN: u32 = 5;
pub const HVM_PARAM_BUFIOREQ_PFN: u32 = 6;
pub const HVM_PARAM_CONSOLE_PFN: u32 = 17;
pub const HVM_PARAM_CONSOLE_EVTCHN: u32 = 18;
pub const HVM_PARAM_PAGING_RING_PFN: u32 = 27;
pub const HVM_PARAM_MONITOR_RING_PFN: u32 = 28;
pub const HVM_PARAM_SHARING_RING_PFN: u32 = 29;
pub const HVM_PARAM_TIMER_MODE: u32 = 10;
pub const HVM_PARAM_ALTP2M: u32 = 35;
pub const HVM_PARAM_IDENT_PT: u32 = 12;
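
These indexes mirror Xen's public HVM parameter numbers; paired with the new xencall set_hvm_param, usage would look like this (illustrative):

call.set_hvm_param(domid, HVM_PARAM_STORE_EVTCHN, store_evtchn as u64).await?;
call.set_hvm_param(domid, HVM_PARAM_CONSOLE_EVTCHN, console_evtchn as u64).await?;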

View File

@ -0,0 +1,86 @@
use xencall::sys::CreateDomain;
use crate::{
boot::{BootDomain, BootSetupPlatform, DomainSegment},
error::Result,
};
#[derive(Default, Clone)]
pub struct UnsupportedPlatform;
impl UnsupportedPlatform {
pub fn new() -> Self {
Self {}
}
}
#[async_trait::async_trait]
impl BootSetupPlatform for UnsupportedPlatform {
fn create_domain(&self, _: bool) -> CreateDomain {
panic!("unsupported platform")
}
fn page_size(&self) -> u64 {
panic!("unsupported platform")
}
fn page_shift(&self) -> u64 {
panic!("unsupported platform")
}
fn needs_early_kernel(&self) -> bool {
panic!("unsupported platform")
}
fn hvm(&self) -> bool {
panic!("unsupported platform")
}
async fn initialize_early(&mut self, _: &mut BootDomain) -> Result<()> {
panic!("unsupported platform")
}
async fn initialize_memory(&mut self, _: &mut BootDomain) -> Result<()> {
panic!("unsupported platform")
}
async fn alloc_p2m_segment(&mut self, _: &mut BootDomain) -> Result<Option<DomainSegment>> {
panic!("unsupported platform")
}
async fn alloc_page_tables(&mut self, _: &mut BootDomain) -> Result<Option<DomainSegment>> {
panic!("unsupported platform")
}
async fn setup_page_tables(&mut self, _: &mut BootDomain) -> Result<()> {
panic!("unsupported platform")
}
async fn setup_hypercall_page(&mut self, _: &mut BootDomain) -> Result<()> {
panic!("unsupported platform")
}
async fn alloc_magic_pages(&mut self, _: &mut BootDomain) -> Result<()> {
panic!("unsupported platform")
}
async fn setup_shared_info(&mut self, _: &mut BootDomain, _: u64) -> Result<()> {
panic!("unsupported platform")
}
async fn setup_start_info(&mut self, _: &mut BootDomain, _: u64) -> Result<()> {
panic!("unsupported platform")
}
async fn bootlate(&mut self, _: &mut BootDomain) -> Result<()> {
panic!("unsupported platform")
}
async fn vcpu(&mut self, _: &mut BootDomain) -> Result<()> {
panic!("unsupported platform")
}
async fn gnttab_seed(&mut self, _: &mut BootDomain) -> Result<()> {
panic!("unsupported platform")
}
}

View File

@ -1,18 +1,25 @@
use crate::boot::{ArchBootSetup, BootImageInfo, BootSetup, BootState, DomainSegment};
use crate::error::Result;
use crate::sys::{
SUPERPAGE_2MB_NR_PFNS, SUPERPAGE_2MB_SHIFT, SUPERPAGE_BATCH_SIZE, VGCF_IN_KERNEL, VGCF_ONLINE,
XEN_PAGE_SHIFT,
use std::{
mem::size_of,
os::raw::{c_char, c_void},
slice,
};
use crate::Error;
use libc::c_char;
use libc::munmap;
use log::{debug, trace};
use nix::errno::Errno;
use slice_copy::copy;
use std::cmp::{max, min};
use std::mem::size_of;
use std::slice;
use xencall::sys::{
E820Entry, VcpuGuestContext, E820_MAX, E820_RAM, E820_UNUSABLE, MMUEXT_PIN_L4_TABLE,
x8664VcpuGuestContext, CreateDomain, E820Entry, VcpuGuestContextAny, E820_MAX, E820_RAM,
E820_UNUSABLE, MMUEXT_PIN_L4_TABLE, XEN_DOMCTL_CDF_IOMMU,
};
use crate::{
boot::{BootDomain, BootSetupPlatform, DomainSegment},
error::{Error, Result},
sys::{
GrantEntry, SUPERPAGE_2MB_NR_PFNS, SUPERPAGE_2MB_SHIFT, SUPERPAGE_BATCH_SIZE,
VGCF_IN_KERNEL, VGCF_ONLINE, XEN_PAGE_SHIFT,
},
};
pub const X86_PAGE_SHIFT: u64 = 12;
@ -127,10 +134,6 @@ pub struct SharedInfo {
pub p2m_generation: u64,
}
pub struct X86BootSetup {
table: PageTable,
}
#[derive(Debug)]
struct VmemRange {
start: u64,
@ -139,16 +142,20 @@ struct VmemRange {
_nid: u32,
}
impl Default for X86BootSetup {
fn default() -> Self {
Self::new()
}
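/// x86-64 paravirtual boot platform: tracks the page-table layout plus the
/// segments (p2m, page tables, start info, boot stack, xenstore) allocated during boot.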
#[derive(Default, Clone)]
pub struct X86PvPlatform {
table: PageTable,
p2m_segment: Option<DomainSegment>,
page_table_segment: Option<DomainSegment>,
start_info_segment: Option<DomainSegment>,
boot_stack_segment: Option<DomainSegment>,
xenstore_segment: Option<DomainSegment>,
}
impl X86BootSetup {
pub fn new() -> X86BootSetup {
X86BootSetup {
table: PageTable::default(),
impl X86PvPlatform {
pub fn new() -> Self {
Self::default()
}
@ -159,22 +166,22 @@ impl X86BootSetup {
const PAGE_DIRTY: u64 = 0x040;
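// Protection bits per page-table level; frames that back the page tables
// themselves are stripped of write access below.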
fn get_pg_prot(&mut self, l: usize, pfn: u64) -> u64 {
let prot = [
X86BootSetup::PAGE_PRESENT | X86BootSetup::PAGE_RW | X86BootSetup::PAGE_ACCESSED,
X86BootSetup::PAGE_PRESENT
| X86BootSetup::PAGE_RW
| X86BootSetup::PAGE_ACCESSED
| X86BootSetup::PAGE_DIRTY
| X86BootSetup::PAGE_USER,
X86BootSetup::PAGE_PRESENT
| X86BootSetup::PAGE_RW
| X86BootSetup::PAGE_ACCESSED
| X86BootSetup::PAGE_DIRTY
| X86BootSetup::PAGE_USER,
X86BootSetup::PAGE_PRESENT
| X86BootSetup::PAGE_RW
| X86BootSetup::PAGE_ACCESSED
| X86BootSetup::PAGE_DIRTY
| X86BootSetup::PAGE_USER,
X86PvPlatform::PAGE_PRESENT | X86PvPlatform::PAGE_RW | X86PvPlatform::PAGE_ACCESSED,
X86PvPlatform::PAGE_PRESENT
| X86PvPlatform::PAGE_RW
| X86PvPlatform::PAGE_ACCESSED
| X86PvPlatform::PAGE_DIRTY
| X86PvPlatform::PAGE_USER,
X86PvPlatform::PAGE_PRESENT
| X86PvPlatform::PAGE_RW
| X86PvPlatform::PAGE_ACCESSED
| X86PvPlatform::PAGE_DIRTY
| X86PvPlatform::PAGE_USER,
X86PvPlatform::PAGE_PRESENT
| X86PvPlatform::PAGE_RW
| X86PvPlatform::PAGE_ACCESSED
| X86PvPlatform::PAGE_DIRTY
| X86PvPlatform::PAGE_USER,
];
let prot = prot[l];
@ -187,7 +194,7 @@ impl X86BootSetup {
let pfn_s = map.levels[(X86_PGTABLE_LEVELS - 1) as usize].pfn;
let pfn_e = map.area.pgtables as u64 + pfn_s;
if pfn >= pfn_s && pfn < pfn_e {
return prot & !X86BootSetup::PAGE_RW;
return prot & !X86PvPlatform::PAGE_RW;
}
}
prot
@ -195,7 +202,7 @@ impl X86BootSetup {
fn count_page_tables(
&mut self,
setup: &mut BootSetup,
domain: &mut BootDomain,
from: u64,
to: u64,
pfn: u64,
@ -208,7 +215,7 @@ impl X86BootSetup {
let m = self.table.mappings_count;
let pfn_end = pfn + ((to - from) >> X86_PAGE_SHIFT);
if pfn_end >= setup.phys.p2m_size() {
if pfn_end >= domain.phys.p2m_size() {
return Err(Error::MemorySetupFailed("pfn_end greater than p2m size"));
}
@ -222,7 +229,7 @@ impl X86BootSetup {
map.area.to = to & X86_VIRT_MASK;
for l in (0usize..X86_PGTABLE_LEVELS as usize).rev() {
map.levels[l].pfn = setup.pfn_alloc_end + map.area.pgtables as u64;
map.levels[l].pfn = domain.pfn_alloc_end + map.area.pgtables as u64;
if l as u64 == X86_PGTABLE_LEVELS - 1 {
if self.table.mappings_count == 0 {
map.levels[l].from = 0;
@ -234,7 +241,7 @@ impl X86BootSetup {
}
let bits = X86_PAGE_SHIFT + (l + 1) as u64 * X86_PGTABLE_LEVEL_SHIFT;
let mask = BootSetup::bits_to_mask(bits);
let mask = BootDomain::bits_to_mask(bits);
map.levels[l].from = map.area.from & !mask;
map.levels[l].to = map.area.to | mask;
@ -426,229 +433,48 @@ impl X86BootSetup {
}
#[async_trait::async_trait]
impl ArchBootSetup for X86BootSetup {
fn page_size(&mut self) -> u64 {
impl BootSetupPlatform for X86PvPlatform {
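// Request the IOMMU domain-create flag only when device passthrough needs it.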
fn create_domain(&self, needs_passthrough: bool) -> CreateDomain {
CreateDomain {
flags: if needs_passthrough {
XEN_DOMCTL_CDF_IOMMU
} else {
0
},
..Default::default()
}
}
fn page_size(&self) -> u64 {
X86_PAGE_SIZE
}
fn page_shift(&mut self) -> u64 {
fn page_shift(&self) -> u64 {
X86_PAGE_SHIFT
}
fn needs_early_kernel(&mut self) -> bool {
fn needs_early_kernel(&self) -> bool {
false
}
async fn alloc_p2m_segment(
&mut self,
setup: &mut BootSetup,
image_info: &BootImageInfo,
) -> Result<Option<DomainSegment>> {
let mut p2m_alloc_size =
((setup.phys.p2m_size() * 8) + X86_PAGE_SIZE - 1) & !(X86_PAGE_SIZE - 1);
let from = image_info.virt_p2m_base;
let to = from + p2m_alloc_size - 1;
let m = self.count_page_tables(setup, from, to, setup.pfn_alloc_end)?;
fn hvm(&self) -> bool {
false
}
let pgtables: usize;
{
let map = &mut self.table.mappings[m];
map.area.pfn = setup.pfn_alloc_end;
for lvl_idx in 0..4 {
map.levels[lvl_idx].pfn += p2m_alloc_size >> X86_PAGE_SHIFT;
}
pgtables = map.area.pgtables;
}
self.table.mappings_count += 1;
p2m_alloc_size += (pgtables << X86_PAGE_SHIFT) as u64;
let p2m_segment = setup
.alloc_segment(self.page_size(), 0, p2m_alloc_size)
async fn initialize_early(&mut self, _: &mut BootDomain) -> Result<()> {
Ok(())
}
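// Switch the guest to 64-bit mode and claim the full page count up front so
// allocation failures surface before any memory is populated.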
async fn initialize_memory(&mut self, domain: &mut BootDomain) -> Result<()> {
domain.call.set_address_size(domain.domid, 64).await?;
domain
.call
.claim_pages(domain.domid, domain.total_pages)
.await?;
Ok(Some(p2m_segment))
}
async fn alloc_page_tables(
&mut self,
setup: &mut BootSetup,
image_info: &BootImageInfo,
) -> Result<Option<DomainSegment>> {
let mut extra_pages = 1;
extra_pages += (512 * 1024) / X86_PAGE_SIZE;
let mut pages = extra_pages;
let mut try_virt_end: u64;
let mut m: usize;
loop {
try_virt_end = BootSetup::round_up(
setup.virt_alloc_end + pages * X86_PAGE_SIZE,
BootSetup::bits_to_mask(22),
);
m = self.count_page_tables(setup, image_info.virt_base, try_virt_end, 0)?;
pages = self.table.mappings[m].area.pgtables as u64 + extra_pages;
if setup.virt_alloc_end + pages * X86_PAGE_SIZE <= try_virt_end + 1 {
break;
}
}
self.table.mappings[m].area.pfn = 0;
self.table.mappings_count += 1;
setup.virt_pgtab_end = try_virt_end + 1;
let size = self.table.mappings[m].area.pgtables as u64 * X86_PAGE_SIZE;
let segment = setup.alloc_segment(self.page_size(), 0, size).await?;
debug!(
"alloc_page_tables table={:?} segment={:?}",
self.table, segment
);
Ok(Some(segment))
}
async fn setup_page_tables(
&mut self,
setup: &mut BootSetup,
state: &mut BootState,
) -> Result<()> {
let p2m_segment = state
.p2m_segment
.as_ref()
.ok_or(Error::MemorySetupFailed("p2m_segment missing"))?;
let p2m_guest = unsafe {
slice::from_raw_parts_mut(p2m_segment.addr as *mut u64, setup.phys.p2m_size() as usize)
};
copy(p2m_guest, &setup.phys.p2m);
for l in (0usize..X86_PGTABLE_LEVELS as usize).rev() {
for m1 in 0usize..self.table.mappings_count {
let map1 = &self.table.mappings[m1];
let from = map1.levels[l].from;
let to = map1.levels[l].to;
let pg_ptr = setup.phys.pfn_to_ptr(map1.levels[l].pfn, 0).await? as *mut u64;
for m2 in 0usize..self.table.mappings_count {
let map2 = &self.table.mappings[m2];
let lvl = if l > 0 {
&map2.levels[l - 1]
} else {
&map2.area
};
if l > 0 && lvl.pgtables == 0 {
continue;
}
if lvl.from >= to || lvl.to <= from {
continue;
}
let p_s = (max(from, lvl.from) - from)
>> (X86_PAGE_SHIFT + l as u64 * X86_PGTABLE_LEVEL_SHIFT);
let p_e = (min(to, lvl.to) - from)
>> (X86_PAGE_SHIFT + l as u64 * X86_PGTABLE_LEVEL_SHIFT);
let rhs = X86_PAGE_SHIFT as usize + l * X86_PGTABLE_LEVEL_SHIFT as usize;
let mut pfn = ((max(from, lvl.from) - lvl.from) >> rhs) + lvl.pfn;
debug!(
"setup_page_tables lvl={} map_1={} map_2={} pfn={:#x} p_s={:#x} p_e={:#x}",
l, m1, m2, pfn, p_s, p_e
);
let pg = unsafe { slice::from_raw_parts_mut(pg_ptr, (p_e + 1) as usize) };
for p in p_s..p_e + 1 {
let prot = self.get_pg_prot(l, pfn);
let pfn_paddr = setup.phys.p2m[pfn as usize] << X86_PAGE_SHIFT;
let value = pfn_paddr | prot;
pg[p as usize] = value;
pfn += 1;
}
}
}
}
Ok(())
}
async fn setup_start_info(
&mut self,
setup: &mut BootSetup,
state: &BootState,
cmdline: &str,
) -> Result<()> {
let ptr = setup
.phys
.pfn_to_ptr(state.start_info_segment.pfn, 1)
.await?;
let byte_slice =
unsafe { slice::from_raw_parts_mut(ptr as *mut u8, X86_PAGE_SIZE as usize) };
byte_slice.fill(0);
let info = ptr as *mut StartInfo;
let page_table_segment = state
.page_table_segment
.as_ref()
.ok_or(Error::MemorySetupFailed("page_table_segment missing"))?;
let p2m_segment = state
.p2m_segment
.as_ref()
.ok_or(Error::MemorySetupFailed("p2m_segment missing"))?;
unsafe {
for (i, c) in X86_GUEST_MAGIC.chars().enumerate() {
(*info).magic[i] = c as c_char;
}
(*info).magic[X86_GUEST_MAGIC.len()] = 0 as c_char;
(*info).nr_pages = setup.total_pages;
(*info).shared_info = state.shared_info_frame << X86_PAGE_SHIFT;
(*info).pt_base = page_table_segment.vstart;
(*info).nr_pt_frames = self.table.mappings[0].area.pgtables as u64;
(*info).mfn_list = p2m_segment.vstart;
(*info).first_p2m_pfn = p2m_segment.pfn;
(*info).nr_p2m_frames = p2m_segment.pages;
(*info).flags = 0;
(*info).store_evtchn = state.store_evtchn;
(*info).store_mfn = setup.phys.p2m[state.xenstore_segment.pfn as usize];
let console = state.consoles.first().unwrap();
(*info).console.mfn = setup.phys.p2m[console.1.pfn as usize];
(*info).console.evtchn = console.0;
(*info).mod_start = state.initrd_segment.vstart;
(*info).mod_len = state.initrd_segment.size;
for (i, c) in cmdline.chars().enumerate() {
(*info).cmdline[i] = c as c_char;
}
(*info).cmdline[MAX_GUEST_CMDLINE - 1] = 0;
trace!("setup_start_info start_info={:?}", *info);
}
Ok(())
}
async fn setup_shared_info(
&mut self,
setup: &mut BootSetup,
shared_info_frame: u64,
) -> Result<()> {
let info = setup
.phys
.map_foreign_pages(shared_info_frame, X86_PAGE_SIZE)
.await? as *mut SharedInfo;
unsafe {
let size = size_of::<SharedInfo>();
let info_as_buff = slice::from_raw_parts_mut(info as *mut u8, size);
info_as_buff.fill(0);
for i in 0..32 {
(*info).vcpu_info[i].evtchn_upcall_mask = 1;
}
trace!("setup_shared_info shared_info={:?}", *info);
}
Ok(())
}
async fn meminit(
&mut self,
setup: &mut BootSetup,
total_pages: u64,
_: &Option<DomainSegment>,
_: &Option<DomainSegment>,
) -> Result<()> {
setup.call.claim_pages(setup.domid, total_pages).await?;
let mut vmemranges: Vec<VmemRange> = Vec::new();
let stub = VmemRange {
start: 0,
end: total_pages << XEN_PAGE_SHIFT,
end: domain.total_pages << XEN_PAGE_SHIFT,
_flags: 0,
_nid: 0,
};
@ -660,12 +486,10 @@ impl ArchBootSetup for X86BootSetup {
p2m_size = p2m_size.max(range.end >> XEN_PAGE_SHIFT);
}
if total != total_pages {
if total != domain.total_pages {
return Err(Error::MemorySetupFailed("total pages mismatch"));
}
setup.total_pages = total;
let mut p2m = vec![u64::MAX; p2m_size as usize];
for range in &vmemranges {
let mut extents_init = vec![0u64; SUPERPAGE_BATCH_SIZE as usize];
@ -694,10 +518,10 @@ impl ArchBootSetup for X86BootSetup {
}
let extents_init_slice = extents_init.as_slice();
let extents = setup
let extents = domain
.call
.populate_physmap(
setup.domid,
domain.domid,
count,
SUPERPAGE_2MB_SHIFT as u32,
0,
@ -725,9 +549,9 @@ impl ArchBootSetup for X86BootSetup {
let p2m_idx = (pfn_base + j) as usize;
let p2m_end_idx = p2m_idx + allocsz as usize;
let input_extent_starts = &p2m[p2m_idx..p2m_end_idx];
let result = setup
let result = domain
.call
.populate_physmap(setup.domid, allocsz, 0, 0, input_extent_starts)
.populate_physmap(domain.domid, allocsz, 0, 0, input_extent_starts)
.await?;
if result.len() != allocsz as usize {
@ -747,51 +571,299 @@ impl ArchBootSetup for X86BootSetup {
}
}
setup.phys.load_p2m(p2m);
setup.call.claim_pages(setup.domid, 0).await?;
domain.phys.load_p2m(p2m);
domain.call.claim_pages(domain.domid, 0).await?;
Ok(())
}
async fn bootlate(&mut self, setup: &mut BootSetup, state: &mut BootState) -> Result<()> {
let p2m_segment = state
async fn alloc_p2m_segment(
&mut self,
domain: &mut BootDomain,
) -> Result<Option<DomainSegment>> {
let mut p2m_alloc_size =
((domain.phys.p2m_size() * 8) + X86_PAGE_SIZE - 1) & !(X86_PAGE_SIZE - 1);
let from = domain.image_info.virt_p2m_base;
let to = from + p2m_alloc_size - 1;
let m = self.count_page_tables(domain, from, to, domain.pfn_alloc_end)?;
let pgtables: usize;
{
let map = &mut self.table.mappings[m];
map.area.pfn = domain.pfn_alloc_end;
for lvl_idx in 0..4 {
map.levels[lvl_idx].pfn += p2m_alloc_size >> X86_PAGE_SHIFT;
}
pgtables = map.area.pgtables;
}
self.table.mappings_count += 1;
p2m_alloc_size += (pgtables << X86_PAGE_SHIFT) as u64;
let p2m_segment = domain.alloc_segment(0, p2m_alloc_size).await?;
Ok(Some(p2m_segment))
}
async fn alloc_page_tables(
&mut self,
domain: &mut BootDomain,
) -> Result<Option<DomainSegment>> {
let mut extra_pages = 1;
extra_pages += (512 * 1024) / X86_PAGE_SIZE;
let mut pages = extra_pages;
let mut try_virt_end: u64;
let mut m: usize;
loop {
try_virt_end = BootDomain::round_up(
domain.virt_alloc_end + pages * X86_PAGE_SIZE,
BootDomain::bits_to_mask(22),
);
m = self.count_page_tables(domain, domain.image_info.virt_base, try_virt_end, 0)?;
pages = self.table.mappings[m].area.pgtables as u64 + extra_pages;
if domain.virt_alloc_end + pages * X86_PAGE_SIZE <= try_virt_end + 1 {
break;
}
}
self.table.mappings[m].area.pfn = 0;
self.table.mappings_count += 1;
domain.virt_pgtab_end = try_virt_end + 1;
let size = self.table.mappings[m].area.pgtables as u64 * X86_PAGE_SIZE;
let segment = domain.alloc_segment(0, size).await?;
debug!(
"alloc_page_tables table={:?} segment={:?}",
self.table, segment
);
Ok(Some(segment))
}
async fn setup_page_tables(&mut self, domain: &mut BootDomain) -> Result<()> {
let p2m_segment = self
.p2m_segment
.as_ref()
.ok_or(Error::MemorySetupFailed("p2m_segment missing"))?;
let page_table_segment = state
let p2m_guest = unsafe {
slice::from_raw_parts_mut(
p2m_segment.addr as *mut u64,
domain.phys.p2m_size() as usize,
)
};
copy(p2m_guest, &domain.phys.p2m);
for l in (0usize..X86_PGTABLE_LEVELS as usize).rev() {
for m1 in 0usize..self.table.mappings_count {
let map1 = &self.table.mappings[m1];
let from = map1.levels[l].from;
let to = map1.levels[l].to;
let pg_ptr = domain.phys.pfn_to_ptr(map1.levels[l].pfn, 0).await? as *mut u64;
for m2 in 0usize..self.table.mappings_count {
let map2 = &self.table.mappings[m2];
let lvl = if l > 0 {
&map2.levels[l - 1]
} else {
&map2.area
};
if l > 0 && lvl.pgtables == 0 {
continue;
}
if lvl.from >= to || lvl.to <= from {
continue;
}
let p_s = (std::cmp::max(from, lvl.from) - from)
>> (X86_PAGE_SHIFT + l as u64 * X86_PGTABLE_LEVEL_SHIFT);
let p_e = (std::cmp::min(to, lvl.to) - from)
>> (X86_PAGE_SHIFT + l as u64 * X86_PGTABLE_LEVEL_SHIFT);
let rhs = X86_PAGE_SHIFT as usize + l * X86_PGTABLE_LEVEL_SHIFT as usize;
let mut pfn = ((std::cmp::max(from, lvl.from) - lvl.from) >> rhs) + lvl.pfn;
debug!(
"setup_page_tables lvl={} map_1={} map_2={} pfn={:#x} p_s={:#x} p_e={:#x}",
l, m1, m2, pfn, p_s, p_e
);
let pg = unsafe { slice::from_raw_parts_mut(pg_ptr, (p_e + 1) as usize) };
for p in p_s..p_e + 1 {
let prot = self.get_pg_prot(l, pfn);
let pfn_paddr = domain.phys.p2m[pfn as usize] << X86_PAGE_SHIFT;
let value = pfn_paddr | prot;
pg[p as usize] = value;
pfn += 1;
}
}
}
}
Ok(())
}
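// A virt_hypercall of u64::MAX means the kernel image did not request a hypercall page.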
async fn setup_hypercall_page(&mut self, domain: &mut BootDomain) -> Result<()> {
if domain.image_info.virt_hypercall == u64::MAX {
return Ok(());
}
let pfn =
(domain.image_info.virt_hypercall - domain.image_info.virt_base) >> self.page_shift();
let mfn = domain.phys.p2m[pfn as usize];
domain.call.hypercall_init(domain.domid, mfn).await?;
Ok(())
}
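// Magic-page allocation order: the p2m (when it cannot overlay the image's reserved
// virtual range), start info, xenstore, console, page tables, and boot stack, with a
// fallback p2m placed at virt_p2m_base afterwards.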
async fn alloc_magic_pages(&mut self, domain: &mut BootDomain) -> Result<()> {
if domain.image_info.virt_p2m_base >= domain.image_info.virt_base
|| (domain.image_info.virt_p2m_base & ((1 << self.page_shift()) - 1)) != 0
{
self.p2m_segment = self.alloc_p2m_segment(domain).await?;
}
self.start_info_segment = Some(domain.alloc_page()?);
self.xenstore_segment = Some(domain.alloc_page()?);
domain.store_mfn = domain.phys.p2m[self.xenstore_segment.as_ref().unwrap().pfn as usize];
let evtchn = domain.call.evtchn_alloc_unbound(domain.domid, 0).await?;
let page = domain.alloc_page()?;
domain
.consoles
.push((evtchn, domain.phys.p2m[page.pfn as usize]));
self.page_table_segment = self.alloc_page_tables(domain).await?;
self.boot_stack_segment = Some(domain.alloc_page()?);
if domain.virt_pgtab_end > 0 {
domain.alloc_padding_pages(domain.virt_pgtab_end)?;
}
if self.p2m_segment.is_none() {
if let Some(mut p2m_segment) = self.alloc_p2m_segment(domain).await? {
p2m_segment.vstart = domain.image_info.virt_p2m_base;
self.p2m_segment = Some(p2m_segment);
}
}
Ok(())
}
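// Zero the shared info page and mask event-channel upcalls on every vcpu until the guest unmasks them.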
async fn setup_shared_info(
&mut self,
domain: &mut BootDomain,
shared_info_frame: u64,
) -> Result<()> {
let info = domain
.phys
.map_foreign_pages(shared_info_frame, X86_PAGE_SIZE)
.await? as *mut SharedInfo;
unsafe {
let size = size_of::<SharedInfo>();
let info_as_buff = slice::from_raw_parts_mut(info as *mut u8, size);
info_as_buff.fill(0);
for i in 0..32 {
(*info).vcpu_info[i].evtchn_upcall_mask = 1;
}
trace!("setup_shared_info shared_info={:?}", *info);
}
Ok(())
}
async fn setup_start_info(
&mut self,
domain: &mut BootDomain,
shared_info_frame: u64,
) -> Result<()> {
let start_info_segment = self
.start_info_segment
.as_ref()
.ok_or(Error::MemorySetupFailed("start_info_segment missing"))?;
let ptr = domain.phys.pfn_to_ptr(start_info_segment.pfn, 1).await?;
let byte_slice =
unsafe { slice::from_raw_parts_mut(ptr as *mut u8, X86_PAGE_SIZE as usize) };
byte_slice.fill(0);
let info = ptr as *mut StartInfo;
let page_table_segment = self
.page_table_segment
.as_ref()
.ok_or(Error::MemorySetupFailed("page_table_segment missing"))?;
let p2m_segment = self
.p2m_segment
.as_ref()
.ok_or(Error::MemorySetupFailed("p2m_segment missing"))?;
let xenstore_segment = self
.xenstore_segment
.as_ref()
.ok_or(Error::MemorySetupFailed("xenstore_segment missing"))?;
unsafe {
for (i, c) in X86_GUEST_MAGIC.chars().enumerate() {
(*info).magic[i] = c as c_char;
}
(*info).magic[X86_GUEST_MAGIC.len()] = 0 as c_char;
(*info).nr_pages = domain.total_pages;
(*info).shared_info = shared_info_frame << X86_PAGE_SHIFT;
(*info).pt_base = page_table_segment.vstart;
(*info).nr_pt_frames = self.table.mappings[0].area.pgtables as u64;
(*info).mfn_list = p2m_segment.vstart;
(*info).first_p2m_pfn = p2m_segment.pfn;
(*info).nr_p2m_frames = p2m_segment.pages;
(*info).flags = 0;
(*info).store_evtchn = domain.store_evtchn;
(*info).store_mfn = domain.phys.p2m[xenstore_segment.pfn as usize];
let console = domain.consoles.first().unwrap();
(*info).console.mfn = console.1;
(*info).console.evtchn = console.0;
(*info).mod_start = domain.initrd_segment.vstart;
(*info).mod_len = domain.initrd_segment.size;
for (i, c) in domain.cmdline.chars().enumerate() {
(*info).cmdline[i] = c as c_char;
}
(*info).cmdline[MAX_GUEST_CMDLINE - 1] = 0;
trace!("setup_start_info start_info={:?}", *info);
}
Ok(())
}
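// Late boot: unmap the page-table and p2m segments, install a sanitized e820 map,
// then pin the top-level (L4) page table so Xen validates and locks it.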
async fn bootlate(&mut self, domain: &mut BootDomain) -> Result<()> {
let p2m_segment = self
.p2m_segment
.as_ref()
.ok_or(Error::MemorySetupFailed("p2m_segment missing"))?;
let page_table_segment = self
.page_table_segment
.as_ref()
.ok_or(Error::MemorySetupFailed("page_table_segment missing"))?;
let pg_pfn = page_table_segment.pfn;
let pg_mfn = setup.phys.p2m[pg_pfn as usize];
setup.phys.unmap(pg_pfn)?;
setup.phys.unmap(p2m_segment.pfn)?;
let pg_mfn = domain.phys.p2m[pg_pfn as usize];
domain.phys.unmap(pg_pfn)?;
domain.phys.unmap(p2m_segment.pfn)?;
let map = setup.call.get_memory_map(E820_MAX).await?;
let mem_mb = setup.total_pages >> (20 - self.page_shift());
let map = domain.call.get_memory_map(E820_MAX).await?;
let mem_mb = domain.total_pages >> (20 - self.page_shift());
let mem_kb = mem_mb * 1024;
let e820 = self.e820_sanitize(map, mem_kb, 0)?;
setup.call.set_memory_map(setup.domid, e820).await?;
domain.call.set_memory_map(domain.domid, e820).await?;
setup
domain
.call
.mmuext(setup.domid, MMUEXT_PIN_L4_TABLE, pg_mfn, 0)
.mmuext(domain.domid, MMUEXT_PIN_L4_TABLE, pg_mfn, 0)
.await?;
Ok(())
}
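// Build the initial vcpu state: rip at the kernel entry point, rsp at the top of the
// boot stack, rsi pointing at start_info, interrupts enabled via rflags.IF, and
// DR6/DR7 holding their architectural reset values.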
async fn vcpu(&mut self, setup: &mut BootSetup, state: &mut BootState) -> Result<()> {
let page_table_segment = state
async fn vcpu(&mut self, domain: &mut BootDomain) -> Result<()> {
let page_table_segment = self
.page_table_segment
.as_ref()
.ok_or(Error::MemorySetupFailed("page_table_segment missing"))?;
let boot_stack_segment = self
.boot_stack_segment
.as_ref()
.ok_or(Error::MemorySetupFailed("boot_stack_segment missing"))?;
let start_info_segment = self
.start_info_segment
.as_ref()
.ok_or(Error::MemorySetupFailed("start_info_segment missing"))?;
let pg_pfn = page_table_segment.pfn;
let pg_mfn = setup.phys.p2m[pg_pfn as usize];
let mut vcpu = VcpuGuestContext::default();
vcpu.user_regs.rip = state.image_info.virt_entry;
let pg_mfn = domain.phys.p2m[pg_pfn as usize];
let mut vcpu = x8664VcpuGuestContext::default();
vcpu.user_regs.rip = domain.image_info.virt_entry;
vcpu.user_regs.rsp =
state.image_info.virt_base + (state.boot_stack_segment.pfn + 1) * self.page_size();
domain.image_info.virt_base + (boot_stack_segment.pfn + 1) * self.page_size();
vcpu.user_regs.rsi =
state.image_info.virt_base + (state.start_info_segment.pfn) * self.page_size();
domain.image_info.virt_base + (start_info_segment.pfn) * self.page_size();
vcpu.user_regs.rflags = 1 << 9;
vcpu.debugreg[6] = 0xffff0ff0;
vcpu.debugreg[7] = 0x00000400;
@ -808,7 +880,43 @@ impl ArchBootSetup for X86BootSetup {
vcpu.kernel_ss = vcpu.user_regs.ss as u64;
vcpu.kernel_sp = vcpu.user_regs.rsp;
trace!("vcpu context: {:?}", vcpu);
setup.call.set_vcpu_context(setup.domid, 0, &vcpu).await?;
domain
.call
.set_vcpu_context(domain.domid, 0, VcpuGuestContextAny { value: vcpu })
.await?;
Ok(())
}
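// Seed grant entries 0 and 1 with the console and xenstore frames so dom0 backends
// can map them; flags bit 0 is GTF_permit_access in grant-table v1.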
async fn gnttab_seed(&mut self, domain: &mut BootDomain) -> Result<()> {
let xenstore_segment = self
.xenstore_segment
.as_ref()
.ok_or(Error::MemorySetupFailed("xenstore_segment missing"))?;
let console_gfn = domain.consoles.first().map(|x| x.1).unwrap_or(0) as usize;
let xenstore_gfn = domain.phys.p2m[xenstore_segment.pfn as usize];
let addr = domain
.call
.mmap(0, 1 << XEN_PAGE_SHIFT)
.await
.ok_or(Error::MmapFailed)?;
domain
.call
.map_resource(domain.domid, 1, 0, 0, 1, addr)
.await?;
let entries = unsafe { slice::from_raw_parts_mut(addr as *mut GrantEntry, 2) };
entries[0].flags = 1 << 0;
entries[0].domid = 0;
entries[0].frame = console_gfn as u32;
entries[1].flags = 1 << 0;
entries[1].domid = 0;
entries[1].frame = xenstore_gfn as u32;
unsafe {
let result = munmap(addr as *mut c_void, 1 << XEN_PAGE_SHIFT);
if result != 0 {
return Err(Error::UnmapFailed(Errno::from_raw(result)));
}
}
Ok(())
}
}

View File

@ -108,7 +108,6 @@ impl XsdClient {
}
async fn write<P: AsRef<str>>(&self, tx: u32, path: P, data: Vec<u8>) -> Result<bool> {
trace!("write tx={tx} path={} data={:?}", path.as_ref(), data);
let mut buffer = Vec::new();
let path = CString::new(path.as_ref())?;
buffer.extend_from_slice(path.as_bytes_with_nul());
@ -120,6 +119,11 @@ impl XsdClient {
response.parse_bool()
}
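/// Log and write a UTF-8 value, sharing one code path for the client and transaction writers.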
async fn write_string<P: AsRef<str>>(&self, tx: u32, path: P, data: &str) -> Result<bool> {
trace!("write tx={tx} path={} data=\"{}\"", path.as_ref(), data);
self.write(tx, path, data.as_bytes().to_vec()).await
}
async fn mkdir<P: AsRef<str>>(&self, tx: u32, path: P) -> Result<bool> {
trace!("mkdir tx={tx} path={}", path.as_ref());
self.socket
@ -247,7 +251,7 @@ impl XsdInterface for XsdClient {
}
async fn write_string<P: AsRef<str>>(&self, path: P, data: &str) -> Result<bool> {
self.write(0, path, data.as_bytes().to_vec()).await
self.write_string(0, path, data).await
}
async fn mkdir<P: AsRef<str>>(&self, path: P) -> Result<bool> {
@ -287,9 +291,7 @@ impl XsdInterface for XsdTransaction {
}
async fn write_string<P: AsRef<str>>(&self, path: P, data: &str) -> Result<bool> {
self.client
.write(self.tx, path, data.as_bytes().to_vec())
.await
self.client.write_string(self.tx, path, data).await
}
async fn mkdir<P: AsRef<str>>(&self, path: P) -> Result<bool> {