chore(xen): split platform support into separate crate (#195)

Alex Zenla
2024-06-21 01:10:45 -07:00
committed by GitHub
parent 23c7302c04
commit 6f39f115b7
23 changed files with 136 additions and 45 deletions

View File

@@ -1,6 +1,6 @@
[package]
name = "krata-xenclient"
-description = "An implementation of Xen userspace for krata."
+description = "An implementation of Xen userspace for krata"
license.workspace = true
version.workspace = true
homepage.workspace = true
@@ -10,22 +10,16 @@ resolver = "2"
[dependencies]
async-trait = { workspace = true }
-c2rust-bitfields = "0.18.0"
-elf = { workspace = true }
-flate2 = { workspace = true }
indexmap = { workspace = true }
libc = { workspace = true }
log = { workspace = true }
krata-xencall = { path = "../xencall", version = "^0.0.10" }
+krata-xenplatform = { path = "../xenplatform", version = "^0.0.10" }
krata-xenstore = { path = "../xenstore", version = "^0.0.10" }
-memchr = { workspace = true }
-nix = { workspace = true }
regex = { workspace = true }
-slice-copy = { workspace = true }
thiserror = { workspace = true }
tokio = { workspace = true }
uuid = { workspace = true }
-xz2 = { workspace = true }
[dev-dependencies]
env_logger = { workspace = true }

View File

@@ -4,10 +4,10 @@ use xenclient::error::Result;
use xenclient::{DomainConfig, XenClient};
#[cfg(target_arch = "x86_64")]
-type RuntimePlatform = xenclient::x86pv::X86PvPlatform;
+type RuntimePlatform = xenplatform::x86pv::X86PvPlatform;
#[cfg(not(target_arch = "x86_64"))]
-type RuntimePlatform = xenclient::unsupported::UnsupportedPlatform;
+type RuntimePlatform = xenplatform::unsupported::UnsupportedPlatform;
#[tokio::main]
async fn main() -> Result<()> {

View File

@@ -1,321 +0,0 @@
use std::slice;
use log::debug;
use slice_copy::copy;
use xencall::{sys::CreateDomain, XenCall};
use crate::{
error::{Error, Result},
mem::PhysicalPages,
sys::XEN_PAGE_SHIFT,
};
pub struct BootSetup<I: BootImageLoader, P: BootSetupPlatform> {
pub call: XenCall,
pub domid: u32,
pub platform: P,
pub image_loader: I,
pub dtb: Option<Vec<u8>>,
}
#[derive(Debug, Default, Clone)]
pub struct DomainSegment {
pub vstart: u64,
pub vend: u64,
pub pfn: u64,
pub addr: u64,
pub size: u64,
pub pages: u64,
}
pub struct BootDomain {
pub domid: u32,
pub call: XenCall,
pub page_size: u64,
pub virt_alloc_end: u64,
pub pfn_alloc_end: u64,
pub virt_pgtab_end: u64,
pub total_pages: u64,
pub target_pages: u64,
pub max_vcpus: u32,
pub image_info: BootImageInfo,
pub phys: PhysicalPages,
pub store_evtchn: u32,
pub store_mfn: u64,
pub initrd_segment: DomainSegment,
pub consoles: Vec<(u32, u64)>,
pub cmdline: String,
}
impl BootDomain {
pub async fn alloc_module(&mut self, buffer: &[u8]) -> Result<DomainSegment> {
let segment = self.alloc_segment(0, buffer.len() as u64).await?;
let slice = unsafe { slice::from_raw_parts_mut(segment.addr as *mut u8, buffer.len()) };
copy(slice, buffer);
Ok(segment)
}
pub async fn alloc_segment(&mut self, start: u64, size: u64) -> Result<DomainSegment> {
debug!("alloc_segment {:#x} {:#x}", start, size);
if start > 0 {
self.alloc_padding_pages(start)?;
}
let local_page_size: u32 = (1i64 << XEN_PAGE_SHIFT) as u32;
let pages = (size + local_page_size as u64 - 1) / local_page_size as u64;
let start = self.virt_alloc_end;
let mut segment = DomainSegment {
vstart: start,
vend: 0,
pfn: self.pfn_alloc_end,
addr: 0,
size,
pages,
};
self.chk_alloc_pages(pages)?;
let ptr = self.phys.pfn_to_ptr(segment.pfn, pages).await?;
segment.addr = ptr;
let slice = unsafe {
slice::from_raw_parts_mut(ptr as *mut u8, (pages * local_page_size as u64) as usize)
};
slice.fill(0);
segment.vend = self.virt_alloc_end;
debug!(
"alloc_segment {:#x} -> {:#x} (pfn {:#x} + {:#x} pages)",
start, segment.vend, segment.pfn, pages
);
Ok(segment)
}
pub fn alloc_padding_pages(&mut self, boundary: u64) -> Result<()> {
if (boundary & (self.page_size - 1)) != 0 {
return Err(Error::MemorySetupFailed("boundary is incorrect"));
}
if boundary < self.virt_alloc_end {
return Err(Error::MemorySetupFailed("boundary is below allocation end"));
}
let pages = (boundary - self.virt_alloc_end) / self.page_size;
self.chk_alloc_pages(pages)?;
Ok(())
}
pub fn chk_alloc_pages(&mut self, pages: u64) -> Result<()> {
if pages > self.total_pages
|| self.pfn_alloc_end > self.total_pages
|| pages > self.total_pages - self.pfn_alloc_end
{
return Err(Error::MemorySetupFailed("no more pages left"));
}
self.pfn_alloc_end += pages;
self.virt_alloc_end += pages * self.page_size;
Ok(())
}
pub fn alloc_page(&mut self) -> Result<DomainSegment> {
let start = self.virt_alloc_end;
let pfn = self.pfn_alloc_end;
self.chk_alloc_pages(1)?;
debug!("alloc_page {:#x} (pfn {:#x})", start, pfn);
Ok(DomainSegment {
vstart: start,
vend: (start + self.page_size) - 1,
pfn,
addr: 0,
size: 0,
pages: 1,
})
}
pub fn round_up(addr: u64, mask: u64) -> u64 {
addr | mask
}
pub fn bits_to_mask(bits: u64) -> u64 {
(1 << bits) - 1
}
}
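A note on the two helpers just above (an illustration, not part of the diff): bits_to_mask(n) produces an inclusive low-bit mask, and round_up ORs that mask in, so it returns the last byte of the enclosing aligned region rather than the next boundary; callers such as alloc_page_tables further down add 1 to recover the exclusive end.
// Sketch of the inclusive-mask convention; the values are illustrative.
assert_eq!(BootDomain::bits_to_mask(22), 0x3f_ffff);
// round_up(addr, mask) == addr | mask: the last byte of the 4 MiB region.
assert_eq!(BootDomain::round_up(0x1000, BootDomain::bits_to_mask(22)), 0x3f_ffff);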
impl<I: BootImageLoader, P: BootSetupPlatform> BootSetup<I, P> {
pub fn new(
call: XenCall,
domid: u32,
platform: P,
image_loader: I,
dtb: Option<Vec<u8>>,
) -> BootSetup<I, P> {
BootSetup {
call,
domid,
platform,
image_loader,
dtb,
}
}
pub async fn initialize(
&mut self,
initrd: &[u8],
mem_mb: u64,
max_vcpus: u32,
cmdline: &str,
) -> Result<BootDomain> {
let total_pages = mem_mb << (20 - self.platform.page_shift());
let image_info = self.image_loader.parse(self.platform.hvm()).await?;
let mut domain = BootDomain {
domid: self.domid,
call: self.call.clone(),
virt_alloc_end: 0,
virt_pgtab_end: 0,
pfn_alloc_end: 0,
total_pages,
target_pages: total_pages,
page_size: self.platform.page_size(),
image_info,
consoles: Vec::new(),
max_vcpus,
phys: PhysicalPages::new(self.call.clone(), self.domid, self.platform.page_shift()),
initrd_segment: DomainSegment::default(),
store_evtchn: 0,
store_mfn: 0,
cmdline: cmdline.to_string(),
};
self.platform.initialize_early(&mut domain).await?;
let mut initrd_segment = if !domain.image_info.unmapped_initrd {
Some(domain.alloc_module(initrd).await?)
} else {
None
};
let mut kernel_segment = if self.platform.needs_early_kernel() {
Some(self.load_kernel_segment(&mut domain).await?)
} else {
None
};
self.platform.initialize_memory(&mut domain).await?;
domain.virt_alloc_end = domain.image_info.virt_base;
if kernel_segment.is_none() {
kernel_segment = Some(self.load_kernel_segment(&mut domain).await?);
}
if domain.image_info.unmapped_initrd {
initrd_segment = Some(domain.alloc_module(initrd).await?);
}
domain.initrd_segment =
initrd_segment.ok_or(Error::MemorySetupFailed("initrd_segment missing"))?;
self.platform.alloc_magic_pages(&mut domain).await?;
domain.store_evtchn = self.call.evtchn_alloc_unbound(self.domid, 0).await?;
let _kernel_segment =
kernel_segment.ok_or(Error::MemorySetupFailed("kernel_segment missing"))?;
Ok(domain)
}
pub async fn boot(&mut self, domain: &mut BootDomain) -> Result<()> {
let domain_info = self.call.get_domain_info(self.domid).await?;
let shared_info_frame = domain_info.shared_info_frame;
self.platform.setup_page_tables(domain).await?;
self.platform
.setup_start_info(domain, shared_info_frame)
.await?;
self.platform.setup_hypercall_page(domain).await?;
self.platform.bootlate(domain).await?;
self.platform
.setup_shared_info(domain, shared_info_frame)
.await?;
self.platform.vcpu(domain).await?;
domain.phys.unmap_all()?;
self.platform.gnttab_seed(domain).await?;
Ok(())
}
async fn load_kernel_segment(&mut self, domain: &mut BootDomain) -> Result<DomainSegment> {
let kernel_segment = domain
.alloc_segment(
domain.image_info.virt_kstart,
domain.image_info.virt_kend - domain.image_info.virt_kstart,
)
.await?;
let kernel_segment_ptr = kernel_segment.addr as *mut u8;
let kernel_segment_slice =
unsafe { slice::from_raw_parts_mut(kernel_segment_ptr, kernel_segment.size as usize) };
self.image_loader
.load(&domain.image_info, kernel_segment_slice)
.await?;
Ok(kernel_segment)
}
}
#[async_trait::async_trait]
pub trait BootSetupPlatform: Clone {
fn create_domain(&self, needs_passthrough: bool) -> CreateDomain;
fn page_size(&self) -> u64;
fn page_shift(&self) -> u64;
fn needs_early_kernel(&self) -> bool;
fn hvm(&self) -> bool;
async fn initialize_early(&mut self, domain: &mut BootDomain) -> Result<()>;
async fn initialize_memory(&mut self, domain: &mut BootDomain) -> Result<()>;
async fn alloc_page_tables(&mut self, domain: &mut BootDomain)
-> Result<Option<DomainSegment>>;
async fn alloc_p2m_segment(&mut self, domain: &mut BootDomain)
-> Result<Option<DomainSegment>>;
async fn alloc_magic_pages(&mut self, domain: &mut BootDomain) -> Result<()>;
async fn setup_page_tables(&mut self, domain: &mut BootDomain) -> Result<()>;
async fn setup_shared_info(
&mut self,
domain: &mut BootDomain,
shared_info_frame: u64,
) -> Result<()>;
async fn setup_start_info(
&mut self,
domain: &mut BootDomain,
shared_info_frame: u64,
) -> Result<()>;
async fn bootlate(&mut self, domain: &mut BootDomain) -> Result<()>;
async fn gnttab_seed(&mut self, domain: &mut BootDomain) -> Result<()>;
async fn vcpu(&mut self, domain: &mut BootDomain) -> Result<()>;
async fn setup_hypercall_page(&mut self, domain: &mut BootDomain) -> Result<()>;
}
#[async_trait::async_trait]
pub trait BootImageLoader {
async fn parse(&self, hvm: bool) -> Result<BootImageInfo>;
async fn load(&self, image_info: &BootImageInfo, dst: &mut [u8]) -> Result<()>;
}
#[derive(Debug)]
pub struct BootImageInfo {
pub start: u64,
pub virt_base: u64,
pub virt_kstart: u64,
pub virt_kend: u64,
pub virt_hypercall: u64,
pub virt_entry: u64,
pub virt_p2m_base: u64,
pub unmapped_initrd: bool,
}
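Taken together, a minimal sketch of how these boot types are driven (outside this diff; `call` and `domid` are assumed to come from krata-xencall domain creation, and the memory size, vcpu count, and command line are illustrative):
// Hedged sketch: boot a PV domain using the ELF loader and the x86 PV platform.
async fn boot_pv(call: XenCall, domid: u32, kernel: &[u8], initrd: &[u8]) -> Result<()> {
    let loader = ElfImageLoader::load_file_kernel(kernel)?;
    let mut setup = BootSetup::new(call, domid, X86PvPlatform::new(), loader, None);
    // 512 MiB of RAM, 1 vcpu, illustrative kernel command line.
    let mut domain = setup.initialize(initrd, 512, 1, "console=hvc0").await?;
    setup.boot(&mut domain).await?;
    Ok(())
}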

View File

@@ -1,291 +0,0 @@
use crate::boot::{BootImageInfo, BootImageLoader};
use crate::error::Result;
use crate::sys::{
XEN_ELFNOTE_ENTRY, XEN_ELFNOTE_HYPERCALL_PAGE, XEN_ELFNOTE_INIT_P2M, XEN_ELFNOTE_MOD_START_PFN,
XEN_ELFNOTE_PADDR_OFFSET, XEN_ELFNOTE_PHYS32_ENTRY, XEN_ELFNOTE_TYPES, XEN_ELFNOTE_VIRT_BASE,
};
use crate::Error;
use elf::abi::{PF_R, PF_W, PF_X, PT_LOAD, SHT_NOTE};
use elf::endian::AnyEndian;
use elf::note::Note;
use elf::ElfBytes;
use flate2::bufread::GzDecoder;
use log::debug;
use memchr::memmem::find_iter;
use slice_copy::copy;
use std::collections::HashMap;
use std::io::{BufReader, Read};
use std::mem::size_of;
use xz2::bufread::XzDecoder;
pub struct ElfImageLoader {
data: Vec<u8>,
}
fn xen_note_value_as_u64(endian: AnyEndian, value: &[u8]) -> Option<u64> {
let bytes = value.to_vec();
match value.len() {
1 => {
let bytes: Option<[u8; size_of::<u8>()]> = bytes.try_into().ok();
Some(match endian {
AnyEndian::Little => u8::from_le_bytes(bytes?),
AnyEndian::Big => u8::from_be_bytes(bytes?),
} as u64)
}
2 => {
let bytes: Option<[u8; size_of::<u16>()]> = bytes.try_into().ok();
Some(match endian {
AnyEndian::Little => u16::from_le_bytes(bytes?),
AnyEndian::Big => u16::from_be_bytes(bytes?),
} as u64)
}
4 => {
let bytes: Option<[u8; size_of::<u32>()]> = bytes.try_into().ok();
Some(match endian {
AnyEndian::Little => u32::from_le_bytes(bytes?),
AnyEndian::Big => u32::from_be_bytes(bytes?),
} as u64)
}
8 => {
let bytes: Option<[u8; size_of::<u64>()]> = bytes.try_into().ok();
Some(match endian {
AnyEndian::Little => u64::from_le_bytes(bytes?),
AnyEndian::Big => u64::from_be_bytes(bytes?),
})
}
_ => None,
}
}
impl ElfImageLoader {
pub fn new(data: Vec<u8>) -> ElfImageLoader {
ElfImageLoader { data }
}
pub fn load_gz(data: &[u8]) -> Result<ElfImageLoader> {
let buff = BufReader::new(data);
let image = ElfImageLoader::read_one_stream(&mut GzDecoder::new(buff))?;
Ok(ElfImageLoader::new(image))
}
pub fn load_xz(data: &[u8]) -> Result<ElfImageLoader> {
let buff = BufReader::new(data);
let image = ElfImageLoader::read_one_stream(&mut XzDecoder::new(buff))?;
Ok(ElfImageLoader::new(image))
}
fn read_one_stream(read: &mut dyn Read) -> Result<Vec<u8>> {
let mut result: Vec<u8> = Vec::new();
let mut buffer = [0u8; 8192];
loop {
match read.read(&mut buffer) {
Ok(size) => {
if size == 0 {
break;
}
result.extend_from_slice(&buffer[0..size])
}
Err(error) => {
if !result.is_empty() {
break;
}
return Err(Error::from(error));
}
}
}
Ok(result)
}
pub fn load_file_gz(path: &str) -> Result<ElfImageLoader> {
let file = std::fs::read(path)?;
ElfImageLoader::load_gz(file.as_slice())
}
pub fn load_file_xz(path: &str) -> Result<ElfImageLoader> {
let file = std::fs::read(path)?;
ElfImageLoader::load_xz(file.as_slice())
}
pub fn load_file_kernel(data: &[u8]) -> Result<ElfImageLoader> {
for start in find_iter(data, &[0x1f, 0x8b]) {
if let Ok(elf) = ElfImageLoader::load_gz(&data[start..]) {
return Ok(elf);
}
}
for start in find_iter(data, &[0xfd, 0x37, 0x7a, 0x58]) {
if let Ok(elf) = ElfImageLoader::load_xz(&data[start..]) {
return Ok(elf);
}
}
Err(Error::ElfCompressionUnknown)
}
}
struct ElfNoteValue {
value: u64,
}
#[async_trait::async_trait]
impl BootImageLoader for ElfImageLoader {
async fn parse(&self, hvm: bool) -> Result<BootImageInfo> {
let elf = ElfBytes::<AnyEndian>::minimal_parse(self.data.as_slice())?;
let headers = elf.section_headers().ok_or(Error::ElfInvalidImage)?;
let mut linux_notes: HashMap<u64, Vec<u8>> = HashMap::new();
let mut xen_notes: HashMap<u64, ElfNoteValue> = HashMap::new();
for header in headers {
if header.sh_type != SHT_NOTE {
continue;
}
let notes = elf.section_data_as_notes(&header)?;
for note in notes {
if let Note::Unknown(note) = note {
if note.name == "Linux" {
linux_notes.insert(note.n_type, note.desc.to_vec());
}
if note.name == "Xen" {
for typ in XEN_ELFNOTE_TYPES {
if typ.id != note.n_type {
continue;
}
let value = if !typ.is_string {
xen_note_value_as_u64(elf.ehdr.endianness, note.desc).unwrap_or(0)
} else {
0
};
xen_notes.insert(typ.id, ElfNoteValue { value });
}
continue;
}
}
}
}
if linux_notes.is_empty() {
return Err(Error::ElfInvalidImage);
}
if xen_notes.is_empty() {
return Err(Error::ElfXenSupportMissing);
}
let paddr_offset = xen_notes
.get(&XEN_ELFNOTE_PADDR_OFFSET)
.ok_or(Error::ElfInvalidImage)?
.value;
let virt_base = xen_notes
.get(&XEN_ELFNOTE_VIRT_BASE)
.ok_or(Error::ElfInvalidImage)?
.value;
let entry = xen_notes
.get(&XEN_ELFNOTE_ENTRY)
.ok_or(Error::ElfInvalidImage)?
.value;
let virt_hypercall = xen_notes
.get(&XEN_ELFNOTE_HYPERCALL_PAGE)
.ok_or(Error::ElfInvalidImage)?
.value;
let init_p2m = xen_notes
.get(&XEN_ELFNOTE_INIT_P2M)
.ok_or(Error::ElfInvalidImage)?
.value;
let mod_start_pfn = xen_notes
.get(&XEN_ELFNOTE_MOD_START_PFN)
.ok_or(Error::ElfInvalidImage)?
.value;
let phys32_entry = xen_notes.get(&XEN_ELFNOTE_PHYS32_ENTRY).map(|x| x.value);
let mut start: u64 = u64::MAX;
let mut end: u64 = 0;
let segments = elf.segments().ok_or(Error::ElfInvalidImage)?;
for header in segments {
if (header.p_type != PT_LOAD) || (header.p_flags & (PF_R | PF_W | PF_X)) == 0 {
continue;
}
let paddr = header.p_paddr;
let memsz = header.p_memsz;
if start > paddr {
start = paddr;
}
if end < paddr + memsz {
end = paddr + memsz;
}
}
if paddr_offset != u64::MAX && virt_base == u64::MAX {
return Err(Error::ElfInvalidImage);
}
let virt_offset = virt_base - paddr_offset;
let virt_kstart = start + virt_offset;
let virt_kend = end + virt_offset;
let mut virt_entry = entry;
if hvm {
if let Some(entry) = phys32_entry {
virt_entry = entry;
} else {
virt_entry = elf.ehdr.e_entry;
}
}
let image_info = BootImageInfo {
start,
virt_base,
virt_kstart,
virt_kend,
virt_hypercall,
virt_entry,
virt_p2m_base: init_p2m,
unmapped_initrd: mod_start_pfn != 0,
};
Ok(image_info)
}
async fn load(&self, image_info: &BootImageInfo, dst: &mut [u8]) -> Result<()> {
let elf = ElfBytes::<AnyEndian>::minimal_parse(self.data.as_slice())?;
let segments = elf.segments().ok_or(Error::ElfInvalidImage)?;
debug!(
"load dst={:#x} segments={}",
dst.as_ptr() as u64,
segments.len()
);
for header in segments {
let paddr = header.p_paddr;
let filesz = header.p_filesz;
let memsz = header.p_memsz;
let base_offset = paddr - image_info.start;
let data = elf.segment_data(&header)?;
let segment_dst = &mut dst[base_offset as usize..];
let copy_slice = &data[0..filesz as usize];
debug!(
"load copy hdr={:?} dst={:#x} len={}",
header,
copy_slice.as_ptr() as u64,
copy_slice.len()
);
copy(segment_dst, copy_slice);
if (memsz - filesz) > 0 {
let remaining = &mut segment_dst[filesz as usize..memsz as usize];
debug!(
"load fill_zero hdr={:?} dst={:#x} len={}",
header.p_offset,
remaining.as_ptr() as u64,
remaining.len()
);
remaining.fill(0);
}
}
Ok(())
}
}
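Usage, sketched under the same assumptions as above (the path is illustrative): load_file_kernel scans the raw image for gzip (1f 8b) or xz (fd 37 7a 58) magic bytes and decompresses the embedded ELF, after which parse extracts the Xen ELF notes.
// Hedged sketch: inspect the entry point of a compressed PV kernel.
let data = std::fs::read("/boot/vmlinuz")?; // path is an assumption
let loader = ElfImageLoader::load_file_kernel(&data)?;
let info = loader.parse(false).await?; // hvm = false for PV
println!("virt_entry = {:#x}", info.virt_entry);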

View File

@@ -20,12 +20,6 @@ pub enum Error {
PathParentNotFound,
#[error("domain does not exist")]
DomainNonExistent,
-#[error("elf parse failed: {0}")]
-ElfParseFailed(#[from] elf::ParseError),
-#[error("mmap failed")]
-MmapFailed,
-#[error("munmap failed: {0}")]
-UnmapFailed(nix::errno::Errno),
#[error("memory setup failed: {0}")]
MemorySetupFailed(&'static str),
#[error("populate physmap failed: wanted={0}, received={1}, input_extents={2}")]
@@ -46,6 +40,8 @@ pub enum Error {
InvalidPciBdfString,
#[error("pci device {0} is not assignable")]
PciDeviceNotAssignable(PciBdf),
+#[error("xen platform error: {0}")]
+XenPlatform(#[from] xenplatform::error::Error),
}
pub type Result<T> = std::result::Result<T, Error>;

View File

@@ -1,18 +1,13 @@
-pub mod boot;
-pub mod elfloader;
pub mod error;
-pub mod mem;
-pub mod sys;
-use crate::boot::{BootDomain, BootSetup};
-use crate::elfloader::ElfImageLoader;
use crate::error::{Error, Result};
-use boot::BootSetupPlatform;
use indexmap::IndexMap;
use log::{debug, trace, warn};
use pci::{PciBdf, XenPciBackend};
-use sys::XEN_PAGE_SHIFT;
use tokio::time::timeout;
+use xenplatform::boot::{BootDomain, BootSetup, BootSetupPlatform};
+use xenplatform::elfloader::ElfImageLoader;
+use xenplatform::sys::XEN_PAGE_SHIFT;
use std::path::PathBuf;
use std::str::FromStr;
@@ -28,10 +23,6 @@ use xenstore::{
pub mod pci;
-pub mod unsupported;
-#[cfg(target_arch = "x86_64")]
-pub mod x86pv;
#[derive(Clone)]
pub struct XenClient<P: BootSetupPlatform> {
pub store: XsdClient,

View File

@@ -1,219 +0,0 @@
use crate::error::Result;
use crate::sys::XEN_PAGE_SHIFT;
use crate::Error;
use libc::munmap;
use log::debug;
use nix::errno::Errno;
use std::ffi::c_void;
use std::slice;
use xencall::sys::MmapEntry;
use xencall::XenCall;
#[derive(Debug, Clone)]
pub struct PhysicalPage {
pfn: u64,
pub ptr: u64,
count: u64,
}
pub struct PhysicalPages {
page_shift: u64,
domid: u32,
pub p2m: Vec<u64>,
call: XenCall,
pages: Vec<PhysicalPage>,
}
impl PhysicalPages {
pub fn new(call: XenCall, domid: u32, page_shift: u64) -> PhysicalPages {
PhysicalPages {
page_shift,
domid,
p2m: Vec::new(),
call,
pages: Vec::new(),
}
}
pub fn load_p2m(&mut self, p2m: Vec<u64>) {
self.p2m = p2m;
}
pub fn p2m_size(&mut self) -> u64 {
self.p2m.len() as u64
}
pub async fn pfn_to_ptr(&mut self, pfn: u64, count: u64) -> Result<u64> {
for page in &self.pages {
if pfn >= page.pfn + page.count {
continue;
}
if count > 0 {
if (pfn + count) <= page.pfn {
continue;
}
if pfn < page.pfn || (pfn + count) > page.pfn + page.count {
return Err(Error::MemorySetupFailed("pfn is out of range"));
}
} else {
if pfn < page.pfn {
continue;
}
if pfn >= page.pfn + page.count {
continue;
}
}
return Ok(page.ptr + ((pfn - page.pfn) << self.page_shift));
}
if count == 0 {
return Err(Error::MemorySetupFailed("page count is zero"));
}
self.pfn_alloc(pfn, count).await
}
async fn pfn_alloc(&mut self, pfn: u64, count: u64) -> Result<u64> {
let mut entries = vec![MmapEntry::default(); count as usize];
for (i, entry) in entries.iter_mut().enumerate() {
if !self.p2m.is_empty() {
entry.mfn = self.p2m[pfn as usize + i];
} else {
entry.mfn = pfn + i as u64;
}
}
let chunk_size = 1 << XEN_PAGE_SHIFT;
let num_per_entry = chunk_size >> XEN_PAGE_SHIFT;
let num = num_per_entry * count as usize;
let mut pfns = vec![u64::MAX; num];
for i in 0..count as usize {
for j in 0..num_per_entry {
pfns[i * num_per_entry + j] = entries[i].mfn + j as u64;
}
}
let actual_mmap_len = (num as u64) << XEN_PAGE_SHIFT;
let addr = self
.call
.mmap(0, actual_mmap_len)
.await
.ok_or(Error::MmapFailed)?;
debug!("mapped {:#x} foreign bytes at {:#x}", actual_mmap_len, addr);
let result = self
.call
.mmap_batch(self.domid, num as u64, addr, pfns)
.await?;
if result != 0 {
return Err(Error::MmapFailed);
}
let page = PhysicalPage {
pfn,
ptr: addr,
count,
};
debug!(
"alloc_pfn {:#x}+{:#x} at {:#x}",
page.pfn, page.count, page.ptr
);
self.pages.push(page);
Ok(addr)
}
pub async fn map_foreign_pages(&mut self, mfn: u64, size: u64) -> Result<u64> {
let count = (size >> XEN_PAGE_SHIFT) as usize;
let mut entries = vec![MmapEntry::default(); count];
for (i, entry) in entries.iter_mut().enumerate() {
entry.mfn = mfn + i as u64;
}
let chunk_size = 1 << XEN_PAGE_SHIFT;
let num_per_entry = chunk_size >> XEN_PAGE_SHIFT;
let num = num_per_entry * count;
let mut pfns = vec![u64::MAX; num];
for i in 0..count {
for j in 0..num_per_entry {
pfns[i * num_per_entry + j] = entries[i].mfn + j as u64;
}
}
let actual_mmap_len = (num as u64) << XEN_PAGE_SHIFT;
let addr = self
.call
.mmap(0, actual_mmap_len)
.await
.ok_or(Error::MmapFailed)?;
debug!("mapped {:#x} foreign bytes at {:#x}", actual_mmap_len, addr);
let result = self
.call
.mmap_batch(self.domid, num as u64, addr, pfns)
.await?;
if result != 0 {
return Err(Error::MmapFailed);
}
let page = PhysicalPage {
pfn: mfn,
ptr: addr,
count: count as u64,
};
debug!(
"alloc_mfn {:#x}+{:#x} at {:#x}",
page.pfn, page.count, page.ptr
);
self.pages.push(page);
Ok(addr)
}
pub async fn clear_pages(&mut self, pfn: u64, count: u64) -> Result<()> {
let ptr = self.pfn_to_ptr(pfn, count).await?;
let slice = unsafe {
slice::from_raw_parts_mut(ptr as *mut u8, (count * (1 << self.page_shift)) as usize)
};
slice.fill(0);
Ok(())
}
pub fn unmap_all(&mut self) -> Result<()> {
for page in &self.pages {
unsafe {
let err = munmap(
page.ptr as *mut c_void,
(page.count << self.page_shift) as usize,
);
if err != 0 {
return Err(Error::UnmapFailed(Errno::from_raw(err)));
}
}
}
self.pages.clear();
Ok(())
}
pub fn unmap(&mut self, pfn: u64) -> Result<()> {
let page = self.pages.iter().enumerate().find(|(_, x)| x.pfn == pfn);
if page.is_none() {
return Err(Error::MemorySetupFailed("cannot unmap missing page"));
}
let (i, page) = page.unwrap();
unsafe {
let err = munmap(
page.ptr as *mut c_void,
(page.count << self.page_shift) as usize,
);
debug!(
"unmapped {:#x} foreign bytes at {:#x}",
(page.count << self.page_shift) as usize,
page.ptr
);
if err != 0 {
return Err(Error::UnmapFailed(Errno::from_raw(err)));
}
self.pages.remove(i);
}
Ok(())
}
}
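The mapping lifecycle, sketched (outside this diff; the pfn is arbitrary): pfn_to_ptr reuses an existing mapping when the requested range is already covered and otherwise maps it via mmap plus mmap_batch, while unmap_all releases every mapping at once.
// Hedged sketch: map and zero one page of the domain, then release all mappings.
let mut phys = PhysicalPages::new(call.clone(), domid, XEN_PAGE_SHIFT);
phys.clear_pages(0x1000, 1).await?; // maps the page, then fills it with zeroes
phys.unmap_all()?;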

View File

@@ -1,147 +0,0 @@
pub const XEN_ELFNOTE_INFO: u64 = 0;
pub const XEN_ELFNOTE_ENTRY: u64 = 1;
pub const XEN_ELFNOTE_HYPERCALL_PAGE: u64 = 2;
pub const XEN_ELFNOTE_VIRT_BASE: u64 = 3;
pub const XEN_ELFNOTE_PADDR_OFFSET: u64 = 4;
pub const XEN_ELFNOTE_XEN_VERSION: u64 = 5;
pub const XEN_ELFNOTE_GUEST_OS: u64 = 6;
pub const XEN_ELFNOTE_GUEST_VERSION: u64 = 7;
pub const XEN_ELFNOTE_LOADER: u64 = 8;
pub const XEN_ELFNOTE_PAE_MODE: u64 = 9;
pub const XEN_ELFNOTE_FEATURES: u64 = 10;
pub const XEN_ELFNOTE_BSD_SYMTAB: u64 = 11;
pub const XEN_ELFNOTE_HV_START_LOW: u64 = 12;
pub const XEN_ELFNOTE_L1_MFN_VALID: u64 = 13;
pub const XEN_ELFNOTE_SUSPEND_CANCEL: u64 = 14;
pub const XEN_ELFNOTE_INIT_P2M: u64 = 15;
pub const XEN_ELFNOTE_MOD_START_PFN: u64 = 16;
pub const XEN_ELFNOTE_SUPPORTED_FEATURES: u64 = 17;
pub const XEN_ELFNOTE_PHYS32_ENTRY: u64 = 18;
#[derive(Copy, Clone)]
pub struct ElfNoteXenType {
pub id: u64,
pub name: &'static str,
pub is_string: bool,
}
pub const XEN_ELFNOTE_TYPES: &[ElfNoteXenType] = &[
ElfNoteXenType {
id: XEN_ELFNOTE_ENTRY,
name: "ENTRY",
is_string: false,
},
ElfNoteXenType {
id: XEN_ELFNOTE_HYPERCALL_PAGE,
name: "HYPERCALL_PAGE",
is_string: false,
},
ElfNoteXenType {
id: XEN_ELFNOTE_VIRT_BASE,
name: "VIRT_BASE",
is_string: false,
},
ElfNoteXenType {
id: XEN_ELFNOTE_INIT_P2M,
name: "INIT_P2M",
is_string: false,
},
ElfNoteXenType {
id: XEN_ELFNOTE_PADDR_OFFSET,
name: "PADDR_OFFSET",
is_string: false,
},
ElfNoteXenType {
id: XEN_ELFNOTE_HV_START_LOW,
name: "HV_START_LOW",
is_string: false,
},
ElfNoteXenType {
id: XEN_ELFNOTE_XEN_VERSION,
name: "XEN_VERSION",
is_string: true,
},
ElfNoteXenType {
id: XEN_ELFNOTE_GUEST_OS,
name: "GUEST_OS",
is_string: true,
},
ElfNoteXenType {
id: XEN_ELFNOTE_GUEST_VERSION,
name: "GUEST_VERSION",
is_string: true,
},
ElfNoteXenType {
id: XEN_ELFNOTE_LOADER,
name: "LOADER",
is_string: true,
},
ElfNoteXenType {
id: XEN_ELFNOTE_PAE_MODE,
name: "PAE_MODE",
is_string: true,
},
ElfNoteXenType {
id: XEN_ELFNOTE_FEATURES,
name: "FEATURES",
is_string: true,
},
ElfNoteXenType {
id: XEN_ELFNOTE_SUPPORTED_FEATURES,
name: "SUPPORTED_FEATURES",
is_string: false,
},
ElfNoteXenType {
id: XEN_ELFNOTE_BSD_SYMTAB,
name: "BSD_SYMTAB",
is_string: true,
},
ElfNoteXenType {
id: XEN_ELFNOTE_SUSPEND_CANCEL,
name: "SUSPEND_CANCEL",
is_string: false,
},
ElfNoteXenType {
id: XEN_ELFNOTE_MOD_START_PFN,
name: "MOD_START_PFN",
is_string: false,
},
ElfNoteXenType {
id: XEN_ELFNOTE_PHYS32_ENTRY,
name: "PHYS32_ENTRY",
is_string: false,
},
];
pub const XEN_PAGE_SHIFT: u64 = 12;
pub const XEN_PAGE_SIZE: u64 = 1 << XEN_PAGE_SHIFT;
pub const XEN_PAGE_MASK: u64 = !(XEN_PAGE_SIZE - 1);
pub const SUPERPAGE_BATCH_SIZE: u64 = 512;
pub const SUPERPAGE_2MB_SHIFT: u64 = 9;
pub const SUPERPAGE_2MB_NR_PFNS: u64 = 1u64 << SUPERPAGE_2MB_SHIFT;
pub const SUPERPAGE_1GB_SHIFT: u64 = 18;
pub const SUPERPAGE_1GB_NR_PFNS: u64 = 1u64 << SUPERPAGE_1GB_SHIFT;
pub const VGCF_IN_KERNEL: u64 = 1 << 2;
pub const VGCF_ONLINE: u64 = 1 << 5;
#[repr(C)]
pub struct GrantEntry {
pub flags: u16,
pub domid: u16,
pub frame: u32,
}
pub const XEN_HVM_START_MAGIC_VALUE: u32 = 0x336ec578;
pub const HVM_PARAM_STORE_PFN: u32 = 1;
pub const HVM_PARAM_STORE_EVTCHN: u32 = 2;
pub const HVM_PARAM_IOREQ_PFN: u32 = 5;
pub const HVM_PARAM_BUFIOREQ_PFN: u32 = 6;
pub const HVM_PARAM_CONSOLE_PFN: u32 = 17;
pub const HVM_PARAM_CONSOLE_EVTCHN: u32 = 18;
pub const HVM_PARAM_PAGING_RING_PFN: u32 = 27;
pub const HVM_PARAM_MONITOR_RING_PFN: u32 = 28;
pub const HVM_PARAM_SHARING_RING_PFN: u32 = 29;
pub const HVM_PARAM_TIMER_MODE: u32 = 10;
pub const HVM_PARAM_ALTP2M: u32 = 35;
pub const HVM_PARAM_IDENT_PT: u32 = 12;
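As a consistency check on the superpage constants (illustrative): a 2 MiB superpage covers 1 << SUPERPAGE_2MB_SHIFT = 512 Xen pages, and a 1 GiB superpage covers 1 << SUPERPAGE_1GB_SHIFT = 262144.
assert_eq!(XEN_PAGE_SIZE * SUPERPAGE_2MB_NR_PFNS, 2 << 20); // 2 MiB
assert_eq!(XEN_PAGE_SIZE * SUPERPAGE_1GB_NR_PFNS, 1 << 30); // 1 GiB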

View File

@@ -1,86 +0,0 @@
use xencall::sys::CreateDomain;
use crate::{
boot::{BootDomain, BootSetupPlatform, DomainSegment},
error::Result,
};
#[derive(Default, Clone)]
pub struct UnsupportedPlatform;
impl UnsupportedPlatform {
pub fn new() -> Self {
Self {}
}
}
#[async_trait::async_trait]
impl BootSetupPlatform for UnsupportedPlatform {
fn create_domain(&self, _: bool) -> CreateDomain {
panic!("unsupported platform")
}
fn page_size(&self) -> u64 {
panic!("unsupported platform")
}
fn page_shift(&self) -> u64 {
panic!("unsupported platform")
}
fn needs_early_kernel(&self) -> bool {
panic!("unsupported platform")
}
fn hvm(&self) -> bool {
panic!("unsupported platform")
}
async fn initialize_early(&mut self, _: &mut BootDomain) -> Result<()> {
panic!("unsupported platform")
}
async fn initialize_memory(&mut self, _: &mut BootDomain) -> Result<()> {
panic!("unsupported platform")
}
async fn alloc_p2m_segment(&mut self, _: &mut BootDomain) -> Result<Option<DomainSegment>> {
panic!("unsupported platform")
}
async fn alloc_page_tables(&mut self, _: &mut BootDomain) -> Result<Option<DomainSegment>> {
panic!("unsupported platform")
}
async fn setup_page_tables(&mut self, _: &mut BootDomain) -> Result<()> {
panic!("unsupported platform")
}
async fn setup_hypercall_page(&mut self, _: &mut BootDomain) -> Result<()> {
panic!("unsupported platform")
}
async fn alloc_magic_pages(&mut self, _: &mut BootDomain) -> Result<()> {
panic!("unsupported platform")
}
async fn setup_shared_info(&mut self, _: &mut BootDomain, _: u64) -> Result<()> {
panic!("unsupported platform")
}
async fn setup_start_info(&mut self, _: &mut BootDomain, _: u64) -> Result<()> {
panic!("unsupported platform")
}
async fn bootlate(&mut self, _: &mut BootDomain) -> Result<()> {
panic!("unsupported platform")
}
async fn vcpu(&mut self, _: &mut BootDomain) -> Result<()> {
panic!("unsupported platform")
}
async fn gnttab_seed(&mut self, _: &mut BootDomain) -> Result<()> {
panic!("unsupported platform")
}
}

View File

@@ -1,922 +0,0 @@
use std::{
mem::size_of,
os::raw::{c_char, c_void},
slice,
};
use libc::munmap;
use log::{debug, trace};
use nix::errno::Errno;
use slice_copy::copy;
use xencall::sys::{
x8664VcpuGuestContext, CreateDomain, E820Entry, VcpuGuestContextAny, E820_MAX, E820_RAM,
E820_UNUSABLE, MMUEXT_PIN_L4_TABLE, XEN_DOMCTL_CDF_IOMMU,
};
use crate::{
boot::{BootDomain, BootSetupPlatform, DomainSegment},
error::{Error, Result},
sys::{
GrantEntry, SUPERPAGE_2MB_NR_PFNS, SUPERPAGE_2MB_SHIFT, SUPERPAGE_BATCH_SIZE,
VGCF_IN_KERNEL, VGCF_ONLINE, XEN_PAGE_SHIFT,
},
};
pub const X86_PAGE_SHIFT: u64 = 12;
pub const X86_PAGE_SIZE: u64 = 1 << X86_PAGE_SHIFT;
pub const X86_VIRT_BITS: u64 = 48;
pub const X86_VIRT_MASK: u64 = (1 << X86_VIRT_BITS) - 1;
pub const X86_PGTABLE_LEVELS: u64 = 4;
pub const X86_PGTABLE_LEVEL_SHIFT: u64 = 9;
#[repr(C)]
#[derive(Debug, Clone, Default)]
pub struct PageTableMappingLevel {
pub from: u64,
pub to: u64,
pub pfn: u64,
pub pgtables: usize,
}
#[repr(C)]
#[derive(Debug, Clone, Default)]
pub struct PageTableMapping {
pub area: PageTableMappingLevel,
pub levels: [PageTableMappingLevel; X86_PGTABLE_LEVELS as usize],
}
pub const X86_PAGE_TABLE_MAX_MAPPINGS: usize = 2;
#[repr(C)]
#[derive(Debug, Clone, Default)]
pub struct PageTable {
pub mappings_count: usize,
pub mappings: [PageTableMapping; X86_PAGE_TABLE_MAX_MAPPINGS],
}
#[repr(C)]
#[derive(Debug)]
pub struct StartInfoConsole {
pub mfn: u64,
pub evtchn: u32,
}
pub const MAX_GUEST_CMDLINE: usize = 1024;
#[repr(C)]
#[derive(Debug)]
pub struct StartInfo {
pub magic: [c_char; 32],
pub nr_pages: u64,
pub shared_info: u64,
pub flags: u32,
pub store_mfn: u64,
pub store_evtchn: u32,
pub console: StartInfoConsole,
pub pt_base: u64,
pub nr_pt_frames: u64,
pub mfn_list: u64,
pub mod_start: u64,
pub mod_len: u64,
pub cmdline: [c_char; MAX_GUEST_CMDLINE],
pub first_p2m_pfn: u64,
pub nr_p2m_frames: u64,
}
pub const X86_GUEST_MAGIC: &str = "xen-3.0-x86_64";
#[repr(C)]
#[derive(Debug)]
pub struct ArchVcpuInfo {
pub cr2: u64,
pub pad: u64,
}
#[repr(C)]
#[derive(Debug)]
pub struct VcpuInfoTime {
pub version: u32,
pub pad0: u32,
pub tsc_timestamp: u64,
pub system_time: u64,
pub tsc_to_system_mul: u32,
pub tsc_shift: i8,
pub flags: u8,
pub pad1: [u8; 2],
}
#[repr(C)]
#[derive(Debug)]
pub struct VcpuInfo {
pub evtchn_upcall_pending: u8,
pub evtchn_upcall_mask: u8,
pub evtchn_pending_sel: u64,
pub arch_vcpu_info: ArchVcpuInfo,
pub vcpu_info_time: VcpuInfoTime,
}
#[repr(C)]
#[derive(Debug)]
pub struct SharedInfo {
pub vcpu_info: [VcpuInfo; 32],
pub evtchn_pending: [u64; u64::BITS as usize],
pub evtchn_mask: [u64; u64::BITS as usize],
pub wc_version: u32,
pub wc_sec: u32,
pub wc_nsec: u32,
pub wc_sec_hi: u32,
// arch shared info
pub max_pfn: u64,
pub pfn_to_mfn_frame_list_list: u64,
pub nmi_reason: u64,
pub p2m_cr3: u64,
pub p2m_vaddr: u64,
pub p2m_generation: u64,
}
#[derive(Debug)]
struct VmemRange {
start: u64,
end: u64,
_flags: u32,
_nid: u32,
}
#[derive(Default, Clone)]
pub struct X86PvPlatform {
table: PageTable,
p2m_segment: Option<DomainSegment>,
page_table_segment: Option<DomainSegment>,
start_info_segment: Option<DomainSegment>,
boot_stack_segment: Option<DomainSegment>,
xenstore_segment: Option<DomainSegment>,
}
impl X86PvPlatform {
pub fn new() -> Self {
Self {
..Default::default()
}
}
const PAGE_PRESENT: u64 = 0x001;
const PAGE_RW: u64 = 0x002;
const PAGE_USER: u64 = 0x004;
const PAGE_ACCESSED: u64 = 0x020;
const PAGE_DIRTY: u64 = 0x040;
fn get_pg_prot(&mut self, l: usize, pfn: u64) -> u64 {
let prot = [
X86PvPlatform::PAGE_PRESENT | X86PvPlatform::PAGE_RW | X86PvPlatform::PAGE_ACCESSED,
X86PvPlatform::PAGE_PRESENT
| X86PvPlatform::PAGE_RW
| X86PvPlatform::PAGE_ACCESSED
| X86PvPlatform::PAGE_DIRTY
| X86PvPlatform::PAGE_USER,
X86PvPlatform::PAGE_PRESENT
| X86PvPlatform::PAGE_RW
| X86PvPlatform::PAGE_ACCESSED
| X86PvPlatform::PAGE_DIRTY
| X86PvPlatform::PAGE_USER,
X86PvPlatform::PAGE_PRESENT
| X86PvPlatform::PAGE_RW
| X86PvPlatform::PAGE_ACCESSED
| X86PvPlatform::PAGE_DIRTY
| X86PvPlatform::PAGE_USER,
];
let prot = prot[l];
if l > 0 {
return prot;
}
for m in 0..self.table.mappings_count {
let map = &self.table.mappings[m];
let pfn_s = map.levels[(X86_PGTABLE_LEVELS - 1) as usize].pfn;
let pfn_e = map.area.pgtables as u64 + pfn_s;
if pfn >= pfn_s && pfn < pfn_e {
return prot & !X86PvPlatform::PAGE_RW;
}
}
prot
}
fn count_page_tables(
&mut self,
domain: &mut BootDomain,
from: u64,
to: u64,
pfn: u64,
) -> Result<usize> {
debug!("counting pgtables from={} to={} pfn={}", from, to, pfn);
if self.table.mappings_count == X86_PAGE_TABLE_MAX_MAPPINGS {
return Err(Error::MemorySetupFailed("max page table count reached"));
}
let m = self.table.mappings_count;
let pfn_end = pfn + ((to - from) >> X86_PAGE_SHIFT);
if pfn_end >= domain.phys.p2m_size() {
return Err(Error::MemorySetupFailed("pfn_end greater than p2m size"));
}
for idx in 0..self.table.mappings_count {
if from < self.table.mappings[idx].area.to && to > self.table.mappings[idx].area.from {
return Err(Error::MemorySetupFailed("page table calculation failed"));
}
}
let mut map = PageTableMapping::default();
map.area.from = from & X86_VIRT_MASK;
map.area.to = to & X86_VIRT_MASK;
for l in (0usize..X86_PGTABLE_LEVELS as usize).rev() {
map.levels[l].pfn = domain.pfn_alloc_end + map.area.pgtables as u64;
if l as u64 == X86_PGTABLE_LEVELS - 1 {
if self.table.mappings_count == 0 {
map.levels[l].from = 0;
map.levels[l].to = X86_VIRT_MASK;
map.levels[l].pgtables = 1;
map.area.pgtables += 1;
}
continue;
}
let bits = X86_PAGE_SHIFT + (l + 1) as u64 * X86_PGTABLE_LEVEL_SHIFT;
let mask = BootDomain::bits_to_mask(bits);
map.levels[l].from = map.area.from & !mask;
map.levels[l].to = map.area.to | mask;
for cmp in &mut self.table.mappings[0..self.table.mappings_count] {
if cmp.levels[l].from == cmp.levels[l].to {
continue;
}
if map.levels[l].from >= cmp.levels[l].from && map.levels[l].to <= cmp.levels[l].to
{
map.levels[l].from = 0;
map.levels[l].to = 0;
break;
}
if map.levels[l].from >= cmp.levels[l].from
&& map.levels[l].from <= cmp.levels[l].to
{
map.levels[l].from = cmp.levels[l].to + 1;
}
if map.levels[l].to >= cmp.levels[l].from && map.levels[l].to <= cmp.levels[l].to {
map.levels[l].to = cmp.levels[l].from - 1;
}
}
if map.levels[l].from < map.levels[l].to {
map.levels[l].pgtables =
(((map.levels[l].to - map.levels[l].from) >> bits) + 1) as usize;
}
debug!(
"count_pgtables {:#x}/{}: {:#x} -> {:#x}, {} tables",
mask, bits, map.levels[l].from, map.levels[l].to, map.levels[l].pgtables
);
map.area.pgtables += map.levels[l].pgtables;
}
self.table.mappings[m] = map;
Ok(m)
}
fn e820_sanitize(
&self,
mut source: Vec<E820Entry>,
map_limit_kb: u64,
balloon_kb: u64,
) -> Result<Vec<E820Entry>> {
let mut e820 = vec![E820Entry::default(); E820_MAX as usize];
for entry in &mut source {
if entry.addr > 0x100000 {
continue;
}
// entries under 1MB should be removed.
entry.typ = 0;
entry.size = 0;
entry.addr = u64::MAX;
}
let mut lowest = u64::MAX;
let mut highest = 0;
for entry in &source {
if entry.typ == E820_RAM || entry.typ == E820_UNUSABLE || entry.typ == 0 {
continue;
}
lowest = if entry.addr < lowest {
entry.addr
} else {
lowest
};
highest = if entry.addr + entry.size > highest {
entry.addr + entry.size
} else {
highest
}
}
let start_kb = if lowest > 1024 { lowest >> 10 } else { 0 };
let mut idx: usize = 0;
e820[idx].addr = 0;
e820[idx].size = map_limit_kb << 10;
e820[idx].typ = E820_RAM;
let mut delta_kb = 0u64;
if start_kb > 0 && map_limit_kb > start_kb {
delta_kb = map_limit_kb - start_kb;
if delta_kb > 0 {
e820[idx].size -= delta_kb << 10;
}
}
let ram_end = source[0].addr + source[0].size;
idx += 1;
for src in &mut source {
let end = src.addr + src.size;
if src.typ == E820_UNUSABLE || end < ram_end {
src.typ = 0;
continue;
}
if src.typ != E820_RAM {
continue;
}
if src.addr >= (1 << 32) {
continue;
}
if src.addr < ram_end {
let delta = ram_end - src.addr;
src.typ = E820_UNUSABLE;
if src.size < delta {
src.typ = 0;
} else {
src.size -= delta;
src.addr = ram_end;
}
if src.addr + src.size != end {
src.typ = 0;
}
}
if end > ram_end {
src.typ = E820_UNUSABLE;
}
}
if lowest > ram_end {
let mut add_unusable = true;
for src in &mut source {
if !add_unusable {
break;
}
if src.typ != E820_UNUSABLE {
continue;
}
if ram_end != src.addr {
continue;
}
if lowest != src.addr + src.size {
src.size = lowest - src.addr;
}
add_unusable = false;
}
if add_unusable {
e820[1].typ = E820_UNUSABLE;
e820[1].addr = ram_end;
e820[1].size = lowest - ram_end;
}
}
for src in &source {
if src.typ == E820_RAM || src.typ == 0 {
continue;
}
e820[idx].typ = src.typ;
e820[idx].addr = src.addr;
e820[idx].size = src.size;
idx += 1;
}
if balloon_kb > 0 || delta_kb > 0 {
e820[idx].typ = E820_RAM;
e820[idx].addr = if (1u64 << 32u64) > highest {
1u64 << 32u64
} else {
highest
};
e820[idx].size = (delta_kb << 10) + (balloon_kb << 10);
}
Ok(e820)
}
}
#[async_trait::async_trait]
impl BootSetupPlatform for X86PvPlatform {
fn create_domain(&self, needs_passthrough: bool) -> CreateDomain {
CreateDomain {
flags: if needs_passthrough {
XEN_DOMCTL_CDF_IOMMU
} else {
0
},
..Default::default()
}
}
fn page_size(&self) -> u64 {
X86_PAGE_SIZE
}
fn page_shift(&self) -> u64 {
X86_PAGE_SHIFT
}
fn needs_early_kernel(&self) -> bool {
false
}
fn hvm(&self) -> bool {
false
}
async fn initialize_early(&mut self, _: &mut BootDomain) -> Result<()> {
Ok(())
}
async fn initialize_memory(&mut self, domain: &mut BootDomain) -> Result<()> {
domain.call.set_address_size(domain.domid, 64).await?;
domain
.call
.claim_pages(domain.domid, domain.total_pages)
.await?;
let mut vmemranges: Vec<VmemRange> = Vec::new();
let stub = VmemRange {
start: 0,
end: domain.total_pages << XEN_PAGE_SHIFT,
_flags: 0,
_nid: 0,
};
vmemranges.push(stub);
let mut p2m_size: u64 = 0;
let mut total: u64 = 0;
for range in &vmemranges {
total += (range.end - range.start) >> XEN_PAGE_SHIFT;
p2m_size = p2m_size.max(range.end >> XEN_PAGE_SHIFT);
}
if total != domain.total_pages {
return Err(Error::MemorySetupFailed("total pages mismatch"));
}
let mut p2m = vec![u64::MAX; p2m_size as usize];
for range in &vmemranges {
let mut extents_init = vec![0u64; SUPERPAGE_BATCH_SIZE as usize];
let pages = (range.end - range.start) >> XEN_PAGE_SHIFT;
let pfn_base = range.start >> XEN_PAGE_SHIFT;
for pfn in pfn_base..pfn_base + pages {
p2m[pfn as usize] = pfn;
}
let mut super_pages = pages >> SUPERPAGE_2MB_SHIFT;
let mut pfn_base_idx: u64 = pfn_base;
while super_pages > 0 {
let count = super_pages.min(SUPERPAGE_BATCH_SIZE);
super_pages -= count;
let mut j: usize = 0;
let mut pfn: u64 = pfn_base_idx;
loop {
if pfn >= pfn_base_idx + (count << SUPERPAGE_2MB_SHIFT) {
break;
}
extents_init[j] = p2m[pfn as usize];
pfn += SUPERPAGE_2MB_NR_PFNS;
j += 1;
}
let extents_init_slice = extents_init.as_slice();
let extents = domain
.call
.populate_physmap(
domain.domid,
count,
SUPERPAGE_2MB_SHIFT as u32,
0,
&extents_init_slice[0usize..count as usize],
)
.await?;
pfn = pfn_base_idx;
for mfn in extents {
for k in 0..SUPERPAGE_2MB_NR_PFNS {
p2m[pfn as usize] = mfn + k;
pfn += 1;
}
}
pfn_base_idx = pfn;
}
let mut j = pfn_base_idx - pfn_base;
loop {
if j >= pages {
break;
}
let allocsz = (1024 * 1024).min(pages - j);
let p2m_idx = (pfn_base + j) as usize;
let p2m_end_idx = p2m_idx + allocsz as usize;
let input_extent_starts = &p2m[p2m_idx..p2m_end_idx];
let result = domain
.call
.populate_physmap(domain.domid, allocsz, 0, 0, input_extent_starts)
.await?;
if result.len() != allocsz as usize {
return Err(Error::PopulatePhysmapFailed(
allocsz as usize,
result.len(),
input_extent_starts.len(),
));
}
for (i, item) in result.iter().enumerate() {
let p = (pfn_base + j + i as u64) as usize;
let m = *item;
p2m[p] = m;
}
j += allocsz;
}
}
domain.phys.load_p2m(p2m);
domain.call.claim_pages(domain.domid, 0).await?;
Ok(())
}
async fn alloc_p2m_segment(
&mut self,
domain: &mut BootDomain,
) -> Result<Option<DomainSegment>> {
let mut p2m_alloc_size =
((domain.phys.p2m_size() * 8) + X86_PAGE_SIZE - 1) & !(X86_PAGE_SIZE - 1);
let from = domain.image_info.virt_p2m_base;
let to = from + p2m_alloc_size - 1;
let m = self.count_page_tables(domain, from, to, domain.pfn_alloc_end)?;
let pgtables: usize;
{
let map = &mut self.table.mappings[m];
map.area.pfn = domain.pfn_alloc_end;
for lvl_idx in 0..4 {
map.levels[lvl_idx].pfn += p2m_alloc_size >> X86_PAGE_SHIFT;
}
pgtables = map.area.pgtables;
}
self.table.mappings_count += 1;
p2m_alloc_size += (pgtables << X86_PAGE_SHIFT) as u64;
let p2m_segment = domain.alloc_segment(0, p2m_alloc_size).await?;
Ok(Some(p2m_segment))
}
async fn alloc_page_tables(
&mut self,
domain: &mut BootDomain,
) -> Result<Option<DomainSegment>> {
let mut extra_pages = 1;
extra_pages += (512 * 1024) / X86_PAGE_SIZE;
let mut pages = extra_pages;
let mut try_virt_end: u64;
let mut m: usize;
loop {
try_virt_end = BootDomain::round_up(
domain.virt_alloc_end + pages * X86_PAGE_SIZE,
BootDomain::bits_to_mask(22),
);
m = self.count_page_tables(domain, domain.image_info.virt_base, try_virt_end, 0)?;
pages = self.table.mappings[m].area.pgtables as u64 + extra_pages;
if domain.virt_alloc_end + pages * X86_PAGE_SIZE <= try_virt_end + 1 {
break;
}
}
self.table.mappings[m].area.pfn = 0;
self.table.mappings_count += 1;
domain.virt_pgtab_end = try_virt_end + 1;
let size = self.table.mappings[m].area.pgtables as u64 * X86_PAGE_SIZE;
let segment = domain.alloc_segment(0, size).await?;
debug!(
"alloc_page_tables table={:?} segment={:?}",
self.table, segment
);
Ok(Some(segment))
}
async fn setup_page_tables(&mut self, domain: &mut BootDomain) -> Result<()> {
let p2m_segment = self
.p2m_segment
.as_ref()
.ok_or(Error::MemorySetupFailed("p2m_segment missing"))?;
let p2m_guest = unsafe {
slice::from_raw_parts_mut(
p2m_segment.addr as *mut u64,
domain.phys.p2m_size() as usize,
)
};
copy(p2m_guest, &domain.phys.p2m);
for l in (0usize..X86_PGTABLE_LEVELS as usize).rev() {
for m1 in 0usize..self.table.mappings_count {
let map1 = &self.table.mappings[m1];
let from = map1.levels[l].from;
let to = map1.levels[l].to;
let pg_ptr = domain.phys.pfn_to_ptr(map1.levels[l].pfn, 0).await? as *mut u64;
for m2 in 0usize..self.table.mappings_count {
let map2 = &self.table.mappings[m2];
let lvl = if l > 0 {
&map2.levels[l - 1]
} else {
&map2.area
};
if l > 0 && lvl.pgtables == 0 {
continue;
}
if lvl.from >= to || lvl.to <= from {
continue;
}
let p_s = (std::cmp::max(from, lvl.from) - from)
>> (X86_PAGE_SHIFT + l as u64 * X86_PGTABLE_LEVEL_SHIFT);
let p_e = (std::cmp::min(to, lvl.to) - from)
>> (X86_PAGE_SHIFT + l as u64 * X86_PGTABLE_LEVEL_SHIFT);
let rhs = X86_PAGE_SHIFT as usize + l * X86_PGTABLE_LEVEL_SHIFT as usize;
let mut pfn = ((std::cmp::max(from, lvl.from) - lvl.from) >> rhs) + lvl.pfn;
debug!(
"setup_page_tables lvl={} map_1={} map_2={} pfn={:#x} p_s={:#x} p_e={:#x}",
l, m1, m2, pfn, p_s, p_e
);
let pg = unsafe { slice::from_raw_parts_mut(pg_ptr, (p_e + 1) as usize) };
for p in p_s..p_e + 1 {
let prot = self.get_pg_prot(l, pfn);
let pfn_paddr = domain.phys.p2m[pfn as usize] << X86_PAGE_SHIFT;
let value = pfn_paddr | prot;
pg[p as usize] = value;
pfn += 1;
}
}
}
}
Ok(())
}
async fn setup_hypercall_page(&mut self, domain: &mut BootDomain) -> Result<()> {
if domain.image_info.virt_hypercall == u64::MAX {
return Ok(());
}
let pfn =
(domain.image_info.virt_hypercall - domain.image_info.virt_base) >> self.page_shift();
let mfn = domain.phys.p2m[pfn as usize];
domain.call.hypercall_init(domain.domid, mfn).await?;
Ok(())
}
async fn alloc_magic_pages(&mut self, domain: &mut BootDomain) -> Result<()> {
if domain.image_info.virt_p2m_base >= domain.image_info.virt_base
|| (domain.image_info.virt_p2m_base & ((1 << self.page_shift()) - 1)) != 0
{
self.p2m_segment = self.alloc_p2m_segment(domain).await?;
}
self.start_info_segment = Some(domain.alloc_page()?);
self.xenstore_segment = Some(domain.alloc_page()?);
domain.store_mfn = domain.phys.p2m[self.xenstore_segment.as_ref().unwrap().pfn as usize];
let evtchn = domain.call.evtchn_alloc_unbound(domain.domid, 0).await?;
let page = domain.alloc_page()?;
domain
.consoles
.push((evtchn, domain.phys.p2m[page.pfn as usize]));
self.page_table_segment = self.alloc_page_tables(domain).await?;
self.boot_stack_segment = Some(domain.alloc_page()?);
if domain.virt_pgtab_end > 0 {
domain.alloc_padding_pages(domain.virt_pgtab_end)?;
}
if self.p2m_segment.is_none() {
if let Some(mut p2m_segment) = self.alloc_p2m_segment(domain).await? {
p2m_segment.vstart = domain.image_info.virt_p2m_base;
self.p2m_segment = Some(p2m_segment);
}
}
Ok(())
}
async fn setup_shared_info(
&mut self,
domain: &mut BootDomain,
shared_info_frame: u64,
) -> Result<()> {
let info = domain
.phys
.map_foreign_pages(shared_info_frame, X86_PAGE_SIZE)
.await? as *mut SharedInfo;
unsafe {
let size = size_of::<SharedInfo>();
let info_as_buff = slice::from_raw_parts_mut(info as *mut u8, size);
info_as_buff.fill(0);
for i in 0..32 {
(*info).vcpu_info[i].evtchn_upcall_mask = 1;
}
trace!("setup_shared_info shared_info={:?}", *info);
}
Ok(())
}
async fn setup_start_info(
&mut self,
domain: &mut BootDomain,
shared_info_frame: u64,
) -> Result<()> {
let start_info_segment = self
.start_info_segment
.as_ref()
.ok_or(Error::MemorySetupFailed("start_info_segment missing"))?;
let ptr = domain.phys.pfn_to_ptr(start_info_segment.pfn, 1).await?;
let byte_slice =
unsafe { slice::from_raw_parts_mut(ptr as *mut u8, X86_PAGE_SIZE as usize) };
byte_slice.fill(0);
let info = ptr as *mut StartInfo;
let page_table_segment = self
.page_table_segment
.as_ref()
.ok_or(Error::MemorySetupFailed("page_table_segment missing"))?;
let p2m_segment = self
.p2m_segment
.as_ref()
.ok_or(Error::MemorySetupFailed("p2m_segment missing"))?;
let xenstore_segment = self
.xenstore_segment
.as_ref()
.ok_or(Error::MemorySetupFailed("xenstore_segment missing"))?;
unsafe {
for (i, c) in X86_GUEST_MAGIC.chars().enumerate() {
(*info).magic[i] = c as c_char;
}
(*info).magic[X86_GUEST_MAGIC.len()] = 0 as c_char;
(*info).nr_pages = domain.total_pages;
(*info).shared_info = shared_info_frame << X86_PAGE_SHIFT;
(*info).pt_base = page_table_segment.vstart;
(*info).nr_pt_frames = self.table.mappings[0].area.pgtables as u64;
(*info).mfn_list = p2m_segment.vstart;
(*info).first_p2m_pfn = p2m_segment.pfn;
(*info).nr_p2m_frames = p2m_segment.pages;
(*info).flags = 0;
(*info).store_evtchn = domain.store_evtchn;
(*info).store_mfn = domain.phys.p2m[xenstore_segment.pfn as usize];
let console = domain.consoles.first().unwrap();
(*info).console.mfn = console.1;
(*info).console.evtchn = console.0;
(*info).mod_start = domain.initrd_segment.vstart;
(*info).mod_len = domain.initrd_segment.size;
for (i, c) in domain.cmdline.chars().enumerate() {
(*info).cmdline[i] = c as c_char;
}
(*info).cmdline[MAX_GUEST_CMDLINE - 1] = 0;
trace!("setup_start_info start_info={:?}", *info);
}
Ok(())
}
async fn bootlate(&mut self, domain: &mut BootDomain) -> Result<()> {
let p2m_segment = self
.p2m_segment
.as_ref()
.ok_or(Error::MemorySetupFailed("p2m_segment missing"))?;
let page_table_segment = self
.page_table_segment
.as_ref()
.ok_or(Error::MemorySetupFailed("page_table_segment missing"))?;
let pg_pfn = page_table_segment.pfn;
let pg_mfn = domain.phys.p2m[pg_pfn as usize];
domain.phys.unmap(pg_pfn)?;
domain.phys.unmap(p2m_segment.pfn)?;
let map = domain.call.get_memory_map(E820_MAX).await?;
let mem_mb = domain.total_pages >> (20 - self.page_shift());
let mem_kb = mem_mb * 1024;
let e820 = self.e820_sanitize(map, mem_kb, 0)?;
domain.call.set_memory_map(domain.domid, e820).await?;
domain
.call
.mmuext(domain.domid, MMUEXT_PIN_L4_TABLE, pg_mfn, 0)
.await?;
Ok(())
}
async fn vcpu(&mut self, domain: &mut BootDomain) -> Result<()> {
let page_table_segment = self
.page_table_segment
.as_ref()
.ok_or(Error::MemorySetupFailed("page_table_segment missing"))?;
let boot_stack_segment = self
.boot_stack_segment
.as_ref()
.ok_or(Error::MemorySetupFailed("boot_stack_segment missing"))?;
let start_info_segment = self
.start_info_segment
.as_ref()
.ok_or(Error::MemorySetupFailed("start_info_segment missing"))?;
let pg_pfn = page_table_segment.pfn;
let pg_mfn = domain.phys.p2m[pg_pfn as usize];
let mut vcpu = x8664VcpuGuestContext::default();
vcpu.user_regs.rip = domain.image_info.virt_entry;
vcpu.user_regs.rsp =
domain.image_info.virt_base + (boot_stack_segment.pfn + 1) * self.page_size();
vcpu.user_regs.rsi =
domain.image_info.virt_base + (start_info_segment.pfn) * self.page_size();
vcpu.user_regs.rflags = 1 << 9;
vcpu.debugreg[6] = 0xffff0ff0;
vcpu.debugreg[7] = 0x00000400;
vcpu.flags = VGCF_IN_KERNEL | VGCF_ONLINE;
let cr3_pfn = pg_mfn;
debug!("cr3: pfn {:#x} mfn {:#x}", page_table_segment.pfn, cr3_pfn);
vcpu.ctrlreg[3] = cr3_pfn << 12;
vcpu.user_regs.ds = 0x0;
vcpu.user_regs.es = 0x0;
vcpu.user_regs.fs = 0x0;
vcpu.user_regs.gs = 0x0;
vcpu.user_regs.ss = 0xe02b;
vcpu.user_regs.cs = 0xe033;
vcpu.kernel_ss = vcpu.user_regs.ss as u64;
vcpu.kernel_sp = vcpu.user_regs.rsp;
trace!("vcpu context: {:?}", vcpu);
domain
.call
.set_vcpu_context(domain.domid, 0, VcpuGuestContextAny { value: vcpu })
.await?;
Ok(())
}
async fn gnttab_seed(&mut self, domain: &mut BootDomain) -> Result<()> {
let xenstore_segment = self
.xenstore_segment
.as_ref()
.ok_or(Error::MemorySetupFailed("xenstore_segment missing"))?;
let console_gfn = domain.consoles.first().map(|x| x.1).unwrap_or(0) as usize;
let xenstore_gfn = domain.phys.p2m[xenstore_segment.pfn as usize];
let addr = domain
.call
.mmap(0, 1 << XEN_PAGE_SHIFT)
.await
.ok_or(Error::MmapFailed)?;
domain
.call
.map_resource(domain.domid, 1, 0, 0, 1, addr)
.await?;
let entries = unsafe { slice::from_raw_parts_mut(addr as *mut GrantEntry, 2) };
entries[0].flags = 1 << 0;
entries[0].domid = 0;
entries[0].frame = console_gfn as u32;
entries[1].flags = 1 << 0;
entries[1].domid = 0;
entries[1].frame = xenstore_gfn as u32;
unsafe {
let result = munmap(addr as *mut c_void, 1 << XEN_PAGE_SHIFT);
if result != 0 {
return Err(Error::UnmapFailed(Errno::from_raw(result)));
}
}
Ok(())
}
}