hypha: move libraries to libs/

Alex Zenla
2024-02-01 10:01:49 +00:00
parent def4306a04
commit bd56de235f
36 changed files with 10 additions and 10 deletions

View File

@@ -0,0 +1,35 @@
[package]
name = "xenclient"
version.workspace = true
edition = "2021"
resolver = "2"

[dependencies]
thiserror = { workspace = true }
libc = { workspace = true }
elf = { workspace = true }
flate2 = { workspace = true }
xz2 = { workspace = true }
memchr = { workspace = true }
slice-copy = { workspace = true }
log = { workspace = true }
uuid = { workspace = true }

[dependencies.xencall]
path = "../xencall"

[dependencies.xenstore]
path = "../xenstore"

[dependencies.xenevtchn]
path = "../xenevtchn"

[dev-dependencies]
env_logger = { workspace = true }

[lib]
path = "src/lib.rs"

[[example]]
name = "xenclient-boot"
path = "examples/boot.rs"

View File

@@ -0,0 +1,31 @@
use std::{env, process};
use xenclient::error::Result;
use xenclient::{DomainConfig, XenClient};
fn main() -> Result<()> {
env_logger::init();
let args: Vec<String> = env::args().collect();
if args.len() != 3 {
println!("usage: boot <kernel-image> <initrd>");
process::exit(1);
}
let kernel_image_path = args.get(1).expect("argument not specified");
let initrd_path = args.get(2).expect("argument not specified");
let mut client = XenClient::open()?;
let config = DomainConfig {
backend_domid: 0,
name: "xenclient-test",
max_vcpus: 1,
mem_mb: 512,
kernel_path: kernel_image_path.as_str(),
initrd_path: initrd_path.as_str(),
cmdline: "debug elevator=noop",
disks: vec![],
filesystems: vec![],
extra_keys: vec![],
};
let domid = client.create(&config)?;
println!("created domain {}", domid);
Ok(())
}
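To try this against a live Xen host, the manifest above registers the example as `xenclient-boot`, so something like `cargo run --example xenclient-boot -- <kernel-image> <initrd>` should work; it talks to the Xen control interfaces directly, so it typically needs to run as root inside dom0.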

View File

@@ -0,0 +1,358 @@
use crate::error::Result;
use crate::mem::PhysicalPages;
use crate::sys::{GrantEntry, XEN_PAGE_SHIFT};
use crate::Error;
use libc::munmap;
use log::debug;
use slice_copy::copy;
use std::ffi::c_void;
use std::slice;
use xencall::XenCall;
pub trait BootImageLoader {
fn parse(&self) -> Result<BootImageInfo>;
fn load(&self, image_info: &BootImageInfo, dst: &mut [u8]) -> Result<()>;
}
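/// All-ones sentinel marking an address (for example one taken from a Xen ELF note) as unset.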
pub const XEN_UNSET_ADDR: u64 = -1i64 as u64;
#[derive(Debug)]
pub struct BootImageInfo {
pub start: u64,
pub virt_base: u64,
pub virt_kstart: u64,
pub virt_kend: u64,
pub virt_hypercall: u64,
pub virt_entry: u64,
pub virt_p2m_base: u64,
pub unmapped_initrd: bool,
}
pub struct BootSetup<'a> {
pub(crate) call: &'a XenCall,
pub phys: PhysicalPages<'a>,
pub(crate) domid: u32,
pub(crate) virt_alloc_end: u64,
pub(crate) pfn_alloc_end: u64,
pub(crate) virt_pgtab_end: u64,
pub(crate) total_pages: u64,
}
#[derive(Debug)]
pub struct DomainSegment {
pub(crate) vstart: u64,
vend: u64,
pub pfn: u64,
pub(crate) addr: u64,
pub(crate) size: u64,
pub(crate) pages: u64,
}
#[derive(Debug)]
pub struct BootState {
pub kernel_segment: DomainSegment,
pub start_info_segment: DomainSegment,
pub xenstore_segment: DomainSegment,
pub console_segment: DomainSegment,
pub boot_stack_segment: DomainSegment,
pub p2m_segment: DomainSegment,
pub page_table_segment: DomainSegment,
pub image_info: BootImageInfo,
pub shared_info_frame: u64,
pub initrd_segment: DomainSegment,
pub store_evtchn: u32,
pub console_evtchn: u32,
}
impl BootSetup<'_> {
pub fn new(call: &XenCall, domid: u32) -> BootSetup {
BootSetup {
call,
phys: PhysicalPages::new(call, domid),
domid,
virt_alloc_end: 0,
pfn_alloc_end: 0,
virt_pgtab_end: 0,
total_pages: 0,
}
}
fn initialize_memory(&mut self, arch: &mut dyn ArchBootSetup, total_pages: u64) -> Result<()> {
self.call.set_address_size(self.domid, 64)?;
arch.meminit(self, total_pages)?;
Ok(())
}
pub fn initialize(
&mut self,
arch: &mut dyn ArchBootSetup,
image_loader: &dyn BootImageLoader,
initrd: &[u8],
max_vcpus: u32,
mem_mb: u64,
) -> Result<BootState> {
debug!(
"BootSetup initialize max_vcpus={:?} mem_mb={:?}",
max_vcpus, mem_mb
);
let total_pages = mem_mb << (20 - arch.page_shift());
self.initialize_memory(arch, total_pages)?;
let image_info = image_loader.parse()?;
debug!("BootSetup initialize image_info={:?}", image_info);
self.virt_alloc_end = image_info.virt_base;
let kernel_segment = self.load_kernel_segment(arch, image_loader, &image_info)?;
let mut p2m_segment: Option<DomainSegment> = None;
if image_info.virt_p2m_base >= image_info.virt_base
|| (image_info.virt_p2m_base & ((1 << arch.page_shift()) - 1)) != 0
{
p2m_segment = Some(arch.alloc_p2m_segment(self, &image_info)?);
}
let start_info_segment = self.alloc_page(arch)?;
let xenstore_segment = self.alloc_page(arch)?;
let console_segment = self.alloc_page(arch)?;
let page_table_segment = arch.alloc_page_tables(self, &image_info)?;
let boot_stack_segment = self.alloc_page(arch)?;
if self.virt_pgtab_end > 0 {
self.alloc_padding_pages(arch, self.virt_pgtab_end)?;
}
let mut initrd_segment: Option<DomainSegment> = None;
if !image_info.unmapped_initrd {
initrd_segment = Some(self.alloc_module(arch, initrd)?);
}
if p2m_segment.is_none() {
let mut segment = arch.alloc_p2m_segment(self, &image_info)?;
segment.vstart = image_info.virt_p2m_base;
p2m_segment = Some(segment);
}
let p2m_segment = p2m_segment.unwrap();
if image_info.unmapped_initrd {
initrd_segment = Some(self.alloc_module(arch, initrd)?);
}
let initrd_segment = initrd_segment.unwrap();
let store_evtchn = self.call.evtchn_alloc_unbound(self.domid, 0)?;
let console_evtchn = self.call.evtchn_alloc_unbound(self.domid, 0)?;
let state = BootState {
kernel_segment,
start_info_segment,
xenstore_segment,
console_segment,
boot_stack_segment,
p2m_segment,
page_table_segment,
image_info,
initrd_segment,
store_evtchn,
console_evtchn,
shared_info_frame: 0,
};
debug!("BootSetup initialize state={:?}", state);
Ok(state)
}
pub fn boot(
&mut self,
arch: &mut dyn ArchBootSetup,
state: &mut BootState,
cmdline: &str,
) -> Result<()> {
let domain_info = self.call.get_domain_info(self.domid)?;
let shared_info_frame = domain_info.shared_info_frame;
state.shared_info_frame = shared_info_frame;
arch.setup_page_tables(self, state)?;
arch.setup_start_info(self, state, cmdline)?;
arch.setup_hypercall_page(self, &state.image_info)?;
arch.bootlate(self, state)?;
arch.setup_shared_info(self, state.shared_info_frame)?;
arch.vcpu(self, state)?;
self.phys.unmap_all()?;
self.gnttab_seed(state)?;
Ok(())
}
fn gnttab_seed(&mut self, state: &mut BootState) -> Result<()> {
let console_gfn = self.phys.p2m[state.console_segment.pfn as usize];
let xenstore_gfn = self.phys.p2m[state.xenstore_segment.pfn as usize];
let addr = self
.call
.mmap(0, 1 << XEN_PAGE_SHIFT)
.ok_or(Error::MmapFailed)?;
self.call.map_resource(self.domid, 1, 0, 0, 1, addr)?;
let entries = unsafe { slice::from_raw_parts_mut(addr as *mut GrantEntry, 2) };
entries[0].flags = 1 << 0;
entries[0].domid = 0;
entries[0].frame = console_gfn as u32;
entries[1].flags = 1 << 0;
entries[1].domid = 0;
entries[1].frame = xenstore_gfn as u32;
unsafe {
let result = munmap(addr as *mut c_void, 1 << XEN_PAGE_SHIFT);
if result != 0 {
return Err(Error::UnmapFailed);
}
}
Ok(())
}
fn load_kernel_segment(
&mut self,
arch: &mut dyn ArchBootSetup,
image_loader: &dyn BootImageLoader,
image_info: &BootImageInfo,
) -> Result<DomainSegment> {
let kernel_segment = self.alloc_segment(
arch,
image_info.virt_kstart,
image_info.virt_kend - image_info.virt_kstart,
)?;
let kernel_segment_ptr = kernel_segment.addr as *mut u8;
let kernel_segment_slice =
unsafe { slice::from_raw_parts_mut(kernel_segment_ptr, kernel_segment.size as usize) };
image_loader.load(image_info, kernel_segment_slice)?;
Ok(kernel_segment)
}
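// Note: this sets the low `mask` bits of `addr`, yielding the last address of the
// enclosing aligned block; callers add 1 to reach the next aligned boundary.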
pub(crate) fn round_up(addr: u64, mask: u64) -> u64 {
addr | mask
}
pub(crate) fn bits_to_mask(bits: u64) -> u64 {
(1 << bits) - 1
}
pub(crate) fn alloc_segment(
&mut self,
arch: &mut dyn ArchBootSetup,
start: u64,
size: u64,
) -> Result<DomainSegment> {
if start > 0 {
self.alloc_padding_pages(arch, start)?;
}
let page_size: u32 = (1i64 << XEN_PAGE_SHIFT) as u32;
let pages = (size + page_size as u64 - 1) / page_size as u64;
let start = self.virt_alloc_end;
let mut segment = DomainSegment {
vstart: start,
vend: 0,
pfn: self.pfn_alloc_end,
addr: 0,
size,
pages,
};
self.chk_alloc_pages(arch, pages)?;
let ptr = self.phys.pfn_to_ptr(segment.pfn, pages)?;
segment.addr = ptr;
let slice = unsafe {
slice::from_raw_parts_mut(ptr as *mut u8, (pages * page_size as u64) as usize)
};
slice.fill(0);
segment.vend = self.virt_alloc_end;
debug!(
"BootSetup alloc_segment {:#x} -> {:#x} (pfn {:#x} + {:#x} pages)",
start, segment.vend, segment.pfn, pages
);
Ok(segment)
}
fn alloc_page(&mut self, arch: &mut dyn ArchBootSetup) -> Result<DomainSegment> {
let start = self.virt_alloc_end;
let pfn = self.pfn_alloc_end;
self.chk_alloc_pages(arch, 1)?;
debug!("BootSetup alloc_page {:#x} (pfn {:#x})", start, pfn);
Ok(DomainSegment {
vstart: start,
vend: (start + arch.page_size()) - 1,
pfn,
addr: 0,
size: 0,
pages: 1,
})
}
fn alloc_module(
&mut self,
arch: &mut dyn ArchBootSetup,
buffer: &[u8],
) -> Result<DomainSegment> {
let segment = self.alloc_segment(arch, 0, buffer.len() as u64)?;
let slice = unsafe { slice::from_raw_parts_mut(segment.addr as *mut u8, buffer.len()) };
copy(slice, buffer);
Ok(segment)
}
fn alloc_padding_pages(&mut self, arch: &mut dyn ArchBootSetup, boundary: u64) -> Result<()> {
if (boundary & (arch.page_size() - 1)) != 0 {
return Err(Error::MemorySetupFailed);
}
if boundary < self.virt_alloc_end {
return Err(Error::MemorySetupFailed);
}
let pages = (boundary - self.virt_alloc_end) / arch.page_size();
self.chk_alloc_pages(arch, pages)?;
Ok(())
}
fn chk_alloc_pages(&mut self, arch: &mut dyn ArchBootSetup, pages: u64) -> Result<()> {
if pages > self.total_pages
|| self.pfn_alloc_end > self.total_pages
|| pages > self.total_pages - self.pfn_alloc_end
{
return Err(Error::MemorySetupFailed);
}
self.pfn_alloc_end += pages;
self.virt_alloc_end += pages * arch.page_size();
Ok(())
}
}
pub trait ArchBootSetup {
fn page_size(&mut self) -> u64;
fn page_shift(&mut self) -> u64;
fn alloc_p2m_segment(
&mut self,
setup: &mut BootSetup,
image_info: &BootImageInfo,
) -> Result<DomainSegment>;
fn alloc_page_tables(
&mut self,
setup: &mut BootSetup,
image_info: &BootImageInfo,
) -> Result<DomainSegment>;
fn setup_page_tables(&mut self, setup: &mut BootSetup, state: &mut BootState) -> Result<()>;
fn setup_start_info(
&mut self,
setup: &mut BootSetup,
state: &BootState,
cmdline: &str,
) -> Result<()>;
fn setup_shared_info(&mut self, setup: &mut BootSetup, shared_info_frame: u64) -> Result<()>;
fn setup_hypercall_page(
&mut self,
setup: &mut BootSetup,
image_info: &BootImageInfo,
) -> Result<()>;
fn meminit(&mut self, setup: &mut BootSetup, total_pages: u64) -> Result<()>;
fn bootlate(&mut self, setup: &mut BootSetup, state: &mut BootState) -> Result<()>;
fn vcpu(&mut self, setup: &mut BootSetup, state: &mut BootState) -> Result<()>;
}
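For orientation, `ArchBootSetup` is the hook the architecture-specific code (x86.rs below) plugs into the generic boot path. A minimal sketch of how a caller drives it, mirroring what `XenClient::init` in lib.rs does (the kernel/initrd paths and command line are placeholders):

use std::fs;
use xencall::XenCall;
use xenclient::boot::BootSetup;
use xenclient::elfloader::ElfImageLoader;
use xenclient::error::Result;
use xenclient::x86::X86BootSetup;

fn boot_domain(call: &XenCall, domid: u32) -> Result<()> {
    // Find and decompress the embedded ELF image carrying the Xen notes.
    let image_loader = ElfImageLoader::load_file_kernel("/boot/vmlinuz")?;
    let initrd = fs::read("/boot/initrd.img")?;
    let mut arch = X86BootSetup::new();
    let mut boot = BootSetup::new(call, domid);
    // Allocate domain memory and lay out the kernel, initrd and boot segments.
    let mut state = boot.initialize(&mut arch, &image_loader, &initrd, 1, 512)?;
    // Fill in page tables, start info, shared info and the boot vcpu, then seed grants.
    boot.boot(&mut arch, &mut state, "debug elevator=noop")?;
    Ok(())
}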

View File

@@ -0,0 +1,289 @@
use crate::boot::{BootImageInfo, BootImageLoader, XEN_UNSET_ADDR};
use crate::error::Result;
use crate::sys::{
XEN_ELFNOTE_ENTRY, XEN_ELFNOTE_HYPERCALL_PAGE, XEN_ELFNOTE_INIT_P2M, XEN_ELFNOTE_MOD_START_PFN,
XEN_ELFNOTE_PADDR_OFFSET, XEN_ELFNOTE_TYPES, XEN_ELFNOTE_VIRT_BASE,
};
use crate::Error;
use elf::abi::{PF_R, PF_W, PF_X, PT_LOAD, SHT_NOTE};
use elf::endian::AnyEndian;
use elf::note::Note;
use elf::ElfBytes;
use flate2::bufread::GzDecoder;
use log::debug;
use memchr::memmem::find_iter;
use slice_copy::copy;
use std::collections::HashMap;
use std::io::{BufReader, Read};
use std::mem::size_of;
use xz2::bufread::XzDecoder;
pub struct ElfImageLoader {
data: Vec<u8>,
}
fn xen_note_value_as_u64(endian: AnyEndian, value: &[u8]) -> Option<u64> {
let bytes = value.to_vec();
match value.len() {
1 => {
let bytes: Option<[u8; size_of::<u8>()]> = bytes.try_into().ok();
Some(match endian {
AnyEndian::Little => u8::from_le_bytes(bytes?),
AnyEndian::Big => u8::from_be_bytes(bytes?),
} as u64)
}
2 => {
let bytes: Option<[u8; size_of::<u16>()]> = bytes.try_into().ok();
Some(match endian {
AnyEndian::Little => u16::from_le_bytes(bytes?),
AnyEndian::Big => u16::from_be_bytes(bytes?),
} as u64)
}
4 => {
let bytes: Option<[u8; size_of::<u32>()]> = bytes.try_into().ok();
Some(match endian {
AnyEndian::Little => u32::from_le_bytes(bytes?),
AnyEndian::Big => u32::from_be_bytes(bytes?),
} as u64)
}
8 => {
let bytes: Option<[u8; size_of::<u64>()]> = bytes.try_into().ok();
Some(match endian {
AnyEndian::Little => u64::from_le_bytes(bytes?),
AnyEndian::Big => u64::from_be_bytes(bytes?),
})
}
_ => None,
}
}
impl ElfImageLoader {
pub fn new(data: Vec<u8>) -> ElfImageLoader {
ElfImageLoader { data }
}
pub fn load_file(path: &str) -> Result<ElfImageLoader> {
let data = std::fs::read(path)?;
Ok(ElfImageLoader::new(data))
}
pub fn load_gz(data: &[u8]) -> Result<ElfImageLoader> {
let buff = BufReader::new(data);
let image = ElfImageLoader::read_one_stream(&mut GzDecoder::new(buff))?;
Ok(ElfImageLoader::new(image))
}
pub fn load_xz(data: &[u8]) -> Result<ElfImageLoader> {
let buff = BufReader::new(data);
let image = ElfImageLoader::read_one_stream(&mut XzDecoder::new(buff))?;
Ok(ElfImageLoader::new(image))
}
fn read_one_stream(read: &mut dyn Read) -> Result<Vec<u8>> {
let mut result: Vec<u8> = Vec::new();
let mut buffer = [0u8; 8192];
loop {
match read.read(&mut buffer) {
Ok(size) => {
if size == 0 {
break;
}
result.extend_from_slice(&buffer[0..size])
}
Err(error) => {
if !result.is_empty() {
break;
}
return Err(Error::from(error));
}
}
}
Ok(result)
}
pub fn load_file_gz(path: &str) -> Result<ElfImageLoader> {
let file = std::fs::read(path)?;
ElfImageLoader::load_gz(file.as_slice())
}
pub fn load_file_xz(path: &str) -> Result<ElfImageLoader> {
let file = std::fs::read(path)?;
ElfImageLoader::load_xz(file.as_slice())
}
pub fn load_file_kernel(path: &str) -> Result<ElfImageLoader> {
let file = std::fs::read(path)?;
for start in find_iter(file.as_slice(), &[0x1f, 0x8b]) {
if let Ok(elf) = ElfImageLoader::load_gz(&file[start..]) {
return Ok(elf);
}
}
for start in find_iter(file.as_slice(), &[0xfd, 0x37, 0x7a, 0x58]) {
if let Ok(elf) = ElfImageLoader::load_xz(&file[start..]) {
return Ok(elf);
}
}
Err(Error::ElfCompressionUnknown)
}
}
struct ElfNoteValue {
value: u64,
}
impl BootImageLoader for ElfImageLoader {
fn parse(&self) -> Result<BootImageInfo> {
let elf = ElfBytes::<AnyEndian>::minimal_parse(self.data.as_slice())?;
let headers = elf.section_headers().ok_or(Error::ElfInvalidImage)?;
let mut linux_notes: HashMap<u64, Vec<u8>> = HashMap::new();
let mut xen_notes: HashMap<u64, ElfNoteValue> = HashMap::new();
for header in headers {
if header.sh_type != SHT_NOTE {
continue;
}
let notes = elf.section_data_as_notes(&header)?;
for note in notes {
if let Note::Unknown(note) = note {
if note.name == "Linux" {
linux_notes.insert(note.n_type, note.desc.to_vec());
}
if note.name == "Xen" {
for typ in XEN_ELFNOTE_TYPES {
if typ.id != note.n_type {
continue;
}
let value = if !typ.is_string {
xen_note_value_as_u64(elf.ehdr.endianness, note.desc).unwrap_or(0)
} else {
0
};
xen_notes.insert(typ.id, ElfNoteValue { value });
}
continue;
}
}
}
}
if linux_notes.is_empty() {
return Err(Error::ElfInvalidImage);
}
if xen_notes.is_empty() {
return Err(Error::ElfXenSupportMissing);
}
let paddr_offset = xen_notes
.get(&XEN_ELFNOTE_PADDR_OFFSET)
.ok_or(Error::ElfInvalidImage)?
.value;
let virt_base = xen_notes
.get(&XEN_ELFNOTE_VIRT_BASE)
.ok_or(Error::ElfInvalidImage)?
.value;
let entry = xen_notes
.get(&XEN_ELFNOTE_ENTRY)
.ok_or(Error::ElfInvalidImage)?
.value;
let virt_hypercall = xen_notes
.get(&XEN_ELFNOTE_HYPERCALL_PAGE)
.ok_or(Error::ElfInvalidImage)?
.value;
let init_p2m = xen_notes
.get(&XEN_ELFNOTE_INIT_P2M)
.ok_or(Error::ElfInvalidImage)?
.value;
let mod_start_pfn = xen_notes
.get(&XEN_ELFNOTE_MOD_START_PFN)
.ok_or(Error::ElfInvalidImage)?
.value;
let mut start: u64 = u64::MAX;
let mut end: u64 = 0;
let segments = elf.segments().ok_or(Error::ElfInvalidImage)?;
for header in segments {
if (header.p_type != PT_LOAD) || (header.p_flags & (PF_R | PF_W | PF_X)) == 0 {
continue;
}
let paddr = header.p_paddr;
let memsz = header.p_memsz;
if start > paddr {
start = paddr;
}
if end < paddr + memsz {
end = paddr + memsz;
}
}
if paddr_offset != XEN_UNSET_ADDR && virt_base == XEN_UNSET_ADDR {
return Err(Error::ElfInvalidImage);
}
let virt_offset = virt_base - paddr_offset;
let virt_kstart = start + virt_offset;
let virt_kend = end + virt_offset;
let virt_entry = entry;
let image_info = BootImageInfo {
start,
virt_base,
virt_kstart,
virt_kend,
virt_hypercall,
virt_entry,
virt_p2m_base: init_p2m,
unmapped_initrd: mod_start_pfn != 0,
};
Ok(image_info)
}
fn load(&self, image_info: &BootImageInfo, dst: &mut [u8]) -> Result<()> {
let elf = ElfBytes::<AnyEndian>::minimal_parse(self.data.as_slice())?;
let segments = elf.segments().ok_or(Error::ElfInvalidImage)?;
debug!(
"ElfImageLoader load dst={:#x} segments={}",
dst.as_ptr() as u64,
segments.len()
);
for header in segments {
let paddr = header.p_paddr;
let filesz = header.p_filesz;
let memsz = header.p_memsz;
let base_offset = paddr - image_info.start;
let data = elf.segment_data(&header)?;
let segment_dst = &mut dst[base_offset as usize..];
let copy_slice = &data[0..filesz as usize];
debug!(
"ElfImageLoader load copy hdr={:?} dst={:#x} len={}",
header,
copy_slice.as_ptr() as u64,
copy_slice.len()
);
copy(segment_dst, copy_slice);
if (memsz - filesz) > 0 {
let remaining = &mut segment_dst[filesz as usize..memsz as usize];
debug!(
"ElfImageLoader load fill_zero hdr={:?} dst={:#x} len={}",
header.p_offset,
remaining.as_ptr() as u64,
remaining.len()
);
remaining.fill(0);
}
}
Ok(())
}
}
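As a small illustration of the loader API above (the kernel path is a placeholder), parsing an image surfaces the addresses the boot code later consumes:

use xenclient::boot::BootImageLoader;
use xenclient::elfloader::ElfImageLoader;
use xenclient::error::Result;

fn inspect_kernel() -> Result<()> {
    // load_file_kernel scans for a gzip or xz stream and decompresses the ELF inside it.
    let loader = ElfImageLoader::load_file_kernel("/boot/vmlinuz")?;
    let info = loader.parse()?;
    println!(
        "virt_base={:#x} entry={:#x} hypercall_page={:#x}",
        info.virt_base, info.virt_entry, info.virt_hypercall
    );
    Ok(())
}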

View File

@@ -0,0 +1,39 @@
use std::io;
#[derive(thiserror::Error, Debug)]
pub enum Error {
#[error("io issue encountered")]
Io(#[from] io::Error),
#[error("xenstore issue encountered")]
XenStore(#[from] xenstore::error::Error),
#[error("xencall issue encountered")]
XenCall(#[from] xencall::error::Error),
#[error("domain does not have a tty")]
TtyNotFound,
#[error("introducing the domain failed")]
IntroduceDomainFailed,
#[error("string conversion of a path failed")]
PathStringConversion,
#[error("parent of path not found")]
PathParentNotFound,
#[error("domain does not exist")]
DomainNonExistent,
#[error("elf parse failed")]
ElfParseFailed(#[from] elf::ParseError),
#[error("mmap failed")]
MmapFailed,
#[error("munmap failed")]
UnmapFailed,
#[error("memory setup failed")]
MemorySetupFailed,
#[error("populate physmap failed: wanted={0}, received={1}, input_extents={2}")]
PopulatePhysmapFailed(usize, usize, usize),
#[error("unknown elf compression method")]
ElfCompressionUnknown,
#[error("expected elf image format not found")]
ElfInvalidImage,
#[error("provided elf image does not contain xen support")]
ElfXenSupportMissing,
}
pub type Result<T> = std::result::Result<T, Error>;

View File

@@ -0,0 +1,575 @@
pub mod boot;
pub mod elfloader;
pub mod error;
pub mod mem;
pub mod sys;
pub mod x86;
use crate::boot::BootSetup;
use crate::elfloader::ElfImageLoader;
use crate::error::{Error, Result};
use crate::x86::X86BootSetup;
use log::{trace, warn};
use std::fs::{read, File, OpenOptions};
use std::path::PathBuf;
use std::str::FromStr;
use std::thread;
use std::time::Duration;
use uuid::Uuid;
use xencall::sys::CreateDomain;
use xencall::XenCall;
use xenstore::client::{
XsPermission, XsdClient, XsdInterface, XS_PERM_NONE, XS_PERM_READ, XS_PERM_READ_WRITE,
};
pub struct XenClient {
pub store: XsdClient,
call: XenCall,
}
#[derive(Debug)]
pub struct BlockDeviceRef {
pub path: String,
pub major: u32,
pub minor: u32,
}
#[derive(Debug)]
pub struct DomainDisk<'a> {
pub vdev: &'a str,
pub block: &'a BlockDeviceRef,
pub writable: bool,
}
#[derive(Debug)]
pub struct DomainFilesystem<'a> {
pub path: &'a str,
pub tag: &'a str,
}
#[derive(Debug)]
pub struct DomainConfig<'a> {
pub backend_domid: u32,
pub name: &'a str,
pub max_vcpus: u32,
pub mem_mb: u64,
pub kernel_path: &'a str,
pub initrd_path: &'a str,
pub cmdline: &'a str,
pub disks: Vec<DomainDisk<'a>>,
pub filesystems: Vec<DomainFilesystem<'a>>,
pub extra_keys: Vec<(String, String)>,
}
impl XenClient {
pub fn open() -> Result<XenClient> {
let store = XsdClient::open()?;
let call = XenCall::open()?;
Ok(XenClient { store, call })
}
pub fn create(&mut self, config: &DomainConfig) -> Result<u32> {
let domain = CreateDomain {
max_vcpus: config.max_vcpus,
..Default::default()
};
let domid = self.call.create_domain(domain)?;
match self.init(domid, &domain, config) {
Ok(_) => Ok(domid),
Err(err) => {
// ignore since destroying a domain is best
// effort when an error occurs
let _ = self.destroy(domid);
Err(err)
}
}
}
pub fn destroy(&mut self, domid: u32) -> Result<()> {
if let Err(err) = self.destroy_store(domid) {
warn!("failed to destroy store for domain {}: {}", domid, err);
}
self.call.destroy_domain(domid)?;
Ok(())
}
fn destroy_store(&mut self, domid: u32) -> Result<()> {
let dom_path = self.store.get_domain_path(domid)?;
let vm_path = self.store.read_string(&format!("{}/vm", dom_path))?;
if vm_path.is_empty() {
return Err(Error::DomainNonExistent);
}
let mut backend_paths: Vec<String> = Vec::new();
let console_frontend_path = format!("{}/console", dom_path);
let console_backend_path = self
.store
.read_string_optional(format!("{}/backend", console_frontend_path).as_str())?;
for device_category in self
.store
.list_any(format!("{}/device", dom_path).as_str())?
{
for device_id in self
.store
.list_any(format!("{}/device/{}", dom_path, device_category).as_str())?
{
let device_path = format!("{}/device/{}/{}", dom_path, device_category, device_id);
let backend_path = self
.store
.read_string(format!("{}/backend", device_path).as_str())?;
backend_paths.push(backend_path);
}
}
for backend in &backend_paths {
let state_path = format!("{}/state", backend);
let online_path = format!("{}/online", backend);
let mut tx = self.store.transaction()?;
let state = tx.read_string(&state_path)?;
if state.is_empty() {
break;
}
tx.write_string(&online_path, "0")?;
if !state.is_empty() && u32::from_str(&state).unwrap_or(0) != 6 {
tx.write_string(&state_path, "5")?;
}
tx.commit()?;
let mut count: u32 = 0;
loop {
if count >= 100 {
warn!("unable to safely destroy backend: {}", backend);
break;
}
let state = self.store.read_string(&state_path)?;
let state = i64::from_str(&state).unwrap_or(-1);
if state == 6 {
break;
}
thread::sleep(Duration::from_millis(100));
count += 1;
}
}
let mut tx = self.store.transaction()?;
let mut backend_removals: Vec<String> = Vec::new();
backend_removals.extend_from_slice(backend_paths.as_slice());
if let Some(backend) = console_backend_path {
backend_removals.push(backend);
}
for path in &backend_removals {
let path = PathBuf::from(path);
let parent = path.parent().ok_or(Error::PathParentNotFound)?;
tx.rm(parent.to_str().ok_or(Error::PathStringConversion)?)?;
}
tx.rm(&vm_path)?;
tx.rm(&dom_path)?;
tx.commit()?;
Ok(())
}
fn init(&mut self, domid: u32, domain: &CreateDomain, config: &DomainConfig) -> Result<()> {
trace!(
"XenClient init domid={} domain={:?} config={:?}",
domid,
domain,
config
);
let backend_dom_path = self.store.get_domain_path(0)?;
let dom_path = self.store.get_domain_path(domid)?;
let uuid_string = Uuid::from_bytes(domain.handle).to_string();
let vm_path = format!("/vm/{}", uuid_string);
let ro_perm = &[
XsPermission {
id: 0,
perms: XS_PERM_NONE,
},
XsPermission {
id: domid,
perms: XS_PERM_READ,
},
];
let rw_perm = &[XsPermission {
id: domid,
perms: XS_PERM_READ_WRITE,
}];
let no_perm = &[XsPermission {
id: 0,
perms: XS_PERM_NONE,
}];
{
let mut tx = self.store.transaction()?;
tx.rm(dom_path.as_str())?;
tx.mknod(dom_path.as_str(), ro_perm)?;
tx.rm(vm_path.as_str())?;
tx.mknod(vm_path.as_str(), ro_perm)?;
tx.mknod(vm_path.as_str(), no_perm)?;
tx.mknod(format!("{}/device", vm_path).as_str(), no_perm)?;
tx.write_string(format!("{}/vm", dom_path).as_str(), &vm_path)?;
tx.mknod(format!("{}/cpu", dom_path).as_str(), ro_perm)?;
tx.mknod(format!("{}/memory", dom_path).as_str(), ro_perm)?;
tx.mknod(format!("{}/control", dom_path).as_str(), ro_perm)?;
tx.mknod(format!("{}/control/shutdown", dom_path).as_str(), rw_perm)?;
tx.mknod(
format!("{}/control/feature-poweroff", dom_path).as_str(),
rw_perm,
)?;
tx.mknod(
format!("{}/control/feature-reboot", dom_path).as_str(),
rw_perm,
)?;
tx.mknod(
format!("{}/control/feature-suspend", dom_path).as_str(),
rw_perm,
)?;
tx.mknod(format!("{}/control/sysrq", dom_path).as_str(), rw_perm)?;
tx.mknod(format!("{}/data", dom_path).as_str(), rw_perm)?;
tx.mknod(format!("{}/drivers", dom_path).as_str(), rw_perm)?;
tx.mknod(format!("{}/feature", dom_path).as_str(), rw_perm)?;
tx.mknod(format!("{}/attr", dom_path).as_str(), rw_perm)?;
tx.mknod(format!("{}/error", dom_path).as_str(), rw_perm)?;
tx.write_string(
format!("{}/uuid", vm_path).as_str(),
&Uuid::from_bytes(domain.handle).to_string(),
)?;
tx.write_string(format!("{}/name", dom_path).as_str(), config.name)?;
tx.write_string(format!("{}/name", vm_path).as_str(), config.name)?;
for (key, value) in &config.extra_keys {
tx.write_string(format!("{}/{}", dom_path, key).as_str(), value)?;
}
tx.commit()?;
}
self.call.set_max_vcpus(domid, config.max_vcpus)?;
self.call.set_max_mem(domid, config.mem_mb * 1024)?;
let image_loader = ElfImageLoader::load_file_kernel(config.kernel_path)?;
let console_evtchn: u32;
let xenstore_evtchn: u32;
let console_mfn: u64;
let xenstore_mfn: u64;
{
let mut boot = BootSetup::new(&self.call, domid);
let mut arch = X86BootSetup::new();
let initrd = read(config.initrd_path)?;
let mut state = boot.initialize(
&mut arch,
&image_loader,
initrd.as_slice(),
config.max_vcpus,
config.mem_mb,
)?;
boot.boot(&mut arch, &mut state, config.cmdline)?;
console_evtchn = state.console_evtchn;
xenstore_evtchn = state.store_evtchn;
console_mfn = boot.phys.p2m[state.console_segment.pfn as usize];
xenstore_mfn = boot.phys.p2m[state.xenstore_segment.pfn as usize];
}
{
let mut tx = self.store.transaction()?;
tx.write_string(format!("{}/image/os_type", vm_path).as_str(), "linux")?;
tx.write_string(
format!("{}/image/kernel", vm_path).as_str(),
config.kernel_path,
)?;
tx.write_string(
format!("{}/image/ramdisk", vm_path).as_str(),
config.initrd_path,
)?;
tx.write_string(
format!("{}/image/cmdline", vm_path).as_str(),
config.cmdline,
)?;
tx.write_string(
format!("{}/memory/static-max", dom_path).as_str(),
&(config.mem_mb * 1024).to_string(),
)?;
tx.write_string(
format!("{}/memory/target", dom_path).as_str(),
&(config.mem_mb * 1024).to_string(),
)?;
tx.write_string(format!("{}/memory/videoram", dom_path).as_str(), "0")?;
tx.write_string(format!("{}/domid", dom_path).as_str(), &domid.to_string())?;
tx.write_string(
format!("{}/store/port", dom_path).as_str(),
&xenstore_evtchn.to_string(),
)?;
tx.write_string(
format!("{}/store/ring-ref", dom_path).as_str(),
&xenstore_mfn.to_string(),
)?;
for i in 0..config.max_vcpus {
let path = format!("{}/cpu/{}", dom_path, i);
tx.mkdir(&path)?;
tx.set_perms(&path, ro_perm)?;
let path = format!("{}/cpu/{}/availability", dom_path, i);
tx.write_string(&path, "online")?;
tx.set_perms(&path, ro_perm)?;
}
tx.commit()?;
}
if !self
.store
.introduce_domain(domid, xenstore_mfn, xenstore_evtchn)?
{
return Err(Error::IntroduceDomainFailed);
}
self.console_device_add(
&dom_path,
&backend_dom_path,
config.backend_domid,
domid,
console_evtchn,
console_mfn,
)?;
for (index, disk) in config.disks.iter().enumerate() {
self.disk_device_add(
&dom_path,
&backend_dom_path,
config.backend_domid,
domid,
index,
disk,
)?;
}
for (index, filesystem) in config.filesystems.iter().enumerate() {
self.fs_9p_device_add(
&dom_path,
&backend_dom_path,
config.backend_domid,
domid,
index,
filesystem,
)?;
}
self.call.unpause_domain(domid)?;
Ok(())
}
fn disk_device_add(
&mut self,
dom_path: &str,
backend_dom_path: &str,
backend_domid: u32,
domid: u32,
index: usize,
disk: &DomainDisk,
) -> Result<()> {
let id = (202 << 8) | (index << 4) as u64;
let backend_items: Vec<(&str, String)> = vec![
("frontend-id", domid.to_string()),
("online", "1".to_string()),
("removable", "0".to_string()),
("bootable", "1".to_string()),
("state", "1".to_string()),
("dev", disk.vdev.to_string()),
("type", "phy".to_string()),
("mode", if disk.writable { "w" } else { "r" }.to_string()),
("device-type", "disk".to_string()),
("discard-enable", "0".to_string()),
("specification", "xen".to_string()),
("physical-device-path", disk.block.path.to_string()),
(
"physical-device",
format!("{:02x}:{:02x}", disk.block.major, disk.block.minor),
),
];
let frontend_items: Vec<(&str, String)> = vec![
("backend-id", backend_domid.to_string()),
("state", "1".to_string()),
("virtual-device", id.to_string()),
("device-type", "disk".to_string()),
("trusted", "1".to_string()),
("protocol", "x86_64-abi".to_string()),
];
self.device_add(
"vbd",
id,
dom_path,
backend_dom_path,
backend_domid,
domid,
frontend_items,
backend_items,
)?;
Ok(())
}
fn console_device_add(
&mut self,
dom_path: &str,
backend_dom_path: &str,
backend_domid: u32,
domid: u32,
port: u32,
mfn: u64,
) -> Result<()> {
let backend_entries = vec![
("frontend-id", domid.to_string()),
("online", "1".to_string()),
("state", "1".to_string()),
("protocol", "vt100".to_string()),
];
let frontend_entries = vec![
("backend-id", backend_domid.to_string()),
("limit", "1048576".to_string()),
("type", "xenconsoled".to_string()),
("output", "pty".to_string()),
("tty", "".to_string()),
("port", port.to_string()),
("ring-ref", mfn.to_string()),
];
self.device_add(
"console",
0,
dom_path,
backend_dom_path,
backend_domid,
domid,
frontend_entries,
backend_entries,
)?;
Ok(())
}
fn fs_9p_device_add(
&mut self,
dom_path: &str,
backend_dom_path: &str,
backend_domid: u32,
domid: u32,
index: usize,
filesystem: &DomainFilesystem,
) -> Result<()> {
let id = 90 + index as u64;
let backend_items: Vec<(&str, String)> = vec![
("frontend-id", domid.to_string()),
("online", "1".to_string()),
("state", "1".to_string()),
("path", filesystem.path.to_string()),
("security-model", "none".to_string()),
];
let frontend_items: Vec<(&str, String)> = vec![
("backend-id", backend_domid.to_string()),
("state", "1".to_string()),
("tag", filesystem.tag.to_string()),
];
self.device_add(
"9pfs",
id,
dom_path,
backend_dom_path,
backend_domid,
domid,
frontend_items,
backend_items,
)?;
Ok(())
}
#[allow(clippy::too_many_arguments)]
fn device_add(
&mut self,
typ: &str,
id: u64,
dom_path: &str,
backend_dom_path: &str,
backend_domid: u32,
domid: u32,
frontend_items: Vec<(&str, String)>,
backend_items: Vec<(&str, String)>,
) -> Result<()> {
let console_zero = typ == "console" && id == 0;
let frontend_path = if console_zero {
format!("{}/console", dom_path)
} else {
format!("{}/device/{}/{}", dom_path, typ, id)
};
let backend_path = format!("{}/backend/{}/{}/{}", backend_dom_path, typ, domid, id);
let mut backend_items: Vec<(&str, String)> = backend_items.clone();
let mut frontend_items: Vec<(&str, String)> = frontend_items.clone();
backend_items.push(("frontend", frontend_path.clone()));
frontend_items.push(("backend", backend_path.clone()));
let frontend_perms = &[
XsPermission {
id: domid,
perms: XS_PERM_NONE,
},
XsPermission {
id: backend_domid,
perms: XS_PERM_READ,
},
];
let backend_perms = &[
XsPermission {
id: backend_domid,
perms: XS_PERM_NONE,
},
XsPermission {
id: domid,
perms: XS_PERM_READ,
},
];
let mut tx = self.store.transaction()?;
tx.mknod(&frontend_path, frontend_perms)?;
for (p, value) in &frontend_items {
let path = format!("{}/{}", frontend_path, *p);
tx.write_string(&path, value)?;
if !console_zero {
tx.set_perms(&path, frontend_perms)?;
}
}
tx.mknod(&backend_path, backend_perms)?;
for (p, value) in &backend_items {
let path = format!("{}/{}", backend_path, *p);
tx.write_string(&path, value)?;
}
tx.commit()?;
Ok(())
}
pub fn open_console(&mut self, domid: u32) -> Result<(File, File)> {
let dom_path = self.store.get_domain_path(domid)?;
let console_tty_path = format!("{}/console/tty", dom_path);
let tty = self
.store
.read_string_optional(&console_tty_path)?
.unwrap_or("".to_string());
if tty.is_empty() {
return Err(Error::TtyNotFound);
}
let read = OpenOptions::new().read(true).write(false).open(&tty)?;
let write = OpenOptions::new().read(false).write(true).open(&tty)?;
Ok((read, write))
}
}
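Beyond the bare boot example earlier, here is a hedged sketch of a `DomainConfig` that also attaches a block device and a 9pfs share, then opens the console (device paths, major/minor numbers and tags are placeholders):

use xenclient::error::Result;
use xenclient::{BlockDeviceRef, DomainConfig, DomainDisk, DomainFilesystem, XenClient};

fn create_with_devices() -> Result<()> {
    let mut client = XenClient::open()?;
    let block = BlockDeviceRef {
        path: "/dev/loop0".to_string(),
        major: 7,
        minor: 0,
    };
    let config = DomainConfig {
        backend_domid: 0,
        name: "xenclient-devices",
        max_vcpus: 2,
        mem_mb: 1024,
        kernel_path: "/boot/vmlinuz",
        initrd_path: "/boot/initrd.img",
        cmdline: "debug elevator=noop",
        disks: vec![DomainDisk {
            vdev: "xvda",
            block: &block,
            writable: true,
        }],
        filesystems: vec![DomainFilesystem {
            path: "/srv/share",
            tag: "share",
        }],
        extra_keys: vec![],
    };
    let domid = client.create(&config)?;
    // Returns TtyNotFound until a console tty shows up under the domain's xenstore path.
    let (_read, _write) = client.open_console(domid)?;
    Ok(())
}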

View File

@@ -0,0 +1,186 @@
use crate::error::Result;
use crate::sys::{XEN_PAGE_SHIFT, XEN_PAGE_SIZE};
use crate::Error;
use libc::munmap;
use log::debug;
use std::ffi::c_void;
use crate::x86::X86_PAGE_SHIFT;
use xencall::sys::MmapEntry;
use xencall::XenCall;
#[derive(Debug)]
pub struct PhysicalPage {
pfn: u64,
ptr: u64,
count: u64,
}
pub struct PhysicalPages<'a> {
domid: u32,
pub(crate) p2m: Vec<u64>,
call: &'a XenCall,
pages: Vec<PhysicalPage>,
}
impl PhysicalPages<'_> {
pub fn new(call: &XenCall, domid: u32) -> PhysicalPages {
PhysicalPages {
domid,
p2m: Vec::new(),
call,
pages: Vec::new(),
}
}
pub fn load_p2m(&mut self, p2m: Vec<u64>) {
self.p2m = p2m;
}
pub fn p2m_size(&mut self) -> u64 {
self.p2m.len() as u64
}
pub fn pfn_to_ptr(&mut self, pfn: u64, count: u64) -> Result<u64> {
for page in &self.pages {
if pfn >= page.pfn + page.count {
continue;
}
if count > 0 {
if (pfn + count) <= page.pfn {
continue;
}
if pfn < page.pfn || (pfn + count) > page.pfn + page.count {
return Err(Error::MemorySetupFailed);
}
} else {
if pfn < page.pfn {
continue;
}
if pfn >= page.pfn + page.count {
continue;
}
}
return Ok(page.ptr + ((pfn - page.pfn) << X86_PAGE_SHIFT));
}
if count == 0 {
return Err(Error::MemorySetupFailed);
}
self.pfn_alloc(pfn, count)
}
fn pfn_alloc(&mut self, pfn: u64, count: u64) -> Result<u64> {
let mut entries = vec![MmapEntry::default(); count as usize];
for (i, entry) in entries.iter_mut().enumerate() {
entry.mfn = self.p2m[pfn as usize + i];
}
let chunk_size = 1 << XEN_PAGE_SHIFT;
let num_per_entry = chunk_size >> XEN_PAGE_SHIFT;
let num = num_per_entry * count as usize;
let mut pfns = vec![u64::MAX; num];
for i in 0..count as usize {
for j in 0..num_per_entry {
pfns[i * num_per_entry + j] = entries[i].mfn + j as u64;
}
}
let actual_mmap_len = (num as u64) << XEN_PAGE_SHIFT;
let addr = self
.call
.mmap(0, actual_mmap_len)
.ok_or(Error::MmapFailed)?;
debug!("mapped {:#x} foreign bytes at {:#x}", actual_mmap_len, addr);
let result = self.call.mmap_batch(self.domid, num as u64, addr, pfns)?;
if result != 0 {
return Err(Error::MmapFailed);
}
let page = PhysicalPage {
pfn,
ptr: addr,
count,
};
debug!(
"alloc_pfn {:#x}+{:#x} at {:#x}",
page.pfn, page.count, page.ptr
);
self.pages.push(page);
Ok(addr)
}
pub fn map_foreign_pages(&mut self, mfn: u64, size: u64) -> Result<u64> {
let num = ((size + XEN_PAGE_SIZE - 1) >> XEN_PAGE_SHIFT) as usize;
let mut pfns = vec![u64::MAX; num];
for (i, item) in pfns.iter_mut().enumerate().take(num) {
*item = mfn + i as u64;
}
let actual_mmap_len = (num as u64) << XEN_PAGE_SHIFT;
let addr = self
.call
.mmap(0, actual_mmap_len)
.ok_or(Error::MmapFailed)?;
debug!("mapped {:#x} foreign bytes at {:#x}", actual_mmap_len, addr);
let result = self.call.mmap_batch(self.domid, num as u64, addr, pfns)?;
if result != 0 {
return Err(Error::MmapFailed);
}
let page = PhysicalPage {
pfn: u64::MAX,
ptr: addr,
count: num as u64,
};
debug!(
"alloc_mfn {:#x}+{:#x} at {:#x}",
page.pfn, page.count, page.ptr
);
self.pages.push(page);
Ok(addr)
}
pub fn unmap_all(&mut self) -> Result<()> {
for page in &self.pages {
unsafe {
let err = munmap(
page.ptr as *mut c_void,
(page.count << X86_PAGE_SHIFT) as usize,
);
if err != 0 {
return Err(Error::UnmapFailed);
}
}
}
self.pages.clear();
Ok(())
}
pub fn unmap(&mut self, pfn: u64) -> Result<()> {
let page = self.pages.iter().enumerate().find(|(_, x)| x.pfn == pfn);
if page.is_none() {
return Err(Error::MemorySetupFailed);
}
let (i, page) = page.unwrap();
unsafe {
let err = munmap(
page.ptr as *mut c_void,
(page.count << X86_PAGE_SHIFT) as usize,
);
debug!(
"unmapped {:#x} foreign bytes at {:#x}",
(page.count << X86_PAGE_SHIFT) as usize,
page.ptr
);
if err != 0 {
return Err(Error::UnmapFailed);
}
self.pages.remove(i);
}
Ok(())
}
}

View File

@@ -0,0 +1,130 @@
pub const XEN_ELFNOTE_INFO: u64 = 0;
pub const XEN_ELFNOTE_ENTRY: u64 = 1;
pub const XEN_ELFNOTE_HYPERCALL_PAGE: u64 = 2;
pub const XEN_ELFNOTE_VIRT_BASE: u64 = 3;
pub const XEN_ELFNOTE_PADDR_OFFSET: u64 = 4;
pub const XEN_ELFNOTE_XEN_VERSION: u64 = 5;
pub const XEN_ELFNOTE_GUEST_OS: u64 = 6;
pub const XEN_ELFNOTE_GUEST_VERSION: u64 = 7;
pub const XEN_ELFNOTE_LOADER: u64 = 8;
pub const XEN_ELFNOTE_PAE_MODE: u64 = 9;
pub const XEN_ELFNOTE_FEATURES: u64 = 10;
pub const XEN_ELFNOTE_BSD_SYMTAB: u64 = 11;
pub const XEN_ELFNOTE_HV_START_LOW: u64 = 12;
pub const XEN_ELFNOTE_L1_MFN_VALID: u64 = 13;
pub const XEN_ELFNOTE_SUSPEND_CANCEL: u64 = 14;
pub const XEN_ELFNOTE_INIT_P2M: u64 = 15;
pub const XEN_ELFNOTE_MOD_START_PFN: u64 = 16;
pub const XEN_ELFNOTE_SUPPORTED_FEATURES: u64 = 17;
pub const XEN_ELFNOTE_PHYS32_ENTRY: u64 = 18;
#[derive(Copy, Clone)]
pub struct ElfNoteXenType {
pub id: u64,
pub name: &'static str,
pub is_string: bool,
}
pub const XEN_ELFNOTE_TYPES: &[ElfNoteXenType] = &[
ElfNoteXenType {
id: XEN_ELFNOTE_ENTRY,
name: "ENTRY",
is_string: false,
},
ElfNoteXenType {
id: XEN_ELFNOTE_HYPERCALL_PAGE,
name: "HYPERCALL_PAGE",
is_string: false,
},
ElfNoteXenType {
id: XEN_ELFNOTE_VIRT_BASE,
name: "VIRT_BASE",
is_string: false,
},
ElfNoteXenType {
id: XEN_ELFNOTE_INIT_P2M,
name: "INIT_P2M",
is_string: false,
},
ElfNoteXenType {
id: XEN_ELFNOTE_PADDR_OFFSET,
name: "PADDR_OFFSET",
is_string: false,
},
ElfNoteXenType {
id: XEN_ELFNOTE_HV_START_LOW,
name: "HV_START_LOW",
is_string: false,
},
ElfNoteXenType {
id: XEN_ELFNOTE_XEN_VERSION,
name: "XEN_VERSION",
is_string: true,
},
ElfNoteXenType {
id: XEN_ELFNOTE_GUEST_OS,
name: "GUEST_OS",
is_string: true,
},
ElfNoteXenType {
id: XEN_ELFNOTE_GUEST_VERSION,
name: "GUEST_VERSION",
is_string: true,
},
ElfNoteXenType {
id: XEN_ELFNOTE_LOADER,
name: "LOADER",
is_string: true,
},
ElfNoteXenType {
id: XEN_ELFNOTE_PAE_MODE,
name: "PAE_MODE",
is_string: true,
},
ElfNoteXenType {
id: XEN_ELFNOTE_FEATURES,
name: "FEATURES",
is_string: true,
},
ElfNoteXenType {
id: XEN_ELFNOTE_SUPPORTED_FEATURES,
name: "SUPPORTED_FEATURES",
is_string: false,
},
ElfNoteXenType {
id: XEN_ELFNOTE_BSD_SYMTAB,
name: "BSD_SYMTAB",
is_string: true,
},
ElfNoteXenType {
id: XEN_ELFNOTE_SUSPEND_CANCEL,
name: "SUSPEND_CANCEL",
is_string: false,
},
ElfNoteXenType {
id: XEN_ELFNOTE_MOD_START_PFN,
name: "MOD_START_PFN",
is_string: false,
},
ElfNoteXenType {
id: XEN_ELFNOTE_PHYS32_ENTRY,
name: "PHYS32_ENTRY",
is_string: false,
},
];
pub const XEN_PAGE_SHIFT: u64 = 12;
pub const XEN_PAGE_SIZE: u64 = 1 << XEN_PAGE_SHIFT;
pub const XEN_PAGE_MASK: u64 = !(XEN_PAGE_SIZE - 1);
pub const SUPERPAGE_BATCH_SIZE: u64 = 512;
pub const SUPERPAGE_2MB_SHIFT: u64 = 9;
pub const SUPERPAGE_2MB_NR_PFNS: u64 = 1u64 << SUPERPAGE_2MB_SHIFT;
pub const VGCF_IN_KERNEL: u64 = 1 << 2;
pub const VGCF_ONLINE: u64 = 1 << 5;
#[repr(C)]
pub struct GrantEntry {
pub flags: u16,
pub domid: u16,
pub frame: u32,
}
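As a quick check on the page constants above, the usual address arithmetic they imply (a sketch; the asserts hold for these definitions):

use xenclient::sys::{XEN_PAGE_MASK, XEN_PAGE_SHIFT, XEN_PAGE_SIZE};

fn page_math() {
    assert_eq!(XEN_PAGE_SIZE, 4096);
    let addr: u64 = 0x1234_5678;
    // Split an address into its page base and the offset within the page.
    let page = addr & XEN_PAGE_MASK;
    let offset = addr & (XEN_PAGE_SIZE - 1);
    assert_eq!(page + offset, addr);
    // Page frame numbers are just the address shifted by XEN_PAGE_SHIFT.
    let pfn = addr >> XEN_PAGE_SHIFT;
    assert_eq!(pfn << XEN_PAGE_SHIFT, page);
}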

View File

@@ -0,0 +1,627 @@
use crate::boot::{
ArchBootSetup, BootImageInfo, BootSetup, BootState, DomainSegment, XEN_UNSET_ADDR,
};
use crate::error::Result;
use crate::sys::{
SUPERPAGE_2MB_NR_PFNS, SUPERPAGE_2MB_SHIFT, SUPERPAGE_BATCH_SIZE, VGCF_IN_KERNEL, VGCF_ONLINE,
XEN_PAGE_SHIFT,
};
use crate::Error;
use libc::c_char;
use log::{debug, trace};
use slice_copy::copy;
use std::cmp::{max, min};
use std::mem::size_of;
use std::slice;
use xencall::sys::{VcpuGuestContext, MMUEXT_PIN_L4_TABLE};
pub const X86_PAGE_SHIFT: u64 = 12;
pub const X86_PAGE_SIZE: u64 = 1 << X86_PAGE_SHIFT;
pub const X86_VIRT_BITS: u64 = 48;
pub const X86_VIRT_MASK: u64 = (1 << X86_VIRT_BITS) - 1;
pub const X86_PGTABLE_LEVELS: u64 = 4;
pub const X86_PGTABLE_LEVEL_SHIFT: u64 = 9;
#[repr(C)]
#[derive(Debug, Clone, Default)]
pub struct PageTableMappingLevel {
pub from: u64,
pub to: u64,
pub pfn: u64,
pub pgtables: usize,
}
#[repr(C)]
#[derive(Debug, Clone, Default)]
pub struct PageTableMapping {
pub area: PageTableMappingLevel,
pub levels: [PageTableMappingLevel; X86_PGTABLE_LEVELS as usize],
}
pub const X86_PAGE_TABLE_MAX_MAPPINGS: usize = 2;
#[repr(C)]
#[derive(Debug, Clone, Default)]
pub struct PageTable {
pub mappings_count: usize,
pub mappings: [PageTableMapping; X86_PAGE_TABLE_MAX_MAPPINGS],
}
#[repr(C)]
#[derive(Debug)]
pub struct StartInfoConsole {
pub mfn: u64,
pub evtchn: u32,
}
pub const MAX_GUEST_CMDLINE: usize = 1024;
#[repr(C)]
#[derive(Debug)]
pub struct StartInfo {
pub magic: [c_char; 32],
pub nr_pages: u64,
pub shared_info: u64,
pub flags: u32,
pub store_mfn: u64,
pub store_evtchn: u32,
pub console: StartInfoConsole,
pub pt_base: u64,
pub nr_pt_frames: u64,
pub mfn_list: u64,
pub mod_start: u64,
pub mod_len: u64,
pub cmdline: [c_char; MAX_GUEST_CMDLINE],
pub first_p2m_pfn: u64,
pub nr_p2m_frames: u64,
}
pub const X86_GUEST_MAGIC: &str = "xen-3.0-x86_64";
#[repr(C)]
#[derive(Debug)]
pub struct ArchVcpuInfo {
pub cr2: u64,
pub pad: u64,
}
#[repr(C)]
#[derive(Debug)]
pub struct VcpuInfoTime {
pub version: u32,
pub pad0: u32,
pub tsc_timestamp: u64,
pub system_time: u64,
pub tsc_to_system_mul: u32,
pub tsc_shift: i8,
pub flags: u8,
pub pad1: [u8; 2],
}
#[repr(C)]
#[derive(Debug)]
pub struct VcpuInfo {
pub evtchn_upcall_pending: u8,
pub evtchn_upcall_mask: u8,
pub evtchn_pending_sel: u64,
pub arch_vcpu_info: ArchVcpuInfo,
pub vcpu_info_time: VcpuInfoTime,
}
#[repr(C)]
#[derive(Debug)]
pub struct SharedInfo {
pub vcpu_info: [VcpuInfo; 32],
pub evtchn_pending: [u64; u64::BITS as usize],
pub evtchn_mask: [u64; u64::BITS as usize],
pub wc_version: u32,
pub wc_sec: u32,
pub wc_nsec: u32,
pub wc_sec_hi: u32,
// arch shared info
pub max_pfn: u64,
pub pfn_to_mfn_frame_list_list: u64,
pub nmi_reason: u64,
pub p2m_cr3: u64,
pub p2m_vaddr: u64,
pub p2m_generation: u64,
}
pub struct X86BootSetup {
table: PageTable,
}
#[derive(Debug)]
struct VmemRange {
start: u64,
end: u64,
_flags: u32,
_nid: u32,
}
impl Default for X86BootSetup {
fn default() -> Self {
Self::new()
}
}
impl X86BootSetup {
pub fn new() -> X86BootSetup {
X86BootSetup {
table: PageTable::default(),
}
}
const PAGE_PRESENT: u64 = 0x001;
const PAGE_RW: u64 = 0x002;
const PAGE_USER: u64 = 0x004;
const PAGE_ACCESSED: u64 = 0x020;
const PAGE_DIRTY: u64 = 0x040;
fn get_pg_prot(&mut self, l: usize, pfn: u64) -> u64 {
let prot = [
X86BootSetup::PAGE_PRESENT | X86BootSetup::PAGE_RW | X86BootSetup::PAGE_ACCESSED,
X86BootSetup::PAGE_PRESENT
| X86BootSetup::PAGE_RW
| X86BootSetup::PAGE_ACCESSED
| X86BootSetup::PAGE_DIRTY
| X86BootSetup::PAGE_USER,
X86BootSetup::PAGE_PRESENT
| X86BootSetup::PAGE_RW
| X86BootSetup::PAGE_ACCESSED
| X86BootSetup::PAGE_DIRTY
| X86BootSetup::PAGE_USER,
X86BootSetup::PAGE_PRESENT
| X86BootSetup::PAGE_RW
| X86BootSetup::PAGE_ACCESSED
| X86BootSetup::PAGE_DIRTY
| X86BootSetup::PAGE_USER,
];
let prot = prot[l];
if l > 0 {
return prot;
}
for m in 0..self.table.mappings_count {
let map = &self.table.mappings[m];
let pfn_s = map.levels[(X86_PGTABLE_LEVELS - 1) as usize].pfn;
let pfn_e = map.area.pgtables as u64 + pfn_s;
if pfn >= pfn_s && pfn < pfn_e {
return prot & !X86BootSetup::PAGE_RW;
}
}
prot
}
fn count_page_tables(
&mut self,
setup: &mut BootSetup,
from: u64,
to: u64,
pfn: u64,
) -> Result<usize> {
debug!("counting pgtables from={} to={} pfn={}", from, to, pfn);
if self.table.mappings_count == X86_PAGE_TABLE_MAX_MAPPINGS {
return Err(Error::MemorySetupFailed);
}
let m = self.table.mappings_count;
let pfn_end = pfn + ((to - from) >> X86_PAGE_SHIFT);
if pfn_end >= setup.phys.p2m_size() {
return Err(Error::MemorySetupFailed);
}
for idx in 0..self.table.mappings_count {
if from < self.table.mappings[idx].area.to && to > self.table.mappings[idx].area.from {
return Err(Error::MemorySetupFailed);
}
}
let mut map = PageTableMapping::default();
map.area.from = from & X86_VIRT_MASK;
map.area.to = to & X86_VIRT_MASK;
for l in (0usize..X86_PGTABLE_LEVELS as usize).rev() {
map.levels[l].pfn = setup.pfn_alloc_end + map.area.pgtables as u64;
if l as u64 == X86_PGTABLE_LEVELS - 1 {
if self.table.mappings_count == 0 {
map.levels[l].from = 0;
map.levels[l].to = X86_VIRT_MASK;
map.levels[l].pgtables = 1;
map.area.pgtables += 1;
}
continue;
}
let bits = X86_PAGE_SHIFT + (l + 1) as u64 * X86_PGTABLE_LEVEL_SHIFT;
let mask = BootSetup::bits_to_mask(bits);
map.levels[l].from = map.area.from & !mask;
map.levels[l].to = map.area.to | mask;
for cmp in &mut self.table.mappings[0..self.table.mappings_count] {
if cmp.levels[l].from == cmp.levels[l].to {
continue;
}
if map.levels[l].from >= cmp.levels[l].from && map.levels[l].to <= cmp.levels[l].to
{
map.levels[l].from = 0;
map.levels[l].to = 0;
break;
}
if map.levels[l].from >= cmp.levels[l].from
&& map.levels[l].from <= cmp.levels[l].to
{
map.levels[l].from = cmp.levels[l].to + 1;
}
if map.levels[l].to >= cmp.levels[l].from && map.levels[l].to <= cmp.levels[l].to {
map.levels[l].to = cmp.levels[l].from - 1;
}
}
if map.levels[l].from < map.levels[l].to {
map.levels[l].pgtables =
(((map.levels[l].to - map.levels[l].from) >> bits) + 1) as usize;
}
debug!(
"BootSetup count_pgtables {:#x}/{}: {:#x} -> {:#x}, {} tables",
mask, bits, map.levels[l].from, map.levels[l].to, map.levels[l].pgtables
);
map.area.pgtables += map.levels[l].pgtables;
}
self.table.mappings[m] = map;
Ok(m)
}
}
impl ArchBootSetup for X86BootSetup {
fn page_size(&mut self) -> u64 {
X86_PAGE_SIZE
}
fn page_shift(&mut self) -> u64 {
X86_PAGE_SHIFT
}
fn alloc_p2m_segment(
&mut self,
setup: &mut BootSetup,
image_info: &BootImageInfo,
) -> Result<DomainSegment> {
let mut p2m_alloc_size =
((setup.phys.p2m_size() * 8) + X86_PAGE_SIZE - 1) & !(X86_PAGE_SIZE - 1);
let from = image_info.virt_p2m_base;
let to = from + p2m_alloc_size - 1;
let m = self.count_page_tables(setup, from, to, setup.pfn_alloc_end)?;
let pgtables: usize;
{
let map = &mut self.table.mappings[m];
map.area.pfn = setup.pfn_alloc_end;
for lvl_idx in 0..4 {
map.levels[lvl_idx].pfn += p2m_alloc_size >> X86_PAGE_SHIFT;
}
pgtables = map.area.pgtables;
}
self.table.mappings_count += 1;
p2m_alloc_size += (pgtables << X86_PAGE_SHIFT) as u64;
let p2m_segment = setup.alloc_segment(self, 0, p2m_alloc_size)?;
Ok(p2m_segment)
}
fn alloc_page_tables(
&mut self,
setup: &mut BootSetup,
image_info: &BootImageInfo,
) -> Result<DomainSegment> {
let mut extra_pages = 1;
extra_pages += (512 * 1024) / X86_PAGE_SIZE;
let mut pages = extra_pages;
let mut try_virt_end: u64;
let mut m: usize;
loop {
try_virt_end = BootSetup::round_up(
setup.virt_alloc_end + pages * X86_PAGE_SIZE,
BootSetup::bits_to_mask(22),
);
m = self.count_page_tables(setup, image_info.virt_base, try_virt_end, 0)?;
pages = self.table.mappings[m].area.pgtables as u64 + extra_pages;
if setup.virt_alloc_end + pages * X86_PAGE_SIZE <= try_virt_end + 1 {
break;
}
}
self.table.mappings[m].area.pfn = 0;
self.table.mappings_count += 1;
setup.virt_pgtab_end = try_virt_end + 1;
let size = self.table.mappings[m].area.pgtables as u64 * X86_PAGE_SIZE;
let segment = setup.alloc_segment(self, 0, size)?;
debug!(
"BootSetup alloc_page_tables table={:?} segment={:?}",
self.table, segment
);
Ok(segment)
}
fn setup_page_tables(&mut self, setup: &mut BootSetup, state: &mut BootState) -> Result<()> {
let p2m_guest = unsafe {
slice::from_raw_parts_mut(
state.p2m_segment.addr as *mut u64,
setup.phys.p2m_size() as usize,
)
};
copy(p2m_guest, &setup.phys.p2m);
for l in (0usize..X86_PGTABLE_LEVELS as usize).rev() {
for m1 in 0usize..self.table.mappings_count {
let map1 = &self.table.mappings[m1];
let from = map1.levels[l].from;
let to = map1.levels[l].to;
let pg_ptr = setup.phys.pfn_to_ptr(map1.levels[l].pfn, 0)? as *mut u64;
for m2 in 0usize..self.table.mappings_count {
let map2 = &self.table.mappings[m2];
let lvl = if l > 0 {
&map2.levels[l - 1]
} else {
&map2.area
};
if l > 0 && lvl.pgtables == 0 {
continue;
}
if lvl.from >= to || lvl.to <= from {
continue;
}
let p_s = (max(from, lvl.from) - from)
>> (X86_PAGE_SHIFT + l as u64 * X86_PGTABLE_LEVEL_SHIFT);
let p_e = (min(to, lvl.to) - from)
>> (X86_PAGE_SHIFT + l as u64 * X86_PGTABLE_LEVEL_SHIFT);
let rhs = X86_PAGE_SHIFT as usize + l * X86_PGTABLE_LEVEL_SHIFT as usize;
let mut pfn = ((max(from, lvl.from) - lvl.from) >> rhs) + lvl.pfn;
debug!(
"BootSetup setup_page_tables lvl={} map_1={} map_2={} pfn={:#x} p_s={:#x} p_e={:#x}",
l, m1, m2, pfn, p_s, p_e
);
let pg = unsafe { slice::from_raw_parts_mut(pg_ptr, (p_e + 1) as usize) };
for p in p_s..p_e + 1 {
let prot = self.get_pg_prot(l, pfn);
let pfn_paddr = setup.phys.p2m[pfn as usize] << X86_PAGE_SHIFT;
let value = pfn_paddr | prot;
pg[p as usize] = value;
pfn += 1;
}
}
}
}
Ok(())
}
fn setup_start_info(
&mut self,
setup: &mut BootSetup,
state: &BootState,
cmdline: &str,
) -> Result<()> {
let ptr = setup.phys.pfn_to_ptr(state.start_info_segment.pfn, 1)?;
let byte_slice =
unsafe { slice::from_raw_parts_mut(ptr as *mut u8, X86_PAGE_SIZE as usize) };
byte_slice.fill(0);
let info = ptr as *mut StartInfo;
unsafe {
for (i, c) in X86_GUEST_MAGIC.chars().enumerate() {
(*info).magic[i] = c as c_char;
}
(*info).magic[X86_GUEST_MAGIC.len()] = 0 as c_char;
(*info).nr_pages = setup.total_pages;
(*info).shared_info = state.shared_info_frame << X86_PAGE_SHIFT;
(*info).pt_base = state.page_table_segment.vstart;
(*info).nr_pt_frames = self.table.mappings[0].area.pgtables as u64;
(*info).mfn_list = state.p2m_segment.vstart;
(*info).first_p2m_pfn = state.p2m_segment.pfn;
(*info).nr_p2m_frames = state.p2m_segment.pages;
(*info).flags = 0;
(*info).store_evtchn = state.store_evtchn;
(*info).store_mfn = setup.phys.p2m[state.xenstore_segment.pfn as usize];
(*info).console.mfn = setup.phys.p2m[state.console_segment.pfn as usize];
(*info).console.evtchn = state.console_evtchn;
(*info).mod_start = state.initrd_segment.vstart;
(*info).mod_len = state.initrd_segment.size;
for (i, c) in cmdline.chars().enumerate() {
(*info).cmdline[i] = c as c_char;
}
(*info).cmdline[MAX_GUEST_CMDLINE - 1] = 0;
trace!("BootSetup setup_start_info start_info={:?}", *info);
}
Ok(())
}
fn setup_shared_info(&mut self, setup: &mut BootSetup, shared_info_frame: u64) -> Result<()> {
let info = setup
.phys
.map_foreign_pages(shared_info_frame, X86_PAGE_SIZE)?
as *mut SharedInfo;
unsafe {
let size = size_of::<SharedInfo>();
let info_as_buff = slice::from_raw_parts_mut(info as *mut u8, size);
info_as_buff.fill(0);
for i in 0..32 {
(*info).vcpu_info[i].evtchn_upcall_mask = 1;
}
trace!("BootSetup setup_shared_info shared_info={:?}", *info);
}
Ok(())
}
fn setup_hypercall_page(
&mut self,
setup: &mut BootSetup,
image_info: &BootImageInfo,
) -> Result<()> {
if image_info.virt_hypercall == XEN_UNSET_ADDR {
return Ok(());
}
let pfn = (image_info.virt_hypercall - image_info.virt_base) >> X86_PAGE_SHIFT;
let mfn = setup.phys.p2m[pfn as usize];
setup.call.hypercall_init(setup.domid, mfn)?;
Ok(())
}
fn meminit(&mut self, setup: &mut BootSetup, total_pages: u64) -> Result<()> {
setup.call.claim_pages(setup.domid, total_pages)?;
let mut vmemranges: Vec<VmemRange> = Vec::new();
let stub = VmemRange {
start: 0,
end: total_pages << XEN_PAGE_SHIFT,
_flags: 0,
_nid: 0,
};
vmemranges.push(stub);
let mut p2m_size: u64 = 0;
let mut total: u64 = 0;
for range in &vmemranges {
total += (range.end - range.start) >> XEN_PAGE_SHIFT;
p2m_size = p2m_size.max(range.end >> XEN_PAGE_SHIFT);
}
if total != total_pages {
return Err(Error::MemorySetupFailed);
}
setup.total_pages = total;
let mut p2m = vec![u64::MAX; p2m_size as usize];
for range in &vmemranges {
let mut extents_init = vec![0u64; SUPERPAGE_BATCH_SIZE as usize];
let pages = (range.end - range.start) >> XEN_PAGE_SHIFT;
let pfn_base = range.start >> XEN_PAGE_SHIFT;
for pfn in pfn_base..pfn_base + pages {
p2m[pfn as usize] = pfn;
}
let mut super_pages = pages >> SUPERPAGE_2MB_SHIFT;
let mut pfn_base_idx: u64 = pfn_base;
while super_pages > 0 {
let count = super_pages.min(SUPERPAGE_BATCH_SIZE);
super_pages -= count;
let mut j: usize = 0;
let mut pfn: u64 = pfn_base_idx;
loop {
if pfn >= pfn_base_idx + (count << SUPERPAGE_2MB_SHIFT) {
break;
}
extents_init[j] = p2m[pfn as usize];
pfn += SUPERPAGE_2MB_NR_PFNS;
j += 1;
}
let extents_init_slice = extents_init.as_slice();
let extents = setup.call.populate_physmap(
setup.domid,
count,
SUPERPAGE_2MB_SHIFT as u32,
0,
&extents_init_slice[0usize..count as usize],
)?;
pfn = pfn_base_idx;
for mfn in extents {
for k in 0..SUPERPAGE_2MB_NR_PFNS {
p2m[pfn as usize] = mfn + k;
pfn += 1;
}
}
pfn_base_idx = pfn;
}
let mut j = pfn_base_idx - pfn_base;
loop {
if j >= pages {
break;
}
let allocsz = (1024 * 1024).min(pages - j);
let p2m_idx = (pfn_base + j) as usize;
let p2m_end_idx = p2m_idx + allocsz as usize;
let input_extent_starts = &p2m[p2m_idx..p2m_end_idx];
let result =
setup
.call
.populate_physmap(setup.domid, allocsz, 0, 0, input_extent_starts)?;
if result.len() != allocsz as usize {
return Err(Error::PopulatePhysmapFailed(
allocsz as usize,
result.len(),
input_extent_starts.len(),
));
}
for (i, item) in result.iter().enumerate() {
let p = (pfn_base + j + i as u64) as usize;
let m = *item;
p2m[p] = m;
}
j += allocsz;
}
}
setup.phys.load_p2m(p2m);
setup.call.claim_pages(setup.domid, 0)?;
Ok(())
}
fn bootlate(&mut self, setup: &mut BootSetup, state: &mut BootState) -> Result<()> {
let pg_pfn = state.page_table_segment.pfn;
let pg_mfn = setup.phys.p2m[pg_pfn as usize];
setup.phys.unmap(pg_pfn)?;
setup.phys.unmap(state.p2m_segment.pfn)?;
setup
.call
.mmuext(setup.domid, MMUEXT_PIN_L4_TABLE, pg_mfn, 0)?;
Ok(())
}
fn vcpu(&mut self, setup: &mut BootSetup, state: &mut BootState) -> Result<()> {
let pg_pfn = state.page_table_segment.pfn;
let pg_mfn = setup.phys.p2m[pg_pfn as usize];
let mut vcpu = VcpuGuestContext::default();
vcpu.user_regs.rip = state.image_info.virt_entry;
vcpu.user_regs.rsp =
state.image_info.virt_base + (state.boot_stack_segment.pfn + 1) * self.page_size();
vcpu.user_regs.rsi =
state.image_info.virt_base + (state.start_info_segment.pfn) * self.page_size();
vcpu.user_regs.rflags = 1 << 9;
vcpu.debugreg[6] = 0xffff0ff0;
vcpu.debugreg[7] = 0x00000400;
vcpu.flags = VGCF_IN_KERNEL | VGCF_ONLINE;
let cr3_pfn = pg_mfn;
debug!(
"cr3: pfn {:#x} mfn {:#x}",
state.page_table_segment.pfn, cr3_pfn
);
vcpu.ctrlreg[3] = cr3_pfn << 12;
vcpu.user_regs.ds = 0x0;
vcpu.user_regs.es = 0x0;
vcpu.user_regs.fs = 0x0;
vcpu.user_regs.gs = 0x0;
vcpu.user_regs.ss = 0xe02b;
vcpu.user_regs.cs = 0xe033;
vcpu.kernel_ss = vcpu.user_regs.ss as u64;
vcpu.kernel_sp = vcpu.user_regs.rsp;
debug!("vcpu context: {:?}", vcpu);
setup.call.set_vcpu_context(setup.domid, 0, &vcpu)?;
Ok(())
}
}