feat: pci passthrough

This commit is contained in:
Alex Zenla 2024-04-23 22:36:36 -07:00
parent 95fbc62486
commit afbc0a88bd
No known key found for this signature in database
GPG Key ID: 067B238899B51269
13 changed files with 530 additions and 10 deletions

1
Cargo.lock generated
View File

@ -1602,6 +1602,7 @@ dependencies = [
"log",
"memchr",
"nix 0.28.0",
"regex",
"slice-copy",
"thiserror",
"tokio",

View File

@ -64,6 +64,7 @@ prost-types = "0.12.4"
rand = "0.8.5"
ratatui = "0.26.2"
redb = "2.1.0"
regex = "1.10.4"
rtnetlink = "0.14.1"
scopeguard = "1.2.0"
serde_json = "1.0.116"

View File

@ -150,6 +150,7 @@ impl GuestStarter<'_> {
initrd,
vcpus: spec.vcpus,
mem: spec.mem,
pcis: vec![],
env: task
.environment
.iter()

View File

@ -47,7 +47,7 @@ impl Default for ImageName {
}
impl ImageName {
pub const DOCKER_HUB_MIRROR: &'static str = "registry.docker.io";
pub const DOCKER_HUB_MIRROR: &'static str = "registry-1.docker.io";
pub const DEFAULT_IMAGE_TAG: &'static str = "latest";
pub fn parse(name: &str) -> Result<Self> {

View File

@ -13,7 +13,8 @@ use krata::launchcfg::{
use krataoci::packer::OciPackedImage;
use tokio::sync::Semaphore;
use uuid::Uuid;
use xenclient::{DomainChannel, DomainConfig, DomainDisk, DomainNetworkInterface};
pub use xenclient::pci::PciBdf;
use xenclient::{DomainChannel, DomainConfig, DomainDisk, DomainNetworkInterface, DomainPciDevice};
use xenstore::XsdInterface;
use crate::cfgblk::ConfigBlock;
@ -31,6 +32,7 @@ pub struct GuestLaunchRequest {
pub mem: u64,
pub env: HashMap<String, String>,
pub run: Option<Vec<String>>,
pub pcis: Vec<PciBdf>,
pub debug: bool,
pub image: OciPackedImage,
}
@ -204,6 +206,12 @@ impl GuestLauncher {
bridge: None,
script: None,
}],
pcis: request
.pcis
.clone()
.into_iter()
.map(|bdf| DomainPciDevice { bdf })
.collect::<Vec<_>>(),
filesystems: vec![],
event_channels: vec![],
extra_keys,

View File

@ -4,15 +4,16 @@ pub mod sys;
use crate::error::{Error, Result};
use crate::sys::{
AddressSize, CreateDomain, DomCtl, DomCtlValue, DomCtlVcpuContext, EvtChnAllocUnbound,
GetDomainInfo, GetPageFrameInfo3, Hypercall, HypercallInit, MaxMem, MaxVcpus, MemoryMap,
MemoryReservation, MmapBatch, MmapResource, MmuExtOp, MultiCallEntry, VcpuGuestContext,
VcpuGuestContextAny, XenCapabilitiesInfo, HYPERVISOR_DOMCTL, HYPERVISOR_EVENT_CHANNEL_OP,
HYPERVISOR_MEMORY_OP, HYPERVISOR_MMUEXT_OP, HYPERVISOR_MULTICALL, HYPERVISOR_XEN_VERSION,
XENVER_CAPABILITIES, XEN_DOMCTL_CREATEDOMAIN, XEN_DOMCTL_DESTROYDOMAIN,
GetDomainInfo, GetPageFrameInfo3, Hypercall, HypercallInit, IoMemPermission, IoPortPermission,
MaxMem, MaxVcpus, MemoryMap, MemoryReservation, MmapBatch, MmapResource, MmuExtOp,
MultiCallEntry, VcpuGuestContext, VcpuGuestContextAny, XenCapabilitiesInfo, HYPERVISOR_DOMCTL,
HYPERVISOR_EVENT_CHANNEL_OP, HYPERVISOR_MEMORY_OP, HYPERVISOR_MMUEXT_OP, HYPERVISOR_MULTICALL,
HYPERVISOR_XEN_VERSION, XENVER_CAPABILITIES, XEN_DOMCTL_CREATEDOMAIN, XEN_DOMCTL_DESTROYDOMAIN,
XEN_DOMCTL_GETDOMAININFO, XEN_DOMCTL_GETPAGEFRAMEINFO3, XEN_DOMCTL_GETVCPUCONTEXT,
XEN_DOMCTL_HYPERCALL_INIT, XEN_DOMCTL_MAX_MEM, XEN_DOMCTL_MAX_VCPUS, XEN_DOMCTL_PAUSEDOMAIN,
XEN_DOMCTL_SETVCPUCONTEXT, XEN_DOMCTL_SET_ADDRESS_SIZE, XEN_DOMCTL_UNPAUSEDOMAIN,
XEN_MEM_CLAIM_PAGES, XEN_MEM_MEMORY_MAP, XEN_MEM_POPULATE_PHYSMAP,
XEN_DOMCTL_HYPERCALL_INIT, XEN_DOMCTL_IOMEM_PERMISSION, XEN_DOMCTL_IOPORT_PERMISSION,
XEN_DOMCTL_MAX_MEM, XEN_DOMCTL_MAX_VCPUS, XEN_DOMCTL_PAUSEDOMAIN, XEN_DOMCTL_SETVCPUCONTEXT,
XEN_DOMCTL_SET_ADDRESS_SIZE, XEN_DOMCTL_UNPAUSEDOMAIN, XEN_MEM_CLAIM_PAGES, XEN_MEM_MEMORY_MAP,
XEN_MEM_POPULATE_PHYSMAP,
};
use libc::{c_int, mmap, usleep, MAP_FAILED, MAP_SHARED, PROT_READ, PROT_WRITE};
use log::trace;
@ -671,4 +672,68 @@ impl XenCall {
.await
.map(|_| ())
}
/// Issues the `XEN_DOMCTL_IOMEM_PERMISSION` domctl for `domid`.
///
/// The request covers `nr_mfns` frames starting at `first_mfn`. The `allow`
/// flag is sent as `1` when true and `0` when false.
///
/// # Errors
///
/// Returns the error produced by the underlying hypercall.
pub async fn iomem_permission(
    &self,
    domid: u32,
    first_mfn: u64,
    nr_mfns: u64,
    allow: bool,
) -> Result<()> {
    trace!(
        "domctl fd={} iomem_permission domid={} first_mfn={:#x}, nr_mfns={:#x} allow={}",
        self.handle.as_raw_fd(),
        domid,
        first_mfn,
        nr_mfns,
        allow,
    );

    // Build the operation payload first, then wrap it in the domctl envelope.
    let payload = IoMemPermission {
        first_mfn,
        nr_mfns,
        allow: u8::from(allow),
    };
    let mut request = DomCtl {
        cmd: XEN_DOMCTL_IOMEM_PERMISSION,
        interface_version: self.domctl_interface_version,
        domid,
        value: DomCtlValue {
            iomem_permission: payload,
        },
    };

    // The hypercall receives the address of the request structure.
    self.hypercall1(HYPERVISOR_DOMCTL, addr_of_mut!(request) as c_ulong)
        .await?;
    Ok(())
}
/// Issues the `XEN_DOMCTL_IOPORT_PERMISSION` domctl for `domid`.
///
/// The request covers `nr_ports` ports starting at `first_port`. The `allow`
/// flag is sent as `1` when true and `0` when false.
///
/// # Errors
///
/// Returns the error produced by the underlying hypercall.
pub async fn ioport_permission(
    &self,
    domid: u32,
    first_port: u32,
    nr_ports: u32,
    allow: bool,
) -> Result<()> {
    trace!(
        "domctl fd={} ioport_permission domid={} first_port={:#x}, nr_ports={:#x} allow={}",
        self.handle.as_raw_fd(),
        domid,
        first_port,
        nr_ports,
        allow,
    );

    // Build the operation payload first, then wrap it in the domctl envelope.
    let payload = IoPortPermission {
        first_port,
        nr_ports,
        allow: u8::from(allow),
    };
    let mut request = DomCtl {
        cmd: XEN_DOMCTL_IOPORT_PERMISSION,
        interface_version: self.domctl_interface_version,
        domid,
        value: DomCtlValue {
            ioport_permission: payload,
        },
    };

    // The hypercall receives the address of the request structure.
    self.hypercall1(HYPERVISOR_DOMCTL, addr_of_mut!(request) as c_ulong)
        .await?;
    Ok(())
}
}

View File

@ -237,6 +237,8 @@ pub union DomCtlValue {
pub vcpu_context: DomCtlVcpuContext,
pub address_size: AddressSize,
pub get_page_frame_info: GetPageFrameInfo3,
pub ioport_permission: IoPortPermission,
pub iomem_permission: IoMemPermission,
pub pad: [u8; 128],
}
@ -309,6 +311,22 @@ pub struct GetPageFrameInfo3 {
pub array: c_ulong,
}
/// C-layout payload for an I/O port permission domctl request.
#[repr(C)]
#[derive(Clone, Copy, Debug)]
pub struct IoPortPermission {
    /// First port of the range.
    pub first_port: u32,
    /// Number of ports in the range.
    pub nr_ports: u32,
    /// Access flag as a byte; callers in this crate write `1` or `0`.
    pub allow: u8,
}
/// C-layout payload for an I/O memory permission domctl request.
#[repr(C)]
#[derive(Clone, Copy, Debug)]
pub struct IoMemPermission {
    /// First frame number of the range.
    pub first_mfn: u64,
    /// Number of frames in the range.
    pub nr_mfns: u64,
    /// Access flag as a byte; callers in this crate write `1` or `0`.
    pub allow: u8,
}
#[repr(C)]
#[derive(Copy, Clone, Debug, Default)]
#[cfg(target_arch = "x86_64")]

View File

@ -18,6 +18,7 @@ krata-xencall = { path = "../xencall", version = "^0.0.10" }
krata-xenstore = { path = "../xenstore", version = "^0.0.10" }
memchr = { workspace = true }
nix = { workspace = true }
regex = { workspace = true }
slice-copy = { workspace = true }
thiserror = { workspace = true }
tokio = { workspace = true }
@ -34,3 +35,7 @@ name = "xenclient"
[[example]]
name = "xenclient-boot"
path = "examples/boot.rs"
[[example]]
name = "xenclient-pci"
path = "examples/pci.rs"

View File

@ -27,6 +27,7 @@ async fn main() -> Result<()> {
disks: vec![],
channels: vec![],
vifs: vec![],
pcis: vec![],
filesystems: vec![],
extra_keys: vec![],
extra_rw_paths: vec![],

View File

@ -0,0 +1,32 @@
use xenclient::pci::*;
use xenclient::error::Result;
// Lists every device found in the pciback driver directory together with its
// slot/assignment status and its non-empty resources.
#[tokio::main]
async fn main() -> Result<()> {
    let pciback = XenPciBackend::new();
    let loaded = pciback.is_loaded().await?;
    if !loaded {
        return Err(xenclient::error::Error::GenericError(
            "xen-pciback module not loaded".to_string(),
        ));
    }

    println!("assignable devices:");
    let devices = pciback.list_devices().await?;
    for bdf in devices {
        let assigned = pciback.is_assigned(&bdf).await?;
        let slot = pciback.has_slot(&bdf).await?;
        println!("{} slot={} assigned={}", bdf, slot, assigned);

        for bar in pciback.read_resources(&bdf).await? {
            println!(
                " resource start={:#x} end={:#x} flags={:#x} bar-io={}",
                bar.start,
                bar.end,
                bar.flags,
                bar.is_bar_io()
            );
        }
    }
    Ok(())
}

View File

@ -34,6 +34,14 @@ pub enum Error {
ElfInvalidImage,
#[error("provided elf image does not contain xen support")]
ElfXenSupportMissing,
#[error("regex error: {0}")]
RegexError(#[from] regex::Error),
#[error("error: {0}")]
GenericError(String),
#[error("failed to parse int: {0}")]
ParseIntError(#[from] std::num::ParseIntError),
#[error("invalid pci bdf string")]
InvalidPciBdfString,
}
pub type Result<T> = std::result::Result<T, Error>;

View File

@ -21,6 +21,8 @@ use crate::elfloader::ElfImageLoader;
use crate::error::{Error, Result};
use boot::BootState;
use log::{debug, trace, warn};
use pci::{PciBdf, XenPciBackend};
use sys::XEN_PAGE_SHIFT;
use tokio::time::timeout;
use std::path::PathBuf;
@ -33,6 +35,8 @@ use xenstore::{
XsPermission, XsdClient, XsdInterface, XS_PERM_NONE, XS_PERM_READ, XS_PERM_READ_WRITE,
};
pub mod pci;
#[derive(Clone)]
pub struct XenClient {
pub store: XsdClient,
@ -78,6 +82,11 @@ pub struct DomainEventChannel {
pub name: String,
}
/// A PCI device to attach to a domain, identified by its address.
#[derive(Debug, Clone)]
pub struct DomainPciDevice {
    /// Address of the device to attach.
    pub bdf: PciBdf,
}
#[derive(Clone, Debug)]
pub struct DomainConfig {
pub backend_domid: u32,
@ -93,6 +102,7 @@ pub struct DomainConfig {
pub vifs: Vec<DomainNetworkInterface>,
pub filesystems: Vec<DomainFilesystem>,
pub event_channels: Vec<DomainEventChannel>,
pub pcis: Vec<DomainPciDevice>,
pub extra_keys: Vec<(String, String)>,
pub extra_rw_paths: Vec<String>,
}
@ -411,6 +421,19 @@ impl XenClient {
.await?;
}
for (index, pci) in config.pcis.iter().enumerate() {
self.pci_device_add(
&dom_path,
&backend_dom_path,
config.backend_domid,
domid,
index,
config.pcis.len(),
pci,
)
.await?;
}
for channel in &config.event_channels {
let id = self
.call
@ -645,6 +668,96 @@ impl XenClient {
Ok(())
}
/// Grants `domid` access to the resources of `device` and records the device
/// under the PCI backend node in xenstore.
///
/// For every resource reported by [`XenPciBackend::read_resources`]:
/// * I/O BARs are granted with `ioport_permission` over `[start, start + size)`;
/// * all other resources are granted with `iomem_permission` over the frames
///   covering the resource, with the frame count rounded **up** to whole pages.
///
/// When `index` is `0` the shared `pci` device node is created through
/// `device_add`, carrying `num_devs = device_count`. For every device the
/// `key-N`, `dev-N`, optional `vdefn-N` and `opts-N` entries are then written
/// under the backend path, where `N` is `index`.
///
/// # Errors
///
/// Propagates failures from reading the sysfs resource file, from the
/// permission hypercalls and from xenstore writes.
#[allow(clippy::too_many_arguments)]
async fn pci_device_add(
    &self,
    dom_path: &str,
    backend_dom_path: &str,
    backend_domid: u32,
    domid: u32,
    index: usize,
    device_count: usize,
    device: &DomainPciDevice,
) -> Result<()> {
    let backend = XenPciBackend::new();
    let resources = backend.read_resources(&device.bdf).await?;
    for resource in resources {
        if resource.is_bar_io() {
            // NOTE(review): `as u32` truncates silently if a port range does not fit in 32 bits.
            self.call
                .ioport_permission(domid, resource.start as u32, resource.size() as u32, true)
                .await?;
        } else {
            // Round the byte size up to whole pages: add (page_size - 1) before
            // shifting. Adding (XEN_PAGE_SHIFT - 1) would under-count any region
            // that is not an exact page multiple and grant 0 pages for small BARs.
            let first_mfn = resource.start >> XEN_PAGE_SHIFT;
            let nr_mfns =
                (resource.size() + ((1u64 << XEN_PAGE_SHIFT) - 1)) >> XEN_PAGE_SHIFT;
            self.call
                .iomem_permission(domid, first_mfn, nr_mfns, true)
                .await?;
        }
    }

    // NOTE(review): fixed device id shared by all PCI devices of the domain;
    // the reason for the value 60 is not visible here.
    let id = 60;

    // The shared device node is only created once, for the first device.
    if index == 0 {
        let backend_items: Vec<(&str, String)> = vec![
            ("frontend-id", domid.to_string()),
            ("online", "1".to_string()),
            ("state", "1".to_string()),
            ("num_devs", device_count.to_string()),
        ];

        let frontend_items: Vec<(&str, String)> = vec![
            ("backend-id", backend_domid.to_string()),
            ("state", "1".to_string()),
        ];

        self.device_add(
            "pci",
            id,
            dom_path,
            backend_dom_path,
            backend_domid,
            domid,
            frontend_items,
            backend_items,
        )
        .await?;
    }

    let backend_path = format!("{}/backend/{}/{}/{}", backend_dom_path, "pci", domid, id);

    self.store
        .write_string(
            format!("{}/key-{}", backend_path, index),
            &device.bdf.to_string(),
        )
        .await?;

    self.store
        .write_string(
            format!("{}/dev-{}", backend_path, index),
            &device.bdf.to_string(),
        )
        .await?;

    if let Some(vdefn) = device.bdf.vdefn {
        self.store
            .write_string(
                format!("{}/vdefn-{}", backend_path, index),
                &format!("{:#x}", vdefn),
            )
            .await?;
    }

    self.store
        .write_string(
            format!("{}/opts-{}", backend_path, index),
            "msitranslate=0,power_mgmt=0,permissive=0,rdm_policy=0",
        )
        .await?;
    Ok(())
}
#[allow(clippy::too_many_arguments)]
async fn device_add(
&self,

View File

@ -0,0 +1,267 @@
use regex::Regex;
use std::{fmt::Display, path::PathBuf, str::FromStr};
use tokio::fs;
use crate::error::{Error, Result};
/// Sysfs directory of the `pciback` PCI driver.
const PCIBACK_SYSFS_PATH: &str = "/sys/bus/pci/drivers/pciback";
/// Full address: `dddd:bb:dd.f` (lowercase hex; domain, bus, device, function).
///
/// The separator before the function digit is escaped so that only a literal
/// `.` is accepted; an unescaped `.` would match any character.
const PCI_BDF_REGEX: &str = r"^([0-9a-f]{4}):([0-9a-f]{2}):([0-9a-f]{2})\.([0-9a-f]{1})$";
/// Short address without a domain: `bb:dd.f`.
const PCI_BDF_SHORT_REGEX: &str = r"^([0-9a-f]{2}):([0-9a-f]{2})\.([0-9a-f]{1})$";
/// Full address followed by `@vv`, a two-digit hex suffix captured as `vdefn`.
const PCI_BDF_VDEFN_REGEX: &str =
    r"^([0-9a-f]{4}):([0-9a-f]{2}):([0-9a-f]{2})\.([0-9a-f]{1})@([0-9a-f]{2})$";
/// Bit tested by `PciMemoryResource::is_bar_io` in a resource's flags.
const FLAG_PCI_BAR_IO: u64 = 0x1;
/// Handle on the pciback driver directory used to inspect PCI devices.
#[derive(Clone)]
pub struct XenPciBackend {
    /// Directory that all lookups performed through this handle are relative to.
    path: PathBuf,
}
impl Default for XenPciBackend {
fn default() -> Self {
Self::new()
}
}
impl XenPciBackend {
pub fn new() -> Self {
Self {
path: PathBuf::from(PCIBACK_SYSFS_PATH),
}
}
pub async fn is_loaded(&self) -> Result<bool> {
Ok(fs::try_exists(&self.path).await?)
}
pub async fn list_devices(&self) -> Result<Vec<PciBdf>> {
let mut devices = Vec::new();
let mut dir = fs::read_dir(&self.path).await?;
while let Some(entry) = dir.next_entry().await? {
let file_name_string = entry.file_name().to_string_lossy().to_string();
let Some(bdf) = PciBdf::from_str(&file_name_string).ok() else {
continue;
};
devices.push(bdf);
}
Ok(devices)
}
pub async fn is_assigned(&self, bdf: &PciBdf) -> Result<bool> {
let mut path = self.path.clone();
path.push(bdf.to_string());
Ok(fs::try_exists(path).await?)
}
pub async fn read_resources(&self, bdf: &PciBdf) -> Result<Vec<PciMemoryResource>> {
let mut resources = Vec::new();
let mut path = self.path.clone();
path.push(bdf.to_string());
path.push("resource");
let content = fs::read_to_string(&path).await?;
for line in content.lines() {
let parts = line.split(' ').collect::<Vec<_>>();
if parts.len() != 3 {
continue;
}
let Some(start) = parts.first() else {
continue;
};
let Some(end) = parts.get(1) else {
continue;
};
let Some(flags) = parts.get(2) else {
continue;
};
if !start.starts_with("0x") || !end.starts_with("0x") || !flags.starts_with("0x") {
continue;
}
let start = &start[2..];
let end = &end[2..];
let flags = &flags[2..];
let Some(start) = u64::from_str_radix(start, 16).ok() else {
continue;
};
let Some(end) = u64::from_str_radix(end, 16).ok() else {
continue;
};
let Some(flags) = u64::from_str_radix(flags, 16).ok() else {
continue;
};
if start > 0 {
resources.push(PciMemoryResource::new(start, end, flags));
}
}
Ok(resources)
}
pub async fn has_slot(&self, bdf: &PciBdf) -> Result<bool> {
let mut slots_path = self.path.clone();
slots_path.push("slots");
let content = fs::read_to_string(&slots_path).await?;
for line in content.lines() {
if let Ok(slot) = PciBdf::from_str(line) {
if slot == *bdf {
return Ok(true);
}
}
}
Ok(false)
}
}
/// A PCI device address: optional domain, bus, device and function, plus an
/// optional `vdefn` value.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct PciBdf {
    /// PCI domain; `None` when the address was given without one.
    pub domain: Option<u32>,
    /// Bus number.
    pub bus: u16,
    /// Device number.
    pub device: u16,
    /// Function number.
    pub function: u16,
    /// Optional value parsed from / rendered as an `@xx` suffix.
    pub vdefn: Option<u16>,
}

impl PciBdf {
    /// Builds an address from its individual components without validation.
    pub fn new(
        domain: Option<u32>,
        bus: u16,
        device: u16,
        function: u16,
        vdefn: Option<u16>,
    ) -> Self {
        PciBdf {
            domain,
            bus,
            device,
            function,
            vdefn,
        }
    }
}
impl FromStr for PciBdf {
type Err = Error;
fn from_str(s: &str) -> Result<Self> {
let pci_bdf_regex = Regex::from_str(PCI_BDF_REGEX)?;
let pci_bdf_vdefn_regex = Regex::from_str(PCI_BDF_VDEFN_REGEX)?;
let pci_bdf_short_regex = Regex::from_str(PCI_BDF_SHORT_REGEX)?;
if let Some(pci_bdf_captures) = pci_bdf_regex.captures(s) {
let domain = pci_bdf_captures
.get(1)
.ok_or_else(|| Error::GenericError("capture group 1 did not exist".to_string()))?;
let bus = pci_bdf_captures
.get(2)
.ok_or_else(|| Error::GenericError("capture group 2 did not exist".to_string()))?;
let device = pci_bdf_captures
.get(3)
.ok_or_else(|| Error::GenericError("capture group 3 did not exist".to_string()))?;
let function = pci_bdf_captures
.get(4)
.ok_or_else(|| Error::GenericError("capture group 4 did not exist".to_string()))?;
let domain = u32::from_str_radix(domain.as_str(), 16)?;
let bus = u16::from_str_radix(bus.as_str(), 16)?;
let device = u16::from_str_radix(device.as_str(), 16)?;
let function = u16::from_str_radix(function.as_str(), 16)?;
Ok(PciBdf::new(Some(domain), bus, device, function, None))
} else if let Some(pci_bdf_vdefn_captures) = pci_bdf_vdefn_regex.captures(s) {
let domain = pci_bdf_vdefn_captures
.get(1)
.ok_or_else(|| Error::GenericError("capture group 1 did not exist".to_string()))?;
let bus = pci_bdf_vdefn_captures
.get(2)
.ok_or_else(|| Error::GenericError("capture group 2 did not exist".to_string()))?;
let device = pci_bdf_vdefn_captures
.get(3)
.ok_or_else(|| Error::GenericError("capture group 3 did not exist".to_string()))?;
let function = pci_bdf_vdefn_captures
.get(4)
.ok_or_else(|| Error::GenericError("capture group 4 did not exist".to_string()))?;
let vdefn = pci_bdf_vdefn_captures
.get(5)
.ok_or_else(|| Error::GenericError("capture group 5 did not exist".to_string()))?;
let domain = u32::from_str_radix(domain.as_str(), 16)?;
let bus = u16::from_str_radix(bus.as_str(), 16)?;
let device = u16::from_str_radix(device.as_str(), 16)?;
let function = u16::from_str_radix(function.as_str(), 16)?;
let vdefn = u16::from_str_radix(vdefn.as_str(), 16)?;
Ok(PciBdf::new(
Some(domain),
bus,
device,
function,
Some(vdefn),
))
} else if let Some(pci_bdf_short_captures) = pci_bdf_short_regex.captures(s) {
let bus = pci_bdf_short_captures
.get(1)
.ok_or_else(|| Error::GenericError("capture group 1 did not exist".to_string()))?;
let device = pci_bdf_short_captures
.get(2)
.ok_or_else(|| Error::GenericError("capture group 2 did not exist".to_string()))?;
let function = pci_bdf_short_captures
.get(3)
.ok_or_else(|| Error::GenericError("capture group 3 did not exist".to_string()))?;
let bus = u16::from_str_radix(bus.as_str(), 16)?;
let device = u16::from_str_radix(device.as_str(), 16)?;
let function = u16::from_str_radix(function.as_str(), 16)?;
Ok(PciBdf::new(None, bus, device, function, None))
} else {
Err(Error::InvalidPciBdfString)
}
}
}
/// Renders the address as `dddd:bb:dd.f`, with an `@vv` suffix when both a
/// domain and a `vdefn` are present, or as `bb:dd.f` when there is no domain
/// (in which case `vdefn` is not rendered).
impl Display for PciBdf {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match (self.domain, self.vdefn) {
            (Some(domain), Some(vdefn)) => write!(
                f,
                "{:04x}:{:02x}:{:02x}.{:01x}@{:02x}",
                domain, self.bus, self.device, self.function, vdefn
            ),
            (Some(domain), None) => write!(
                f,
                "{:04x}:{:02x}:{:02x}.{:01x}",
                domain, self.bus, self.device, self.function
            ),
            (None, _) => write!(
                f,
                "{:02x}:{:02x}.{:01x}",
                self.bus, self.device, self.function
            ),
        }
    }
}
/// One resource range of a PCI device: an inclusive `[start, end]` range plus
/// its flag bits.
#[derive(Debug, Clone, Copy)]
pub struct PciMemoryResource {
    /// First address (or port) of the range.
    pub start: u64,
    /// Last address (or port) of the range, inclusive.
    pub end: u64,
    /// Flag bits; the `FLAG_PCI_BAR_IO` bit is tested by [`PciMemoryResource::is_bar_io`].
    pub flags: u64,
}

impl PciMemoryResource {
    /// Builds a resource from its raw components without validation.
    pub fn new(start: u64, end: u64, flags: u64) -> PciMemoryResource {
        PciMemoryResource { start, end, flags }
    }

    /// Returns `true` when the `FLAG_PCI_BAR_IO` bit is set in `flags`.
    pub fn is_bar_io(&self) -> bool {
        (self.flags & FLAG_PCI_BAR_IO) != 0
    }

    /// Returns the number of units covered by the inclusive range `[start, end]`.
    ///
    /// An inverted range (`end < start`) yields `0` instead of underflowing,
    /// and a range spanning the whole `u64` space saturates at `u64::MAX`
    /// instead of overflowing.
    pub fn size(&self) -> u64 {
        match self.end.checked_sub(self.start) {
            Some(span) => span.saturating_add(1),
            None => 0,
        }
    }
}