mirror of
https://github.com/edera-dev/krata.git
synced 2025-08-02 21:00:55 +00:00
feat: pci passthrough (#114)
* feat: pci passthrough * feat: guest device management * feat: addons mounting and kernel modules support * feat: more pci work * fix: kernel build squashfs fixes * fix: e820entry should be available on all platforms
This commit is contained in:
parent
bece7f33c7
commit
84920a88ab
65
Cargo.lock
generated
65
Cargo.lock
generated
@ -1456,9 +1456,11 @@ dependencies = [
|
||||
"prost",
|
||||
"redb",
|
||||
"scopeguard",
|
||||
"serde",
|
||||
"signal-hook",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
"toml",
|
||||
"tonic",
|
||||
"uuid",
|
||||
]
|
||||
@ -1479,6 +1481,7 @@ dependencies = [
|
||||
"nix 0.28.0",
|
||||
"oci-spec",
|
||||
"path-absolutize",
|
||||
"platform-info",
|
||||
"rtnetlink",
|
||||
"serde",
|
||||
"serde_json",
|
||||
@ -1558,6 +1561,7 @@ dependencies = [
|
||||
"serde_json",
|
||||
"tokio",
|
||||
"uuid",
|
||||
"walkdir",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -1602,6 +1606,7 @@ dependencies = [
|
||||
"log",
|
||||
"memchr",
|
||||
"nix 0.28.0",
|
||||
"regex",
|
||||
"slice-copy",
|
||||
"thiserror",
|
||||
"tokio",
|
||||
@ -2067,6 +2072,16 @@ version = "0.3.30"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec"
|
||||
|
||||
[[package]]
|
||||
name = "platform-info"
|
||||
version = "2.0.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d5ff316b9c4642feda973c18f0decd6c8b0919d4722566f6e4337cce0dd88217"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "portable-atomic"
|
||||
version = "1.6.0"
|
||||
@ -2096,7 +2111,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7f4c021e1093a56626774e81216a4ce732a735e5bad4868a03f3ed65ca0c3919"
|
||||
dependencies = [
|
||||
"once_cell",
|
||||
"toml_edit",
|
||||
"toml_edit 0.19.15",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -2570,6 +2585,15 @@ dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_spanned"
|
||||
version = "0.6.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "eb3622f419d1296904700073ea6cc23ad690adbd66f13ea683df73298736f0c1"
|
||||
dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_urlencoded"
|
||||
version = "0.7.1"
|
||||
@ -2984,11 +3008,26 @@ dependencies = [
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "toml"
|
||||
version = "0.8.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e9dd1545e8208b4a5af1aa9bbd0b4cf7e9ea08fabc5d0a5c67fcaafa17433aa3"
|
||||
dependencies = [
|
||||
"serde",
|
||||
"serde_spanned",
|
||||
"toml_datetime",
|
||||
"toml_edit 0.22.12",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "toml_datetime"
|
||||
version = "0.6.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3550f4e9685620ac18a50ed434eb3aec30db8ba93b0287467bca5826ea25baf1"
|
||||
dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "toml_edit"
|
||||
@ -2998,7 +3037,20 @@ checksum = "1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421"
|
||||
dependencies = [
|
||||
"indexmap 2.2.6",
|
||||
"toml_datetime",
|
||||
"winnow",
|
||||
"winnow 0.5.40",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "toml_edit"
|
||||
version = "0.22.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d3328d4f68a705b2a4498da1d580585d39a6510f98318a2cec3018a7ec61ddef"
|
||||
dependencies = [
|
||||
"indexmap 2.2.6",
|
||||
"serde",
|
||||
"serde_spanned",
|
||||
"toml_datetime",
|
||||
"winnow 0.6.6",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -3509,6 +3561,15 @@ dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winnow"
|
||||
version = "0.6.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f0c976aaaa0e1f90dbb21e9587cdaf1d9679a1cde8875c0d6bd83ab96a208352"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winreg"
|
||||
version = "0.52.0"
|
||||
|
@ -57,6 +57,7 @@ oci-spec = "0.6.4"
|
||||
once_cell = "1.19.0"
|
||||
path-absolutize = "3.1.1"
|
||||
path-clean = "1.0.1"
|
||||
platform-info = "2.0.3"
|
||||
prost = "0.12.4"
|
||||
prost-build = "0.12.4"
|
||||
prost-reflect-build = "0.13.0"
|
||||
@ -64,6 +65,7 @@ prost-types = "0.12.4"
|
||||
rand = "0.8.5"
|
||||
ratatui = "0.26.2"
|
||||
redb = "2.1.0"
|
||||
regex = "1.10.4"
|
||||
rtnetlink = "0.14.1"
|
||||
scopeguard = "1.2.0"
|
||||
serde_json = "1.0.116"
|
||||
@ -76,6 +78,7 @@ sysinfo = "0.30.11"
|
||||
termtree = "0.4.1"
|
||||
thiserror = "1.0"
|
||||
tokio-tun = "0.11.4"
|
||||
toml = "0.8.12"
|
||||
tonic-build = "0.11.0"
|
||||
tower = "0.4.13"
|
||||
udp-stream = "0.0.11"
|
||||
|
@ -6,8 +6,8 @@ use krata::{
|
||||
events::EventStream,
|
||||
v1::{
|
||||
common::{
|
||||
guest_image_spec::Image, GuestImageSpec, GuestOciImageSpec, GuestSpec, GuestStatus,
|
||||
GuestTaskSpec, GuestTaskSpecEnvVar, OciImageFormat,
|
||||
guest_image_spec::Image, GuestImageSpec, GuestOciImageSpec, GuestSpec, GuestSpecDevice,
|
||||
GuestStatus, GuestTaskSpec, GuestTaskSpecEnvVar, OciImageFormat,
|
||||
},
|
||||
control::{
|
||||
control_service_client::ControlServiceClient, watch_events_reply::Event,
|
||||
@ -50,6 +50,8 @@ pub struct LaunchCommand {
|
||||
help = "Memory available to the guest, in megabytes"
|
||||
)]
|
||||
mem: u64,
|
||||
#[arg[short = 'D', long = "device", help = "Devices to request for the guest"]]
|
||||
device: Vec<String>,
|
||||
#[arg[short, long, help = "Environment variables set in the guest"]]
|
||||
env: Option<Vec<String>>,
|
||||
#[arg(
|
||||
@ -135,6 +137,11 @@ impl LaunchCommand {
|
||||
working_directory: self.working_directory.unwrap_or_default(),
|
||||
}),
|
||||
annotations: vec![],
|
||||
devices: self
|
||||
.device
|
||||
.iter()
|
||||
.map(|name| GuestSpecDevice { name: name.clone() })
|
||||
.collect(),
|
||||
}),
|
||||
};
|
||||
let response = client
|
||||
|
128
crates/ctl/src/cli/list_devices.rs
Normal file
128
crates/ctl/src/cli/list_devices.rs
Normal file
@ -0,0 +1,128 @@
|
||||
use anyhow::Result;
|
||||
use clap::{Parser, ValueEnum};
|
||||
use comfy_table::{presets::UTF8_FULL_CONDENSED, Cell, Color, Table};
|
||||
use krata::{
|
||||
events::EventStream,
|
||||
v1::control::{control_service_client::ControlServiceClient, DeviceInfo, ListDevicesRequest},
|
||||
};
|
||||
|
||||
use serde_json::Value;
|
||||
use tonic::transport::Channel;
|
||||
|
||||
use crate::format::{kv2line, proto2dynamic, proto2kv};
|
||||
|
||||
#[derive(ValueEnum, Clone, Debug, PartialEq, Eq)]
|
||||
enum ListDevicesFormat {
|
||||
Table,
|
||||
Json,
|
||||
JsonPretty,
|
||||
Jsonl,
|
||||
Yaml,
|
||||
KeyValue,
|
||||
Simple,
|
||||
}
|
||||
|
||||
#[derive(Parser)]
|
||||
#[command(about = "List the devices on the hypervisor")]
|
||||
pub struct ListDevicesCommand {
|
||||
#[arg(short, long, default_value = "table", help = "Output format")]
|
||||
format: ListDevicesFormat,
|
||||
}
|
||||
|
||||
impl ListDevicesCommand {
|
||||
pub async fn run(
|
||||
self,
|
||||
mut client: ControlServiceClient<Channel>,
|
||||
_events: EventStream,
|
||||
) -> Result<()> {
|
||||
let reply = client
|
||||
.list_devices(ListDevicesRequest {})
|
||||
.await?
|
||||
.into_inner();
|
||||
let mut devices = reply.devices;
|
||||
|
||||
devices.sort_by(|a, b| a.name.cmp(&b.name));
|
||||
|
||||
match self.format {
|
||||
ListDevicesFormat::Table => {
|
||||
self.print_devices_table(devices)?;
|
||||
}
|
||||
|
||||
ListDevicesFormat::Simple => {
|
||||
for device in devices {
|
||||
println!("{}\t{}\t{}", device.name, device.claimed, device.owner);
|
||||
}
|
||||
}
|
||||
|
||||
ListDevicesFormat::Json | ListDevicesFormat::JsonPretty | ListDevicesFormat::Yaml => {
|
||||
let mut values = Vec::new();
|
||||
for device in devices {
|
||||
let message = proto2dynamic(device)?;
|
||||
values.push(serde_json::to_value(message)?);
|
||||
}
|
||||
let value = Value::Array(values);
|
||||
let encoded = if self.format == ListDevicesFormat::JsonPretty {
|
||||
serde_json::to_string_pretty(&value)?
|
||||
} else if self.format == ListDevicesFormat::Yaml {
|
||||
serde_yaml::to_string(&value)?
|
||||
} else {
|
||||
serde_json::to_string(&value)?
|
||||
};
|
||||
println!("{}", encoded.trim());
|
||||
}
|
||||
|
||||
ListDevicesFormat::Jsonl => {
|
||||
for device in devices {
|
||||
let message = proto2dynamic(device)?;
|
||||
println!("{}", serde_json::to_string(&message)?);
|
||||
}
|
||||
}
|
||||
|
||||
ListDevicesFormat::KeyValue => {
|
||||
self.print_key_value(devices)?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn print_devices_table(&self, devices: Vec<DeviceInfo>) -> Result<()> {
|
||||
let mut table = Table::new();
|
||||
table.load_preset(UTF8_FULL_CONDENSED);
|
||||
table.set_content_arrangement(comfy_table::ContentArrangement::Dynamic);
|
||||
table.set_header(vec!["name", "status", "owner"]);
|
||||
for device in devices {
|
||||
let status_text = if device.claimed {
|
||||
"claimed"
|
||||
} else {
|
||||
"available"
|
||||
};
|
||||
|
||||
let status_color = if device.claimed {
|
||||
Color::Blue
|
||||
} else {
|
||||
Color::Green
|
||||
};
|
||||
|
||||
table.add_row(vec![
|
||||
Cell::new(device.name),
|
||||
Cell::new(status_text).fg(status_color),
|
||||
Cell::new(device.owner),
|
||||
]);
|
||||
}
|
||||
if table.is_empty() {
|
||||
println!("no devices configured");
|
||||
} else {
|
||||
println!("{}", table);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn print_key_value(&self, devices: Vec<DeviceInfo>) -> Result<()> {
|
||||
for device in devices {
|
||||
let kvs = proto2kv(device)?;
|
||||
println!("{}", kv2line(kvs));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
@ -5,6 +5,7 @@ pub mod identify_host;
|
||||
pub mod idm_snoop;
|
||||
pub mod launch;
|
||||
pub mod list;
|
||||
pub mod list_devices;
|
||||
pub mod logs;
|
||||
pub mod metrics;
|
||||
pub mod pull;
|
||||
@ -24,8 +25,9 @@ use tonic::{transport::Channel, Request};
|
||||
use self::{
|
||||
attach::AttachCommand, destroy::DestroyCommand, exec::ExecCommand,
|
||||
identify_host::IdentifyHostCommand, idm_snoop::IdmSnoopCommand, launch::LaunchCommand,
|
||||
list::ListCommand, logs::LogsCommand, metrics::MetricsCommand, pull::PullCommand,
|
||||
resolve::ResolveCommand, top::TopCommand, watch::WatchCommand,
|
||||
list::ListCommand, list_devices::ListDevicesCommand, logs::LogsCommand,
|
||||
metrics::MetricsCommand, pull::PullCommand, resolve::ResolveCommand, top::TopCommand,
|
||||
watch::WatchCommand,
|
||||
};
|
||||
|
||||
#[derive(Parser)]
|
||||
@ -51,6 +53,7 @@ pub enum Commands {
|
||||
Launch(LaunchCommand),
|
||||
Destroy(DestroyCommand),
|
||||
List(ListCommand),
|
||||
ListDevices(ListDevicesCommand),
|
||||
Attach(AttachCommand),
|
||||
Pull(PullCommand),
|
||||
Logs(LogsCommand),
|
||||
@ -120,6 +123,10 @@ impl ControlCommand {
|
||||
Commands::Exec(exec) => {
|
||||
exec.run(client).await?;
|
||||
}
|
||||
|
||||
Commands::ListDevices(list) => {
|
||||
list.run(client, events).await?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
@ -24,9 +24,11 @@ log = { workspace = true }
|
||||
prost = { workspace = true }
|
||||
redb = { workspace = true }
|
||||
scopeguard = { workspace = true }
|
||||
serde = { workspace = true }
|
||||
signal-hook = { workspace = true }
|
||||
tokio = { workspace = true }
|
||||
tokio-stream = { workspace = true }
|
||||
toml = { workspace = true }
|
||||
krata-tokio-tar = { workspace = true }
|
||||
tonic = { workspace = true, features = ["tls"] }
|
||||
uuid = { workspace = true }
|
||||
|
55
crates/daemon/src/config.rs
Normal file
55
crates/daemon/src/config.rs
Normal file
@ -0,0 +1,55 @@
|
||||
use std::{collections::HashMap, path::Path};
|
||||
|
||||
use anyhow::Result;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tokio::fs;
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone, Debug, Default)]
|
||||
pub struct DaemonConfig {
|
||||
#[serde(default)]
|
||||
pub pci: DaemonPciConfig,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone, Debug, Default)]
|
||||
pub struct DaemonPciConfig {
|
||||
#[serde(default)]
|
||||
pub devices: HashMap<String, DaemonPciDeviceConfig>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone, Debug)]
|
||||
pub struct DaemonPciDeviceConfig {
|
||||
pub locations: Vec<String>,
|
||||
#[serde(default)]
|
||||
pub permissive: bool,
|
||||
#[serde(default)]
|
||||
#[serde(rename = "msi-translate")]
|
||||
pub msi_translate: bool,
|
||||
#[serde(default)]
|
||||
#[serde(rename = "power-management")]
|
||||
pub power_management: bool,
|
||||
#[serde(default)]
|
||||
#[serde(rename = "rdm-reserve-policy")]
|
||||
pub rdm_reserve_policy: DaemonPciDeviceRdmReservePolicy,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone, Debug, Default)]
|
||||
pub enum DaemonPciDeviceRdmReservePolicy {
|
||||
#[default]
|
||||
#[serde(rename = "strict")]
|
||||
Strict,
|
||||
#[serde(rename = "relaxed")]
|
||||
Relaxed,
|
||||
}
|
||||
|
||||
impl DaemonConfig {
|
||||
pub async fn load(path: &Path) -> Result<DaemonConfig> {
|
||||
if path.exists() {
|
||||
let content = fs::read_to_string(path).await?;
|
||||
let config: DaemonConfig = toml::from_str(&content)?;
|
||||
Ok(config)
|
||||
} else {
|
||||
fs::write(&path, "").await?;
|
||||
Ok(DaemonConfig::default())
|
||||
}
|
||||
}
|
||||
}
|
@ -11,10 +11,11 @@ use krata::{
|
||||
control::{
|
||||
control_service_server::ControlService, ConsoleDataReply, ConsoleDataRequest,
|
||||
CreateGuestReply, CreateGuestRequest, DestroyGuestReply, DestroyGuestRequest,
|
||||
ExecGuestReply, ExecGuestRequest, IdentifyHostReply, IdentifyHostRequest,
|
||||
ListGuestsReply, ListGuestsRequest, PullImageReply, PullImageRequest,
|
||||
ReadGuestMetricsReply, ReadGuestMetricsRequest, ResolveGuestReply, ResolveGuestRequest,
|
||||
SnoopIdmReply, SnoopIdmRequest, WatchEventsReply, WatchEventsRequest,
|
||||
DeviceInfo, ExecGuestReply, ExecGuestRequest, IdentifyHostReply, IdentifyHostRequest,
|
||||
ListDevicesReply, ListDevicesRequest, ListGuestsReply, ListGuestsRequest,
|
||||
PullImageReply, PullImageRequest, ReadGuestMetricsReply, ReadGuestMetricsRequest,
|
||||
ResolveGuestReply, ResolveGuestRequest, SnoopIdmReply, SnoopIdmRequest,
|
||||
WatchEventsReply, WatchEventsRequest,
|
||||
},
|
||||
},
|
||||
};
|
||||
@ -35,8 +36,8 @@ use uuid::Uuid;
|
||||
|
||||
use crate::{
|
||||
command::DaemonCommand, console::DaemonConsoleHandle, db::GuestStore,
|
||||
event::DaemonEventContext, glt::GuestLookupTable, idm::DaemonIdmHandle,
|
||||
metrics::idm_metric_to_api, oci::convert_oci_progress,
|
||||
devices::DaemonDeviceManager, event::DaemonEventContext, glt::GuestLookupTable,
|
||||
idm::DaemonIdmHandle, metrics::idm_metric_to_api, oci::convert_oci_progress,
|
||||
};
|
||||
|
||||
pub struct ApiError {
|
||||
@ -60,6 +61,7 @@ impl From<ApiError> for Status {
|
||||
#[derive(Clone)]
|
||||
pub struct DaemonControlService {
|
||||
glt: GuestLookupTable,
|
||||
devices: DaemonDeviceManager,
|
||||
events: DaemonEventContext,
|
||||
console: DaemonConsoleHandle,
|
||||
idm: DaemonIdmHandle,
|
||||
@ -69,8 +71,10 @@ pub struct DaemonControlService {
|
||||
}
|
||||
|
||||
impl DaemonControlService {
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn new(
|
||||
glt: GuestLookupTable,
|
||||
devices: DaemonDeviceManager,
|
||||
events: DaemonEventContext,
|
||||
console: DaemonConsoleHandle,
|
||||
idm: DaemonIdmHandle,
|
||||
@ -80,6 +84,7 @@ impl DaemonControlService {
|
||||
) -> Self {
|
||||
Self {
|
||||
glt,
|
||||
devices,
|
||||
events,
|
||||
console,
|
||||
idm,
|
||||
@ -524,4 +529,23 @@ impl ControlService for DaemonControlService {
|
||||
};
|
||||
Ok(Response::new(Box::pin(output) as Self::SnoopIdmStream))
|
||||
}
|
||||
|
||||
async fn list_devices(
|
||||
&self,
|
||||
request: Request<ListDevicesRequest>,
|
||||
) -> Result<Response<ListDevicesReply>, Status> {
|
||||
let _ = request.into_inner();
|
||||
let mut devices = Vec::new();
|
||||
let state = self.devices.copy().await.map_err(|error| ApiError {
|
||||
message: error.to_string(),
|
||||
})?;
|
||||
for (name, state) in state {
|
||||
devices.push(DeviceInfo {
|
||||
name,
|
||||
claimed: state.owner.is_some(),
|
||||
owner: state.owner.map(|x| x.to_string()).unwrap_or_default(),
|
||||
});
|
||||
}
|
||||
Ok(Response::new(ListDevicesReply { devices }))
|
||||
}
|
||||
}
|
||||
|
106
crates/daemon/src/devices.rs
Normal file
106
crates/daemon/src/devices.rs
Normal file
@ -0,0 +1,106 @@
|
||||
use std::{collections::HashMap, sync::Arc};
|
||||
|
||||
use anyhow::{anyhow, Result};
|
||||
use log::warn;
|
||||
use tokio::sync::RwLock;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::config::{DaemonConfig, DaemonPciDeviceConfig};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct DaemonDeviceState {
|
||||
pub pci: Option<DaemonPciDeviceConfig>,
|
||||
pub owner: Option<Uuid>,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct DaemonDeviceManager {
|
||||
config: Arc<DaemonConfig>,
|
||||
devices: Arc<RwLock<HashMap<String, DaemonDeviceState>>>,
|
||||
}
|
||||
|
||||
impl DaemonDeviceManager {
|
||||
pub fn new(config: Arc<DaemonConfig>) -> Self {
|
||||
Self {
|
||||
config,
|
||||
devices: Arc::new(RwLock::new(HashMap::new())),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn claim(&self, device: &str, uuid: Uuid) -> Result<DaemonDeviceState> {
|
||||
let mut devices = self.devices.write().await;
|
||||
let Some(state) = devices.get_mut(device) else {
|
||||
return Err(anyhow!(
|
||||
"unable to claim unknown device '{}' for guest {}",
|
||||
device,
|
||||
uuid
|
||||
));
|
||||
};
|
||||
|
||||
if let Some(owner) = state.owner {
|
||||
return Err(anyhow!(
|
||||
"unable to claim device '{}' for guest {}: already claimed by {}",
|
||||
device,
|
||||
uuid,
|
||||
owner
|
||||
));
|
||||
}
|
||||
|
||||
state.owner = Some(uuid);
|
||||
Ok(state.clone())
|
||||
}
|
||||
|
||||
pub async fn release_all(&self, uuid: Uuid) -> Result<()> {
|
||||
let mut devices = self.devices.write().await;
|
||||
for state in (*devices).values_mut() {
|
||||
if state.owner == Some(uuid) {
|
||||
state.owner = None;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn release(&self, device: &str, uuid: Uuid) -> Result<()> {
|
||||
let mut devices = self.devices.write().await;
|
||||
let Some(state) = devices.get_mut(device) else {
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
if let Some(owner) = state.owner {
|
||||
if owner != uuid {
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
|
||||
state.owner = None;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn update_claims(&self, claims: HashMap<String, Uuid>) -> Result<()> {
|
||||
let mut devices = self.devices.write().await;
|
||||
devices.clear();
|
||||
for (name, pci) in &self.config.pci.devices {
|
||||
let owner = claims.get(name).cloned();
|
||||
devices.insert(
|
||||
name.clone(),
|
||||
DaemonDeviceState {
|
||||
owner,
|
||||
pci: Some(pci.clone()),
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
for (name, uuid) in &claims {
|
||||
if !devices.contains_key(name) {
|
||||
warn!("unknown device '{}' assigned to guest {}", name, uuid);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn copy(&self) -> Result<HashMap<String, DaemonDeviceState>> {
|
||||
let devices = self.devices.read().await;
|
||||
Ok(devices.clone())
|
||||
}
|
||||
}
|
@ -1,9 +1,11 @@
|
||||
use std::{net::SocketAddr, path::PathBuf, str::FromStr};
|
||||
use std::{net::SocketAddr, path::PathBuf, str::FromStr, sync::Arc};
|
||||
|
||||
use anyhow::{anyhow, Result};
|
||||
use config::DaemonConfig;
|
||||
use console::{DaemonConsole, DaemonConsoleHandle};
|
||||
use control::DaemonControlService;
|
||||
use db::GuestStore;
|
||||
use devices::DaemonDeviceManager;
|
||||
use event::{DaemonEventContext, DaemonEventGenerator};
|
||||
use glt::GuestLookupTable;
|
||||
use idm::{DaemonIdm, DaemonIdmHandle};
|
||||
@ -23,9 +25,11 @@ use tonic::transport::{Identity, Server, ServerTlsConfig};
|
||||
use uuid::Uuid;
|
||||
|
||||
pub mod command;
|
||||
pub mod config;
|
||||
pub mod console;
|
||||
pub mod control;
|
||||
pub mod db;
|
||||
pub mod devices;
|
||||
pub mod event;
|
||||
pub mod glt;
|
||||
pub mod idm;
|
||||
@ -35,7 +39,9 @@ pub mod reconcile;
|
||||
|
||||
pub struct Daemon {
|
||||
store: String,
|
||||
_config: Arc<DaemonConfig>,
|
||||
glt: GuestLookupTable,
|
||||
devices: DaemonDeviceManager,
|
||||
guests: GuestStore,
|
||||
events: DaemonEventContext,
|
||||
guest_reconciler_task: JoinHandle<()>,
|
||||
@ -50,12 +56,20 @@ const GUEST_RECONCILER_QUEUE_LEN: usize = 1000;
|
||||
|
||||
impl Daemon {
|
||||
pub async fn new(store: String) -> Result<Self> {
|
||||
let mut image_cache_dir = PathBuf::from(store.clone());
|
||||
let store_dir = PathBuf::from(store.clone());
|
||||
let mut config_path = store_dir.clone();
|
||||
config_path.push("config.toml");
|
||||
|
||||
let config = DaemonConfig::load(&config_path).await?;
|
||||
let config = Arc::new(config);
|
||||
let devices = DaemonDeviceManager::new(config.clone());
|
||||
|
||||
let mut image_cache_dir = store_dir.clone();
|
||||
image_cache_dir.push("cache");
|
||||
image_cache_dir.push("image");
|
||||
fs::create_dir_all(&image_cache_dir).await?;
|
||||
|
||||
let mut host_uuid_path = PathBuf::from(store.clone());
|
||||
let mut host_uuid_path = store_dir.clone();
|
||||
host_uuid_path.push("host.uuid");
|
||||
let host_uuid = if host_uuid_path.is_file() {
|
||||
let content = fs::read_to_string(&host_uuid_path).await?;
|
||||
@ -74,8 +88,9 @@ impl Daemon {
|
||||
generated
|
||||
};
|
||||
|
||||
let initrd_path = detect_guest_file(&store, "initrd")?;
|
||||
let kernel_path = detect_guest_file(&store, "kernel")?;
|
||||
let initrd_path = detect_guest_path(&store, "initrd")?;
|
||||
let kernel_path = detect_guest_path(&store, "kernel")?;
|
||||
let addons_path = detect_guest_path(&store, "addons.squashfs")?;
|
||||
|
||||
let packer = OciPackerService::new(None, &image_cache_dir, OciPlatform::current()).await?;
|
||||
let runtime = Runtime::new().await?;
|
||||
@ -93,6 +108,7 @@ impl Daemon {
|
||||
.await?;
|
||||
let runtime_for_reconciler = runtime.dupe().await?;
|
||||
let guest_reconciler = GuestReconciler::new(
|
||||
devices.clone(),
|
||||
glt.clone(),
|
||||
guests.clone(),
|
||||
events.clone(),
|
||||
@ -101,6 +117,7 @@ impl Daemon {
|
||||
guest_reconciler_notify.clone(),
|
||||
kernel_path,
|
||||
initrd_path,
|
||||
addons_path,
|
||||
)?;
|
||||
|
||||
let guest_reconciler_task = guest_reconciler.launch(guest_reconciler_receiver).await?;
|
||||
@ -108,7 +125,9 @@ impl Daemon {
|
||||
|
||||
Ok(Self {
|
||||
store,
|
||||
_config: config,
|
||||
glt,
|
||||
devices,
|
||||
guests,
|
||||
events,
|
||||
guest_reconciler_task,
|
||||
@ -123,6 +142,7 @@ impl Daemon {
|
||||
pub async fn listen(&mut self, addr: ControlDialAddress) -> Result<()> {
|
||||
let control_service = DaemonControlService::new(
|
||||
self.glt.clone(),
|
||||
self.devices.clone(),
|
||||
self.events.clone(),
|
||||
self.console.clone(),
|
||||
self.idm.clone(),
|
||||
@ -186,7 +206,7 @@ impl Drop for Daemon {
|
||||
}
|
||||
}
|
||||
|
||||
fn detect_guest_file(store: &str, name: &str) -> Result<PathBuf> {
|
||||
fn detect_guest_path(store: &str, name: &str) -> Result<PathBuf> {
|
||||
let mut path = PathBuf::from(format!("{}/guest/{}", store, name));
|
||||
if path.is_file() {
|
||||
return Ok(path);
|
||||
|
@ -26,6 +26,7 @@ use uuid::Uuid;
|
||||
|
||||
use crate::{
|
||||
db::GuestStore,
|
||||
devices::DaemonDeviceManager,
|
||||
event::{DaemonEvent, DaemonEventContext},
|
||||
glt::GuestLookupTable,
|
||||
};
|
||||
@ -55,6 +56,7 @@ impl Drop for GuestReconcilerEntry {
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct GuestReconciler {
|
||||
devices: DaemonDeviceManager,
|
||||
glt: GuestLookupTable,
|
||||
guests: GuestStore,
|
||||
events: DaemonEventContext,
|
||||
@ -62,6 +64,7 @@ pub struct GuestReconciler {
|
||||
packer: OciPackerService,
|
||||
kernel_path: PathBuf,
|
||||
initrd_path: PathBuf,
|
||||
addons_path: PathBuf,
|
||||
tasks: Arc<Mutex<HashMap<Uuid, GuestReconcilerEntry>>>,
|
||||
guest_reconciler_notify: Sender<Uuid>,
|
||||
reconcile_lock: Arc<RwLock<()>>,
|
||||
@ -70,6 +73,7 @@ pub struct GuestReconciler {
|
||||
impl GuestReconciler {
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn new(
|
||||
devices: DaemonDeviceManager,
|
||||
glt: GuestLookupTable,
|
||||
guests: GuestStore,
|
||||
events: DaemonEventContext,
|
||||
@ -78,8 +82,10 @@ impl GuestReconciler {
|
||||
guest_reconciler_notify: Sender<Uuid>,
|
||||
kernel_path: PathBuf,
|
||||
initrd_path: PathBuf,
|
||||
modules_path: PathBuf,
|
||||
) -> Result<Self> {
|
||||
Ok(Self {
|
||||
devices,
|
||||
glt,
|
||||
guests,
|
||||
events,
|
||||
@ -87,6 +93,7 @@ impl GuestReconciler {
|
||||
packer,
|
||||
kernel_path,
|
||||
initrd_path,
|
||||
addons_path: modules_path,
|
||||
tasks: Arc::new(Mutex::new(HashMap::new())),
|
||||
guest_reconciler_notify,
|
||||
reconcile_lock: Arc::new(RwLock::with_max_readers((), PARALLEL_LIMIT)),
|
||||
@ -152,6 +159,8 @@ impl GuestReconciler {
|
||||
self.guests.remove(guest.uuid).await?;
|
||||
}
|
||||
|
||||
let mut device_claims = HashMap::new();
|
||||
|
||||
for (uuid, mut stored_guest) in stored_guests {
|
||||
let previous_guest = stored_guest.clone();
|
||||
let runtime_guest = runtime_guests.iter().find(|x| x.uuid == uuid);
|
||||
@ -173,6 +182,17 @@ impl GuestReconciler {
|
||||
} else {
|
||||
state.status = GuestStatus::Started.into();
|
||||
}
|
||||
|
||||
for device in &stored_guest
|
||||
.spec
|
||||
.as_ref()
|
||||
.cloned()
|
||||
.unwrap_or_default()
|
||||
.devices
|
||||
{
|
||||
device_claims.insert(device.name.clone(), uuid);
|
||||
}
|
||||
|
||||
state.network = Some(guestinfo_to_networkstate(runtime));
|
||||
stored_guest.state = Some(state);
|
||||
}
|
||||
@ -185,6 +205,9 @@ impl GuestReconciler {
|
||||
let _ = self.guest_reconciler_notify.try_send(uuid);
|
||||
}
|
||||
}
|
||||
|
||||
self.devices.update_claims(device_claims).await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@ -255,8 +278,10 @@ impl GuestReconciler {
|
||||
|
||||
async fn start(&self, uuid: Uuid, guest: &mut Guest) -> Result<GuestReconcilerResult> {
|
||||
let starter = GuestStarter {
|
||||
devices: &self.devices,
|
||||
kernel_path: &self.kernel_path,
|
||||
initrd_path: &self.initrd_path,
|
||||
addons_path: &self.addons_path,
|
||||
packer: &self.packer,
|
||||
glt: &self.glt,
|
||||
runtime: &self.runtime,
|
||||
@ -293,6 +318,7 @@ impl GuestReconciler {
|
||||
host: self.glt.host_uuid().to_string(),
|
||||
domid: domid.unwrap_or(u32::MAX),
|
||||
});
|
||||
self.devices.release_all(uuid).await?;
|
||||
Ok(GuestReconcilerResult::Changed { rerun: false })
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,7 @@
|
||||
use std::collections::HashMap;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::str::FromStr;
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
|
||||
use anyhow::{anyhow, Result};
|
||||
use futures::StreamExt;
|
||||
@ -7,6 +9,7 @@ use krata::launchcfg::LaunchPackedFormat;
|
||||
use krata::v1::common::GuestOciImageSpec;
|
||||
use krata::v1::common::{guest_image_spec::Image, Guest, GuestState, GuestStatus, OciImageFormat};
|
||||
use krataoci::packer::{service::OciPackerService, OciPackedFormat};
|
||||
use kratart::launch::{PciBdf, PciDevice, PciRdmReservePolicy};
|
||||
use kratart::{launch::GuestLaunchRequest, Runtime};
|
||||
use log::info;
|
||||
|
||||
@ -15,6 +18,8 @@ use tokio::io::AsyncReadExt;
|
||||
use tokio_tar::Archive;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::config::DaemonPciDeviceRdmReservePolicy;
|
||||
use crate::devices::DaemonDeviceManager;
|
||||
use crate::{
|
||||
glt::GuestLookupTable,
|
||||
reconcile::guest::{guestinfo_to_networkstate, GuestReconcilerResult},
|
||||
@ -24,8 +29,10 @@ use crate::{
|
||||
const OCI_SPEC_TAR_FILE_MAX_SIZE: usize = 100 * 1024 * 1024;
|
||||
|
||||
pub struct GuestStarter<'a> {
|
||||
pub devices: &'a DaemonDeviceManager,
|
||||
pub kernel_path: &'a Path,
|
||||
pub initrd_path: &'a Path,
|
||||
pub addons_path: &'a Path,
|
||||
pub packer: &'a OciPackerService,
|
||||
pub glt: &'a GuestLookupTable,
|
||||
pub runtime: &'a Runtime,
|
||||
@ -135,6 +142,48 @@ impl GuestStarter<'_> {
|
||||
fs::read(&self.initrd_path).await?
|
||||
};
|
||||
|
||||
let success = AtomicBool::new(false);
|
||||
|
||||
let _device_release_guard = scopeguard::guard(
|
||||
(spec.devices.clone(), self.devices.clone()),
|
||||
|(devices, manager)| {
|
||||
if !success.load(Ordering::Acquire) {
|
||||
tokio::task::spawn(async move {
|
||||
for device in devices {
|
||||
let _ = manager.release(&device.name, uuid).await;
|
||||
}
|
||||
});
|
||||
}
|
||||
},
|
||||
);
|
||||
|
||||
let mut pcis = Vec::new();
|
||||
for device in &spec.devices {
|
||||
let state = self.devices.claim(&device.name, uuid).await?;
|
||||
if let Some(cfg) = state.pci {
|
||||
for location in cfg.locations {
|
||||
let pci = PciDevice {
|
||||
bdf: PciBdf::from_str(&location)?.with_domain(0),
|
||||
permissive: cfg.permissive,
|
||||
msi_translate: cfg.msi_translate,
|
||||
power_management: cfg.power_management,
|
||||
rdm_reserve_policy: match cfg.rdm_reserve_policy {
|
||||
DaemonPciDeviceRdmReservePolicy::Strict => PciRdmReservePolicy::Strict,
|
||||
DaemonPciDeviceRdmReservePolicy::Relaxed => {
|
||||
PciRdmReservePolicy::Relaxed
|
||||
}
|
||||
},
|
||||
};
|
||||
pcis.push(pci);
|
||||
}
|
||||
} else {
|
||||
return Err(anyhow!(
|
||||
"device '{}' isn't a known device type",
|
||||
device.name
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
let info = self
|
||||
.runtime
|
||||
.launch(GuestLaunchRequest {
|
||||
@ -150,6 +199,7 @@ impl GuestStarter<'_> {
|
||||
initrd,
|
||||
vcpus: spec.vcpus,
|
||||
mem: spec.mem,
|
||||
pcis,
|
||||
env: task
|
||||
.environment
|
||||
.iter()
|
||||
@ -157,6 +207,7 @@ impl GuestStarter<'_> {
|
||||
.collect::<HashMap<_, _>>(),
|
||||
run: empty_vec_optional(task.command.clone()),
|
||||
debug: false,
|
||||
addons_image: Some(self.addons_path.to_path_buf()),
|
||||
})
|
||||
.await?;
|
||||
self.glt.associate(uuid, info.domid).await;
|
||||
@ -169,6 +220,7 @@ impl GuestStarter<'_> {
|
||||
host: self.glt.host_uuid().to_string(),
|
||||
domid: info.domid,
|
||||
});
|
||||
success.store(true, Ordering::Release);
|
||||
Ok(GuestReconcilerResult::Changed { rerun: false })
|
||||
}
|
||||
}
|
||||
|
@ -21,6 +21,7 @@ log = { workspace = true }
|
||||
nix = { workspace = true, features = ["ioctl", "process", "fs"] }
|
||||
oci-spec = { workspace = true }
|
||||
path-absolutize = { workspace = true }
|
||||
platform-info = { workspace = true }
|
||||
rtnetlink = { workspace = true }
|
||||
serde = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
|
@ -12,6 +12,7 @@ use nix::ioctl_write_int_bad;
|
||||
use nix::unistd::{dup2, execve, fork, ForkResult, Pid};
|
||||
use oci_spec::image::{Config, ImageConfiguration};
|
||||
use path_absolutize::Absolutize;
|
||||
use platform_info::{PlatformInfo, PlatformInfoAPI, UNameAPI};
|
||||
use std::collections::HashMap;
|
||||
use std::ffi::CString;
|
||||
use std::fs::{File, OpenOptions, Permissions};
|
||||
@ -50,6 +51,10 @@ const NEW_ROOT_DEV_PATH: &str = "/newroot/dev";
|
||||
const IMAGE_CONFIG_JSON_PATH: &str = "/config/image/config.json";
|
||||
const LAUNCH_CONFIG_JSON_PATH: &str = "/config/launch.json";
|
||||
|
||||
const ADDONS_DEVICE_PATH: &str = "/dev/xvdc";
|
||||
const ADDONS_MOUNT_PATH: &str = "/addons";
|
||||
const ADDONS_MODULES_PATH: &str = "/addons/modules";
|
||||
|
||||
ioctl_write_int_bad!(set_controlling_terminal, TIOCSCTTY);
|
||||
|
||||
pub struct GuestInit {}
|
||||
@ -88,7 +93,10 @@ impl GuestInit {
|
||||
|
||||
self.mount_root_image(launch.root.format.clone()).await?;
|
||||
|
||||
self.mount_addons().await?;
|
||||
|
||||
self.mount_new_root().await?;
|
||||
self.mount_kernel_modules().await?;
|
||||
self.bind_new_root().await?;
|
||||
|
||||
if let Some(hostname) = launch.hostname.clone() {
|
||||
@ -137,16 +145,60 @@ impl GuestInit {
|
||||
self.create_dir("/root", Some(0o0700)).await?;
|
||||
self.create_dir("/tmp", None).await?;
|
||||
self.create_dir("/run", Some(0o0755)).await?;
|
||||
self.mount_kernel_fs("devtmpfs", "/dev", "mode=0755", None)
|
||||
self.mount_kernel_fs("devtmpfs", "/dev", "mode=0755", None, None)
|
||||
.await?;
|
||||
self.mount_kernel_fs("proc", "/proc", "", None, None)
|
||||
.await?;
|
||||
self.mount_kernel_fs("sysfs", "/sys", "", None, None)
|
||||
.await?;
|
||||
self.create_dir("/dev/pts", Some(0o0755)).await?;
|
||||
self.mount_kernel_fs("devpts", "/dev/pts", "", None, Some("/dev/ptmx"))
|
||||
.await?;
|
||||
self.mount_kernel_fs("proc", "/proc", "", None).await?;
|
||||
self.mount_kernel_fs("sysfs", "/sys", "", None).await?;
|
||||
fs::symlink("/proc/self/fd", "/dev/fd").await?;
|
||||
fs::symlink("/proc/self/fd/0", "/dev/stdin").await?;
|
||||
fs::symlink("/proc/self/fd/1", "/dev/stdout").await?;
|
||||
fs::symlink("/proc/self/fd/2", "/dev/stderr").await?;
|
||||
self.mount_kernel_fs("cgroup2", "/sys/fs/cgroup", "", Some(MountFlags::RELATIME))
|
||||
.await?;
|
||||
self.mount_kernel_fs(
|
||||
"cgroup2",
|
||||
"/sys/fs/cgroup",
|
||||
"",
|
||||
Some(MountFlags::RELATIME),
|
||||
None,
|
||||
)
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn mount_addons(&mut self) -> Result<()> {
|
||||
if !fs::try_exists(ADDONS_DEVICE_PATH).await? {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
self.mount_image(
|
||||
&PathBuf::from(ADDONS_DEVICE_PATH),
|
||||
&PathBuf::from(ADDONS_MOUNT_PATH),
|
||||
LaunchPackedFormat::Squashfs,
|
||||
)
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn mount_kernel_modules(&mut self) -> Result<()> {
|
||||
if !fs::try_exists(ADDONS_MODULES_PATH).await? {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let Some(platform_info) = PlatformInfo::new().ok() else {
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
let kernel_release = platform_info.release().to_string_lossy().to_string();
|
||||
let modules_path = format!("/newroot/lib/modules/{}", kernel_release);
|
||||
fs::create_dir_all(&modules_path).await?;
|
||||
Mount::builder()
|
||||
.fstype(FilesystemType::Manual("none"))
|
||||
.flags(MountFlags::BIND | MountFlags::RDONLY)
|
||||
.mount(ADDONS_MODULES_PATH, modules_path)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@ -170,13 +222,14 @@ impl GuestInit {
|
||||
path: &str,
|
||||
data: &str,
|
||||
flags: Option<MountFlags>,
|
||||
source: Option<&str>,
|
||||
) -> Result<()> {
|
||||
trace!("mounting kernel fs {} to {}", fstype, path);
|
||||
Mount::builder()
|
||||
.fstype(FilesystemType::Manual(fstype))
|
||||
.flags(MountFlags::NOEXEC | MountFlags::NOSUID | flags.unwrap_or(MountFlags::empty()))
|
||||
.data(data)
|
||||
.mount(fstype, path)?;
|
||||
.mount(source.unwrap_or(fstype), path)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
@ -25,6 +25,7 @@ message GuestSpec {
|
||||
uint64 mem = 6;
|
||||
GuestTaskSpec task = 7;
|
||||
repeated GuestSpecAnnotation annotations = 8;
|
||||
repeated GuestSpecDevice devices = 9;
|
||||
}
|
||||
|
||||
message GuestImageSpec {
|
||||
@ -62,6 +63,10 @@ message GuestSpecAnnotation {
|
||||
string value = 2;
|
||||
}
|
||||
|
||||
// A device requested for a guest, identified by name. Carried in the repeated
// `devices` field of GuestSpec.
message GuestSpecDevice {
|
||||
string name = 1;
|
||||
}
|
||||
|
||||
message GuestState {
|
||||
GuestStatus status = 1;
|
||||
GuestNetworkState network = 2;
|
||||
|
@ -16,6 +16,7 @@ service ControlService {
|
||||
rpc DestroyGuest(DestroyGuestRequest) returns (DestroyGuestReply);
|
||||
rpc ResolveGuest(ResolveGuestRequest) returns (ResolveGuestReply);
|
||||
rpc ListGuests(ListGuestsRequest) returns (ListGuestsReply);
|
||||
rpc ListDevices(ListDevicesRequest) returns (ListDevicesReply);
|
||||
|
||||
rpc ExecGuest(stream ExecGuestRequest) returns (stream ExecGuestReply);
|
||||
|
||||
@ -187,3 +188,15 @@ message PullImageReply {
|
||||
string digest = 2;
|
||||
krata.v1.common.OciImageFormat format = 3;
|
||||
}
|
||||
|
||||
// One device entry returned by ListDevices.
// NOTE(review): `claimed` presumably reports whether a guest currently holds
// the device and `owner` identifies that guest - confirm against the daemon.
message DeviceInfo {
|
||||
string name = 1;
|
||||
bool claimed = 2;
|
||||
string owner = 3;
|
||||
}
|
||||
|
||||
// Request for the ListDevices RPC; it carries no fields.
message ListDevicesRequest {}
|
||||
|
||||
// Reply for the ListDevices RPC: the list of device entries.
message ListDevicesReply {
|
||||
repeated DeviceInfo devices = 1;
|
||||
}
|
||||
|
@ -47,7 +47,7 @@ impl Default for ImageName {
|
||||
}
|
||||
|
||||
impl ImageName {
|
||||
pub const DOCKER_HUB_MIRROR: &'static str = "registry.docker.io";
|
||||
pub const DOCKER_HUB_MIRROR: &'static str = "registry-1.docker.io";
|
||||
pub const DEFAULT_IMAGE_TAG: &'static str = "latest";
|
||||
|
||||
pub fn parse(name: &str) -> Result<Self> {
|
||||
|
@ -24,6 +24,7 @@ krata-xenclient = { path = "../xen/xenclient", version = "^0.0.10" }
|
||||
krata-xenevtchn = { path = "../xen/xenevtchn", version = "^0.0.10" }
|
||||
krata-xengnt = { path = "../xen/xengnt", version = "^0.0.10" }
|
||||
krata-xenstore = { path = "../xen/xenstore", version = "^0.0.10" }
|
||||
walkdir = { workspace = true }
|
||||
|
||||
[lib]
|
||||
name = "kratart"
|
||||
|
@ -1,5 +1,6 @@
|
||||
use anyhow::Result;
|
||||
use backhand::{FilesystemWriter, NodeHeader};
|
||||
use backhand::compression::Compressor;
|
||||
use backhand::{FilesystemCompressor, FilesystemWriter, NodeHeader};
|
||||
use krata::launchcfg::LaunchInfo;
|
||||
use krataoci::packer::OciPackedImage;
|
||||
use log::trace;
|
||||
@ -8,14 +9,14 @@ use std::fs::File;
|
||||
use std::path::PathBuf;
|
||||
use uuid::Uuid;
|
||||
|
||||
pub struct ConfigBlock<'a> {
|
||||
pub image: &'a OciPackedImage,
|
||||
pub struct ConfigBlock {
|
||||
pub image: OciPackedImage,
|
||||
pub file: PathBuf,
|
||||
pub dir: PathBuf,
|
||||
}
|
||||
|
||||
impl ConfigBlock<'_> {
|
||||
pub fn new<'a>(uuid: &Uuid, image: &'a OciPackedImage) -> Result<ConfigBlock<'a>> {
|
||||
impl ConfigBlock {
|
||||
pub fn new(uuid: &Uuid, image: OciPackedImage) -> Result<ConfigBlock> {
|
||||
let mut dir = std::env::temp_dir().clone();
|
||||
dir.push(format!("krata-cfg-{}", uuid));
|
||||
fs::create_dir_all(&dir)?;
|
||||
@ -29,6 +30,7 @@ impl ConfigBlock<'_> {
|
||||
let config = self.image.config.raw();
|
||||
let launch = serde_json::to_string(launch_config)?;
|
||||
let mut writer = FilesystemWriter::default();
|
||||
writer.set_compressor(FilesystemCompressor::new(Compressor::Gzip, None)?);
|
||||
writer.push_dir(
|
||||
"/image",
|
||||
NodeHeader {
|
||||
|
@ -1,5 +1,6 @@
|
||||
use std::collections::HashMap;
|
||||
use std::net::{IpAddr, Ipv6Addr};
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use std::{fs, net::Ipv4Addr, str::FromStr};
|
||||
|
||||
@ -21,6 +22,10 @@ use crate::RuntimeContext;
|
||||
|
||||
use super::{GuestInfo, GuestState};
|
||||
|
||||
pub use xenclient::{
|
||||
pci::PciBdf, DomainPciDevice as PciDevice, DomainPciRdmReservePolicy as PciRdmReservePolicy,
|
||||
};
|
||||
|
||||
pub struct GuestLaunchRequest {
|
||||
pub format: LaunchPackedFormat,
|
||||
pub kernel: Vec<u8>,
|
||||
@ -31,8 +36,10 @@ pub struct GuestLaunchRequest {
|
||||
pub mem: u64,
|
||||
pub env: HashMap<String, String>,
|
||||
pub run: Option<Vec<String>>,
|
||||
pub pcis: Vec<PciDevice>,
|
||||
pub debug: bool,
|
||||
pub image: OciPackedImage,
|
||||
pub addons_image: Option<PathBuf>,
|
||||
}
|
||||
|
||||
pub struct GuestLauncher {
|
||||
@ -100,8 +107,10 @@ impl GuestLauncher {
|
||||
run: request.run,
|
||||
};
|
||||
|
||||
let cfgblk = ConfigBlock::new(&uuid, &request.image)?;
|
||||
cfgblk.build(&launch_config)?;
|
||||
let cfgblk = ConfigBlock::new(&uuid, request.image.clone())?;
|
||||
let cfgblk_file = cfgblk.file.clone();
|
||||
let cfgblk_dir = cfgblk.dir.clone();
|
||||
tokio::task::spawn_blocking(move || cfgblk.build(&launch_config)).await??;
|
||||
|
||||
let image_squashfs_path = request
|
||||
.image
|
||||
@ -109,18 +118,33 @@ impl GuestLauncher {
|
||||
.to_str()
|
||||
.ok_or_else(|| anyhow!("failed to convert image path to string"))?;
|
||||
|
||||
let cfgblk_dir_path = cfgblk
|
||||
.dir
|
||||
let cfgblk_dir_path = cfgblk_dir
|
||||
.to_str()
|
||||
.ok_or_else(|| anyhow!("failed to convert cfgblk directory path to string"))?;
|
||||
let cfgblk_squashfs_path = cfgblk
|
||||
.file
|
||||
let cfgblk_squashfs_path = cfgblk_file
|
||||
.to_str()
|
||||
.ok_or_else(|| anyhow!("failed to convert cfgblk squashfs path to string"))?;
|
||||
let addons_squashfs_path = request
|
||||
.addons_image
|
||||
.map(|x| x.to_str().map(|x| x.to_string()))
|
||||
.map(|x| {
|
||||
Some(x.ok_or_else(|| anyhow!("failed to convert addons squashfs path to string")))
|
||||
})
|
||||
.unwrap_or(None);
|
||||
|
||||
let addons_squashfs_path = if let Some(path) = addons_squashfs_path {
|
||||
Some(path?)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let image_squashfs_loop = context.autoloop.loopify(image_squashfs_path)?;
|
||||
let cfgblk_squashfs_loop = context.autoloop.loopify(cfgblk_squashfs_path)?;
|
||||
|
||||
let addons_squashfs_loop = if let Some(ref addons_squashfs_path) = addons_squashfs_path {
|
||||
Some(context.autoloop.loopify(addons_squashfs_path)?)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let cmdline_options = [
|
||||
if request.debug { "debug" } else { "quiet" },
|
||||
"elevator=noop",
|
||||
@ -130,19 +154,48 @@ impl GuestLauncher {
|
||||
let guest_mac_string = container_mac.to_string().replace('-', ":");
|
||||
let gateway_mac_string = gateway_mac.to_string().replace('-', ":");
|
||||
|
||||
let mut disks = vec![
|
||||
DomainDisk {
|
||||
vdev: "xvda".to_string(),
|
||||
block: image_squashfs_loop.clone(),
|
||||
writable: false,
|
||||
},
|
||||
DomainDisk {
|
||||
vdev: "xvdb".to_string(),
|
||||
block: cfgblk_squashfs_loop.clone(),
|
||||
writable: false,
|
||||
},
|
||||
];
|
||||
|
||||
if let Some(ref addons) = addons_squashfs_loop {
|
||||
disks.push(DomainDisk {
|
||||
vdev: "xvdc".to_string(),
|
||||
block: addons.clone(),
|
||||
writable: false,
|
||||
});
|
||||
}
|
||||
|
||||
let mut loops = vec![
|
||||
format!("{}:{}:none", image_squashfs_loop.path, image_squashfs_path),
|
||||
format!(
|
||||
"{}:{}:{}",
|
||||
cfgblk_squashfs_loop.path, cfgblk_squashfs_path, cfgblk_dir_path
|
||||
),
|
||||
];
|
||||
|
||||
if let Some(ref addons) = addons_squashfs_loop {
|
||||
loops.push(format!(
|
||||
"{}:{}:none",
|
||||
addons.path,
|
||||
addons_squashfs_path
|
||||
.clone()
|
||||
.ok_or_else(|| anyhow!("addons squashfs path missing"))?
|
||||
));
|
||||
}
|
||||
|
||||
let mut extra_keys = vec![
|
||||
("krata/uuid".to_string(), uuid.to_string()),
|
||||
(
|
||||
"krata/loops".to_string(),
|
||||
format!(
|
||||
"{}:{}:none,{}:{}:{}",
|
||||
&image_squashfs_loop.path,
|
||||
image_squashfs_path,
|
||||
&cfgblk_squashfs_loop.path,
|
||||
cfgblk_squashfs_path,
|
||||
cfgblk_dir_path,
|
||||
),
|
||||
),
|
||||
("krata/loops".to_string(), loops.join(",")),
|
||||
(
|
||||
"krata/network/guest/ipv4".to_string(),
|
||||
format!("{}/{}", guest_ipv4, ipv4_network_mask),
|
||||
@ -182,18 +235,7 @@ impl GuestLauncher {
|
||||
initrd: request.initrd,
|
||||
cmdline,
|
||||
use_console_backend: Some("krata-console".to_string()),
|
||||
disks: vec![
|
||||
DomainDisk {
|
||||
vdev: "xvda".to_string(),
|
||||
block: image_squashfs_loop.clone(),
|
||||
writable: false,
|
||||
},
|
||||
DomainDisk {
|
||||
vdev: "xvdb".to_string(),
|
||||
block: cfgblk_squashfs_loop.clone(),
|
||||
writable: false,
|
||||
},
|
||||
],
|
||||
disks,
|
||||
channels: vec![DomainChannel {
|
||||
typ: "krata-channel".to_string(),
|
||||
initialized: false,
|
||||
@ -204,6 +246,7 @@ impl GuestLauncher {
|
||||
bridge: None,
|
||||
script: None,
|
||||
}],
|
||||
pcis: request.pcis.clone(),
|
||||
filesystems: vec![],
|
||||
event_channels: vec![],
|
||||
extra_keys,
|
||||
@ -239,7 +282,7 @@ impl GuestLauncher {
|
||||
Err(error) => {
|
||||
let _ = context.autoloop.unloop(&image_squashfs_loop.path).await;
|
||||
let _ = context.autoloop.unloop(&cfgblk_squashfs_loop.path).await;
|
||||
let _ = fs::remove_dir(&cfgblk.dir);
|
||||
let _ = fs::remove_dir(&cfgblk_dir);
|
||||
Err(error.into())
|
||||
}
|
||||
}
|
||||
|
@ -3,23 +3,29 @@ pub mod sys;
|
||||
|
||||
use crate::error::{Error, Result};
|
||||
use crate::sys::{
|
||||
AddressSize, CreateDomain, DomCtl, DomCtlValue, DomCtlVcpuContext, EvtChnAllocUnbound,
|
||||
GetDomainInfo, GetPageFrameInfo3, Hypercall, HypercallInit, MaxMem, MaxVcpus, MemoryMap,
|
||||
MemoryReservation, MmapBatch, MmapResource, MmuExtOp, MultiCallEntry, VcpuGuestContext,
|
||||
VcpuGuestContextAny, XenCapabilitiesInfo, HYPERVISOR_DOMCTL, HYPERVISOR_EVENT_CHANNEL_OP,
|
||||
HYPERVISOR_MEMORY_OP, HYPERVISOR_MMUEXT_OP, HYPERVISOR_MULTICALL, HYPERVISOR_XEN_VERSION,
|
||||
XENVER_CAPABILITIES, XEN_DOMCTL_CREATEDOMAIN, XEN_DOMCTL_DESTROYDOMAIN,
|
||||
XEN_DOMCTL_GETDOMAININFO, XEN_DOMCTL_GETPAGEFRAMEINFO3, XEN_DOMCTL_GETVCPUCONTEXT,
|
||||
XEN_DOMCTL_HYPERCALL_INIT, XEN_DOMCTL_MAX_MEM, XEN_DOMCTL_MAX_VCPUS, XEN_DOMCTL_PAUSEDOMAIN,
|
||||
XEN_DOMCTL_SETVCPUCONTEXT, XEN_DOMCTL_SET_ADDRESS_SIZE, XEN_DOMCTL_UNPAUSEDOMAIN,
|
||||
XEN_MEM_CLAIM_PAGES, XEN_MEM_MEMORY_MAP, XEN_MEM_POPULATE_PHYSMAP,
|
||||
AddressSize, AssignDevice, CreateDomain, DomCtl, DomCtlValue, DomCtlVcpuContext,
|
||||
EvtChnAllocUnbound, GetDomainInfo, GetPageFrameInfo3, Hypercall, HypercallInit,
|
||||
IoMemPermission, IoPortPermission, IrqPermission, MaxMem, MaxVcpus, MemoryMap,
|
||||
MemoryReservation, MmapBatch, MmapResource, MmuExtOp, MultiCallEntry, PciAssignDevice,
|
||||
VcpuGuestContext, VcpuGuestContextAny, XenCapabilitiesInfo, DOMCTL_DEV_PCI, HYPERVISOR_DOMCTL,
|
||||
HYPERVISOR_EVENT_CHANNEL_OP, HYPERVISOR_MEMORY_OP, HYPERVISOR_MMUEXT_OP, HYPERVISOR_MULTICALL,
|
||||
HYPERVISOR_XEN_VERSION, XENVER_CAPABILITIES, XEN_DOMCTL_ASSIGN_DEVICE, XEN_DOMCTL_CREATEDOMAIN,
|
||||
XEN_DOMCTL_DESTROYDOMAIN, XEN_DOMCTL_GETDOMAININFO, XEN_DOMCTL_GETPAGEFRAMEINFO3,
|
||||
XEN_DOMCTL_GETVCPUCONTEXT, XEN_DOMCTL_HYPERCALL_INIT, XEN_DOMCTL_IOMEM_PERMISSION,
|
||||
XEN_DOMCTL_IOPORT_PERMISSION, XEN_DOMCTL_IRQ_PERMISSION, XEN_DOMCTL_MAX_MEM,
|
||||
XEN_DOMCTL_MAX_VCPUS, XEN_DOMCTL_PAUSEDOMAIN, XEN_DOMCTL_SETVCPUCONTEXT,
|
||||
XEN_DOMCTL_SET_ADDRESS_SIZE, XEN_DOMCTL_UNPAUSEDOMAIN, XEN_MEM_CLAIM_PAGES, XEN_MEM_MEMORY_MAP,
|
||||
XEN_MEM_POPULATE_PHYSMAP,
|
||||
};
|
||||
use libc::{c_int, mmap, usleep, MAP_FAILED, MAP_SHARED, PROT_READ, PROT_WRITE};
|
||||
use log::trace;
|
||||
use nix::errno::Errno;
|
||||
use std::ffi::{c_long, c_uint, c_ulong, c_void};
|
||||
use std::sync::Arc;
|
||||
use sys::{XEN_DOMCTL_MAX_INTERFACE_VERSION, XEN_DOMCTL_MIN_INTERFACE_VERSION};
|
||||
use sys::{
|
||||
E820Entry, ForeignMemoryMap, PhysdevMapPirq, HYPERVISOR_PHYSDEV_OP, PHYSDEVOP_MAP_PIRQ,
|
||||
XEN_DOMCTL_MAX_INTERFACE_VERSION, XEN_DOMCTL_MIN_INTERFACE_VERSION, XEN_MEM_SET_MEMORY_MAP,
|
||||
};
|
||||
use tokio::sync::Semaphore;
|
||||
|
||||
use std::fs::{File, OpenOptions};
|
||||
@ -569,26 +575,42 @@ impl XenCall {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn get_memory_map(&self, size_of_entry: usize) -> Result<Vec<u8>> {
|
||||
pub async fn get_memory_map(&self, max_entries: u32) -> Result<Vec<E820Entry>> {
|
||||
let mut memory_map = MemoryMap {
|
||||
count: 0,
|
||||
count: max_entries,
|
||||
buffer: 0,
|
||||
};
|
||||
let mut entries = vec![E820Entry::default(); max_entries as usize];
|
||||
memory_map.buffer = entries.as_mut_ptr() as c_ulong;
|
||||
self.hypercall2(
|
||||
HYPERVISOR_MEMORY_OP,
|
||||
XEN_MEM_MEMORY_MAP as c_ulong,
|
||||
addr_of_mut!(memory_map) as c_ulong,
|
||||
)
|
||||
.await?;
|
||||
entries.truncate(memory_map.count as usize);
|
||||
Ok(entries)
|
||||
}
|
||||
|
||||
pub async fn set_memory_map(
|
||||
&self,
|
||||
domid: u32,
|
||||
entries: Vec<E820Entry>,
|
||||
) -> Result<Vec<E820Entry>> {
|
||||
let mut memory_map = ForeignMemoryMap {
|
||||
domid: domid as u16,
|
||||
map: MemoryMap {
|
||||
count: entries.len() as u32,
|
||||
buffer: entries.as_ptr() as u64,
|
||||
},
|
||||
};
|
||||
self.hypercall2(
|
||||
HYPERVISOR_MEMORY_OP,
|
||||
XEN_MEM_MEMORY_MAP as c_ulong,
|
||||
XEN_MEM_SET_MEMORY_MAP as c_ulong,
|
||||
addr_of_mut!(memory_map) as c_ulong,
|
||||
)
|
||||
.await?;
|
||||
let mut buffer = vec![0u8; memory_map.count as usize * size_of_entry];
|
||||
memory_map.buffer = buffer.as_mut_ptr() as c_ulong;
|
||||
self.hypercall2(
|
||||
HYPERVISOR_MEMORY_OP,
|
||||
XEN_MEM_MEMORY_MAP as c_ulong,
|
||||
addr_of_mut!(memory_map) as c_ulong,
|
||||
)
|
||||
.await?;
|
||||
Ok(buffer)
|
||||
Ok(entries)
|
||||
}
|
||||
|
||||
pub async fn populate_physmap(
|
||||
@ -671,4 +693,140 @@ impl XenCall {
|
||||
.await
|
||||
.map(|_| ())
|
||||
}
|
||||
|
||||
pub async fn iomem_permission(
|
||||
&self,
|
||||
domid: u32,
|
||||
first_mfn: u64,
|
||||
nr_mfns: u64,
|
||||
allow: bool,
|
||||
) -> Result<()> {
|
||||
trace!(
|
||||
"domctl fd={} iomem_permission domid={} first_mfn={:#x}, nr_mfns={:#x} allow={}",
|
||||
self.handle.as_raw_fd(),
|
||||
domid,
|
||||
first_mfn,
|
||||
nr_mfns,
|
||||
allow,
|
||||
);
|
||||
let mut domctl = DomCtl {
|
||||
cmd: XEN_DOMCTL_IOMEM_PERMISSION,
|
||||
interface_version: self.domctl_interface_version,
|
||||
domid,
|
||||
value: DomCtlValue {
|
||||
iomem_permission: IoMemPermission {
|
||||
first_mfn,
|
||||
nr_mfns,
|
||||
allow: if allow { 1 } else { 0 },
|
||||
},
|
||||
},
|
||||
};
|
||||
self.hypercall1(HYPERVISOR_DOMCTL, addr_of_mut!(domctl) as c_ulong)
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn ioport_permission(
|
||||
&self,
|
||||
domid: u32,
|
||||
first_port: u32,
|
||||
nr_ports: u32,
|
||||
allow: bool,
|
||||
) -> Result<()> {
|
||||
trace!(
|
||||
"domctl fd={} ioport_permission domid={} first_port={:#x}, nr_ports={:#x} allow={}",
|
||||
self.handle.as_raw_fd(),
|
||||
domid,
|
||||
first_port,
|
||||
nr_ports,
|
||||
allow,
|
||||
);
|
||||
let mut domctl = DomCtl {
|
||||
cmd: XEN_DOMCTL_IOPORT_PERMISSION,
|
||||
interface_version: self.domctl_interface_version,
|
||||
domid,
|
||||
value: DomCtlValue {
|
||||
ioport_permission: IoPortPermission {
|
||||
first_port,
|
||||
nr_ports,
|
||||
allow: if allow { 1 } else { 0 },
|
||||
},
|
||||
},
|
||||
};
|
||||
self.hypercall1(HYPERVISOR_DOMCTL, addr_of_mut!(domctl) as c_ulong)
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn irq_permission(&self, domid: u32, irq: u32, allow: bool) -> Result<()> {
|
||||
trace!(
|
||||
"domctl fd={} irq_permission domid={} irq={} allow={}",
|
||||
self.handle.as_raw_fd(),
|
||||
domid,
|
||||
irq,
|
||||
allow,
|
||||
);
|
||||
let mut domctl = DomCtl {
|
||||
cmd: XEN_DOMCTL_IRQ_PERMISSION,
|
||||
interface_version: self.domctl_interface_version,
|
||||
domid,
|
||||
value: DomCtlValue {
|
||||
irq_permission: IrqPermission {
|
||||
pirq: irq,
|
||||
allow: if allow { 1 } else { 0 },
|
||||
pad: [0; 3],
|
||||
},
|
||||
},
|
||||
};
|
||||
self.hypercall1(HYPERVISOR_DOMCTL, addr_of_mut!(domctl) as c_ulong)
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn map_pirq(&self, domid: u32, index: isize, pirq: Option<u32>) -> Result<u32> {
|
||||
trace!(
|
||||
"physdev fd={} map_pirq domid={} index={} pirq={:?}",
|
||||
self.handle.as_raw_fd(),
|
||||
domid,
|
||||
index,
|
||||
pirq,
|
||||
);
|
||||
let mut physdev = PhysdevMapPirq::default();
|
||||
physdev.domid = domid as u16;
|
||||
physdev.typ = 0x1;
|
||||
physdev.index = index as c_int;
|
||||
physdev.pirq = pirq.map(|x| x as c_int).unwrap_or(index as c_int);
|
||||
self.hypercall2(
|
||||
HYPERVISOR_PHYSDEV_OP,
|
||||
PHYSDEVOP_MAP_PIRQ,
|
||||
addr_of_mut!(physdev) as c_ulong,
|
||||
)
|
||||
.await?;
|
||||
Ok(physdev.pirq as u32)
|
||||
}
|
||||
|
||||
pub async fn assign_device(&self, domid: u32, sbdf: u32, flags: u32) -> Result<()> {
|
||||
trace!(
|
||||
"domctl fd={} assign_device domid={} sbdf={} flags={}",
|
||||
self.handle.as_raw_fd(),
|
||||
domid,
|
||||
sbdf,
|
||||
flags,
|
||||
);
|
||||
let mut domctl = DomCtl {
|
||||
cmd: XEN_DOMCTL_ASSIGN_DEVICE,
|
||||
interface_version: self.domctl_interface_version,
|
||||
domid,
|
||||
value: DomCtlValue {
|
||||
assign_device: AssignDevice {
|
||||
device: DOMCTL_DEV_PCI,
|
||||
flags,
|
||||
pci_assign_device: PciAssignDevice { sbdf, padding: 0 },
|
||||
},
|
||||
},
|
||||
};
|
||||
self.hypercall1(HYPERVISOR_DOMCTL, addr_of_mut!(domctl) as c_ulong)
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
@ -104,6 +104,7 @@ pub const XEN_DOMCTL_CDF_HAP: u32 = 1u32 << 1;
|
||||
pub const XEN_DOMCTL_CDF_S3_INTEGRITY: u32 = 1u32 << 2;
|
||||
pub const XEN_DOMCTL_CDF_OOS_OFF: u32 = 1u32 << 3;
|
||||
pub const XEN_DOMCTL_CDF_XS_DOMAIN: u32 = 1u32 << 4;
|
||||
pub const XEN_DOMCTL_CDF_IOMMU: u32 = 1u32 << 5;
|
||||
|
||||
pub const XEN_X86_EMU_LAPIC: u32 = 1 << 0;
|
||||
pub const XEN_X86_EMU_HPET: u32 = 1 << 1;
|
||||
@ -237,6 +238,10 @@ pub union DomCtlValue {
|
||||
pub vcpu_context: DomCtlVcpuContext,
|
||||
pub address_size: AddressSize,
|
||||
pub get_page_frame_info: GetPageFrameInfo3,
|
||||
pub ioport_permission: IoPortPermission,
|
||||
pub iomem_permission: IoMemPermission,
|
||||
pub irq_permission: IrqPermission,
|
||||
pub assign_device: AssignDevice,
|
||||
pub pad: [u8; 128],
|
||||
}
|
||||
|
||||
@ -309,6 +314,30 @@ pub struct GetPageFrameInfo3 {
|
||||
pub array: c_ulong,
|
||||
}
|
||||
|
||||
/// Payload of the `XEN_DOMCTL_IOPORT_PERMISSION` domctl, stored in
/// `DomCtlValue::ioport_permission`.
///
/// Describes `nr_ports` I/O ports starting at `first_port`. `allow` is a byte
/// flag: `XenCall::ioport_permission` writes 1 to allow and 0 otherwise.
#[repr(C)]
|
||||
#[derive(Copy, Clone, Debug)]
|
||||
pub struct IoPortPermission {
|
||||
pub first_port: u32,
|
||||
pub nr_ports: u32,
|
||||
pub allow: u8,
|
||||
}
|
||||
|
||||
/// Payload of the `XEN_DOMCTL_IOMEM_PERMISSION` domctl, stored in
/// `DomCtlValue::iomem_permission`.
///
/// Describes `nr_mfns` frames starting at frame number `first_mfn`. `allow` is
/// a byte flag: `XenCall::iomem_permission` writes 1 to allow and 0 otherwise.
#[repr(C)]
|
||||
#[derive(Copy, Clone, Debug)]
|
||||
pub struct IoMemPermission {
|
||||
pub first_mfn: u64,
|
||||
pub nr_mfns: u64,
|
||||
pub allow: u8,
|
||||
}
|
||||
|
||||
/// Payload of the `XEN_DOMCTL_IRQ_PERMISSION` domctl, stored in
/// `DomCtlValue::irq_permission`.
///
/// `allow` is a byte flag: `XenCall::irq_permission` writes 1 to allow and 0
/// otherwise, and zeroes `pad`.
#[repr(C)]
|
||||
#[derive(Copy, Clone, Debug)]
|
||||
pub struct IrqPermission {
|
||||
pub pirq: u32,
|
||||
pub allow: u8,
|
||||
pub pad: [u8; 3],
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(Copy, Clone, Debug, Default)]
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
@ -378,7 +407,8 @@ pub struct MultiCallEntry {
|
||||
}
|
||||
|
||||
pub const XEN_MEM_POPULATE_PHYSMAP: u32 = 6;
|
||||
pub const XEN_MEM_MEMORY_MAP: u32 = 9;
|
||||
pub const XEN_MEM_MEMORY_MAP: u32 = 10;
|
||||
pub const XEN_MEM_SET_MEMORY_MAP: u32 = 13;
|
||||
pub const XEN_MEM_CLAIM_PAGES: u32 = 24;
|
||||
|
||||
#[repr(C)]
|
||||
@ -388,6 +418,13 @@ pub struct MemoryMap {
|
||||
pub buffer: c_ulong,
|
||||
}
|
||||
|
||||
/// A `MemoryMap` request addressed to a specific domain.
///
/// Built by `XenCall::set_memory_map`, which fills `domid` with the target
/// domain (narrowed to 16 bits) and `map` with the entry count and buffer
/// address.
#[repr(C)]
|
||||
#[derive(Copy, Clone, Debug)]
|
||||
pub struct ForeignMemoryMap {
|
||||
pub domid: u16,
|
||||
pub map: MemoryMap,
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(Copy, Clone, Debug)]
|
||||
pub struct VcpuGuestContextFpuCtx {
|
||||
@ -582,3 +619,60 @@ pub struct EvtChnAllocUnbound {
|
||||
pub remote_dom: u16,
|
||||
pub port: u32,
|
||||
}
|
||||
|
||||
/// One memory map entry as exchanged with the hypervisor by
/// `XenCall::get_memory_map` and `XenCall::set_memory_map`.
///
/// The layout is `repr(C, packed)`, so fields may be unaligned: copy field
/// values out instead of taking references to them.
/// NOTE(review): `typ` presumably holds one of the `E820_*` constants - confirm.
#[repr(C, packed)]
|
||||
#[derive(Debug, Copy, Clone, Default)]
|
||||
pub struct E820Entry {
|
||||
pub addr: u64,
|
||||
pub size: u64,
|
||||
pub typ: u32,
|
||||
}
|
||||
|
||||
// E820 constants, compiled only for x86_64 targets.
// NOTE(review): E820_MAX is presumably the upper bound on entries requested
// from the hypervisor, and the remaining values presumably populate
// `E820Entry::typ` - confirm against the callers.
#[cfg(target_arch = "x86_64")]
|
||||
pub const E820_MAX: u32 = 1024;
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
pub const E820_RAM: u32 = 1;
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
pub const E820_RESERVED: u32 = 2;
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
pub const E820_ACPI: u32 = 3;
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
pub const E820_NVS: u32 = 4;
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
pub const E820_UNUSABLE: u32 = 5;
|
||||
|
||||
pub const PHYSDEVOP_MAP_PIRQ: u64 = 13;
|
||||
|
||||
/// Request structure passed to `HYPERVISOR_PHYSDEV_OP` with
/// `PHYSDEVOP_MAP_PIRQ`.
///
/// `XenCall::map_pirq` fills `domid`, `typ`, `index` and `pirq`, leaves the
/// remaining fields at their defaults, and reads `pirq` back after the call.
/// NOTE(review): Xen's public physdev.h appears to declare `entry_nr` as `int`
/// rather than a 16-bit value - confirm the field width against the header.
#[repr(C)]
|
||||
#[derive(Default, Clone, Copy, Debug)]
|
||||
pub struct PhysdevMapPirq {
|
||||
pub domid: u16,
|
||||
pub typ: c_int,
|
||||
pub index: c_int,
|
||||
pub pirq: c_int,
|
||||
pub bus: c_int,
|
||||
pub devfn: c_int,
|
||||
pub entry_nr: u16,
|
||||
pub table_base: u64,
|
||||
}
|
||||
|
||||
pub const DOMCTL_DEV_RDM_RELAXED: u32 = 1;
|
||||
pub const DOMCTL_DEV_PCI: u32 = 0;
|
||||
pub const DOMCTL_DEV_DT: u32 = 1;
|
||||
|
||||
/// PCI-specific part of an `AssignDevice` request.
///
/// `XenCall::assign_device` stores the encoded device address in `sbdf` and
/// writes 0 to `padding`.
#[repr(C)]
|
||||
#[derive(Default, Clone, Copy, Debug)]
|
||||
pub struct PciAssignDevice {
|
||||
pub sbdf: u32,
|
||||
pub padding: u64,
|
||||
}
|
||||
|
||||
/// Payload of the `XEN_DOMCTL_ASSIGN_DEVICE` domctl, stored in
/// `DomCtlValue::assign_device`.
///
/// `XenCall::assign_device` sets `device` to `DOMCTL_DEV_PCI`, passes the
/// caller's `flags` through, and fills `pci_assign_device` with the target
/// device.
#[repr(C)]
|
||||
#[derive(Default, Clone, Copy, Debug)]
|
||||
pub struct AssignDevice {
|
||||
pub device: u32,
|
||||
pub flags: u32,
|
||||
pub pci_assign_device: PciAssignDevice,
|
||||
}
|
||||
|
||||
pub const DOMID_IO: u32 = 0x7FF1;
|
||||
|
@ -18,6 +18,7 @@ krata-xencall = { path = "../xencall", version = "^0.0.10" }
|
||||
krata-xenstore = { path = "../xenstore", version = "^0.0.10" }
|
||||
memchr = { workspace = true }
|
||||
nix = { workspace = true }
|
||||
regex = { workspace = true }
|
||||
slice-copy = { workspace = true }
|
||||
thiserror = { workspace = true }
|
||||
tokio = { workspace = true }
|
||||
@ -34,3 +35,7 @@ name = "xenclient"
|
||||
[[example]]
|
||||
name = "xenclient-boot"
|
||||
path = "examples/boot.rs"
|
||||
|
||||
[[example]]
|
||||
name = "xenclient-pci"
|
||||
path = "examples/pci.rs"
|
||||
|
@ -27,6 +27,7 @@ async fn main() -> Result<()> {
|
||||
disks: vec![],
|
||||
channels: vec![],
|
||||
vifs: vec![],
|
||||
pcis: vec![],
|
||||
filesystems: vec![],
|
||||
extra_keys: vec![],
|
||||
extra_rw_paths: vec![],
|
||||
|
32
crates/xen/xenclient/examples/pci.rs
Normal file
32
crates/xen/xenclient/examples/pci.rs
Normal file
@ -0,0 +1,32 @@
|
||||
use xenclient::pci::*;
|
||||
|
||||
use xenclient::error::Result;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
let backend = XenPciBackend::new();
|
||||
if !backend.is_loaded().await? {
|
||||
return Err(xenclient::error::Error::GenericError(
|
||||
"xen-pciback module not loaded".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
println!("assignable devices:");
|
||||
for device in backend.list_devices().await? {
|
||||
let is_assigned = backend.is_assigned(&device).await?;
|
||||
let has_slot = backend.has_slot(&device).await?;
|
||||
println!("{} slot={} assigned={}", device, has_slot, is_assigned);
|
||||
let resources = backend.read_resources(&device).await?;
|
||||
for resource in resources {
|
||||
println!(
|
||||
" resource start={:#x} end={:#x} flags={:#x} bar-io={}",
|
||||
resource.start,
|
||||
resource.end,
|
||||
resource.flags,
|
||||
resource.is_bar_io()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
@ -1,5 +1,7 @@
|
||||
use std::io;
|
||||
|
||||
use crate::pci::PciBdf;
|
||||
|
||||
#[derive(thiserror::Error, Debug)]
|
||||
pub enum Error {
|
||||
#[error("io issue encountered: {0}")]
|
||||
@ -34,6 +36,16 @@ pub enum Error {
|
||||
ElfInvalidImage,
|
||||
#[error("provided elf image does not contain xen support")]
|
||||
ElfXenSupportMissing,
|
||||
#[error("regex error: {0}")]
|
||||
RegexError(#[from] regex::Error),
|
||||
#[error("error: {0}")]
|
||||
GenericError(String),
|
||||
#[error("failed to parse int: {0}")]
|
||||
ParseIntError(#[from] std::num::ParseIntError),
|
||||
#[error("invalid pci bdf string")]
|
||||
InvalidPciBdfString,
|
||||
#[error("pci device {0} is not assignable")]
|
||||
PciDeviceNotAssignable(PciBdf),
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
|
@ -21,18 +21,26 @@ use crate::elfloader::ElfImageLoader;
|
||||
use crate::error::{Error, Result};
|
||||
use boot::BootState;
|
||||
use log::{debug, trace, warn};
|
||||
use pci::{PciBdf, XenPciBackend};
|
||||
use sys::XEN_PAGE_SHIFT;
|
||||
use tokio::time::timeout;
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::path::PathBuf;
|
||||
use std::str::FromStr;
|
||||
use std::time::Duration;
|
||||
use uuid::Uuid;
|
||||
use xencall::sys::{CreateDomain, XEN_DOMCTL_CDF_HAP, XEN_DOMCTL_CDF_HVM_GUEST};
|
||||
use xencall::sys::{
|
||||
CreateDomain, DOMCTL_DEV_RDM_RELAXED, XEN_DOMCTL_CDF_HAP, XEN_DOMCTL_CDF_HVM_GUEST,
|
||||
XEN_DOMCTL_CDF_IOMMU,
|
||||
};
|
||||
use xencall::XenCall;
|
||||
use xenstore::{
|
||||
XsPermission, XsdClient, XsdInterface, XS_PERM_NONE, XS_PERM_READ, XS_PERM_READ_WRITE,
|
||||
};
|
||||
|
||||
pub mod pci;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct XenClient {
|
||||
pub store: XsdClient,
|
||||
@ -78,6 +86,33 @@ pub struct DomainEventChannel {
|
||||
pub name: String,
|
||||
}
|
||||
|
||||
/// Reserved-device-memory policy selected for a passed-through PCI device.
///
/// The default is [`DomainPciRdmReservePolicy::Strict`].
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub enum DomainPciRdmReservePolicy {
    Invalid,
    #[default]
    Strict,
    Relaxed,
}

impl DomainPciRdmReservePolicy {
    /// Returns the textual value used for this policy in the device options
    /// string: `"-1"` for `Invalid`, `"0"` for `Strict`, `"1"` for `Relaxed`.
    ///
    /// Every value is a string literal, so the result is `'static` and does
    /// not borrow from `self`.
    pub fn to_option_str(&self) -> &'static str {
        match self {
            Self::Invalid => "-1",
            Self::Strict => "0",
            Self::Relaxed => "1",
        }
    }
}
|
||||
|
||||
/// A PCI device to pass through to a domain, listed in `DomainConfig::pcis`.
///
/// `bdf` identifies the device. When the device is added, `permissive`,
/// `msi_translate`, `power_management` and `rdm_reserve_policy` are written
/// to the backend's per-device options string as `permissive`,
/// `msitranslate`, `power_mgmt` and `rdm_policy` respectively.
#[derive(Clone, Debug)]
|
||||
pub struct DomainPciDevice {
|
||||
pub bdf: PciBdf,
|
||||
pub permissive: bool,
|
||||
pub msi_translate: bool,
|
||||
pub power_management: bool,
|
||||
pub rdm_reserve_policy: DomainPciRdmReservePolicy,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct DomainConfig {
|
||||
pub backend_domid: u32,
|
||||
@ -93,6 +128,7 @@ pub struct DomainConfig {
|
||||
pub vifs: Vec<DomainNetworkInterface>,
|
||||
pub filesystems: Vec<DomainFilesystem>,
|
||||
pub event_channels: Vec<DomainEventChannel>,
|
||||
pub pcis: Vec<DomainPciDevice>,
|
||||
pub extra_keys: Vec<(String, String)>,
|
||||
pub extra_rw_paths: Vec<String>,
|
||||
}
|
||||
@ -118,12 +154,14 @@ impl XenClient {
|
||||
|
||||
pub async fn create(&self, config: &DomainConfig) -> Result<CreatedDomain> {
|
||||
let mut domain = CreateDomain {
|
||||
max_vcpus: config.max_vcpus,
|
||||
..Default::default()
|
||||
};
|
||||
domain.max_vcpus = config.max_vcpus;
|
||||
|
||||
if cfg!(target_arch = "aarch64") {
|
||||
domain.flags = XEN_DOMCTL_CDF_HVM_GUEST | XEN_DOMCTL_CDF_HAP;
|
||||
} else {
|
||||
domain.flags = XEN_DOMCTL_CDF_IOMMU;
|
||||
}
|
||||
|
||||
let domid = self.call.create_domain(domain).await?;
|
||||
@ -411,6 +449,19 @@ impl XenClient {
|
||||
.await?;
|
||||
}
|
||||
|
||||
for (index, pci) in config.pcis.iter().enumerate() {
|
||||
self.pci_device_add(
|
||||
&dom_path,
|
||||
&backend_dom_path,
|
||||
config.backend_domid,
|
||||
domid,
|
||||
index,
|
||||
config.pcis.len(),
|
||||
pci,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
|
||||
for channel in &config.event_channels {
|
||||
let id = self
|
||||
.call
|
||||
@ -645,6 +696,129 @@ impl XenClient {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
async fn pci_device_add(
|
||||
&self,
|
||||
dom_path: &str,
|
||||
backend_dom_path: &str,
|
||||
backend_domid: u32,
|
||||
domid: u32,
|
||||
index: usize,
|
||||
device_count: usize,
|
||||
device: &DomainPciDevice,
|
||||
) -> Result<()> {
|
||||
let backend = XenPciBackend::new();
|
||||
if !backend.is_assigned(&device.bdf).await? {
|
||||
return Err(Error::PciDeviceNotAssignable(device.bdf));
|
||||
}
|
||||
let resources = backend.read_resources(&device.bdf).await?;
|
||||
for resource in resources {
|
||||
if resource.is_bar_io() {
|
||||
self.call
|
||||
.ioport_permission(domid, resource.start as u32, resource.size() as u32, true)
|
||||
.await?;
|
||||
} else {
|
||||
self.call
|
||||
.iomem_permission(
|
||||
domid,
|
||||
resource.start >> XEN_PAGE_SHIFT,
|
||||
(resource.size() + (XEN_PAGE_SHIFT - 1)) >> XEN_PAGE_SHIFT,
|
||||
true,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
}
|
||||
|
||||
// backend.reset(&device.bdf).await?;
|
||||
|
||||
self.call
|
||||
.assign_device(
|
||||
domid,
|
||||
device.bdf.encode(),
|
||||
if device.rdm_reserve_policy == DomainPciRdmReservePolicy::Relaxed {
|
||||
DOMCTL_DEV_RDM_RELAXED
|
||||
} else {
|
||||
0
|
||||
},
|
||||
)
|
||||
.await?;
|
||||
|
||||
let id = 60;
|
||||
|
||||
if index == 0 {
|
||||
let backend_items: Vec<(&str, String)> = vec![
|
||||
("frontend-id", domid.to_string()),
|
||||
("online", "1".to_string()),
|
||||
("state", "1".to_string()),
|
||||
("num_devs", device_count.to_string()),
|
||||
];
|
||||
|
||||
let frontend_items: Vec<(&str, String)> = vec![
|
||||
("backend-id", backend_domid.to_string()),
|
||||
("state", "1".to_string()),
|
||||
];
|
||||
|
||||
self.device_add(
|
||||
"pci",
|
||||
id,
|
||||
dom_path,
|
||||
backend_dom_path,
|
||||
backend_domid,
|
||||
domid,
|
||||
frontend_items,
|
||||
backend_items,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
|
||||
let backend_path = format!("{}/backend/{}/{}/{}", backend_dom_path, "pci", domid, id);
|
||||
|
||||
let transaction = self.store.transaction().await?;
|
||||
|
||||
transaction
|
||||
.write_string(
|
||||
format!("{}/key-{}", backend_path, index),
|
||||
&device.bdf.to_string(),
|
||||
)
|
||||
.await?;
|
||||
transaction
|
||||
.write_string(
|
||||
format!("{}/dev-{}", backend_path, index),
|
||||
&device.bdf.to_string(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
if let Some(vdefn) = device.bdf.vdefn {
|
||||
transaction
|
||||
.write_string(
|
||||
format!("{}/vdefn-{}", backend_path, index),
|
||||
&format!("{:#x}", vdefn),
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
|
||||
let mut options = HashMap::new();
|
||||
options.insert("permissive", if device.permissive { "1" } else { "0" });
|
||||
options.insert("rdm_policy", device.rdm_reserve_policy.to_option_str());
|
||||
options.insert("msitranslate", if device.msi_translate { "1" } else { "0" });
|
||||
options.insert(
|
||||
"power_mgmt",
|
||||
if device.power_management { "1" } else { "0" },
|
||||
);
|
||||
let options = options
|
||||
.into_iter()
|
||||
.map(|(key, value)| format!("{}={}", key, value))
|
||||
.collect::<Vec<_>>()
|
||||
.join(",");
|
||||
|
||||
transaction
|
||||
.write_string(format!("{}/opts-{}", backend_path, index), &options)
|
||||
.await?;
|
||||
|
||||
transaction.commit().await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
async fn device_add(
|
||||
&self,
|
||||
@ -809,21 +983,4 @@ impl XenClient {
|
||||
tx.commit().await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn get_console_path(&self, domid: u32) -> Result<String> {
|
||||
let dom_path = self.store.get_domain_path(domid).await?;
|
||||
let console_tty_path = format!("{}/console/tty", dom_path);
|
||||
let mut tty: Option<String> = None;
|
||||
for _ in 0..5 {
|
||||
tty = self.store.read_string(&console_tty_path).await?;
|
||||
if tty.is_some() {
|
||||
break;
|
||||
}
|
||||
tokio::time::sleep(Duration::from_millis(200)).await;
|
||||
}
|
||||
let Some(tty) = tty else {
|
||||
return Err(Error::TtyNotFound);
|
||||
};
|
||||
Ok(tty)
|
||||
}
|
||||
}
|
||||
|
305
crates/xen/xenclient/src/pci.rs
Normal file
305
crates/xen/xenclient/src/pci.rs
Normal file
@ -0,0 +1,305 @@
|
||||
use regex::Regex;
|
||||
use std::{fmt::Display, path::PathBuf, str::FromStr};
|
||||
use tokio::fs;
|
||||
|
||||
use crate::error::{Error, Result};
|
||||
|
||||
/// Sysfs directory of the pciback driver; device entries inside it are named by their BDF.
const PCIBACK_SYSFS_PATH: &str = "/sys/bus/pci/drivers/pciback";
/// Full notation `dddd:bb:dd.f` (lowercase hex). The `.` separator is escaped so that it
/// only matches a literal dot rather than any character.
const PCI_BDF_REGEX: &str = r"^([0-9a-f]{4}):([0-9a-f]{2}):([0-9a-f]{2})\.([0-9a-f]{1})$";
/// Short notation `bb:dd.f`, without a domain.
const PCI_BDF_SHORT_REGEX: &str = r"^([0-9a-f]{2}):([0-9a-f]{2})\.([0-9a-f]{1})$";
/// Full notation followed by `@vv`, a two-digit hex virtual slot suffix.
const PCI_BDF_VDEFN_REGEX: &str =
    r"^([0-9a-f]{4}):([0-9a-f]{2}):([0-9a-f]{2})\.([0-9a-f]{1})@([0-9a-f]{2})$";
/// Bit in a sysfs resource's flags word that marks the region as an I/O-port BAR.
const FLAG_PCI_BAR_IO: u64 = 0x1;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct XenPciBackend {
|
||||
path: PathBuf,
|
||||
}
|
||||
|
||||
impl Default for XenPciBackend {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl XenPciBackend {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
path: PathBuf::from(PCIBACK_SYSFS_PATH),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn is_loaded(&self) -> Result<bool> {
|
||||
Ok(fs::try_exists(&self.path).await?)
|
||||
}
|
||||
|
||||
pub async fn list_devices(&self) -> Result<Vec<PciBdf>> {
|
||||
let mut devices = Vec::new();
|
||||
let mut dir = fs::read_dir(&self.path).await?;
|
||||
while let Some(entry) = dir.next_entry().await? {
|
||||
let file_name_string = entry.file_name().to_string_lossy().to_string();
|
||||
let Some(bdf) = PciBdf::from_str(&file_name_string).ok() else {
|
||||
continue;
|
||||
};
|
||||
devices.push(bdf);
|
||||
}
|
||||
Ok(devices)
|
||||
}
|
||||
|
||||
pub async fn is_assigned(&self, bdf: &PciBdf) -> Result<bool> {
|
||||
let mut path = self.path.clone();
|
||||
path.push(bdf.to_string());
|
||||
Ok(fs::try_exists(path).await?)
|
||||
}
|
||||
|
||||
pub async fn read_irq(&self, bdf: &PciBdf) -> Result<Option<u32>> {
|
||||
let mut path: PathBuf = self.path.clone();
|
||||
path.push(bdf.to_string());
|
||||
path.push("irq");
|
||||
|
||||
if !path.exists() {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let content = fs::read_to_string(&path).await?;
|
||||
Ok(u32::from_str(content.trim()).ok())
|
||||
}
|
||||
|
||||
pub async fn read_resources(&self, bdf: &PciBdf) -> Result<Vec<PciMemoryResource>> {
|
||||
let mut resources = Vec::new();
|
||||
let mut path: PathBuf = self.path.clone();
|
||||
path.push(bdf.to_string());
|
||||
path.push("resource");
|
||||
let content = fs::read_to_string(&path).await?;
|
||||
for line in content.lines() {
|
||||
let parts = line.split(' ').collect::<Vec<_>>();
|
||||
if parts.len() != 3 {
|
||||
continue;
|
||||
}
|
||||
let Some(start) = parts.first() else {
|
||||
continue;
|
||||
};
|
||||
|
||||
let Some(end) = parts.get(1) else {
|
||||
continue;
|
||||
};
|
||||
|
||||
let Some(flags) = parts.get(2) else {
|
||||
continue;
|
||||
};
|
||||
|
||||
if !start.starts_with("0x") || !end.starts_with("0x") || !flags.starts_with("0x") {
|
||||
continue;
|
||||
}
|
||||
|
||||
let start = &start[2..];
|
||||
let end = &end[2..];
|
||||
let flags = &flags[2..];
|
||||
let Some(start) = u64::from_str_radix(start, 16).ok() else {
|
||||
continue;
|
||||
};
|
||||
let Some(end) = u64::from_str_radix(end, 16).ok() else {
|
||||
continue;
|
||||
};
|
||||
let Some(flags) = u64::from_str_radix(flags, 16).ok() else {
|
||||
continue;
|
||||
};
|
||||
|
||||
if start > 0 {
|
||||
resources.push(PciMemoryResource::new(start, end, flags));
|
||||
}
|
||||
}
|
||||
Ok(resources)
|
||||
}
|
||||
|
||||
pub async fn has_slot(&self, bdf: &PciBdf) -> Result<bool> {
|
||||
let mut slots_path = self.path.clone();
|
||||
slots_path.push("slots");
|
||||
let content = fs::read_to_string(&slots_path).await?;
|
||||
for line in content.lines() {
|
||||
if let Ok(slot) = PciBdf::from_str(line) {
|
||||
if slot == *bdf {
|
||||
return Ok(true);
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(false)
|
||||
}
|
||||
|
||||
pub async fn reset(&self, bdf: &PciBdf) -> Result<()> {
|
||||
let mut path: PathBuf = self.path.clone();
|
||||
path.push("do_flr");
|
||||
fs::write(&path, bdf.to_string()).await?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// A PCI device address: optional domain, bus, device and function, plus an optional
/// virtual slot value parsed from an `@vv` suffix.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub struct PciBdf {
    /// Domain number; `None` when the short `bb:dd.f` notation was used.
    pub domain: Option<u32>,
    /// Bus number.
    pub bus: u16,
    /// Device number on the bus.
    pub device: u16,
    /// Function number of the device.
    pub function: u16,
    /// Optional value from the `@vv` suffix.
    pub vdefn: Option<u16>,
}

impl PciBdf {
    /// Builds an address from its individual components.
    pub fn new(
        domain: Option<u32>,
        bus: u16,
        device: u16,
        function: u16,
        vdefn: Option<u16>,
    ) -> Self {
        PciBdf {
            domain,
            bus,
            device,
            function,
            vdefn,
        }
    }

    /// Returns a copy of this address with the domain replaced by `domain`.
    pub fn with_domain(&self, domain: u32) -> PciBdf {
        Self {
            domain: Some(domain),
            ..*self
        }
    }

    /// Packs the address into one `u32`:
    /// `domain << 16 | (bus & 0xff) << 8 | (device & 0x1f) << 3 | (function & 0x7)`.
    /// A missing domain is encoded as 0.
    pub fn encode(&self) -> u32 {
        let domain_bits = self.domain.unwrap_or(0) << 16u32;
        let bus_bits = u32::from(self.bus & 0xff) << 8u32;
        let device_bits = u32::from(self.device & 0x1f) << 3u32;
        let function_bits = u32::from(self.function & 0x7);
        domain_bits | bus_bits | device_bits | function_bits
    }
}
|
||||
|
||||
impl FromStr for PciBdf {
|
||||
type Err = Error;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self> {
|
||||
let pci_bdf_regex = Regex::from_str(PCI_BDF_REGEX)?;
|
||||
let pci_bdf_vdefn_regex = Regex::from_str(PCI_BDF_VDEFN_REGEX)?;
|
||||
let pci_bdf_short_regex = Regex::from_str(PCI_BDF_SHORT_REGEX)?;
|
||||
|
||||
if let Some(pci_bdf_captures) = pci_bdf_regex.captures(s) {
|
||||
let domain = pci_bdf_captures
|
||||
.get(1)
|
||||
.ok_or_else(|| Error::GenericError("capture group 1 did not exist".to_string()))?;
|
||||
let bus = pci_bdf_captures
|
||||
.get(2)
|
||||
.ok_or_else(|| Error::GenericError("capture group 2 did not exist".to_string()))?;
|
||||
let device = pci_bdf_captures
|
||||
.get(3)
|
||||
.ok_or_else(|| Error::GenericError("capture group 3 did not exist".to_string()))?;
|
||||
let function = pci_bdf_captures
|
||||
.get(4)
|
||||
.ok_or_else(|| Error::GenericError("capture group 4 did not exist".to_string()))?;
|
||||
|
||||
let domain = u32::from_str_radix(domain.as_str(), 16)?;
|
||||
let bus = u16::from_str_radix(bus.as_str(), 16)?;
|
||||
let device = u16::from_str_radix(device.as_str(), 16)?;
|
||||
let function = u16::from_str_radix(function.as_str(), 16)?;
|
||||
|
||||
Ok(PciBdf::new(Some(domain), bus, device, function, None))
|
||||
} else if let Some(pci_bdf_vdefn_captures) = pci_bdf_vdefn_regex.captures(s) {
|
||||
let domain = pci_bdf_vdefn_captures
|
||||
.get(1)
|
||||
.ok_or_else(|| Error::GenericError("capture group 1 did not exist".to_string()))?;
|
||||
let bus = pci_bdf_vdefn_captures
|
||||
.get(2)
|
||||
.ok_or_else(|| Error::GenericError("capture group 2 did not exist".to_string()))?;
|
||||
let device = pci_bdf_vdefn_captures
|
||||
.get(3)
|
||||
.ok_or_else(|| Error::GenericError("capture group 3 did not exist".to_string()))?;
|
||||
let function = pci_bdf_vdefn_captures
|
||||
.get(4)
|
||||
.ok_or_else(|| Error::GenericError("capture group 4 did not exist".to_string()))?;
|
||||
let vdefn = pci_bdf_vdefn_captures
|
||||
.get(5)
|
||||
.ok_or_else(|| Error::GenericError("capture group 5 did not exist".to_string()))?;
|
||||
|
||||
let domain = u32::from_str_radix(domain.as_str(), 16)?;
|
||||
let bus = u16::from_str_radix(bus.as_str(), 16)?;
|
||||
let device = u16::from_str_radix(device.as_str(), 16)?;
|
||||
let function = u16::from_str_radix(function.as_str(), 16)?;
|
||||
let vdefn = u16::from_str_radix(vdefn.as_str(), 16)?;
|
||||
Ok(PciBdf::new(
|
||||
Some(domain),
|
||||
bus,
|
||||
device,
|
||||
function,
|
||||
Some(vdefn),
|
||||
))
|
||||
} else if let Some(pci_bdf_short_captures) = pci_bdf_short_regex.captures(s) {
|
||||
let bus = pci_bdf_short_captures
|
||||
.get(1)
|
||||
.ok_or_else(|| Error::GenericError("capture group 1 did not exist".to_string()))?;
|
||||
let device = pci_bdf_short_captures
|
||||
.get(2)
|
||||
.ok_or_else(|| Error::GenericError("capture group 2 did not exist".to_string()))?;
|
||||
let function = pci_bdf_short_captures
|
||||
.get(3)
|
||||
.ok_or_else(|| Error::GenericError("capture group 3 did not exist".to_string()))?;
|
||||
|
||||
let bus = u16::from_str_radix(bus.as_str(), 16)?;
|
||||
let device = u16::from_str_radix(device.as_str(), 16)?;
|
||||
let function = u16::from_str_radix(function.as_str(), 16)?;
|
||||
Ok(PciBdf::new(None, bus, device, function, None))
|
||||
} else {
|
||||
Err(Error::InvalidPciBdfString)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for PciBdf {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
if let Some(domain) = self.domain {
|
||||
if let Some(vdefn) = self.vdefn {
|
||||
write!(
|
||||
f,
|
||||
"{:04x}:{:02x}:{:02x}.{:01x}@{:02x}",
|
||||
domain, self.bus, self.device, self.function, vdefn
|
||||
)
|
||||
} else {
|
||||
write!(
|
||||
f,
|
||||
"{:04x}:{:02x}:{:02x}.{:01x}",
|
||||
domain, self.bus, self.device, self.function
|
||||
)
|
||||
}
|
||||
} else {
|
||||
write!(
|
||||
f,
|
||||
"{:02x}:{:02x}.{:01x}",
|
||||
self.bus, self.device, self.function
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct PciMemoryResource {
|
||||
pub start: u64,
|
||||
pub end: u64,
|
||||
pub flags: u64,
|
||||
}
|
||||
|
||||
impl PciMemoryResource {
|
||||
pub fn new(start: u64, end: u64, flags: u64) -> PciMemoryResource {
|
||||
PciMemoryResource { start, end, flags }
|
||||
}
|
||||
|
||||
pub fn is_bar_io(&self) -> bool {
|
||||
(self.flags & FLAG_PCI_BAR_IO) != 0
|
||||
}
|
||||
|
||||
pub fn size(&self) -> u64 {
|
||||
(self.end - self.start) + 1
|
||||
}
|
||||
}
|
@ -11,7 +11,9 @@ use slice_copy::copy;
|
||||
use std::cmp::{max, min};
|
||||
use std::mem::size_of;
|
||||
use std::slice;
|
||||
use xencall::sys::{VcpuGuestContext, MMUEXT_PIN_L4_TABLE};
|
||||
use xencall::sys::{
|
||||
E820Entry, VcpuGuestContext, E820_MAX, E820_RAM, E820_UNUSABLE, MMUEXT_PIN_L4_TABLE,
|
||||
};
|
||||
|
||||
pub const X86_PAGE_SHIFT: u64 = 12;
|
||||
pub const X86_PAGE_SIZE: u64 = 1 << X86_PAGE_SHIFT;
|
||||
@ -273,6 +275,154 @@ impl X86BootSetup {
|
||||
self.table.mappings[m] = map;
|
||||
Ok(m)
|
||||
}
|
||||
|
||||
fn e820_sanitize(
|
||||
&self,
|
||||
mut source: Vec<E820Entry>,
|
||||
map_limit_kb: u64,
|
||||
balloon_kb: u64,
|
||||
) -> Result<Vec<E820Entry>> {
|
||||
let mut e820 = vec![E820Entry::default(); E820_MAX as usize];
|
||||
|
||||
for entry in &mut source {
|
||||
if entry.addr > 0x100000 {
|
||||
continue;
|
||||
}
|
||||
|
||||
// entries under 1MB should be removed.
|
||||
entry.typ = 0;
|
||||
entry.size = 0;
|
||||
entry.addr = u64::MAX;
|
||||
}
|
||||
|
||||
let mut lowest = u64::MAX;
|
||||
let mut highest = 0;
|
||||
|
||||
for entry in &source {
|
||||
if entry.typ == E820_RAM || entry.typ == E820_UNUSABLE || entry.typ == 0 {
|
||||
continue;
|
||||
}
|
||||
|
||||
lowest = if entry.addr < lowest {
|
||||
entry.addr
|
||||
} else {
|
||||
lowest
|
||||
};
|
||||
|
||||
highest = if entry.addr + entry.size > highest {
|
||||
entry.addr + entry.size
|
||||
} else {
|
||||
highest
|
||||
}
|
||||
}
|
||||
|
||||
let start_kb = if lowest > 1024 { lowest >> 10 } else { 0 };
|
||||
|
||||
let mut idx: usize = 0;
|
||||
|
||||
e820[idx].addr = 0;
|
||||
e820[idx].size = map_limit_kb << 10;
|
||||
e820[idx].typ = E820_RAM;
|
||||
|
||||
let mut delta_kb = 0u64;
|
||||
|
||||
if start_kb > 0 && map_limit_kb > start_kb {
|
||||
delta_kb = map_limit_kb - start_kb;
|
||||
if delta_kb > 0 {
|
||||
e820[idx].size -= delta_kb << 10;
|
||||
}
|
||||
}
|
||||
|
||||
let ram_end = source[0].addr + source[0].size;
|
||||
idx += 1;
|
||||
|
||||
for src in &mut source {
|
||||
let end = src.addr + src.size;
|
||||
if src.typ == E820_UNUSABLE || end < ram_end {
|
||||
src.typ = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
if src.typ != E820_RAM {
|
||||
continue;
|
||||
}
|
||||
|
||||
if src.addr >= (1 << 32) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if src.addr < ram_end {
|
||||
let delta = ram_end - src.addr;
|
||||
src.typ = E820_UNUSABLE;
|
||||
|
||||
if src.size < delta {
|
||||
src.typ = 0;
|
||||
} else {
|
||||
src.size -= delta;
|
||||
src.addr = ram_end;
|
||||
}
|
||||
|
||||
if src.addr + src.size != end {
|
||||
src.typ = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if end > ram_end {
|
||||
src.typ = E820_UNUSABLE;
|
||||
}
|
||||
}
|
||||
|
||||
if lowest > ram_end {
|
||||
let mut add_unusable = true;
|
||||
|
||||
for src in &mut source {
|
||||
if !add_unusable {
|
||||
break;
|
||||
}
|
||||
|
||||
if src.typ != E820_UNUSABLE {
|
||||
continue;
|
||||
}
|
||||
|
||||
if ram_end != src.addr {
|
||||
continue;
|
||||
}
|
||||
|
||||
if lowest != src.addr + src.size {
|
||||
src.size = lowest - src.addr;
|
||||
}
|
||||
add_unusable = false;
|
||||
}
|
||||
|
||||
if add_unusable {
|
||||
e820[1].typ = E820_UNUSABLE;
|
||||
e820[1].addr = ram_end;
|
||||
e820[1].size = lowest - ram_end;
|
||||
}
|
||||
}
|
||||
|
||||
for src in &source {
|
||||
if src.typ == E820_RAM || src.typ == 0 {
|
||||
continue;
|
||||
}
|
||||
|
||||
e820[idx].typ = src.typ;
|
||||
e820[idx].addr = src.addr;
|
||||
e820[idx].size = src.size;
|
||||
idx += 1;
|
||||
}
|
||||
|
||||
if balloon_kb > 0 || delta_kb > 0 {
|
||||
e820[idx].typ = E820_RAM;
|
||||
e820[idx].addr = if (1u64 << 32u64) > highest {
|
||||
1u64 << 32u64
|
||||
} else {
|
||||
highest
|
||||
};
|
||||
e820[idx].size = (delta_kb << 10) + (balloon_kb << 10);
|
||||
}
|
||||
Ok(e820)
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
@ -615,6 +765,13 @@ impl ArchBootSetup for X86BootSetup {
|
||||
let pg_mfn = setup.phys.p2m[pg_pfn as usize];
|
||||
setup.phys.unmap(pg_pfn)?;
|
||||
setup.phys.unmap(p2m_segment.pfn)?;
|
||||
|
||||
let map = setup.call.get_memory_map(E820_MAX).await?;
|
||||
let mem_mb = setup.total_pages >> (20 - self.page_shift());
|
||||
let mem_kb = mem_mb * 1024;
|
||||
let e820 = self.e820_sanitize(map, mem_kb, 0)?;
|
||||
setup.call.set_memory_map(setup.domid, e820).await?;
|
||||
|
||||
setup
|
||||
.call
|
||||
.mmuext(setup.domid, MMUEXT_PIN_L4_TABLE, pg_mfn, 0)
|
||||
|
1
hack/dist/bundle.sh
vendored
1
hack/dist/bundle.sh
vendored
@ -33,6 +33,7 @@ cd "${BUNDLE_DIR}"
|
||||
|
||||
cp "${KRATA_DIR}/target/initrd/initrd-${TARGET_ARCH}" initrd
|
||||
cp "${KRATA_DIR}/target/kernel/kernel-${TARGET_ARCH}" kernel
|
||||
cp "${KRATA_DIR}/target/kernel/addons-${TARGET_ARCH}.squashfs" addons.squashfs
|
||||
cp "${KRATA_DIR}/resources/systemd/kratad.service" kratad.service
|
||||
cp "${KRATA_DIR}/resources/systemd/kratanet.service" kratanet.service
|
||||
cp "${KRATA_DIR}/resources/bundle/install.sh" install.sh
|
||||
|
1
hack/dist/systar.sh
vendored
1
hack/dist/systar.sh
vendored
@ -40,6 +40,7 @@ fi
|
||||
|
||||
mkdir -p usr/share/krata/guest
|
||||
mv ../krata/kernel ../krata/initrd usr/share/krata/guest
|
||||
mv ../krata/addons.squashfs usr/share/krata/guest/addons.squashfs
|
||||
|
||||
tar czf "${SYSTAR}" --owner 0 --group 0 .
|
||||
|
||||
|
@ -3,7 +3,7 @@ set -e
|
||||
|
||||
REAL_SCRIPT="$(realpath "${0}")"
|
||||
cd "$(dirname "${REAL_SCRIPT}")/../.."
|
||||
KRATA_DIR="${PWD}"
|
||||
KRATA_DIR="$(realpath "${PWD}")"
|
||||
KERNEL_DIR="${KRATA_DIR}/kernel"
|
||||
|
||||
cd "${KRATA_DIR}"
|
||||
@ -62,7 +62,26 @@ then
|
||||
IMAGE_TARGET="Image.gz"
|
||||
fi
|
||||
|
||||
make -C "${KERNEL_SRC}" ARCH="${TARGET_ARCH_KERNEL}" -j"${KRATA_KERNEL_BUILD_JOBS}" "${CROSS_COMPILE_MAKE}" "${IMAGE_TARGET}"
|
||||
make -C "${KERNEL_SRC}" ARCH="${TARGET_ARCH_KERNEL}" -j"${KRATA_KERNEL_BUILD_JOBS}" "${CROSS_COMPILE_MAKE}" "${IMAGE_TARGET}" modules
|
||||
|
||||
MODULES_INSTALL_PATH="${OUTPUT_DIR}/modules-install-${TARGET_ARCH_STANDARD}"
|
||||
ADDONS_OUTPUT_PATH="${OUTPUT_DIR}/addons-${TARGET_ARCH_STANDARD}"
|
||||
MODULES_OUTPUT_PATH="${ADDONS_OUTPUT_PATH}/modules"
|
||||
ADDONS_SQUASHFS_PATH="${OUTPUT_DIR}/addons-${TARGET_ARCH_STANDARD}.squashfs"
|
||||
|
||||
rm -rf "${MODULES_INSTALL_PATH}"
|
||||
rm -rf "${ADDONS_OUTPUT_PATH}"
|
||||
rm -rf "${ADDONS_SQUASHFS_PATH}"
|
||||
|
||||
make -C "${KERNEL_SRC}" ARCH="${TARGET_ARCH_KERNEL}" -j"${KRATA_KERNEL_BUILD_JOBS}" "${CROSS_COMPILE_MAKE}" INSTALL_MOD_PATH="${MODULES_INSTALL_PATH}" modules_install
|
||||
KERNEL_MODULES_VER="$(ls "${MODULES_INSTALL_PATH}/lib/modules")"
|
||||
|
||||
mkdir -p "${ADDONS_OUTPUT_PATH}"
|
||||
mv "${MODULES_INSTALL_PATH}/lib/modules/${KERNEL_MODULES_VER}" "${MODULES_OUTPUT_PATH}"
|
||||
rm -rf "${MODULES_INSTALL_PATH}"
|
||||
[ -L "${MODULES_OUTPUT_PATH}/build" ] && unlink "${MODULES_OUTPUT_PATH}/build"
|
||||
|
||||
mksquashfs "${ADDONS_OUTPUT_PATH}" "${ADDONS_SQUASHFS_PATH}" -all-root
|
||||
|
||||
if [ "${TARGET_ARCH_STANDARD}" = "x86_64" ]
|
||||
then
|
||||
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user