feature(krata): first pass on cpu hotplug support (#340)

* fix(runtime): adjust memory resources inside a transaction

* feature(krata): first pass on cpu hotplug support
This commit is contained in:
Alex Zenla 2024-08-15 01:06:04 -07:00 committed by GitHub
parent 506d2ccf46
commit 18bf370f74
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 144 additions and 41 deletions

View File

@ -39,20 +39,32 @@ pub struct ZoneLaunchCommand {
pull_update: bool,
#[arg(short, long, help = "Name of the zone")]
name: Option<String>,
#[arg(short, long, default_value_t = 1, help = "vCPUs available to the zone")]
cpus: u32,
#[arg(
short = 'C',
long = "max-cpus",
default_value_t = 4,
help = "Maximum vCPUs available for the zone"
)]
max_cpus: u32,
#[arg(
short = 'c',
long = "target-cpus",
default_value_t = 1,
help = "Target vCPUs for the zone to use"
)]
target_cpus: u32,
#[arg(
short = 'M',
long = "max-memory",
default_value_t = 512,
default_value_t = 1024,
help = "Maximum memory available to the zone, in megabytes"
)]
max_memory: u64,
#[arg(
short = 'm',
long = "target-memory",
default_value_t = 512,
help = "Memory target for the zone, in megabytes"
default_value_t = 1024,
help = "Target memory for the zone to use, in megabytes"
)]
target_memory: u64,
#[arg[short = 'D', long = "device", help = "Devices to request for the zone"]]
@ -131,9 +143,10 @@ impl ZoneLaunchCommand {
kernel,
initrd,
initial_resources: Some(ZoneResourceSpec {
cpus: self.cpus,
max_memory: self.max_memory,
target_memory: self.target_memory,
max_cpus: self.max_cpus,
target_cpus: self.target_cpus,
}),
task: Some(ZoneTaskSpec {
environment: env_map(&self.env.unwrap_or_default())

View File

@ -14,20 +14,32 @@ use tonic::{transport::Channel, Request};
pub struct ZoneUpdateResourcesCommand {
#[arg(help = "Zone to update resources of, either the name or the uuid")]
zone: String,
#[arg(short, long, default_value_t = 0, help = "vCPUs available to the zone")]
cpus: u32,
#[arg(
short = 'C',
long = "max-cpus",
default_value_t = 0,
help = "Maximum vCPUs available to the zone (0 means previous value)"
)]
max_cpus: u32,
#[arg(
short = 'c',
long = "target-cpus",
default_value_t = 0,
help = "Target vCPUs for the zone to use (0 means previous value)"
)]
target_cpus: u32,
#[arg(
short = 'M',
long = "max-memory",
default_value_t = 0,
help = "Maximum memory available to the zone, in megabytes"
help = "Maximum memory available to the zone, in megabytes (0 means previous value)"
)]
max_memory: u64,
#[arg(
short = 'm',
long = "target-memory",
default_value_t = 0,
help = "Memory target for the zone, in megabytes"
help = "Target memory for the zone to use, in megabytes (0 means previous value)"
)]
target_memory: u64,
}
@ -63,10 +75,15 @@ impl ZoneUpdateResourcesCommand {
} else {
self.target_memory
},
cpus: if self.cpus == 0 {
active_resources.cpus
max_cpus: if self.max_cpus == 0 {
active_resources.max_cpus
} else {
self.cpus
self.max_cpus
},
target_cpus: if self.target_cpus == 0 {
active_resources.target_cpus
} else {
self.target_cpus
},
}),
}))

View File

@ -664,22 +664,42 @@ impl ControlService for DaemonControlService {
.into());
}
let resources = request.resources.unwrap_or_default();
let mut resources = request.resources.unwrap_or_default();
if resources.target_memory > resources.max_memory {
resources.max_memory = resources.target_memory;
}
if resources.target_cpus < 1 {
resources.target_cpus = 1;
}
let initial_resources = zone
.spec
.clone()
.unwrap_or_default()
.initial_resources
.unwrap_or_default();
if resources.target_cpus > initial_resources.max_cpus {
resources.target_cpus = initial_resources.max_cpus;
}
resources.max_cpus = initial_resources.max_cpus;
self.runtime
.set_max_memory(status.domid, resources.max_memory * 1024 * 1024)
.set_memory_resources(
status.domid,
resources.target_memory * 1024 * 1024,
resources.max_memory * 1024 * 1024,
)
.await
.map_err(|error| ApiError {
message: format!("failed to set maximum memory: {}", error),
message: format!("failed to set memory resources: {}", error),
})?;
self.runtime
.set_target_memory(status.domid, resources.target_memory * 1024 * 1024)
.set_cpu_resources(status.domid, resources.target_cpus)
.await
.map_err(|error| ApiError {
message: format!("failed to set target memory: {}", error),
message: format!("failed to set cpu resources: {}", error),
})?;
status.resource_status = Some(ZoneResourceStatus {
active_resources: Some(resources),
});

View File

@ -76,7 +76,7 @@ impl ZoneCreator<'_> {
}
pub async fn create(&self, uuid: Uuid, zone: &mut Zone) -> Result<ZoneReconcilerResult> {
let Some(ref spec) = zone.spec else {
let Some(ref mut spec) = zone.spec else {
return Err(anyhow!("zone spec not specified"));
};
@ -176,7 +176,14 @@ impl ZoneCreator<'_> {
let reservation = self.ip_assignment.assign(uuid).await?;
let initial_resources = spec.initial_resources.unwrap_or_default();
let mut initial_resources = spec.initial_resources.unwrap_or_default();
if initial_resources.target_cpus < 1 {
initial_resources.target_cpus = 1;
}
if initial_resources.target_cpus > initial_resources.max_cpus {
initial_resources.max_cpus = initial_resources.target_cpus;
}
spec.initial_resources = Some(initial_resources);
let info = self
.runtime
.launch(ZoneLaunchRequest {
@ -190,7 +197,8 @@ impl ZoneCreator<'_> {
image,
kernel,
initrd,
cpus: initial_resources.cpus,
target_cpus: initial_resources.target_cpus,
max_cpus: initial_resources.max_cpus,
max_memory: initial_resources.max_memory,
target_memory: initial_resources.target_memory,
pcis,

View File

@ -30,7 +30,8 @@ message ZoneSpec {
message ZoneResourceSpec {
uint64 max_memory = 1;
uint64 target_memory = 2;
uint32 cpus = 3;
uint32 max_cpus = 3;
uint32 target_cpus = 4;
}
message ZoneImageSpec {

View File

@ -30,7 +30,8 @@ pub struct ZoneLaunchRequest {
pub initrd: Vec<u8>,
pub uuid: Option<Uuid>,
pub name: Option<String>,
pub cpus: u32,
pub target_cpus: u32,
pub max_cpus: u32,
pub target_memory: u64,
pub max_memory: u64,
pub env: HashMap<String, String>,
@ -195,7 +196,8 @@ impl ZoneLauncher {
let config = DomainConfig {
base: BaseDomainConfig {
max_vcpus: request.cpus,
max_vcpus: request.max_cpus,
target_vcpus: request.target_cpus,
max_mem_mb: request.max_memory,
target_mem_mb: request.target_memory,
kernel: request.kernel,

View File

@ -1,11 +1,11 @@
use std::{fs, path::PathBuf, str::FromStr, sync::Arc};
use anyhow::{anyhow, Result};
use krataloopdev::LoopControl;
use std::{fs, path::PathBuf, str::FromStr, sync::Arc};
use tokio::sync::Semaphore;
use uuid::Uuid;
use xenclient::XenClient;
use xenplatform::domain::XEN_EXTRA_MEMORY_KB;
use xenstore::{XsdClient, XsdInterface};
use self::{
@ -226,33 +226,62 @@ impl Runtime {
Ok(uuid)
}
pub async fn set_max_memory(&self, domid: u32, max_memory_bytes: u64) -> Result<()> {
pub async fn set_memory_resources(
&self,
domid: u32,
target_memory_bytes: u64,
max_memory_bytes: u64,
) -> Result<()> {
let mut max_memory_bytes = max_memory_bytes + (XEN_EXTRA_MEMORY_KB * 1024);
if target_memory_bytes > max_memory_bytes {
max_memory_bytes = target_memory_bytes + (XEN_EXTRA_MEMORY_KB * 1024);
}
self.context
.xen
.call
.set_max_mem(domid, max_memory_bytes / 1024)
.await?;
let domain_path = self.context.xen.store.get_domain_path(domid).await?;
let tx = self.context.xen.store.transaction().await?;
let max_memory_path = format!("{}/memory/static-max", domain_path);
self.context
.xen
.store
.write_string(max_memory_path, &(max_memory_bytes / 1024).to_string())
tx.write_string(max_memory_path, &(max_memory_bytes / 1024).to_string())
.await?;
let target_memory_path = format!("{}/memory/target", domain_path);
tx.write_string(
target_memory_path,
&(target_memory_bytes / 1024).to_string(),
)
.await?;
tx.commit().await?;
Ok(())
}
pub async fn set_target_memory(&self, domid: u32, target_memory_bytes: u64) -> Result<()> {
pub async fn set_cpu_resources(&self, domid: u32, target_cpus: u32) -> Result<()> {
let domain_path = self.context.xen.store.get_domain_path(domid).await?;
let target_memory_path = format!("{}/memory/target", domain_path);
self.context
let cpus = self
.context
.xen
.store
.write_string(
target_memory_path,
&(target_memory_bytes / 1024).to_string(),
.list(&format!("{}/cpu", domain_path))
.await?;
let tx = self.context.xen.store.transaction().await?;
for cpu in cpus {
let Some(id) = cpu.parse::<u32>().ok() else {
continue;
};
let available = if id >= target_cpus {
"offline"
} else {
"online"
};
tx.write_string(
format!("{}/cpu/{}/availability", domain_path, id),
available,
)
.await?;
}
tx.commit().await?;
Ok(())
}

View File

@ -27,6 +27,7 @@ async fn main() -> Result<()> {
base: BaseDomainConfig {
uuid: Uuid::new_v4(),
max_vcpus: 1,
target_vcpus: 1,
max_mem_mb: 512,
target_mem_mb: 512,
enable_iommu: true,

View File

@ -194,7 +194,16 @@ impl ClientTransaction {
self.tx.mkdir(&path).await?;
self.tx.set_perms(&path, ro_perm).await?;
let path = format!("{}/cpu/{}/availability", self.dom_path, i);
self.tx.write_string(&path, "online").await?;
self.tx
.write_string(
&path,
if i < base.target_vcpus {
"online"
} else {
"offline"
},
)
.await?;
self.tx.set_perms(&path, ro_perm).await?;
}
Ok(())

View File

@ -9,6 +9,8 @@ use xencall::XenCall;
use crate::error::Result;
/// Extra memory headroom, in KiB, added on top of a domain's configured maximum
/// memory before that limit is passed to `set_max_mem`.
// NOTE(review): the reason for this particular amount of slack is not shown here — confirm.
pub const XEN_EXTRA_MEMORY_KB: u64 = 2048;
pub struct BaseDomainManager<P: BootSetupPlatform> {
call: XenCall,
pub platform: Arc<P>,
@ -29,7 +31,7 @@ impl<P: BootSetupPlatform> BaseDomainManager<P> {
let domid = self.call.create_domain(domain).await?;
self.call.set_max_vcpus(domid, config.max_vcpus).await?;
self.call
.set_max_mem(domid, (config.max_mem_mb * 1024) + 2048)
.set_max_mem(domid, (config.max_mem_mb * 1024) + XEN_EXTRA_MEMORY_KB)
.await?;
let loader = ElfImageLoader::load_file_kernel(&config.kernel)?;
let platform = (*self.platform).clone();
@ -64,6 +66,7 @@ pub struct BaseDomainConfig {
pub uuid: Uuid,
pub owner_domid: u32,
pub max_vcpus: u32,
pub target_vcpus: u32,
pub max_mem_mb: u64,
pub target_mem_mb: u64,
pub kernel: Vec<u8>,