guest: place running tasks in cgroup

This commit is contained in:
Alex Zenla
2024-03-30 23:23:59 +00:00
parent 7e9bc4b910
commit 8ec7042ea4
4 changed files with 77 additions and 22 deletions

View File

@ -30,6 +30,7 @@ async-trait = "0.1.77"
backhand = "0.15.0" backhand = "0.15.0"
byteorder = "1" byteorder = "1"
bytes = "1.5.0" bytes = "1.5.0"
cgroups-rs = "0.3.4"
cli-tables = "0.2.1" cli-tables = "0.2.1"
crossterm = "0.27.0" crossterm = "0.27.0"
ctrlc = "3.4.4" ctrlc = "3.4.4"

View File

@ -10,6 +10,7 @@ resolver = "2"
[dependencies] [dependencies]
anyhow = { workspace = true } anyhow = { workspace = true }
cgroups-rs = { workspace = true }
env_logger = { workspace = true } env_logger = { workspace = true }
futures = { workspace = true } futures = { workspace = true }
ipnetwork = { workspace = true } ipnetwork = { workspace = true }

View File

@ -3,6 +3,7 @@ use crate::{
death, death,
}; };
use anyhow::Result; use anyhow::Result;
use cgroups_rs::Cgroup;
use krata::idm::{ use krata::idm::{
client::IdmClient, client::IdmClient,
protocol::{idm_event::Event, IdmEvent, IdmExitEvent, IdmPacket}, protocol::{idm_event::Event, IdmEvent, IdmExitEvent, IdmPacket},
@ -14,14 +15,16 @@ use tokio::select;
pub struct GuestBackground { pub struct GuestBackground {
idm: IdmClient, idm: IdmClient,
child: Pid, child: Pid,
_cgroup: Cgroup,
wait: ChildWait, wait: ChildWait,
} }
impl GuestBackground { impl GuestBackground {
pub async fn new(idm: IdmClient, child: Pid) -> Result<GuestBackground> { pub async fn new(idm: IdmClient, cgroup: Cgroup, child: Pid) -> Result<GuestBackground> {
Ok(GuestBackground { Ok(GuestBackground {
idm, idm,
child, child,
_cgroup: cgroup,
wait: ChildWait::new()?, wait: ChildWait::new()?,
}) })
} }

View File

@ -1,4 +1,7 @@
use anyhow::{anyhow, Result}; use anyhow::{anyhow, Result};
use cgroups_rs::cgroup_builder::CgroupBuilder;
use cgroups_rs::devices::DeviceType;
use cgroups_rs::{Cgroup, CgroupPid};
use futures::stream::TryStreamExt; use futures::stream::TryStreamExt;
use ipnetwork::IpNetwork; use ipnetwork::IpNetwork;
use krata::ethtool::EthtoolHandle; use krata::ethtool::EthtoolHandle;
@ -7,6 +10,7 @@ use krata::launchcfg::{LaunchInfo, LaunchNetwork};
use libc::{setsid, TIOCSCTTY}; use libc::{setsid, TIOCSCTTY};
use log::{trace, warn}; use log::{trace, warn};
use nix::ioctl_write_int_bad; use nix::ioctl_write_int_bad;
use nix::sys::stat::{major, minor};
use nix::unistd::{dup2, execve, fork, ForkResult, Pid}; use nix::unistd::{dup2, execve, fork, ForkResult, Pid};
use oci_spec::image::{Config, ImageConfiguration}; use oci_spec::image::{Config, ImageConfiguration};
use path_absolutize::Absolutize; use path_absolutize::Absolutize;
@ -18,7 +22,7 @@ use std::net::{Ipv4Addr, Ipv6Addr};
use std::os::fd::AsRawFd; use std::os::fd::AsRawFd;
use std::os::linux::fs::MetadataExt; use std::os::linux::fs::MetadataExt;
use std::os::unix::ffi::OsStrExt; use std::os::unix::ffi::OsStrExt;
use std::os::unix::fs::{chroot, symlink, PermissionsExt}; use std::os::unix::fs::{chroot, PermissionsExt};
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::str::FromStr; use std::str::FromStr;
use sys_mount::{FilesystemType, Mount, MountFlags}; use sys_mount::{FilesystemType, Mount, MountFlags};
@ -112,14 +116,20 @@ impl GuestInit {
trace!("early init"); trace!("early init");
self.create_dir("/dev", Some(0o0755)).await?; self.create_dir("/dev", Some(0o0755)).await?;
self.create_dir("/proc", None).await?; self.create_dir("/proc", None).await?;
self.create_dir("/sys", None).await?; self.create_dir("/sys", Some(0o0555)).await?;
self.create_dir("/root", Some(0o0700)).await?; self.create_dir("/root", Some(0o0700)).await?;
self.create_dir("/tmp", None).await?; self.create_dir("/tmp", None).await?;
self.mount_kernel_fs("devtmpfs", "/dev", "mode=0755") self.create_dir("/run", Some(0o0755)).await?;
self.mount_kernel_fs("devtmpfs", "/dev", "mode=0755", None)
.await?;
self.mount_kernel_fs("proc", "/proc", "", None).await?;
self.mount_kernel_fs("sysfs", "/sys", "", None).await?;
fs::symlink("/proc/self/fd", "/dev/fd").await?;
fs::symlink("/proc/self/fd/0", "/dev/stdin").await?;
fs::symlink("/proc/self/fd/1", "/dev/stdout").await?;
fs::symlink("/proc/self/fd/2", "/dev/stderr").await?;
self.mount_kernel_fs("cgroup2", "/sys/fs/cgroup", "", Some(MountFlags::RELATIME))
.await?; .await?;
self.mount_kernel_fs("proc", "/proc", "").await?;
self.mount_kernel_fs("sysfs", "/sys", "").await?;
symlink("/proc/self/fd", "/dev/fd")?;
Ok(()) Ok(())
} }
@ -137,16 +147,19 @@ impl GuestInit {
Ok(()) Ok(())
} }
async fn mount_kernel_fs(&mut self, fstype: &str, path: &str, data: &str) -> Result<()> { async fn mount_kernel_fs(
let metadata = fs::metadata(path).await?; &mut self,
if metadata.st_dev() == fs::metadata("/").await?.st_dev() { fstype: &str,
path: &str,
data: &str,
flags: Option<MountFlags>,
) -> Result<()> {
trace!("mounting kernel fs {} to {}", fstype, path); trace!("mounting kernel fs {} to {}", fstype, path);
Mount::builder() Mount::builder()
.fstype(FilesystemType::Manual(fstype)) .fstype(FilesystemType::Manual(fstype))
.flags(MountFlags::NOEXEC | MountFlags::NOSUID) .flags(MountFlags::NOEXEC | MountFlags::NOSUID | flags.unwrap_or(MountFlags::empty()))
.data(data) .data(data)
.mount(fstype, path)?; .mount(fstype, path)?;
}
Ok(()) Ok(())
} }
@ -454,10 +467,44 @@ impl GuestInit {
working_dir = "/".to_string(); working_dir = "/".to_string();
} }
self.fork_and_exec(idm, working_dir, path, cmd, env).await?; let cgroup = self.init_cgroup().await?;
self.fork_and_exec(idm, cgroup, working_dir, path, cmd, env)
.await?;
Ok(()) Ok(())
} }
/// Creates and configures the `krata-guest-task` cgroup.
///
/// Two device rules are registered on the cgroup before it is built, one for
/// the device node at `/dev/hvc1` and one for the node at
/// `CONFIG_BLOCK_DEVICE_PATH`. Each rule covers every device type, is added
/// with `allow` set to `false`, and lists no access permissions. After the
/// cgroup is built its type is set to `"threaded"`.
///
/// # Errors
///
/// Returns an error if either device node cannot be stat'ed, if building the
/// cgroup fails, or if the cgroup type cannot be set.
async fn init_cgroup(&self) -> Result<Cgroup> {
    trace!("initializing cgroup");
    let hierarchy = cgroups_rs::hierarchies::auto();
    let builder = CgroupBuilder::new("krata-guest-task");

    // `st_rdev` carries the device number of the node itself; it is split
    // into major/minor below when the rules are registered.
    let idm_rdev = fs::metadata("/dev/hvc1").await?.st_rdev();
    let config_rdev = fs::metadata(CONFIG_BLOCK_DEVICE_PATH).await?.st_rdev();

    // Both rules have the same shape, so register them in order from a list.
    let mut device_rules = builder.devices();
    for rdev in [idm_rdev, config_rdev] {
        device_rules = device_rules.device(
            major(rdev) as i64,
            minor(rdev) as i64,
            DeviceType::All,
            false,
            Vec::new(),
        );
    }

    let task_cgroup = device_rules.done().build(hierarchy)?;
    task_cgroup.set_cgroup_type("threaded")?;
    trace!("initialized cgroup");
    Ok(task_cgroup)
}
fn strings_as_cstrings(values: Vec<String>) -> Result<Vec<CString>> { fn strings_as_cstrings(values: Vec<String>) -> Result<Vec<CString>> {
let mut results: Vec<CString> = vec![]; let mut results: Vec<CString> = vec![];
for value in values { for value in values {
@ -506,19 +553,21 @@ impl GuestInit {
async fn fork_and_exec( async fn fork_and_exec(
&mut self, &mut self,
idm: IdmClient, idm: IdmClient,
cgroup: Cgroup,
working_dir: String, working_dir: String,
path: CString, path: CString,
cmd: Vec<CString>, cmd: Vec<CString>,
env: Vec<CString>, env: Vec<CString>,
) -> Result<()> { ) -> Result<()> {
match unsafe { fork()? } { match unsafe { fork()? } {
ForkResult::Parent { child } => self.background(idm, child).await, ForkResult::Parent { child } => self.background(idm, cgroup, child).await,
ForkResult::Child => self.foreground(working_dir, path, cmd, env).await, ForkResult::Child => self.foreground(cgroup, working_dir, path, cmd, env).await,
} }
} }
async fn foreground( async fn foreground(
&mut self, &mut self,
cgroup: Cgroup,
working_dir: String, working_dir: String,
path: CString, path: CString,
cmd: Vec<CString>, cmd: Vec<CString>,
@ -526,6 +575,7 @@ impl GuestInit {
) -> Result<()> { ) -> Result<()> {
GuestInit::set_controlling_terminal()?; GuestInit::set_controlling_terminal()?;
std::env::set_current_dir(working_dir)?; std::env::set_current_dir(working_dir)?;
cgroup.add_task(CgroupPid::from(std::process::id() as u64))?;
execve(&path, &cmd, &env)?; execve(&path, &cmd, &env)?;
Ok(()) Ok(())
} }
@ -538,8 +588,8 @@ impl GuestInit {
Ok(()) Ok(())
} }
async fn background(&mut self, idm: IdmClient, executed: Pid) -> Result<()> { async fn background(&mut self, idm: IdmClient, cgroup: Cgroup, executed: Pid) -> Result<()> {
let mut background = GuestBackground::new(idm, executed).await?; let mut background = GuestBackground::new(idm, cgroup, executed).await?;
background.run().await?; background.run().await?;
Ok(()) Ok(())
} }