feature(krata): rename guest to zone (#266)

This commit is contained in:
Alex Zenla
2024-07-18 20:47:18 -07:00
committed by GitHub
parent 9bd8d1bb1d
commit 5ee1035896
58 changed files with 854 additions and 879 deletions

37
crates/zone/Cargo.toml Normal file
View File

@ -0,0 +1,37 @@
[package]
name = "krata-zone"
description = "zone services for the krata isolation engine"
license.workspace = true
version.workspace = true
homepage.workspace = true
repository.workspace = true
edition = "2021"
resolver = "2"
[dependencies]
anyhow = { workspace = true }
cgroups-rs = { workspace = true }
env_logger = { workspace = true }
futures = { workspace = true }
ipnetwork = { workspace = true }
krata = { path = "../krata", version = "^0.0.12" }
krata-xenstore = { path = "../xen/xenstore", version = "^0.0.12" }
libc = { workspace = true }
log = { workspace = true }
nix = { workspace = true, features = ["ioctl", "process", "fs"] }
oci-spec = { workspace = true }
path-absolutize = { workspace = true }
platform-info = { workspace = true }
rtnetlink = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
sys-mount = { workspace = true }
sysinfo = { workspace = true }
tokio = { workspace = true }
[lib]
name = "kratazone"
[[bin]]
name = "krata-zone"
path = "bin/init.rs"

19
crates/zone/bin/init.rs Normal file
View File

@ -0,0 +1,19 @@
use anyhow::Result;
use env_logger::Env;
use kratazone::{death, init::ZoneInit};
use log::error;
use std::env;
/// Entry point of the zone init binary.
///
/// Enables backtraces, installs the logger (default filter `warn`), runs
/// [`ZoneInit::init`], and then hands control to [`death`] with exit code
/// `127` when initialization failed or `1` when it returned normally.
#[tokio::main]
async fn main() -> Result<()> {
    env::set_var("RUST_BACKTRACE", "1");
    let log_env = Env::default().default_filter_or("warn");
    env_logger::Builder::from_env(log_env).init();

    let mut zone = ZoneInit::new();
    let exit_code = match zone.init().await {
        Ok(()) => 1,
        Err(error) => {
            error!("failed to initialize zone: {}", error);
            127
        }
    };
    death(exit_code).await?;
    Ok(())
}

View File

@ -0,0 +1,164 @@
use crate::{
childwait::{ChildEvent, ChildWait},
death,
exec::ZoneExecTask,
metrics::MetricsCollector,
};
use anyhow::Result;
use cgroups_rs::Cgroup;
use krata::idm::{
client::{IdmClientStreamResponseHandle, IdmInternalClient},
internal::{
event::Event as EventType, request::Request as RequestType,
response::Response as ResponseType, Event, ExecStreamResponseUpdate, ExitEvent,
MetricsResponse, PingResponse, Request, Response,
},
};
use log::debug;
use nix::unistd::Pid;
use tokio::{select, sync::broadcast};
/// State for the loop that services IDM traffic and watches for the exit of
/// the zone's child process (see `ZoneBackground::run`).
pub struct ZoneBackground {
// Client used to subscribe to IDM events/requests and to send responses and events.
idm: IdmInternalClient,
// Pid compared against incoming child events in `child_event`.
child: Pid,
// Stored but never read in this file; held for the lifetime of the struct.
_cgroup: Cgroup,
// Source of `ChildEvent`s consumed by `run`.
wait: ChildWait,
}
impl ZoneBackground {
/// Creates the background state for `child`, starting a new [`ChildWait`].
///
/// # Errors
/// Returns an error if the child waiter cannot be created.
pub async fn new(idm: IdmInternalClient, cgroup: Cgroup, child: Pid) -> Result<ZoneBackground> {
    let wait = ChildWait::new()?;
    let background = ZoneBackground {
        idm,
        child,
        _cgroup: cgroup,
        wait,
    };
    Ok(background)
}
/// Runs the background loop until one of the IDM channels closes or the
/// child waiter stops producing events.
///
/// Each iteration waits on four sources at once: IDM events, IDM requests,
/// IDM request streams and child events.
///
/// # Errors
/// Returns an error if subscribing fails or if handling a request, a stream
/// request or a child event fails.
pub async fn run(&mut self) -> Result<()> {
let mut event_subscription = self.idm.subscribe().await?;
let mut requests_subscription = self.idm.requests().await?;
let mut request_streams_subscription = self.idm.request_streams().await?;
loop {
select! {
// IDM events are received but currently ignored.
x = event_subscription.recv() => match x {
Ok(_event) => {
},
Err(broadcast::error::RecvError::Closed) => {
debug!("idm packet channel closed");
break;
},
// Any other receive error is skipped and the loop keeps going.
_ => {
continue;
}
},
// Unary requests: answered by `handle_idm_request`.
x = requests_subscription.recv() => match x {
Ok((id, request)) => {
self.handle_idm_request(id, request).await?;
},
Err(broadcast::error::RecvError::Closed) => {
debug!("idm packet channel closed");
break;
},
_ => {
continue;
}
},
// Streaming requests: dispatched by `handle_idm_stream_request`.
x = request_streams_subscription.recv() => match x {
Ok(handle) => {
self.handle_idm_stream_request(handle).await?;
},
Err(broadcast::error::RecvError::Closed) => {
debug!("idm packet channel closed");
break;
},
_ => {
continue;
}
},
// Child exit notifications; `None` means the waiter's channel is closed.
event = self.wait.recv() => match event {
Some(event) => self.child_event(event).await?,
None => {
break;
}
}
};
}
Ok(())
}
/// Answers a single IDM request identified by `id`.
///
/// `Ping` is answered with an empty `PingResponse`; `Metrics` is answered
/// with a freshly collected metrics tree. Any other request (or an empty
/// one) is ignored without sending a response.
///
/// # Errors
/// Returns an error if metrics collection or sending the response fails.
async fn handle_idm_request(&mut self, id: u64, packet: Request) -> Result<()> {
    let reply = match packet.request {
        Some(RequestType::Ping(_)) => ResponseType::Ping(PingResponse {}),
        Some(RequestType::Metrics(_)) => {
            let collector = MetricsCollector::new()?;
            let root = collector.collect()?;
            ResponseType::Metrics(MetricsResponse { root: Some(root) })
        }
        // Unknown or missing request types get no response.
        _ => return Ok(()),
    };
    let response = Response {
        response: Some(reply),
    };
    self.idm.respond(id, response).await?;
    Ok(())
}
/// Dispatches a streaming IDM request.
///
/// Only requests whose initial message is an `ExecStream` are handled: a
/// detached task runs a [`ZoneExecTask`] for them. If that task fails, a
/// final update with `exited: true`, `exit_code: -1` and the error text is
/// sent back (a failure to send it is ignored). Other requests are dropped.
async fn handle_idm_stream_request(
    &mut self,
    handle: IdmClientStreamResponseHandle<Request>,
) -> Result<()> {
    if !matches!(&handle.initial.request, Some(RequestType::ExecStream(_))) {
        return Ok(());
    }

    tokio::task::spawn(async move {
        let exec = ZoneExecTask { handle };
        let Err(error) = exec.run().await else {
            return;
        };
        let failure = ExecStreamResponseUpdate {
            exited: true,
            error: error.to_string(),
            exit_code: -1,
            stdout: vec![],
            stderr: vec![],
        };
        let response = Response {
            response: Some(ResponseType::ExecStream(failure)),
        };
        let _ = exec.handle.respond(response).await;
    });
    Ok(())
}
/// Reacts to a child exit notification.
///
/// Events for pids other than the tracked child are ignored. For the
/// tracked child an `Exit` event carrying its status is emitted over IDM and
/// then [`death`] is invoked with that status.
async fn child_event(&mut self, event: ChildEvent) -> Result<()> {
    if event.pid != self.child {
        return Ok(());
    }

    let exit = ExitEvent { code: event.status };
    let notification = Event {
        event: Some(EventType::Exit(exit)),
    };
    self.idm.emit(notification).await?;
    death(event.status).await?;
    Ok(())
}
}

View File

@ -0,0 +1,85 @@
use std::{
ptr::addr_of_mut,
sync::{
atomic::{AtomicBool, Ordering},
Arc,
},
thread::{self, JoinHandle},
};
use anyhow::Result;
use libc::{c_int, waitpid, WEXITSTATUS, WIFEXITED};
use log::warn;
use nix::unistd::Pid;
use tokio::sync::mpsc::{channel, Receiver, Sender};
// Capacity of the channel between the waiter thread and `ChildWait::recv`.
const CHILD_WAIT_QUEUE_LEN: usize = 10;
/// Notification that a child process exited.
#[derive(Clone, Copy, Debug)]
pub struct ChildEvent {
// Pid returned by `waitpid` for the exited child.
pub pid: Pid,
// Exit status extracted with `WEXITSTATUS`.
pub status: c_int,
}
/// Receiving side of the child waiter: owns the channel receiver, the stop
/// flag shared with the waiter thread, and the thread's join handle.
pub struct ChildWait {
// Receives events sent by `ChildWaitTask::process`.
receiver: Receiver<ChildEvent>,
// Set to `true` on drop to ask the waiter thread to stop.
signal: Arc<AtomicBool>,
// Join handle of the waiter thread; never joined in this file.
_task: JoinHandle<()>,
}
impl ChildWait {
    /// Starts a dedicated OS thread that waits for child processes and
    /// returns the handle used to receive their exit events.
    ///
    /// A failure inside the waiter thread is logged as a warning.
    pub fn new() -> Result<ChildWait> {
        let (sender, receiver) = channel(CHILD_WAIT_QUEUE_LEN);
        let signal = Arc::new(AtomicBool::new(false));
        let task_signal = Arc::clone(&signal);

        let task = thread::spawn(move || {
            let mut processor = ChildWaitTask {
                sender,
                signal: task_signal,
            };
            if let Err(error) = processor.process() {
                warn!("failed to process child updates: {}", error);
            }
        });

        Ok(ChildWait {
            receiver,
            signal,
            _task: task,
        })
    }

    /// Waits for the next child exit event; yields `None` once the channel
    /// is closed.
    pub async fn recv(&mut self) -> Option<ChildEvent> {
        let next = self.receiver.recv().await;
        next
    }
}
/// State moved into the waiter thread spawned by `ChildWait::new`.
struct ChildWaitTask {
// Sends exit events to the `ChildWait` receiver.
sender: Sender<ChildEvent>,
// Stop flag shared with `ChildWait`; checked by `process`.
signal: Arc<AtomicBool>,
}
impl ChildWaitTask {
    /// Blocks in `waitpid(-1, ..)` forever, forwarding every normal child
    /// exit as a [`ChildEvent`], until the stop flag is observed.
    ///
    /// A failed `waitpid` call (return value `-1`, for example when there
    /// are currently no children) is never reported as an exit: its `status`
    /// output is not meaningful. The loop backs off briefly instead of
    /// spinning, and honours the stop flag on that path as well.
    ///
    /// Events are sent with `try_send`, so an event is dropped when the
    /// channel is full or closed.
    ///
    /// NOTE(review): children terminated by a signal do not satisfy
    /// `WIFEXITED` and therefore produce no event — confirm this is intended.
    fn process(&mut self) -> Result<()> {
        // Pause between retries after a failed `waitpid`, so that a
        // persistent error (such as "no child processes") cannot turn this
        // loop into a busy spin.
        const WAIT_ERROR_BACKOFF: std::time::Duration = std::time::Duration::from_millis(10);

        loop {
            let mut status: c_int = 0;
            // SAFETY: `status` is a live local and the pointer is only used
            // by `waitpid` for the duration of the call.
            let pid = unsafe { waitpid(-1, addr_of_mut!(status), 0) };

            if pid == -1 {
                if self.signal.load(Ordering::Acquire) {
                    return Ok(());
                }
                thread::sleep(WAIT_ERROR_BACKOFF);
                continue;
            }

            if WIFEXITED(status) {
                let event = ChildEvent {
                    pid: Pid::from_raw(pid),
                    status: WEXITSTATUS(status),
                };
                let _ = self.sender.try_send(event);
                if self.signal.load(Ordering::Acquire) {
                    return Ok(());
                }
            }
        }
    }
}
impl Drop for ChildWait {
// Raises the stop flag that `ChildWaitTask::process` checks; the waiter
// thread itself is not joined here.
fn drop(&mut self) {
self.signal.store(true, Ordering::Release);
}
}

172
crates/zone/src/exec.rs Normal file
View File

@ -0,0 +1,172 @@
use std::{collections::HashMap, process::Stdio};
use anyhow::{anyhow, Result};
use krata::idm::{
client::IdmClientStreamResponseHandle,
internal::{
exec_stream_request_update::Update, request::Request as RequestType,
ExecStreamResponseUpdate,
},
internal::{response::Response as ResponseType, Request, Response},
};
use tokio::{
io::{AsyncReadExt, AsyncWriteExt},
join,
process::Command,
};
/// One exec-stream session: runs the command described by the initial
/// request of `handle` and streams its output back over the same handle.
pub struct ZoneExecTask {
// Stream handle carrying the initial request and used to send responses.
pub handle: IdmClientStreamResponseHandle<Request>,
}
impl ZoneExecTask {
/// Spawns the requested command and relays its I/O over the stream handle.
///
/// The initial request must be an `ExecStream` update containing a `Start`
/// message with a non-empty command line. Output from stdout and stderr is
/// forwarded in separate updates, `Stdin` updates received on the stream are
/// written to the child's stdin, and a final update with `exited: true` and
/// the exit code is sent once the child has finished.
///
/// # Errors
/// Returns an error if the initial request is malformed, the process cannot
/// be spawned, a stdio pipe is missing, waiting on the child fails, or the
/// final response cannot be sent.
pub async fn run(&self) -> Result<()> {
let mut receiver = self.handle.take().await?;
let Some(ref request) = self.handle.initial.request else {
return Err(anyhow!("request was empty"));
};
let RequestType::ExecStream(update) = request else {
return Err(anyhow!("request was not an exec update"));
};
let Some(Update::Start(ref start)) = update.update else {
return Err(anyhow!("first request did not contain a start update"));
};
// The first element of the command line is the executable; the rest are its arguments.
let mut cmd = start.command.clone();
if cmd.is_empty() {
return Err(anyhow!("command line was empty"));
}
let exe = cmd.remove(0);
let mut env = HashMap::new();
for entry in &start.environment {
env.insert(entry.key.clone(), entry.value.clone());
}
// Provide a default PATH when the request did not set one.
if !env.contains_key("PATH") {
env.insert(
"PATH".to_string(),
"/bin:/usr/bin:/usr/local/bin".to_string(),
);
}
// An empty working directory in the request means "/".
let dir = if start.working_directory.is_empty() {
"/".to_string()
} else {
start.working_directory.clone()
};
let mut child = Command::new(exe)
.args(cmd)
.envs(env)
.current_dir(dir)
.stdin(Stdio::piped())
.stdout(Stdio::piped())
.stderr(Stdio::piped())
.kill_on_drop(true)
.spawn()
.map_err(|error| anyhow!("failed to spawn: {}", error))?;
let mut stdin = child
.stdin
.take()
.ok_or_else(|| anyhow!("stdin was missing"))?;
let mut stdout = child
.stdout
.take()
.ok_or_else(|| anyhow!("stdout was missing"))?;
let mut stderr = child
.stderr
.take()
.ok_or_else(|| anyhow!("stderr was missing"))?;
// Forward stdout in chunks of up to 8 KiB until EOF or a read error.
let stdout_handle = self.handle.clone();
let stdout_task = tokio::task::spawn(async move {
let mut stdout_buffer = vec![0u8; 8 * 1024];
loop {
let Ok(size) = stdout.read(&mut stdout_buffer).await else {
break;
};
if size > 0 {
let response = Response {
response: Some(ResponseType::ExecStream(ExecStreamResponseUpdate {
exited: false,
exit_code: 0,
error: String::new(),
stdout: stdout_buffer[0..size].to_vec(),
stderr: vec![],
})),
};
// Send failures are ignored; reading continues.
let _ = stdout_handle.respond(response).await;
} else {
break;
}
}
});
// Same forwarding loop for stderr.
let stderr_handle = self.handle.clone();
let stderr_task = tokio::task::spawn(async move {
let mut stderr_buffer = vec![0u8; 8 * 1024];
loop {
let Ok(size) = stderr.read(&mut stderr_buffer).await else {
break;
};
if size > 0 {
let response = Response {
response: Some(ResponseType::ExecStream(ExecStreamResponseUpdate {
exited: false,
exit_code: 0,
error: String::new(),
stdout: vec![],
stderr: stderr_buffer[0..size].to_vec(),
})),
};
let _ = stderr_handle.respond(response).await;
} else {
break;
}
}
});
// Feed `Stdin` updates from the stream into the child; other updates are skipped.
let stdin_task = tokio::task::spawn(async move {
loop {
let Some(request) = receiver.recv().await else {
break;
};
let Some(RequestType::ExecStream(update)) = request.request else {
continue;
};
let Some(Update::Stdin(update)) = update.update else {
continue;
};
if stdin.write_all(&update.data).await.is_err() {
break;
}
}
});
let exit = child.wait().await?;
// No exit code available is reported as -1.
let code = exit.code().unwrap_or(-1);
// Let both output forwarders finish before reporting the exit.
let _ = join!(stdout_task, stderr_task);
stdin_task.abort();
let response = Response {
response: Some(ResponseType::ExecStream(ExecStreamResponseUpdate {
exited: true,
exit_code: code,
error: String::new(),
stdout: vec![],
stderr: vec![],
})),
};
self.handle.respond(response).await?;
Ok(())
}
}

668
crates/zone/src/init.rs Normal file
View File

@ -0,0 +1,668 @@
use anyhow::{anyhow, Result};
use cgroups_rs::{Cgroup, CgroupPid};
use futures::stream::TryStreamExt;
use ipnetwork::IpNetwork;
use krata::ethtool::EthtoolHandle;
use krata::idm::client::IdmInternalClient;
use krata::idm::internal::INTERNAL_IDM_CHANNEL;
use krata::launchcfg::{LaunchInfo, LaunchNetwork, LaunchPackedFormat};
use libc::{sethostname, setsid, TIOCSCTTY};
use log::{trace, warn};
use nix::ioctl_write_int_bad;
use nix::unistd::{dup2, execve, fork, ForkResult, Pid};
use oci_spec::image::{Config, ImageConfiguration};
use path_absolutize::Absolutize;
use platform_info::{PlatformInfo, PlatformInfoAPI, UNameAPI};
use std::collections::HashMap;
use std::ffi::CString;
use std::fs::{File, OpenOptions, Permissions};
use std::io;
use std::net::{Ipv4Addr, Ipv6Addr};
use std::os::fd::AsRawFd;
use std::os::unix::ffi::OsStrExt;
use std::os::unix::fs::{chroot, PermissionsExt};
use std::path::{Path, PathBuf};
use std::str::FromStr;
use sys_mount::{FilesystemType, Mount, MountFlags};
use tokio::fs;
use crate::background::ZoneBackground;
// Block devices and mount points for the root image and the config image.
const IMAGE_BLOCK_DEVICE_PATH: &str = "/dev/xvda";
const CONFIG_BLOCK_DEVICE_PATH: &str = "/dev/xvdb";
const IMAGE_MOUNT_PATH: &str = "/image";
const CONFIG_MOUNT_PATH: &str = "/config";
// tmpfs holding the overlay's image bind mount, work dir and upper dir.
const OVERLAY_MOUNT_PATH: &str = "/overlay";
const OVERLAY_IMAGE_BIND_PATH: &str = "/overlay/image";
const OVERLAY_WORK_PATH: &str = "/overlay/work";
const OVERLAY_UPPER_PATH: &str = "/overlay/upper";
// Kernel filesystems in the initial root and their destinations under the new root.
const SYS_PATH: &str = "/sys";
const PROC_PATH: &str = "/proc";
const DEV_PATH: &str = "/dev";
const NEW_ROOT_PATH: &str = "/newroot";
const NEW_ROOT_SYS_PATH: &str = "/newroot/sys";
const NEW_ROOT_PROC_PATH: &str = "/newroot/proc";
const NEW_ROOT_DEV_PATH: &str = "/newroot/dev";
// JSON documents read from the mounted config image.
const IMAGE_CONFIG_JSON_PATH: &str = "/config/image/config.json";
const LAUNCH_CONFIG_JSON_PATH: &str = "/config/launch.json";
// Optional addons device, its mount point and the modules directory inside it.
const ADDONS_DEVICE_PATH: &str = "/dev/xvdc";
const ADDONS_MOUNT_PATH: &str = "/addons";
const ADDONS_MODULES_PATH: &str = "/addons/modules";
// Generates the free function `set_controlling_terminal(fd, data)` wrapping the TIOCSCTTY ioctl.
ioctl_write_int_bad!(set_controlling_terminal, TIOCSCTTY);
/// Stateless driver for zone initialization; see `ZoneInit::init`.
pub struct ZoneInit {}
// `Default` simply forwards to `ZoneInit::new`.
impl Default for ZoneInit {
fn default() -> Self {
Self::new()
}
}
impl ZoneInit {
pub fn new() -> ZoneInit {
ZoneInit {}
}
pub async fn init(&mut self) -> Result<()> {
self.early_init().await?;
trace!("opening console descriptor");
match OpenOptions::new()
.read(true)
.write(true)
.open("/dev/console")
{
Ok(console) => self.map_console(&console)?,
Err(error) => warn!("failed to open console: {}", error),
};
let idm = IdmInternalClient::open(INTERNAL_IDM_CHANNEL, "/dev/hvc1")
.await
.map_err(|x| anyhow!("failed to open idm client: {}", x))?;
self.mount_config_image().await?;
let config = self.parse_image_config().await?;
let launch = self.parse_launch_config().await?;
self.mount_root_image(launch.root.format.clone()).await?;
self.mount_addons().await?;
self.mount_new_root().await?;
self.mount_kernel_modules().await?;
self.bind_new_root().await?;
if let Some(hostname) = launch.hostname.clone() {
let result = unsafe {
sethostname(
hostname.as_bytes().as_ptr() as *mut libc::c_char,
hostname.len(),
)
};
if result != 0 {
warn!("failed to set hostname: {}", result);
}
let etc = PathBuf::from_str("/etc")?;
if !etc.exists() {
fs::create_dir(&etc).await?;
}
let mut etc_hostname = etc;
etc_hostname.push("hostname");
fs::write(&etc_hostname, hostname + "\n").await?;
}
if let Some(network) = &launch.network {
trace!("initializing network");
if let Err(error) = self.network_setup(&launch, network).await {
warn!("failed to initialize network: {}", error);
}
}
if let Some(cfg) = config.config() {
trace!("running zone task");
self.run(cfg, &launch, idm).await?;
} else {
return Err(anyhow!(
"unable to determine what to execute, image config doesn't tell us"
));
}
Ok(())
}
/// Prepares the initial root: creates the base directories, mounts the
/// kernel filesystems (`devtmpfs`, `proc`, `sysfs`, `devpts`, `cgroup2`) and
/// creates the standard `/dev` symlinks into `/proc/self/fd`.
async fn early_init(&mut self) -> Result<()> {
    const BASE_DIRECTORIES: [(&str, Option<u32>); 6] = [
        ("/dev", Some(0o0755)),
        ("/proc", None),
        ("/sys", Some(0o0555)),
        ("/root", Some(0o0700)),
        ("/tmp", None),
        ("/run", Some(0o0755)),
    ];
    // (fstype, mount point, mount data)
    const BASE_MOUNTS: [(&str, &str, &str); 3] = [
        ("devtmpfs", "/dev", "mode=0755"),
        ("proc", "/proc", ""),
        ("sysfs", "/sys", ""),
    ];
    // (link target, link path)
    const DEV_SYMLINKS: [(&str, &str); 4] = [
        ("/proc/self/fd", "/dev/fd"),
        ("/proc/self/fd/0", "/dev/stdin"),
        ("/proc/self/fd/1", "/dev/stdout"),
        ("/proc/self/fd/2", "/dev/stderr"),
    ];

    trace!("early init");
    for (path, mode) in BASE_DIRECTORIES {
        self.create_dir(path, mode).await?;
    }
    for (fstype, path, data) in BASE_MOUNTS {
        self.mount_kernel_fs(fstype, path, data, None, None).await?;
    }

    self.create_dir("/dev/pts", Some(0o0755)).await?;
    self.mount_kernel_fs("devpts", "/dev/pts", "", None, Some("/dev/ptmx"))
        .await?;

    for (target, link) in DEV_SYMLINKS {
        fs::symlink(target, link).await?;
    }

    let cgroup_flags = Some(MountFlags::RELATIME);
    self.mount_kernel_fs("cgroup2", "/sys/fs/cgroup", "", cgroup_flags, None)
        .await?;
    Ok(())
}
/// Mounts the addons squashfs image at the addons mount path when the
/// addons block device exists; does nothing otherwise.
async fn mount_addons(&mut self) -> Result<()> {
    let device_present = fs::try_exists(ADDONS_DEVICE_PATH).await?;
    if device_present {
        let device = PathBuf::from(ADDONS_DEVICE_PATH);
        let target = PathBuf::from(ADDONS_MOUNT_PATH);
        self.mount_image(&device, &target, LaunchPackedFormat::Squashfs)
            .await?;
    }
    Ok(())
}
/// Bind-mounts the addons modules directory read-only onto
/// `/newroot/lib/modules/<kernel release>`.
///
/// Silently does nothing when the modules directory is absent or the
/// platform information cannot be obtained.
async fn mount_kernel_modules(&mut self) -> Result<()> {
    let modules_present = fs::try_exists(ADDONS_MODULES_PATH).await?;
    if !modules_present {
        return Ok(());
    }
    let Ok(platform) = PlatformInfo::new() else {
        return Ok(());
    };

    let release = platform.release().to_string_lossy().to_string();
    let target = format!("/newroot/lib/modules/{}", release);
    fs::create_dir_all(&target).await?;

    let bind_flags = MountFlags::BIND | MountFlags::RDONLY;
    Mount::builder()
        .fstype(FilesystemType::Manual("none"))
        .flags(bind_flags)
        .mount(ADDONS_MODULES_PATH, target)?;
    Ok(())
}
/// Ensures `path` exists as a directory and, when `mode` is given, sets its
/// permissions to that mode (also for a directory that already existed).
async fn create_dir(&mut self, path: &str, mode: Option<u32>) -> Result<()> {
    let dir = Path::new(path);
    if !dir.is_dir() {
        trace!("creating directory {:?}", dir);
        fs::create_dir(dir).await?;
    }

    let Some(mode) = mode else {
        return Ok(());
    };
    trace!("setting directory {:?} permissions to {:?}", dir, mode);
    fs::set_permissions(dir, Permissions::from_mode(mode)).await?;
    Ok(())
}
/// Mounts a kernel filesystem of type `fstype` at `path`.
///
/// `NOEXEC` and `NOSUID` are always applied, combined with any extra
/// `flags`. The mount source is `source` when given, otherwise the
/// filesystem type name itself.
async fn mount_kernel_fs(
    &mut self,
    fstype: &str,
    path: &str,
    data: &str,
    flags: Option<MountFlags>,
    source: Option<&str>,
) -> Result<()> {
    trace!("mounting kernel fs {} to {}", fstype, path);
    let extra_flags = flags.unwrap_or(MountFlags::empty());
    let mount_flags = MountFlags::NOEXEC | MountFlags::NOSUID | extra_flags;
    let device = source.unwrap_or(fstype);
    Mount::builder()
        .fstype(FilesystemType::Manual(fstype))
        .flags(mount_flags)
        .data(data)
        .mount(device, path)?;
    Ok(())
}
/// Duplicates the console descriptor onto file descriptors 0, 1 and 2.
fn map_console(&mut self, console: &File) -> Result<()> {
    trace!("mapping console");
    let console_fd = console.as_raw_fd();
    for standard_fd in [0, 1, 2] {
        dup2(console_fd, standard_fd)?;
    }
    Ok(())
}
/// Mounts the config block device as squashfs at the config mount path.
async fn mount_config_image(&mut self) -> Result<()> {
    trace!("mounting config image");
    let device = Path::new(CONFIG_BLOCK_DEVICE_PATH);
    let target = Path::new(CONFIG_MOUNT_PATH);
    self.mount_image(device, target, LaunchPackedFormat::Squashfs)
        .await?;
    Ok(())
}
/// Mounts the root image block device at the image mount path using the
/// packed `format` given by the launch configuration.
async fn mount_root_image(&mut self, format: LaunchPackedFormat) -> Result<()> {
    trace!("mounting root image");
    let device = Path::new(IMAGE_BLOCK_DEVICE_PATH);
    let target = Path::new(IMAGE_MOUNT_PATH);
    self.mount_image(device, target, format).await?;
    Ok(())
}
/// Mounts the packed image `from` read-only at `to`, creating the mount
/// point when it is not already a directory.
async fn mount_image(
    &mut self,
    from: &Path,
    to: &Path,
    format: LaunchPackedFormat,
) -> Result<()> {
    trace!("mounting {:?} image {:?} to {:?}", format, from, to);
    if !to.is_dir() {
        fs::create_dir(to).await?;
    }

    let fstype = match format {
        LaunchPackedFormat::Squashfs => "squashfs",
        LaunchPackedFormat::Erofs => "erofs",
    };
    Mount::builder()
        .fstype(FilesystemType::Manual(fstype))
        .flags(MountFlags::RDONLY)
        .mount(from, to)?;
    Ok(())
}
/// Moves the mount at `from` to `to` (a `MOVE` mount), creating the
/// destination directory when it does not exist yet.
async fn mount_move_subtree(&mut self, from: &Path, to: &Path) -> Result<()> {
    trace!("moving subtree {:?} to {:?}", from, to);
    let destination_exists = to.is_dir();
    if !destination_exists {
        fs::create_dir(to).await?;
    }

    let mover = Mount::builder()
        .fstype(FilesystemType::Manual("none"))
        .flags(MountFlags::MOVE);
    mover.mount(from, to)?;
    Ok(())
}
/// Assembles the new root: overlay tmpfs, image bind mount, then the
/// overlay itself, and finally makes the new root the current directory.
async fn mount_new_root(&mut self) -> Result<()> {
    trace!("mounting new root");

    // Each step depends on the previous one, so the order is fixed.
    self.mount_overlay_tmpfs().await?;
    self.bind_image_to_overlay_tmpfs().await?;
    self.mount_overlay_to_new_root().await?;

    std::env::set_current_dir(NEW_ROOT_PATH)?;
    trace!("mounted new root");
    Ok(())
}
/// Creates the overlay directory, mounts a tmpfs on it and creates the
/// overlay's upper and work directories inside.
async fn mount_overlay_tmpfs(&mut self) -> Result<()> {
    fs::create_dir(OVERLAY_MOUNT_PATH).await?;

    let tmpfs = Mount::builder().fstype(FilesystemType::Manual("tmpfs"));
    tmpfs.mount("tmpfs", OVERLAY_MOUNT_PATH)?;

    for directory in [OVERLAY_UPPER_PATH, OVERLAY_WORK_PATH] {
        fs::create_dir(directory).await?;
    }
    Ok(())
}
/// Bind-mounts the root image read-only into the overlay tmpfs.
async fn bind_image_to_overlay_tmpfs(&mut self) -> Result<()> {
    fs::create_dir(OVERLAY_IMAGE_BIND_PATH).await?;

    let bind_flags = MountFlags::BIND | MountFlags::RDONLY;
    Mount::builder()
        .fstype(FilesystemType::Manual("none"))
        .flags(bind_flags)
        .mount(IMAGE_MOUNT_PATH, OVERLAY_IMAGE_BIND_PATH)?;
    Ok(())
}
/// Creates the new root directory and mounts the overlay on it, with the
/// bound image as lower layer and the tmpfs directories as upper/work.
async fn mount_overlay_to_new_root(&mut self) -> Result<()> {
    fs::create_dir(NEW_ROOT_PATH).await?;

    let options = format!(
        "lowerdir={},upperdir={},workdir={}",
        OVERLAY_IMAGE_BIND_PATH, OVERLAY_UPPER_PATH, OVERLAY_WORK_PATH
    );
    let source = format!("overlayfs:{}", OVERLAY_MOUNT_PATH);
    Mount::builder()
        .fstype(FilesystemType::Manual("overlay"))
        .flags(MountFlags::NOATIME)
        .data(&options)
        .mount(source, NEW_ROOT_PATH)?;
    Ok(())
}
/// Reads and deserializes the image configuration JSON from the config
/// image.
async fn parse_image_config(&mut self) -> Result<ImageConfiguration> {
    let raw = fs::read_to_string(Path::new(IMAGE_CONFIG_JSON_PATH)).await?;
    Ok(serde_json::from_str(&raw)?)
}
/// Reads and deserializes the launch configuration JSON from the config
/// image.
async fn parse_launch_config(&mut self) -> Result<LaunchInfo> {
    trace!("parsing launch config");
    let raw = fs::read_to_string(Path::new(LAUNCH_CONFIG_JSON_PATH)).await?;
    let launch = serde_json::from_str(&raw)?;
    Ok(launch)
}
/// Switches into the new root.
///
/// Moves the `/sys`, `/proc` and `/dev` mounts under the new root, bind
/// mounts the current directory over `/`, chroots into it and resets the
/// working directory to `/`. Relies on the current directory already being
/// the new root.
async fn bind_new_root(&mut self) -> Result<()> {
    let moves = [
        (SYS_PATH, NEW_ROOT_SYS_PATH),
        (PROC_PATH, NEW_ROOT_PROC_PATH),
        (DEV_PATH, NEW_ROOT_DEV_PATH),
    ];
    for (from, to) in moves {
        self.mount_move_subtree(Path::new(from), Path::new(to))
            .await?;
    }

    trace!("binding new root");
    let bind = Mount::builder()
        .fstype(FilesystemType::Manual("none"))
        .flags(MountFlags::BIND);
    bind.mount(".", "/")?;

    trace!("chrooting into new root");
    chroot(".")?;

    trace!("setting root as current directory");
    std::env::set_current_dir("/")?;
    Ok(())
}
/// Writes the resolver and hosts files and configures the network link.
///
/// `/etc/resolv.conf` is always rewritten from the launch network's
/// nameservers. `/etc/hosts` is only touched when a hostname is configured:
/// a `127.0.1.1 <hostname>` entry is appended to the existing content, or to
/// a default `localhost` line when the file does not exist.
async fn network_setup(&mut self, cfg: &LaunchInfo, network: &LaunchNetwork) -> Result<()> {
    trace!("setting up network for link");

    let etc = PathBuf::from_str("/etc")?;
    if !etc.exists() {
        fs::create_dir(etc).await?;
    }

    let resolv = PathBuf::from_str("/etc/resolv.conf")?;
    let mut resolv_conf = String::from("# krata resolver configuration\n");
    for nameserver in &network.resolver.nameservers {
        resolv_conf.push_str(&format!("nameserver {}\n", nameserver));
    }
    fs::write(resolv, resolv_conf).await?;

    let hosts = PathBuf::from_str("/etc/hosts")?;
    if let Some(hostname) = cfg.hostname.as_ref() {
        let mut entries: Vec<String> = if hosts.exists() {
            let existing = fs::read_to_string(&hosts).await?;
            existing.lines().map(|line| line.to_string()).collect()
        } else {
            vec!["127.0.0.1 localhost".to_string()]
        };
        entries.push(format!("127.0.1.1 {}", hostname));
        fs::write(&hosts, entries.join("\n") + "\n").await?;
    }

    self.network_configure_ethtool(network).await?;
    self.network_configure_link(network).await?;
    Ok(())
}
/// Brings up the loopback link and configures the zone's network link with
/// its IPv4/IPv6 addresses and default routes over rtnetlink.
///
/// IPv4 failures are returned as errors; IPv6 address and route failures are
/// only logged. If either `lo` or the configured link cannot be found, a
/// warning is logged and the function returns `Ok(())` without doing more.
async fn network_configure_link(&mut self, network: &LaunchNetwork) -> Result<()> {
let (connection, handle, _) = rtnetlink::new_connection()?;
// The connection future must be driven for requests on `handle` to complete.
tokio::spawn(connection);
let mut links = handle.link().get().match_name("lo".to_string()).execute();
let Some(link) = links.try_next().await? else {
warn!("unable to find link named lo");
return Ok(());
};
handle.link().set(link.header.index).up().execute().await?;
// Parse all addresses up front so a malformed value fails before any change to the link.
let ipv4_network: IpNetwork = network.ipv4.address.parse()?;
let ipv4_gateway: Ipv4Addr = network.ipv4.gateway.parse()?;
let ipv6_network: IpNetwork = network.ipv6.address.parse()?;
let ipv6_gateway: Ipv6Addr = network.ipv6.gateway.parse()?;
let mut links = handle
.link()
.get()
.match_name(network.link.clone())
.execute();
let Some(link) = links.try_next().await? else {
warn!("unable to find link named {}", network.link);
return Ok(());
};
handle
.address()
.add(link.header.index, ipv4_network.ip(), ipv4_network.prefix())
.execute()
.await?;
// IPv6 is best effort: remember whether the address was added.
let ipv6_result = handle
.address()
.add(link.header.index, ipv6_network.ip(), ipv6_network.prefix())
.execute()
.await;
let ipv6_ready = match ipv6_result {
Ok(()) => true,
Err(error) => {
warn!("unable to setup ipv6 network: {}", error);
false
}
};
handle.link().set(link.header.index).up().execute().await?;
// Default IPv4 route through the configured gateway.
handle
.route()
.add()
.v4()
.destination_prefix(Ipv4Addr::UNSPECIFIED, 0)
.output_interface(link.header.index)
.gateway(ipv4_gateway)
.execute()
.await?;
// Default IPv6 route, only attempted when the IPv6 address was added.
if ipv6_ready {
let ipv6_gw_result = handle
.route()
.add()
.v6()
.destination_prefix(Ipv6Addr::UNSPECIFIED, 0)
.output_interface(link.header.index)
.gateway(ipv6_gateway)
.execute()
.await;
if let Err(error) = ipv6_gw_result {
warn!("failed to add ipv6 gateway route: {}", error);
}
}
Ok(())
}
/// Disables GSO and TSO on the zone's network link via ethtool.
async fn network_configure_ethtool(&mut self, network: &LaunchNetwork) -> Result<()> {
    let link = &network.link;
    let mut ethtool = EthtoolHandle::new()?;
    ethtool.set_gso(link, false)?;
    ethtool.set_tso(link, false)?;
    Ok(())
}
/// Builds the command line, environment and working directory for the
/// zone's main process and launches it via `fork_and_exec`.
///
/// The command is the launch `run` override when present, otherwise the
/// image `cmd`; the image entrypoint is prepended, and `/bin/sh` is used
/// when the result is empty. The environment is the image environment
/// overlaid with the launch environment, plus `KRATA_CONTAINER=1` and a
/// default `TERM`.
async fn run(
    &mut self,
    config: &Config,
    launch: &LaunchInfo,
    idm: IdmInternalClient,
) -> Result<()> {
    let mut cmd = match config.cmd() {
        Some(value) => value.clone(),
        None => vec![],
    };
    if let Some(run) = launch.run.as_ref() {
        cmd.clone_from(run);
    }
    if let Some(entrypoint) = config.entrypoint() {
        // Inserting in reverse at the front keeps the entrypoint's order.
        for item in entrypoint.iter().rev() {
            cmd.insert(0, item.to_string());
        }
    }
    if cmd.is_empty() {
        cmd.push("/bin/sh".to_string());
    }
    let executable = cmd.remove(0);

    let mut env = HashMap::new();
    if let Some(config_env) = config.env() {
        env.extend(ZoneInit::env_map(config_env));
    }
    env.extend(launch.env.clone());
    env.insert("KRATA_CONTAINER".to_string(), "1".to_string());
    // Without a terminal definition in the launch manifest, fall back to
    // xterm, whose control codes most terminal emulators understand.
    if !env.contains_key("TERM") {
        env.insert("TERM".to_string(), "xterm".to_string());
    }

    let path = resolve_executable(&env, executable.into())?;
    let Some(file_name) = path.file_name() else {
        return Err(anyhow!("cannot get file name of command path"));
    };
    let Some(file_name) = file_name.to_str() else {
        return Err(anyhow!("cannot get file name of command path as str"));
    };
    // argv[0] is the executable's file name.
    cmd.insert(0, file_name.to_string());

    let env = ZoneInit::env_list(env);
    trace!("running zone command: {}", cmd.join(" "));
    let path = CString::new(path.as_os_str().as_bytes())?;
    let cmd = ZoneInit::strings_as_cstrings(cmd)?;
    let env = ZoneInit::strings_as_cstrings(env)?;

    // A missing or empty working directory means "/".
    let working_dir = match config.working_dir().as_ref().map(|x| x.to_string()) {
        Some(dir) if !dir.is_empty() => dir,
        _ => "/".to_string(),
    };

    let cgroup = self.init_cgroup().await?;
    self.fork_and_exec(idm, cgroup, working_dir, path, cmd, env)
        .await?;
    Ok(())
}
/// Creates the `krata-zone-task` cgroup in the auto-detected hierarchy and
/// sets its type to `threaded`.
async fn init_cgroup(&self) -> Result<Cgroup> {
    trace!("initializing cgroup");
    let cgroup = Cgroup::new(cgroups_rs::hierarchies::auto(), "krata-zone-task")?;
    cgroup.set_cgroup_type("threaded")?;
    trace!("initialized cgroup");
    Ok(cgroup)
}
/// Converts each string into a `CString`, preserving order.
///
/// # Errors
/// Fails on the first value that contains an interior NUL byte.
fn strings_as_cstrings(values: Vec<String>) -> Result<Vec<CString>> {
    let mut converted = Vec::with_capacity(values.len());
    for value in values {
        let c_string = CString::new(value.into_bytes())?;
        converted.push(c_string);
    }
    Ok(converted)
}
/// Parses `KEY=VALUE` entries into a map.
///
/// Each entry is split at its first `=`; entries without `=` are skipped,
/// and a later entry replaces an earlier one with the same key.
fn env_map(env: &[String]) -> HashMap<String, String> {
    env.iter()
        .filter_map(|item| item.split_once('='))
        .map(|(key, value)| (key.to_string(), value.to_string()))
        .collect()
}
/// Renders every map entry as a `KEY=VALUE` string, in the map's iteration
/// order.
fn env_list(env: HashMap<String, String>) -> Vec<String> {
    let mut entries = Vec::with_capacity(env.len());
    for (key, value) in &env {
        entries.push(format!("{}={}", key, value));
    }
    entries
}
/// Forks the process: the parent continues in `background` tracking the
/// child's pid, the child continues in `foreground` to exec the command.
async fn fork_and_exec(
    &mut self,
    idm: IdmInternalClient,
    cgroup: Cgroup,
    working_dir: String,
    path: CString,
    cmd: Vec<CString>,
    env: Vec<CString>,
) -> Result<()> {
    let outcome = unsafe { fork()? };
    match outcome {
        ForkResult::Child => self.foreground(cgroup, working_dir, path, cmd, env).await,
        ForkResult::Parent { child } => self.background(idm, cgroup, child).await,
    }
}
/// Child side of the fork: takes the controlling terminal, changes into
/// `working_dir`, adds the current process to `cgroup` and replaces the
/// process image with `path` via `execve`.
async fn foreground(
    &mut self,
    cgroup: Cgroup,
    working_dir: String,
    path: CString,
    cmd: Vec<CString>,
    env: Vec<CString>,
) -> Result<()> {
    ZoneInit::set_controlling_terminal()?;
    std::env::set_current_dir(working_dir)?;

    let own_pid = CgroupPid::from(std::process::id() as u64);
    cgroup.add_task(own_pid)?;

    execve(&path, &cmd, &env)?;
    Ok(())
}
/// Starts a new session and makes stdin its controlling terminal.
///
/// The result of `setsid` is ignored; a failing `TIOCSCTTY` ioctl is
/// returned as an error. The inner `set_controlling_terminal` call is the
/// ioctl wrapper generated by `ioctl_write_int_bad!`, not this method.
fn set_controlling_terminal() -> Result<()> {
    let stdin_fd = io::stdin().as_raw_fd();
    unsafe {
        setsid();
        set_controlling_terminal(stdin_fd, 0)?;
    }
    Ok(())
}
/// Parent side of the fork: runs the [`ZoneBackground`] loop for the
/// `executed` child until it finishes.
async fn background(
    &mut self,
    idm: IdmInternalClient,
    cgroup: Cgroup,
    executed: Pid,
) -> Result<()> {
    let mut service = ZoneBackground::new(idm, cgroup, executed).await?;
    service.run().await?;
    Ok(())
}
}
/// Resolves a command path to the executable that should be run.
///
/// Resolution order:
/// 1. an absolute path is returned unchanged;
/// 2. a relative path naming an existing file is returned absolutized;
/// 3. otherwise each `:`-separated directory of `PATH` in `env` is tried and
///    the first candidate that is a file is returned;
/// 4. if nothing matches, `path` is returned unchanged.
pub fn resolve_executable(env: &HashMap<String, String>, path: PathBuf) -> Result<PathBuf> {
    if path.is_absolute() {
        return Ok(path);
    }
    if path.is_file() {
        return Ok(path.absolutize()?.to_path_buf());
    }

    let from_search_path = env.get("PATH").and_then(|path_var| {
        path_var
            .split(':')
            .map(|directory| {
                let mut candidate: PathBuf = directory.into();
                candidate.push(&path);
                candidate
            })
            .find(|candidate| candidate.is_file())
    });

    match from_search_path {
        Some(found) => Ok(found),
        None => Ok(path),
    }
}

22
crates/zone/src/lib.rs Normal file
View File

@ -0,0 +1,22 @@
use std::{os::raw::c_int, time::Duration};
use anyhow::Result;
use tokio::time::sleep;
use xenstore::{XsdClient, XsdInterface};
pub mod background;
pub mod childwait;
pub mod exec;
pub mod init;
pub mod metrics;
/// Publishes the zone's exit code and then parks forever.
///
/// Writes `code` to the `krata/zone/exit-code` xenstore key, closes the
/// store client, and then sleeps in an endless loop. The function only
/// returns when opening the store or writing the key fails.
pub async fn death(code: c_int) -> Result<()> {
    let exit_code = code.to_string();
    {
        // Scoped so the client is dropped before the endless sleep below.
        let store = XsdClient::open().await?;
        store
            .write_string("krata/zone/exit-code", &exit_code)
            .await?;
    }

    loop {
        sleep(Duration::from_secs(1)).await;
    }
}

118
crates/zone/src/metrics.rs Normal file
View File

@ -0,0 +1,118 @@
use std::{ops::Add, path::Path};
use anyhow::Result;
use krata::idm::internal::{MetricFormat, MetricNode};
use sysinfo::Process;
/// Stateless collector that builds the zone's metrics tree; see `collect`.
pub struct MetricsCollector {}
impl MetricsCollector {
/// Creates a collector. Currently infallible; always returns `Ok`.
pub fn new() -> Result<Self> {
    let collector = MetricsCollector {};
    Ok(collector)
}
/// Collects the full metrics tree: a `zone` node containing the system
/// metrics followed by the per-process metrics.
pub fn collect(&self) -> Result<MetricNode> {
    let mut system = sysinfo::System::new();
    let system_node = self.collect_system(&mut system)?;
    let process_node = self.collect_processes(&mut system)?;
    Ok(MetricNode::structural(
        "zone",
        vec![system_node, process_node],
    ))
}
/// Refreshes memory statistics and returns a `system` node holding a
/// `memory` node with total, used and free byte counts.
fn collect_system(&self, sysinfo: &mut sysinfo::System) -> Result<MetricNode> {
    sysinfo.refresh_memory();

    let total = MetricNode::value("total", sysinfo.total_memory(), MetricFormat::Bytes);
    let used = MetricNode::value("used", sysinfo.used_memory(), MetricFormat::Bytes);
    let free = MetricNode::value("free", sysinfo.free_memory(), MetricFormat::Bytes);
    let memory = MetricNode::structural("memory", vec![total, used, free]);

    Ok(MetricNode::structural("system", vec![memory]))
}
/// Refreshes the process list and returns a `process` node with one child
/// per process, ordered by pid. Entries that report a thread kind are
/// skipped.
fn collect_processes(&self, sysinfo: &mut sysinfo::System) -> Result<MetricNode> {
    sysinfo.refresh_processes();

    let mut candidates = sysinfo.processes().values().collect::<Vec<_>>();
    candidates.sort_by_key(|candidate| candidate.pid());

    let mut nodes = Vec::new();
    for candidate in candidates
        .into_iter()
        .filter(|candidate| candidate.thread_kind().is_none())
    {
        nodes.push(MetricsCollector::process_node(candidate)?);
    }
    Ok(MetricNode::structural("process", nodes))
}
/// Builds the metrics node for a single process, named after its pid.
///
/// Always included: `cmdline`, `memory` (resident/virtual), `lifetime`,
/// `uid`, `gid`, `euid` and `egid`. Included only when available: `parent`,
/// `executable` and `cwd` (the latter two also require valid UTF-8 paths).
fn process_node(process: &Process) -> Result<MetricNode> {
let mut metrics = vec![];
if let Some(parent) = process.parent() {
metrics.push(MetricNode::value(
"parent",
parent.as_u32() as u64,
MetricFormat::Integer,
));
}
if let Some(exe) = process.exe().and_then(path_as_str) {
metrics.push(MetricNode::raw_value("executable", exe));
}
if let Some(working_directory) = process.cwd().and_then(path_as_str) {
metrics.push(MetricNode::raw_value("cwd", working_directory));
}
let cmdline = process.cmd().to_vec();
metrics.push(MetricNode::raw_value("cmdline", cmdline));
metrics.push(MetricNode::structural(
"memory",
vec![
MetricNode::value("resident", process.memory(), MetricFormat::Bytes),
MetricNode::value("virtual", process.virtual_memory(), MetricFormat::Bytes),
],
));
metrics.push(MetricNode::value(
"lifetime",
process.run_time(),
MetricFormat::DurationSeconds,
));
// NOTE(review): `.add(0)` appears to be used to turn sysinfo's id wrapper
// into a plain number — confirm against the sysinfo version in use.
// A missing id is reported as 0.
metrics.push(MetricNode::value(
"uid",
process.user_id().map(|x| (*x).add(0)).unwrap_or(0) as f64,
MetricFormat::Integer,
));
metrics.push(MetricNode::value(
"gid",
process.group_id().map(|x| (*x).add(0)).unwrap_or(0) as f64,
MetricFormat::Integer,
));
metrics.push(MetricNode::value(
"euid",
process
.effective_user_id()
.map(|x| (*x).add(0))
.unwrap_or(0) as f64,
MetricFormat::Integer,
));
metrics.push(MetricNode::value(
"egid",
process.effective_group_id().map(|x| x.add(0)).unwrap_or(0) as f64,
MetricFormat::Integer,
));
Ok(MetricNode::structural(process.pid().to_string(), metrics))
}
}
/// Returns the path as an owned `String`, or `None` when its encoded bytes
/// are not valid UTF-8.
fn path_as_str(path: &Path) -> Option<String> {
    let raw = path.as_os_str().as_encoded_bytes().to_vec();
    match String::from_utf8(raw) {
        Ok(text) => Some(text),
        Err(_) => None,
    }
}