krata: reorganize crates

This commit is contained in:
Alex Zenla
2024-03-07 18:12:47 +00:00
parent c0eeab4047
commit 7bc0c95f00
97 changed files with 24 additions and 24 deletions

26
crates/krata/Cargo.toml Normal file
View File

@ -0,0 +1,26 @@
# Manifest for the shared `krata` library crate.
[package]
name = "krata"
version.workspace = true
edition = "2021"
# NOTE(review): Cargo only honors `resolver` in the workspace root manifest;
# confirm whether this key has any effect in a member crate.
resolver = "2"
# All dependency versions are inherited from the workspace manifest.
[dependencies]
anyhow = { workspace = true }
libc = { workspace = true }
log = { workspace = true }
nix = { workspace = true, features = ["ioctl", "socket"] }
prost = { workspace = true }
serde = { workspace = true }
tokio = { workspace = true }
tonic = { workspace = true }
url = { workspace = true }
# tonic-build is invoked from build.rs to compile the control protocol.
[build-dependencies]
tonic-build = { workspace = true }
[lib]
name = "krata"
# Small command line example that exercises the ethtool module.
[[example]]
name = "ethtool"
path = "examples/ethtool.rs"

5
crates/krata/build.rs Normal file
View File

@ -0,0 +1,5 @@
/// Build script entry point: compiles the control protocol definition with
/// `tonic_build`, panicking (and thereby failing the build) on any error.
fn main() {
    let protos = ["proto/krata/control.proto"];
    let include_dirs = ["proto"];
    tonic_build::configure()
        .compile(&protos, &include_dirs)
        .unwrap();
}

View File

@ -0,0 +1,13 @@
use std::env;
use anyhow::Result;
use krata::ethtool::EthtoolHandle;
fn main() -> Result<()> {
let args = env::args().collect::<Vec<String>>();
let interface = args.get(1).unwrap();
let mut handle = EthtoolHandle::new()?;
handle.set_gso(interface, false)?;
handle.set_tso(interface, false)?;
Ok(())
}

View File

@ -0,0 +1,79 @@
// Control protocol spoken between control clients and the daemon.
syntax = "proto3";
option java_multiple_files = true;
option java_package = "dev.krata.proto.control";
option java_outer_classname = "ControlProto";
package krata.control;
// Description of a single guest as reported by the control service.
message GuestInfo {
// Guest identifier; the daemon in this change fills it with the guest UUID in string form.
string id = 1;
// Image the guest was launched from.
string image = 2;
// IPv4 address of the guest; the daemon sends an empty string when none is known.
string ipv4 = 3;
// IPv6 address of the guest; the daemon sends an empty string when none is known.
string ipv6 = 4;
}
// Parameters for launching a new guest.
message LaunchGuestRequest {
// Image to launch.
string image = 1;
// Number of virtual CPUs.
uint32 vcpus = 2;
// Amount of memory for the guest (presumably megabytes -- TODO confirm the unit).
uint64 mem = 3;
// Environment entries; the daemon treats an empty list as "not specified".
repeated string env = 4;
// Command to run; the daemon treats an empty list as "not specified".
repeated string run = 5;
}
// Result of a successful launch.
message LaunchGuestReply {
GuestInfo guest = 1;
}
message ListGuestsRequest {}
// All guests currently known to the daemon.
message ListGuestsReply {
repeated GuestInfo guests = 1;
}
message DestroyGuestRequest {
string guest_id = 1;
}
message DestroyGuestReply {}
// Client-to-daemon console message.
message ConsoleDataRequest {
// Guest whose console is attached. The daemon reads this from the first
// message of the stream only; the bundled client leaves it empty afterwards.
string guest_id = 1;
// Bytes to write to the guest console; empty payloads are not written.
bytes data = 2;
}
// Daemon-to-client console message carrying bytes read from the guest console.
message ConsoleDataReply {
bytes data = 1;
}
message WatchEventsRequest {}
message GuestLaunchedEvent {
string guest_id = 1;
}
message GuestDestroyedEvent {
string guest_id = 1;
}
message GuestExitedEvent {
string guest_id = 1;
// Exit code reported for the guest.
int32 code = 2;
}
// One event from the daemon's event stream; exactly one variant is set.
message WatchEventsReply {
oneof event {
GuestLaunchedEvent guest_launched = 1;
GuestDestroyedEvent guest_destroyed = 2;
GuestExitedEvent guest_exited = 3;
}
}
service ControlService {
rpc LaunchGuest(LaunchGuestRequest) returns (LaunchGuestReply);
rpc DestroyGuest(DestroyGuestRequest) returns (DestroyGuestReply);
rpc ListGuests(ListGuestsRequest) returns (ListGuestsReply);
// Bidirectional console stream; the first request selects the guest.
rpc ConsoleData(stream ConsoleDataRequest) returns (stream ConsoleDataReply);
// Server-side stream of guest lifecycle events.
rpc WatchEvents(WatchEventsRequest) returns (stream WatchEventsReply);
}

View File

@ -0,0 +1 @@
// Pulls in the code generated by the build script for the `krata.control`
// protobuf package.
tonic::include_proto!("krata.control");

100
crates/krata/src/dial.rs Normal file
View File

@ -0,0 +1,100 @@
use std::{fmt::Display, str::FromStr};
use anyhow::anyhow;
use url::{Host, Url};
/// Port used for `tcp://` control addresses that do not specify one.
pub const KRATA_DEFAULT_TCP_PORT: u16 = 4350;
/// Port used for `tls://` and `tls-insecure://` control addresses that do not
/// specify one.
pub const KRATA_DEFAULT_TLS_PORT: u16 = 4353;
/// Address of a control service endpoint.
///
/// Values are parsed from and rendered to URL form (`unix://`, `tcp://`,
/// `tls://`, `tls-insecure://`) by the trait implementations in this module.
/// `Debug`, `PartialEq` and `Eq` are derived so addresses can be logged and
/// compared by callers and tests.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum ControlDialAddress {
    /// A Unix domain socket at `path`.
    UnixSocket {
        path: String,
    },
    /// A plain TCP endpoint.
    Tcp {
        host: String,
        port: u16,
    },
    /// A TLS endpoint; `insecure` is set for the `tls-insecure` scheme.
    Tls {
        host: String,
        port: u16,
        insecure: bool,
    },
}
impl FromStr for ControlDialAddress {
type Err = anyhow::Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let url: Url = s.parse()?;
let host = url.host().unwrap_or(Host::Domain("localhost")).to_string();
match url.scheme() {
"unix" => Ok(ControlDialAddress::UnixSocket {
path: url.path().to_string(),
}),
"tcp" => {
let port = url.port().unwrap_or(KRATA_DEFAULT_TCP_PORT);
Ok(ControlDialAddress::Tcp { host, port })
}
"tls" | "tls-insecure" => {
let insecure = url.scheme() == "tls-insecure";
let port = url.port().unwrap_or(KRATA_DEFAULT_TLS_PORT);
Ok(ControlDialAddress::Tls {
host,
port,
insecure,
})
}
_ => Err(anyhow!("unknown control address scheme: {}", url.scheme())),
}
}
}
impl From<ControlDialAddress> for Url {
    /// Renders a control address as a URL, omitting the port when it equals
    /// the default port of the scheme.
    ///
    /// Panics if the host stored in the address is rejected by the URL
    /// setters (the setters' results are unwrapped).
    fn from(val: ControlDialAddress) -> Self {
        // (base URL, optional scheme override, host, port, default port)
        let (base, scheme_override, host, port, default_port) = match val {
            ControlDialAddress::UnixSocket { path } => {
                let mut unix_url = Url::parse("unix:///").unwrap();
                unix_url.set_path(&path);
                return unix_url;
            }
            ControlDialAddress::Tcp { host, port } => {
                ("tcp://", None, host, port, KRATA_DEFAULT_TCP_PORT)
            }
            ControlDialAddress::Tls {
                host,
                port,
                insecure,
            } => {
                let scheme = if insecure { Some("tls-insecure") } else { None };
                ("tls://", scheme, host, port, KRATA_DEFAULT_TLS_PORT)
            }
        };

        let mut network_url = Url::parse(base).unwrap();
        if let Some(scheme) = scheme_override {
            network_url.set_scheme(scheme).unwrap();
        }
        network_url.set_host(Some(&host)).unwrap();
        if port != default_port {
            network_url.set_port(Some(port)).unwrap();
        }
        network_url
    }
}
impl Display for ControlDialAddress {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let url: Url = self.clone().into();
write!(f, "{}", url)
}
}

View File

@ -0,0 +1,81 @@
use std::{
os::fd::{AsRawFd, FromRawFd, OwnedFd},
ptr::addr_of_mut,
};
use anyhow::Result;
use libc::{ioctl, socket, AF_INET, SOCK_DGRAM};
/// Command/value pair handed to the kernel through the `ifr_data` pointer of
/// an [`EthtoolIfreq`].
#[repr(C)]
struct EthtoolValue {
/// ethtool command number (one of the `ETHTOOL_*` constants below).
cmd: u32,
/// Value for the command; this module only ever passes 0 or 1.
data: u32,
}
/// ethtool command used by `set_gso`.
const ETHTOOL_SGSO: u32 = 0x00000024;
/// ethtool command used by `set_tso`.
const ETHTOOL_STSO: u32 = 0x0000001f;
// The ioctl request number is declared with a different integer type on musl
// targets (presumably to match the `ioctl` prototype libc exposes there --
// TODO confirm).
#[cfg(not(target_env = "musl"))]
const SIOCETHTOOL: libc::c_ulong = libc::SIOCETHTOOL;
#[cfg(target_env = "musl")]
const SIOCETHTOOL: libc::c_int = libc::SIOCETHTOOL as i32;
/// Minimal mirror of the kernel's `struct ifreq` as needed for `SIOCETHTOOL`:
/// the interface name followed by a pointer to the ethtool command payload.
#[repr(C)]
#[derive(Debug)]
struct EthtoolIfreq {
    /// NUL-terminated interface name.
    ifr_name: [libc::c_char; libc::IF_NAMESIZE],
    /// Address of the ethtool command structure.
    ifr_data: libc::uintptr_t,
    /// Padding so this struct is at least as large as the kernel's
    /// `struct ifreq`, whose trailing union is wider than a single pointer.
    /// Interface ioctls copy a whole `struct ifreq` from and to user space,
    /// so a shorter struct would let the kernel touch memory past its end.
    _ifr_pad: [u8; 16],
}

impl EthtoolIfreq {
    /// Builds a request for `interface` with a null data pointer.
    ///
    /// At most `IF_NAMESIZE - 1` bytes of the name are copied, so the buffer
    /// always keeps its NUL terminator and an over-long name can no longer
    /// index out of bounds. Callers that need to reject over-long names must
    /// check the length themselves.
    fn new(interface: &str) -> EthtoolIfreq {
        let mut ifreq = EthtoolIfreq {
            ifr_name: [0; libc::IF_NAMESIZE],
            ifr_data: 0,
            _ifr_pad: [0; 16],
        };
        let capacity = libc::IF_NAMESIZE - 1;
        for (slot, byte) in ifreq
            .ifr_name
            .iter_mut()
            .zip(interface.bytes().take(capacity))
        {
            *slot = byte as libc::c_char;
        }
        ifreq
    }

    /// Points the request at the command payload located at `ptr`.
    fn set_value(&mut self, ptr: *mut libc::c_void) {
        self.ifr_data = ptr as libc::uintptr_t;
    }
}
pub struct EthtoolHandle {
fd: OwnedFd,
}
impl EthtoolHandle {
pub fn new() -> Result<EthtoolHandle> {
let fd = unsafe { socket(AF_INET, SOCK_DGRAM, 0) };
if fd == -1 {
return Err(std::io::Error::last_os_error().into());
}
Ok(EthtoolHandle {
fd: unsafe { OwnedFd::from_raw_fd(fd) },
})
}
pub fn set_gso(&mut self, interface: &str, value: bool) -> Result<()> {
self.set_value(interface, ETHTOOL_SGSO, if value { 1 } else { 0 })
}
pub fn set_tso(&mut self, interface: &str, value: bool) -> Result<()> {
self.set_value(interface, ETHTOOL_STSO, if value { 1 } else { 0 })
}
fn set_value(&mut self, interface: &str, cmd: u32, value: u32) -> Result<()> {
let mut ifreq = EthtoolIfreq::new(interface);
let mut value = EthtoolValue { cmd, data: value };
ifreq.set_value(addr_of_mut!(value) as *mut libc::c_void);
let result = unsafe { ioctl(self.fd.as_raw_fd(), SIOCETHTOOL, addr_of_mut!(ifreq) as u64) };
if result == -1 {
return Err(std::io::Error::last_os_error().into());
}
Ok(())
}
}

View File

@ -0,0 +1,33 @@
use serde::{Deserialize, Serialize};
/// IPv4 settings of a guest's launch network configuration.
#[derive(Serialize, Deserialize, Debug)]
pub struct LaunchNetworkIpv4 {
/// Guest address, kept as text (exact format not enforced here -- TODO confirm with the producer).
pub address: String,
/// Gateway address, kept as text.
pub gateway: String,
}
/// IPv6 settings of a guest's launch network configuration.
#[derive(Serialize, Deserialize, Debug)]
pub struct LaunchNetworkIpv6 {
/// Guest address, kept as text (exact format not enforced here -- TODO confirm with the producer).
pub address: String,
/// Gateway address, kept as text.
pub gateway: String,
}
/// Name resolution settings of a guest's launch network configuration.
#[derive(Serialize, Deserialize, Debug)]
pub struct LaunchNetworkResolver {
/// Nameserver entries, kept as text.
pub nameservers: Vec<String>,
}
/// Complete network section of a guest's launch configuration.
#[derive(Serialize, Deserialize, Debug)]
pub struct LaunchNetwork {
/// Presumably the name of the network link to configure -- TODO confirm.
pub link: String,
pub ipv4: LaunchNetworkIpv4,
pub ipv6: LaunchNetworkIpv6,
pub resolver: LaunchNetworkResolver,
}
/// Serializable launch configuration for a guest; every section is optional.
#[derive(Serialize, Deserialize, Debug)]
pub struct LaunchInfo {
/// Network configuration; the guest init skips network setup when absent.
pub network: Option<LaunchNetwork>,
/// Environment entries for the launched workload, if any.
pub env: Option<Vec<String>>,
/// Command to run, if any.
pub run: Option<Vec<String>>,
}

4
crates/krata/src/lib.rs Normal file
View File

@ -0,0 +1,4 @@
pub mod control;
pub mod dial;
pub mod ethtool;
pub mod launchcfg;

View File

@ -0,0 +1,30 @@
# Manifest for `kratactl`, the control client library and binary.
[package]
name = "kratactl"
version.workspace = true
edition = "2021"
# NOTE(review): Cargo only honors `resolver` in the workspace root manifest;
# confirm whether this key has any effect in a member crate.
resolver = "2"
[dependencies]
anyhow = { workspace = true }
async-stream = { workspace = true }
clap = { workspace = true }
cli-tables = { workspace = true }
env_logger = { workspace = true }
krata = { path = "../krata" }
log = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
termion = { workspace = true }
tokio = { workspace = true }
tokio-native-tls = { workspace = true }
tokio-stream = { workspace = true }
# NOTE(review): the client code uses tonic's `ClientTlsConfig`; confirm the
# workspace-level tonic dependency enables the `tls` feature, since it is not
# requested here (kratad requests it explicitly).
tonic = { workspace = true }
tower = { workspace = true }
url = { workspace = true }
[lib]
name = "kratactl"
[[bin]]
name = "kratactl"
path = "bin/control.rs"

View File

@ -0,0 +1,153 @@
use anyhow::{anyhow, Result};
use clap::{Parser, Subcommand};
use env_logger::Env;
use krata::control::{
watch_events_reply::Event, DestroyGuestRequest, LaunchGuestRequest, ListGuestsRequest,
WatchEventsRequest,
};
use kratactl::{client::ControlClientProvider, console::StdioConsoleStream};
use tonic::Request;
// Top-level command line arguments of the control client.
// Plain `//` comments are used on purpose: clap turns `///` doc comments into
// help text, which would change the program's output.
#[derive(Parser, Debug)]
#[command(version, about)]
struct ControllerArgs {
// Address of the control service; parsed into a dial address in `main`.
#[arg(short, long, default_value = "unix:///var/lib/krata/daemon.socket")]
connection: String,
// Subcommand to execute.
#[command(subcommand)]
command: Commands,
}
// Subcommands understood by the control client.
// Plain `//` comments are used on purpose: clap turns `///` doc comments into
// help text, which would change the program's output.
#[derive(Subcommand, Debug)]
enum Commands {
    // Prints a table of all guests.
    List {},
    // Launches a guest and optionally attaches to its console.
    Launch {
        #[arg(short, long, default_value_t = 1)]
        cpus: u32,
        #[arg(short, long, default_value_t = 512)]
        mem: u64,
        // Written with parentheses like every other attribute in this file
        // (the original used square brackets here).
        #[arg(short, long)]
        env: Option<Vec<String>>,
        #[arg(short, long)]
        attach: bool,
        #[arg()]
        image: String,
        // Everything after the image is forwarded as the command to run.
        #[arg(allow_hyphen_values = true, trailing_var_arg = true)]
        run: Vec<String>,
    },
    // Destroys the given guest.
    Destroy {
        #[arg()]
        guest: String,
    },
    // Attaches to the console of the given guest.
    Console {
        #[arg()]
        guest: String,
    },
    // Prints daemon events as they arrive.
    Watch {},
}
/// Entry point of the control client: connects to the control service named
/// by `--connection` and executes the selected subcommand.
#[tokio::main]
async fn main() -> Result<()> {
    env_logger::Builder::from_env(Env::default().default_filter_or("warn")).init();

    let args = ControllerArgs::parse();
    let mut client = ControlClientProvider::dial(args.connection.parse()?).await?;

    match args.command {
        Commands::List { .. } => {
            let listing = client
                .list_guests(Request::new(ListGuestsRequest {}))
                .await?
                .into_inner();

            let mut table = cli_tables::Table::new();
            let header = vec!["uuid", "ipv4", "ipv6", "image"];
            table.push_row(&header)?;
            for guest in listing.guests {
                table.push_row_string(&vec![guest.id, guest.ipv4, guest.ipv6, guest.image])?;
            }

            // The header counts as a record, so a count of one means "no guests".
            if table.num_records() != 1 {
                println!("{}", table.to_string());
            } else {
                println!("no guests have been launched");
            }
        }

        Commands::Launch {
            image,
            cpus,
            mem,
            attach,
            env,
            run,
        } => {
            let launch = LaunchGuestRequest {
                image,
                vcpus: cpus,
                mem,
                env: env.unwrap_or_default(),
                run,
            };
            let reply = client
                .launch_guest(Request::new(launch))
                .await?
                .into_inner();
            let guest = reply.guest.ok_or_else(|| {
                anyhow!("control service did not return a guest in the response")
            })?;
            println!("launched guest: {}", guest.id);

            if attach {
                let input = StdioConsoleStream::stdin_stream(guest.id).await;
                let output = client.console_data(input).await?.into_inner();
                StdioConsoleStream::stdout(output).await?;
            }
        }

        Commands::Destroy { guest } => {
            let destroy = DestroyGuestRequest {
                guest_id: guest.clone(),
            };
            client.destroy_guest(Request::new(destroy)).await?;
            println!("destroyed guest: {}", guest);
        }

        Commands::Console { guest } => {
            let input = StdioConsoleStream::stdin_stream(guest).await;
            let output = client.console_data(input).await?.into_inner();
            StdioConsoleStream::stdout(output).await?;
        }

        Commands::Watch {} => {
            let mut events = client
                .watch_events(Request::new(WatchEventsRequest {}))
                .await?
                .into_inner();
            while let Some(reply) = events.message().await? {
                // Replies without an event payload are skipped.
                if let Some(event) = reply.event {
                    match event {
                        Event::GuestLaunched(launched) => {
                            println!("event=guest.launched guest={}", launched.guest_id);
                        }
                        Event::GuestDestroyed(destroyed) => {
                            println!("event=guest.destroyed guest={}", destroyed.guest_id);
                        }
                        Event::GuestExited(exited) => {
                            println!(
                                "event=guest.exited guest={} code={}",
                                exited.guest_id, exited.code
                            );
                        }
                    }
                }
            }
        }
    }
    Ok(())
}

View File

@ -0,0 +1,44 @@
use anyhow::Result;
use krata::{control::control_service_client::ControlServiceClient, dial::ControlDialAddress};
use tokio::net::UnixStream;
use tonic::transport::{Channel, ClientTlsConfig, Endpoint, Uri};
use tower::service_fn;
/// Factory for control service clients.
pub struct ControlClientProvider {}

impl ControlClientProvider {
    /// Connects to the control service at `addr` and returns a ready client.
    ///
    /// # Errors
    ///
    /// Returns an error if the endpoint cannot be constructed or the
    /// connection cannot be established.
    pub async fn dial(addr: ControlDialAddress) -> Result<ControlServiceClient<Channel>> {
        let channel = match addr {
            ControlDialAddress::UnixSocket { path } => {
                // The endpoint URI is only a placeholder required by tonic.
                // The connector captures the socket path directly instead of
                // round-tripping it through the URI, so paths containing
                // URI-special characters (spaces, `?`, `#`, ...) still work.
                Endpoint::try_from("unix://localhost/")?
                    .connect_with_connector(service_fn(move |_: Uri| {
                        UnixStream::connect(path.clone())
                    }))
                    .await?
            }
            ControlDialAddress::Tcp { host, port } => {
                Endpoint::try_from(format!("http://{}:{}", host, port))?
                    .connect()
                    .await?
            }
            // NOTE(review): `insecure` is currently ignored by the client, so
            // `tls-insecure://` connects with the same verification as `tls://`.
            ControlDialAddress::Tls {
                host,
                port,
                insecure: _,
            } => {
                let tls_config = ClientTlsConfig::new().domain_name(&host);
                let address = format!("https://{}:{}", host, port);
                Channel::from_shared(address)?
                    .tls_config(tls_config)?
                    .connect()
                    .await?
            }
        };
        Ok(ControlServiceClient::new(channel))
    }
}

View File

@ -0,0 +1,57 @@
use std::{
io::stdout,
os::fd::{AsRawFd, FromRawFd},
};
use anyhow::Result;
use async_stream::stream;
use krata::control::{ConsoleDataReply, ConsoleDataRequest};
use log::debug;
use termion::raw::IntoRawMode;
use tokio::{
fs::File,
io::{stdin, AsyncReadExt, AsyncWriteExt},
};
use tokio_stream::{Stream, StreamExt};
use tonic::Streaming;
/// Bridges the local terminal to a guest console stream.
pub struct StdioConsoleStream;

impl StdioConsoleStream {
    /// Builds the request stream for a console session with `guest`.
    ///
    /// The first item carries the guest id and no data; every later item
    /// carries bytes read from stdin. The stream ends when stdin reaches end
    /// of file, when reading fails, or when a lone `0x1d` byte (Ctrl-]) is
    /// read.
    pub async fn stdin_stream(guest: String) -> impl Stream<Item = ConsoleDataRequest> {
        let mut stdin = stdin();
        stream! {
            yield ConsoleDataRequest { guest_id: guest, data: vec![] };

            let mut buffer = vec![0u8; 60];
            loop {
                let size = match stdin.read(&mut buffer).await {
                    // A zero-length read means end of file. Without this arm
                    // the loop would spin forever sending empty requests.
                    Ok(0) => break,
                    Ok(size) => size,
                    Err(error) => {
                        debug!("failed to read stdin: {}", error);
                        break;
                    }
                };
                if size == 1 && buffer[0] == 0x1d {
                    break;
                }
                let data = buffer[..size].to_vec();
                yield ConsoleDataRequest { guest_id: String::default(), data };
            }
        }
    }

    /// Switches the terminal to raw mode and copies console output from
    /// `stream` to stdout until the stream ends.
    ///
    /// # Errors
    ///
    /// Returns an error if raw mode cannot be enabled, if the stream yields
    /// an error, or if writing to stdout fails.
    pub async fn stdout(mut stream: Streaming<ConsoleDataReply>) -> Result<()> {
        let terminal = stdout().into_raw_mode()?;
        // The async `File` only borrows the terminal's descriptor, so it is
        // wrapped in `ManuallyDrop`. Dropping it would close stdout before
        // `terminal` is dropped, at which point the raw-mode guard could no
        // longer restore the terminal settings.
        // SAFETY: the descriptor stays open for the whole function because
        // `terminal` outlives `stdout` and the `File` is never dropped.
        let mut stdout =
            std::mem::ManuallyDrop::new(unsafe { File::from_raw_fd(terminal.as_raw_fd()) });
        while let Some(reply) = stream.next().await {
            let reply = reply?;
            if reply.data.is_empty() {
                continue;
            }
            stdout.write_all(&reply.data).await?;
            stdout.flush().await?;
        }
        Ok(())
    }
}

View File

@ -0,0 +1,2 @@
pub mod client;
pub mod console;

28
crates/kratad/Cargo.toml Normal file
View File

@ -0,0 +1,28 @@
# Manifest for `kratad`, the daemon library and binary.
[package]
name = "kratad"
version.workspace = true
edition = "2021"
# NOTE(review): Cargo only honors `resolver` in the workspace root manifest;
# confirm whether this key has any effect in a member crate.
resolver = "2"
[dependencies]
anyhow = { workspace = true }
async-stream = { workspace = true }
async-trait = { workspace = true }
clap = { workspace = true }
env_logger = { workspace = true }
futures = { workspace = true }
krata = { path = "../krata" }
kratart = { path = "../kratart" }
log = { workspace = true }
signal-hook = { workspace = true }
tokio = { workspace = true }
tokio-stream = { workspace = true }
# The `tls` feature is needed for the server-side TLS configuration.
tonic = { workspace = true, features = ["tls"] }
uuid = { workspace = true }
[lib]
name = "kratad"
[[bin]]
name = "kratad"
path = "bin/daemon.rs"

View File

@ -0,0 +1,37 @@
use anyhow::Result;
use clap::Parser;
use env_logger::Env;
use krata::dial::ControlDialAddress;
use kratad::Daemon;
use kratart::Runtime;
use std::{
str::FromStr,
sync::{atomic::AtomicBool, Arc},
};
// Command line arguments of the daemon.
// Plain `//` comments are used on purpose: clap turns `///` doc comments into
// help text, which would change the program's output.
#[derive(Parser)]
struct Args {
// Address the control service listens on.
#[arg(short, long, default_value = "unix:///var/lib/krata/daemon.socket")]
listen: String,
// Store directory handed to both the runtime and the daemon.
#[arg(short, long, default_value = "/var/lib/krata")]
store: String,
}
#[tokio::main(flavor = "multi_thread", worker_threads = 10)]
async fn main() -> Result<()> {
env_logger::Builder::from_env(Env::default().default_filter_or("warn")).init();
mask_sighup()?;
let args = Args::parse();
let addr = ControlDialAddress::from_str(&args.listen)?;
let runtime = Runtime::new(args.store.clone()).await?;
let mut daemon = Daemon::new(args.store.clone(), runtime).await?;
daemon.listen(addr).await?;
Ok(())
}
/// Registers a flag-setting handler for `SIGHUP`. Nothing reads the flag
/// afterwards; only the registration itself is kept.
fn mask_sighup() -> Result<()> {
    let hangup_seen = Arc::new(AtomicBool::new(false));
    let _registration =
        signal_hook::flag::register(signal_hook::consts::SIGHUP, hangup_seen)?;
    Ok(())
}

View File

@ -0,0 +1,189 @@
use std::{io, pin::Pin};
use async_stream::try_stream;
use futures::Stream;
use krata::control::{
control_service_server::ControlService, ConsoleDataReply, ConsoleDataRequest,
DestroyGuestReply, DestroyGuestRequest, GuestInfo, LaunchGuestReply, LaunchGuestRequest,
ListGuestsReply, ListGuestsRequest, WatchEventsReply, WatchEventsRequest,
};
use tokio::{
io::{AsyncReadExt, AsyncWriteExt},
select,
};
use tokio_stream::StreamExt;
use tonic::{Request, Response, Status, Streaming};
use crate::event::DaemonEventContext;
use kratart::{launch::GuestLaunchRequest, Runtime};
/// Error carrier used to turn runtime failures into gRPC statuses; it keeps
/// only the error's message text.
pub struct ApiError {
message: String,
}
impl From<anyhow::Error> for ApiError {
    /// Captures the display text of `value` as the API error message.
    fn from(value: anyhow::Error) -> Self {
        let message = value.to_string();
        Self { message }
    }
}
impl From<ApiError> for Status {
fn from(value: ApiError) -> Self {
Status::unknown(value.message)
}
}
/// Control service implementation backed by a [`Runtime`] and the daemon's
/// event context.
#[derive(Clone)]
pub struct RuntimeControlService {
    events: DaemonEventContext,
    runtime: Runtime,
}

impl RuntimeControlService {
    /// Creates a service that serves requests from `runtime` and event
    /// subscriptions from `events`.
    pub fn new(events: DaemonEventContext, runtime: Runtime) -> Self {
        RuntimeControlService { events, runtime }
    }
}
/// Outcome of one `select!` round in the console data loop.
enum ConsoleDataSelect {
/// The guest console read finished with this result.
Read(io::Result<usize>),
/// The client request stream produced an item, or ended (`None`).
Write(Option<Result<ConsoleDataRequest, tonic::Status>>),
}
#[tonic::async_trait]
impl ControlService for RuntimeControlService {
type ConsoleDataStream =
Pin<Box<dyn Stream<Item = Result<ConsoleDataReply, Status>> + Send + 'static>>;
type WatchEventsStream =
Pin<Box<dyn Stream<Item = Result<WatchEventsReply, Status>> + Send + 'static>>;
async fn launch_guest(
&self,
request: Request<LaunchGuestRequest>,
) -> Result<Response<LaunchGuestReply>, Status> {
let request = request.into_inner();
let guest: GuestInfo = convert_guest_info(
self.runtime
.launch(GuestLaunchRequest {
image: &request.image,
vcpus: request.vcpus,
mem: request.mem,
env: empty_vec_optional(request.env),
run: empty_vec_optional(request.run),
debug: false,
})
.await
.map_err(ApiError::from)?,
);
Ok(Response::new(LaunchGuestReply { guest: Some(guest) }))
}
async fn destroy_guest(
&self,
request: Request<DestroyGuestRequest>,
) -> Result<Response<DestroyGuestReply>, Status> {
let request = request.into_inner();
self.runtime
.destroy(&request.guest_id)
.await
.map_err(ApiError::from)?;
Ok(Response::new(DestroyGuestReply {}))
}
async fn list_guests(
&self,
request: Request<ListGuestsRequest>,
) -> Result<Response<ListGuestsReply>, Status> {
let _ = request.into_inner();
let guests = self.runtime.list().await.map_err(ApiError::from)?;
let guests = guests
.into_iter()
.map(convert_guest_info)
.collect::<Vec<GuestInfo>>();
Ok(Response::new(ListGuestsReply { guests }))
}
async fn console_data(
&self,
request: Request<Streaming<ConsoleDataRequest>>,
) -> Result<Response<Self::ConsoleDataStream>, Status> {
let mut input = request.into_inner();
let Some(request) = input.next().await else {
return Err(ApiError {
message: "expected to have at least one request".to_string(),
}
.into());
};
let request = request?;
let mut console = self
.runtime
.console(&request.guest_id)
.await
.map_err(ApiError::from)?;
let output = try_stream! {
let mut buffer: Vec<u8> = vec![0u8; 256];
loop {
let what = select! {
x = console.read_handle.read(&mut buffer) => ConsoleDataSelect::Read(x),
x = input.next() => ConsoleDataSelect::Write(x),
};
match what {
ConsoleDataSelect::Read(result) => {
let size = result?;
let data = buffer[0..size].to_vec();
yield ConsoleDataReply { data, };
},
ConsoleDataSelect::Write(Some(request)) => {
let request = request?;
if !request.data.is_empty() {
console.write_handle.write_all(&request.data).await?;
}
},
ConsoleDataSelect::Write(None) => {
break;
}
}
}
};
Ok(Response::new(Box::pin(output) as Self::ConsoleDataStream))
}
async fn watch_events(
&self,
request: Request<WatchEventsRequest>,
) -> Result<Response<Self::WatchEventsStream>, Status> {
let _ = request.into_inner();
let mut events = self.events.subscribe();
let output = try_stream! {
while let Ok(event) = events.recv().await {
yield WatchEventsReply { event: Some(event), };
}
};
Ok(Response::new(Box::pin(output) as Self::WatchEventsStream))
}
}
/// Returns `None` for an empty vector and `Some(value)` otherwise.
fn empty_vec_optional<T>(value: Vec<T>) -> Option<Vec<T>> {
    Some(value).filter(|items| !items.is_empty())
}
/// Converts the runtime's guest description into its protocol form. Missing
/// addresses become empty strings.
fn convert_guest_info(value: kratart::GuestInfo) -> GuestInfo {
    let ipv4 = value
        .ipv4
        .map_or_else(String::new, |network| network.ip().to_string());
    let ipv6 = value
        .ipv6
        .map_or_else(String::new, |network| network.ip().to_string());
    GuestInfo {
        id: value.uuid.to_string(),
        image: value.image,
        ipv4,
        ipv6,
    }
}

112
crates/kratad/src/event.rs Normal file
View File

@ -0,0 +1,112 @@
use std::{collections::HashMap, time::Duration};
use anyhow::Result;
use krata::control::{GuestDestroyedEvent, GuestExitedEvent, GuestLaunchedEvent};
use log::error;
use tokio::{sync::broadcast, task::JoinHandle, time};
use uuid::Uuid;
use kratart::{GuestInfo, Runtime};
/// Events are represented directly by the protocol's `oneof` payload type.
pub type DaemonEvent = krata::control::watch_events_reply::Event;
/// Capacity passed to the broadcast channel that distributes events.
const EVENT_CHANNEL_QUEUE_LEN: usize = 1000;
/// Cloneable handle used to subscribe to daemon events.
#[derive(Clone)]
pub struct DaemonEventContext {
sender: broadcast::Sender<DaemonEvent>,
}
impl DaemonEventContext {
/// Returns a new receiver on the event broadcast channel.
pub fn subscribe(&self) -> broadcast::Receiver<DaemonEvent> {
self.sender.subscribe()
}
}
/// Derives events by comparing successive guest listings from the runtime.
pub struct DaemonEventGenerator {
runtime: Runtime,
/// Guests seen by the previous evaluation, keyed by UUID.
last: HashMap<Uuid, GuestInfo>,
sender: broadcast::Sender<DaemonEvent>,
}
impl DaemonEventGenerator {
pub async fn new(runtime: Runtime) -> Result<(DaemonEventContext, DaemonEventGenerator)> {
let (sender, _) = broadcast::channel(EVENT_CHANNEL_QUEUE_LEN);
let generator = DaemonEventGenerator {
runtime,
last: HashMap::new(),
sender: sender.clone(),
};
let context = DaemonEventContext { sender };
Ok((context, generator))
}
async fn evaluate(&mut self) -> Result<()> {
let guests = self.runtime.list().await?;
let guests = {
let mut map = HashMap::new();
for guest in guests {
map.insert(guest.uuid, guest);
}
map
};
let mut events: Vec<DaemonEvent> = Vec::new();
for uuid in guests.keys() {
if !self.last.contains_key(uuid) {
events.push(DaemonEvent::GuestLaunched(GuestLaunchedEvent {
guest_id: uuid.to_string(),
}));
}
}
for uuid in self.last.keys() {
if !guests.contains_key(uuid) {
events.push(DaemonEvent::GuestDestroyed(GuestDestroyedEvent {
guest_id: uuid.to_string(),
}));
}
}
for (uuid, guest) in &guests {
let Some(last) = self.last.get(uuid) else {
continue;
};
if last.state.exit_code.is_some() {
continue;
}
let Some(code) = guest.state.exit_code else {
continue;
};
events.push(DaemonEvent::GuestExited(GuestExitedEvent {
guest_id: uuid.to_string(),
code,
}));
}
self.last = guests;
for event in events {
let _ = self.sender.send(event);
}
Ok(())
}
pub async fn launch(mut self) -> Result<JoinHandle<()>> {
Ok(tokio::task::spawn(async move {
loop {
if let Err(error) = self.evaluate().await {
error!("failed to evaluate daemon events: {}", error);
time::sleep(Duration::from_secs(5)).await;
} else {
time::sleep(Duration::from_millis(500)).await;
}
}
}))
}
}

90
crates/kratad/src/lib.rs Normal file
View File

@ -0,0 +1,90 @@
use std::{net::SocketAddr, path::PathBuf, str::FromStr};
use anyhow::Result;
use control::RuntimeControlService;
use event::{DaemonEventContext, DaemonEventGenerator};
use krata::{control::control_service_server::ControlServiceServer, dial::ControlDialAddress};
use kratart::Runtime;
use log::info;
use tokio::{net::UnixListener, task::JoinHandle};
use tokio_stream::wrappers::UnixListenerStream;
use tonic::transport::{Identity, Server, ServerTlsConfig};
pub mod control;
pub mod event;
/// The daemon: owns the runtime, the event context and the background task
/// that generates events.
pub struct Daemon {
/// Store directory; TLS material is looked up beneath it.
store: String,
runtime: Runtime,
events: DaemonEventContext,
/// Handle of the event generator task; aborted when the daemon is dropped.
task: JoinHandle<()>,
}
impl Daemon {
pub async fn new(store: String, runtime: Runtime) -> Result<Self> {
let runtime_for_events = runtime.dupe().await?;
let (events, generator) = DaemonEventGenerator::new(runtime_for_events).await?;
Ok(Self {
store,
runtime,
events,
task: generator.launch().await?,
})
}
pub async fn listen(&mut self, addr: ControlDialAddress) -> Result<()> {
let control_service = RuntimeControlService::new(self.events.clone(), self.runtime.clone());
let mut server = Server::builder();
if let ControlDialAddress::Tls {
host: _,
port: _,
insecure,
} = &addr
{
let mut tls_config = ServerTlsConfig::new();
if !insecure {
let certificate_path = format!("{}/tls/daemon.pem", self.store);
let key_path = format!("{}/tls/daemon.key", self.store);
tls_config = tls_config.identity(Identity::from_pem(certificate_path, key_path));
}
server = server.tls_config(tls_config)?;
}
let server = server.add_service(ControlServiceServer::new(control_service));
info!("listening on address {}", addr);
match addr {
ControlDialAddress::UnixSocket { path } => {
let path = PathBuf::from(path);
if path.exists() {
tokio::fs::remove_file(&path).await?;
}
let listener = UnixListener::bind(path)?;
let stream = UnixListenerStream::new(listener);
server.serve_with_incoming(stream).await?;
}
ControlDialAddress::Tcp { host, port } => {
let address = format!("{}:{}", host, port);
server.serve(SocketAddr::from_str(&address)?).await?;
}
ControlDialAddress::Tls {
host,
port,
insecure: _,
} => {
let address = format!("{}:{}", host, port);
server.serve(SocketAddr::from_str(&address)?).await?;
}
}
Ok(())
}
}
impl Drop for Daemon {
/// Aborts the event generator task so it does not outlive the daemon.
fn drop(&mut self) {
self.task.abort();
}
}

View File

@ -0,0 +1,31 @@
# Manifest for `krataguest`, the guest init library and binary.
[package]
name = "krataguest"
version.workspace = true
edition = "2021"
# NOTE(review): Cargo only honors `resolver` in the workspace root manifest;
# confirm whether this key has any effect in a member crate.
resolver = "2"
[dependencies]
anyhow = { workspace = true }
env_logger = { workspace = true }
futures = { workspace = true }
ipnetwork = { workspace = true }
krata = { path = "../krata" }
log = { workspace = true }
nix = { workspace = true, features = ["process"] }
oci-spec = { workspace = true }
path-absolutize = { workspace = true }
rtnetlink = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
sys-mount = { workspace = true }
tokio = { workspace = true }
tokio-stream = { workspace = true }
walkdir = { workspace = true }
xenstore = { path = "../xen/xenstore" }
[lib]
name = "krataguest"
[[bin]]
name = "krataguest"
path = "bin/init.rs"

View File

@ -0,0 +1,24 @@
use anyhow::{anyhow, Result};
use env_logger::Env;
use krataguest::init::GuestInit;
use std::env;
/// Entry point of the guest init.
///
/// Refuses to run when the process id is greater than 3, unless
/// `KRATA_UNSAFE_ALWAYS_ALLOW_INIT` is set to `1`.
#[tokio::main]
async fn main() -> Result<()> {
    env::set_var("RUST_BACKTRACE", "1");
    env_logger::Builder::from_env(Env::default().default_filter_or("warn")).init();

    let bypass_pid_check = matches!(
        env::var("KRATA_UNSAFE_ALWAYS_ALLOW_INIT").as_deref(),
        Ok("1")
    );
    let pid = std::process::id();
    if !bypass_pid_check && pid > 3 {
        return Err(anyhow!(
            "not running because the pid of {} indicates this is probably not \
             the right context for the init daemon. \
             run with KRATA_UNSAFE_ALWAYS_ALLOW_INIT=1 to bypass this check",
            pid
        ));
    }

    let mut guest = GuestInit::new();
    guest.init().await?;
    Ok(())
}

View File

@ -0,0 +1,53 @@
use std::time::Duration;
use crate::childwait::{ChildEvent, ChildWait};
use anyhow::Result;
use nix::{libc::c_int, unistd::Pid};
use tokio::{select, time::sleep};
use xenstore::client::{XsdClient, XsdInterface};
/// Background supervisor that watches for the death of one child process.
pub struct ContainerBackground {
/// Pid of the child whose exit is reported.
child: Pid,
/// Source of child exit events.
wait: ChildWait,
}
impl ContainerBackground {
pub async fn new(child: Pid) -> Result<ContainerBackground> {
Ok(ContainerBackground {
child,
wait: ChildWait::new()?,
})
}
pub async fn run(&mut self) -> Result<()> {
loop {
select! {
event = self.wait.recv() => match event {
Some(event) => self.child_event(event).await?,
None => {
break;
}
}
};
}
Ok(())
}
async fn child_event(&mut self, event: ChildEvent) -> Result<()> {
if event.pid == self.child {
self.death(event.status).await?;
}
Ok(())
}
async fn death(&mut self, code: c_int) -> Result<()> {
let store = XsdClient::open().await?;
store
.write_string("krata/guest/exit-code", &code.to_string())
.await?;
drop(store);
loop {
sleep(Duration::from_secs(1)).await;
}
}
}

View File

@ -0,0 +1,84 @@
use std::{
ptr::addr_of_mut,
sync::{
atomic::{AtomicBool, Ordering},
Arc,
},
thread::{self, JoinHandle},
};
use anyhow::Result;
use log::warn;
use nix::{
libc::{c_int, wait},
unistd::Pid,
};
use tokio::sync::mpsc::{channel, Receiver, Sender};
/// Capacity of the channel carrying child events from the wait thread.
const CHILD_WAIT_QUEUE_LEN: usize = 10;
/// One reaped child as reported by `wait`.
#[derive(Clone, Copy, Debug)]
pub struct ChildEvent {
/// Pid returned by `wait`.
pub pid: Pid,
/// Raw status word filled in by `wait`.
pub status: c_int,
}
/// Receives child exit events produced by a dedicated wait thread.
pub struct ChildWait {
    receiver: Receiver<ChildEvent>,
    /// Set on drop to ask the wait thread to stop.
    signal: Arc<AtomicBool>,
    _task: JoinHandle<()>,
}

impl ChildWait {
    /// Spawns the wait thread and returns the receiving side.
    pub fn new() -> Result<ChildWait> {
        let (sender, receiver) = channel(CHILD_WAIT_QUEUE_LEN);
        let signal = Arc::new(AtomicBool::new(false));
        let task_signal = Arc::clone(&signal);
        let task = thread::spawn(move || {
            let mut processor = ChildWaitTask {
                sender,
                signal: task_signal,
            };
            if let Err(error) = processor.process() {
                warn!("failed to process child updates: {}", error);
            }
        });
        Ok(ChildWait {
            receiver,
            signal,
            _task: task,
        })
    }

    /// Waits for the next child event; `None` means the wait thread is gone.
    pub async fn recv(&mut self) -> Option<ChildEvent> {
        self.receiver.recv().await
    }
}
struct ChildWaitTask {
sender: Sender<ChildEvent>,
signal: Arc<AtomicBool>,
}
impl ChildWaitTask {
fn process(&mut self) -> Result<()> {
loop {
let mut status: c_int = 0;
let pid = unsafe { wait(addr_of_mut!(status)) };
let event = ChildEvent {
pid: Pid::from_raw(pid),
status,
};
let _ = self.sender.try_send(event);
if self.signal.load(Ordering::Acquire) {
return Ok(());
}
}
}
}
impl Drop for ChildWait {
/// Sets the stop flag; the wait thread only observes it after its current
/// `wait` call returns.
fn drop(&mut self) {
self.signal.store(true, Ordering::Release);
}
}

View File

@ -0,0 +1,523 @@
use anyhow::{anyhow, Result};
use futures::stream::TryStreamExt;
use ipnetwork::IpNetwork;
use krata::ethtool::EthtoolHandle;
use krata::launchcfg::{LaunchInfo, LaunchNetwork};
use log::{trace, warn};
use nix::libc::{dup2, ioctl};
use nix::unistd::{execve, fork, ForkResult, Pid};
use oci_spec::image::{Config, ImageConfiguration};
use path_absolutize::Absolutize;
use std::collections::HashMap;
use std::ffi::CString;
use std::fs::{File, OpenOptions, Permissions};
use std::net::{Ipv4Addr, Ipv6Addr};
use std::os::fd::AsRawFd;
use std::os::linux::fs::MetadataExt;
use std::os::unix::ffi::OsStrExt;
use std::os::unix::fs::{chroot, PermissionsExt};
use std::path::{Path, PathBuf};
use std::str::FromStr;
use std::{fs, io};
use sys_mount::{FilesystemType, Mount, MountFlags};
use walkdir::WalkDir;
use crate::background::ContainerBackground;
// Block devices that are mounted as squashfs images during init.
const IMAGE_BLOCK_DEVICE_PATH: &str = "/dev/xvda";
const CONFIG_BLOCK_DEVICE_PATH: &str = "/dev/xvdb";
// Mount points for the two squashfs images.
const IMAGE_MOUNT_PATH: &str = "/image";
const CONFIG_MOUNT_PATH: &str = "/config";
// Overlay layout.
const OVERLAY_MOUNT_PATH: &str = "/overlay";
const OVERLAY_IMAGE_BIND_PATH: &str = "/overlay/image";
const OVERLAY_WORK_PATH: &str = "/overlay/work";
const OVERLAY_UPPER_PATH: &str = "/overlay/upper";
// Kernel filesystems in the initial root.
const SYS_PATH: &str = "/sys";
const PROC_PATH: &str = "/proc";
const DEV_PATH: &str = "/dev";
// New root and the kernel filesystem locations beneath it.
const NEW_ROOT_PATH: &str = "/newroot";
const NEW_ROOT_SYS_PATH: &str = "/newroot/sys";
const NEW_ROOT_PROC_PATH: &str = "/newroot/proc";
const NEW_ROOT_DEV_PATH: &str = "/newroot/dev";
// Configuration files found on the config image.
const IMAGE_CONFIG_JSON_PATH: &str = "/config/image/config.json";
const LAUNCH_CONFIG_JSON_PATH: &str = "/config/launch.json";
/// Guest init driver; it carries no state of its own.
pub struct GuestInit {}
impl Default for GuestInit {
/// Equivalent to [`GuestInit::new`].
fn default() -> Self {
Self::new()
}
}
impl GuestInit {
/// Creates a new guest init driver.
pub fn new() -> GuestInit {
GuestInit {}
}
/// Runs the whole guest init sequence: early filesystem setup, console
/// mapping, image and config mounts, root switch preparation, optional
/// network setup and finally the workload described by the image config.
///
/// Console and network failures are only logged; every other failure
/// aborts the sequence.
pub async fn init(&mut self) -> Result<()> {
    self.early_init()?;

    trace!("opening console descriptor");
    match OpenOptions::new()
        .read(true)
        .write(true)
        .open("/dev/console")
    {
        Err(error) => warn!("failed to open console: {}", error),
        Ok(console) => self.map_console(&console)?,
    };

    self.mount_squashfs_images()?;
    let config = self.parse_image_config()?;
    let launch = self.parse_launch_config()?;

    self.mount_new_root()?;
    self.nuke_initrd()?;
    self.bind_new_root()?;

    if let Some(network) = &launch.network {
        if let Err(error) = self.network_setup(network).await {
            warn!("failed to initialize network: {}", error);
        }
    }

    let Some(cfg) = config.config() else {
        return Err(anyhow!(
            "unable to determine what to execute, image config doesn't tell us"
        ));
    };
    self.run(cfg, &launch).await?;
    Ok(())
}
/// Creates the basic directories and mounts the kernel filesystems.
fn early_init(&mut self) -> Result<()> {
    trace!("early init");

    let directories: [(&str, Option<u32>); 5] = [
        ("/dev", Some(0o0755)),
        ("/proc", None),
        ("/sys", None),
        ("/root", Some(0o0700)),
        ("/tmp", None),
    ];
    for (path, mode) in directories {
        self.create_dir(path, mode)?;
    }

    let kernel_filesystems = [
        ("devtmpfs", "/dev", "mode=0755"),
        ("proc", "/proc", ""),
        ("sysfs", "/sys", ""),
    ];
    for (fstype, path, data) in kernel_filesystems {
        self.mount_kernel_fs(fstype, path, data)?;
    }
    Ok(())
}
/// Creates `path` if it is not already a directory and, when `mode` is
/// given, sets its permissions (also on a pre-existing directory).
fn create_dir(&mut self, path: &str, mode: Option<u32>) -> Result<()> {
    let directory = Path::new(path);
    if !directory.is_dir() {
        trace!("creating directory {:?}", directory);
        fs::create_dir(directory)?;
    }

    let Some(mode) = mode else {
        return Ok(());
    };
    trace!("setting directory {:?} permissions to {:?}", directory, mode);
    fs::set_permissions(directory, Permissions::from_mode(mode))?;
    Ok(())
}
/// Mounts the kernel filesystem `fstype` at `path` with mount options `data`.
///
/// The mount only happens while `path` reports the same device id as `/`;
/// otherwise the call is a no-op.
fn mount_kernel_fs(&mut self, fstype: &str, path: &str, data: &str) -> Result<()> {
    let target_dev = fs::metadata(path)?.st_dev();
    let root_dev = fs::metadata("/")?.st_dev();
    if target_dev != root_dev {
        return Ok(());
    }
    trace!("mounting kernel fs {} to {}", fstype, path);
    let builder = Mount::builder()
        .fstype(FilesystemType::Manual(fstype))
        .flags(MountFlags::NOEXEC | MountFlags::NOSUID)
        .data(data);
    builder.mount(fstype, path)?;
    Ok(())
}
fn map_console(&mut self, console: &File) -> Result<()> {
trace!("mapping console");
unsafe {
dup2(console.as_raw_fd(), 0);
dup2(console.as_raw_fd(), 1);
dup2(console.as_raw_fd(), 2);
}
Ok(())
}
/// Mounts the image block device and then the config block device, each as squashfs.
fn mount_squashfs_images(&mut self) -> Result<()> {
    trace!("mounting squashfs images");
    self.mount_squashfs(
        Path::new(IMAGE_BLOCK_DEVICE_PATH),
        Path::new(IMAGE_MOUNT_PATH),
    )?;
    self.mount_squashfs(
        Path::new(CONFIG_BLOCK_DEVICE_PATH),
        Path::new(CONFIG_MOUNT_PATH),
    )?;
    Ok(())
}
/// Mounts the squashfs image `from` read-only at `to`, creating `to` when it
/// is not already a directory.
fn mount_squashfs(&mut self, from: &Path, to: &Path) -> Result<()> {
    trace!("mounting squashfs image {:?} to {:?}", from, to);
    let target_exists = to.is_dir();
    if !target_exists {
        fs::create_dir(to)?;
    }
    let builder = Mount::builder()
        .fstype(FilesystemType::Manual("squashfs"))
        .flags(MountFlags::RDONLY);
    builder.mount(from, to)?;
    Ok(())
}
/// Moves the mount at `from` to `to` (a `MOVE` mount), creating `to` when it
/// is not already a directory.
fn mount_move_subtree(&mut self, from: &Path, to: &Path) -> Result<()> {
    trace!("moving subtree {:?} to {:?}", from, to);
    let target_exists = to.is_dir();
    if !target_exists {
        fs::create_dir(to)?;
    }
    let builder = Mount::builder()
        .fstype(FilesystemType::Manual("none"))
        .flags(MountFlags::MOVE);
    builder.mount(from, to)?;
    Ok(())
}
/// Assembles the new root: mounts the overlay tmpfs, binds the image into it,
/// mounts the overlay at the new root path, then makes that path the current
/// working directory.
fn mount_new_root(&mut self) -> Result<()> {
trace!("mounting new root");
self.mount_overlay_tmpfs()?;
self.bind_image_to_overlay_tmpfs()?;
self.mount_overlay_to_new_root()?;
// Later steps (`bind_new_root`) refer to the new root as ".".
std::env::set_current_dir(NEW_ROOT_PATH)?;
trace!("mounted new root");
Ok(())
}
/// Creates the overlay mount point, mounts a tmpfs on it, and creates the
/// upper and work directories inside that tmpfs.
fn mount_overlay_tmpfs(&mut self) -> Result<()> {
    fs::create_dir(OVERLAY_MOUNT_PATH)?;
    let builder = Mount::builder().fstype(FilesystemType::Manual("tmpfs"));
    builder.mount("tmpfs", OVERLAY_MOUNT_PATH)?;
    for dir in [OVERLAY_UPPER_PATH, OVERLAY_WORK_PATH] {
        fs::create_dir(dir)?;
    }
    Ok(())
}
/// Bind-mounts the mounted image into the overlay tmpfs with the read-only flag set.
fn bind_image_to_overlay_tmpfs(&mut self) -> Result<()> {
    fs::create_dir(OVERLAY_IMAGE_BIND_PATH)?;
    let flags = MountFlags::BIND | MountFlags::RDONLY;
    let builder = Mount::builder()
        .fstype(FilesystemType::Manual("none"))
        .flags(flags);
    builder.mount(IMAGE_MOUNT_PATH, OVERLAY_IMAGE_BIND_PATH)?;
    Ok(())
}
/// Creates the new root directory and mounts the overlay filesystem on it,
/// using the bound image as lower layer and the tmpfs upper/work directories.
fn mount_overlay_to_new_root(&mut self) -> Result<()> {
    fs::create_dir(NEW_ROOT_PATH)?;
    let options = format!(
        "lowerdir={},upperdir={},workdir={}",
        OVERLAY_IMAGE_BIND_PATH, OVERLAY_UPPER_PATH, OVERLAY_WORK_PATH
    );
    let source = format!("overlayfs:{}", OVERLAY_MOUNT_PATH);
    Mount::builder()
        .fstype(FilesystemType::Manual("overlay"))
        .flags(MountFlags::NOATIME)
        .data(&options)
        .mount(source, NEW_ROOT_PATH)?;
    Ok(())
}
/// Loads the image configuration from `IMAGE_CONFIG_JSON_PATH`.
fn parse_image_config(&mut self) -> Result<ImageConfiguration> {
    trace!("parsing image config");
    let parsed = ImageConfiguration::from_file(Path::new(IMAGE_CONFIG_JSON_PATH))?;
    Ok(parsed)
}
/// Reads `LAUNCH_CONFIG_JSON_PATH` and deserializes it as a [`LaunchInfo`].
fn parse_launch_config(&mut self) -> Result<LaunchInfo> {
    trace!("parsing launch config");
    let raw = fs::read_to_string(Path::new(LAUNCH_CONFIG_JSON_PATH))?;
    let launch: LaunchInfo = serde_json::from_str(&raw)?;
    Ok(launch)
}
/// Deletes everything on the filesystem that `/` lives on.
///
/// The walk stays on the root filesystem and visits directory contents before
/// the directory itself. Entries that cannot be read are skipped, and
/// individual removal failures are ignored.
fn nuke_initrd(&mut self) -> Result<()> {
    trace!("nuking initrd");
    let initrd_dev = fs::metadata("/")?.st_dev();
    let walker = WalkDir::new("/")
        .same_file_system(true)
        .follow_links(false)
        .contents_first(true);
    for entry in walker {
        let Ok(entry) = entry else {
            continue;
        };
        let Ok(metadata) = entry.metadata() else {
            continue;
        };
        // Extra guard on top of `same_file_system`: never touch other devices.
        if metadata.st_dev() != initrd_dev {
            continue;
        }
        let path = entry.path();
        if metadata.is_symlink() || metadata.is_file() {
            let _ = fs::remove_file(path);
            trace!("deleting file {:?}", path);
        } else if metadata.is_dir() {
            let _ = fs::remove_dir(path);
            trace!("deleting directory {:?}", path);
        }
    }
    trace!("nuked initrd");
    Ok(())
}
/// Moves `/sys`, `/proc` and `/dev` under the new root, bind-mounts the
/// current directory over `/`, chroots into it and changes directory to `/`.
fn bind_new_root(&mut self) -> Result<()> {
    let moves = [
        (SYS_PATH, NEW_ROOT_SYS_PATH),
        (PROC_PATH, NEW_ROOT_PROC_PATH),
        (DEV_PATH, NEW_ROOT_DEV_PATH),
    ];
    for (from, to) in moves {
        self.mount_move_subtree(Path::new(from), Path::new(to))?;
    }

    trace!("binding new root");
    let builder = Mount::builder()
        .fstype(FilesystemType::Manual("none"))
        .flags(MountFlags::BIND);
    builder.mount(".", "/")?;

    trace!("chrooting into new root");
    chroot(".")?;

    trace!("setting root as current directory");
    std::env::set_current_dir("/")?;
    Ok(())
}
async fn network_setup(&mut self, network: &LaunchNetwork) -> Result<()> {
trace!("setting up network for link");
let etc = PathBuf::from_str("/etc")?;
if !etc.exists() {
fs::create_dir(etc)?;
}
let resolv = PathBuf::from_str("/etc/resolv.conf")?;
let mut lines = vec!["# krata resolver configuration".to_string()];
for nameserver in &network.resolver.nameservers {
lines.push(format!("nameserver {}", nameserver));
}
let mut conf = lines.join("\n");
conf.push('\n');
fs::write(resolv, conf)?;
self.network_configure_ethtool(network).await?;
self.network_configure_link(network).await?;
Ok(())
}
/// Assigns the configured IPv4 and IPv6 addresses to the launch network's
/// link, brings the link up, and installs default routes via the gateways.
///
/// IPv4 failures are returned as errors. IPv6 address or route failures are
/// only logged. A link that cannot be found is logged and treated as success.
async fn network_configure_link(&mut self, network: &LaunchNetwork) -> Result<()> {
let (connection, handle, _) = rtnetlink::new_connection()?;
// The connection future must be driven for requests on `handle` to complete.
tokio::spawn(connection);
// All four values are parsed up front, so a malformed IPv6 value still aborts the function.
let ipv4_network: IpNetwork = network.ipv4.address.parse()?;
let ipv4_gateway: Ipv4Addr = network.ipv4.gateway.parse()?;
let ipv6_network: IpNetwork = network.ipv6.address.parse()?;
let ipv6_gateway: Ipv6Addr = network.ipv6.gateway.parse()?;
let mut links = handle
.link()
.get()
.match_name(network.link.clone())
.execute();
let Some(link) = links.try_next().await? else {
warn!("unable to find link named {}", network.link);
return Ok(());
};
handle
.address()
.add(link.header.index, ipv4_network.ip(), ipv4_network.prefix())
.execute()
.await?;
let ipv6_result = handle
.address()
.add(link.header.index, ipv6_network.ip(), ipv6_network.prefix())
.execute()
.await;
// Remember whether the IPv6 address was applied; the IPv6 route is skipped otherwise.
let ipv6_ready = match ipv6_result {
Ok(()) => true,
Err(error) => {
warn!("unable to setup ipv6 network: {}", error);
false
}
};
handle.link().set(link.header.index).up().execute().await?;
// Default IPv4 route (0.0.0.0/0) through the configured gateway.
handle
.route()
.add()
.v4()
.destination_prefix(Ipv4Addr::UNSPECIFIED, 0)
.output_interface(link.header.index)
.gateway(ipv4_gateway)
.execute()
.await?;
if ipv6_ready {
// Default IPv6 route (::/0) through the configured gateway; best-effort.
let ipv6_gw_result = handle
.route()
.add()
.v6()
.destination_prefix(Ipv6Addr::UNSPECIFIED, 0)
.output_interface(link.header.index)
.gateway(ipv6_gateway)
.execute()
.await;
if let Err(error) = ipv6_gw_result {
warn!("failed to add ipv6 gateway route: {}", error);
}
}
Ok(())
}
/// Turns off GSO and TSO on the launch network's link via `EthtoolHandle`.
async fn network_configure_ethtool(&mut self, network: &LaunchNetwork) -> Result<()> {
let mut handle = EthtoolHandle::new()?;
handle.set_gso(&network.link, false)?;
handle.set_tso(&network.link, false)?;
Ok(())
}
/// Resolves and launches the container command.
///
/// The command comes from the launch config's `run` when present, otherwise
/// from the image config's `cmd`, falling back to `/bin/sh` when empty. The
/// environment starts with the krata defaults, then the image environment,
/// then the launch environment (later entries win on duplicate keys). The
/// working directory defaults to `/` when unset or empty.
async fn run(&mut self, config: &Config, launch: &LaunchInfo) -> Result<()> {
    let mut cmd = match config.cmd() {
        Some(value) => value.clone(),
        None => vec![],
    };
    if let Some(run) = &launch.run {
        cmd = run.clone();
    }
    if cmd.is_empty() {
        cmd.push("/bin/sh".to_string());
    }
    let executable = cmd.remove(0);

    let mut env = vec!["KRATA_CONTAINER=1".to_string(), "TERM=vt100".to_string()];
    if let Some(config_env) = config.env() {
        env.extend_from_slice(config_env);
    }
    if let Some(extra_env) = &launch.env {
        env.extend_from_slice(extra_env.as_slice());
    }
    let env = GuestInit::env_map(env);

    let path = GuestInit::resolve_executable(&env, executable.into())?;
    let file_name = path
        .file_name()
        .ok_or_else(|| anyhow!("cannot get file name of command path"))?;
    let file_name = file_name
        .to_str()
        .ok_or_else(|| anyhow!("cannot get file name of command path as str"))?;
    // argv[0] is the bare file name of the resolved executable.
    cmd.insert(0, file_name.to_string());

    let env = GuestInit::env_list(env);
    trace!("running container command: {}", cmd.join(" "));

    let path = CString::new(path.as_os_str().as_bytes())?;
    let cmd = GuestInit::strings_as_cstrings(cmd)?;
    let env = GuestInit::strings_as_cstrings(env)?;

    let working_dir = config
        .working_dir()
        .as_ref()
        .map(|dir| dir.to_string())
        .filter(|dir| !dir.is_empty())
        .unwrap_or_else(|| "/".to_string());
    std::env::set_current_dir(&working_dir)?;

    self.fork_and_exec(path, cmd, env).await?;
    Ok(())
}
fn strings_as_cstrings(values: Vec<String>) -> Result<Vec<CString>> {
let mut results: Vec<CString> = vec![];
for value in values {
results.push(CString::new(value.as_bytes().to_vec())?);
}
Ok(results)
}
/// Builds a key/value map from `KEY=VALUE` strings.
///
/// Each entry is split at its first `=`; entries without `=` are dropped and
/// a later entry replaces an earlier one with the same key.
fn env_map(env: Vec<String>) -> HashMap<String, String> {
    env.iter()
        .filter_map(|entry| entry.split_once('='))
        .map(|(key, value)| (key.to_string(), value.to_string()))
        .collect()
}
/// Resolves the executable `path` to the file that should be executed.
///
/// Absolute paths are returned as-is. A relative path naming an existing file
/// is absolutized. Otherwise each `:`-separated directory of `PATH` in `env`
/// is tried in order. If nothing matches, `path` is returned unchanged.
fn resolve_executable(env: &HashMap<String, String>, path: PathBuf) -> Result<PathBuf> {
    if path.is_absolute() {
        return Ok(path);
    }
    if path.is_file() {
        return Ok(path.absolutize()?.to_path_buf());
    }
    let found = env.get("PATH").and_then(|path_var| {
        path_var
            .split(':')
            .map(|dir| PathBuf::from(dir).join(&path))
            .find(|candidate| candidate.is_file())
    });
    match found {
        Some(exe_path) => Ok(exe_path),
        None => Ok(path),
    }
}
/// Flattens the map into `KEY=VALUE` strings, in the map's iteration order.
fn env_list(env: HashMap<String, String>) -> Vec<String> {
    let mut entries: Vec<String> = Vec::with_capacity(env.len());
    for (key, value) in env.iter() {
        entries.push(format!("{}={}", key, value));
    }
    entries
}
/// Forks; the parent supervises the child via `background`, while the child
/// runs `foreground` to exec the command.
async fn fork_and_exec(
    &mut self,
    path: CString,
    cmd: Vec<CString>,
    env: Vec<CString>,
) -> Result<()> {
    let outcome = unsafe { fork() }?;
    if let ForkResult::Parent { child } = outcome {
        return self.background(child).await;
    }
    self.foreground(path, cmd, env).await
}
/// Child-side half of `fork_and_exec`: sets up the controlling terminal and
/// then replaces the process image with `path`, passing `cmd` as argv and
/// `env` as the environment.
async fn foreground(
&mut self,
path: CString,
cmd: Vec<CString>,
env: Vec<CString>,
) -> Result<()> {
GuestInit::set_controlling_terminal()?;
// A failed `execve` is propagated to the caller as an error.
execve(&path, &cmd, &env)?;
Ok(())
}
/// Starts a new session and makes stdin the controlling terminal.
///
/// Both steps are best-effort: failures are logged (with the OS error) and
/// the function always returns `Ok(())`.
fn set_controlling_terminal() -> Result<()> {
    // SAFETY: `setsid` takes no arguments and only changes this process's session.
    let session = unsafe { nix::libc::setsid() };
    if session < 0 {
        warn!(
            "failed to create new session: {}",
            io::Error::last_os_error()
        );
    }
    // SAFETY: the descriptor comes from the process's own stdin handle and
    // TIOCSCTTY takes a plain integer argument.
    let result = unsafe { ioctl(io::stdin().as_raw_fd(), nix::libc::TIOCSCTTY, 0) };
    if result != 0 {
        warn!(
            "failed to set controlling terminal, result={}: {}",
            result,
            io::Error::last_os_error()
        );
    }
    Ok(())
}
/// Parent-side half of `fork_and_exec`: builds a `ContainerBackground` for
/// the child pid `executed` and runs it to completion.
async fn background(&mut self, executed: Pid) -> Result<()> {
let mut background = ContainerBackground::new(executed).await?;
background.run().await?;
Ok(())
}
}

View File

@ -0,0 +1,3 @@
pub mod background;
pub mod childwait;
pub mod init;

View File

@ -0,0 +1,40 @@
[package]
name = "kratanet"
version.workspace = true
edition = "2021"
resolver = "2"
[dependencies]
advmac = { path = "../vendor/advmac" }
anyhow = { workspace = true }
async-trait = { workspace = true }
bytes = { workspace = true }
clap = { workspace = true }
env_logger = { workspace = true }
etherparse = { workspace = true }
futures = { workspace = true }
libc = { workspace = true }
log = { workspace = true }
netlink-packet-route = { workspace = true }
rtnetlink = { workspace = true }
smoltcp = { workspace = true }
tokio = { workspace = true }
tokio-tun = { workspace = true }
udp-stream = { workspace = true }
uuid = { workspace = true }
xenstore = { path = "../xen/xenstore" }
[lib]
name = "kratanet"
[[bin]]
name = "kratanet"
path = "bin/network.rs"
[[example]]
name = "ping"
path = "examples/ping.rs"
[[example]]
name = "autonet"
path = "examples/autonet.rs"

View File

@ -0,0 +1,15 @@
use anyhow::Result;
use clap::Parser;
use env_logger::Env;
use kratanet::NetworkService;
/// Command-line arguments of the network service binary; currently there are none.
#[derive(Parser, Debug)]
struct NetworkArgs {}
/// Initializes logging (default filter `warn`), parses the command line and
/// runs the network service watch loop.
#[tokio::main(flavor = "multi_thread", worker_threads = 10)]
async fn main() -> Result<()> {
    let log_env = Env::default().default_filter_or("warn");
    env_logger::Builder::from_env(log_env).init();
    let _ = NetworkArgs::parse();
    let mut service = NetworkService::new().await?;
    let outcome = service.watch().await;
    outcome
}

View File

@ -0,0 +1,15 @@
use std::time::Duration;
use anyhow::Result;
use kratanet::autonet::AutoNetworkCollector;
use tokio::time::sleep;
/// Prints the network changeset every two seconds, forever.
#[tokio::main]
async fn main() -> Result<()> {
    let interval = Duration::from_secs(2);
    let mut collector = AutoNetworkCollector::new().await?;
    loop {
        let changeset = collector.read_changes().await?;
        println!("{:?}", changeset);
        sleep(interval).await;
    }
}

View File

@ -0,0 +1,21 @@
use std::{net::Ipv6Addr, str::FromStr, time::Duration};
use anyhow::Result;
use kratanet::icmp::{IcmpClient, IcmpProtocol};
/// Sends one ICMPv6 echo request to 2606:4700:4700::1111 and prints the reply
/// (or `None` after the ten second deadline).
#[tokio::main]
async fn main() -> Result<()> {
    let client = IcmpClient::new(IcmpProtocol::Icmpv6)?;
    let target = Ipv6Addr::from_str("2606:4700:4700::1111")?;
    let payload: [u8; 4] = [12u8, 14u8, 16u8, 32u8];
    let deadline = Duration::from_secs(10);
    let result = client.ping6(target, 0, 1, &payload, deadline).await?;
    println!("reply: {:?}", result);
    Ok(())
}

View File

@ -0,0 +1,185 @@
use anyhow::{anyhow, Result};
use smoltcp::wire::{EthernetAddress, Ipv4Cidr, Ipv6Cidr};
use std::{collections::HashMap, str::FromStr};
use uuid::Uuid;
use xenstore::client::{XsdClient, XsdInterface, XsdTransaction};
/// Discovers krata guest networks from xenstore and tracks which ones have
/// already been reported.
pub struct AutoNetworkCollector {
// Xenstore client used for every read.
client: XsdClient,
// Networks reported as added and not yet reported as removed, keyed by guest uuid.
known: HashMap<Uuid, NetworkMetadata>,
}
/// Addressing of one side (guest or gateway) of a guest network.
#[derive(Debug, Clone)]
pub struct NetworkSide {
pub ipv4: Ipv4Cidr,
pub ipv6: Ipv6Cidr,
pub mac: EthernetAddress,
}
/// Network description of a single krata guest as read from xenstore.
#[derive(Debug, Clone)]
pub struct NetworkMetadata {
// Domain id parsed from the `/local/domain` entry name.
pub domid: u32,
// Guest uuid read from the domain's `krata/uuid` key.
pub uuid: Uuid,
pub guest: NetworkSide,
pub gateway: NetworkSide,
}
impl NetworkMetadata {
    /// Name of the network interface for this guest: `vif<domid>.20`.
    pub fn interface(&self) -> String {
        let domid = self.domid;
        format!("vif{}.20", domid)
    }
}
/// Difference between two consecutive scans: networks that appeared and
/// networks that disappeared.
#[derive(Debug, Clone)]
pub struct AutoNetworkChangeset {
pub added: Vec<NetworkMetadata>,
pub removed: Vec<NetworkMetadata>,
}
impl AutoNetworkCollector {
/// Opens a xenstore client and starts with no known networks.
pub async fn new() -> Result<AutoNetworkCollector> {
    let client = XsdClient::open().await?;
    let known = HashMap::new();
    Ok(AutoNetworkCollector { client, known })
}
/// Scans `/local/domain` inside one xenstore transaction and returns the
/// network metadata of every domain that carries a complete krata network
/// description.
///
/// Domains are silently skipped when their entry name is not a number, when
/// they have no `krata/uuid` key, when the uuid does not parse, or when
/// either network side cannot be read. Errors from listing, from reading the
/// uuid key, and from the commit are returned.
pub async fn read(&mut self) -> Result<Vec<NetworkMetadata>> {
let mut networks = Vec::new();
let tx = self.client.transaction().await?;
for domid_string in tx.list("/local/domain").await? {
let Ok(domid) = domid_string.parse::<u32>() else {
continue;
};
let dom_path = format!("/local/domain/{}", domid_string);
let Some(uuid_string) = tx.read_string(&format!("{}/krata/uuid", dom_path)).await?
else {
continue;
};
let Ok(uuid) = uuid_string.parse::<Uuid>() else {
continue;
};
// Any failure while reading a side (missing or invalid entry, or a read error) skips the domain.
let Ok(guest) =
AutoNetworkCollector::read_network_side(uuid, &tx, &dom_path, "guest").await
else {
continue;
};
let Ok(gateway) =
AutoNetworkCollector::read_network_side(uuid, &tx, &dom_path, "gateway").await
else {
continue;
};
networks.push(NetworkMetadata {
domid,
uuid,
guest,
gateway,
});
}
tx.commit().await?;
Ok(networks)
}
/// Reads the `ipv4`, `ipv6` and `mac` entries under
/// `<dom_path>/krata/network/<side>` and parses them into a [`NetworkSide`].
///
/// All three entries are read before any of them is parsed.
///
/// # Errors
/// Fails when a read fails, when an entry is missing, or when an entry does
/// not parse; the message names the domain `uuid` and the `side`.
async fn read_network_side(
    uuid: Uuid,
    tx: &XsdTransaction,
    dom_path: &str,
    side: &str,
) -> Result<NetworkSide> {
    let side_path = format!("{}/krata/network/{}", dom_path, side);

    let ipv4 = tx
        .read_string(&format!("{}/ipv4", side_path))
        .await?
        .ok_or_else(|| {
            anyhow!(
                "krata domain {} is missing {} ipv4 network entry",
                uuid,
                side
            )
        })?;
    let ipv6 = tx
        .read_string(&format!("{}/ipv6", side_path))
        .await?
        .ok_or_else(|| {
            anyhow!(
                "krata domain {} is missing {} ipv6 network entry",
                uuid,
                side
            )
        })?;
    let mac = tx
        .read_string(&format!("{}/mac", side_path))
        .await?
        .ok_or_else(|| {
            anyhow!(
                "krata domain {} is missing {} mac address entry",
                uuid,
                side
            )
        })?;

    let ipv4 = Ipv4Cidr::from_str(&ipv4).map_err(|_| {
        anyhow!(
            "krata domain {} has invalid {} ipv4 network cidr entry: {}",
            uuid,
            side,
            ipv4
        )
    })?;
    let ipv6 = Ipv6Cidr::from_str(&ipv6).map_err(|_| {
        anyhow!(
            "krata domain {} has invalid {} ipv6 network cidr entry: {}",
            uuid,
            side,
            ipv6
        )
    })?;
    let mac = EthernetAddress::from_str(&mac).map_err(|_| {
        anyhow!(
            "krata domain {} has invalid {} mac address entry: {}",
            uuid,
            side,
            mac
        )
    })?;

    Ok(NetworkSide { ipv4, ipv6, mac })
}
/// Rescans xenstore and reports what changed since the previous call.
///
/// A network is `added` the first time its uuid is seen and `removed` once a
/// scan no longer contains a uuid that was previously reported. The set of
/// known networks is updated accordingly.
pub async fn read_changes(&mut self) -> Result<AutoNetworkChangeset> {
    // Hash set instead of a Vec so the "is it still there" check below stays
    // linear in the number of networks.
    let mut seen = std::collections::HashSet::new();
    let mut added: Vec<NetworkMetadata> = Vec::new();
    for network in self.read().await? {
        seen.insert(network.uuid);
        if self.known.contains_key(&network.uuid) {
            continue;
        }
        self.known.insert(network.uuid, network.clone());
        added.push(network);
    }

    // Collect first: entries cannot be removed while iterating the keys.
    let gone: Vec<Uuid> = self
        .known
        .keys()
        .filter(|uuid| !seen.contains(*uuid))
        .copied()
        .collect();
    let removed: Vec<NetworkMetadata> = gone
        .iter()
        .filter_map(|uuid| self.known.remove(uuid))
        .collect();

    Ok(AutoNetworkChangeset { added, removed })
}
/// Forgets `uuid` so that the next `read_changes` reports it as added again.
/// Returns whether the uuid was known.
pub fn mark_unknown(&mut self, uuid: Uuid) -> Result<bool> {
    let was_known = self.known.remove(&uuid).is_some();
    Ok(was_known)
}
}

View File

@ -0,0 +1,175 @@
use crate::autonet::NetworkMetadata;
use crate::chandev::ChannelDevice;
use crate::nat::Nat;
use crate::proxynat::ProxyNatHandlerFactory;
use crate::raw_socket::{AsyncRawSocketChannel, RawSocketHandle, RawSocketProtocol};
use crate::vbridge::{BridgeJoinHandle, VirtualBridge};
use crate::EXTRA_MTU;
use anyhow::{anyhow, Result};
use bytes::BytesMut;
use futures::TryStreamExt;
use log::{info, trace, warn};
use smoltcp::iface::{Config, Interface, SocketSet};
use smoltcp::phy::Medium;
use smoltcp::time::Instant;
use smoltcp::wire::{HardwareAddress, IpCidr};
use tokio::select;
use tokio::sync::mpsc::{channel, Receiver};
use tokio::task::JoinHandle;
// Capacity of the channel that carries packets queued for transmission to the guest interface.
const TX_CHANNEL_BUFFER_LEN: usize = 3000;
/// Network backend for one krata guest: the guest's network metadata plus the
/// virtual bridge it joins.
#[derive(Clone)]
pub struct NetworkBackend {
metadata: NetworkMetadata,
bridge: VirtualBridge,
}
/// Outcome of one `select!` round in `NetworkStack::poll`.
#[derive(Debug)]
enum NetworkStackSelect {
// A packet read from the guest interface (`None` when that channel yielded nothing).
Receive(Option<BytesMut>),
// A packet to write to the guest interface (`None` when the source channel yielded nothing).
Send(Option<BytesMut>),
}
/// Everything needed to shuttle packets for one guest.
struct NetworkStack<'a> {
// Receiving end of the transmit queue shared by the smoltcp device and the NAT.
tx: Receiver<BytesMut>,
// Raw socket channel bound to the guest's interface.
kdev: AsyncRawSocketChannel,
// Channel-backed device handed to the smoltcp interface.
udev: ChannelDevice,
interface: Interface,
sockets: SocketSet<'a>,
nat: Nat,
// Handle for exchanging packets with the virtual bridge.
bridge: BridgeJoinHandle,
}
impl NetworkStack<'_> {
/// Waits for one event and handles it.
///
/// A packet from the guest interface is offered to the bridge and to the NAT
/// (send failures are only traced) and then fed to the smoltcp interface. A
/// packet from the bridge, the broadcast channel, or the transmit queue is
/// written to the guest interface (failure is logged).
///
/// Returns `Ok(false)` when the selected source yielded `None` (for the
/// broadcast receiver, any receive error is mapped to `None`), `Ok(true)`
/// otherwise.
async fn poll(&mut self) -> Result<bool> {
let what = select! {
x = self.kdev.receiver.recv() => NetworkStackSelect::Receive(x),
x = self.bridge.from_bridge_receiver.recv() => NetworkStackSelect::Send(x),
x = self.bridge.from_broadcast_receiver.recv() => NetworkStackSelect::Send(x.ok()),
x = self.tx.recv() => NetworkStackSelect::Send(x),
};
match what {
NetworkStackSelect::Receive(Some(packet)) => {
// `try_send` never waits: a full channel drops the packet for that consumer.
if let Err(error) = self.bridge.to_bridge_sender.try_send(packet.clone()) {
trace!("failed to send guest packet to bridge: {}", error);
}
if let Err(error) = self.nat.receive_sender.try_send(packet.clone()) {
trace!("failed to send guest packet to nat: {}", error);
}
// Stage the packet as the device's pending receive, then let smoltcp process it.
self.udev.rx = Some(packet);
self.interface
.poll(Instant::now(), &mut self.udev, &mut self.sockets);
}
NetworkStackSelect::Send(Some(packet)) => {
if let Err(error) = self.kdev.sender.try_send(packet) {
warn!("failed to transmit packet to interface: {}", error);
}
}
NetworkStackSelect::Receive(None) | NetworkStackSelect::Send(None) => {
return Ok(false);
}
}
Ok(true)
}
}
impl NetworkBackend {
/// Bundles the guest's network metadata with the bridge it should join.
pub fn new(metadata: NetworkMetadata, bridge: VirtualBridge) -> Result<Self> {
    let backend = Self { metadata, bridge };
    Ok(backend)
}
/// Looks up the guest's network interface by name and brings it up.
///
/// # Errors
/// Fails when the interface does not exist or a netlink request fails.
pub async fn init(&mut self) -> Result<()> {
    let interface = self.metadata.interface();
    let (connection, handle, _) = rtnetlink::new_connection()?;
    tokio::spawn(connection);

    let mut links = handle.link().get().match_name(interface.clone()).execute();
    let Some(link) = links.try_next().await? else {
        return Err(anyhow!(
            "unable to find network interface named {}",
            interface
        ));
    };
    handle.link().set(link.header.index).up().execute().await?;
    Ok(())
}
/// Builds the network stack and polls it until `poll` reports that it is
/// finished or fails.
pub async fn run(&self) -> Result<()> {
    let mut stack = self.create_network_stack().await?;
    while stack.poll().await? {}
    Ok(())
}
/// Builds the per-guest network stack: a raw socket bound to the guest's
/// interface, a channel-backed smoltcp device and interface configured with
/// the gateway's MAC and addresses, the NAT, and a bridge membership for the
/// guest's MAC.
async fn create_network_stack(&self) -> Result<NetworkStack> {
let interface = self.metadata.interface();
let proxy = Box::new(ProxyNatHandlerFactory::new());
// The stack answers on the gateway side's addresses.
let addresses: Vec<IpCidr> = vec![
self.metadata.gateway.ipv4.into(),
self.metadata.gateway.ipv6.into(),
];
let mut kdev =
RawSocketHandle::bound_to_interface(&interface, RawSocketProtocol::Ethernet)?;
// Device MTU is the interface MTU plus `EXTRA_MTU`.
let mtu = kdev.mtu_of_interface(&interface)? + EXTRA_MTU;
// Both the smoltcp device and the NAT push outgoing packets into this channel.
let (tx_sender, tx_receiver) = channel::<BytesMut>(TX_CHANNEL_BUFFER_LEN);
let mut udev = ChannelDevice::new(mtu, Medium::Ethernet, tx_sender.clone());
let mac = self.metadata.gateway.mac;
let nat = Nat::new(mtu, proxy, mac, addresses.clone(), tx_sender.clone())?;
let hardware_addr = HardwareAddress::Ethernet(mac);
let config = Config::new(hardware_addr);
let mut iface = Interface::new(config, &mut udev, Instant::now());
iface.update_ip_addrs(|addrs| {
addrs
.extend_from_slice(&addresses)
.expect("failed to set ip addresses");
});
let sockets = SocketSet::new(vec![]);
// The bridge is joined under the guest's MAC address.
let handle = self.bridge.join(self.metadata.guest.mac).await?;
let kdev = AsyncRawSocketChannel::new(mtu, kdev)?;
Ok(NetworkStack {
tx: tx_receiver,
kdev,
udev,
interface: iface,
sockets,
nat,
bridge: handle,
})
}
/// Spawns the backend on a tokio task and returns the task's handle.
///
/// The task logs when it starts and logs a warning if `run` fails; the
/// failure itself is not propagated through the handle.
pub async fn launch(self) -> Result<JoinHandle<()>> {
    Ok(tokio::task::spawn(async move {
        info!(
            "launched network backend for krata guest {}",
            self.metadata.uuid
        );
        if let Err(error) = self.run().await {
            warn!(
                "network backend for krata guest {} failed: {}",
                self.metadata.uuid, error
            );
        }
    }))
}
}
impl Drop for NetworkBackend {
    /// Logs that the backend for this guest has been destroyed.
    fn drop(&mut self) {
        let uuid = &self.metadata.uuid;
        info!("destroyed network backend for krata guest {}", uuid);
    }
}

View File

@ -0,0 +1,89 @@
// Referenced https://github.com/vi/wgslirpy/blob/master/crates/libwgslirpy/src/channelized_smoltcp_device.rs
use bytes::BytesMut;
use log::{debug, warn};
use smoltcp::phy::{Checksum, Device, Medium};
use tokio::sync::mpsc::Sender;
// Capacity of the buffer that outgoing packets are carved out of.
const TEAR_OFF_BUFFER_SIZE: usize = 65536;
/// smoltcp device backed by channels: outgoing packets are pushed into `tx`,
/// and the single pending incoming packet is staged in `rx`.
pub struct ChannelDevice {
pub mtu: usize,
pub medium: Medium,
pub tx: Sender<BytesMut>,
pub rx: Option<BytesMut>,
// Scratch buffer for building outgoing packets; filled chunks are split off and sent.
tear_off_buffer: BytesMut,
}
impl ChannelDevice {
    /// Creates a device with no pending incoming packet and a freshly
    /// allocated tear-off buffer.
    pub fn new(mtu: usize, medium: Medium, tx: Sender<BytesMut>) -> Self {
        let tear_off_buffer = BytesMut::with_capacity(TEAR_OFF_BUFFER_SIZE);
        Self {
            mtu,
            medium,
            tx,
            rx: None,
            tear_off_buffer,
        }
    }
}
/// Receive token that owns the bytes of one incoming packet.
pub struct RxToken(pub BytesMut);
impl Device for ChannelDevice {
    type RxToken<'a> = RxToken where Self: 'a;
    type TxToken<'a> = &'a mut ChannelDevice where Self: 'a;

    /// Hands out the staged incoming packet, if any, together with a transmit token.
    fn receive(
        &mut self,
        _timestamp: smoltcp::time::Instant,
    ) -> Option<(Self::RxToken<'_>, Self::TxToken<'_>)> {
        let packet = self.rx.take()?;
        Some((RxToken(packet), self))
    }

    /// Hands out a transmit token unless the transmit channel has no free capacity.
    fn transmit(&mut self, _timestamp: smoltcp::time::Instant) -> Option<Self::TxToken<'_>> {
        if self.tx.capacity() > 0 {
            return Some(self);
        }
        debug!("ran out of transmission capacity");
        None
    }

    /// Reports the device's medium and MTU, with checksum handling set to
    /// `ignored()` except for TCP, IPv4, ICMPv4 and ICMPv6, which are `Tx`.
    fn capabilities(&self) -> smoltcp::phy::DeviceCapabilities {
        let mut checksum = smoltcp::phy::ChecksumCapabilities::ignored();
        checksum.tcp = Checksum::Tx;
        checksum.ipv4 = Checksum::Tx;
        checksum.icmpv4 = Checksum::Tx;
        checksum.icmpv6 = Checksum::Tx;

        let mut capabilities = smoltcp::phy::DeviceCapabilities::default();
        capabilities.medium = self.medium;
        capabilities.max_transmission_unit = self.mtu;
        capabilities.checksum = checksum;
        capabilities
    }
}
impl smoltcp::phy::RxToken for RxToken {
fn consume<R, F>(mut self, f: F) -> R
where
F: FnOnce(&mut [u8]) -> R,
{
f(&mut self.0[..])
}
}
impl<'a> smoltcp::phy::TxToken for &'a mut ChannelDevice {
/// Lets `f` fill a `len`-byte packet in the tear-off buffer, then splits
/// those bytes off and queues them on the transmit channel. A failed send is
/// logged and the packet is dropped.
fn consume<R, F>(self, len: usize, f: F) -> R
where
F: FnOnce(&mut [u8]) -> R,
{
self.tear_off_buffer.resize(len, 0);
let result = f(&mut self.tear_off_buffer[..]);
// `split` hands the filled bytes to `chunk` and leaves the buffer empty.
let chunk = self.tear_off_buffer.split();
if let Err(error) = self.tx.try_send(chunk) {
warn!("failed to transmit packet: {}", error);
}
// Start a fresh buffer once the remaining capacity drops below one MTU.
if self.tear_off_buffer.capacity() < self.mtu {
self.tear_off_buffer = BytesMut::with_capacity(TEAR_OFF_BUFFER_SIZE);
}
result
}
}

View File

@ -0,0 +1,145 @@
use std::net::{IpAddr, Ipv4Addr};
use advmac::MacAddr6;
use anyhow::{anyhow, Result};
use bytes::BytesMut;
use futures::TryStreamExt;
use log::error;
use smoltcp::wire::EthernetAddress;
use tokio::{
io::{AsyncReadExt, AsyncWriteExt},
select,
sync::mpsc::channel,
task::JoinHandle,
};
use tokio_tun::Tun;
use crate::vbridge::{BridgeJoinHandle, VirtualBridge};
// Capacity of the channel carrying packets read from the tap device.
const RX_BUFFER_QUEUE_LEN: usize = 100;
// IPv4 address assigned to the host-side tap interface.
const HOST_IPV4_ADDR: Ipv4Addr = Ipv4Addr::new(10, 75, 0, 1);
/// Connects a host tap interface to the virtual bridge; owns the task that
/// forwards packets between the two.
pub struct HostBridge {
task: JoinHandle<()>,
}
impl HostBridge {
/// Creates the tap device `interface`, assigns it the host IPv4 address, a
/// link-local IPv6 address and a random locally-administered unicast MAC,
/// brings it up, joins `bridge` under that MAC and spawns the forwarding task.
///
/// # Errors
/// Fails when the tap device cannot be created, the interface cannot be
/// found, a netlink request fails, or joining the bridge fails.
pub async fn new(mtu: usize, interface: String, bridge: &VirtualBridge) -> Result<HostBridge> {
    let tun = Tun::builder()
        .name(&interface)
        .tap(true)
        .mtu(mtu as i32)
        .packet_info(false)
        .try_build()?;

    let (connection, handle, _) = rtnetlink::new_connection()?;
    tokio::spawn(connection);

    let mut mac = MacAddr6::random();
    mac.set_local(true);
    mac.set_multicast(false);

    let mut links = handle.link().get().match_name(interface.clone()).execute();
    let Some(link) = links.try_next().await? else {
        return Err(anyhow!(
            "unable to find network interface named {}",
            interface
        ));
    };
    let index = link.header.index;

    handle
        .address()
        .add(index, IpAddr::V4(HOST_IPV4_ADDR), 16)
        .execute()
        .await?;
    let link_local = IpAddr::V6(mac.to_link_local_ipv6());
    handle.address().add(index, link_local, 10).execute().await?;
    handle
        .link()
        .set(index)
        .address(mac.to_array().to_vec())
        .up()
        .execute()
        .await?;

    let bridge_handle = bridge.join(EthernetAddress(mac.to_array())).await?;
    let task = tokio::task::spawn(async move {
        if let Err(error) = HostBridge::process(mtu, tun, bridge_handle).await {
            error!("failed to process host bridge: {}", error);
        }
    });
    Ok(HostBridge { task })
}
/// Forwards packets between the tap device and the virtual bridge until one
/// side closes or an error occurs.
///
/// A helper task reads the tap device and queues each packet; the main loop
/// writes bridge and broadcast packets to the tap device and sends queued tap
/// packets to the bridge.
async fn process(mtu: usize, tun: Tun, mut bridge_handle: BridgeJoinHandle) -> Result<()> {
let (rx_sender, mut rx_receiver) = channel::<BytesMut>(RX_BUFFER_QUEUE_LEN);
let (mut read, mut write) = tokio::io::split(tun);
// Reader task: tap device -> `rx_sender`. It stops on the first read or send error.
tokio::task::spawn(async move {
let mut buffer = vec![0u8; mtu];
loop {
let size = match read.read(&mut buffer).await {
Ok(size) => size,
Err(error) => {
error!("failed to read tap device: {}", error);
break;
}
};
match rx_sender.send(buffer[0..size].into()).await {
Ok(_) => {}
Err(error) => {
error!(
"failed to send data from tap device to processor: {}",
error
);
break;
}
}
}
});
loop {
select! {
// Bridge -> tap device.
x = bridge_handle.from_bridge_receiver.recv() => match x {
Some(bytes) => {
write.write_all(&bytes).await?;
},
None => {
break;
}
},
// Broadcast -> tap device; a receive error ends processing with that error.
x = bridge_handle.from_broadcast_receiver.recv() => match x {
Ok(bytes) => {
write.write_all(&bytes).await?;
},
Err(error) => {
return Err(error.into());
}
},
// Tap device -> bridge.
x = rx_receiver.recv() => match x {
Some(bytes) => {
bridge_handle.to_bridge_sender.send(bytes).await?;
},
None => {
break;
}
}
};
}
Ok(())
}
}
impl Drop for HostBridge {
/// Aborts the forwarding task when the bridge is dropped.
fn drop(&mut self) {
self.task.abort();
}
}

250
crates/kratanet/src/icmp.rs Normal file
View File

@ -0,0 +1,250 @@
use crate::raw_socket::{RawSocketHandle, RawSocketProtocol};
use anyhow::{anyhow, Result};
use etherparse::{
IcmpEchoHeader, Icmpv4Header, Icmpv4Slice, Icmpv4Type, Icmpv6Header, Icmpv6Slice, Icmpv6Type,
IpNumber, NetSlice, SlicedPacket,
};
use log::warn;
use std::{
collections::HashMap,
net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr, SocketAddrV4, SocketAddrV6},
os::fd::{FromRawFd, IntoRawFd},
sync::Arc,
time::Duration,
};
use tokio::{
net::UdpSocket,
sync::{oneshot, Mutex},
task::JoinHandle,
time::timeout,
};
/// ICMP flavor an [`IcmpClient`] speaks.
#[derive(Debug)]
pub enum IcmpProtocol {
    Icmpv4,
    Icmpv6,
}

impl IcmpProtocol {
    /// Maps the ICMP flavor to the matching raw socket protocol.
    pub fn to_socket_protocol(&self) -> RawSocketProtocol {
        match self {
            Self::Icmpv6 => RawSocketProtocol::Icmpv6,
            Self::Icmpv4 => RawSocketProtocol::Icmpv4,
        }
    }
}
/// Key identifying an in-flight echo request: peer address, echo id, echo sequence number.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct IcmpHandlerToken(IpAddr, Option<u16>, u16);
/// An echo reply delivered to the caller of `ping4` / `ping6`.
#[derive(Debug)]
pub enum IcmpReply {
Icmpv4 {
header: Icmpv4Header,
echo: IcmpEchoHeader,
payload: Vec<u8>,
},
Icmpv6 {
header: Icmpv6Header,
echo: IcmpEchoHeader,
payload: Vec<u8>,
},
}
// Shared table of in-flight requests; each entry holds the sender that completes one waiting ping.
type IcmpHandlerMap = Arc<Mutex<HashMap<IcmpHandlerToken, oneshot::Sender<IcmpReply>>>>;
/// ICMP echo client. Clones share the socket, the handler table and the
/// receive task.
#[derive(Clone)]
pub struct IcmpClient {
socket: Arc<UdpSocket>,
handlers: IcmpHandlerMap,
// Receive task; aborted when the last clone is dropped (see the `Drop` impl).
task: Arc<JoinHandle<Result<()>>>,
}
impl IcmpClient {
pub fn new(protocol: IcmpProtocol) -> Result<IcmpClient> {
let handle = RawSocketHandle::new(protocol.to_socket_protocol())?;
let socket = unsafe { std::net::UdpSocket::from_raw_fd(handle.into_raw_fd()) };
let socket: Arc<UdpSocket> = Arc::new(socket.try_into()?);
let handlers = Arc::new(Mutex::new(HashMap::new()));
let task = Arc::new(tokio::task::spawn(IcmpClient::process(
protocol,
socket.clone(),
handlers.clone(),
)));
Ok(IcmpClient {
socket,
handlers,
task,
})
}
/// Receive loop: reads packets from the socket, extracts echo replies and
/// completes the matching pending request, if any.
///
/// Packets that do not parse, are not echo replies, or match no pending
/// request are dropped. The loop only ends when `recv_from` fails.
async fn process(
protocol: IcmpProtocol,
socket: Arc<UdpSocket>,
handlers: IcmpHandlerMap,
) -> Result<()> {
let mut buffer = vec![0u8; 2048];
loop {
let (size, addr) = socket.recv_from(&mut buffer).await?;
let packet = &buffer[0..size];
let (token, reply) = match protocol {
IcmpProtocol::Icmpv4 => {
// For ICMPv4 the received bytes are parsed as a full IP packet.
let sliced = match SlicedPacket::from_ip(packet) {
Ok(sliced) => sliced,
Err(error) => {
warn!("received icmp packet but failed to parse it: {}", error);
continue;
}
};
let Some(NetSlice::Ipv4(ipv4)) = sliced.net else {
continue;
};
if ipv4.header().protocol() != IpNumber::ICMP {
continue;
}
let Ok(icmpv4) = Icmpv4Slice::from_slice(ipv4.payload().payload) else {
continue;
};
let Icmpv4Type::EchoReply(echo) = icmpv4.header().icmp_type else {
continue;
};
// The peer address comes from the IPv4 header's source address.
let token = IcmpHandlerToken(
IpAddr::V4(ipv4.header().source_addr()),
Some(echo.id),
echo.seq,
);
let reply = IcmpReply::Icmpv4 {
header: icmpv4.header(),
echo,
payload: icmpv4.payload().to_vec(),
};
(token, reply)
}
IcmpProtocol::Icmpv6 => {
// For ICMPv6 the received bytes are parsed directly as an ICMPv6 message.
let Ok(icmpv6) = Icmpv6Slice::from_slice(packet) else {
continue;
};
let Icmpv6Type::EchoReply(echo) = icmpv6.header().icmp_type else {
continue;
};
// The peer address comes from the socket's reported sender address.
let SocketAddr::V6(addr) = addr else {
continue;
};
let token = IcmpHandlerToken(IpAddr::V6(*addr.ip()), Some(echo.id), echo.seq);
let reply = IcmpReply::Icmpv6 {
header: icmpv6.header(),
echo,
payload: icmpv6.payload().to_vec(),
};
(token, reply)
}
};
// Removing the handler makes each request complete at most once.
if let Some(sender) = handlers.lock().await.remove(&token) {
let _ = sender.send(reply);
}
}
}
/// Registers a pending request for `token` and returns the receiver that the
/// receive task completes when the matching reply arrives.
///
/// # Errors
/// Fails when a request with the same token is already pending. The existing
/// registration is left untouched in that case, so the in-flight request
/// still receives its reply.
async fn add_handler(&self, token: IcmpHandlerToken) -> Result<oneshot::Receiver<IcmpReply>> {
    let mut handlers = self.handlers.lock().await;
    // Check before inserting: `insert` would replace (and drop) the sender of
    // the request that is already waiting.
    if handlers.contains_key(&token) {
        return Err(anyhow!("duplicate icmp request: {:?}", token));
    }
    let (tx, rx) = oneshot::channel();
    handlers.insert(token, tx);
    Ok(rx)
}
/// Drops the pending request registered for `token`, if there is one.
async fn remove_handler(&self, token: IcmpHandlerToken) -> Result<()> {
self.handlers.lock().await.remove(&token);
Ok(())
}
/// Sends an ICMPv4 echo request to `addr` and waits up to `deadline` for the
/// matching reply.
///
/// Returns `Ok(Some(reply))` on a reply and `Ok(None)` on timeout.
///
/// # Errors
/// Fails when a request with the same `addr`/`id`/`seq` is already pending,
/// when the packet cannot be built or sent, or when the reply channel closes
/// before a reply arrives. The pending-request entry is removed on every path
/// after registration, including send failures.
pub async fn ping4(
    &self,
    addr: Ipv4Addr,
    id: u16,
    seq: u16,
    payload: &[u8],
    deadline: Duration,
) -> Result<Option<IcmpReply>> {
    // Build the packet before registering, so a build failure leaves nothing behind.
    let echo = IcmpEchoHeader { id, seq };
    let mut header = Icmpv4Header::new(Icmpv4Type::EchoRequest(echo));
    header.update_checksum(payload);
    let mut buffer: Vec<u8> = Vec::new();
    header.write(&mut buffer)?;
    buffer.extend_from_slice(payload);

    let token = IcmpHandlerToken(IpAddr::V4(addr), Some(id), seq);
    let rx = self.add_handler(token.clone()).await?;

    let destination = SocketAddr::V4(SocketAddrV4::new(addr, 0));
    if let Err(error) = self.socket.send_to(&buffer, destination).await {
        // Without this the token would stay registered and every retry would
        // fail as a duplicate request.
        self.remove_handler(token).await?;
        return Err(error.into());
    }

    let result = timeout(deadline, rx).await;
    self.remove_handler(token).await?;
    let reply = match result {
        Ok(Ok(packet)) => Some(packet),
        Ok(Err(err)) => return Err(anyhow!("failed to wait for icmp packet: {}", err)),
        Err(_) => None,
    };
    Ok(reply)
}
/// Sends an ICMPv6 echo request to `addr` and waits up to `deadline` for the
/// matching reply.
///
/// Returns `Ok(Some(reply))` on a reply and `Ok(None)` on timeout.
///
/// # Errors
/// Fails when a request with the same `addr`/`id`/`seq` is already pending,
/// when the packet cannot be built or sent, or when the reply channel closes
/// before a reply arrives. The pending-request entry is removed on every path
/// after registration, including send failures.
pub async fn ping6(
    &self,
    addr: Ipv6Addr,
    id: u16,
    seq: u16,
    payload: &[u8],
    deadline: Duration,
) -> Result<Option<IcmpReply>> {
    // Build the packet before registering, so a build failure leaves nothing behind.
    let echo = IcmpEchoHeader { id, seq };
    let header = Icmpv6Header::new(Icmpv6Type::EchoRequest(echo));
    let mut buffer: Vec<u8> = Vec::new();
    header.write(&mut buffer)?;
    buffer.extend_from_slice(payload);

    let token = IcmpHandlerToken(IpAddr::V6(addr), Some(id), seq);
    let rx = self.add_handler(token.clone()).await?;

    let destination = SocketAddr::V6(SocketAddrV6::new(addr, 0, 0, 0));
    if let Err(error) = self.socket.send_to(&buffer, destination).await {
        // Without this the token would stay registered and every retry would
        // fail as a duplicate request.
        self.remove_handler(token).await?;
        return Err(error.into());
    }

    let result = timeout(deadline, rx).await;
    self.remove_handler(token).await?;
    let reply = match result {
        Ok(Ok(packet)) => Some(packet),
        Ok(Err(err)) => return Err(anyhow!("failed to wait for icmp packet: {}", err)),
        Err(_) => None,
    };
    Ok(reply)
}
}
impl Drop for IcmpClient {
/// Aborts the receive task when this is the last holder of the task handle.
fn drop(&mut self) {
// Clones share `task` through the `Arc`; only the final drop stops it.
if Arc::strong_count(&self.task) <= 1 {
self.task.abort();
}
}
}

117
crates/kratanet/src/lib.rs Normal file
View File

@ -0,0 +1,117 @@
use std::{collections::HashMap, time::Duration};
use anyhow::Result;
use autonet::{AutoNetworkChangeset, AutoNetworkCollector, NetworkMetadata};
use futures::{future::join_all, TryFutureExt};
use hbridge::HostBridge;
use log::warn;
use tokio::{task::JoinHandle, time::sleep};
use uuid::Uuid;
use vbridge::VirtualBridge;
use crate::backend::NetworkBackend;
pub mod autonet;
pub mod backend;
pub mod chandev;
pub mod hbridge;
pub mod icmp;
pub mod nat;
pub mod pkt;
pub mod proxynat;
pub mod raw_socket;
pub mod vbridge;
const HOST_BRIDGE_MTU: usize = 1500;
pub const EXTRA_MTU: usize = 20;
/// Owns the bridges and the per-guest network backend tasks.
pub struct NetworkService {
/// Running backend tasks keyed by the UUID taken from the network metadata.
pub backends: HashMap<Uuid, JoinHandle<()>>,
/// Virtual bridge; a clone is handed to every `NetworkBackend` that is created.
pub bridge: VirtualBridge,
/// Host-side bridge, created under the interface name `krata0`.
pub hbridge: HostBridge,
}
impl NetworkService {
    /// Builds a service with no backends, a fresh virtual bridge, and a host
    /// bridge named `krata0` whose MTU is `HOST_BRIDGE_MTU + EXTRA_MTU`.
    ///
    /// # Errors
    /// Fails when either bridge cannot be created.
    pub async fn new() -> Result<NetworkService> {
        let bridge = VirtualBridge::new()?;
        let mtu = HOST_BRIDGE_MTU + EXTRA_MTU;
        let hbridge = HostBridge::new(mtu, "krata0".to_string(), &bridge).await?;
        let backends = HashMap::new();
        Ok(NetworkService {
            backends,
            bridge,
            hbridge,
        })
    }
}
impl NetworkService {
    /// Reads network changesets from an `AutoNetworkCollector` in an endless
    /// loop, applying each one and pausing two seconds between rounds.
    ///
    /// Only returns when creating the collector, reading changes, or applying
    /// a changeset fails.
    pub async fn watch(&mut self) -> Result<()> {
        let mut collector = AutoNetworkCollector::new().await?;
        loop {
            let changes = collector.read_changes().await?;
            self.process_network_changeset(&mut collector, changes)
                .await?;
            sleep(Duration::from_secs(2)).await;
        }
    }

    /// Applies one changeset: aborts backends for removed networks, launches
    /// backends for added networks, and marks failed launches as unknown on
    /// the collector.
    async fn process_network_changeset(
        &mut self,
        collector: &mut AutoNetworkCollector,
        changeset: AutoNetworkChangeset,
    ) -> Result<()> {
        // Stop the tasks of every network that disappeared.
        changeset
            .removed
            .iter()
            .filter_map(|removal| self.backends.remove(&removal.uuid))
            .for_each(|handle| handle.abort());

        // Prepare one launch future per added network; a failure carries the
        // metadata along so it can be reported and marked below.
        let mut pending = Vec::new();
        for metadata in &changeset.added {
            let launch = self
                .add_network_backend(metadata)
                .map_err(move |error| (metadata.clone(), error));
            pending.push(launch);
        }

        sleep(Duration::from_secs(1)).await;

        let mut launched: Vec<(Uuid, JoinHandle<()>)> = Vec::new();
        let mut failed: Vec<Uuid> = Vec::new();
        for outcome in join_all(pending).await {
            match outcome {
                Err((metadata, error)) => {
                    warn!(
                        "failed to launch network backend for krata guest {}: {}",
                        metadata.uuid, error
                    );
                    failed.push(metadata.uuid);
                }
                Ok(launch) => launched.push(launch),
            }
        }

        // All successful launches are recorded before any failure is marked.
        self.backends.extend(launched);

        for uuid in failed {
            collector.mark_unknown(uuid)?;
        }
        Ok(())
    }

    /// Creates, initializes and launches the backend for one network,
    /// returning its UUID together with the handle of the launched task.
    async fn add_network_backend(
        &self,
        metadata: &NetworkMetadata,
    ) -> Result<(Uuid, JoinHandle<()>)> {
        let mut backend = NetworkBackend::new(metadata.clone(), self.bridge.clone())?;
        backend.init().await?;
        let handle = backend.launch().await?;
        Ok((metadata.uuid, handle))
    }
}

View File

@ -0,0 +1,36 @@
use anyhow::Result;
use async_trait::async_trait;
use bytes::BytesMut;
use tokio::sync::mpsc::Sender;
use super::key::NatKey;
/// Everything a NAT handler needs to serve a single NAT key.
#[derive(Debug, Clone)]
pub struct NatHandlerContext {
/// MTU value passed through from the NAT processor.
pub mtu: usize,
/// The NAT key this context was created for.
pub key: NatKey,
/// Channel used by `try_transmit` to queue outgoing buffers.
pub transmit_sender: Sender<BytesMut>,
/// Channel used by `reclaim` to hand the key back for removal.
pub reclaim_sender: Sender<NatKey>,
}
impl NatHandlerContext {
    /// Queues `buffer` on the transmit channel without waiting.
    ///
    /// # Errors
    /// Returns the `try_send` error when the channel does not accept the buffer.
    pub fn try_transmit(&self, buffer: BytesMut) -> Result<()> {
        Ok(self.transmit_sender.try_send(buffer)?)
    }

    /// Queues this context's key on the reclaim channel without waiting.
    ///
    /// # Errors
    /// Returns the `try_send` error when the channel does not accept the key.
    pub async fn reclaim(&self) -> Result<()> {
        Ok(self.reclaim_sender.try_send(self.key)?)
    }
}
/// Receives the raw packets that belong to one NAT table entry.
#[async_trait]
pub trait NatHandler: Send {
/// Handles one raw packet. The NAT processor queues the entry's key for
/// reclaim when this returns `Ok(false)`.
async fn receive(&self, packet: &[u8]) -> Result<bool>;
}
/// Creates handlers for NAT keys that have no table entry yet.
#[async_trait]
pub trait NatHandlerFactory: Send {
/// Builds a handler for `context.key`, or returns `None` when no handler
/// could be created (the NAT processor then adds no table entry).
async fn nat(&self, context: NatHandlerContext) -> Option<Box<dyn NatHandler>>;
}

View File

@ -0,0 +1,29 @@
use std::fmt::Display;
use smoltcp::wire::{EthernetAddress, IpEndpoint};
/// Transport protocol component of a [`NatKey`].
#[derive(Debug, Copy, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)]
pub enum NatKeyProtocol {
Tcp,
Udp,
/// Used for both ICMPv4 and ICMPv6 echo requests.
Icmp,
}
/// Identifies one NAT flow; used as the key of the NAT table.
#[derive(Debug, Copy, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)]
pub struct NatKey {
/// Protocol of the flow.
pub protocol: NatKeyProtocol,
/// Source MAC of the Ethernet frame the key was extracted from.
pub client_mac: EthernetAddress,
/// Destination MAC of the Ethernet frame the key was extracted from.
pub local_mac: EthernetAddress,
/// Source address and port of the packet (port 0 for ICMP keys).
pub client_ip: IpEndpoint,
/// Destination address and port of the packet (port 0 for ICMP keys).
pub external_ip: IpEndpoint,
}
impl Display for NatKey {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"{} -> {} {:?} {} -> {}",
self.client_mac, self.local_mac, self.protocol, self.client_ip, self.external_ip
)
}
}

View File

@ -0,0 +1,42 @@
use anyhow::Result;
use tokio::sync::mpsc::Sender;
use self::handler::NatHandlerFactory;
use self::processor::NatProcessor;
use bytes::BytesMut;
use smoltcp::wire::EthernetAddress;
use smoltcp::wire::IpCidr;
use tokio::task::JoinHandle;
pub mod handler;
pub mod key;
pub mod processor;
pub mod table;
/// Handle to a running NAT processor task.
pub struct Nat {
/// Sender returned by `NatProcessor::launch`; buffers sent here are consumed
/// by the processor task.
pub receive_sender: Sender<BytesMut>,
// Handle of the processor task; aborted when this value is dropped.
task: JoinHandle<()>,
}
impl Nat {
    /// Launches a `NatProcessor` with the given settings and wraps the
    /// returned packet sender and task handle.
    ///
    /// # Errors
    /// Propagates any error from `NatProcessor::launch`.
    pub fn new(
        mtu: usize,
        factory: Box<dyn NatHandlerFactory>,
        local_mac: EthernetAddress,
        local_cidrs: Vec<IpCidr>,
        transmit_sender: Sender<BytesMut>,
    ) -> Result<Self> {
        NatProcessor::launch(mtu, factory, local_mac, local_cidrs, transmit_sender).map(
            |(receive_sender, task)| Self {
                receive_sender,
                task,
            },
        )
    }
}
impl Drop for Nat {
// Aborts the processor task so it does not keep running after its owner is gone.
fn drop(&mut self) {
self.task.abort();
}
}

View File

@ -0,0 +1,330 @@
use crate::pkt::RecvPacket;
use crate::pkt::RecvPacketIp;
use anyhow::Result;
use bytes::BytesMut;
use etherparse::Icmpv4Header;
use etherparse::Icmpv4Type;
use etherparse::Icmpv6Header;
use etherparse::Icmpv6Type;
use etherparse::IpNumber;
use etherparse::IpPayloadSlice;
use etherparse::Ipv4Slice;
use etherparse::Ipv6Slice;
use etherparse::SlicedPacket;
use etherparse::TcpHeaderSlice;
use etherparse::UdpHeaderSlice;
use log::warn;
use log::{debug, trace};
use smoltcp::wire::EthernetAddress;
use smoltcp::wire::IpAddress;
use smoltcp::wire::IpCidr;
use smoltcp::wire::IpEndpoint;
use std::collections::hash_map::Entry;
use tokio::select;
use tokio::sync::mpsc::channel;
use tokio::sync::mpsc::Receiver;
use tokio::sync::mpsc::Sender;
use tokio::task::JoinHandle;
use super::handler::NatHandler;
use super::handler::NatHandlerContext;
use super::handler::NatHandlerFactory;
use super::key::NatKey;
use super::key::NatKeyProtocol;
use super::table::NatTable;
const RECEIVE_CHANNEL_QUEUE_LEN: usize = 3000;
const RECLAIM_CHANNEL_QUEUE_LEN: usize = 30;
/// State of the NAT task: matches received Ethernet frames to per-flow handlers.
pub struct NatProcessor {
// MTU copied into every handler context.
mtu: usize,
// Only frames whose destination MAC equals this address are processed.
local_mac: EthernetAddress,
// Packets whose destination address falls inside one of these CIDRs are ignored.
local_cidrs: Vec<IpCidr>,
// Active handlers keyed by NAT key.
table: NatTable,
// Creates a handler for a key that has no table entry yet.
factory: Box<dyn NatHandlerFactory>,
// Cloned into every handler context for outgoing buffers.
transmit_sender: Sender<BytesMut>,
// Cloned into every handler context; also used by the processor itself to
// queue a key for reclaim.
reclaim_sender: Sender<NatKey>,
// Keys whose table entries should be removed.
reclaim_receiver: Receiver<NatKey>,
// Incoming raw Ethernet frames.
receive_receiver: Receiver<BytesMut>,
}
// Outcome of one `select!` round in `NatProcessor::process`.
enum NatProcessorSelect {
// Value read from the reclaim channel (`None` when `recv` yields nothing more).
Reclaim(Option<NatKey>),
// Value read from the receive channel (`None` when `recv` yields nothing more).
ReceivedPacket(Option<BytesMut>),
}
impl NatProcessor {
pub fn launch(
mtu: usize,
factory: Box<dyn NatHandlerFactory>,
local_mac: EthernetAddress,
local_cidrs: Vec<IpCidr>,
transmit_sender: Sender<BytesMut>,
) -> Result<(Sender<BytesMut>, JoinHandle<()>)> {
let (reclaim_sender, reclaim_receiver) = channel(RECLAIM_CHANNEL_QUEUE_LEN);
let (receive_sender, receive_receiver) = channel(RECEIVE_CHANNEL_QUEUE_LEN);
let mut processor = Self {
mtu,
local_mac,
local_cidrs,
factory,
table: NatTable::new(),
transmit_sender,
reclaim_sender,
receive_receiver,
reclaim_receiver,
};
let handle = tokio::task::spawn(async move {
if let Err(error) = processor.process().await {
warn!("nat processing failed: {}", error);
}
});
Ok((receive_sender, handle))
}
pub async fn process(&mut self) -> Result<()> {
loop {
let selection = select! {
x = self.reclaim_receiver.recv() => NatProcessorSelect::Reclaim(x),
x = self.receive_receiver.recv() => NatProcessorSelect::ReceivedPacket(x),
};
match selection {
NatProcessorSelect::Reclaim(Some(key)) => {
if self.table.inner.remove(&key).is_some() {
debug!("reclaimed nat key: {}", key);
}
}
NatProcessorSelect::ReceivedPacket(Some(packet)) => {
if let Ok(slice) = SlicedPacket::from_ethernet(&packet) {
let Ok(packet) = RecvPacket::new(&packet, &slice) else {
continue;
};
self.process_packet(&packet).await?;
}
}
NatProcessorSelect::ReceivedPacket(None) | NatProcessorSelect::Reclaim(None) => {
break
}
}
}
Ok(())
}
/// Waits for the next key on the reclaim channel and removes its table entry.
///
/// Returns `Some(key)` only when an entry was actually removed; returns
/// `None` when the channel yields nothing or the key had no entry.
pub async fn process_reclaim(&mut self) -> Result<Option<NatKey>> {
    let Some(key) = self.reclaim_receiver.recv().await else {
        return Ok(None);
    };
    if self.table.inner.remove(&key).is_none() {
        return Ok(None);
    }
    debug!("reclaimed nat key: {}", key);
    Ok(Some(key))
}
/// Routes one parsed frame to the handler for its NAT key, creating the
/// handler through the factory when the key has no entry yet.
///
/// Frames are silently ignored when they have no Ethernet header, are not
/// addressed to the local MAC, yield no NAT key, or target an address inside
/// one of the local CIDRs. When the handler reports `false`, the key is
/// queued for reclaim.
pub async fn process_packet<'a>(&mut self, packet: &RecvPacket<'a>) -> Result<()> {
    let Some(ether) = packet.ether else {
        return Ok(());
    };

    let destination = EthernetAddress(ether.destination());
    if destination != self.local_mac {
        trace!(
            "received packet with destination {} which is not the local mac {}",
            destination,
            self.local_mac
        );
        return Ok(());
    }

    let extracted = match packet.ip {
        Some(RecvPacketIp::Ipv4(ipv4)) => self.extract_key_ipv4(packet, ipv4)?,
        Some(RecvPacketIp::Ipv6(ipv6)) => self.extract_key_ipv6(packet, ipv6)?,
        _ => None,
    };
    let Some(key) = extracted else {
        return Ok(());
    };

    // Traffic destined for a local network is not handled here.
    let targets_local = self
        .local_cidrs
        .iter()
        .any(|cidr| cidr.contains_addr(&key.external_ip.addr));
    if targets_local {
        return Ok(());
    }

    let context = NatHandlerContext {
        mtu: self.mtu,
        key,
        transmit_sender: self.transmit_sender.clone(),
        reclaim_sender: self.reclaim_sender.clone(),
    };

    let handler: &mut Box<dyn NatHandler> = match self.table.inner.entry(key) {
        Entry::Occupied(existing) => existing.into_mut(),
        Entry::Vacant(slot) => {
            // No handler means the factory declined this key; drop the packet.
            let Some(created) = self.factory.nat(context).await else {
                return Ok(());
            };
            debug!("creating nat entry for key: {}", key);
            slot.insert(created)
        }
    };

    let keep = handler.receive(packet.raw).await?;
    if !keep {
        self.reclaim_sender.try_send(key)?;
    }
    Ok(())
}
/// Builds the NAT key for an IPv4 packet by dispatching on its protocol
/// (TCP, UDP or ICMP); any other protocol yields `None`.
pub fn extract_key_ipv4<'a>(
    &mut self,
    packet: &RecvPacket<'a>,
    ipv4: &Ipv4Slice<'a>,
) -> Result<Option<NatKey>> {
    let header = ipv4.header();
    let source_addr = IpAddress::Ipv4(header.source_addr().into());
    let dest_addr = IpAddress::Ipv4(header.destination_addr().into());
    let payload = ipv4.payload();

    match header.protocol() {
        IpNumber::TCP => self.extract_key_tcp(packet, source_addr, dest_addr, payload),
        IpNumber::UDP => self.extract_key_udp(packet, source_addr, dest_addr, payload),
        IpNumber::ICMP => self.extract_key_icmpv4(packet, source_addr, dest_addr, payload),
        _ => Ok(None),
    }
}
/// Builds the NAT key for an IPv6 packet by dispatching on its next-header
/// value (TCP, UDP or ICMPv6); any other value yields `None`.
pub fn extract_key_ipv6<'a>(
    &mut self,
    packet: &RecvPacket<'a>,
    ipv6: &Ipv6Slice<'a>,
) -> Result<Option<NatKey>> {
    let header = ipv6.header();
    let source_addr = IpAddress::Ipv6(header.source_addr().into());
    let dest_addr = IpAddress::Ipv6(header.destination_addr().into());
    let payload = ipv6.payload();

    match header.next_header() {
        IpNumber::TCP => self.extract_key_tcp(packet, source_addr, dest_addr, payload),
        IpNumber::UDP => self.extract_key_udp(packet, source_addr, dest_addr, payload),
        IpNumber::IPV6_ICMP => {
            self.extract_key_icmpv6(packet, source_addr, dest_addr, payload)
        }
        _ => Ok(None),
    }
}
/// Builds a UDP NAT key from the frame's MAC addresses and the UDP ports.
///
/// Returns `None` when the packet has no Ethernet header, and an error when
/// the UDP header cannot be parsed.
pub fn extract_key_udp<'a>(
    &mut self,
    packet: &RecvPacket<'a>,
    source_addr: IpAddress,
    dest_addr: IpAddress,
    payload: &IpPayloadSlice<'a>,
) -> Result<Option<NatKey>> {
    let Some(ether) = packet.ether else {
        return Ok(None);
    };
    let udp = UdpHeaderSlice::from_slice(payload.payload)?;
    let key = NatKey {
        protocol: NatKeyProtocol::Udp,
        client_mac: EthernetAddress(ether.source()),
        local_mac: EthernetAddress(ether.destination()),
        client_ip: IpEndpoint::new(source_addr, udp.source_port()),
        external_ip: IpEndpoint::new(dest_addr, udp.destination_port()),
    };
    Ok(Some(key))
}
/// Builds an ICMP NAT key for an ICMPv4 echo request; both endpoints use port 0.
///
/// Returns `None` when the packet has no Ethernet header or is not an echo
/// request, and an error when the ICMPv4 header cannot be parsed.
pub fn extract_key_icmpv4<'a>(
    &mut self,
    packet: &RecvPacket<'a>,
    source_addr: IpAddress,
    dest_addr: IpAddress,
    payload: &IpPayloadSlice<'a>,
) -> Result<Option<NatKey>> {
    let Some(ether) = packet.ether else {
        return Ok(None);
    };
    let (header, _) = Icmpv4Header::from_slice(payload.payload)?;
    if !matches!(header.icmp_type, Icmpv4Type::EchoRequest(_)) {
        return Ok(None);
    }
    let key = NatKey {
        protocol: NatKeyProtocol::Icmp,
        client_mac: EthernetAddress(ether.source()),
        local_mac: EthernetAddress(ether.destination()),
        client_ip: IpEndpoint::new(source_addr, 0),
        external_ip: IpEndpoint::new(dest_addr, 0),
    };
    Ok(Some(key))
}
/// Builds an ICMP NAT key for an ICMPv6 echo request; both endpoints use port 0.
///
/// Returns `None` when the packet has no Ethernet header or is not an echo
/// request, and an error when the ICMPv6 header cannot be parsed.
pub fn extract_key_icmpv6<'a>(
    &mut self,
    packet: &RecvPacket<'a>,
    source_addr: IpAddress,
    dest_addr: IpAddress,
    payload: &IpPayloadSlice<'a>,
) -> Result<Option<NatKey>> {
    let Some(ether) = packet.ether else {
        return Ok(None);
    };
    let (header, _) = Icmpv6Header::from_slice(payload.payload)?;
    if !matches!(header.icmp_type, Icmpv6Type::EchoRequest(_)) {
        return Ok(None);
    }
    let key = NatKey {
        protocol: NatKeyProtocol::Icmp,
        client_mac: EthernetAddress(ether.source()),
        local_mac: EthernetAddress(ether.destination()),
        client_ip: IpEndpoint::new(source_addr, 0),
        external_ip: IpEndpoint::new(dest_addr, 0),
    };
    Ok(Some(key))
}
/// Builds a TCP NAT key from the frame's MAC addresses and the TCP ports.
///
/// Returns `None` when the packet has no Ethernet header, and an error when
/// the TCP header cannot be parsed.
pub fn extract_key_tcp<'a>(
    &mut self,
    packet: &RecvPacket<'a>,
    source_addr: IpAddress,
    dest_addr: IpAddress,
    payload: &IpPayloadSlice<'a>,
) -> Result<Option<NatKey>> {
    let Some(ether) = packet.ether else {
        return Ok(None);
    };
    let tcp = TcpHeaderSlice::from_slice(payload.payload)?;
    let key = NatKey {
        protocol: NatKeyProtocol::Tcp,
        client_mac: EthernetAddress(ether.source()),
        local_mac: EthernetAddress(ether.destination()),
        client_ip: IpEndpoint::new(source_addr, tcp.source_port()),
        external_ip: IpEndpoint::new(dest_addr, tcp.destination_port()),
    };
    Ok(Some(key))
}
}

View File

@ -0,0 +1,21 @@
use std::collections::HashMap;
use super::{handler::NatHandler, key::NatKey};
/// Table of active NAT handlers.
pub struct NatTable {
/// Handler for each NAT key that currently has an entry.
pub inner: HashMap<NatKey, Box<dyn NatHandler>>,
}
impl Default for NatTable {
// Same as `NatTable::new`.
fn default() -> Self {
Self::new()
}
}
impl NatTable {
    /// Creates a table with no entries.
    pub fn new() -> Self {
        let inner = HashMap::new();
        Self { inner }
    }
}

View File

@ -0,0 +1,37 @@
use anyhow::Result;
use etherparse::{Ethernet2Slice, Ipv4Slice, Ipv6Slice, LinkSlice, NetSlice, SlicedPacket};
/// Borrowed IP layer of a received packet.
pub enum RecvPacketIp<'a> {
Ipv4(&'a Ipv4Slice<'a>),
Ipv6(&'a Ipv6Slice<'a>),
}
/// A received packet together with borrowed views of its parsed layers.
pub struct RecvPacket<'a> {
/// The raw bytes passed to `RecvPacket::new`.
pub raw: &'a [u8],
/// The sliced packet the layer views below are taken from.
pub slice: &'a SlicedPacket<'a>,
/// Ethernet II header, when the link layer is Ethernet II.
pub ether: Option<&'a Ethernet2Slice<'a>>,
/// IPv4 or IPv6 layer, when present.
pub ip: Option<RecvPacketIp<'a>>,
}
impl RecvPacket<'_> {
    /// Wraps `raw` and its already-parsed `slice`, picking out the Ethernet II
    /// and IP layers when they are present.
    ///
    /// The visible implementation always returns `Ok`.
    pub fn new<'a>(raw: &'a [u8], slice: &'a SlicedPacket<'a>) -> Result<RecvPacket<'a>> {
        let ether = match &slice.link {
            Some(LinkSlice::Ethernet2(ether)) => Some(ether),
            _ => None,
        };

        let ip = match &slice.net {
            Some(NetSlice::Ipv4(ipv4)) => Some(RecvPacketIp::Ipv4(ipv4)),
            Some(NetSlice::Ipv6(ipv6)) => Some(RecvPacketIp::Ipv6(ipv6)),
            _ => None,
        };

        Ok(RecvPacket {
            raw,
            slice,
            ether,
            ip,
        })
    }
}

View File

@ -0,0 +1,276 @@
use std::{
net::{IpAddr, Ipv4Addr, Ipv6Addr},
time::Duration,
};
use anyhow::{anyhow, Result};
use async_trait::async_trait;
use bytes::{BufMut, BytesMut};
use etherparse::{
IcmpEchoHeader, Icmpv4Header, Icmpv4Type, Icmpv6Header, Icmpv6Type, IpNumber, Ipv4Slice,
Ipv6Slice, NetSlice, PacketBuilder, SlicedPacket,
};
use log::{debug, trace, warn};
use smoltcp::wire::IpAddress;
use tokio::{
select,
sync::mpsc::{Receiver, Sender},
};
use crate::{
icmp::{IcmpClient, IcmpProtocol, IcmpReply},
nat::handler::{NatHandler, NatHandlerContext},
};
const ICMP_PING_TIMEOUT_SECS: u64 = 20;
const ICMP_TIMEOUT_SECS: u64 = 30;
/// NAT handler that forwards received packets to an ICMP proxy task.
pub struct ProxyIcmpHandler {
// Sending half of the channel read by the proxy task.
rx_sender: Sender<BytesMut>,
}
#[async_trait]
impl NatHandler for ProxyIcmpHandler {
    /// Copies `data` into the proxy task's channel unless that channel is
    /// already closed. Always reports `true` on success, closed or not.
    ///
    /// # Errors
    /// Returns the `try_send` error when the open channel rejects the packet.
    async fn receive(&self, data: &[u8]) -> Result<bool> {
        if !self.rx_sender.is_closed() {
            self.rx_sender.try_send(data.into())?;
        }
        Ok(true)
    }
}
// Outcome of one `select!` round in the ICMP proxy loop.
enum ProxyIcmpSelect {
// A packet arrived from the NAT processor.
Internal(BytesMut),
// The channel yielded nothing more, or the idle timeout elapsed.
Close,
}
impl ProxyIcmpHandler {
pub fn new(rx_sender: Sender<BytesMut>) -> Self {
ProxyIcmpHandler { rx_sender }
}
/// Creates an ICMP client matching the address family of the external
/// address and spawns the proxy loop as a detached tokio task.
///
/// A failure of the proxy loop is logged as a warning inside the task.
///
/// # Errors
/// Fails when the ICMP client cannot be created.
pub async fn spawn(
    &mut self,
    context: NatHandlerContext,
    rx_receiver: Receiver<BytesMut>,
) -> Result<()> {
    let protocol = match context.key.external_ip.addr {
        IpAddress::Ipv4(_) => IcmpProtocol::Icmpv4,
        IpAddress::Ipv6(_) => IcmpProtocol::Icmpv6,
    };
    let client = IcmpClient::new(protocol)?;

    tokio::spawn(async move {
        let outcome = ProxyIcmpHandler::process(client, rx_receiver, context).await;
        if let Err(error) = outcome {
            warn!("processing of icmp proxy failed: {}", error);
        }
    });
    Ok(())
}
async fn process(
client: IcmpClient,
mut rx_receiver: Receiver<BytesMut>,
context: NatHandlerContext,
) -> Result<()> {
loop {
let deadline = tokio::time::sleep(Duration::from_secs(ICMP_TIMEOUT_SECS));
let selection = select! {
x = rx_receiver.recv() => if let Some(data) = x {
ProxyIcmpSelect::Internal(data)
} else {
ProxyIcmpSelect::Close
},
_ = deadline => ProxyIcmpSelect::Close,
};
match selection {
ProxyIcmpSelect::Internal(data) => {
let packet = SlicedPacket::from_ethernet(&data)?;
let Some(ref net) = packet.net else {
continue;
};
match net {
NetSlice::Ipv4(ipv4) => {
ProxyIcmpHandler::process_ipv4(&context, ipv4, &client).await?
}
NetSlice::Ipv6(ipv6) => {
ProxyIcmpHandler::process_ipv6(&context, ipv6, &client).await?
}
}
}
ProxyIcmpSelect::Close => {
break;
}
}
}
context.reclaim().await?;
Ok(())
}
/// Handles one IPv4 packet: for an ICMP echo request, spawns a task that
/// pings the external address and relays the reply. Anything else is ignored.
///
/// # Errors
/// Fails when the ICMPv4 header cannot be parsed.
async fn process_ipv4(
    context: &NatHandlerContext,
    ipv4: &Ipv4Slice<'_>,
    client: &IcmpClient,
) -> Result<()> {
    if ipv4.header().protocol() != IpNumber::ICMP {
        return Ok(());
    }

    let (header, payload) = Icmpv4Header::from_slice(ipv4.payload().payload)?;
    let Icmpv4Type::EchoRequest(echo) = header.icmp_type else {
        return Ok(());
    };
    let IpAddr::V4(external_ipv4) = context.key.external_ip.addr.into() else {
        return Ok(());
    };

    // The echo runs in its own task, so it needs owned copies of its inputs.
    let (context, client, payload) = (context.clone(), client.clone(), payload.to_vec());
    tokio::task::spawn(async move {
        let outcome =
            ProxyIcmpHandler::process_echo_ipv4(context, client, external_ipv4, echo, payload)
                .await;
        if let Err(error) = outcome {
            trace!("icmp4 echo failed: {}", error);
        }
    });
    Ok(())
}
/// Handles one IPv6 packet: for an ICMPv6 echo request, spawns a task that
/// pings the external address and relays the reply. Anything else is ignored.
///
/// # Errors
/// Fails when the ICMPv6 header cannot be parsed.
async fn process_ipv6(
    context: &NatHandlerContext,
    ipv6: &Ipv6Slice<'_>,
    client: &IcmpClient,
) -> Result<()> {
    if ipv6.header().next_header() != IpNumber::IPV6_ICMP {
        return Ok(());
    }

    let (header, payload) = Icmpv6Header::from_slice(ipv6.payload().payload)?;
    let Icmpv6Type::EchoRequest(echo) = header.icmp_type else {
        return Ok(());
    };
    let IpAddr::V6(external_ipv6) = context.key.external_ip.addr.into() else {
        return Ok(());
    };

    // The echo runs in its own task, so it needs owned copies of its inputs.
    let (context, client, payload) = (context.clone(), client.clone(), payload.to_vec());
    tokio::task::spawn(async move {
        let outcome =
            ProxyIcmpHandler::process_echo_ipv6(context, client, external_ipv6, echo, payload)
                .await;
        if let Err(error) = outcome {
            trace!("icmp6 echo failed: {}", error);
        }
    });
    Ok(())
}
/// Pings `external_ipv4` on behalf of the client and, when an ICMPv4 reply
/// arrives in time, transmits an echo reply frame addressed from the
/// external address back to the client.
///
/// A missing or non-ICMPv4 reply is not an error. A failed transmit is only
/// logged.
///
/// # Errors
/// Fails when the ping fails, when the key's addresses are not both IPv4, or
/// when the reply frame cannot be written.
async fn process_echo_ipv4(
    context: NatHandlerContext,
    client: IcmpClient,
    external_ipv4: Ipv4Addr,
    echo: IcmpEchoHeader,
    payload: Vec<u8>,
) -> Result<()> {
    let wait = Duration::from_secs(ICMP_PING_TIMEOUT_SECS);
    let reply = client
        .ping4(external_ipv4, echo.id, echo.seq, &payload, wait)
        .await?;
    let Some(IcmpReply::Icmpv4 { echo, payload, .. }) = reply else {
        return Ok(());
    };

    let (IpAddress::Ipv4(external_addr), IpAddress::Ipv4(client_addr)) =
        (context.key.external_ip.addr, context.key.client_ip.addr)
    else {
        return Err(anyhow!("IP endpoint mismatch"));
    };

    // The frame travels from the local MAC to the client MAC.
    let builder = PacketBuilder::ethernet2(context.key.local_mac.0, context.key.client_mac.0)
        .ipv4(external_addr.0, client_addr.0, 20)
        .icmpv4_echo_reply(echo.id, echo.seq);

    let mut writer = BytesMut::with_capacity(builder.size(payload.len())).writer();
    builder.write(&mut writer, &payload)?;
    if let Err(error) = context.try_transmit(writer.into_inner()) {
        debug!("failed to transmit icmp packet: {}", error);
    }
    Ok(())
}
/// Pings `external_ipv6` on behalf of the client and, when an ICMPv6 reply
/// arrives in time, transmits an echo reply frame addressed from the
/// external address back to the client.
///
/// A missing or non-ICMPv6 reply is not an error. A failed transmit is only
/// logged.
///
/// # Errors
/// Fails when the ping fails, when the key's addresses are not both IPv6, or
/// when the reply frame cannot be written.
async fn process_echo_ipv6(
    context: NatHandlerContext,
    client: IcmpClient,
    external_ipv6: Ipv6Addr,
    echo: IcmpEchoHeader,
    payload: Vec<u8>,
) -> Result<()> {
    let wait = Duration::from_secs(ICMP_PING_TIMEOUT_SECS);
    let reply = client
        .ping6(external_ipv6, echo.id, echo.seq, &payload, wait)
        .await?;
    let Some(IcmpReply::Icmpv6 { echo, payload, .. }) = reply else {
        return Ok(());
    };

    let (IpAddress::Ipv6(external_addr), IpAddress::Ipv6(client_addr)) =
        (context.key.external_ip.addr, context.key.client_ip.addr)
    else {
        return Err(anyhow!("IP endpoint mismatch"));
    };

    // The frame travels from the local MAC to the client MAC.
    let builder = PacketBuilder::ethernet2(context.key.local_mac.0, context.key.client_mac.0)
        .ipv6(external_addr.0, client_addr.0, 20)
        .icmpv6_echo_reply(echo.id, echo.seq);

    let mut writer = BytesMut::with_capacity(builder.size(payload.len())).writer();
    builder.write(&mut writer, &payload)?;
    if let Err(error) = context.try_transmit(writer.into_inner()) {
        debug!("failed to transmit icmp packet: {}", error);
    }
    Ok(())
}
}

View File

@ -0,0 +1,77 @@
use async_trait::async_trait;
use bytes::BytesMut;
use log::warn;
use tokio::sync::mpsc::channel;
use crate::proxynat::udp::ProxyUdpHandler;
use crate::nat::handler::{NatHandler, NatHandlerContext, NatHandlerFactory};
use crate::nat::key::NatKeyProtocol;
use self::icmp::ProxyIcmpHandler;
use self::tcp::ProxyTcpHandler;
mod icmp;
mod tcp;
mod udp;
const RX_CHANNEL_QUEUE_LEN: usize = 1000;
/// Stateless factory producing the UDP, ICMP and TCP proxy handlers.
pub struct ProxyNatHandlerFactory {}
impl Default for ProxyNatHandlerFactory {
// Same as `ProxyNatHandlerFactory::new`.
fn default() -> Self {
Self::new()
}
}
impl ProxyNatHandlerFactory {
pub fn new() -> Self {
Self {}
}
}
#[async_trait]
impl NatHandlerFactory for ProxyNatHandlerFactory {
    /// Creates and spawns the proxy handler matching the key's protocol.
    ///
    /// Each handler gets a fresh channel of `RX_CHANNEL_QUEUE_LEN` buffers.
    /// When spawning fails, the error is logged as a warning and `None` is
    /// returned.
    async fn nat(&self, context: NatHandlerContext) -> Option<Box<dyn NatHandler>> {
        let (rx_sender, rx_receiver) = channel::<BytesMut>(RX_CHANNEL_QUEUE_LEN);

        match context.key.protocol {
            NatKeyProtocol::Udp => {
                let mut handler = ProxyUdpHandler::new(rx_sender);
                match handler.spawn(context, rx_receiver).await {
                    Ok(_) => {
                        let boxed: Box<dyn NatHandler> = Box::new(handler);
                        Some(boxed)
                    }
                    Err(error) => {
                        warn!("unable to spawn udp proxy handler: {}", error);
                        None
                    }
                }
            }

            NatKeyProtocol::Icmp => {
                let mut handler = ProxyIcmpHandler::new(rx_sender);
                match handler.spawn(context, rx_receiver).await {
                    Ok(_) => {
                        let boxed: Box<dyn NatHandler> = Box::new(handler);
                        Some(boxed)
                    }
                    Err(error) => {
                        warn!("unable to spawn icmp proxy handler: {}", error);
                        None
                    }
                }
            }

            NatKeyProtocol::Tcp => {
                let mut handler = ProxyTcpHandler::new(rx_sender);
                match handler.spawn(context, rx_receiver).await {
                    Ok(_) => {
                        let boxed: Box<dyn NatHandler> = Box::new(handler);
                        Some(boxed)
                    }
                    Err(error) => {
                        warn!("unable to spawn tcp proxy handler: {}", error);
                        None
                    }
                }
            }
        }
    }
}

View File

@ -0,0 +1,466 @@
use std::{
net::{IpAddr, SocketAddr},
time::Duration,
};
use anyhow::Result;
use async_trait::async_trait;
use bytes::BytesMut;
use etherparse::{EtherType, Ethernet2Header};
use log::{debug, warn};
use smoltcp::{
iface::{Config, Interface, SocketSet, SocketStorage},
phy::Medium,
socket::tcp::{self, SocketBuffer, State},
time::Instant,
wire::{HardwareAddress, IpAddress, IpCidr},
};
use tokio::{
io::{AsyncReadExt, AsyncWriteExt},
net::TcpStream,
select,
sync::mpsc::channel,
};
use tokio::{sync::mpsc::Receiver, sync::mpsc::Sender};
use crate::{
chandev::ChannelDevice,
nat::handler::{NatHandler, NatHandlerContext},
};
const TCP_BUFFER_SIZE: usize = 65535;
const TCP_IP_BUFFER_QUEUE_LEN: usize = 3000;
const TCP_ACCEPT_TIMEOUT_SECS: u64 = 120;
const TCP_DANGLE_TIMEOUT_SECS: u64 = 10;
/// NAT handler that forwards received packets to a TCP proxy task.
pub struct ProxyTcpHandler {
// Sending half of the channel read by the proxy task.
rx_sender: Sender<BytesMut>,
}
#[async_trait]
impl NatHandler for ProxyTcpHandler {
    /// Copies `data` into the proxy task's channel.
    ///
    /// Returns `false` without sending when the channel is already closed,
    /// and `true` after a successful send.
    ///
    /// # Errors
    /// Returns the `try_send` error when the open channel rejects the packet.
    async fn receive(&self, data: &[u8]) -> Result<bool> {
        if self.rx_sender.is_closed() {
            return Ok(false);
        }
        self.rx_sender.try_send(data.into())?;
        Ok(true)
    }
}
// Outcome of one `select!` round while waiting for the client handshake.
#[derive(Debug)]
enum ProxyTcpAcceptSelect {
// An Ethernet frame arrived from the NAT processor.
Internal(BytesMut),
// The smoltcp device produced an IP packet to send to the client.
TxIpPacket(BytesMut),
// The short poll sleep elapsed; the interface should be polled.
TimePassed,
// Nothing was ready; a short poll sleep is scheduled for the next round.
DoNothing,
// A channel yielded nothing more or the accept deadline elapsed.
Close,
}
// Outcome of one `select!` round while relaying data.
#[derive(Debug)]
enum ProxyTcpDataSelect {
// Number of bytes read from the external socket (0 closes the internal socket).
ExternalRecv(usize),
// Number of bytes written to the external socket (0 records that the write
// side was shut down).
ExternalSent(usize),
// An Ethernet frame arrived from the NAT processor.
InternalRecv(BytesMut),
// The smoltcp device produced an IP packet to send to the client.
TxIpPacket(BytesMut),
// The short poll sleep elapsed; the interface should be polled.
TimePassed,
// Nothing was ready; a short poll sleep is scheduled for the next round.
DoNothing,
// A channel yielded nothing more.
Close,
}
// Outcome of one `select!` round while draining after the external socket closed.
#[derive(Debug)]
enum ProxyTcpFinishSelect {
// An Ethernet frame arrived from the NAT processor.
InternalRecv(BytesMut),
// The smoltcp device produced an IP packet to send to the client.
TxIpPacket(BytesMut),
// A channel yielded nothing more or the dangle timeout elapsed.
Close,
}
impl ProxyTcpHandler {
pub fn new(rx_sender: Sender<BytesMut>) -> Self {
ProxyTcpHandler { rx_sender }
}
/// Connects a TCP stream to the key's external endpoint and spawns the proxy
/// loop as a detached tokio task.
///
/// A failure of the proxy loop is logged as a warning inside the task.
///
/// # Errors
/// Fails when the external connection cannot be established.
pub async fn spawn(
    &mut self,
    context: NatHandlerContext,
    rx_receiver: Receiver<BytesMut>,
) -> Result<()> {
    let port = context.key.external_ip.port;
    let ip = match context.key.external_ip.addr {
        IpAddress::Ipv4(addr) => IpAddr::V4(addr.0.into()),
        IpAddress::Ipv6(addr) => IpAddr::V6(addr.0.into()),
    };

    let socket = TcpStream::connect(SocketAddr::new(ip, port)).await?;
    tokio::spawn(async move {
        let outcome = ProxyTcpHandler::process(context, socket, rx_receiver).await;
        if let Err(error) = outcome {
            warn!("processing of tcp proxy failed: {}", error);
        }
    });
    Ok(())
}
/// Proxy loop for one TCP NAT key.
///
/// A smoltcp TCP socket talks to the client through a `ChannelDevice`, while
/// `external_socket` is the real connection to the external endpoint. The
/// function runs three phases:
///
/// 1. **Accept**: wait until the internal socket leaves `SynSent`, closes, or
///    `TCP_ACCEPT_TIMEOUT_SECS` elapses for the phase as a whole.
/// 2. **Data**: relay bytes both ways until the internal socket reaches a
///    closing state or a channel stops yielding values.
/// 3. **Finish**: after shutting down the external socket, keep exchanging
///    packets with the client until `TCP_DANGLE_TIMEOUT_SECS` pass with no
///    packet activity.
///
/// On a normal exit the key is queued for reclaim.
///
/// # Errors
/// Propagates frame parsing errors, socket I/O errors, smoltcp socket errors
/// and the reclaim error.
async fn process(
    context: NatHandlerContext,
    mut external_socket: TcpStream,
    mut rx_receiver: Receiver<BytesMut>,
) -> Result<()> {
    let (ip_sender, mut ip_receiver) = channel::<BytesMut>(TCP_IP_BUFFER_QUEUE_LEN);
    let mut external_buffer = vec![0u8; TCP_BUFFER_SIZE];

    // The device works on IP packets, so the Ethernet header is subtracted
    // from the MTU.
    let mut device = ChannelDevice::new(
        context.mtu - Ethernet2Header::LEN,
        Medium::Ip,
        ip_sender.clone(),
    );

    let config = Config::new(HardwareAddress::Ip);

    let tcp_rx_buffer = SocketBuffer::new(vec![0; TCP_BUFFER_SIZE]);
    let tcp_tx_buffer = SocketBuffer::new(vec![0; TCP_BUFFER_SIZE]);
    let internal_socket = tcp::Socket::new(tcp_rx_buffer, tcp_tx_buffer);

    let mut iface = Interface::new(config, &mut device, Instant::now());
    iface.update_ip_addrs(|addrs| {
        let _ = addrs.push(IpCidr::new(context.key.external_ip.addr, 0));
    });

    let mut sockets = SocketSet::new([SocketStorage::EMPTY]);
    let internal_socket_handle = sockets.add(internal_socket);
    let (mut external_r, mut external_w) = external_socket.split();

    {
        // The internal socket connects towards the client, using the
        // external endpoint as its local endpoint.
        let socket = sockets.get_mut::<tcp::Socket>(internal_socket_handle);
        socket.connect(
            iface.context(),
            context.key.client_ip,
            context.key.external_ip,
        )?;
    }

    iface.poll(Instant::now(), &mut device, &mut sockets);

    // The accept deadline is created once for the whole accept phase. The
    // loop below iterates at least every poll interval, so a deadline that is
    // recreated per iteration could never elapse.
    let accept_deadline = tokio::time::sleep(Duration::from_secs(TCP_ACCEPT_TIMEOUT_SECS));
    tokio::pin!(accept_deadline);

    let mut sleeper: Option<tokio::time::Sleep> = None;
    loop {
        let socket = sockets.get_mut::<tcp::Socket>(internal_socket_handle);
        if socket.is_active() && socket.state() != State::SynSent {
            break;
        }

        if socket.state() == State::Closed {
            break;
        }

        // The deadline is polled before the always-ready fallback so that the
        // biased selection can actually observe it.
        let selection = if let Some(sleep) = sleeper.take() {
            select! {
                biased;
                x = rx_receiver.recv() => if let Some(data) = x {
                    ProxyTcpAcceptSelect::Internal(data)
                } else {
                    ProxyTcpAcceptSelect::Close
                },
                x = ip_receiver.recv() => if let Some(data) = x {
                    ProxyTcpAcceptSelect::TxIpPacket(data)
                } else {
                    ProxyTcpAcceptSelect::Close
                },
                _ = &mut accept_deadline => ProxyTcpAcceptSelect::Close,
                _ = sleep => ProxyTcpAcceptSelect::TimePassed,
            }
        } else {
            select! {
                biased;
                x = rx_receiver.recv() => if let Some(data) = x {
                    ProxyTcpAcceptSelect::Internal(data)
                } else {
                    ProxyTcpAcceptSelect::Close
                },
                x = ip_receiver.recv() => if let Some(data) = x {
                    ProxyTcpAcceptSelect::TxIpPacket(data)
                } else {
                    ProxyTcpAcceptSelect::Close
                },
                _ = &mut accept_deadline => ProxyTcpAcceptSelect::Close,
                _ = std::future::ready(()) => ProxyTcpAcceptSelect::DoNothing,
            }
        };

        match selection {
            ProxyTcpAcceptSelect::TimePassed => {
                iface.poll(Instant::now(), &mut device, &mut sockets);
            }

            ProxyTcpAcceptSelect::DoNothing => {
                sleeper = Some(tokio::time::sleep(Duration::from_micros(100)));
            }

            ProxyTcpAcceptSelect::Internal(data) => {
                // Strip the Ethernet header; the device expects IP packets.
                let (_, payload) = Ethernet2Header::from_slice(&data)?;
                device.rx = Some(payload.into());
                iface.poll(Instant::now(), &mut device, &mut sockets);
            }

            ProxyTcpAcceptSelect::TxIpPacket(payload) => {
                // Wrap the IP packet in an Ethernet frame addressed to the client.
                let mut buffer = BytesMut::with_capacity(Ethernet2Header::LEN + payload.len());
                let header = Ethernet2Header {
                    source: context.key.local_mac.0,
                    destination: context.key.client_mac.0,
                    ether_type: match context.key.external_ip.addr {
                        IpAddress::Ipv4(_) => EtherType::IPV4,
                        IpAddress::Ipv6(_) => EtherType::IPV6,
                    },
                };
                buffer.extend_from_slice(&header.to_bytes());
                buffer.extend_from_slice(&payload);
                if let Err(error) = context.try_transmit(buffer) {
                    debug!("failed to transmit tcp packet: {}", error);
                }
            }

            ProxyTcpAcceptSelect::Close => {
                break;
            }
        }
    }

    let accepted = if sockets
        .get_mut::<tcp::Socket>(internal_socket_handle)
        .is_active()
    {
        true
    } else {
        debug!("failed to accept tcp connection from client");
        false
    };

    let mut already_shutdown = false;
    let mut sleeper: Option<tokio::time::Sleep> = None;
    loop {
        if !accepted {
            break;
        }

        let socket = sockets.get_mut::<tcp::Socket>(internal_socket_handle);

        match socket.state() {
            State::Closed
            | State::Listen
            | State::Closing
            | State::LastAck
            | State::TimeWait => {
                break;
            }
            State::FinWait1
            | State::SynSent
            | State::CloseWait
            | State::FinWait2
            | State::SynReceived
            | State::Established => {}
        }

        // Room left in the internal send buffer bounds how much is read from
        // the external socket this round.
        let bytes_to_client = if socket.can_send() {
            socket.send_capacity() - socket.send_queue()
        } else {
            0
        };

        // Data is only peeked here; it is consumed once the external write
        // reports how many bytes were accepted.
        let (bytes_to_external, do_shutdown) = if socket.may_recv() {
            if let Ok(data) = socket.peek(TCP_BUFFER_SIZE) {
                if data.is_empty() {
                    (None, false)
                } else {
                    (Some(data), false)
                }
            } else {
                (None, false)
            }
        } else if !already_shutdown && matches!(socket.state(), State::CloseWait) {
            (None, true)
        } else {
            (None, false)
        };

        let selection = if let Some(sleep) = sleeper.take() {
            if !do_shutdown {
                select! {
                    biased;
                    x = ip_receiver.recv() => if let Some(data) = x {
                        ProxyTcpDataSelect::TxIpPacket(data)
                    } else {
                        ProxyTcpDataSelect::Close
                    },
                    x = rx_receiver.recv() => if let Some(data) = x {
                        ProxyTcpDataSelect::InternalRecv(data)
                    } else {
                        ProxyTcpDataSelect::Close
                    },
                    x = external_w.write(bytes_to_external.unwrap_or(b"")), if bytes_to_external.is_some() => ProxyTcpDataSelect::ExternalSent(x?),
                    x = external_r.read(&mut external_buffer[..bytes_to_client]), if bytes_to_client > 0 => ProxyTcpDataSelect::ExternalRecv(x?),
                    _ = sleep => ProxyTcpDataSelect::TimePassed,
                }
            } else {
                select! {
                    biased;
                    x = ip_receiver.recv() => if let Some(data) = x {
                        ProxyTcpDataSelect::TxIpPacket(data)
                    } else {
                        ProxyTcpDataSelect::Close
                    },
                    x = rx_receiver.recv() => if let Some(data) = x {
                        ProxyTcpDataSelect::InternalRecv(data)
                    } else {
                        ProxyTcpDataSelect::Close
                    },
                    _ = external_w.shutdown() => ProxyTcpDataSelect::ExternalSent(0),
                    x = external_r.read(&mut external_buffer[..bytes_to_client]), if bytes_to_client > 0 => ProxyTcpDataSelect::ExternalRecv(x?),
                    _ = sleep => ProxyTcpDataSelect::TimePassed,
                }
            }
        } else if !do_shutdown {
            select! {
                biased;
                x = ip_receiver.recv() => if let Some(data) = x {
                    ProxyTcpDataSelect::TxIpPacket(data)
                } else {
                    ProxyTcpDataSelect::Close
                },
                x = rx_receiver.recv() => if let Some(data) = x {
                    ProxyTcpDataSelect::InternalRecv(data)
                } else {
                    ProxyTcpDataSelect::Close
                },
                x = external_w.write(bytes_to_external.unwrap_or(b"")), if bytes_to_external.is_some() => ProxyTcpDataSelect::ExternalSent(x?),
                x = external_r.read(&mut external_buffer[..bytes_to_client]), if bytes_to_client > 0 => ProxyTcpDataSelect::ExternalRecv(x?),
                _ = std::future::ready(()) => ProxyTcpDataSelect::DoNothing,
            }
        } else {
            select! {
                biased;
                x = ip_receiver.recv() => if let Some(data) = x {
                    ProxyTcpDataSelect::TxIpPacket(data)
                } else {
                    ProxyTcpDataSelect::Close
                },
                x = rx_receiver.recv() => if let Some(data) = x {
                    ProxyTcpDataSelect::InternalRecv(data)
                } else {
                    ProxyTcpDataSelect::Close
                },
                _ = external_w.shutdown() => ProxyTcpDataSelect::ExternalSent(0),
                x = external_r.read(&mut external_buffer[..bytes_to_client]), if bytes_to_client > 0 => ProxyTcpDataSelect::ExternalRecv(x?),
                _ = std::future::ready(()) => ProxyTcpDataSelect::DoNothing,
            }
        };

        match selection {
            ProxyTcpDataSelect::ExternalRecv(size) => {
                // A zero-length read closes the internal socket.
                if size == 0 {
                    socket.close();
                } else {
                    socket.send_slice(&external_buffer[..size])?;
                }
            }

            ProxyTcpDataSelect::ExternalSent(size) => {
                if size == 0 {
                    already_shutdown = true;
                } else {
                    // Consume exactly the bytes the external socket accepted.
                    socket.recv(|_| (size, ()))?;
                }
            }

            ProxyTcpDataSelect::InternalRecv(data) => {
                let (_, payload) = Ethernet2Header::from_slice(&data)?;
                device.rx = Some(payload.into());
                iface.poll(Instant::now(), &mut device, &mut sockets);
            }

            ProxyTcpDataSelect::TxIpPacket(payload) => {
                let mut buffer = BytesMut::with_capacity(Ethernet2Header::LEN + payload.len());
                let header = Ethernet2Header {
                    source: context.key.local_mac.0,
                    destination: context.key.client_mac.0,
                    ether_type: match context.key.external_ip.addr {
                        IpAddress::Ipv4(_) => EtherType::IPV4,
                        IpAddress::Ipv6(_) => EtherType::IPV6,
                    },
                };
                buffer.extend_from_slice(&header.to_bytes());
                buffer.extend_from_slice(&payload);
                if let Err(error) = context.try_transmit(buffer) {
                    debug!("failed to transmit tcp packet: {}", error);
                }
            }

            ProxyTcpDataSelect::TimePassed => {
                iface.poll(Instant::now(), &mut device, &mut sockets);
            }

            ProxyTcpDataSelect::DoNothing => {
                sleeper = Some(tokio::time::sleep(Duration::from_micros(100)));
            }

            ProxyTcpDataSelect::Close => {
                break;
            }
        }
    }

    let _ = external_socket.shutdown().await;
    drop(external_socket);

    loop {
        // Recreated every round on purpose: this is an idle timeout that is
        // reset by packet activity.
        let deadline = tokio::time::sleep(Duration::from_secs(TCP_DANGLE_TIMEOUT_SECS));
        tokio::pin!(deadline);

        let selection = select! {
            biased;
            x = ip_receiver.recv() => if let Some(data) = x {
                ProxyTcpFinishSelect::TxIpPacket(data)
            } else {
                ProxyTcpFinishSelect::Close
            },
            x = rx_receiver.recv() => if let Some(data) = x {
                ProxyTcpFinishSelect::InternalRecv(data)
            } else {
                ProxyTcpFinishSelect::Close
            },
            _ = deadline => ProxyTcpFinishSelect::Close,
        };

        match selection {
            ProxyTcpFinishSelect::InternalRecv(data) => {
                let (_, payload) = Ethernet2Header::from_slice(&data)?;
                device.rx = Some(payload.into());
                iface.poll(Instant::now(), &mut device, &mut sockets);
            }

            ProxyTcpFinishSelect::TxIpPacket(payload) => {
                let mut buffer = BytesMut::with_capacity(Ethernet2Header::LEN + payload.len());
                let header = Ethernet2Header {
                    source: context.key.local_mac.0,
                    destination: context.key.client_mac.0,
                    ether_type: match context.key.external_ip.addr {
                        IpAddress::Ipv4(_) => EtherType::IPV4,
                        IpAddress::Ipv6(_) => EtherType::IPV6,
                    },
                };
                buffer.extend_from_slice(&header.to_bytes());
                buffer.extend_from_slice(&payload);
                if let Err(error) = context.try_transmit(buffer) {
                    debug!("failed to transmit tcp packet: {}", error);
                }
            }

            ProxyTcpFinishSelect::Close => {
                break;
            }
        }
    }

    context.reclaim().await?;

    Ok(())
}
}

View File

@ -0,0 +1,142 @@
use std::{
net::{IpAddr, SocketAddr},
time::Duration,
};
use anyhow::{anyhow, Result};
use async_trait::async_trait;
use bytes::{BufMut, BytesMut};
use etherparse::{PacketBuilder, SlicedPacket, UdpSlice};
use log::{debug, warn};
use smoltcp::wire::IpAddress;
use tokio::{
io::{AsyncReadExt, AsyncWriteExt},
select,
};
use tokio::{sync::mpsc::Receiver, sync::mpsc::Sender};
use udp_stream::UdpStream;
use crate::nat::handler::{NatHandler, NatHandlerContext};
/// Idle timeout in seconds: the proxy loop closes a flow when neither side
/// produces a datagram within this window.
const UDP_TIMEOUT_SECS: u64 = 60;
/// NAT handler for a single UDP flow; forwards received frames into a queue.
pub struct ProxyUdpHandler {
// Sending half of the frame queue. The matching receiver is presumably the
// one handed to `spawn` — confirm against the caller.
rx_sender: Sender<BytesMut>,
}
#[async_trait]
impl NatHandler for ProxyUdpHandler {
    /// Queues a frame for the proxy task.
    ///
    /// Always yields `Ok(true)` unless the queue rejects the frame (for
    /// example because it is full), in which case the send error is returned.
    async fn receive(&self, data: &[u8]) -> Result<bool> {
        // Once the receiving side has gone away the frame is quietly dropped.
        if !self.rx_sender.is_closed() {
            self.rx_sender.try_send(data.into())?;
        }
        Ok(true)
    }
}
/// Outcome of one `select!` round in `ProxyUdpHandler::process`.
enum ProxyUdpSelect {
// The external socket yielded this many bytes into the read buffer.
External(usize),
// A frame arrived from the internal queue.
Internal(BytesMut),
// The internal queue closed or the idle timer fired.
Close,
}
impl ProxyUdpHandler {
pub fn new(rx_sender: Sender<BytesMut>) -> Self {
ProxyUdpHandler { rx_sender }
}
/// Connects a UDP stream to the flow's external endpoint and starts the
/// background task that proxies datagrams for it.
///
/// Returns an error if the connection cannot be established; failures inside
/// the background task are only logged.
pub async fn spawn(
    &mut self,
    context: NatHandlerContext,
    rx_receiver: Receiver<BytesMut>,
) -> Result<()> {
    let external_ip = match context.key.external_ip.addr {
        IpAddress::Ipv4(v4) => IpAddr::V4(v4.0.into()),
        IpAddress::Ipv6(v6) => IpAddr::V6(v6.0.into()),
    };
    let external_addr = SocketAddr::new(external_ip, context.key.external_ip.port);
    let socket = UdpStream::connect(external_addr).await?;

    tokio::spawn(async move {
        let outcome = ProxyUdpHandler::process(context, socket, rx_receiver).await;
        if let Err(error) = outcome {
            warn!("processing of udp proxy failed: {}", error);
        }
    });
    Ok(())
}
/// Proxies datagrams between the internal frame queue and the external UDP
/// socket until the queue closes or the flow has been idle for
/// `UDP_TIMEOUT_SECS`, then reclaims the NAT context.
///
/// Datagrams read from the external socket are wrapped in Ethernet/IP/UDP
/// headers addressed to the client and handed to `context.try_transmit`.
/// Frames from the internal queue are parsed and only their UDP payload is
/// written to the external socket.
///
/// NOTE(review): every `?` inside the loop returns before
/// `context.reclaim()` runs — confirm that skipping the reclaim on error is
/// intended.
async fn process(
context: NatHandlerContext,
mut socket: UdpStream,
mut rx_receiver: Receiver<BytesMut>,
) -> Result<()> {
// Scratch buffer for reads from the external socket.
let mut external_buffer = vec![0u8; 2048];
loop {
// A fresh timer each iteration, so the timeout measures inactivity
// rather than total flow lifetime.
let deadline = tokio::time::sleep(Duration::from_secs(UDP_TIMEOUT_SECS));
let selection = select! {
x = rx_receiver.recv() => if let Some(data) = x {
ProxyUdpSelect::Internal(data)
} else {
ProxyUdpSelect::Close
},
x = socket.read(&mut external_buffer) => ProxyUdpSelect::External(x?),
_ = deadline => ProxyUdpSelect::Close,
};
match selection {
ProxyUdpSelect::External(size) => {
let data = &external_buffer[0..size];
// Reply frame: source is the local MAC / external endpoint,
// destination is the client.
let packet =
PacketBuilder::ethernet2(context.key.local_mac.0, context.key.client_mac.0);
// The literal 20 is the IPv4 TTL / IPv6 hop limit of the built packet.
let packet = match (context.key.external_ip.addr, context.key.client_ip.addr) {
(IpAddress::Ipv4(external_addr), IpAddress::Ipv4(client_addr)) => {
packet.ipv4(external_addr.0, client_addr.0, 20)
}
(IpAddress::Ipv6(external_addr), IpAddress::Ipv6(client_addr)) => {
packet.ipv6(external_addr.0, client_addr.0, 20)
}
_ => {
return Err(anyhow!("IP endpoint mismatch"));
}
};
let packet =
packet.udp(context.key.external_ip.port, context.key.client_ip.port);
let buffer = BytesMut::with_capacity(packet.size(data.len()));
let mut writer = buffer.writer();
packet.write(&mut writer, data)?;
let buffer = writer.into_inner();
// Transmit failures are logged and the datagram is dropped.
if let Err(error) = context.try_transmit(buffer) {
debug!("failed to transmit udp packet: {}", error);
}
}
ProxyUdpSelect::Internal(data) => {
let packet = SlicedPacket::from_ethernet(&data)?;
// Frames without a network layer or IP payload are ignored.
let Some(ref net) = packet.net else {
continue;
};
let Some(ip) = net.ip_payload_ref() else {
continue;
};
let udp = UdpSlice::from_slice(ip.payload)?;
socket.write_all(udp.payload()).await?;
}
ProxyUdpSelect::Close => {
drop(socket);
break;
}
}
}
context.reclaim().await?;
Ok(())
}
}

View File

@ -0,0 +1,299 @@
use anyhow::{anyhow, Result};
use bytes::BytesMut;
use log::{debug, warn};
use std::io::ErrorKind;
use std::os::fd::{FromRawFd, IntoRawFd};
use std::os::unix::io::{AsRawFd, RawFd};
use std::sync::Arc;
use std::{io, mem};
use tokio::net::UdpSocket;
use tokio::select;
use tokio::sync::mpsc::{channel, Receiver, Sender};
use tokio::task::JoinHandle;
/// Capacity of the queue carrying packets to be written to the raw socket.
const RAW_SOCKET_TRANSMIT_QUEUE_LEN: usize = 3000;
/// Capacity of the queue carrying packets read from the raw socket.
const RAW_SOCKET_RECEIVE_QUEUE_LEN: usize = 3000;
/// Kind of traffic a raw socket is opened for; selects the socket's address
/// family and protocol number.
#[derive(Debug)]
pub enum RawSocketProtocol {
// AF_INET socket carrying IPPROTO_ICMP.
Icmpv4,
// AF_INET6 socket carrying IPPROTO_ICMPV6.
Icmpv6,
// AF_PACKET socket receiving every ethertype (ETH_P_ALL).
Ethernet,
}
impl RawSocketProtocol {
    /// Address family passed as the `domain` argument of `socket(2)`.
    pub fn to_socket_domain(&self) -> i32 {
        match *self {
            Self::Icmpv4 => libc::AF_INET,
            Self::Icmpv6 => libc::AF_INET6,
            Self::Ethernet => libc::AF_PACKET,
        }
    }

    /// Protocol number for the socket. The Ethernet value is converted to
    /// big-endian (network) byte order; the ICMP values are used as-is.
    pub fn to_socket_protocol(&self) -> u16 {
        match *self {
            Self::Icmpv4 => libc::IPPROTO_ICMP as u16,
            Self::Icmpv6 => libc::IPPROTO_ICMPV6 as u16,
            Self::Ethernet => u16::to_be(libc::ETH_P_ALL as u16),
        }
    }

    /// Socket type; every protocol uses `SOCK_RAW`.
    pub fn to_socket_type(&self) -> i32 {
        libc::SOCK_RAW
    }
}
/// ioctl request used to look up an interface's index by name
/// (Linux request number; see netdevice(7)).
const SIOCGIFINDEX: libc::c_ulong = 0x8933;
/// ioctl request used to read an interface's MTU (Linux request number).
const SIOCGIFMTU: libc::c_ulong = 0x8921;
/// Owning wrapper around a non-blocking raw socket file descriptor.
/// The descriptor is closed when the handle is dropped.
#[derive(Debug)]
pub struct RawSocketHandle {
// Protocol the socket was created for; reused when binding to an interface.
protocol: RawSocketProtocol,
// The underlying socket file descriptor.
lower: libc::c_int,
}
impl AsRawFd for RawSocketHandle {
/// Borrows the underlying descriptor; ownership stays with the handle.
fn as_raw_fd(&self) -> RawFd {
self.lower
}
}
impl IntoRawFd for RawSocketHandle {
/// Releases ownership of the descriptor to the caller, who becomes
/// responsible for closing it.
fn into_raw_fd(self) -> RawFd {
let fd = self.lower;
// Skip `Drop`, which would otherwise close the descriptor being returned.
mem::forget(self);
fd
}
}
impl RawSocketHandle {
    /// Opens a non-blocking raw socket for `protocol`.
    ///
    /// # Errors
    /// Returns the OS error reported by `socket(2)`.
    pub fn new(protocol: RawSocketProtocol) -> io::Result<RawSocketHandle> {
        let domain = protocol.to_socket_domain();
        let kind = protocol.to_socket_type() | libc::SOCK_NONBLOCK;
        let number = protocol.to_socket_protocol() as i32;
        // SAFETY: socket(2) only takes integer arguments.
        let lower = unsafe { libc::socket(domain, kind, number) };
        if lower == -1 {
            return Err(io::Error::last_os_error());
        }
        Ok(RawSocketHandle { protocol, lower })
    }

    /// Opens a raw socket for `protocol` and binds it to `interface`.
    pub fn bound_to_interface(interface: &str, protocol: RawSocketProtocol) -> Result<Self> {
        let mut handle = Self::new(protocol)?;
        handle.bind_to_interface(interface)?;
        Ok(handle)
    }

    /// Binds the socket to `interface` using a link-layer (`sockaddr_ll`)
    /// address built from the interface's index.
    ///
    /// # Errors
    /// Returns the OS error from the index lookup or from `bind(2)`.
    pub fn bind_to_interface(&mut self, interface: &str) -> io::Result<()> {
        let mut request = ifreq_for(interface);
        let ifindex = ifreq_ioctl(self.lower, &mut request, SIOCGIFINDEX)?;
        let address = libc::sockaddr_ll {
            sll_family: libc::AF_PACKET as u16,
            sll_protocol: self.protocol.to_socket_protocol(),
            sll_ifindex: ifindex,
            // 1 is ARPHRD_ETHER; 6 is the length of an Ethernet address.
            sll_hatype: 1,
            sll_pkttype: 0,
            sll_halen: 6,
            sll_addr: [0; 8],
        };
        let address_ptr = &address as *const libc::sockaddr_ll as *const libc::sockaddr;
        let address_len = mem::size_of::<libc::sockaddr_ll>() as libc::socklen_t;
        // SAFETY: `address_ptr` points at `address`, which outlives the call,
        // and `address_len` is exactly its size.
        let status = unsafe { libc::bind(self.lower, address_ptr, address_len) };
        if status == -1 {
            Err(io::Error::last_os_error())
        } else {
            Ok(())
        }
    }

    /// Queries the MTU of `interface` through this socket.
    pub fn mtu_of_interface(&mut self, interface: &str) -> io::Result<usize> {
        let mut request = ifreq_for(interface);
        let mtu = ifreq_ioctl(self.lower, &mut request, SIOCGIFMTU)?;
        Ok(mtu as usize)
    }

    /// Receives one packet into `buffer`, returning the number of bytes written.
    ///
    /// # Errors
    /// Returns the OS error from `recv(2)`.
    pub fn recv(&self, buffer: &mut [u8]) -> io::Result<usize> {
        let target = buffer.as_mut_ptr() as *mut libc::c_void;
        // SAFETY: `target` is valid for writes of `buffer.len()` bytes.
        let received = unsafe { libc::recv(self.lower, target, buffer.len(), 0) };
        match received {
            -1 => Err(io::Error::last_os_error()),
            count => Ok(count as usize),
        }
    }

    /// Sends `buffer` as one packet, returning the number of bytes accepted.
    ///
    /// # Errors
    /// Returns the OS error from `send(2)`.
    pub fn send(&self, buffer: &[u8]) -> io::Result<usize> {
        let source = buffer.as_ptr() as *const libc::c_void;
        // SAFETY: `source` is valid for reads of `buffer.len()` bytes.
        let sent = unsafe { libc::send(self.lower, source, buffer.len(), 0) };
        match sent {
            -1 => Err(io::Error::last_os_error()),
            count => Ok(count as usize),
        }
    }
}
impl Drop for RawSocketHandle {
/// Closes the descriptor. The result of `close` is not checked.
fn drop(&mut self) {
unsafe {
libc::close(self.lower);
}
}
}
#[repr(C)]
#[derive(Debug)]
struct Ifreq {
ifr_name: [libc::c_char; libc::IF_NAMESIZE],
ifr_data: libc::c_int,
}
fn ifreq_for(name: &str) -> Ifreq {
let mut ifreq = Ifreq {
ifr_name: [0; libc::IF_NAMESIZE],
ifr_data: 0,
};
for (i, byte) in name.as_bytes().iter().enumerate() {
ifreq.ifr_name[i] = *byte as libc::c_char
}
ifreq
}
/// Issues the interface ioctl `cmd` on descriptor `lower` and returns the
/// integer the kernel stored in the request's data slot.
///
/// # Errors
/// Returns the OS error when the ioctl reports failure.
fn ifreq_ioctl(
    lower: libc::c_int,
    ifreq: &mut Ifreq,
    cmd: libc::c_ulong,
) -> io::Result<libc::c_int> {
    // SAFETY: `ifreq` is an exclusive, live reference for the whole call.
    // NOTE(review): soundness also relies on `Ifreq` being at least as large
    // as the structure the kernel reads and writes for `cmd` — confirm.
    let status = unsafe { libc::ioctl(lower, cmd as _, ifreq as *mut Ifreq) };
    match status {
        -1 => Err(io::Error::last_os_error()),
        _ => Ok(ifreq.ifr_data),
    }
}
/// Channel-based front end for a raw socket that is serviced by a background
/// task.
pub struct AsyncRawSocketChannel {
// Packets sent here are written to the socket by the background task.
pub sender: Sender<BytesMut>,
// Packets read from the socket by the background task arrive here.
pub receiver: Receiver<BytesMut>,
// Handle of the background task; stored but never awaited here.
_task: Arc<JoinHandle<()>>,
}
/// Outcome of one `select!` round in `AsyncRawSocketChannel::process`.
enum AsyncRawSocketChannelSelect {
// Result of polling the transmit queue; `None` means the queue closed.
TransmitPacket(Option<BytesMut>),
// The socket reported read readiness.
Readable(()),
}
impl AsyncRawSocketChannel {
pub fn new(mtu: usize, socket: RawSocketHandle) -> Result<AsyncRawSocketChannel> {
let (transmit_sender, transmit_receiver) = channel(RAW_SOCKET_TRANSMIT_QUEUE_LEN);
let (receive_sender, receive_receiver) = channel(RAW_SOCKET_RECEIVE_QUEUE_LEN);
let task = AsyncRawSocketChannel::launch(mtu, socket, transmit_receiver, receive_sender)?;
Ok(AsyncRawSocketChannel {
sender: transmit_sender,
receiver: receive_receiver,
_task: Arc::new(task),
})
}
/// Spawns the task that runs `process`; a failure of that task is logged as a
/// warning rather than propagated.
fn launch(
    mtu: usize,
    socket: RawSocketHandle,
    transmit_receiver: Receiver<BytesMut>,
    receive_sender: Sender<BytesMut>,
) -> Result<JoinHandle<()>> {
    let worker = async move {
        let outcome =
            AsyncRawSocketChannel::process(mtu, socket, transmit_receiver, receive_sender).await;
        if let Err(error) = outcome {
            warn!("failed to process raw socket: {}", error);
        }
    };
    Ok(tokio::task::spawn(worker))
}
/// Services the raw socket: forwards packets from `transmit_receiver` to the
/// socket and packets read from the socket to `receive_sender`.
///
/// Returns `Ok(())` once the transmit queue is closed, and an error when a
/// socket read or write fails with anything other than `WouldBlock`.
async fn process(
mtu: usize,
socket: RawSocketHandle,
mut transmit_receiver: Receiver<BytesMut>,
receive_sender: Sender<BytesMut>,
) -> Result<()> {
// The raw descriptor is handed to tokio's `UdpSocket` wrapper; only its
// readiness polling and try_recv/try_send calls are used here.
let socket = unsafe { std::net::UdpSocket::from_raw_fd(socket.into_raw_fd()) };
let socket = UdpSocket::from_std(socket)?;
// NOTE(review): each read is capped at `mtu` bytes — confirm the caller's
// value leaves room for any link-layer header the socket delivers.
let mut buffer = vec![0; mtu];
loop {
let selection = select! {
x = transmit_receiver.recv() => AsyncRawSocketChannelSelect::TransmitPacket(x),
x = socket.readable() => AsyncRawSocketChannelSelect::Readable(x?),
};
match selection {
AsyncRawSocketChannelSelect::Readable(_) => {
match socket.try_recv(&mut buffer) {
Ok(len) => {
if len == 0 {
continue;
}
let buffer = (&buffer[0..len]).into();
// A rejected send (e.g. full queue) drops the packet with a debug log.
if let Err(error) = receive_sender.try_send(buffer) {
debug!(
"failed to process received packet from raw socket: {}",
error
);
}
}
Err(ref error) => {
// Readiness can be spurious; just go around again.
if error.kind() == ErrorKind::WouldBlock {
continue;
}
return Err(anyhow!("failed to read from raw socket: {}", error));
}
};
}
AsyncRawSocketChannelSelect::TransmitPacket(Some(packet)) => {
match socket.try_send(&packet) {
Ok(_len) => {}
Err(ref error) => {
// The packet is dropped, not retried, when the socket cannot take it.
if error.kind() == ErrorKind::WouldBlock {
debug!("failed to transmit: would block");
continue;
}
return Err(anyhow!(
"failed to write {} bytes to raw socket: {}",
packet.len(),
error
));
}
};
}
AsyncRawSocketChannelSelect::TransmitPacket(None) => {
break;
}
}
}
Ok(())
}
}

View File

@ -0,0 +1,212 @@
use anyhow::{anyhow, Result};
use bytes::BytesMut;
use etherparse::{EtherType, Ethernet2Header, IpNumber, Ipv4Header, Ipv6Header, TcpHeader};
use log::{debug, trace, warn};
use smoltcp::wire::EthernetAddress;
use std::{
collections::{hash_map::Entry, HashMap},
sync::Arc,
};
use tokio::sync::broadcast::{
channel as broadcast_channel, Receiver as BroadcastReceiver, Sender as BroadcastSender,
};
use tokio::{
select,
sync::{
mpsc::{channel, Receiver, Sender},
Mutex,
},
task::JoinHandle,
};
/// Capacity of the shared queue carrying packets from members to the bridge.
const TO_BRIDGE_QUEUE_LEN: usize = 3000;
/// Capacity of each member's queue of packets delivered by the bridge.
const FROM_BRIDGE_QUEUE_LEN: usize = 3000;
/// Capacity of the broadcast channel used for multicast/broadcast frames.
const BROADCAST_QUEUE_LEN: usize = 3000;
/// Capacity of the queue of pending member-leave notifications.
const MEMBER_LEAVE_QUEUE_LEN: usize = 30;
/// Bridge-side record of a joined member.
#[derive(Debug)]
struct BridgeMember {
// Queue on which the bridge delivers unicast frames addressed to the member.
pub from_bridge_sender: Sender<BytesMut>,
}
/// Member-side handle returned by `VirtualBridge::join`. Dropping it asks the
/// bridge to remove the member.
pub struct BridgeJoinHandle {
// Address the member joined with; sent back to the bridge on drop.
mac: EthernetAddress,
// Queue for frames the member wants the bridge to route.
pub to_bridge_sender: Sender<BytesMut>,
// Unicast frames addressed to this member.
pub from_bridge_receiver: Receiver<BytesMut>,
// Frames the bridge published on the broadcast channel.
pub from_broadcast_receiver: BroadcastReceiver<BytesMut>,
// Queue used to notify the bridge that this member is leaving.
member_leave_sender: Sender<EthernetAddress>,
}
impl Drop for BridgeJoinHandle {
fn drop(&mut self) {
if let Err(error) = self.member_leave_sender.try_send(self.mac) {
warn!(
"virtual bridge member {} failed to leave: {}",
self.mac, error
);
}
}
}
/// Shared, async-locked table of bridge members keyed by Ethernet address.
type VirtualBridgeMemberMap = Arc<Mutex<HashMap<EthernetAddress, BridgeMember>>>;
/// In-process Ethernet bridge: members join with a MAC address and exchange
/// frames through queues serviced by a background task.
#[derive(Clone)]
pub struct VirtualBridge {
// Cloned into every join handle; carries frames into the bridge task.
to_bridge_sender: Sender<BytesMut>,
// Source of each member's broadcast subscription.
from_broadcast_sender: BroadcastSender<BytesMut>,
// Cloned into every join handle; carries leave notifications.
member_leave_sender: Sender<EthernetAddress>,
// Member table shared with the bridge task.
members: VirtualBridgeMemberMap,
// Handle of the bridge task; stored but never awaited here.
_task: Arc<JoinHandle<()>>,
}
/// Outcome of one `select!` round in `VirtualBridge::process`.
enum VirtualBridgeSelect {
// Something arrived on the bridge's own broadcast subscription
// (`None` when that receive failed).
BroadcastSent(Option<BytesMut>),
// A frame arrived from a member; `None` means the queue closed.
PacketReceived(Option<BytesMut>),
// A member asked to leave; `None` means the queue closed.
MemberLeave(Option<EthernetAddress>),
}
impl VirtualBridge {
pub fn new() -> Result<VirtualBridge> {
let (to_bridge_sender, to_bridge_receiver) = channel::<BytesMut>(TO_BRIDGE_QUEUE_LEN);
let (member_leave_sender, member_leave_reciever) =
channel::<EthernetAddress>(MEMBER_LEAVE_QUEUE_LEN);
let (from_broadcast_sender, from_broadcast_receiver) =
broadcast_channel(BROADCAST_QUEUE_LEN);
let members = Arc::new(Mutex::new(HashMap::new()));
let handle = {
let members = members.clone();
let broadcast_rx_sender = from_broadcast_sender.clone();
tokio::task::spawn(async move {
if let Err(error) = VirtualBridge::process(
members,
member_leave_reciever,
to_bridge_receiver,
broadcast_rx_sender,
from_broadcast_receiver,
)
.await
{
warn!("virtual bridge processing task failed: {}", error);
}
})
};
Ok(VirtualBridge {
to_bridge_sender,
from_broadcast_sender,
member_leave_sender,
members,
_task: Arc::new(handle),
})
}
/// Registers `mac` as a bridge member and returns the handle through which
/// the member exchanges frames with the bridge.
///
/// # Errors
/// Fails if a member with the same address is already registered.
pub async fn join(&self, mac: EthernetAddress) -> Result<BridgeJoinHandle> {
    let (from_bridge_sender, from_bridge_receiver) = channel::<BytesMut>(FROM_BRIDGE_QUEUE_LEN);

    let mut members = self.members.lock().await;
    let Entry::Vacant(slot) = members.entry(mac) else {
        return Err(anyhow!("virtual bridge member {} already exists", mac));
    };
    slot.insert(BridgeMember { from_bridge_sender });
    // Release the table before logging and building the handle.
    drop(members);

    debug!("virtual bridge member {} has joined", mac);
    Ok(BridgeJoinHandle {
        mac,
        member_leave_sender: self.member_leave_sender.clone(),
        from_bridge_receiver,
        from_broadcast_receiver: self.from_broadcast_sender.subscribe(),
        to_bridge_sender: self.to_bridge_sender.clone(),
    })
}
async fn process(
members: VirtualBridgeMemberMap,
mut member_leave_reciever: Receiver<EthernetAddress>,
mut to_bridge_receiver: Receiver<BytesMut>,
broadcast_rx_sender: BroadcastSender<BytesMut>,
mut from_broadcast_receiver: BroadcastReceiver<BytesMut>,
) -> Result<()> {
loop {
let selection = select! {
biased;
x = from_broadcast_receiver.recv() => VirtualBridgeSelect::BroadcastSent(x.ok()),
x = to_bridge_receiver.recv() => VirtualBridgeSelect::PacketReceived(x),
x = member_leave_reciever.recv() => VirtualBridgeSelect::MemberLeave(x),
};
match selection {
VirtualBridgeSelect::PacketReceived(Some(mut packet)) => {
let (header, payload) = match Ethernet2Header::from_slice(&packet) {
Ok(data) => data,
Err(error) => {
debug!("virtual bridge failed to parse ethernet header: {}", error);
continue;
}
};
// recalculate TCP checksums when routing packets.
// the xen network backend / frontend drivers for linux
// use checksum offloading but since we bypass some layers
// of the kernel we have to do it ourselves.
if header.ether_type == EtherType::IPV4 {
let (ipv4, payload) = Ipv4Header::from_slice(payload)?;
if ipv4.protocol == IpNumber::TCP {
let (mut tcp, payload) = TcpHeader::from_slice(payload)?;
tcp.checksum = tcp.calc_checksum_ipv4(&ipv4, payload)?;
let tcp_header_offset = Ethernet2Header::LEN + ipv4.header_len();
let tcp_header_bytes = tcp.to_bytes();
for (i, b) in tcp_header_bytes.iter().enumerate() {
packet[tcp_header_offset + i] = *b;
}
}
} else if header.ether_type == EtherType::IPV6 {
let (ipv6, payload) = Ipv6Header::from_slice(payload)?;
if ipv6.next_header == IpNumber::TCP {
let (mut tcp, payload) = TcpHeader::from_slice(payload)?;
tcp.checksum = tcp.calc_checksum_ipv6(&ipv6, payload)?;
let tcp_header_offset = Ethernet2Header::LEN + ipv6.header_len();
let tcp_header_bytes = tcp.to_bytes();
for (i, b) in tcp_header_bytes.iter().enumerate() {
packet[tcp_header_offset + i] = *b;
}
}
}
let destination = EthernetAddress(header.destination);
if destination.is_multicast() {
broadcast_rx_sender.send(packet)?;
continue;
}
match members.lock().await.get(&destination) {
Some(member) => {
member.from_bridge_sender.try_send(packet)?;
trace!(
"sending bridged packet from {} to {}",
EthernetAddress(header.source),
EthernetAddress(header.destination)
);
}
None => {
trace!("no bridge member with address: {}", destination);
}
}
}
VirtualBridgeSelect::MemberLeave(Some(mac)) => {
if members.lock().await.remove(&mac).is_some() {
debug!("virtual bridge member {} has left", mac);
}
}
VirtualBridgeSelect::PacketReceived(None) => break,
VirtualBridgeSelect::MemberLeave(None) => {}
VirtualBridgeSelect::BroadcastSent(_) => {}
}
}
Ok(())
}
}

45
crates/kratart/Cargo.toml Normal file
View File

@ -0,0 +1,45 @@
[package]
name = "kratart"
version.workspace = true
edition = "2021"
resolver = "2"
[dependencies]
advmac = { path = "../vendor/advmac" }
anyhow = { workspace = true }
async-stream = { workspace = true }
async-trait = { workspace = true }
backhand = { workspace = true }
bytes = { workspace = true }
clap = { workspace = true }
cli-tables = { workspace = true }
directories = { workspace = true }
env_logger = { workspace = true }
flate2 = { workspace = true }
futures = { workspace = true }
ipnetwork = { workspace = true }
krata = { path = "../krata" }
log = { workspace = true }
loopdev = { path = "../vendor/loopdev" }
nix = { workspace = true, features = ["process"] }
oci-spec = { workspace = true }
path-clean = { workspace = true }
reqwest = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
sha256 = { workspace = true }
signal-hook = { workspace = true }
tar = { workspace = true }
termion = { workspace = true }
tokio = { workspace = true }
tokio-stream = { workspace = true }
tonic = { workspace = true, features = ["tls"] }
url = { workspace = true }
uuid = { workspace = true }
walkdir = { workspace = true }
zstd = { workspace = true }
xenclient = { path = "../xen/xenclient" }
xenstore = { path = "../xen/xenstore" }
[lib]
name = "kratart"

View File

@ -0,0 +1,33 @@
use anyhow::{anyhow, Result};
use loopdev::{LoopControl, LoopDevice};
use xenclient::BlockDeviceRef;
/// Attaches files to free loop devices and detaches them again.
pub struct AutoLoop {
// Loop control handle used to find the next free loop device.
control: LoopControl,
}
impl AutoLoop {
pub fn new(control: LoopControl) -> AutoLoop {
AutoLoop { control }
}
pub fn loopify(&self, file: &str) -> Result<BlockDeviceRef> {
let device = self.control.next_free()?;
device.with().read_only(true).attach(file)?;
let path = device
.path()
.ok_or(anyhow!("unable to get loop device path"))?
.to_str()
.ok_or(anyhow!("unable to convert loop device path to string",))?
.to_string();
let major = device.major()?;
let minor = device.minor()?;
Ok(BlockDeviceRef { path, major, minor })
}
pub fn unloop(&self, device: &str) -> Result<()> {
let device = LoopDevice::open(device)?;
device.detach()?;
Ok(())
}
}

View File

@ -0,0 +1,71 @@
use crate::image::ImageInfo;
use anyhow::Result;
use backhand::{FilesystemWriter, NodeHeader};
use krata::launchcfg::LaunchInfo;
use log::trace;
use std::fs;
use std::fs::File;
use std::path::PathBuf;
use uuid::Uuid;
/// Describes the squashfs "config block" built for one guest: the image
/// configuration plus the launch configuration.
pub struct ConfigBlock<'a> {
// Image whose configuration is embedded as `/image/config.json`.
pub image_info: &'a ImageInfo,
// Path of the squashfs file written by `build`.
pub file: PathBuf,
// Per-guest temporary directory that contains `file`.
pub dir: PathBuf,
}
impl ConfigBlock<'_> {
pub fn new<'a>(uuid: &Uuid, image_info: &'a ImageInfo) -> Result<ConfigBlock<'a>> {
let mut dir = std::env::temp_dir().clone();
dir.push(format!("krata-cfg-{}", uuid));
fs::create_dir_all(&dir)?;
let mut file = dir.clone();
file.push("config.squashfs");
Ok(ConfigBlock {
image_info,
file,
dir,
})
}
pub fn build(&self, launch_config: &LaunchInfo) -> Result<()> {
trace!("build launch_config={:?}", launch_config);
let manifest = self.image_info.config.to_string()?;
let launch = serde_json::to_string(launch_config)?;
let mut writer = FilesystemWriter::default();
writer.push_dir(
"/image",
NodeHeader {
permissions: 384,
uid: 0,
gid: 0,
mtime: 0,
},
)?;
writer.push_file(
manifest.as_bytes(),
"/image/config.json",
NodeHeader {
permissions: 384,
uid: 0,
gid: 0,
mtime: 0,
},
)?;
writer.push_file(
launch.as_bytes(),
"/launch.json",
NodeHeader {
permissions: 384,
uid: 0,
gid: 0,
mtime: 0,
},
)?;
let mut file = File::create(&self.file)?;
trace!("build write sqaushfs");
writer.write(&mut file)?;
trace!("build complete");
Ok(())
}
}

View File

@ -0,0 +1,18 @@
use anyhow::Result;
use tokio::fs::File;
/// Pair of independently opened handles on a console tty.
pub struct XenConsole {
// Handle opened for reading only.
pub read_handle: File,
// Handle opened for writing only.
pub write_handle: File,
}
impl XenConsole {
    /// Opens `tty` twice: once read-only and once write-only.
    ///
    /// # Errors
    /// Fails if either open fails.
    pub async fn new(tty: &str) -> Result<XenConsole> {
        let mut read_options = File::options();
        read_options.read(true).write(false);
        let mut write_options = File::options();
        write_options.read(false).write(true);

        let read_handle = read_options.open(tty).await?;
        let write_handle = write_options.open(tty).await?;
        Ok(Self {
            read_handle,
            write_handle,
        })
    }
}

View File

@ -0,0 +1,70 @@
use super::ImageInfo;
use anyhow::Result;
use log::debug;
use oci_spec::image::{ImageConfiguration, ImageManifest};
use std::path::{Path, PathBuf};
use tokio::fs;
/// On-disk cache of compiled images, keyed by a digest string.
pub struct ImageCache {
// Directory holding `<digest>.squashfs`, `<digest>.manifest.json` and
// `<digest>.config.json` for every cached image.
cache_dir: PathBuf,
}
impl ImageCache {
    /// Creates a cache rooted at `cache_dir`. The directory is not created or
    /// checked here.
    pub fn new(cache_dir: &Path) -> Result<ImageCache> {
        let cache_dir = cache_dir.to_path_buf();
        Ok(ImageCache { cache_dir })
    }

    /// Looks up the cached image for `digest`.
    ///
    /// Returns `Ok(None)` unless the squashfs, manifest and config entries all
    /// exist and are regular files.
    ///
    /// # Errors
    /// Fails if an entry cannot be read or parsed.
    pub async fn recall(&self, digest: &str) -> Result<Option<ImageInfo>> {
        let squashfs_path = self.cache_dir.join(format!("{}.squashfs", digest));
        let manifest_path = self.cache_dir.join(format!("{}.manifest.json", digest));
        let config_path = self.cache_dir.join(format!("{}.config.json", digest));

        let all_present =
            squashfs_path.exists() && manifest_path.exists() && config_path.exists();
        if !all_present {
            debug!("cache miss digest={}", digest);
            return Ok(None);
        }

        let squashfs_metadata = fs::metadata(&squashfs_path).await?;
        let manifest_metadata = fs::metadata(&manifest_path).await?;
        let config_metadata = fs::metadata(&config_path).await?;
        let all_regular = squashfs_metadata.is_file()
            && manifest_metadata.is_file()
            && config_metadata.is_file();
        if !all_regular {
            return Ok(None);
        }

        let manifest_text = fs::read_to_string(&manifest_path).await?;
        let manifest: ImageManifest = serde_json::from_str(&manifest_text)?;
        let config_text = fs::read_to_string(&config_path).await?;
        let config: ImageConfiguration = serde_json::from_str(&config_text)?;
        debug!("cache hit digest={}", digest);
        let info = ImageInfo::new(squashfs_path, manifest, config)?;
        Ok(Some(info))
    }

    /// Copies the image's squashfs into the cache, writes its manifest and
    /// config next to it, and returns an `ImageInfo` pointing at the cached
    /// squashfs.
    pub async fn store(&self, digest: &str, info: &ImageInfo) -> Result<ImageInfo> {
        debug!("cache store digest={}", digest);
        let squashfs_path = self.cache_dir.join(format!("{}.squashfs", digest));
        let manifest_path = self.cache_dir.join(format!("{}.manifest.json", digest));
        let config_path = self.cache_dir.join(format!("{}.config.json", digest));

        fs::copy(&info.image_squashfs, &squashfs_path).await?;
        let manifest_text = serde_json::to_string_pretty(&info.manifest)?;
        fs::write(&manifest_path, manifest_text).await?;
        let config_text = serde_json::to_string_pretty(&info.config)?;
        fs::write(&config_path, config_text).await?;

        let manifest = info.manifest.clone();
        let config = info.config.clone();
        ImageInfo::new(squashfs_path, manifest, config)
    }
}

View File

@ -0,0 +1,138 @@
use anyhow::{anyhow, Result};
use bytes::Bytes;
use oci_spec::image::{Arch, Descriptor, ImageIndex, ImageManifest, MediaType, Os, ToDockerV2S2};
use reqwest::{Client, RequestBuilder, Response};
use tokio::{fs::File, io::AsyncWriteExt};
use url::Url;
/// Operating system an index entry must declare to be picked.
const MANIFEST_PICKER_PLATFORM: Os = Os::Linux;
/// CPU architecture an index entry must declare to be picked.
const MANIFEST_PICKER_ARCHITECTURE: Arch = Arch::Amd64;
/// Minimal client for the `/v2/` HTTP API of a container image registry.
pub struct RegistryClient {
// HTTP client used for every request.
agent: Client,
// Base URL of the registry; request paths are joined onto it.
url: Url,
}
impl RegistryClient {
    /// Creates a client for the registry at `url`.
    pub fn new(url: Url) -> Result<RegistryClient> {
        Ok(RegistryClient {
            agent: Client::new(),
            url,
        })
    }

    /// Executes `req` and returns the response.
    ///
    /// # Errors
    /// Fails on transport errors and on any 4xx/5xx status, so an error page
    /// is never mistaken for a manifest or a blob.
    async fn call(&mut self, req: RequestBuilder) -> Result<Response> {
        let response = self.agent.execute(req.build()?).await?;
        Ok(response.error_for_status()?)
    }

    /// Downloads the blob identified by `descriptor` into memory.
    pub async fn get_blob(&mut self, name: &str, descriptor: &Descriptor) -> Result<Bytes> {
        let url = self.blob_url(name, descriptor)?;
        let response = self.call(self.agent.get(url.as_str())).await?;
        Ok(response.bytes().await?)
    }

    /// Streams the blob identified by `descriptor` into `dest` and returns the
    /// number of bytes written.
    pub async fn write_blob_to_file(
        &mut self,
        name: &str,
        descriptor: &Descriptor,
        mut dest: File,
    ) -> Result<u64> {
        let url = self.blob_url(name, descriptor)?;
        let mut response = self.call(self.agent.get(url.as_str())).await?;
        let mut size: u64 = 0;
        while let Some(chunk) = response.chunk().await? {
            dest.write_all(&chunk).await?;
            size += chunk.len() as u64;
        }
        Ok(size)
    }

    /// Fetches `reference` and parses the body as an image manifest, without
    /// resolving image indexes. Returns the manifest together with the digest
    /// reported by the registry.
    async fn get_raw_manifest_with_digest(
        &mut self,
        name: &str,
        reference: &str,
    ) -> Result<(ImageManifest, String)> {
        let response = self.request_manifest(name, reference).await?;
        let digest = Self::content_digest(&response)?;
        let manifest = serde_json::from_str(&response.text().await?)?;
        Ok((manifest, digest))
    }

    /// Fetches the manifest for `reference`. When the registry answers with an
    /// image index, the entry matching the picker platform/architecture is
    /// selected and its manifest is fetched instead.
    ///
    /// # Errors
    /// Fails if the response lacks a `Content-Type` or content digest header,
    /// if the index has no matching entry, or if the body cannot be parsed.
    pub async fn get_manifest_with_digest(
        &mut self,
        name: &str,
        reference: &str,
    ) -> Result<(ImageManifest, String)> {
        let response = self.request_manifest(name, reference).await?;
        let content_type = response
            .headers()
            .get("Content-Type")
            .ok_or_else(|| anyhow!("registry response did not have a Content-Type header"))?
            .to_str()?;
        let is_index = content_type == MediaType::ImageIndex.to_string()
            || content_type == MediaType::ImageIndex.to_docker_v2s2()?;
        if is_index {
            let index = serde_json::from_str(&response.text().await?)?;
            let descriptor = self
                .pick_manifest(index)
                .ok_or_else(|| anyhow!("unable to pick manifest from index"))?;
            return self
                .get_raw_manifest_with_digest(name, descriptor.digest())
                .await;
        }
        let digest = Self::content_digest(&response)?;
        let manifest = serde_json::from_str(&response.text().await?)?;
        Ok((manifest, digest))
    }

    /// Returns the first index entry whose platform matches the picker
    /// constants, if any.
    fn pick_manifest(&mut self, index: ImageIndex) -> Option<Descriptor> {
        for item in index.manifests() {
            if let Some(platform) = item.platform() {
                if *platform.os() == MANIFEST_PICKER_PLATFORM
                    && *platform.architecture() == MANIFEST_PICKER_ARCHITECTURE
                {
                    return Some(item.clone());
                }
            }
        }
        None
    }

    /// URL of the blob `descriptor` within repository `name`.
    fn blob_url(&self, name: &str, descriptor: &Descriptor) -> Result<Url> {
        Ok(self
            .url
            .join(&format!("/v2/{}/blobs/{}", name, descriptor.digest()))?)
    }

    /// Requests the manifest document for `reference`, accepting both OCI and
    /// Docker v2 schema 2 manifests and indexes.
    async fn request_manifest(&mut self, name: &str, reference: &str) -> Result<Response> {
        let url = self
            .url
            .join(&format!("/v2/{}/manifests/{}", name, reference))?;
        let accept = format!(
            "{}, {}, {}, {}",
            MediaType::ImageManifest.to_docker_v2s2()?,
            MediaType::ImageManifest,
            MediaType::ImageIndex,
            MediaType::ImageIndex.to_docker_v2s2()?,
        );
        self.call(self.agent.get(url.as_str()).header("Accept", &accept))
            .await
    }

    /// Extracts the `Docker-Content-Digest` header from a manifest response.
    fn content_digest(response: &Response) -> Result<String> {
        Ok(response
            .headers()
            .get("Docker-Content-Digest")
            .ok_or_else(|| anyhow!("fetching manifest did not yield a content digest"))?
            .to_str()?
            .to_string())
    }
}

View File

@ -0,0 +1,414 @@
pub mod cache;
pub mod fetch;
pub mod name;
use crate::image::cache::ImageCache;
use crate::image::fetch::RegistryClient;
use crate::image::name::ImageName;
use anyhow::{anyhow, Result};
use backhand::compression::Compressor;
use backhand::{FilesystemCompressor, FilesystemWriter, NodeHeader};
use flate2::read::GzDecoder;
use log::{debug, trace, warn};
use oci_spec::image::{Descriptor, ImageConfiguration, ImageManifest, MediaType, ToDockerV2S2};
use std::fs::File;
use std::io::{BufReader, Cursor};
use std::os::unix::fs::{FileTypeExt, MetadataExt, PermissionsExt};
use std::path::{Path, PathBuf};
use std::{fs, io};
use tar::{Archive, Entry};
use uuid::Uuid;
use walkdir::WalkDir;
/// Version of the squashfs layout produced by the compiler. It is part of the
/// cache key, so changing it makes previously cached images miss.
pub const IMAGE_SQUASHFS_VERSION: u64 = 1;
/// Capacity of the buffered reader placed in front of each layer file.
const LAYER_BUFFER_SIZE: usize = 128 * 1024;
// We utilize in-memory buffers when generating the squashfs for files
// under this size. For files of or above this size, we open a file.
// The file is then read during writing. We want to reduce the number
// of open files during squashfs generation, so this limit should be set
// to something that limits the number of files on average, at the expense
// of increased memory usage.
// TODO: it may be wise to, during crawling of the image layers, infer this
// value from the size to file count ratio of all layers.
const SQUASHFS_MEMORY_BUFFER_LIMIT: usize = 8 * 1024 * 1024;
/// A compiled image: its squashfs file plus the manifest and configuration it
/// was built from.
pub struct ImageInfo {
// Path of the squashfs file holding the image's root filesystem.
pub image_squashfs: PathBuf,
// Manifest fetched from the registry.
pub manifest: ImageManifest,
// Image configuration fetched from the registry.
pub config: ImageConfiguration,
}
impl ImageInfo {
    /// Bundles a squashfs path with its manifest and configuration. Nothing
    /// is validated, so this never fails.
    fn new(
        squashfs: PathBuf,
        manifest: ImageManifest,
        config: ImageConfiguration,
    ) -> Result<ImageInfo> {
        let image_squashfs = squashfs;
        Ok(Self {
            image_squashfs,
            manifest,
            config,
        })
    }
}
/// Downloads an image's layers and compiles them into a squashfs, consulting
/// and populating an `ImageCache`.
pub struct ImageCompiler<'a> {
// Cache consulted before compiling and updated afterwards.
cache: &'a ImageCache,
}
/// Compression applied to a downloaded layer archive.
#[derive(Debug)]
enum LayerCompressionType {
// Plain tar archive.
None,
// Gzip-compressed tar archive.
Gzip,
// Zstandard-compressed tar archive.
Zstd,
}
/// A layer that has been downloaded to disk.
struct LayerFile {
// Digest of the layer; used in log messages.
digest: String,
// How the file at `path` is compressed.
compression: LayerCompressionType,
// Location of the downloaded layer file.
path: PathBuf,
}
impl LayerFile {
    /// Opens the layer file and returns a reader that yields the
    /// decompressed tar stream.
    ///
    /// # Errors
    /// Fails if the file cannot be opened or the zstd decoder cannot be
    /// created.
    fn open_reader(&self) -> Result<Box<dyn io::Read>> {
        let source = BufReader::with_capacity(LAYER_BUFFER_SIZE, File::open(&self.path)?);
        let reader: Box<dyn io::Read> = match self.compression {
            LayerCompressionType::None => Box::new(source),
            LayerCompressionType::Gzip => Box::new(GzDecoder::new(source)),
            LayerCompressionType::Zstd => Box::new(zstd::Decoder::new(source)?),
        };
        Ok(reader)
    }
}
impl ImageCompiler<'_> {
/// Creates a compiler that reads from and writes to `cache`.
pub fn new(cache: &ImageCache) -> Result<ImageCompiler> {
    let compiler = ImageCompiler { cache };
    Ok(compiler)
}
/// Compiles `image` into a squashfs, or returns it from the cache.
///
/// All intermediate files live in a fresh directory under the system temp
/// directory. That directory is removed afterwards whether compilation
/// succeeded or failed, so failed attempts do not pile up on disk.
///
/// # Errors
/// Returns the compilation error if there was one (a cleanup failure is then
/// ignored); otherwise returns any error from removing the temporary
/// directory.
pub async fn compile(&self, image: &ImageName) -> Result<ImageInfo> {
    debug!("compile image={image}");
    let tmp_dir = std::env::temp_dir().join(format!("krata-compile-{}", Uuid::new_v4()));
    let image_dir = tmp_dir.join("image");
    fs::create_dir_all(&image_dir)?;
    let layer_dir = tmp_dir.join("layer");
    fs::create_dir_all(&layer_dir)?;
    let squash_file = tmp_dir.join("image.squashfs");

    let result = self
        .download_and_compile(image, &layer_dir, &image_dir, &squash_file)
        .await;
    // Clean up before looking at the outcome so the error path is covered too.
    let cleanup = fs::remove_dir_all(&tmp_dir);
    let info = result?;
    cleanup?;
    Ok(info)
}
/// Resolves the image manifest, returns the cached image when one exists,
/// and otherwise downloads every layer, applies them in order into
/// `image_dir`, squashes the result into `squash_file` and stores it in the
/// cache.
///
/// The cache key is derived from the manifest digest and
/// `IMAGE_SQUASHFS_VERSION`.
async fn download_and_compile(
    &self,
    image: &ImageName,
    layer_dir: &Path,
    image_dir: &PathBuf,
    squash_file: &PathBuf,
) -> Result<ImageInfo> {
    // `display()` rather than `to_str().unwrap()`: logging must not panic on
    // a non-UTF-8 temp path.
    debug!(
        "download manifest image={image}, image_dir={}",
        image_dir.display()
    );
    let mut client = RegistryClient::new(image.registry_url()?)?;
    let (manifest, digest) = client
        .get_manifest_with_digest(&image.name, &image.reference)
        .await?;
    let cache_key = format!(
        "manifest={}:squashfs-version={}\n",
        digest, IMAGE_SQUASHFS_VERSION
    );
    let cache_digest = sha256::digest(cache_key);

    if let Some(cached) = self.cache.recall(&cache_digest).await? {
        return Ok(cached);
    }

    debug!(
        "download config digest={} size={}",
        manifest.config().digest(),
        manifest.config().size(),
    );
    let config_bytes = client.get_blob(&image.name, manifest.config()).await?;
    let config: ImageConfiguration = serde_json::from_slice(&config_bytes)?;

    // Download everything first, then apply the layers in manifest order.
    let mut layers: Vec<LayerFile> = Vec::new();
    for layer in manifest.layers() {
        layers.push(
            self.download_layer(image, layer, layer_dir, &mut client)
                .await?,
        );
    }

    for layer in layers {
        debug!(
            "process layer digest={} compression={:?}",
            &layer.digest, layer.compression
        );
        let mut archive = Archive::new(layer.open_reader()?);
        for entry in archive.entries()? {
            let mut entry = entry?;
            let path = entry.path()?;
            // NOTE(review): an entry whose path has no final component (for
            // example "./") aborts the whole compile here — confirm whether
            // such entries should be skipped or unpacked instead.
            let Some(name) = path.file_name() else {
                return Err(anyhow!("unable to get file name"));
            };
            let Some(name) = name.to_str() else {
                return Err(anyhow!("unable to get file name as string"));
            };

            // Entries named `.wh.*` are whiteout markers, not real files.
            if name.starts_with(".wh.") {
                self.process_whiteout_entry(&entry, name, &layer, image_dir)?;
            } else {
                self.process_write_entry(&mut entry, &layer, image_dir)?;
            }
        }
        // The layer archive is no longer needed once applied.
        fs::remove_file(&layer.path)?;
    }

    self.squash(image_dir, squash_file)?;
    let info = ImageInfo::new(squash_file.clone(), manifest, config)?;
    self.cache.store(&cache_digest, &info).await
}
/// Applies a whiteout marker from `layer` to the unpacked tree in `image_dir`.
///
/// * `.wh..wh..opq` (opaque whiteout) removes everything inside the marker's
///   directory.
/// * `.wh.<name>` removes `<name>`, including its contents when it is a
///   directory, from the marker's directory.
///
/// A target that does not exist locally is logged as a warning, not treated
/// as an error.
///
/// # Errors
/// Fails if the marker or its target would fall outside `image_dir`, if the
/// marker names no usable target, or if a removal fails.
fn process_whiteout_entry<T: io::Read>(
    &self,
    entry: &Entry<T>,
    name: &str,
    layer: &LayerFile,
    image_dir: &PathBuf,
) -> Result<()> {
    // Start from the directory that contains the marker.
    let mut dst = self.check_safe_entry(entry, image_dir)?;
    dst.pop();

    let opaque = name == ".wh..wh..opq";

    if !opaque {
        // The path to delete is the marker's name without the `.wh.` prefix.
        let target = name.strip_prefix(".wh.").unwrap_or(name);
        // An empty, "." or ".." target would resolve to the directory itself
        // or its parent and wipe far more than one entry.
        if matches!(target, "" | "." | "..") {
            return Err(anyhow!("whiteout entry does not name a valid target"));
        }
        dst.push(target);
        self.check_safe_path(&dst, image_dir)?;
    }

    trace!(
        "whiteout entry layer={} path={:?}",
        &layer.digest,
        entry.path()?
    );

    if opaque {
        if dst.is_dir() {
            for entry in fs::read_dir(&dst)? {
                let entry = entry?;
                let path = entry.path();
                if path.is_symlink() || path.is_file() {
                    fs::remove_file(&path)?;
                } else if path.is_dir() {
                    fs::remove_dir_all(&path)?;
                } else {
                    return Err(anyhow!("opaque whiteout entry did not exist"));
                }
            }
        } else {
            warn!(
                "whiteout entry missing locally layer={} path={:?} local={:?}",
                &layer.digest,
                entry.path()?,
                dst,
            );
        }
    } else if dst.is_file() || dst.is_symlink() {
        fs::remove_file(&dst)?;
    } else if dst.is_dir() {
        // A whited-out directory disappears together with its contents.
        fs::remove_dir_all(&dst)?;
    } else {
        warn!(
            "whiteout entry missing locally layer={} path={:?} local={:?}",
            &layer.digest,
            entry.path()?,
            dst,
        );
    }
    Ok(())
}
/// Unpacks a regular (non-whiteout) layer entry beneath `image_dir`.
///
/// The entry path and type are traced before unpacking. Any error from reading
/// the entry path or from unpacking is propagated.
fn process_write_entry<T: io::Read>(
    &self,
    entry: &mut Entry<T>,
    layer: &LayerFile,
    image_dir: &PathBuf,
) -> Result<()> {
    trace!(
        "unpack entry layer={} path={:?} type={:?}",
        &layer.digest,
        entry.path()?,
        entry.header().entry_type()
    );
    // Only failure matters here; the success value of `unpack_in` is discarded.
    let _ = entry.unpack_in(image_dir)?;
    Ok(())
}
/// Resolves the on-disk destination of `entry` and verifies it stays inside `image_dir`.
///
/// For whiteout markers (file name starting with `.wh.`) only the containing
/// directory is validated, but the full marker path is still returned.
fn check_safe_entry<T: io::Read>(
    &self,
    entry: &Entry<T>,
    image_dir: &PathBuf,
) -> Result<PathBuf> {
    let target = image_dir.join(entry.path()?);
    let is_whiteout = target
        .file_name()
        .and_then(|name| name.to_str())
        .map_or(false, |name| name.starts_with(".wh."));
    if is_whiteout {
        let mut parent = target.clone();
        parent.pop();
        self.check_safe_path(&parent, image_dir)?;
    } else {
        self.check_safe_path(&target, image_dir)?;
    }
    Ok(target)
}
/// Rejects any `dst` that, after lexical cleaning, is not located under `image_dir`.
fn check_safe_path(&self, dst: &PathBuf, image_dir: &PathBuf) -> Result<()> {
    if path_clean::clean(dst).starts_with(image_dir) {
        Ok(())
    } else {
        Err(anyhow!("layer attempts to work outside image dir"))
    }
}
async fn download_layer(
&self,
image: &ImageName,
layer: &Descriptor,
layer_dir: &Path,
client: &mut RegistryClient,
) -> Result<LayerFile> {
debug!(
"download layer digest={} size={}",
layer.digest(),
layer.size()
);
let mut layer_path = layer_dir.to_path_buf();
layer_path.push(layer.digest());
let mut tmp_path = layer_dir.to_path_buf();
tmp_path.push(format!("{}.tmp", layer.digest()));
{
let file = tokio::fs::File::create(&layer_path).await?;
let size = client.write_blob_to_file(&image.name, layer, file).await?;
if layer.size() as u64 != size {
return Err(anyhow!(
"downloaded layer size differs from size in manifest",
));
}
}
let mut media_type = layer.media_type().clone();
// docker layer compatibility
if media_type.to_string() == MediaType::ImageLayerGzip.to_docker_v2s2()? {
media_type = MediaType::ImageLayerGzip;
}
let compression = match media_type {
MediaType::ImageLayer => LayerCompressionType::None,
MediaType::ImageLayerGzip => LayerCompressionType::Gzip,
MediaType::ImageLayerZstd => LayerCompressionType::Zstd,
other => return Err(anyhow!("found layer with unknown media type: {}", other)),
};
Ok(LayerFile {
digest: layer.digest().clone(),
compression,
path: layer_path,
})
}
/// Packs the unpacked image tree at `image_dir` into a gzip-compressed squashfs
/// image written to `squash_file`.
///
/// Ownership, mode and mtime of every node are taken from `symlink_metadata`, so
/// symlinks are recorded as links and never followed. Files at or above
/// `SQUASHFS_MEMORY_BUFFER_LIMIT` are streamed from disk; smaller ones are read
/// into memory. `image_dir` is removed before the image is written out.
///
/// # Errors
/// Fails on any I/O error, on paths that are not valid UTF-8, and on file types
/// other than regular file, directory, symlink, block device and char device.
fn squash(&self, image_dir: &PathBuf, squash_file: &PathBuf) -> Result<()> {
    let mut writer = FilesystemWriter::default();
    writer.set_compressor(FilesystemCompressor::new(Compressor::Gzip, None)?);
    let walk = WalkDir::new(image_dir).follow_links(false);
    for entry in walk {
        let entry = entry?;
        let rel = entry
            .path()
            .strip_prefix(image_dir)?
            .to_str()
            .ok_or_else(|| anyhow!("failed to strip prefix of tmpdir"))?;
        let rel = format!("/{}", rel);
        trace!("squash write {}", rel);
        let typ = entry.file_type();
        let metadata = fs::symlink_metadata(entry.path())?;
        let uid = metadata.uid();
        let gid = metadata.gid();
        let mode = metadata.permissions().mode();
        let mtime = metadata.mtime();
        // The walk root maps to the squashfs root, which is configured on the
        // writer instead of being pushed as a node.
        if rel == "/" {
            writer.set_root_uid(uid);
            writer.set_root_gid(gid);
            writer.set_root_mode(mode as u16);
            continue;
        }
        let header = NodeHeader {
            permissions: mode as u16,
            uid,
            gid,
            mtime: mtime as u32,
        };
        if typ.is_symlink() {
            let symlink = fs::read_link(entry.path())?;
            let symlink = symlink
                .to_str()
                .ok_or_else(|| anyhow!("failed to read symlink"))?;
            writer.push_symlink(symlink, rel, header)?;
        } else if typ.is_dir() {
            writer.push_dir(rel, header)?;
        } else if typ.is_file() {
            if metadata.size() >= SQUASHFS_MEMORY_BUFFER_LIMIT as u64 {
                let reader =
                    BufReader::with_capacity(LAYER_BUFFER_SIZE, File::open(entry.path())?);
                writer.push_file(reader, rel, header)?;
            } else {
                let cursor = Cursor::new(fs::read(entry.path())?);
                writer.push_file(cursor, rel, header)?;
            }
        } else if typ.is_block_device() {
            // `rdev` is the device number the node refers to; `dev` would be the
            // device of the filesystem that merely contains the node.
            let device = metadata.rdev();
            writer.push_block_device(device as u32, rel, header)?;
        } else if typ.is_char_device() {
            let device = metadata.rdev();
            writer.push_char_device(device as u32, rel, header)?;
        } else {
            return Err(anyhow!("invalid file type"));
        }
    }
    fs::remove_dir_all(image_dir)?;
    let squash_file_path = squash_file
        .to_str()
        .ok_or_else(|| anyhow!("failed to convert squashfs string"))?;
    let mut file = File::create(squash_file)?;
    trace!("squash generate: {}", squash_file_path);
    writer.write(&mut file)?;
    Ok(())
}
}

View File

@ -0,0 +1,89 @@
use anyhow::Result;
use std::fmt;
use url::Url;
/// Registry hostname substituted by `ImageName::parse` when an image name does not carry one.
const DOCKER_HUB_MIRROR: &str = "mirror.gcr.io";
/// Reference used by `ImageName::parse` when an image name has no `:reference` suffix.
const DEFAULT_IMAGE_TAG: &str = "latest";
/// A parsed container image name, split into registry location, repository and reference.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ImageName {
    /// Registry host, without any port.
    pub hostname: String,
    /// Registry port, when one was given explicitly.
    pub port: Option<u16>,
    /// Repository path on the registry.
    pub name: String,
    /// The part after the `:` of the repository path (for example a tag).
    pub reference: String,
}

impl fmt::Display for ImageName {
    /// Formats as `hostname[:port]/name:reference`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}", self.hostname)?;
        if let Some(port) = self.port {
            write!(f, ":{}", port)?;
        }
        write!(f, "/{}:{}", self.name, self.reference)
    }
}
impl Default for ImageName {
    /// Builds an image name from a freshly generated random (v4) UUID.
    ///
    /// # Panics
    /// Panics if the hyphenated UUID is rejected by [`ImageName::parse`].
    fn default() -> Self {
        let random_name = uuid::Uuid::new_v4().as_hyphenated().to_string();
        Self::parse(&random_name).expect("UUID hyphenated must be valid name")
    }
}
impl ImageName {
    /// Parses an image name of the form `[hostname[:port]/]name[:reference]`.
    ///
    /// * A name without any `/` is looked up as `library/<name>` on
    ///   `DOCKER_HUB_MIRROR`.
    /// * A name with exactly one `/` whose first component does not look like a
    ///   registry (no `.`, no `:port`, and not `localhost`) is treated as a
    ///   `user/image` name on `DOCKER_HUB_MIRROR`.
    /// * Otherwise the first component is the registry host, optionally with a port.
    /// * A missing reference defaults to `DEFAULT_IMAGE_TAG`.
    ///
    /// NOTE(review): the reference is split at the first `:` of the repository
    /// path, so digest references (`name@sha256:...`) are not given special
    /// treatment here.
    ///
    /// # Errors
    /// Fails if a port is present but is not a valid `u16`.
    pub fn parse(name: &str) -> Result<Self> {
        let slash_count = name.matches('/').count();
        let (mut hostname, mut repository) = match name.split_once('/') {
            Some((host, rest)) => (host.to_string(), rest.to_string()),
            None => (DOCKER_HUB_MIRROR.to_string(), format!("library/{}", name)),
        };

        // heuristic to find any docker hub image formats
        // that may be in the hostname format. for example:
        // abc/xyz:latest will trigger this if check, but abc.io/xyz:latest will not,
        // and neither will abc/hello/xyz:latest. A first component carrying a port
        // (`localhost:5000/xyz`) or equal to `localhost` is a registry host as well,
        // so it must not be folded into the repository path.
        let looks_like_registry =
            hostname.contains('.') || hostname.contains(':') || hostname == "localhost";
        if !looks_like_registry && slash_count == 1 {
            repository = format!("{}/{}", hostname, repository);
            hostname = DOCKER_HUB_MIRROR.to_string();
        }

        let host_and_port = hostname
            .split_once(':')
            .map(|(host, port)| (host.to_string(), port.to_string()));
        let (hostname, port) = match host_and_port {
            Some((host, port)) => (host, Some(port.parse()?)),
            None => (hostname, None),
        };

        let (name, reference) = match repository.split_once(':') {
            Some((repo, reference)) => (repo.to_string(), reference.to_string()),
            None => (repository.clone(), DEFAULT_IMAGE_TAG.to_string()),
        };

        Ok(ImageName {
            hostname,
            port,
            name,
            reference,
        })
    }

    /// URL for OCI distribution API endpoint
    ///
    /// Hosts whose name starts with `localhost` are addressed over plain `http`,
    /// every other host over `https`.
    pub fn registry_url(&self) -> Result<Url> {
        let authority = match self.port {
            Some(port) => format!("{}:{}", self.hostname, port),
            None => self.hostname.clone(),
        };
        let scheme = if self.hostname.starts_with("localhost") {
            "http"
        } else {
            "https"
        };
        Ok(Url::parse(&format!("{}://{}", scheme, authority))?)
    }
}

View File

@ -0,0 +1,245 @@
use std::net::IpAddr;
use std::{fs, net::Ipv4Addr, str::FromStr};
use advmac::MacAddr6;
use anyhow::{anyhow, Result};
use ipnetwork::{IpNetwork, Ipv4Network};
use krata::launchcfg::{
LaunchInfo, LaunchNetwork, LaunchNetworkIpv4, LaunchNetworkIpv6, LaunchNetworkResolver,
};
use uuid::Uuid;
use xenclient::{DomainConfig, DomainDisk, DomainNetworkInterface};
use xenstore::client::XsdInterface;
use crate::cfgblk::ConfigBlock;
use crate::image::{cache::ImageCache, name::ImageName, ImageCompiler, ImageInfo};
use crate::RuntimeContext;
use super::{GuestInfo, GuestState};
/// Parameters for launching a single guest through `GuestLauncher::launch`.
pub struct GuestLaunchRequest<'a> {
/// Image name; parsed with `ImageName::parse` and compiled before launch.
pub image: &'a str,
/// Passed to the domain configuration as `max_vcpus`.
pub vcpus: u32,
/// Passed to the domain configuration as `mem_mb`.
pub mem: u64,
/// Forwarded unchanged into the guest launch configuration (`LaunchInfo::env`).
pub env: Option<Vec<String>>,
/// Forwarded unchanged into the guest launch configuration (`LaunchInfo::run`).
pub run: Option<Vec<String>>,
/// Selects the `debug` kernel command-line option instead of `quiet`.
pub debug: bool,
}
/// Stateless helper that turns a `GuestLaunchRequest` into a running Xen domain.
pub struct GuestLauncher {}
impl GuestLauncher {
/// Creates a launcher. The launcher holds no state, so this currently cannot fail.
pub fn new() -> Result<Self> {
    let launcher = Self {};
    Ok(launcher)
}
/// Compiles the requested image, builds its config block and boots a Xen domain for it.
///
/// Steps, in order: compile the image to squashfs, generate locally administered
/// unicast MAC addresses for guest and gateway, allocate a guest IPv4 address,
/// build the config block, attach both squashfs files as loop devices, and create
/// the domain. Networking details and loop-device bookkeeping are recorded under
/// `krata/...` keys of the domain so they can be read back later.
///
/// If attaching a loop device or creating the domain fails, the loop devices that
/// were already attached are detached and the config block directory is removed
/// (both best effort) before the error is returned.
pub async fn launch<'r>(
    &mut self,
    context: &mut RuntimeContext,
    request: GuestLaunchRequest<'r>,
) -> Result<GuestInfo> {
    let uuid = Uuid::new_v4();
    let name = format!("krata-{uuid}");
    let image_info = self.compile(request.image, &context.image_cache).await?;

    let mut gateway_mac = MacAddr6::random();
    gateway_mac.set_local(true);
    gateway_mac.set_multicast(false);
    let mut container_mac = MacAddr6::random();
    container_mac.set_local(true);
    container_mac.set_multicast(false);

    let guest_ipv4 = self.allocate_ipv4(context).await?;
    let guest_ipv6 = container_mac.to_link_local_ipv6();
    let gateway_ipv4 = "10.75.70.1";
    let gateway_ipv6 = "fe80::1";
    let ipv4_network_mask: u32 = 16;
    let ipv6_network_mask: u32 = 10;

    let launch_config = LaunchInfo {
        network: Some(LaunchNetwork {
            link: "eth0".to_string(),
            ipv4: LaunchNetworkIpv4 {
                address: format!("{}/{}", guest_ipv4, ipv4_network_mask),
                gateway: gateway_ipv4.to_string(),
            },
            ipv6: LaunchNetworkIpv6 {
                address: format!("{}/{}", guest_ipv6, ipv6_network_mask),
                gateway: gateway_ipv6.to_string(),
            },
            resolver: LaunchNetworkResolver {
                nameservers: vec![
                    "1.1.1.1".to_string(),
                    "1.0.0.1".to_string(),
                    "2606:4700:4700::1111".to_string(),
                    "2606:4700:4700::1001".to_string(),
                ],
            },
        }),
        env: request.env,
        run: request.run,
    };

    let cfgblk = ConfigBlock::new(&uuid, &image_info)?;
    cfgblk.build(&launch_config)?;

    let image_squashfs_path = image_info
        .image_squashfs
        .to_str()
        .ok_or_else(|| anyhow!("failed to convert image squashfs path to string"))?;
    let cfgblk_dir_path = cfgblk
        .dir
        .to_str()
        .ok_or_else(|| anyhow!("failed to convert cfgblk directory path to string"))?;
    let cfgblk_squashfs_path = cfgblk
        .file
        .to_str()
        .ok_or_else(|| anyhow!("failed to convert cfgblk squashfs path to string"))?;

    let image_squashfs_loop = match context.autoloop.loopify(image_squashfs_path) {
        Ok(device) => device,
        Err(error) => {
            let _ = fs::remove_dir_all(&cfgblk.dir);
            return Err(error.into());
        }
    };
    let cfgblk_squashfs_loop = match context.autoloop.loopify(cfgblk_squashfs_path) {
        Ok(device) => device,
        Err(error) => {
            // Do not leak the loop device that was attached just above.
            let _ = context.autoloop.unloop(&image_squashfs_loop.path);
            let _ = fs::remove_dir_all(&cfgblk.dir);
            return Err(error.into());
        }
    };

    let cmdline_options = [
        if request.debug { "debug" } else { "quiet" },
        "elevator=noop",
    ];
    let cmdline = cmdline_options.join(" ");

    // MAC addresses display with `-` separators; the `:` form is what gets stored.
    let container_mac_string = container_mac.to_string().replace('-', ":");
    let gateway_mac_string = gateway_mac.to_string().replace('-', ":");

    let config = DomainConfig {
        backend_domid: 0,
        name: &name,
        max_vcpus: request.vcpus,
        mem_mb: request.mem,
        kernel_path: &context.kernel,
        initrd_path: &context.initrd,
        cmdline: &cmdline,
        disks: vec![
            DomainDisk {
                vdev: "xvda",
                block: &image_squashfs_loop,
                writable: false,
            },
            DomainDisk {
                vdev: "xvdb",
                block: &cfgblk_squashfs_loop,
                writable: false,
            },
        ],
        consoles: vec![],
        vifs: vec![DomainNetworkInterface {
            mac: &container_mac_string,
            mtu: 1500,
            bridge: None,
            script: None,
        }],
        filesystems: vec![],
        event_channels: vec![],
        extra_keys: vec![
            ("krata/uuid".to_string(), uuid.to_string()),
            (
                // `device:file:delete` triples, comma separated; `none` means
                // there is nothing to delete when the guest is destroyed.
                "krata/loops".to_string(),
                format!(
                    "{}:{}:none,{}:{}:{}",
                    &image_squashfs_loop.path,
                    image_squashfs_path,
                    &cfgblk_squashfs_loop.path,
                    cfgblk_squashfs_path,
                    cfgblk_dir_path,
                ),
            ),
            ("krata/image".to_string(), request.image.to_string()),
            (
                "krata/network/guest/ipv4".to_string(),
                format!("{}/{}", guest_ipv4, ipv4_network_mask),
            ),
            (
                "krata/network/guest/ipv6".to_string(),
                format!("{}/{}", guest_ipv6, ipv6_network_mask),
            ),
            (
                "krata/network/guest/mac".to_string(),
                container_mac_string.clone(),
            ),
            (
                "krata/network/gateway/ipv4".to_string(),
                format!("{}/{}", gateway_ipv4, ipv4_network_mask),
            ),
            (
                "krata/network/gateway/ipv6".to_string(),
                format!("{}/{}", gateway_ipv6, ipv6_network_mask),
            ),
            (
                "krata/network/gateway/mac".to_string(),
                gateway_mac_string.clone(),
            ),
        ],
        extra_rw_paths: vec!["krata/guest".to_string()],
    };

    match context.xen.create(&config).await {
        Ok(domid) => Ok(GuestInfo {
            uuid,
            domid,
            image: request.image.to_string(),
            loops: vec![],
            ipv4: Some(IpNetwork::new(
                IpAddr::V4(guest_ipv4),
                ipv4_network_mask as u8,
            )?),
            ipv6: Some(IpNetwork::new(
                IpAddr::V6(guest_ipv6),
                ipv6_network_mask as u8,
            )?),
            state: GuestState { exit_code: None },
        }),
        Err(error) => {
            let _ = context.autoloop.unloop(&image_squashfs_loop.path);
            let _ = context.autoloop.unloop(&cfgblk_squashfs_loop.path);
            // The directory still holds the generated config block, so it has to
            // be removed recursively (the same way guest destruction removes it).
            let _ = fs::remove_dir_all(&cfgblk.dir);
            Err(error.into())
        }
    }
}
/// Parses `image` as an image name and compiles it using `image_cache`.
async fn compile(&self, image: &str, image_cache: &ImageCache) -> Result<ImageInfo> {
    let parsed_name = ImageName::parse(image)?;
    let image_compiler = ImageCompiler::new(image_cache)?;
    let info = image_compiler.compile(&parsed_name).await;
    info
}
/// Picks the first free guest address in `10.75.80.0/24`.
///
/// Addresses already in use are collected from the `krata/network/guest/ipv4`
/// key of every domain listed under `/local/domain`. Addresses ending in `.0`
/// and `.255` are never handed out.
///
/// # Errors
/// Fails if the store cannot be read, if a stored address does not parse, or if
/// every candidate address is taken.
async fn allocate_ipv4(&mut self, context: &mut RuntimeContext) -> Result<Ipv4Addr> {
    let network = Ipv4Network::new(Ipv4Addr::new(10, 75, 80, 0), 24)?;

    let mut used: Vec<Ipv4Addr> = Vec::new();
    for domid_candidate in context.xen.store.list("/local/domain").await? {
        let ip_path = format!(
            "/local/domain/{}/krata/network/guest/ipv4",
            domid_candidate
        );
        let Some(existing_ip) = context.xen.store.read_string(&ip_path).await? else {
            continue;
        };
        used.push(Ipv4Network::from_str(&existing_ip)?.ip());
    }

    network
        .iter()
        .filter(|ip| !matches!(ip.octets()[3], 0 | 255))
        .find(|ip| !used.contains(ip))
        .ok_or_else(|| {
            anyhow!("unable to find ipv4 to allocate to container, ipv4 addresses are exhausted")
        })
}
}

297
crates/kratart/src/lib.rs Normal file
View File

@ -0,0 +1,297 @@
use std::{
fs,
path::{Path, PathBuf},
str::FromStr,
sync::Arc,
};
use anyhow::{anyhow, Result};
use ipnetwork::IpNetwork;
use loopdev::LoopControl;
use tokio::sync::Mutex;
use uuid::Uuid;
use xenclient::XenClient;
use xenstore::client::{XsdClient, XsdInterface};
use self::{
autoloop::AutoLoop,
console::XenConsole,
image::cache::ImageCache,
launch::{GuestLaunchRequest, GuestLauncher},
};
pub mod autoloop;
pub mod cfgblk;
pub mod console;
pub mod image;
pub mod launch;
/// One `device:file:delete` triple decoded from a guest's `krata/loops` value.
pub struct ContainerLoopInfo {
/// Loop device; detached via `AutoLoop::unloop` when the guest is destroyed.
pub device: String,
/// Second field of the triple (the file the loop device was created for).
pub file: String,
/// Path removed when the guest is destroyed; `None` when the stored value is `none`.
pub delete: Option<String>,
}
/// Runtime state of a guest as read back from the xenstore.
pub struct GuestState {
/// Parsed `krata/guest/exit-code` value; `None` if the key is absent or not an `i32`.
pub exit_code: Option<i32>,
}
/// Description of a krata guest.
pub struct GuestInfo {
/// Guest identifier stored under `krata/uuid`.
pub uuid: Uuid,
/// Xen domain id.
pub domid: u32,
/// Image name stored under `krata/image` (`unknown` when listing finds none).
pub image: String,
/// Loop devices decoded from `krata/loops`.
pub loops: Vec<ContainerLoopInfo>,
/// Guest IPv4 network, if known and parseable.
pub ipv4: Option<IpNetwork>,
/// Guest IPv6 network, if known and parseable.
pub ipv6: Option<IpNetwork>,
/// Current guest state.
pub state: GuestState,
}
/// Shared handles and paths needed to launch, list and destroy guests.
pub struct RuntimeContext {
/// Cache of compiled images, rooted at `<store>/cache/image`.
pub image_cache: ImageCache,
/// Loop device manager.
pub autoloop: AutoLoop,
/// Xen client, also used for xenstore access through `xen.store`.
pub xen: XenClient,
/// Path of the guest kernel located by `detect_guest_file`.
pub kernel: String,
/// Path of the guest initrd located by `detect_guest_file`.
pub initrd: String,
}
impl RuntimeContext {
pub async fn new(store: String) -> Result<Self> {
let mut image_cache_path = PathBuf::from(&store);
image_cache_path.push("cache");
fs::create_dir_all(&image_cache_path)?;
let xen = XenClient::open().await?;
image_cache_path.push("image");
fs::create_dir_all(&image_cache_path)?;
let image_cache = ImageCache::new(&image_cache_path)?;
let kernel = RuntimeContext::detect_guest_file(&store, "kernel")?;
let initrd = RuntimeContext::detect_guest_file(&store, "initrd")?;
Ok(RuntimeContext {
image_cache,
autoloop: AutoLoop::new(LoopControl::open()?),
xen,
kernel,
initrd,
})
}
/// Locates the guest file `name`, preferring `<store>/guest/<name>` over the
/// system-wide `/usr/share/krata/guest/<name>`.
///
/// # Errors
/// Fails if neither candidate is an existing file.
fn detect_guest_file(store: &str, name: &str) -> Result<String> {
    let candidates = [
        format!("{}/guest/{}", store, name),
        format!("/usr/share/krata/guest/{}", name),
    ];
    for candidate in candidates {
        let path = PathBuf::from(candidate);
        if path.is_file() {
            return path_as_string(&path);
        }
    }
    Err(anyhow!("unable to find required guest file: {}", name))
}
/// Lists every domain under `/local/domain` that carries a `krata/uuid` key.
///
/// Domains without that key are skipped. Network addresses and the exit code
/// that are missing or fail to parse are reported as `None`; a missing image
/// name is reported as `unknown`.
pub async fn list(&mut self) -> Result<Vec<GuestInfo>> {
    let mut guests = Vec::new();
    for domid_candidate in self.xen.store.list("/local/domain").await? {
        let dom_path = format!("/local/domain/{}", domid_candidate);

        let uuid_key = format!("{}/krata/uuid", &dom_path);
        let Some(uuid_string) = self.xen.store.read_string(&uuid_key).await? else {
            continue;
        };
        let domid =
            u32::from_str(&domid_candidate).map_err(|_| anyhow!("failed to parse domid"))?;
        let uuid = Uuid::from_str(&uuid_string)?;

        let image_key = format!("{}/krata/image", &dom_path);
        let image = self
            .xen
            .store
            .read_string(&image_key)
            .await?
            .unwrap_or_else(|| "unknown".to_string());

        let loops_key = format!("{}/krata/loops", &dom_path);
        let loops = self.xen.store.read_string(&loops_key).await?;

        let ipv4_key = format!("{}/krata/network/guest/ipv4", &dom_path);
        let ipv4 = self.xen.store.read_string(&ipv4_key).await?;

        let ipv6_key = format!("{}/krata/network/guest/ipv6", &dom_path);
        let ipv6 = self.xen.store.read_string(&ipv6_key).await?;

        let ipv4 = ipv4.and_then(|value| IpNetwork::from_str(&value).ok());
        let ipv6 = ipv6.and_then(|value| IpNetwork::from_str(&value).ok());

        let exit_code_key = format!("{}/krata/guest/exit-code", &dom_path);
        let exit_code: Option<i32> = self
            .xen
            .store
            .read_string(&exit_code_key)
            .await?
            .and_then(|code| code.parse().ok());

        guests.push(GuestInfo {
            uuid,
            domid,
            image,
            loops: RuntimeContext::parse_loop_set(&loops),
            ipv4,
            ipv6,
            state: GuestState { exit_code },
        });
    }
    Ok(guests)
}
/// Finds the guest identified by `id`, which may be its UUID, its domain id, or
/// its `krata-<uuid>` name. Returns `Ok(None)` when no guest matches.
pub async fn resolve(&mut self, id: &str) -> Result<Option<GuestInfo>> {
    let guests = self.list().await?;
    let matching = guests.into_iter().find(|guest| {
        let uuid_string = guest.uuid.to_string();
        uuid_string == id
            || guest.domid.to_string() == id
            || id == format!("krata-{}", uuid_string)
    });
    Ok(matching)
}
/// Decodes a `krata/loops` value into its loop-device descriptions.
///
/// The value is a comma-separated list of `device:file:delete` triples, where a
/// `delete` of `none` means there is nothing to delete. `None` input yields an
/// empty list. Sets with fewer than three fields (including the single empty
/// set produced by an empty string) are skipped instead of causing a panic;
/// fields beyond the third are ignored.
fn parse_loop_set(input: &Option<String>) -> Vec<ContainerLoopInfo> {
    let Some(input) = input else {
        return Vec::new();
    };
    input
        .split(',')
        .filter_map(|set| {
            let mut fields = set.split(':');
            let device = fields.next()?;
            let file = fields.next()?;
            let delete = fields.next()?;
            Some(ContainerLoopInfo {
                device: device.to_string(),
                file: file.to_string(),
                delete: (delete != "none").then(|| delete.to_string()),
            })
        })
        .collect()
}
}
/// Cloneable handle to the guest runtime; clones share the same `RuntimeContext`
/// behind an async mutex.
#[derive(Clone)]
pub struct Runtime {
// Store path the runtime was created with; reused by `dupe`.
store: Arc<String>,
// Locked for the duration of each launch/destroy/console/list call.
context: Arc<Mutex<RuntimeContext>>,
}
impl Runtime {
pub async fn new(store: String) -> Result<Self> {
let context = RuntimeContext::new(store.clone()).await?;
Ok(Self {
store: Arc::new(store),
context: Arc::new(Mutex::new(context)),
})
}
/// Launches a guest while holding the runtime context lock.
pub async fn launch<'a>(&self, request: GuestLaunchRequest<'a>) -> Result<GuestInfo> {
    let mut guard = self.context.lock().await;
    let mut guest_launcher = GuestLauncher::new()?;
    let launched = guest_launcher.launch(&mut *guard, request).await;
    launched
}
/// Destroys the guest identified by `id` and releases its loop devices.
///
/// After the domain is destroyed, every loop device recorded under `krata/loops`
/// is detached and its associated `delete` path (file, symlink or directory tree)
/// is removed. Returns the UUID of the destroyed guest.
///
/// # Errors
/// Fails if the guest cannot be resolved, if the domain carries no krata UUID,
/// or if destroying the domain or any cleanup step fails.
pub async fn destroy(&self, id: &str) -> Result<Uuid> {
    let mut context = self.context.lock().await;
    let Some(info) = context.resolve(id).await? else {
        return Err(anyhow!("unable to resolve guest: {}", id));
    };
    let domid = info.domid;

    let mut store = XsdClient::open().await?;
    let dom_path = store.get_domain_path(domid).await?;
    let Some(uuid) = store
        .read_string(format!("{}/krata/uuid", dom_path).as_str())
        .await?
    else {
        return Err(anyhow!(
            "domain {} was not found or not created by krata",
            domid
        ));
    };
    if uuid.is_empty() {
        return Err(anyhow!("unable to find krata uuid based on the domain",));
    }
    let uuid = Uuid::parse_str(&uuid)?;

    // The loop set has to be read before the domain (and its store keys) goes away.
    let stored_loops = store
        .read_string(format!("{}/krata/loops", dom_path).as_str())
        .await?;
    let loops = RuntimeContext::parse_loop_set(&stored_loops);

    context.xen.destroy(domid).await?;

    for loop_info in &loops {
        context.autoloop.unloop(&loop_info.device)?;
        if let Some(delete) = &loop_info.delete {
            let delete_path = PathBuf::from(delete);
            if delete_path.is_file() || delete_path.is_symlink() {
                fs::remove_file(&delete_path)?;
            } else if delete_path.is_dir() {
                fs::remove_dir_all(&delete_path)?;
            }
        }
    }
    Ok(uuid)
}
/// Opens the console of the guest identified by `id`.
pub async fn console(&self, id: &str) -> Result<XenConsole> {
    let mut context = self.context.lock().await;
    let Some(info) = context.resolve(id).await? else {
        return Err(anyhow!("unable to resolve guest: {}", id));
    };
    let console_path = context.xen.get_console_path(info.domid).await?;
    XenConsole::new(&console_path).await
}
/// Lists all krata guests while holding the runtime context lock.
pub async fn list(&self) -> Result<Vec<GuestInfo>> {
    let mut guard = self.context.lock().await;
    let guests = guard.list().await;
    guests
}
/// Creates an independent runtime (with its own context) for the same store path.
pub async fn dupe(&self) -> Result<Runtime> {
    let store = String::clone(&self.store);
    Runtime::new(store).await
}
}
/// Converts `path` to an owned `String`, failing when it is not valid UTF-8.
fn path_as_string(path: &Path) -> Result<String> {
    match path.to_str() {
        Some(text) => Ok(text.to_string()),
        None => Err(anyhow!("unable to convert path to string")),
    }
}

15
crates/vendor/advmac/Cargo.toml vendored Normal file
View File

@ -0,0 +1,15 @@
# This package is from https://github.com/GamePad64/advmac
# Edera maintains an in-tree version because of dependencies being out of date.
[package]
name = "advmac"
version.workspace = true
license = "MIT"
edition = "2021"
[dependencies]
arrayvec = { workspace = true, features = ["serde"] }
rand = { workspace = true }
serde = { workspace = true }
[lib]
name = "advmac"

21
crates/vendor/advmac/LICENSE vendored Normal file
View File

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2022 Alexander Shishenko
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

474
crates/vendor/advmac/src/lib.rs vendored Normal file
View File

@ -0,0 +1,474 @@
mod parser;
use arrayvec::ArrayString;
use core::fmt::{self, Debug, Display, Formatter};
use core::str::FromStr;
use rand::Rng;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use std::net::{IpAddr, Ipv4Addr, Ipv6Addr};
/// Error returned when a MAC address cannot be parsed.
#[derive(Eq, PartialEq, Debug, Clone, Copy)]
pub enum ParseError {
    /// The input is not a valid MAC address.
    InvalidMac,
    /// The input string has a length that no supported format uses.
    InvalidLength { length: usize },
}

impl Display for ParseError {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        if let Self::InvalidLength { length } = self {
            return write!(f, "invalid string length: {}", length);
        }
        write!(f, "invalid MAC address")
    }
}

impl std::error::Error for ParseError {}
/// Error returned when an IP address (or EUI-64) cannot be converted to a MAC address.
#[derive(Eq, PartialEq, Debug, Clone, Copy)]
pub enum IpError {
    /// The address is not of the expected link-local form.
    NotLinkLocal,
    /// The address is not a multicast address.
    NotMulticast,
}

impl Display for IpError {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let message = match self {
            Self::NotLinkLocal => "not link-local address",
            Self::NotMulticast => "not multicast address",
        };
        write!(f, "{}", message)
    }
}

impl std::error::Error for IpError {}
// Lengths, in bytes, of every textual representation for 6-byte (`*_SIZE6`) and
// 8-byte (`*_SIZE8`) addresses. Each value counts two hex digits per address
// byte plus the separators or prefix of the corresponding `MacAddrFormat`.
/// Maximum formatted size.
///
/// It is useful for creating a stack-allocated buffer `[u8; MAC_MAX_SIZE]`
/// and formatting address into it using [MacAddr6::format_write] or [MacAddr8::format_write].
pub const MAC_MAX_SIZE: usize = 23;
/// Size of formatted MAC using [MacAddr6::format_string] and [MacAddrFormat::Canonical].
pub const MAC_CANONICAL_SIZE6: usize = 17;
/// Size of formatted MAC using [MacAddr8::format_string] and [MacAddrFormat::Canonical].
pub const MAC_CANONICAL_SIZE8: usize = 23;
/// Size of formatted MAC using [MacAddr6::format_string] and [MacAddrFormat::ColonNotation].
pub const MAC_COLON_NOTATION_SIZE6: usize = 17;
/// Size of formatted MAC using [MacAddr8::format_string] and [MacAddrFormat::ColonNotation].
pub const MAC_COLON_NOTATION_SIZE8: usize = 23;
/// Size of formatted MAC using [MacAddr6::format_string] and [MacAddrFormat::DotNotation].
pub const MAC_DOT_NOTATION_SIZE6: usize = 14;
/// Size of formatted MAC using [MacAddr8::format_string] and [MacAddrFormat::DotNotation].
pub const MAC_DOT_NOTATION_SIZE8: usize = 19;
/// Size of formatted MAC using [MacAddr6::format_string] and [MacAddrFormat::Hexadecimal].
pub const MAC_HEXADECIMAL_SIZE6: usize = 12;
/// Size of formatted MAC using [MacAddr8::format_string] and [MacAddrFormat::Hexadecimal].
pub const MAC_HEXADECIMAL_SIZE8: usize = 16;
/// Size of formatted MAC using [MacAddr6::format_string] and [MacAddrFormat::Hexadecimal0x].
pub const MAC_HEXADECIMAL0X_SIZE6: usize = 14;
/// Size of formatted MAC using [MacAddr8::format_string] and [MacAddrFormat::Hexadecimal0x].
pub const MAC_HEXADECIMAL0X_SIZE8: usize = 18;
/// Textual representation selected when formatting a MAC address with
/// `format_write` or `format_string`. Hex digits are always written in upper case.
#[derive(Copy, Clone, Eq, PartialEq)]
pub enum MacAddrFormat {
/// `AA-BB-CC-DD-EE-FF` (17 bytes) or `AA-BB-CC-DD-EE-FF-GG-HH` (23 bytes)
Canonical,
/// `AA:BB:CC:DD:EE:FF` (17 bytes) or `AA:BB:CC:DD:EE:FF:GG:HH` (23 bytes)
ColonNotation,
/// `AABB.CCDD.EEFF` (14 bytes) or `AABB.CCDD.EEFF.GGHH` (19 bytes)
DotNotation,
/// `AABBCCDDEEFF` (12 bytes) or `AABBCCDDEEFFGGHH` (16 bytes)
Hexadecimal,
/// `0xAABBCCDDEEFF` (14 bytes) or `0xAABBCCDDEEFFGGHH` (18 bytes)
Hexadecimal0x,
}
// Generates the API shared by both address widths for the tuple struct `$nm`,
// which must wrap a `[u8; $sz]`:
//   * `$nm`     - name of the address type,
//   * `$sz`     - number of bytes in the address,
//   * `$hex_sz` - second const parameter handed to `parser::MacParser`
//                 (the invocations in this file pass twice `$sz`).
// The generated code relies on a `write_internal` method that each type
// provides separately.
macro_rules! mac_impl {
($nm:ident, $sz:literal, $hex_sz:literal) => {
impl $nm {
pub const fn new(eui: [u8; $sz]) -> Self {
Self(eui)
}
pub fn random() -> Self {
let mut result = Self::default();
rand::rngs::OsRng.fill(result.as_mut_slice());
result
}
pub const fn broadcast() -> Self {
Self([0xFF; $sz])
}
pub const fn nil() -> Self {
Self([0; $sz])
}
/// Sets *locally administered* flag
pub fn set_local(&mut self, v: bool) {
// The flag is bit 1 (value 0x02) of the first byte.
if v {
self.0[0] |= 0b0000_0010;
} else {
self.0[0] &= !0b0000_0010;
}
}
/// Returns the state of *locally administered* flag
pub const fn is_local(&self) -> bool {
(self.0[0] & 0b0000_0010) != 0
}
/// Sets *multicast* flag
pub fn set_multicast(&mut self, v: bool) {
// The flag is bit 0 (value 0x01) of the first byte.
if v {
self.0[0] |= 0b0000_0001;
} else {
self.0[0] &= !0b0000_0001;
}
}
/// Returns the state of *multicast* flag
pub const fn is_multicast(&self) -> bool {
(self.0[0] & 0b0000_0001) != 0
}
/// Returns [organizationally unique identifier (OUI)](https://en.wikipedia.org/wiki/Organizationally_unique_identifier) of this MAC address
pub const fn oui(&self) -> [u8; 3] {
[self.0[0], self.0[1], self.0[2]]
}
/// Sets [organizationally unique identifier (OUI)](https://en.wikipedia.org/wiki/Organizationally_unique_identifier) for this MAC address
pub fn set_oui(&mut self, oui: [u8; 3]) {
self.0[..3].copy_from_slice(&oui);
}
/// Returns internal array representation for this MAC address, consuming it
pub const fn to_array(self) -> [u8; $sz] {
self.0
}
/// Returns internal array representation for this MAC address as [u8] slice
pub const fn as_slice(&self) -> &[u8] {
&self.0
}
/// Returns internal array representation for this MAC address as mutable [u8] slice
pub fn as_mut_slice(&mut self) -> &mut [u8] {
&mut self.0
}
/// Returns internal array representation for this MAC address as [core::ffi::c_char] slice.
/// This can be useful in parsing `ifr_hwaddr`, for example.
pub const fn as_c_slice(&self) -> &[core::ffi::c_char] {
// SAFETY: `c_char` is either `i8` or `u8`, both of which have the same size
// and alignment as `u8`, so reinterpreting the slice keeps length and layout valid.
unsafe { &*(self.as_slice() as *const _ as *const [core::ffi::c_char]) }
}
/// Parse MAC address from string and return it as `MacAddr`.
/// This function can be used in const context, so MAC address can be parsed in compile-time.
pub const fn parse_str(s: &str) -> Result<Self, ParseError> {
match parser::MacParser::<$sz, $hex_sz>::parse(s) {
Ok(v) => Ok(Self(v)),
Err(e) => Err(e),
}
}
/// Write MAC address to `impl core::fmt::Write`, which can be used in `no_std` environments.
///
/// It can be used like this with [arrayvec::ArrayString] without allocations:
/// ```
/// use arrayvec::ArrayString;
/// use advmac::{MacAddr6, MacAddrFormat, MAC_CANONICAL_SIZE6};
///
/// let mac = MacAddr6::parse_str("AA:BB:CC:DD:EE:FF").unwrap();
///
/// let mut buf = ArrayString::<MAC_CANONICAL_SIZE6>::new();
/// mac.format_write(&mut buf, MacAddrFormat::Canonical).unwrap();
/// # assert_eq!(buf.as_str(), "AA-BB-CC-DD-EE-FF")
/// ```
pub fn format_write<T: fmt::Write>(
&self,
f: &mut T,
format: MacAddrFormat,
) -> fmt::Result {
// Arguments to `write_internal` are (prefix, separator after odd-numbered
// bytes, separator after even-numbered bytes).
match format {
MacAddrFormat::Canonical => self.write_internal(f, "", "-", "-"),
MacAddrFormat::ColonNotation => self.write_internal(f, "", ":", ":"),
MacAddrFormat::DotNotation => self.write_internal(f, "", "", "."),
MacAddrFormat::Hexadecimal => self.write_internal(f, "", "", ""),
MacAddrFormat::Hexadecimal0x => self.write_internal(f, "0x", "", ""),
}
}
/// Write MAC address to [String]. This function uses [Self::format_write] internally and
/// produces the same result, but in string form, which can be convenient in non-constrainted
/// environments.
pub fn format_string(&self, format: MacAddrFormat) -> String {
let mut buf = String::new();
self.format_write(&mut buf, format).unwrap();
buf
}
}
impl Display for $nm {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
self.format_write(f, MacAddrFormat::Canonical)
}
}
impl Debug for $nm {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
self.format_write(f, MacAddrFormat::Canonical)
}
}
impl From<[u8; $sz]> for $nm {
fn from(arr: [u8; $sz]) -> Self {
Self(arr)
}
}
impl TryFrom<&[u8]> for $nm {
type Error = ParseError;
fn try_from(value: &[u8]) -> Result<Self, Self::Error> {
// A slice of the wrong length is reported as `InvalidMac`.
Ok(Self(value.try_into().map_err(|_| ParseError::InvalidMac)?))
}
}
// NOTE(review): presumably excluded on aarch64 because `c_char` is `u8` there and
// this impl would overlap with `TryFrom<&[u8]>` above - confirm for other targets.
#[cfg(not(target_arch = "aarch64"))]
impl TryFrom<&[core::ffi::c_char]> for $nm {
type Error = ParseError;
fn try_from(value: &[core::ffi::c_char]) -> Result<Self, Self::Error> {
// SAFETY: `c_char` has the same size and alignment as `u8`, so the slice can be
// viewed as bytes without changing its length.
Self::try_from(unsafe { &*(value as *const _ as *const [u8]) })
}
}
impl TryFrom<&str> for $nm {
type Error = ParseError;
fn try_from(value: &str) -> Result<Self, Self::Error> {
Self::parse_str(value)
}
}
impl TryFrom<String> for $nm {
type Error = ParseError;
fn try_from(value: String) -> Result<Self, Self::Error> {
Self::parse_str(&value)
}
}
impl FromStr for $nm {
type Err = ParseError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
Self::parse_str(s)
}
}
impl Serialize for $nm {
fn serialize<S: Serializer>(&self, s: S) -> Result<S::Ok, S::Error> {
// Serialized as the canonical (`-` separated) string form.
let mut buf = ArrayString::<MAC_MAX_SIZE>::new();
self.format_write(&mut buf, MacAddrFormat::Canonical)
.unwrap();
s.serialize_str(buf.as_ref())
}
}
impl<'de> Deserialize<'de> for $nm {
fn deserialize<D: Deserializer<'de>>(d: D) -> Result<Self, D::Error> {
Self::from_str(ArrayString::<MAC_MAX_SIZE>::deserialize(d)?.as_ref())
.map_err(serde::de::Error::custom)
}
}
};
}
/// MAC address, represented as EUI-48
///
/// Stored as six raw bytes; the derived `Default` is the all-zero address.
#[repr(transparent)]
#[derive(Default, Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)]
pub struct MacAddr6([u8; 6]);
/// MAC address, represented as EUI-64
///
/// Stored as eight raw bytes; the derived `Default` is the all-zero address.
#[repr(transparent)]
#[derive(Default, Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)]
pub struct MacAddr8([u8; 8]);
// Generate the shared API for both widths: (type, byte count, hex digit count).
mac_impl!(MacAddr6, 6, 12);
mac_impl!(MacAddr8, 8, 16);
impl MacAddr6 {
    /// Converts this address to its modified EUI-64 form: `FF FE` is inserted
    /// between the third and fourth byte and bit `0x02` of the first byte is flipped.
    pub const fn to_modified_eui64(self) -> MacAddr8 {
        let b = self.to_array();
        MacAddr8([b[0] ^ 0b00000010, b[1], b[2], 0xFF, 0xFE, b[3], b[4], b[5]])
    }

    /// Reverses [`MacAddr6::to_modified_eui64`].
    ///
    /// # Errors
    /// Returns [`IpError::NotLinkLocal`] unless bytes 3 and 4 are exactly `FF FE`.
    pub const fn try_from_modified_eui64(eui64: MacAddr8) -> Result<Self, IpError> {
        let b = eui64.to_array();
        // Both marker bytes must match; accepting either one alone would turn
        // arbitrary EUI-64 values into bogus 48-bit addresses.
        if (b[3] == 0xFF) & (b[4] == 0xFE) {
            Ok(Self([b[0] ^ 0b00000010, b[1], b[2], b[5], b[6], b[7]]))
        } else {
            Err(IpError::NotLinkLocal)
        }
    }

    /// Builds the `fe80::/64` link-local IPv6 address whose interface identifier
    /// is the modified EUI-64 form of this address.
    pub const fn to_link_local_ipv6(self) -> Ipv6Addr {
        let mac64 = self.to_modified_eui64().to_array();
        Ipv6Addr::new(
            0xFE80,
            0x0000,
            0x0000,
            0x0000,
            ((mac64[0] as u16) << 8) + mac64[1] as u16,
            ((mac64[2] as u16) << 8) + mac64[3] as u16,
            ((mac64[4] as u16) << 8) + mac64[5] as u16,
            ((mac64[6] as u16) << 8) + mac64[7] as u16,
        )
    }

    /// Reverses [`MacAddr6::to_link_local_ipv6`].
    ///
    /// # Errors
    /// Returns [`IpError::NotLinkLocal`] unless the address has the prefix
    /// `fe80:0:0:0` and carries `FF FE` in the middle of its interface identifier.
    pub const fn try_from_link_local_ipv6(ip: Ipv6Addr) -> Result<Self, IpError> {
        let octets = ip.octets();
        if (octets[0] != 0xFE)
            | (octets[1] != 0x80)
            | (octets[2] != 0x00)
            | (octets[3] != 0x00)
            | (octets[4] != 0x00)
            | (octets[5] != 0x00)
            | (octets[6] != 0x00)
            | (octets[7] != 0x00)
            | (octets[11] != 0xFF)
            | (octets[12] != 0xFE)
        {
            return Err(IpError::NotLinkLocal);
        }
        Ok(Self([
            octets[8] ^ 0b00000010,
            octets[9],
            octets[10],
            octets[13],
            octets[14],
            octets[15],
        ]))
    }

    /// Maps a multicast IPv4 address to `01:00:5E` followed by its low 23 bits.
    ///
    /// # Errors
    /// Returns [`IpError::NotMulticast`] if `ip` is not a multicast address.
    pub const fn try_from_multicast_ipv4(ip: Ipv4Addr) -> Result<Self, IpError> {
        if !ip.is_multicast() {
            return Err(IpError::NotMulticast);
        }
        let b = ip.octets();
        Ok(Self::new([0x01, 0x00, 0x5E, b[1] & 0x7F, b[2], b[3]]))
    }

    /// Maps a multicast IPv6 address to `33:33` followed by its last four bytes.
    ///
    /// # Errors
    /// Returns [`IpError::NotMulticast`] if `ip` is not a multicast address.
    pub const fn try_from_multicast_ipv6(ip: Ipv6Addr) -> Result<Self, IpError> {
        if !ip.is_multicast() {
            return Err(IpError::NotMulticast);
        }
        let b = ip.octets();
        Ok(Self::new([0x33, 0x33, b[12], b[13], b[14], b[15]]))
    }

    /// Dispatches to the IPv4 or IPv6 multicast mapping depending on `ip`.
    pub const fn try_from_multicast_ip(ip: IpAddr) -> Result<Self, IpError> {
        match ip {
            IpAddr::V4(ip) => Self::try_from_multicast_ipv4(ip),
            IpAddr::V6(ip) => Self::try_from_multicast_ipv6(ip),
        }
    }
}
impl MacAddr6 {
    /// Writes `pre`, then every byte as two upper-case hex digits. `sep` is
    /// written after the 1st, 3rd and 5th byte, `sep2` after the 2nd and 4th.
    fn write_internal<T: fmt::Write>(
        &self,
        f: &mut T,
        pre: &str,
        sep: &str,
        sep2: &str,
    ) -> fmt::Result {
        f.write_str(pre)?;
        for (index, byte) in self.0.iter().enumerate() {
            if index > 0 {
                let separator = if index % 2 == 1 { sep } else { sep2 };
                f.write_str(separator)?;
            }
            write!(f, "{:02X}", byte)?;
        }
        Ok(())
    }
}
impl MacAddr8 {
    /// Writes `pre`, then every byte as two upper-case hex digits. `sep` is
    /// written after each odd-numbered byte (1st, 3rd, ...), `sep2` after each
    /// even-numbered byte except the last.
    fn write_internal<T: fmt::Write>(
        &self,
        f: &mut T,
        pre: &str,
        sep: &str,
        sep2: &str,
    ) -> fmt::Result {
        f.write_str(pre)?;
        for (index, byte) in self.0.iter().enumerate() {
            if index > 0 {
                let separator = if index % 2 == 1 { sep } else { sep2 };
                f.write_str(separator)?;
            }
            write!(f, "{:02X}", byte)?;
        }
        Ok(())
    }
}
/// Convenience macro for creating a [MacAddr6] at compile time.
///
/// The argument is passed to `MacAddr6::parse_str`; on success the macro
/// evaluates to the parsed address.
///
/// # Panics
///
/// Panics with `"Invalid MAC address"` when parsing fails. When the macro is
/// used to initialise a `const`, as in the example, that panic surfaces as a
/// compile error.
///
/// Example:
/// ```
/// use advmac::{mac6, MacAddr6};
/// const MAC6: MacAddr6 = mac6!("11:22:33:44:55:66");
/// # assert_eq!(MAC6.to_array(), [0x11, 0x22, 0x33, 0x44, 0x55, 0x66]);
/// ```
#[macro_export]
macro_rules! mac6 {
($s:expr) => {
match $crate::MacAddr6::parse_str($s) {
Ok(mac) => mac,
Err(_) => panic!("Invalid MAC address"),
}
};
}
/// Convenience macro for creating a [MacAddr8] at compile time.
///
/// The argument is passed to `MacAddr8::parse_str`; on success the macro
/// evaluates to the parsed address.
///
/// # Panics
///
/// Panics with `"Invalid MAC address"` when parsing fails. When the macro is
/// used to initialise a `const`, as in the example, that panic surfaces as a
/// compile error.
///
/// Example:
/// ```
/// use advmac::{mac8, MacAddr8};
/// const MAC8: MacAddr8 = mac8!("11:22:33:44:55:66:77:88");
/// # assert_eq!(MAC8.to_array(), [0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88]);
/// ```
#[macro_export]
macro_rules! mac8 {
($s:expr) => {
match $crate::MacAddr8::parse_str($s) {
Ok(mac) => mac,
Err(_) => panic!("Invalid MAC address"),
}
};
}
#[cfg(test)]
mod test {
    /// Toggles the multicast and local flags one at a time and checks, after
    /// every step, that exactly the expected pair `(local, multicast)` is set.
    #[test]
    fn test_flags_roundtrip() {
        let mut addr = mac6!("50:74:f2:b1:a8:7f");
        assert_eq!((addr.is_local(), addr.is_multicast()), (false, false));

        addr.set_multicast(true);
        assert_eq!((addr.is_local(), addr.is_multicast()), (false, true));

        addr.set_local(true);
        assert_eq!((addr.is_local(), addr.is_multicast()), (true, true));

        addr.set_multicast(false);
        assert_eq!((addr.is_local(), addr.is_multicast()), (true, false));

        addr.set_local(false);
        assert_eq!((addr.is_local(), addr.is_multicast()), (false, false));
    }
}

113
crates/vendor/advmac/src/parser.rs vendored Normal file
View File

@ -0,0 +1,113 @@
use crate::ParseError;
// This whole thing is written this way to be const.
// If you want normal hex handling, just use hex crate
/// Const-evaluable parser for MAC addresses of `N` bytes.
///
/// `N2` is the size of the scratch buffer that holds the hex digits once the
/// separators are stripped; that buffer is handed to `from_hex`, which only
/// accepts `2 * N` digits, so `N2` has to be `2 * N`.
pub struct MacParser<const N: usize, const N2: usize>;
impl<const N: usize, const N2: usize> MacParser<N, N2> {
/// Length of `N` two-digit groups joined by `N - 1` one-byte separators
/// (`parse` accepts `-` or `:` for this layout).
const CANONICAL_COLON_SIZE: usize = 3 * N - 1;
/// Length of `2 * N` digits in four-digit groups joined by `.`.
const DOT_NOTATION_SIZE: usize = (2 * N) + (N / 2 - 1);
/// Length of `2 * N` bare hex digits.
const HEXADECIMAL_SIZE: usize = 2 * N;
/// Length of `2 * N` hex digits preceded by the two-byte `0x` prefix.
const HEXADECIMAL0X_SIZE: usize = 2 * N + 2;
/// Converts one ASCII hex digit (either case) into its value `0..=15`.
///
/// Returns `ParseError::InvalidMac` for any other byte.
#[inline]
const fn nibble(v: u8) -> Result<u8, ParseError> {
    let value = match v {
        b'0'..=b'9' => v - b'0',
        b'a'..=b'f' => (v - b'a') + 10,
        b'A'..=b'F' => (v - b'A') + 10,
        _ => return Err(ParseError::InvalidMac),
    };
    Ok(value)
}
/// Combines two ASCII hex digits (`b1` high, `b2` low) into one byte.
///
/// Propagates the error of the first digit that fails to decode.
#[inline]
const fn byte(b1: u8, b2: u8) -> Result<u8, ParseError> {
    // `?` is not available in const fn, hence the explicit matches.
    let high = match Self::nibble(b1) {
        Ok(n) => n,
        Err(e) => return Err(e),
    };
    let low = match Self::nibble(b2) {
        Ok(n) => n,
        Err(e) => return Err(e),
    };
    Ok((high << 4) | low)
}
/// Decodes exactly `2 * N` ASCII hex digits into `N` bytes.
///
/// Returns `ParseError::InvalidLength` when `s` has a different length and
/// the digit decoding error when any character is not a hex digit.
const fn from_hex(s: &[u8]) -> Result<[u8; N], ParseError> {
    if s.len() != Self::HEXADECIMAL_SIZE {
        return Err(ParseError::InvalidLength { length: s.len() });
    }
    let mut bytes = [0u8; N];
    // `for` loops and iterators are unavailable in const fn.
    let mut idx = 0;
    while idx < N {
        match Self::byte(s[2 * idx], s[2 * idx + 1]) {
            Ok(v) => bytes[idx] = v,
            Err(e) => return Err(e),
        }
        idx += 1;
    }
    Ok(bytes)
}
/// Checks that every separator slot of `s` holds `sep`.
///
/// Slots are the indices `group_len`, `2 * group_len + 1`, ... that fall
/// inside `s`, i.e. one byte after each run of `group_len` characters.
const fn check_separator(s: &[u8], sep: u8, group_len: usize) -> bool {
    let mut pos = group_len;
    loop {
        if pos >= s.len() {
            return true;
        }
        if s[pos] != sep {
            return false;
        }
        pos += group_len + 1;
    }
}
/// Parses hex digits written in groups of `group_len` joined by `sep`.
///
/// Verifies the total length and every separator, copies the digits into a
/// scratch buffer of `N2` bytes and decodes that buffer with `from_hex`.
const fn parse_separated(s: &[u8], sep: u8, group_len: usize) -> Result<[u8; N], ParseError> {
    if s.len() != (2 * N) + ((2 * N) / group_len) - 1 {
        return Err(ParseError::InvalidLength { length: s.len() });
    }
    if !Self::check_separator(s, sep, group_len) {
        return Err(ParseError::InvalidMac);
    }
    let stride = group_len + 1;
    let mut digits = [0u8; N2];
    let mut written = 0;
    let mut pos = 0;
    while pos < s.len() {
        // The separator is the last byte of every `stride`-sized window.
        if pos % stride != group_len {
            digits[written] = s[pos];
            written += 1;
        }
        pos += 1;
    }
    Self::from_hex(&digits)
}
/// Parses a MAC address of `N` bytes from one of the supported notations.
///
/// The notation is chosen by the input length, in this order:
/// * `2 * N` bare hex digits,
/// * `0x` followed by `2 * N` hex digits,
/// * two-digit groups joined by `-` or `:` (the separator is taken from
///   index 2 and must be used consistently),
/// * four-digit groups joined by `.`.
///
/// # Errors
///
/// Returns `ParseError::InvalidLength` when the length matches no notation
/// and `ParseError::InvalidMac` for a wrong separator or a non-hex digit.
pub const fn parse(s: &str) -> Result<[u8; N], ParseError> {
    let s = s.as_bytes();
    if s.len() == Self::HEXADECIMAL_SIZE {
        return Self::from_hex(s);
    }
    if s.len() == Self::HEXADECIMAL0X_SIZE {
        // A subslice pattern strips the prefix in const fn without `unsafe`.
        if let [b'0', b'x', digits @ ..] = s {
            return Self::from_hex(digits);
        }
    }
    if s.len() == Self::CANONICAL_COLON_SIZE {
        let sep = s[2];
        match sep {
            b'-' | b':' => Self::parse_separated(s, sep, 2),
            _ => Err(ParseError::InvalidMac),
        }
    } else if s.len() == Self::DOT_NOTATION_SIZE {
        let sep = s[4];
        match sep {
            b'.' => Self::parse_separated(s, sep, 4),
            _ => Err(ParseError::InvalidMac),
        }
    } else {
        Err(ParseError::InvalidLength { length: s.len() })
    }
}
}

17
crates/vendor/loopdev/Cargo.toml vendored Normal file
View File

@ -0,0 +1,17 @@
# This package is from https://github.com/stratis-storage/loopdev-3
# Edera maintains an in-tree version because the goals of krata mean that
# there is as little binding generation as possible, especially bindings which
# prevent development from macOS, like the original library.
[package]
name = "loopdev"
version.workspace = true
license = "MIT"
edition = "2021"
[dependencies]
errno = { workspace = true }
libc = { workspace = true }
nix = { workspace = true, features = ["ioctl"] }
[lib]
name = "loopdev"

21
crates/vendor/loopdev/LICENSE vendored Normal file
View File

@ -0,0 +1,21 @@
The MIT License (MIT)
Copyright (c) 2023 Anne Mulhern
Copyright (c) 2016 Michael Daffin
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

147
crates/vendor/loopdev/src/bindings.rs vendored Normal file
View File

@ -0,0 +1,147 @@
/* originally generated by rust-bindgen */
/* modified to remove unused content by Edera */
#![allow(non_camel_case_types)]
// NOTE(review): these values presumably mirror the kernel's loop-device UAPI
// header; confirm against the header when updating.
pub const __BITS_PER_LONG: u32 = 64;
pub const __FD_SETSIZE: u32 = 1024;
// ioctl request numbers used on a loop device file (0x4C00, 0x4C01, 0x4C04, 0x4C07).
pub const LOOP_SET_FD: u32 = 19456;
pub const LOOP_CLR_FD: u32 = 19457;
pub const LOOP_SET_STATUS64: u32 = 19460;
pub const LOOP_SET_CAPACITY: u32 = 19463;
// ioctl request numbers used on the loop control file (0x4C80, 0x4C82).
pub const LOOP_CTL_ADD: u32 = 19584;
pub const LOOP_CTL_GET_FREE: u32 = 19586;
// Bit flags stored in `lo_flags`.
pub const LO_FLAGS_READ_ONLY: _bindgen_ty_1 = 1;
pub const LO_FLAGS_AUTOCLEAR: _bindgen_ty_1 = 4;
pub const LO_FLAGS_PARTSCAN: _bindgen_ty_1 = 8;
// Integer type of the `LO_FLAGS_*` constants.
pub type _bindgen_ty_1 = ::std::os::raw::c_uint;
// Aliases for the kernel's C integer typedefs, expressed through the
// platform C types of `std::os::raw`.
pub type __kernel_old_uid_t = ::std::os::raw::c_ushort;
pub type __kernel_old_gid_t = ::std::os::raw::c_ushort;
pub type __kernel_old_dev_t = ::std::os::raw::c_ulong;
pub type __kernel_long_t = ::std::os::raw::c_long;
pub type __kernel_ulong_t = ::std::os::raw::c_ulong;
pub type __kernel_ino_t = __kernel_ulong_t;
pub type __kernel_mode_t = ::std::os::raw::c_uint;
pub type __kernel_pid_t = ::std::os::raw::c_int;
pub type __kernel_ipc_pid_t = ::std::os::raw::c_int;
pub type __kernel_uid_t = ::std::os::raw::c_uint;
pub type __kernel_gid_t = ::std::os::raw::c_uint;
pub type __kernel_suseconds_t = __kernel_long_t;
pub type __kernel_daddr_t = ::std::os::raw::c_int;
pub type __kernel_uid32_t = ::std::os::raw::c_uint;
pub type __kernel_gid32_t = ::std::os::raw::c_uint;
pub type __kernel_size_t = __kernel_ulong_t;
pub type __kernel_ssize_t = __kernel_long_t;
pub type __kernel_ptrdiff_t = __kernel_long_t;
/// C-layout struct holding two `c_int` values.
#[repr(C)]
#[derive(Debug, Default, Copy, Clone)]
pub struct __kernel_fsid_t {
pub val: [::std::os::raw::c_int; 2usize],
}
pub type __kernel_off_t = __kernel_long_t;
pub type __kernel_loff_t = ::std::os::raw::c_longlong;
pub type __kernel_old_time_t = __kernel_long_t;
pub type __kernel_time_t = __kernel_long_t;
pub type __kernel_time64_t = ::std::os::raw::c_longlong;
pub type __kernel_clock_t = __kernel_long_t;
pub type __kernel_timer_t = ::std::os::raw::c_int;
pub type __kernel_clockid_t = ::std::os::raw::c_int;
pub type __kernel_caddr_t = *mut ::std::os::raw::c_char;
pub type __kernel_uid16_t = ::std::os::raw::c_ushort;
pub type __kernel_gid16_t = ::std::os::raw::c_ushort;
// Fixed-width integer aliases used by the loop structs below.
pub type __s8 = ::std::os::raw::c_schar;
pub type __u8 = ::std::os::raw::c_uchar;
pub type __s16 = ::std::os::raw::c_short;
pub type __u16 = ::std::os::raw::c_ushort;
pub type __s32 = ::std::os::raw::c_int;
pub type __u32 = ::std::os::raw::c_uint;
pub type __s64 = ::std::os::raw::c_longlong;
pub type __u64 = ::std::os::raw::c_ulonglong;
/// C-layout struct holding 16 `c_ulong` words.
#[repr(C)]
#[derive(Debug, Default, Copy, Clone)]
pub struct __kernel_fd_set {
pub fds_bits: [::std::os::raw::c_ulong; 16usize],
}
pub type __kernel_sighandler_t = Option<unsafe extern "C" fn(arg1: ::std::os::raw::c_int)>;
pub type __kernel_key_t = ::std::os::raw::c_int;
pub type __kernel_mqd_t = ::std::os::raw::c_int;
// Endianness- and checksum-annotated aliases; in Rust they are plain integers.
pub type __le16 = __u16;
pub type __be16 = __u16;
pub type __le32 = __u32;
pub type __be32 = __u32;
pub type __le64 = __u64;
pub type __be64 = __u64;
pub type __sum16 = __u16;
pub type __wsum = __u32;
pub type __poll_t = ::std::os::raw::c_uint;
/// C-layout loop device status record using the legacy field widths.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct loop_info {
    pub lo_number: ::std::os::raw::c_int,
    pub lo_device: __kernel_old_dev_t,
    pub lo_inode: ::std::os::raw::c_ulong,
    pub lo_rdevice: __kernel_old_dev_t,
    pub lo_offset: ::std::os::raw::c_int,
    pub lo_encrypt_type: ::std::os::raw::c_int,
    pub lo_encrypt_key_size: ::std::os::raw::c_int,
    pub lo_flags: ::std::os::raw::c_int,
    pub lo_name: [::std::os::raw::c_char; 64usize],
    pub lo_encrypt_key: [::std::os::raw::c_uchar; 32usize],
    pub lo_init: [::std::os::raw::c_ulong; 2usize],
    pub reserved: [::std::os::raw::c_char; 4usize],
}
impl Default for loop_info {
    /// Returns a value whose bytes are all zero.
    fn default() -> Self {
        // SAFETY: every field is an integer or an array of integers, for
        // which the all-zero bit pattern is a valid value.
        unsafe { ::std::mem::zeroed::<Self>() }
    }
}
/// C-layout loop device status record; this is the structure passed with
/// `LOOP_SET_STATUS64` when attaching a backing file.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct loop_info64 {
    pub lo_device: __u64,
    pub lo_inode: __u64,
    pub lo_rdevice: __u64,
    pub lo_offset: __u64,
    pub lo_sizelimit: __u64,
    pub lo_number: __u32,
    pub lo_encrypt_type: __u32,
    pub lo_encrypt_key_size: __u32,
    pub lo_flags: __u32,
    pub lo_file_name: [__u8; 64usize],
    pub lo_crypt_name: [__u8; 64usize],
    pub lo_encrypt_key: [__u8; 32usize],
    pub lo_init: [__u64; 2usize],
}
impl Default for loop_info64 {
    /// Returns a value whose bytes are all zero.
    fn default() -> Self {
        // SAFETY: every field is an integer or an array of integers, for
        // which the all-zero bit pattern is a valid value.
        unsafe { ::std::mem::zeroed::<Self>() }
    }
}
/// C-layout record bundling a file descriptor, a block size and a
/// [`loop_info64`], followed by reserved words.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct loop_config {
    pub fd: __u32,
    pub block_size: __u32,
    pub info: loop_info64,
    pub __reserved: [__u64; 8usize],
}
impl Default for loop_config {
    /// Returns a value whose bytes are all zero.
    fn default() -> Self {
        // SAFETY: every field (including the nested `loop_info64`) consists
        // of integers only, for which the all-zero bit pattern is valid.
        unsafe { ::std::mem::zeroed::<Self>() }
    }
}

475
crates/vendor/loopdev/src/lib.rs vendored Normal file
View File

@ -0,0 +1,475 @@
// Taken from https://github.com/stratis-storage/loopdev-3/blob/master/src/lib.rs
// Licensed under MIT.
//! Setup and control loop devices.
//!
//! Provides rust interface with similar functionality to the Linux utility `losetup`.
//!
//! # Examples
//!
//! Default options:
//!
//! ```no_run
//! use loopdev::LoopControl;
//! let lc = LoopControl::open().unwrap();
//! let ld = lc.next_free().unwrap();
//!
//! println!("{}", ld.path().unwrap().display());
//!
//! ld.attach_file("disk.img").unwrap();
//! // ...
//! ld.detach().unwrap();
//! ```
//!
//! Custom options:
//!
//! ```no_run
//! # use loopdev::LoopControl;
//! # let lc = LoopControl::open().unwrap();
//! # let ld = lc.next_free().unwrap();
//! #
//! ld.with()
//! .part_scan(true)
//! .offset(512 * 1024 * 1024) // 512 MiB
//! .size_limit(1024 * 1024 * 1024) // 1GiB
//! .attach("disk.img").unwrap();
//! // ...
//! ld.detach().unwrap();
//! ```
mod bindings;
mod linux;
use crate::bindings::{
loop_info64, LOOP_CLR_FD, LOOP_CTL_ADD, LOOP_CTL_GET_FREE, LOOP_SET_CAPACITY, LOOP_SET_FD,
LOOP_SET_STATUS64, LO_FLAGS_AUTOCLEAR, LO_FLAGS_PARTSCAN, LO_FLAGS_READ_ONLY,
};
use libc::ioctl;
use std::ffi::c_int;
use std::{
default::Default,
fs::{File, OpenOptions},
io,
os::unix::prelude::*,
path::{Path, PathBuf},
};
// The integer type of libc's `ioctl` request argument differs per target:
// `c_int` on Android and musl, `c_ulong` everywhere else.
#[cfg(all(not(target_os = "android"), not(target_env = "musl")))]
type IoctlRequest = std::ffi::c_ulong;
#[cfg(any(target_os = "android", target_env = "musl"))]
type IoctlRequest = c_int;
/// Path of the loop control device.
const LOOP_CONTROL: &str = "/dev/loop-control";
/// Prefix to which a device number is appended to form a loop device path.
#[cfg(not(target_os = "android"))]
const LOOP_PREFIX: &str = "/dev/loop";
/// Prefix to which a device number is appended to form a loop device path.
#[cfg(target_os = "android")]
const LOOP_PREFIX: &str = "/dev/block/loop";
/// Interface to the loop control device: `/dev/loop-control`.
#[derive(Debug)]
pub struct LoopControl {
// Open handle to the control device; all control ioctls are issued on it.
dev_file: File,
}
impl LoopControl {
/// Opens the loop control device for reading and writing.
///
/// # Errors
///
/// Fails whenever `/dev/loop-control` cannot be opened. See
/// [`OpenOptions::open`](https://doc.rust-lang.org/std/fs/struct.OpenOptions.html)
/// for the possible causes.
pub fn open() -> io::Result<Self> {
    let dev_file = OpenOptions::new()
        .read(true)
        .write(true)
        .open(LOOP_CONTROL)?;
    Ok(Self { dev_file })
}
/// Asks the control device for the next free loop device and opens it.
///
/// # Examples
///
/// ```no_run
/// use loopdev::LoopControl;
/// let lc = LoopControl::open().unwrap();
/// let ld = lc.next_free().unwrap();
/// println!("{}", ld.path().unwrap().display());
/// ```
///
/// # Errors
///
/// Fails when the `LOOP_CTL_GET_FREE` ioctl fails or when the resulting
/// device file cannot be opened. See
/// [`OpenOptions::open`](https://doc.rust-lang.org/std/fs/struct.OpenOptions.html)
/// for the latter.
pub fn next_free(&self) -> io::Result<LoopDevice> {
    let control_fd = self.dev_file.as_raw_fd() as c_int;
    // SAFETY: `control_fd` belongs to `self.dev_file`, which stays open for
    // the duration of the call; this request takes no pointer argument.
    let ret = unsafe { ioctl(control_fd, LOOP_CTL_GET_FREE as IoctlRequest) };
    let dev_num = ioctl_to_error(ret)?;
    LoopDevice::open(format!("{}{}", LOOP_PREFIX, dev_num))
}
/// Adds a new loop device with number `n` and opens it.
///
/// # Examples
///
/// ```no_run
/// use loopdev::LoopControl;
/// let lc = LoopControl::open().unwrap();
/// let ld = lc.add(1).unwrap();
/// println!("{}", ld.path().unwrap().display());
/// ```
///
/// # Errors
///
/// This function returns an error when the `LOOP_CTL_ADD` ioctl fails
/// (for example because the requested device already exists) or when
/// opening the newly created device fails.
pub fn add(&self, n: u32) -> io::Result<LoopDevice> {
    let control_fd = self.dev_file.as_raw_fd() as c_int;
    // SAFETY: `control_fd` belongs to `self.dev_file`, which stays open for
    // the duration of the call; the extra argument is a plain integer.
    let ret = unsafe { ioctl(control_fd, LOOP_CTL_ADD as IoctlRequest, n as c_int) };
    let dev_num = ioctl_to_error(ret)?;
    LoopDevice::open(format!("{}{}", LOOP_PREFIX, dev_num))
}
}
impl AsRawFd for LoopControl {
    /// Exposes the descriptor of the underlying control device file.
    fn as_raw_fd(&self) -> RawFd {
        AsRawFd::as_raw_fd(&self.dev_file)
    }
}
impl IntoRawFd for LoopControl {
    /// Consumes the handle and hands the descriptor over to the caller.
    fn into_raw_fd(self) -> RawFd {
        let Self { dev_file } = self;
        dev_file.into_raw_fd()
    }
}
/// Interface to a loop device, e.g. `/dev/loop0`.
#[derive(Debug)]
pub struct LoopDevice {
// Open handle to the loop device file; all device ioctls are issued on it.
device: File,
}
impl AsRawFd for LoopDevice {
    /// Exposes the descriptor of the underlying loop device file.
    fn as_raw_fd(&self) -> RawFd {
        AsRawFd::as_raw_fd(&self.device)
    }
}
impl IntoRawFd for LoopDevice {
    /// Consumes the handle and hands the descriptor over to the caller.
    fn into_raw_fd(self) -> RawFd {
        let Self { device } = self;
        device.into_raw_fd()
    }
}
impl LoopDevice {
/// Opens the loop device file at `dev` for reading and writing.
///
/// # Errors
///
/// Fails whenever the given device file cannot be opened. See
/// [`OpenOptions::open`](https://doc.rust-lang.org/std/fs/struct.OpenOptions.html)
/// for the possible causes.
pub fn open<P: AsRef<Path>>(dev: P) -> io::Result<Self> {
    // TODO create dev if it does not exist and begins with LOOP_PREFIX
    let device = OpenOptions::new().read(true).write(true).open(dev)?;
    Ok(Self { device })
}
/// Starts a set of attach options for this device.
///
/// The returned [`AttachOptions`] begins with an all-zero `loop_info64`;
/// nothing is attached until one of its `attach*` methods is called.
///
/// # Examples
///
/// Attach the device to a file.
///
/// ```no_run
/// use loopdev::LoopDevice;
/// let ld = LoopDevice::open("/dev/loop0").unwrap();
/// ld.with().part_scan(true).attach("disk.img").unwrap();
/// # ld.detach().unwrap();
/// ```
pub fn with(&self) -> AttachOptions<'_> {
    let info = loop_info64::default();
    AttachOptions { device: self, info }
}
/// Attach the loop device to a file that maps to the whole file.
///
/// # Examples
///
/// Attach the device to a file.
///
/// ```no_run
/// use loopdev::LoopDevice;
/// let ld = LoopDevice::open("/dev/loop0").unwrap();
/// ld.attach_file("disk.img").unwrap();
/// # ld.detach().unwrap();
/// ```
///
/// # Errors
///
/// This function will return an error for various reasons. Either when
/// opening the backing file (see
/// [`OpenOptions::open`](https://doc.rust-lang.org/std/fs/struct.OpenOptions.html)
/// for further details) or when calling the ioctl to attach the backing
/// file to the device.
pub fn attach_file<P: AsRef<Path>>(&self, backing_file: P) -> io::Result<()> {
let info = loop_info64 {
..Default::default()
};
Self::attach_with_loop_info(self, backing_file, info)
}
/// Opens `backing_file` and attaches it to the loop device using `info`.
///
/// The file is always opened for reading; it is also opened for writing
/// unless `LO_FLAGS_READ_ONLY` is set in `info.lo_flags`.
fn attach_with_loop_info(
    &self, // TODO should be mut? - but changing it is a breaking change
    backing_file: impl AsRef<Path>,
    info: loop_info64,
) -> io::Result<()> {
    let read_only = (info.lo_flags & LO_FLAGS_READ_ONLY) != 0;
    let mut options = OpenOptions::new();
    options.read(true).write(!read_only);
    let backing = options.open(backing_file)?;
    self.attach_fd_with_loop_info(backing, info)
}
/// Attaches the descriptor `bf` to the loop device and applies `info`.
///
/// Two ioctls are issued: `LOOP_SET_FD` binds the backing descriptor, then
/// `LOOP_SET_STATUS64` applies `info`. When the second one fails the device
/// is detached again and the status error is returned.
fn attach_fd_with_loop_info(&self, bf: impl AsRawFd, info: loop_info64) -> io::Result<()> {
    let loop_fd = self.device.as_raw_fd() as c_int;

    // Bind the backing file to the device.
    // SAFETY: both descriptors belong to objects that stay alive for the
    // duration of the call; the argument is a plain integer.
    let bound = unsafe { ioctl(loop_fd, LOOP_SET_FD as IoctlRequest, bf.as_raw_fd() as c_int) };
    ioctl_to_error(bound)?;

    // SAFETY: `info` is a live `loop_info64` on this stack frame, so the
    // pointer passed to the ioctl is valid for the duration of the call.
    let status = unsafe { ioctl(loop_fd, LOOP_SET_STATUS64 as IoctlRequest, &info) };
    if let Err(err) = ioctl_to_error(status) {
        // A failure to detach is dropped on purpose so that the original
        // error is the one reported.
        let _detach_err = self.detach();
        return Err(err);
    }
    Ok(())
}
/// Get the path of the loop device.
pub fn path(&self) -> Option<PathBuf> {
let mut p = PathBuf::from("/proc/self/fd");
p.push(self.device.as_raw_fd().to_string());
std::fs::read_link(&p).ok()
}
/// Returns the major number of the loop device.
///
/// # Errors
///
/// The number is derived from the metadata of the opened device file, so
/// this fails when that metadata cannot be queried.
#[allow(clippy::unnecessary_cast)]
pub fn major(&self) -> io::Result<u32> {
    let metadata = self.device.metadata()?;
    Ok(linux::major(metadata.rdev()) as u32)
}
/// Returns the minor number of the loop device.
///
/// # Errors
///
/// The number is derived from the metadata of the opened device file, so
/// this fails when that metadata cannot be queried.
#[allow(clippy::unnecessary_cast)]
pub fn minor(&self) -> io::Result<u32> {
    let metadata = self.device.metadata()?;
    Ok(linux::minor(metadata.rdev()) as u32)
}
/// Detaches the loop device from its backing file.
///
/// Note that the device does not fully detach until shortly after the
/// underlying device file is closed. That happens when the `LoopDevice` goes
/// out of scope, so keep the `LoopDevice` alive for as short a time as
/// possible.
///
/// # Examples
///
/// ```no_run
/// use loopdev::LoopDevice;
/// let ld = LoopDevice::open("/dev/loop0").unwrap();
/// # ld.attach_file("disk.img").unwrap();
/// ld.detach().unwrap();
/// ```
///
/// # Errors
///
/// Fails when the `LOOP_CLR_FD` ioctl reports an error.
pub fn detach(&self) -> io::Result<()> {
    let loop_fd = self.device.as_raw_fd() as c_int;
    // SAFETY: `loop_fd` belongs to `self.device`, which stays open for the
    // duration of the call; the argument is a plain integer.
    let ret = unsafe { ioctl(loop_fd, LOOP_CLR_FD as IoctlRequest, 0) };
    ioctl_to_error(ret).map(|_| ())
}
/// Resizes a live loop device.
///
/// Call this after the size of the backing file has changed to inform the
/// loop driver about the new size.
///
/// # Errors
///
/// Fails when the `LOOP_SET_CAPACITY` ioctl reports an error.
pub fn set_capacity(&self) -> io::Result<()> {
    let loop_fd = self.device.as_raw_fd() as c_int;
    // SAFETY: `loop_fd` belongs to `self.device`, which stays open for the
    // duration of the call; the argument is a plain integer.
    let ret = unsafe { ioctl(loop_fd, LOOP_SET_CAPACITY as IoctlRequest, 0) };
    ioctl_to_error(ret).map(|_| ())
}
}
/// Used to set options when attaching a device. Created with [`LoopDevice::with`].
///
/// # Examples
///
/// Enable partition scanning on attach:
///
/// ```no_run
/// use loopdev::LoopDevice;
/// let ld = LoopDevice::open("/dev/loop0").unwrap();
/// ld.with()
/// .part_scan(true)
/// .attach("disk.img")
/// .unwrap();
/// # ld.detach().unwrap();
/// ```
///
/// A 1GiB slice of the file located at 1MiB into the file.
///
/// ```no_run
/// use loopdev::LoopDevice;
/// let ld = LoopDevice::open("/dev/loop0").unwrap();
/// ld.with()
/// .offset(1024*1024)
/// .size_limit(1024*1024*1024)
/// .attach("disk.img")
/// .unwrap();
/// # ld.detach().unwrap();
/// ```
#[must_use]
pub struct AttachOptions<'d> {
// Device the options will be applied to on attach.
device: &'d LoopDevice,
// Status record accumulated by the builder methods.
info: loop_info64,
}
impl AttachOptions<'_> {
/// Offset in bytes from the start of the backing file the data will start at.
pub fn offset(mut self, offset: u64) -> Self {
self.info.lo_offset = offset;
self
}
/// Maximum size of the data in bytes.
pub fn size_limit(mut self, size_limit: u64) -> Self {
self.info.lo_sizelimit = size_limit;
self
}
/// Sets or clears the read-only flag (`LO_FLAGS_READ_ONLY`).
pub fn read_only(mut self, read_only: bool) -> Self {
    let cleared = self.info.lo_flags & !LO_FLAGS_READ_ONLY;
    self.info.lo_flags = if read_only {
        cleared | LO_FLAGS_READ_ONLY
    } else {
        cleared
    };
    self
}
/// Sets or clears the autoclear flag (`LO_FLAGS_AUTOCLEAR`).
pub fn autoclear(mut self, autoclear: bool) -> Self {
    let cleared = self.info.lo_flags & !LO_FLAGS_AUTOCLEAR;
    self.info.lo_flags = if autoclear {
        cleared | LO_FLAGS_AUTOCLEAR
    } else {
        cleared
    };
    self
}
/// Sets or clears the partition-scan flag (`LO_FLAGS_PARTSCAN`), which forces
/// the kernel to scan the partition table on a newly created loop device.
/// Note that partition table parsing depends on the sector size; the default
/// sector size is 512 bytes.
pub fn part_scan(mut self, enable: bool) -> Self {
    let cleared = self.info.lo_flags & !LO_FLAGS_PARTSCAN;
    self.info.lo_flags = if enable {
        cleared | LO_FLAGS_PARTSCAN
    } else {
        cleared
    };
    self
}
/// Attach the loop device to a file with the set options.
///
/// # Errors
///
/// This function will return an error for various reasons. Either when
/// opening the backing file (see
/// [`OpenOptions::open`](https://doc.rust-lang.org/std/fs/struct.OpenOptions.html)
/// for further details) or when calling the ioctl to attach the backing
/// file to the device.
pub fn attach(self, backing_file: impl AsRef<Path>) -> io::Result<()> {
self.device.attach_with_loop_info(backing_file, self.info)?;
Ok(())
}
/// Attach the loop device to an fd
///
/// # Errors
///
/// This function will return an error for various reasons when calling the
/// ioctl to attach the backing file to the device.
pub fn attach_fd(self, backing_file_fd: impl AsRawFd) -> io::Result<()> {
self.device
.attach_fd_with_loop_info(backing_file_fd, self.info)?;
Ok(())
}
}
/// Turns the raw return value of an `ioctl` call into an `io::Result`.
///
/// Non-negative values are passed through as `Ok`; a negative value yields
/// the error described by the calling thread's last OS error.
fn ioctl_to_error(ret: i32) -> io::Result<i32> {
    if ret >= 0 {
        return Ok(ret);
    }
    Err(io::Error::last_os_error())
}

15
crates/vendor/loopdev/src/linux.rs vendored Normal file
View File

@ -0,0 +1,15 @@
use std::ffi::c_uint;
/// Extracts the major number from a 64-bit device number.
///
/// The major number is split across two bit ranges of `dev`: bits 8..20
/// provide its low 12 bits and bits 44..64 provide its upper 20 bits.
pub fn major(dev: u64) -> c_uint {
    let low = (dev >> 8) & 0x0000_0fff;
    let high = (dev >> 32) & 0xffff_f000;
    (low | high) as c_uint
}
/// Extracts the minor number from a 64-bit device number.
///
/// The minor number is split across two bit ranges of `dev`: bits 0..8
/// provide its low 8 bits and bits 20..44 provide its upper 24 bits.
pub fn minor(dev: u64) -> c_uint {
    let low = dev & 0x0000_00ff;
    let high = (dev >> 12) & 0xffff_ff00;
    (low | high) as c_uint
}

View File

@ -0,0 +1,34 @@
[package]
name = "xencall"
version.workspace = true
edition = "2021"
resolver = "2"
[dependencies]
libc = { workspace = true }
log = { workspace = true }
nix = { workspace = true, features = ["ioctl"] }
thiserror = { workspace = true }
uuid = { workspace = true }
[lib]
name = "xencall"
[dev-dependencies]
env_logger = { workspace = true }
[[example]]
name = "xencall-domain-info"
path = "examples/domain_info.rs"
[[example]]
name = "xencall-domain-create"
path = "examples/domain_create.rs"
[[example]]
name = "xencall-version-capabilities"
path = "examples/version_capabilities.rs"
[[example]]
name = "xencall-vcpu-context"
path = "examples/vcpu_context.rs"

View File

@ -0,0 +1,12 @@
use xencall::error::Result;
use xencall::sys::CreateDomain;
use xencall::XenCall;
/// Creates a domain from the default `CreateDomain` request and prints the
/// domain id returned by the hypervisor.
fn main() -> Result<()> {
    env_logger::init();
    let xen = XenCall::open()?;
    let request = CreateDomain::default();
    let domid = xen.create_domain(request)?;
    println!("created domain {}", domid);
    Ok(())
}

View File

@ -0,0 +1,11 @@
use xencall::error::Result;
use xencall::XenCall;
/// Prints the domain information of the hard-coded domain id 1.
fn main() -> Result<()> {
    env_logger::init();
    let xen = XenCall::open()?;
    let domain_info = xen.get_domain_info(1)?;
    println!("{:?}", domain_info);
    Ok(())
}

View File

@ -0,0 +1,11 @@
use xencall::error::Result;
use xencall::XenCall;
/// Prints the context of vCPU 0 of the hard-coded domain id 224.
fn main() -> Result<()> {
    env_logger::init();
    let xen = XenCall::open()?;
    let vcpu_context = xen.get_vcpu_context(224, 0)?;
    println!("{:?}", vcpu_context);
    Ok(())
}

View File

@ -0,0 +1,11 @@
use xencall::error::Result;
use xencall::XenCall;
/// Prints the capabilities record reported by the hypervisor.
fn main() -> Result<()> {
    env_logger::init();
    let xen = XenCall::open()?;
    let capabilities = xen.get_version_capabilities()?;
    println!("{:?}", capabilities);
    Ok(())
}

View File

@ -0,0 +1,13 @@
use std::io;
/// Errors returned by this crate.
#[derive(thiserror::Error, Debug)]
pub enum Error {
/// An errno reported by `nix`; converted automatically via `From`.
#[error("kernel error")]
Kernel(#[from] nix::errno::Errno),
/// A standard I/O error; converted automatically via `From`.
#[error("io issue encountered")]
Io(#[from] io::Error),
/// Populating the physmap failed.
#[error("populate physmap failed")]
PopulatePhysmapFailed,
}
/// Result alias whose error type is this crate's [`Error`].
pub type Result<T> = std::result::Result<T, Error>;

View File

@ -0,0 +1,624 @@
pub mod error;
pub mod sys;
use crate::error::{Error, Result};
use crate::sys::{
AddressSize, ArchDomainConfig, CreateDomain, DomCtl, DomCtlValue, DomCtlVcpuContext,
EvtChnAllocUnbound, GetDomainInfo, GetPageFrameInfo3, Hypercall, HypercallInit, MaxMem,
MaxVcpus, MemoryMap, MemoryReservation, MmapBatch, MmapResource, MmuExtOp, MultiCallEntry,
VcpuGuestContext, VcpuGuestContextAny, XenCapabilitiesInfo, HYPERVISOR_DOMCTL,
HYPERVISOR_EVENT_CHANNEL_OP, HYPERVISOR_MEMORY_OP, HYPERVISOR_MMUEXT_OP, HYPERVISOR_MULTICALL,
HYPERVISOR_XEN_VERSION, XENVER_CAPABILITIES, XEN_DOMCTL_CREATEDOMAIN, XEN_DOMCTL_DESTROYDOMAIN,
XEN_DOMCTL_GETDOMAININFO, XEN_DOMCTL_GETPAGEFRAMEINFO3, XEN_DOMCTL_GETVCPUCONTEXT,
XEN_DOMCTL_HYPERCALL_INIT, XEN_DOMCTL_INTERFACE_VERSION, XEN_DOMCTL_MAX_MEM,
XEN_DOMCTL_MAX_VCPUS, XEN_DOMCTL_PAUSEDOMAIN, XEN_DOMCTL_SETVCPUCONTEXT,
XEN_DOMCTL_SET_ADDRESS_SIZE, XEN_DOMCTL_UNPAUSEDOMAIN, XEN_MEM_CLAIM_PAGES, XEN_MEM_MEMORY_MAP,
XEN_MEM_POPULATE_PHYSMAP,
};
use libc::{c_int, mmap, usleep, MAP_FAILED, MAP_SHARED, PROT_READ, PROT_WRITE};
use log::trace;
use nix::errno::Errno;
use std::ffi::{c_long, c_uint, c_ulong, c_void};
use std::fs::{File, OpenOptions};
use std::os::fd::AsRawFd;
use std::ptr::addr_of_mut;
use std::slice;
/// Handle through which hypercalls and mapping requests are issued.
pub struct XenCall {
/// Open privcmd device file (`XenCall::open` opens `/dev/xen/privcmd`).
pub handle: File,
}
impl XenCall {
/// Opens `/dev/xen/privcmd` for reading and writing.
///
/// # Errors
///
/// Returns [`Error::Io`] when the device cannot be opened.
pub fn open() -> Result<XenCall> {
    let mut options = OpenOptions::new();
    options.read(true).write(true);
    let handle = options.open("/dev/xen/privcmd")?;
    Ok(XenCall { handle })
}
/// Creates a shared, readable and writable mapping of `len` bytes on the
/// privcmd descriptor, passing `addr` as the address hint.
///
/// Returns the address of the mapping, or `None` when `mmap` reports
/// `MAP_FAILED`.
pub fn mmap(&self, addr: u64, len: u64) -> Option<u64> {
    trace!(
        "call fd={} mmap addr={:#x} len={}",
        self.handle.as_raw_fd(),
        addr,
        len
    );
    // SAFETY: the call creates a new mapping and does not read or write
    // through `addr`. NOTE(review): whether an existing mapping at `addr`
    // could be replaced depends on the driver; confirm with callers.
    let mapping = unsafe {
        mmap(
            addr as *mut c_void,
            len as usize,
            PROT_READ | PROT_WRITE,
            MAP_SHARED,
            self.handle.as_raw_fd(),
            0,
        )
    };
    (mapping != MAP_FAILED).then(|| mapping as u64)
}
/// Issues hypercall `op` with the five arguments in `arg` through the
/// privcmd descriptor and returns the value reported by the ioctl.
///
/// # Errors
///
/// Returns [`Error::Kernel`] when the ioctl fails.
pub fn hypercall(&self, op: c_ulong, arg: [c_ulong; 5]) -> Result<c_long> {
    trace!(
        "call fd={} hypercall op={:#x} arg={:?}",
        self.handle.as_raw_fd(),
        op,
        arg
    );
    let mut request = Hypercall { op, arg };
    // SAFETY: `request` lives on this stack frame for the whole call.
    // NOTE(review): arguments that are addresses must point to memory that
    // stays valid during the call; that is the caller's responsibility.
    let ret = unsafe { sys::hypercall(self.handle.as_raw_fd(), &mut request) }?;
    Ok(ret as c_long)
}
/// Issues hypercall `op` with all five arguments set to zero.
pub fn hypercall0(&self, op: c_ulong) -> Result<c_long> {
    self.hypercall(op, [0; 5])
}
/// Issues hypercall `op` with one argument; the remaining four are zero.
pub fn hypercall1(&self, op: c_ulong, arg1: c_ulong) -> Result<c_long> {
    let mut args = [0; 5];
    args[0] = arg1;
    self.hypercall(op, args)
}
/// Issues hypercall `op` with two arguments; the remaining three are zero.
pub fn hypercall2(&self, op: c_ulong, arg1: c_ulong, arg2: c_ulong) -> Result<c_long> {
    let mut args = [0; 5];
    args[0] = arg1;
    args[1] = arg2;
    self.hypercall(op, args)
}
/// Issues hypercall `op` with three arguments; the remaining two are zero.
pub fn hypercall3(
    &self,
    op: c_ulong,
    arg1: c_ulong,
    arg2: c_ulong,
    arg3: c_ulong,
) -> Result<c_long> {
    let mut args = [0; 5];
    args[0] = arg1;
    args[1] = arg2;
    args[2] = arg3;
    self.hypercall(op, args)
}
/// Issues hypercall `op` with four arguments; the fifth is zero.
pub fn hypercall4(
    &self,
    op: c_ulong,
    arg1: c_ulong,
    arg2: c_ulong,
    arg3: c_ulong,
    arg4: c_ulong,
) -> Result<c_long> {
    let mut args = [0; 5];
    args[0] = arg1;
    args[1] = arg2;
    args[2] = arg3;
    args[3] = arg4;
    self.hypercall(op, args)
}
/// Issues hypercall `op` with all five arguments supplied by the caller.
pub fn hypercall5(
    &self,
    op: c_ulong,
    arg1: c_ulong,
    arg2: c_ulong,
    arg3: c_ulong,
    arg4: c_ulong,
    arg5: c_ulong,
) -> Result<c_long> {
    let args = [arg1, arg2, arg3, arg4, arg5];
    self.hypercall(op, args)
}
/// Submits `calls` as one `HYPERVISOR_MULTICALL` hypercall, passing the
/// address and the length of the slice. The hypercall's return value is
/// discarded.
pub fn multicall(&self, calls: &mut [MultiCallEntry]) -> Result<()> {
    trace!(
        "call fd={} multicall calls={:?}",
        self.handle.as_raw_fd(),
        calls
    );
    let entries = calls.as_mut_ptr() as c_ulong;
    let count = calls.len() as c_ulong;
    self.hypercall2(HYPERVISOR_MULTICALL, entries, count)
        .map(|_| ())
}
/// Issues the privcmd `mmap_resource` ioctl for the given domain and
/// resource description.
///
/// `domid` is narrowed to 16 bits before it is placed in the request.
///
/// # Errors
///
/// Returns [`Error::Kernel`] when the ioctl fails.
pub fn map_resource(
    &self,
    domid: u32,
    typ: u32,
    id: u32,
    idx: u32,
    num: u64,
    addr: u64,
) -> Result<()> {
    let dom = domid as u16;
    let mut request = MmapResource {
        dom,
        typ,
        id,
        idx,
        num,
        addr,
    };
    // SAFETY: `request` lives on this stack frame for the whole call.
    // NOTE(review): `addr` is presumably a mapping obtained from `mmap`;
    // confirm with callers.
    unsafe { sys::mmap_resource(self.handle.as_raw_fd(), &mut request) }?;
    Ok(())
}
pub fn mmap_batch(&self, domid: u32, num: u64, addr: u64, mfns: Vec<u64>) -> Result<c_long> {
trace!(
"call fd={} mmap_batch domid={} num={} addr={:#x} mfns={:?}",
self.handle.as_raw_fd(),
domid,
num,
addr,
mfns
);
unsafe {
let mut mfns = mfns.clone();
let mut errors = vec![0i32; mfns.len()];
let mut batch = MmapBatch {
num: num as u32,
domid: domid as u16,
addr,
mfns: mfns.as_mut_ptr(),
errors: errors.as_mut_ptr(),
};
let result = sys::mmapbatch(self.handle.as_raw_fd(), &mut batch);
if let Err(errno) = result {
if errno != Errno::ENOENT {
return Err(errno)?;
}
usleep(100);
let mut i: usize = 0;
let mut paged: usize = 0;
loop {
if errors[i] != libc::ENOENT {
i += 1;
continue;
}
paged += 1;
let mut batch = MmapBatch {
num: 1,
domid: domid as u16,
addr: addr + ((i as u64) << 12),
mfns: mfns.as_mut_ptr().add(i),
errors: errors.as_mut_ptr().add(i),
};
loop {
i += 1;
if i < num as usize {
if errors[i] != libc::ENOENT {
break;
}
batch.num += 1;
}
}
let result = sys::mmapbatch(self.handle.as_raw_fd(), &mut batch);
if let Err(n) = result {
if n != Errno::ENOENT {
return Err(n)?;
}
}
if i < num as usize {
break;
}
let count = result.unwrap();
if count <= 0 {
break;
}
}
return Ok(paged as c_long);
}
Ok(result.unwrap() as c_long)
}
}
/// Queries the capabilities record via `HYPERVISOR_XEN_VERSION` with the
/// `XENVER_CAPABILITIES` sub-command.
///
/// A zeroed 1024-byte buffer is handed to the hypercall and returned as
/// filled in by it.
pub fn get_version_capabilities(&self) -> Result<XenCapabilitiesInfo> {
    trace!(
        "call fd={} get_version_capabilities",
        self.handle.as_raw_fd()
    );
    let mut info = XenCapabilitiesInfo {
        capabilities: [0; 1024],
    };
    let info_ptr = addr_of_mut!(info) as c_ulong;
    self.hypercall2(HYPERVISOR_XEN_VERSION, XENVER_CAPABILITIES, info_ptr)?;
    Ok(info)
}
/// Issues `HYPERVISOR_EVENT_CHANNEL_OP` with sub-command `cmd` and the raw
/// argument `arg`; the hypercall's return value is discarded.
pub fn evtchn_op(&self, cmd: c_int, arg: u64) -> Result<()> {
    let command = cmd as c_ulong;
    self.hypercall2(HYPERVISOR_EVENT_CHANNEL_OP, command, arg)
        .map(|_| ())
}
pub fn evtchn_alloc_unbound(&self, domid: u32, remote_domid: u32) -> Result<u32> {
let mut alloc_unbound = EvtChnAllocUnbound {
dom: domid as u16,
remote_dom: remote_domid as u16,
port: 0,
};
self.evtchn_op(6, addr_of_mut!(alloc_unbound) as c_ulong)?;
Ok(alloc_unbound.port)
}
/// Fetches the domain information of `domid` via `XEN_DOMCTL_GETDOMAININFO`.
///
/// The request carries an all-zero `GetDomainInfo`, which is returned as
/// filled in by the hypercall.
pub fn get_domain_info(&self, domid: u32) -> Result<GetDomainInfo> {
    trace!(
        "domctl fd={} get_domain_info domid={}",
        self.handle.as_raw_fd(),
        domid
    );
    let empty_info = GetDomainInfo {
        domid: 0,
        pad1: 0,
        flags: 0,
        total_pages: 0,
        max_pages: 0,
        outstanding_pages: 0,
        shr_pages: 0,
        paged_pages: 0,
        shared_info_frame: 0,
        cpu_time: 0,
        number_online_vcpus: 0,
        max_vcpu_id: 0,
        ssidref: 0,
        handle: [0; 16],
        cpupool: 0,
        gpaddr_bits: 0,
        pad2: [0; 7],
        arch: ArchDomainConfig {
            emulation_flags: 0,
            misc_flags: 0,
        },
    };
    let mut request = DomCtl {
        cmd: XEN_DOMCTL_GETDOMAININFO,
        interface_version: XEN_DOMCTL_INTERFACE_VERSION,
        domid,
        value: DomCtlValue {
            get_domain_info: empty_info,
        },
    };
    let request_ptr = addr_of_mut!(request) as c_ulong;
    self.hypercall1(HYPERVISOR_DOMCTL, request_ptr)?;
    // SAFETY: `get_domain_info` is the union field this request was
    // initialised with.
    let info = unsafe { request.value.get_domain_info };
    Ok(info)
}
/// Creates a domain via `XEN_DOMCTL_CREATEDOMAIN` and returns the domain id
/// found in the request after the hypercall.
///
/// The request is submitted with `domid` set to 0.
pub fn create_domain(&self, create_domain: CreateDomain) -> Result<u32> {
    trace!(
        "domctl fd={} create_domain create_domain={:?}",
        self.handle.as_raw_fd(),
        create_domain
    );
    let mut request = DomCtl {
        cmd: XEN_DOMCTL_CREATEDOMAIN,
        interface_version: XEN_DOMCTL_INTERFACE_VERSION,
        domid: 0,
        value: DomCtlValue { create_domain },
    };
    let request_ptr = addr_of_mut!(request) as c_ulong;
    self.hypercall1(HYPERVISOR_DOMCTL, request_ptr)?;
    Ok(request.domid)
}
/// Pauses domain `domid` via `XEN_DOMCTL_PAUSEDOMAIN`.
///
/// The command takes no payload, so the value area is sent zeroed.
pub fn pause_domain(&self, domid: u32) -> Result<()> {
    trace!(
        "domctl fd={} pause_domain domid={:?}",
        self.handle.as_raw_fd(),
        domid,
    );
    let mut request = DomCtl {
        cmd: XEN_DOMCTL_PAUSEDOMAIN,
        interface_version: XEN_DOMCTL_INTERFACE_VERSION,
        domid,
        value: DomCtlValue { pad: [0; 128] },
    };
    let request_ptr = addr_of_mut!(request) as c_ulong;
    self.hypercall1(HYPERVISOR_DOMCTL, request_ptr)
        .map(|_| ())
}
/// Resumes domain `domid` via `XEN_DOMCTL_UNPAUSEDOMAIN`.
///
/// The command takes no payload, so the value area is sent zeroed.
pub fn unpause_domain(&self, domid: u32) -> Result<()> {
    trace!(
        "domctl fd={} unpause_domain domid={:?}",
        self.handle.as_raw_fd(),
        domid,
    );
    let mut request = DomCtl {
        cmd: XEN_DOMCTL_UNPAUSEDOMAIN,
        interface_version: XEN_DOMCTL_INTERFACE_VERSION,
        domid,
        value: DomCtlValue { pad: [0; 128] },
    };
    let request_ptr = addr_of_mut!(request) as c_ulong;
    self.hypercall1(HYPERVISOR_DOMCTL, request_ptr)
        .map(|_| ())
}
/// Issues `XEN_DOMCTL_MAX_MEM` for `domid`, passing `memkb` as the
/// `max_memkb` payload field.
pub fn set_max_mem(&self, domid: u32, memkb: u64) -> Result<()> {
    trace!(
        "domctl fd={} set_max_mem domid={} memkb={}",
        self.handle.as_raw_fd(),
        domid,
        memkb
    );
    let max_mem = MaxMem { max_memkb: memkb };
    let mut request = DomCtl {
        cmd: XEN_DOMCTL_MAX_MEM,
        interface_version: XEN_DOMCTL_INTERFACE_VERSION,
        domid,
        value: DomCtlValue { max_mem },
    };
    let request_addr = addr_of_mut!(request) as c_ulong;
    self.hypercall1(HYPERVISOR_DOMCTL, request_addr)?;
    Ok(())
}
/// Issues `XEN_DOMCTL_MAX_VCPUS` for `domid` with `max_vcpus` as the payload.
pub fn set_max_vcpus(&self, domid: u32, max_vcpus: u32) -> Result<()> {
    trace!(
        "domctl fd={} set_max_vcpus domid={} max_vcpus={}",
        self.handle.as_raw_fd(),
        domid,
        max_vcpus
    );
    let max_cpus = MaxVcpus { max_vcpus };
    let mut request = DomCtl {
        cmd: XEN_DOMCTL_MAX_VCPUS,
        interface_version: XEN_DOMCTL_INTERFACE_VERSION,
        domid,
        value: DomCtlValue { max_cpus },
    };
    let request_addr = addr_of_mut!(request) as c_ulong;
    self.hypercall1(HYPERVISOR_DOMCTL, request_addr)?;
    Ok(())
}
/// Issues `XEN_DOMCTL_SET_ADDRESS_SIZE` for `domid` with `size` as the payload.
pub fn set_address_size(&self, domid: u32, size: u32) -> Result<()> {
    trace!(
        "domctl fd={} set_address_size domid={} size={}",
        self.handle.as_raw_fd(),
        domid,
        size,
    );
    let address_size = AddressSize { size };
    let mut request = DomCtl {
        cmd: XEN_DOMCTL_SET_ADDRESS_SIZE,
        interface_version: XEN_DOMCTL_INTERFACE_VERSION,
        domid,
        value: DomCtlValue { address_size },
    };
    let request_addr = addr_of_mut!(request) as c_ulong;
    self.hypercall1(HYPERVISOR_DOMCTL, request_addr)?;
    Ok(())
}
/// Issues `XEN_DOMCTL_GETVCPUCONTEXT` for `vcpu` of `domid` and returns the
/// context buffer as it stands after the hypercall.
pub fn get_vcpu_context(&self, domid: u32, vcpu: u32) -> Result<VcpuGuestContext> {
    trace!(
        "domctl fd={} get_vcpu_context domid={}",
        self.handle.as_raw_fd(),
        domid,
    );
    // Default-initialised buffer whose address is passed in the request.
    let mut context = VcpuGuestContextAny {
        value: VcpuGuestContext::default(),
    };
    let ctx = addr_of_mut!(context) as c_ulong;
    let mut request = DomCtl {
        cmd: XEN_DOMCTL_GETVCPUCONTEXT,
        interface_version: XEN_DOMCTL_INTERFACE_VERSION,
        domid,
        value: DomCtlValue {
            vcpu_context: DomCtlVcpuContext { vcpu, ctx },
        },
    };
    let request_addr = addr_of_mut!(request) as c_ulong;
    self.hypercall1(HYPERVISOR_DOMCTL, request_addr)?;
    // SAFETY: `value` is the union's only field and was initialised above.
    let filled = unsafe { context.value };
    Ok(filled)
}
/// Issues `XEN_DOMCTL_SETVCPUCONTEXT` for `vcpu` of `domid`.
///
/// `context` is copied into a local buffer and the address of that copy is
/// what the request carries.
pub fn set_vcpu_context(
    &self,
    domid: u32,
    vcpu: u32,
    context: &VcpuGuestContext,
) -> Result<()> {
    trace!(
        "domctl fd={} set_vcpu_context domid={} context={:?}",
        self.handle.as_raw_fd(),
        domid,
        context,
    );
    let mut staged = VcpuGuestContextAny { value: *context };
    let ctx = addr_of_mut!(staged) as c_ulong;
    let mut request = DomCtl {
        cmd: XEN_DOMCTL_SETVCPUCONTEXT,
        interface_version: XEN_DOMCTL_INTERFACE_VERSION,
        domid,
        value: DomCtlValue {
            vcpu_context: DomCtlVcpuContext { vcpu, ctx },
        },
    };
    let request_addr = addr_of_mut!(request) as c_ulong;
    self.hypercall1(HYPERVISOR_DOMCTL, request_addr)?;
    Ok(())
}
pub fn get_page_frame_info(&self, domid: u32, frames: &[u64]) -> Result<Vec<u64>> {
let mut buffer: Vec<u64> = frames.to_vec();
let mut domctl = DomCtl {
cmd: XEN_DOMCTL_GETPAGEFRAMEINFO3,
interface_version: XEN_DOMCTL_INTERFACE_VERSION,
domid,
value: DomCtlValue {
get_page_frame_info: GetPageFrameInfo3 {
num: buffer.len() as u64,
array: buffer.as_mut_ptr() as c_ulong,
},
},
};
self.hypercall1(HYPERVISOR_DOMCTL, addr_of_mut!(domctl) as c_ulong)?;
let slice = unsafe {
slice::from_raw_parts_mut(
domctl.value.get_page_frame_info.array as *mut u64,
domctl.value.get_page_frame_info.num as usize,
)
};
Ok(slice.to_vec())
}
/// Issues `XEN_DOMCTL_HYPERCALL_INIT` for `domid` with `gmfn` as the payload.
pub fn hypercall_init(&self, domid: u32, gmfn: u64) -> Result<()> {
    trace!(
        "domctl fd={} hypercall_init domid={} gmfn={}",
        self.handle.as_raw_fd(),
        domid,
        gmfn
    );
    let hypercall_init = HypercallInit { gmfn };
    let mut request = DomCtl {
        cmd: XEN_DOMCTL_HYPERCALL_INIT,
        interface_version: XEN_DOMCTL_INTERFACE_VERSION,
        domid,
        value: DomCtlValue { hypercall_init },
    };
    let request_addr = addr_of_mut!(request) as c_ulong;
    self.hypercall1(HYPERVISOR_DOMCTL, request_addr)?;
    Ok(())
}
/// Issues `XEN_DOMCTL_DESTROYDOMAIN` for `domid`; the command carries no payload.
pub fn destroy_domain(&self, domid: u32) -> Result<()> {
    trace!(
        "domctl fd={} destroy_domain domid={}",
        self.handle.as_raw_fd(),
        domid
    );
    // Zero-filled padding stands in for the unused payload.
    let empty = DomCtlValue { pad: [0; 128] };
    let mut request = DomCtl {
        cmd: XEN_DOMCTL_DESTROYDOMAIN,
        interface_version: XEN_DOMCTL_INTERFACE_VERSION,
        domid,
        value: empty,
    };
    let request_addr = addr_of_mut!(request) as c_ulong;
    self.hypercall1(HYPERVISOR_DOMCTL, request_addr)?;
    Ok(())
}
/// Fetches the memory map as raw bytes using two `XEN_MEM_MEMORY_MAP` calls.
///
/// The first call is made with a zero count and a null buffer; the `count`
/// read back is multiplied by `size_of_entry` to size the byte buffer that
/// the second call is pointed at. That buffer is returned uninterpreted.
pub fn get_memory_map(&self, size_of_entry: usize) -> Result<Vec<u8>> {
    let op = XEN_MEM_MEMORY_MAP as c_ulong;
    let mut request = MemoryMap {
        count: 0,
        buffer: 0,
    };
    // Pass 1: obtain the entry count.
    self.hypercall2(HYPERVISOR_MEMORY_OP, op, addr_of_mut!(request) as c_ulong)?;
    let byte_len = request.count as usize * size_of_entry;
    let mut entries = vec![0u8; byte_len];
    request.buffer = entries.as_mut_ptr() as c_ulong;
    // Pass 2: same request, now with a destination buffer.
    self.hypercall2(HYPERVISOR_MEMORY_OP, op, addr_of_mut!(request) as c_ulong)?;
    Ok(entries)
}
/// Runs `XEN_MEM_POPULATE_PHYSMAP` for `domid` through a one-entry multicall.
///
/// `extent_starts` is copied into a scratch vector whose address is passed
/// in the reservation. The multicall result is used as a count: the first
/// `result` entries of the scratch vector are returned.
///
/// # Errors
///
/// Returns `Error::PopulatePhysmapFailed` when the result is greater than
/// `!0xfff` or exceeds the number of supplied extents.
pub fn populate_physmap(
    &self,
    domid: u32,
    nr_extents: u64,
    extent_order: u32,
    mem_flags: u32,
    extent_starts: &[u64],
) -> Result<Vec<u64>> {
    trace!("memory fd={} populate_physmap domid={} nr_extents={} extent_order={} mem_flags={} extent_starts={:?}", self.handle.as_raw_fd(), domid, nr_extents, extent_order, mem_flags, extent_starts);
    let mut extents = extent_starts.to_vec();
    let extent_start = extents.as_mut_ptr() as c_ulong;
    let mut reservation = MemoryReservation {
        extent_start,
        nr_extents,
        extent_order,
        mem_flags,
        // Narrowed to 16 bits with `as`.
        domid: domid as u16,
    };
    let reservation_addr = addr_of_mut!(reservation) as c_ulong;
    let calls = &mut [MultiCallEntry {
        op: HYPERVISOR_MEMORY_OP,
        result: 0,
        args: [
            XEN_MEM_POPULATE_PHYSMAP as c_ulong,
            reservation_addr,
            0,
            0,
            0,
            0,
        ],
    }];
    self.multicall(calls)?;
    let code = calls[0].result;
    // Values above `!0xfff` are rejected first (presumably negative errno
    // values in unsigned form — confirm), then counts larger than the input.
    if code > !0xfff || code as usize > extents.len() {
        return Err(Error::PopulatePhysmapFailed);
    }
    extents.truncate(code as usize);
    Ok(extents)
}
/// Issues `XEN_MEM_CLAIM_PAGES` for `domid`, passing `pages` as the
/// reservation's `nr_extents`; all other reservation fields are zero.
pub fn claim_pages(&self, domid: u32, pages: u64) -> Result<()> {
    trace!(
        "memory fd={} claim_pages domid={} pages={}",
        self.handle.as_raw_fd(),
        domid,
        pages
    );
    let mut claim = MemoryReservation {
        extent_start: 0,
        nr_extents: pages,
        extent_order: 0,
        mem_flags: 0,
        // Narrowed to 16 bits with `as`.
        domid: domid as u16,
    };
    let op = XEN_MEM_CLAIM_PAGES as c_ulong;
    let claim_addr = addr_of_mut!(claim) as c_ulong;
    self.hypercall2(HYPERVISOR_MEMORY_OP, op, claim_addr)?;
    Ok(())
}
/// Issues a single `HYPERVISOR_MMUEXT_OP` built from `cmd`, `arg1` and `arg2`.
///
/// The hypercall receives the address of the op, a count of 1, a zero third
/// argument and `domid`; its return value is discarded.
pub fn mmuext(&self, domid: u32, cmd: c_uint, arg1: u64, arg2: u64) -> Result<()> {
    let mut op = MmuExtOp { cmd, arg1, arg2 };
    let op_addr = addr_of_mut!(op) as c_ulong;
    self.hypercall4(HYPERVISOR_MMUEXT_OP, op_addr, 1, 0, domid as c_ulong)?;
    Ok(())
}
}

View File

@ -0,0 +1,507 @@
//! Handwritten hypercall bindings.
use nix::ioctl_readwrite_bad;
use std::ffi::{c_char, c_int, c_uint, c_ulong};
use uuid::Uuid;
/// Argument block for the `hypercall` ioctl wrapper: an operation number
/// followed by up to five arguments.
#[repr(C)]
#[derive(Copy, Clone, Debug)]
pub struct Hypercall {
pub op: c_ulong,
pub arg: [c_ulong; 5],
}
/// One entry referenced by [`Mmap::entry`].
// NOTE(review): field names suggest virtual address / machine frame number /
// page count as in Linux's privcmd_mmap_entry — confirm against the header.
#[repr(C)]
#[derive(Copy, Clone, Debug, Default)]
pub struct MmapEntry {
pub va: u64,
pub mfn: u64,
pub npages: u64,
}
/// Argument block for the `mmap_resource` ioctl wrapper.
// NOTE(review): presumably mirrors Linux's privcmd_mmap_resource — confirm.
#[repr(C)]
#[derive(Copy, Clone, Debug, Default)]
pub struct MmapResource {
pub dom: u16,
pub typ: u32,
pub id: u32,
pub idx: u32,
pub num: u64,
pub addr: u64,
}
/// Argument block for the `mmapbatch` ioctl wrapper.
///
/// `mfns` and `errors` are raw pointers; their validity is the caller's
/// responsibility.
#[repr(C)]
#[derive(Copy, Clone, Debug)]
pub struct MmapBatch {
pub num: u32,
pub domid: u16,
pub addr: u64,
pub mfns: *mut u64,
pub errors: *mut c_int,
}
/// Argument block for the `mmap` ioctl wrapper.
///
/// `entry` is a raw pointer to [`MmapEntry`] values; presumably `num` is the
/// number of entries it points to — confirm against the kernel header.
#[repr(C)]
#[derive(Clone, Debug)]
pub struct Mmap {
pub num: c_int,
pub dom: u16,
pub entry: *mut MmapEntry,
}
// Hard-coded ioctl request numbers used by the wrappers generated below.
// NOTE(review): presumably the Linux privcmd driver's request codes with the
// argument struct size encoded in the upper bits — confirm against
// include/uapi/xen/privcmd.h.
const IOCTL_PRIVCMD_HYPERCALL: u64 = 0x305000;
const IOCTL_PRIVCMD_MMAP: u64 = 0x105002;
const IOCTL_PRIVCMD_MMAPBATCH_V2: u64 = 0x205004;
const IOCTL_PRIVCMD_MMAP_RESOURCE: u64 = 0x205007;
// Each invocation generates an unsafe wrapper function named after its first
// argument that issues the given request with a pointer to the given struct.
ioctl_readwrite_bad!(hypercall, IOCTL_PRIVCMD_HYPERCALL, Hypercall);
ioctl_readwrite_bad!(mmap, IOCTL_PRIVCMD_MMAP, Mmap);
ioctl_readwrite_bad!(mmapbatch, IOCTL_PRIVCMD_MMAPBATCH_V2, MmapBatch);
ioctl_readwrite_bad!(mmap_resource, IOCTL_PRIVCMD_MMAP_RESOURCE, MmapResource);
// Hypercall operation numbers, used as the `op` of a hypercall.
// The numbering skips 11 in this table.
pub const HYPERVISOR_SET_TRAP_TABLE: c_ulong = 0;
pub const HYPERVISOR_MMU_UPDATE: c_ulong = 1;
pub const HYPERVISOR_SET_GDT: c_ulong = 2;
pub const HYPERVISOR_STACK_SWITCH: c_ulong = 3;
pub const HYPERVISOR_SET_CALLBACKS: c_ulong = 4;
pub const HYPERVISOR_FPU_TASKSWITCH: c_ulong = 5;
pub const HYPERVISOR_SCHED_OP_COMPAT: c_ulong = 6;
pub const HYPERVISOR_PLATFORM_OP: c_ulong = 7;
pub const HYPERVISOR_SET_DEBUGREG: c_ulong = 8;
pub const HYPERVISOR_GET_DEBUGREG: c_ulong = 9;
pub const HYPERVISOR_UPDATE_DESCRIPTOR: c_ulong = 10;
pub const HYPERVISOR_MEMORY_OP: c_ulong = 12;
pub const HYPERVISOR_MULTICALL: c_ulong = 13;
pub const HYPERVISOR_UPDATE_VA_MAPPING: c_ulong = 14;
pub const HYPERVISOR_SET_TIMER_OP: c_ulong = 15;
pub const HYPERVISOR_EVENT_CHANNEL_OP_COMPAT: c_ulong = 16;
pub const HYPERVISOR_XEN_VERSION: c_ulong = 17;
pub const HYPERVISOR_CONSOLE_IO: c_ulong = 18;
pub const HYPERVISOR_PHYSDEV_OP_COMPAT: c_ulong = 19;
pub const HYPERVISOR_GRANT_TABLE_OP: c_ulong = 20;
pub const HYPERVISOR_VM_ASSIST: c_ulong = 21;
pub const HYPERVISOR_UPDATE_VA_MAPPING_OTHERDOMAIN: c_ulong = 22;
pub const HYPERVISOR_IRET: c_ulong = 23;
pub const HYPERVISOR_VCPU_OP: c_ulong = 24;
pub const HYPERVISOR_SET_SEGMENT_BASE: c_ulong = 25;
pub const HYPERVISOR_MMUEXT_OP: c_ulong = 26;
pub const HYPERVISOR_XSM_OP: c_ulong = 27;
pub const HYPERVISOR_NMI_OP: c_ulong = 28;
pub const HYPERVISOR_SCHED_OP: c_ulong = 29;
pub const HYPERVISOR_CALLBACK_OP: c_ulong = 30;
pub const HYPERVISOR_XENOPROF_OP: c_ulong = 31;
pub const HYPERVISOR_EVENT_CHANNEL_OP: c_ulong = 32;
pub const HYPERVISOR_PHYSDEV_OP: c_ulong = 33;
pub const HYPERVISOR_HVM_OP: c_ulong = 34;
pub const HYPERVISOR_SYSCTL: c_ulong = 35;
pub const HYPERVISOR_DOMCTL: c_ulong = 36;
pub const HYPERVISOR_KEXEC_OP: c_ulong = 37;
pub const HYPERVISOR_TMEM_OP: c_ulong = 38;
pub const HYPERVISOR_XC_RESERVED_OP: c_ulong = 39;
pub const HYPERVISOR_XENPMU_OP: c_ulong = 40;
pub const HYPERVISOR_DM_OP: c_ulong = 41;
// Single-bit flags; presumably values for `CreateDomain::flags` — confirm
// against Xen's public domctl.h.
pub const XEN_DOMCTL_CDF_HVM_GUEST: u32 = 1 << 0;
pub const XEN_DOMCTL_CDF_HAP: u32 = 1 << 1;
pub const XEN_DOMCTL_CDF_S3_INTEGRITY: u32 = 1 << 2;
pub const XEN_DOMCTL_CDF_OOS_OFF: u32 = 1 << 3;
pub const XEN_DOMCTL_CDF_XS_DOMAIN: u32 = 1 << 4;
// Single-bit flags; presumably values for `ArchDomainConfig::emulation_flags`
// — confirm against Xen's public arch-x86 headers.
pub const XEN_X86_EMU_LAPIC: u32 = 1 << 0;
pub const XEN_X86_EMU_HPET: u32 = 1 << 1;
pub const XEN_X86_EMU_PM: u32 = 1 << 2;
pub const XEN_X86_EMU_RTC: u32 = 1 << 3;
pub const XEN_X86_EMU_IOAPIC: u32 = 1 << 4;
pub const XEN_X86_EMU_PIC: u32 = 1 << 5;
pub const XEN_X86_EMU_VGA: u32 = 1 << 6;
pub const XEN_X86_EMU_IOMMU: u32 = 1 << 7;
pub const XEN_X86_EMU_PIT: u32 = 1 << 8;
pub const XEN_X86_EMU_USE_PIRQ: u32 = 1 << 9;
/// Bitwise OR of every `XEN_X86_EMU_*` flag defined above.
pub const XEN_X86_EMU_ALL: u32 = XEN_X86_EMU_LAPIC
| XEN_X86_EMU_HPET
| XEN_X86_EMU_PM
| XEN_X86_EMU_RTC
| XEN_X86_EMU_IOAPIC
| XEN_X86_EMU_PIC
| XEN_X86_EMU_VGA
| XEN_X86_EMU_IOMMU
| XEN_X86_EMU_PIT
| XEN_X86_EMU_USE_PIRQ;
// Domctl command numbers, stored in `DomCtl::cmd`.
// The numbering has gaps; the GDBSX commands at the end start at 1000.
pub const XEN_DOMCTL_CREATEDOMAIN: u32 = 1;
pub const XEN_DOMCTL_DESTROYDOMAIN: u32 = 2;
pub const XEN_DOMCTL_PAUSEDOMAIN: u32 = 3;
pub const XEN_DOMCTL_UNPAUSEDOMAIN: u32 = 4;
pub const XEN_DOMCTL_GETDOMAININFO: u32 = 5;
pub const XEN_DOMCTL_GETMEMLIST: u32 = 6;
pub const XEN_DOMCTL_SETVCPUAFFINITY: u32 = 9;
pub const XEN_DOMCTL_SHADOW_OP: u32 = 10;
pub const XEN_DOMCTL_MAX_MEM: u32 = 11;
pub const XEN_DOMCTL_SETVCPUCONTEXT: u32 = 12;
pub const XEN_DOMCTL_GETVCPUCONTEXT: u32 = 13;
pub const XEN_DOMCTL_GETVCPUINFO: u32 = 14;
pub const XEN_DOMCTL_MAX_VCPUS: u32 = 15;
pub const XEN_DOMCTL_SCHEDULER_OP: u32 = 16;
pub const XEN_DOMCTL_SETDOMAINHANDLE: u32 = 17;
pub const XEN_DOMCTL_SETDEBUGGING: u32 = 18;
pub const XEN_DOMCTL_IRQ_PERMISSION: u32 = 19;
pub const XEN_DOMCTL_IOMEM_PERMISSION: u32 = 20;
pub const XEN_DOMCTL_IOPORT_PERMISSION: u32 = 21;
pub const XEN_DOMCTL_HYPERCALL_INIT: u32 = 22;
pub const XEN_DOMCTL_SETTIMEOFFSET: u32 = 24;
pub const XEN_DOMCTL_GETVCPUAFFINITY: u32 = 25;
pub const XEN_DOMCTL_RESUMEDOMAIN: u32 = 27;
pub const XEN_DOMCTL_SENDTRIGGER: u32 = 28;
pub const XEN_DOMCTL_SUBSCRIBE: u32 = 29;
pub const XEN_DOMCTL_GETHVMCONTEXT: u32 = 33;
pub const XEN_DOMCTL_SETHVMCONTEXT: u32 = 34;
pub const XEN_DOMCTL_SET_ADDRESS_SIZE: u32 = 35;
pub const XEN_DOMCTL_GET_ADDRESS_SIZE: u32 = 36;
pub const XEN_DOMCTL_ASSIGN_DEVICE: u32 = 37;
pub const XEN_DOMCTL_BIND_PT_IRQ: u32 = 38;
pub const XEN_DOMCTL_MEMORY_MAPPING: u32 = 39;
pub const XEN_DOMCTL_IOPORT_MAPPING: u32 = 40;
pub const XEN_DOMCTL_PIN_MEM_CACHEATTR: u32 = 41;
pub const XEN_DOMCTL_SET_EXT_VCPUCONTEXT: u32 = 42;
pub const XEN_DOMCTL_GET_EXT_VCPUCONTEXT: u32 = 43;
pub const XEN_DOMCTL_TEST_ASSIGN_DEVICE: u32 = 45;
pub const XEN_DOMCTL_SET_TARGET: u32 = 46;
pub const XEN_DOMCTL_DEASSIGN_DEVICE: u32 = 47;
pub const XEN_DOMCTL_UNBIND_PT_IRQ: u32 = 48;
pub const XEN_DOMCTL_SET_CPUID: u32 = 49;
pub const XEN_DOMCTL_GET_DEVICE_GROUP: u32 = 50;
pub const XEN_DOMCTL_SET_MACHINE_ADDRESS_SIZE: u32 = 51;
pub const XEN_DOMCTL_GET_MACHINE_ADDRESS_SIZE: u32 = 52;
pub const XEN_DOMCTL_SUPPRESS_SPURIOUS_PAGE_FAULTS: u32 = 53;
pub const XEN_DOMCTL_DEBUG_OP: u32 = 54;
pub const XEN_DOMCTL_GETHVMCONTEXT_PARTIAL: u32 = 55;
pub const XEN_DOMCTL_VM_EVENT_OP: u32 = 56;
pub const XEN_DOMCTL_MEM_SHARING_OP: u32 = 57;
pub const XEN_DOMCTL_DISABLE_MIGRATE: u32 = 58;
pub const XEN_DOMCTL_GETTSCINFO: u32 = 59;
pub const XEN_DOMCTL_SETTSCINFO: u32 = 60;
pub const XEN_DOMCTL_GETPAGEFRAMEINFO3: u32 = 61;
pub const XEN_DOMCTL_SETVCPUEXTSTATE: u32 = 62;
pub const XEN_DOMCTL_GETVCPUEXTSTATE: u32 = 63;
pub const XEN_DOMCTL_SET_ACCESS_REQUIRED: u32 = 64;
pub const XEN_DOMCTL_AUDIT_P2M: u32 = 65;
pub const XEN_DOMCTL_SET_VIRQ_HANDLER: u32 = 66;
pub const XEN_DOMCTL_SET_BROKEN_PAGE_P2M: u32 = 67;
pub const XEN_DOMCTL_SETNODEAFFINITY: u32 = 68;
pub const XEN_DOMCTL_GETNODEAFFINITY: u32 = 69;
pub const XEN_DOMCTL_SET_MAX_EVTCHN: u32 = 70;
pub const XEN_DOMCTL_CACHEFLUSH: u32 = 71;
pub const XEN_DOMCTL_GET_VCPU_MSRS: u32 = 72;
pub const XEN_DOMCTL_SET_VCPU_MSRS: u32 = 73;
pub const XEN_DOMCTL_SETVNUMAINFO: u32 = 74;
pub const XEN_DOMCTL_PSR_CMT_OP: u32 = 75;
pub const XEN_DOMCTL_MONITOR_OP: u32 = 77;
pub const XEN_DOMCTL_PSR_CAT_OP: u32 = 78;
pub const XEN_DOMCTL_SOFT_RESET: u32 = 79;
pub const XEN_DOMCTL_SET_GNTTAB_LIMITS: u32 = 80;
pub const XEN_DOMCTL_VUART_OP: u32 = 81;
pub const XEN_DOMCTL_GDBSX_GUESTMEMIO: u32 = 1000;
pub const XEN_DOMCTL_GDBSX_PAUSEVCPU: u32 = 1001;
pub const XEN_DOMCTL_GDBSX_UNPAUSEVCPU: u32 = 1002;
pub const XEN_DOMCTL_GDBSX_DOMSTATUS: u32 = 1003;
/// Domctl request: a command number, the interface version, the target
/// domain id and a command-specific payload union.
#[repr(C)]
#[derive(Copy, Clone)]
pub struct DomCtl {
pub cmd: u32,
pub interface_version: u32,
pub domid: u32,
pub value: DomCtlValue,
}
/// Payload carrying a vcpu index and an address stored as an integer in `ctx`.
#[repr(C)]
#[derive(Copy, Clone)]
pub struct DomCtlVcpuContext {
pub vcpu: u32,
pub ctx: u64,
}
/// Payload carrying a single `size` value.
// NOTE(review): presumably the guest address width in bits — confirm.
#[repr(C)]
#[derive(Copy, Clone)]
pub struct AddressSize {
pub size: u32,
}
/// Command-specific payload of a [`DomCtl`] request.
///
/// Only the variant matching `DomCtl::cmd` is meaningful. `pad` is a 128-byte
/// array that commands without a payload use as a zero-filled placeholder.
#[repr(C)]
#[derive(Copy, Clone)]
pub union DomCtlValue {
pub create_domain: CreateDomain,
pub get_domain_info: GetDomainInfo,
pub max_mem: MaxMem,
pub max_cpus: MaxVcpus,
pub hypercall_init: HypercallInit,
pub vcpu_context: DomCtlVcpuContext,
pub address_size: AddressSize,
pub get_page_frame_info: GetPageFrameInfo3,
pub pad: [u8; 128],
}
/// Payload for the create-domain command.
// NOTE(review): presumably mirrors Xen's xen_domctl_createdomain for the
// interface version declared in this file — confirm field order against the
// matching Xen release.
#[repr(C)]
#[derive(Copy, Clone, Debug)]
pub struct CreateDomain {
pub ssidref: u32,
pub handle: [u8; 16],
pub flags: u32,
pub iommu_opts: u32,
pub max_vcpus: u32,
pub max_evtchn_port: u32,
pub max_grant_frames: i32,
pub max_maptrack_frames: i32,
pub grant_opts: u32,
pub vmtrace_size: u32,
pub cpupool_id: u32,
pub arch_domain_config: ArchDomainConfig,
}
impl Default for CreateDomain {
    /// Builds a configuration with a freshly generated random (v4) UUID as the
    /// handle, one vcpu, `SECINITSID_DOMU` as the ssidref, no flags and no
    /// emulation.
    fn default() -> Self {
        let handle = Uuid::new_v4().into_bytes();
        let arch_domain_config = ArchDomainConfig {
            emulation_flags: 0,
            misc_flags: 0,
        };
        Self {
            ssidref: SECINITSID_DOMU,
            handle,
            flags: 0,
            iommu_opts: 0,
            max_vcpus: 1,
            max_evtchn_port: 1023,
            // -1 for both frame limits: presumably "use the hypervisor
            // default" — confirm.
            max_grant_frames: -1,
            max_maptrack_frames: -1,
            grant_opts: 2,
            vmtrace_size: 0,
            cpupool_id: 0,
            arch_domain_config,
        }
    }
}
/// Payload for the get-domain-info command.
// NOTE(review): presumably mirrors Xen's xen_domctl_getdomaininfo — confirm
// field order against the matching Xen release.
#[repr(C)]
#[derive(Copy, Clone, Debug)]
pub struct GetDomainInfo {
pub domid: u16,
pub pad1: u16,
pub flags: u32,
pub total_pages: u64,
pub max_pages: u64,
pub outstanding_pages: u64,
pub shr_pages: u64,
pub paged_pages: u64,
pub shared_info_frame: u64,
pub cpu_time: u64,
pub number_online_vcpus: u32,
pub max_vcpu_id: u32,
pub ssidref: u32,
pub handle: [u8; 16],
pub cpupool: u32,
pub gpaddr_bits: u8,
pub pad2: [u8; 7],
pub arch: ArchDomainConfig,
}
/// Payload carrying an element count (`num`) and the address of a `u64`
/// array stored as an integer (`array`).
#[repr(C)]
#[derive(Copy, Clone, Debug)]
pub struct GetPageFrameInfo3 {
pub num: u64,
pub array: c_ulong,
}
/// Architecture-specific part of a domain configuration: two flag words.
#[repr(C)]
#[derive(Copy, Clone, Debug)]
pub struct ArchDomainConfig {
pub emulation_flags: u32,
pub misc_flags: u32,
}
/// Payload carrying a memory limit; the field name indicates kilobytes.
#[repr(C)]
#[derive(Copy, Clone, Debug)]
pub struct MaxMem {
pub max_memkb: u64,
}
/// Payload carrying a vcpu count.
#[repr(C)]
#[derive(Copy, Clone, Debug)]
pub struct MaxVcpus {
pub max_vcpus: u32,
}
/// Payload carrying a single frame number (`gmfn`).
#[repr(C)]
#[derive(Copy, Clone, Debug)]
pub struct HypercallInit {
pub gmfn: u64,
}
/// Value placed in `DomCtl::interface_version`.
// NOTE(review): the struct layouts in this file must match this version of
// the Xen domctl ABI — confirm when changing the target Xen release.
pub const XEN_DOMCTL_INTERFACE_VERSION: u32 = 0x00000015;
/// Default `ssidref` used by `CreateDomain::default()`.
pub const SECINITSID_DOMU: u32 = 12;
/// Fixed 1024-byte character buffer for a capabilities string.
#[repr(C)]
#[derive(Copy, Clone, Debug)]
pub struct XenCapabilitiesInfo {
pub capabilities: [c_char; 1024],
}
// Presumably the XENVER sub-command paired with `XenCapabilitiesInfo` — confirm.
pub const XENVER_CAPABILITIES: u64 = 3;
/// Argument block for memory-op sub-commands.
///
/// `extent_start` holds an address as an integer; `domid` is 16 bits wide.
#[repr(C)]
#[derive(Copy, Clone, Debug)]
pub struct MemoryReservation {
pub extent_start: c_ulong,
pub nr_extents: c_ulong,
pub extent_order: c_uint,
pub mem_flags: c_uint,
pub domid: u16,
}
/// One entry of a multicall batch: an operation number, a result slot and up
/// to six arguments.
#[repr(C)]
#[derive(Copy, Clone, Debug)]
pub struct MultiCallEntry {
pub op: c_ulong,
pub result: c_ulong,
pub args: [c_ulong; 6],
}
// Memory-op sub-command numbers.
pub const XEN_MEM_POPULATE_PHYSMAP: u32 = 6;
pub const XEN_MEM_MEMORY_MAP: u32 = 9;
pub const XEN_MEM_CLAIM_PAGES: u32 = 24;
/// Argument block carrying an entry count and a buffer address stored as an
/// integer.
#[repr(C)]
#[derive(Copy, Clone, Debug)]
pub struct MemoryMap {
pub count: c_uint,
pub buffer: c_ulong,
}
/// Opaque 512-byte block at the start of [`VcpuGuestContext`].
#[repr(C)]
#[derive(Copy, Clone, Debug)]
pub struct VcpuGuestContextFpuCtx {
pub x: [c_char; 512],
}
// Implemented by hand, presumably because `Default` cannot be derived for a
// 512-element array.
impl Default for VcpuGuestContextFpuCtx {
fn default() -> Self {
VcpuGuestContextFpuCtx { x: [0; 512] }
}
}
/// General-purpose register block embedded in [`VcpuGuestContext`].
///
/// The private `_padN` fields keep the 16-bit and 8-bit members at fixed
/// offsets.
// NOTE(review): presumably mirrors Xen's x86_64 cpu_user_regs — confirm.
#[repr(C)]
#[derive(Copy, Clone, Debug, Default)]
pub struct CpuUserRegs {
pub r15: u64,
pub r14: u64,
pub r13: u64,
pub r12: u64,
pub rbp: u64,
pub rbx: u64,
pub r11: u64,
pub r10: u64,
pub r9: u64,
pub r8: u64,
pub rax: u64,
pub rcx: u64,
pub rdx: u64,
pub rsi: u64,
pub rdi: u64,
pub error_code: u32,
pub entry_vector: u32,
pub rip: u64,
pub cs: u16,
_pad0: [u16; 1],
pub saved_upcall_mask: u8,
_pad1: [u8; 3],
pub rflags: u64,
pub rsp: u64,
pub ss: u16,
_pad2: [u16; 3],
pub es: u16,
_pad3: [u16; 3],
pub ds: u16,
_pad4: [u16; 3],
pub fs: u16,
_pad5: [u16; 3],
pub gs: u16,
_pad6: [u16; 3],
}
/// One entry of the trap table embedded in [`VcpuGuestContext`].
#[repr(C)]
#[derive(Copy, Clone, Debug, Default)]
pub struct TrapInfo {
pub vector: u8,
pub flags: u8,
pub cs: u16,
pub address: u64,
}
/// Full vcpu state exchanged with the hypervisor through
/// [`VcpuGuestContextAny`].
// NOTE(review): presumably mirrors Xen's x86_64 vcpu_guest_context — confirm
// field order against the matching Xen release.
#[repr(C)]
#[derive(Copy, Clone, Debug)]
pub struct VcpuGuestContext {
pub fpu_ctx: VcpuGuestContextFpuCtx,
pub flags: u64,
pub user_regs: CpuUserRegs,
pub trap_ctx: [TrapInfo; 256],
pub ldt_base: u64,
pub ldt_ents: u64,
pub gdt_frames: [u64; 16],
pub gdt_ents: u64,
pub kernel_ss: u64,
pub kernel_sp: u64,
pub ctrlreg: [u64; 8],
pub debugreg: [u64; 8],
pub event_callback_eip: u64,
pub failsafe_callback_eip: u64,
pub syscall_callback_eip: u64,
pub vm_assist: u64,
pub fs_base: u64,
pub gs_base_kernel: u64,
pub gs_base_user: u64,
}
impl Default for VcpuGuestContext {
fn default() -> Self {
VcpuGuestContext {
fpu_ctx: Default::default(),
flags: 0,
user_regs: Default::default(),
trap_ctx: [TrapInfo::default(); 256],
ldt_base: 0,
ldt_ents: 0,
gdt_frames: [0; 16],
gdt_ents: 0,
kernel_ss: 0,
kernel_sp: 0,
ctrlreg: [0; 8],
debugreg: [0; 8],
event_callback_eip: 0,
failsafe_callback_eip: 0,
syscall_callback_eip: 0,
vm_assist: 0,
fs_base: 0,
gs_base_kernel: 0,
gs_base_user: 0,
}
}
}
/// Wrapper union whose address is handed to the hypervisor in
/// `DomCtlVcpuContext::ctx`.
///
/// `#[repr(C)]` is required because the hypervisor reads and writes this
/// memory directly: with the default representation Rust does not guarantee
/// the union's layout, whereas `repr(C)` places `value` at offset 0 with C
/// layout.
#[repr(C)]
pub union VcpuGuestContextAny {
    pub value: VcpuGuestContext,
}
/// A single extended MMU operation: a command and two command-specific
/// arguments.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct MmuExtOp {
pub cmd: c_uint,
pub arg1: c_ulong,
pub arg2: c_ulong,
}
// Command value for `MmuExtOp::cmd`.
pub const MMUEXT_PIN_L4_TABLE: u32 = 3;
/// Argument block for allocating an unbound event channel: two 16-bit domain
/// ids and a `port` field that is read back after the call.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct EvtChnAllocUnbound {
pub dom: u16,
pub remote_dom: u16,
pub port: u32,
}

View File

@ -0,0 +1,29 @@
[package]
name = "xenclient"
version.workspace = true
edition = "2021"
resolver = "2"
[dependencies]
elf = { workspace = true }
flate2 = { workspace = true }
libc = { workspace = true }
log = { workspace = true }
memchr = { workspace = true }
slice-copy = { workspace = true }
thiserror = { workspace = true }
tokio = { workspace = true }
uuid = { workspace = true }
xz2 = { workspace = true }
xencall = { path = "../xencall" }
xenstore = { path = "../xenstore" }
[dev-dependencies]
env_logger = { workspace = true }
[lib]
name = "xenclient"
[[example]]
name = "xenclient-boot"
path = "examples/boot.rs"

View File

@ -0,0 +1,36 @@
use std::{env, process};
use xenclient::error::Result;
use xenclient::{DomainConfig, XenClient};
/// Example entry point: creates a domain named `xenclient-test` from the
/// kernel image and initrd paths given on the command line.
///
/// Prints a usage line and exits with status 1 unless exactly two arguments
/// are supplied.
#[tokio::main]
async fn main() -> Result<()> {
    env_logger::init();
    let args: Vec<String> = env::args().collect();
    // Program name plus exactly two positional arguments.
    let (kernel_path, initrd_path) = match args.as_slice() {
        [_, kernel, initrd] => (kernel.as_str(), initrd.as_str()),
        _ => {
            println!("usage: boot <kernel-image> <initrd>");
            process::exit(1);
        }
    };
    let mut client = XenClient::open().await?;
    let config = DomainConfig {
        backend_domid: 0,
        name: "xenclient-test",
        max_vcpus: 1,
        mem_mb: 512,
        kernel_path,
        initrd_path,
        cmdline: "debug elevator=noop",
        disks: vec![],
        consoles: vec![],
        vifs: vec![],
        filesystems: vec![],
        extra_keys: vec![],
        extra_rw_paths: vec![],
        event_channels: vec![],
    };
    let domid = client.create(&config).await?;
    println!("created domain {}", domid);
    Ok(())
}

View File

@ -0,0 +1,355 @@
use crate::error::Result;
use crate::mem::PhysicalPages;
use crate::sys::{GrantEntry, XEN_PAGE_SHIFT};
use crate::Error;
use libc::munmap;
use log::debug;
use slice_copy::copy;
use std::ffi::c_void;
use std::slice;
use xencall::XenCall;
/// A source of a bootable kernel image.
pub trait BootImageLoader {
/// Inspects the image and returns its layout information.
fn parse(&self) -> Result<BootImageInfo>;
/// Writes the image described by `image_info` into `dst`.
fn load(&self, image_info: &BootImageInfo, dst: &mut [u8]) -> Result<()>;
}
/// All-ones sentinel address (the bit pattern of `-1`).
pub const XEN_UNSET_ADDR: u64 = u64::MAX;
/// Layout information produced by [`BootImageLoader::parse`].
///
/// The `virt_*` fields are addresses; `unmapped_initrd` selects whether the
/// initrd is allocated before or after the p2m segment during
/// `BootSetup::initialize`.
#[derive(Debug)]
pub struct BootImageInfo {
pub start: u64,
pub virt_base: u64,
pub virt_kstart: u64,
pub virt_kend: u64,
pub virt_hypercall: u64,
pub virt_entry: u64,
pub virt_p2m_base: u64,
pub unmapped_initrd: bool,
}
/// Mutable state used while laying out and booting a single domain.
///
/// `virt_alloc_end` and `pfn_alloc_end` are bump-allocation cursors advanced
/// by `chk_alloc_pages`; `total_pages` is the upper bound it checks against.
pub struct BootSetup<'a> {
pub(crate) call: &'a XenCall,
pub phys: PhysicalPages<'a>,
pub(crate) domid: u32,
pub(crate) virt_alloc_end: u64,
pub(crate) pfn_alloc_end: u64,
pub(crate) virt_pgtab_end: u64,
pub(crate) total_pages: u64,
}
/// A contiguous region allocated for the domain.
///
/// `vstart`/`vend` are the virtual bounds, `pfn` the first page frame,
/// `addr` the local mapping address as an integer (0 when the allocator did
/// not map the region), `size` the requested byte size and `pages` the page
/// count.
#[derive(Debug)]
pub struct DomainSegment {
pub(crate) vstart: u64,
vend: u64,
pub pfn: u64,
pub(crate) addr: u64,
pub(crate) size: u64,
pub(crate) pages: u64,
}
/// Everything `BootSetup::initialize` allocated, handed on to
/// `BootSetup::boot`.
///
/// `shared_info_frame` starts at 0 and is filled in by `boot`.
#[derive(Debug)]
pub struct BootState {
pub kernel_segment: DomainSegment,
pub start_info_segment: DomainSegment,
pub xenstore_segment: DomainSegment,
pub console_segment: DomainSegment,
pub boot_stack_segment: DomainSegment,
pub p2m_segment: DomainSegment,
pub page_table_segment: DomainSegment,
pub image_info: BootImageInfo,
pub shared_info_frame: u64,
pub initrd_segment: DomainSegment,
pub store_evtchn: u32,
pub console_evtchn: u32,
}
impl BootSetup<'_> {
/// Creates a setup for `domid` with all allocation cursors and the page
/// budget at zero.
pub fn new(call: &XenCall, domid: u32) -> BootSetup {
    let phys = PhysicalPages::new(call, domid);
    BootSetup {
        call,
        phys,
        domid,
        virt_alloc_end: 0,
        pfn_alloc_end: 0,
        virt_pgtab_end: 0,
        total_pages: 0,
    }
}
/// Sets the domain's address size to 64 and then lets `arch` initialise
/// memory for `total_pages` pages.
fn initialize_memory(&mut self, arch: &mut dyn ArchBootSetup, total_pages: u64) -> Result<()> {
    let domid = self.domid;
    self.call.set_address_size(domid, 64)?;
    arch.meminit(self, total_pages)
}
/// Lays out the domain: initialises memory, loads the kernel and allocates
/// every boot segment plus the xenstore and console event channels.
///
/// Allocation order matters because segments are bump-allocated: kernel,
/// optional early p2m, start info, xenstore, console, page tables, boot
/// stack, padding up to `virt_pgtab_end`, then initrd and p2m in an order
/// that depends on `image_info.unmapped_initrd`.
///
/// The returned state has `shared_info_frame` set to 0.
pub fn initialize(
    &mut self,
    arch: &mut dyn ArchBootSetup,
    image_loader: &dyn BootImageLoader,
    initrd: &[u8],
    max_vcpus: u32,
    mem_mb: u64,
) -> Result<BootState> {
    debug!("initialize max_vcpus={:?} mem_mb={:?}", max_vcpus, mem_mb);
    // Megabytes to pages: 2^20 bytes per MiB divided by the page size.
    let total_pages = mem_mb << (20 - arch.page_shift());
    self.initialize_memory(arch, total_pages)?;

    let image_info = image_loader.parse()?;
    debug!("initialize image_info={:?}", image_info);
    self.virt_alloc_end = image_info.virt_base;
    let kernel_segment = self.load_kernel_segment(arch, image_loader, &image_info)?;

    // Allocate the p2m right after the kernel when the requested base is not
    // below `virt_base` or is not page-aligned.
    let needs_early_p2m = image_info.virt_p2m_base >= image_info.virt_base
        || (image_info.virt_p2m_base & ((1 << arch.page_shift()) - 1)) != 0;
    let early_p2m = if needs_early_p2m {
        Some(arch.alloc_p2m_segment(self, &image_info)?)
    } else {
        None
    };

    let start_info_segment = self.alloc_page(arch)?;
    let xenstore_segment = self.alloc_page(arch)?;
    let console_segment = self.alloc_page(arch)?;
    let page_table_segment = arch.alloc_page_tables(self, &image_info)?;
    let boot_stack_segment = self.alloc_page(arch)?;
    let pgtab_end = self.virt_pgtab_end;
    if pgtab_end > 0 {
        self.alloc_padding_pages(arch, pgtab_end)?;
    }

    // A mapped initrd is placed before the late p2m, an unmapped one after.
    let mapped_initrd = if image_info.unmapped_initrd {
        None
    } else {
        Some(self.alloc_module(arch, initrd)?)
    };
    let p2m_segment = match early_p2m {
        Some(segment) => segment,
        None => {
            let mut segment = arch.alloc_p2m_segment(self, &image_info)?;
            segment.vstart = image_info.virt_p2m_base;
            segment
        }
    };
    let initrd_segment = match mapped_initrd {
        Some(segment) => segment,
        None => self.alloc_module(arch, initrd)?,
    };

    let store_evtchn = self.call.evtchn_alloc_unbound(self.domid, 0)?;
    let console_evtchn = self.call.evtchn_alloc_unbound(self.domid, 0)?;
    let state = BootState {
        kernel_segment,
        start_info_segment,
        xenstore_segment,
        console_segment,
        boot_stack_segment,
        p2m_segment,
        page_table_segment,
        image_info,
        initrd_segment,
        store_evtchn,
        console_evtchn,
        shared_info_frame: 0,
    };
    debug!("initialize state={:?}", state);
    Ok(state)
}
/// Finishes domain setup from the state produced by `initialize`.
///
/// Records the domain's shared info frame in `state`, runs the architecture
/// hooks in a fixed order, unmaps all physical pages and finally seeds the
/// grant table.
pub fn boot(
    &mut self,
    arch: &mut dyn ArchBootSetup,
    state: &mut BootState,
    cmdline: &str,
) -> Result<()> {
    state.shared_info_frame = self.call.get_domain_info(self.domid)?.shared_info_frame;

    arch.setup_page_tables(self, state)?;
    arch.setup_start_info(self, state, cmdline)?;
    arch.setup_hypercall_page(self, &state.image_info)?;
    arch.bootlate(self, state)?;
    let shared_info_frame = state.shared_info_frame;
    arch.setup_shared_info(self, shared_info_frame)?;
    arch.vcpu(self, state)?;

    self.phys.unmap_all()?;
    self.gnttab_seed(state)
}
/// Writes the first two grant table entries of the domain so that domain 0
/// can access the console page (entry 0) and the xenstore page (entry 1).
///
/// One page is mapped, the domain's resource is mapped onto it, both entries
/// are written and the page is unmapped again. The page is also unmapped
/// when mapping the resource fails, so no mapping is leaked on that path.
///
/// # Errors
///
/// `Error::MmapFailed` when the page cannot be mapped, the `map_resource`
/// error when the resource cannot be mapped, and `Error::UnmapFailed` when
/// the final `munmap` fails.
fn gnttab_seed(&mut self, state: &mut BootState) -> Result<()> {
    let console_gfn = self.phys.p2m[state.console_segment.pfn as usize];
    let xenstore_gfn = self.phys.p2m[state.xenstore_segment.pfn as usize];
    let addr = self
        .call
        .mmap(0, 1 << XEN_PAGE_SHIFT)
        .ok_or(Error::MmapFailed)?;
    // Resource type 1: presumably the grant table resource — confirm.
    let mapped = self.call.map_resource(self.domid, 1, 0, 0, 1, addr);
    if mapped.is_err() {
        // Best-effort cleanup; the mapping error is what gets reported.
        // SAFETY: `addr` is the one-page mapping obtained above and is not
        // used again on this path.
        unsafe {
            munmap(addr as *mut c_void, 1 << XEN_PAGE_SHIFT);
        }
    }
    mapped?;
    // SAFETY: `addr` points at the one-page mapping created above; this
    // assumes two `GrantEntry` values fit within that page.
    let entries = unsafe { slice::from_raw_parts_mut(addr as *mut GrantEntry, 2) };
    // Flag bit 0: presumably GTF_permit_access — confirm.
    entries[0].flags = 1 << 0;
    entries[0].domid = 0;
    entries[0].frame = console_gfn as u32;
    entries[1].flags = 1 << 0;
    entries[1].domid = 0;
    entries[1].frame = xenstore_gfn as u32;
    // SAFETY: `entries` is not used after this point.
    let result = unsafe { munmap(addr as *mut c_void, 1 << XEN_PAGE_SHIFT) };
    if result != 0 {
        return Err(Error::UnmapFailed);
    }
    Ok(())
}
fn load_kernel_segment(
&mut self,
arch: &mut dyn ArchBootSetup,
image_loader: &dyn BootImageLoader,
image_info: &BootImageInfo,
) -> Result<DomainSegment> {
let kernel_segment = self.alloc_segment(
arch,
image_info.virt_kstart,
image_info.virt_kend - image_info.virt_kstart,
)?;
let kernel_segment_ptr = kernel_segment.addr as *mut u8;
let kernel_segment_slice =
unsafe { slice::from_raw_parts_mut(kernel_segment_ptr, kernel_segment.size as usize) };
image_loader.load(image_info, kernel_segment_slice)?;
Ok(kernel_segment)
}
/// Sets every bit of `mask` in `addr`.
///
/// With a mask of the form `2^n - 1` this yields the last address of the
/// `2^n`-sized block containing `addr`, not the start of the next block.
pub(crate) fn round_up(addr: u64, mask: u64) -> u64 {
    let filled = mask | addr;
    filled
}
/// Returns a mask with the lowest `bits` bits set.
///
/// `bits` of 64 or more yields `u64::MAX` instead of overflowing the shift,
/// which would panic in debug builds and produce a wrong mask in release
/// builds.
pub(crate) fn bits_to_mask(bits: u64) -> u64 {
    u32::try_from(bits)
        .ok()
        .and_then(|shift| 1u64.checked_shl(shift))
        .map_or(u64::MAX, |bit| bit - 1)
}
/// Allocates, maps and zero-fills enough pages to hold `size` bytes.
///
/// When `start` is non-zero the allocation cursor is first padded up to
/// `start`. The returned segment begins at the cursor position after padding
/// and has `addr` set to the local mapping of its pages.
pub(crate) fn alloc_segment(
    &mut self,
    arch: &mut dyn ArchBootSetup,
    start: u64,
    size: u64,
) -> Result<DomainSegment> {
    if start > 0 {
        self.alloc_padding_pages(arch, start)?;
    }
    let page_bytes = ((1i64 << XEN_PAGE_SHIFT) as u32) as u64;
    // Round the byte size up to whole pages.
    let pages = (size + page_bytes - 1) / page_bytes;
    // Capture both cursors before they are advanced.
    let vstart = self.virt_alloc_end;
    let pfn = self.pfn_alloc_end;
    self.chk_alloc_pages(arch, pages)?;
    let addr = self.phys.pfn_to_ptr(pfn, pages)?;
    // SAFETY: assumes `pfn_to_ptr` returned a mapping covering `pages` pages
    // starting at `addr`.
    let mapped =
        unsafe { slice::from_raw_parts_mut(addr as *mut u8, (pages * page_bytes) as usize) };
    mapped.fill(0);
    let segment = DomainSegment {
        vstart,
        vend: self.virt_alloc_end,
        pfn,
        addr,
        size,
        pages,
    };
    debug!(
        "alloc_segment {:#x} -> {:#x} (pfn {:#x} + {:#x} pages)",
        vstart, segment.vend, segment.pfn, pages
    );
    Ok(segment)
}
/// Reserves a single page at the current allocation cursor without mapping
/// it; the returned segment has `addr` and `size` set to 0.
fn alloc_page(&mut self, arch: &mut dyn ArchBootSetup) -> Result<DomainSegment> {
    // Capture both cursors before they are advanced.
    let (vstart, pfn) = (self.virt_alloc_end, self.pfn_alloc_end);
    self.chk_alloc_pages(arch, 1)?;
    debug!("alloc_page {:#x} (pfn {:#x})", vstart, pfn);
    // Inclusive end: the last byte of the page.
    let vend = (vstart + arch.page_size()) - 1;
    let segment = DomainSegment {
        vstart,
        vend,
        pfn,
        addr: 0,
        size: 0,
        pages: 1,
    };
    Ok(segment)
}
/// Allocates a segment large enough for `buffer` at the current cursor and
/// copies `buffer` into it.
fn alloc_module(
    &mut self,
    arch: &mut dyn ArchBootSetup,
    buffer: &[u8],
) -> Result<DomainSegment> {
    let length = buffer.len();
    let segment = self.alloc_segment(arch, 0, length as u64)?;
    // SAFETY: `alloc_segment` mapped at least `length` bytes at `segment.addr`.
    let destination = unsafe { slice::from_raw_parts_mut(segment.addr as *mut u8, length) };
    copy(destination, buffer);
    Ok(segment)
}
/// Advances the allocation cursor up to `boundary`.
///
/// # Errors
///
/// Returns `Error::MemorySetupFailed` when `boundary` is not page-aligned or
/// lies below the current cursor, or when the page budget is exceeded.
fn alloc_padding_pages(&mut self, arch: &mut dyn ArchBootSetup, boundary: u64) -> Result<()> {
    let misaligned = (boundary & (arch.page_size() - 1)) != 0;
    if misaligned || boundary < self.virt_alloc_end {
        return Err(Error::MemorySetupFailed);
    }
    let gap = boundary - self.virt_alloc_end;
    let pages = gap / arch.page_size();
    self.chk_alloc_pages(arch, pages)
}
fn chk_alloc_pages(&mut self, arch: &mut dyn ArchBootSetup, pages: u64) -> Result<()> {
if pages > self.total_pages
|| self.pfn_alloc_end > self.total_pages
|| pages > self.total_pages - self.pfn_alloc_end
{
return Err(Error::MemorySetupFailed);
}
self.pfn_alloc_end += pages;
self.virt_alloc_end += pages * arch.page_size();
Ok(())
}
}
/// Architecture-specific hooks driven by [`BootSetup`].
///
/// `BootSetup::initialize` calls `meminit`, `alloc_p2m_segment` and
/// `alloc_page_tables`; `BootSetup::boot` calls the `setup_*` hooks,
/// `bootlate` and `vcpu`.
pub trait ArchBootSetup {
/// Page size in bytes.
fn page_size(&mut self) -> u64;
/// Shift corresponding to the page size; `initialize` uses it to convert
/// megabytes to pages.
fn page_shift(&mut self) -> u64;
/// Allocates the p2m segment.
fn alloc_p2m_segment(
&mut self,
setup: &mut BootSetup,
image_info: &BootImageInfo,
) -> Result<DomainSegment>;
/// Allocates the page table segment.
fn alloc_page_tables(
&mut self,
setup: &mut BootSetup,
image_info: &BootImageInfo,
) -> Result<DomainSegment>;
/// First hook run by `boot`, after the shared info frame is recorded.
fn setup_page_tables(&mut self, setup: &mut BootSetup, state: &mut BootState) -> Result<()>;
/// Receives the kernel command line.
fn setup_start_info(
&mut self,
setup: &mut BootSetup,
state: &BootState,
cmdline: &str,
) -> Result<()>;
/// Receives the shared info frame number obtained from the domain info.
fn setup_shared_info(&mut self, setup: &mut BootSetup, shared_info_frame: u64) -> Result<()>;
/// Runs after `setup_start_info`.
fn setup_hypercall_page(
&mut self,
setup: &mut BootSetup,
image_info: &BootImageInfo,
) -> Result<()>;
/// Initialises domain memory for `total_pages` pages.
// NOTE(review): presumably responsible for setting `setup.total_pages`,
// which `chk_alloc_pages` relies on — confirm in the implementations.
fn meminit(&mut self, setup: &mut BootSetup, total_pages: u64) -> Result<()>;
/// Runs after `setup_hypercall_page` and before `setup_shared_info`.
fn bootlate(&mut self, setup: &mut BootSetup, state: &mut BootState) -> Result<()>;
/// Last hook run by `boot`, before the physical pages are unmapped.
fn vcpu(&mut self, setup: &mut BootSetup, state: &mut BootState) -> Result<()>;
}

View File

@ -0,0 +1,289 @@
use crate::boot::{BootImageInfo, BootImageLoader, XEN_UNSET_ADDR};
use crate::error::Result;
use crate::sys::{
XEN_ELFNOTE_ENTRY, XEN_ELFNOTE_HYPERCALL_PAGE, XEN_ELFNOTE_INIT_P2M, XEN_ELFNOTE_MOD_START_PFN,
XEN_ELFNOTE_PADDR_OFFSET, XEN_ELFNOTE_TYPES, XEN_ELFNOTE_VIRT_BASE,
};
use crate::Error;
use elf::abi::{PF_R, PF_W, PF_X, PT_LOAD, SHT_NOTE};
use elf::endian::AnyEndian;
use elf::note::Note;
use elf::ElfBytes;
use flate2::bufread::GzDecoder;
use log::debug;
use memchr::memmem::find_iter;
use slice_copy::copy;
use std::collections::HashMap;
use std::io::{BufReader, Read};
use std::mem::size_of;
use xz2::bufread::XzDecoder;
/// Boot image loader backed by the raw bytes of an ELF image held in memory.
pub struct ElfImageLoader {
data: Vec<u8>,
}
/// Decodes a note descriptor of 1, 2, 4 or 8 bytes as an unsigned integer in
/// the given byte order and widens it to `u64`.
///
/// Returns `None` for any other length. The descriptor is converted to a
/// fixed-size array straight from the borrowed slice; no heap copy is made.
fn xen_note_value_as_u64(endian: AnyEndian, value: &[u8]) -> Option<u64> {
    match value.len() {
        1 => {
            let raw: [u8; size_of::<u8>()] = value.try_into().ok()?;
            Some(u64::from(match endian {
                AnyEndian::Little => u8::from_le_bytes(raw),
                AnyEndian::Big => u8::from_be_bytes(raw),
            }))
        }
        2 => {
            let raw: [u8; size_of::<u16>()] = value.try_into().ok()?;
            Some(u64::from(match endian {
                AnyEndian::Little => u16::from_le_bytes(raw),
                AnyEndian::Big => u16::from_be_bytes(raw),
            }))
        }
        4 => {
            let raw: [u8; size_of::<u32>()] = value.try_into().ok()?;
            Some(u64::from(match endian {
                AnyEndian::Little => u32::from_le_bytes(raw),
                AnyEndian::Big => u32::from_be_bytes(raw),
            }))
        }
        8 => {
            let raw: [u8; size_of::<u64>()] = value.try_into().ok()?;
            Some(match endian {
                AnyEndian::Little => u64::from_le_bytes(raw),
                AnyEndian::Big => u64::from_be_bytes(raw),
            })
        }
        _ => None,
    }
}
impl ElfImageLoader {
pub fn new(data: Vec<u8>) -> ElfImageLoader {
ElfImageLoader { data }
}
pub fn load_file(path: &str) -> Result<ElfImageLoader> {
let data = std::fs::read(path)?;
Ok(ElfImageLoader::new(data))
}
pub fn load_gz(data: &[u8]) -> Result<ElfImageLoader> {
let buff = BufReader::new(data);
let image = ElfImageLoader::read_one_stream(&mut GzDecoder::new(buff))?;
Ok(ElfImageLoader::new(image))
}
pub fn load_xz(data: &[u8]) -> Result<ElfImageLoader> {
let buff = BufReader::new(data);
let image = ElfImageLoader::read_one_stream(&mut XzDecoder::new(buff))?;
Ok(ElfImageLoader::new(image))
}
fn read_one_stream(read: &mut dyn Read) -> Result<Vec<u8>> {
let mut result: Vec<u8> = Vec::new();
let mut buffer = [0u8; 8192];
loop {
match read.read(&mut buffer) {
Ok(size) => {
if size == 0 {
break;
}
result.extend_from_slice(&buffer[0..size])
}
Err(error) => {
if !result.is_empty() {
break;
}
return Err(Error::from(error));
}
}
}
Ok(result)
}
pub fn load_file_gz(path: &str) -> Result<ElfImageLoader> {
let file = std::fs::read(path)?;
ElfImageLoader::load_gz(file.as_slice())
}
pub fn load_file_xz(path: &str) -> Result<ElfImageLoader> {
let file = std::fs::read(path)?;
ElfImageLoader::load_xz(file.as_slice())
}
pub fn load_file_kernel(path: &str) -> Result<ElfImageLoader> {
let file = std::fs::read(path)?;
for start in find_iter(file.as_slice(), &[0x1f, 0x8b]) {
if let Ok(elf) = ElfImageLoader::load_gz(&file[start..]) {
return Ok(elf);
}
}
for start in find_iter(file.as_slice(), &[0xfd, 0x37, 0x7a, 0x58]) {
if let Ok(elf) = ElfImageLoader::load_xz(&file[start..]) {
return Ok(elf);
}
}
Err(Error::ElfCompressionUnknown)
}
}
/// Decoded payload of one "Xen" ELF note, as collected by `parse`.
struct ElfNoteValue {
/// Numeric value of the note; `parse` stores 0 for string-typed notes and for
/// descriptors whose length cannot be decoded.
value: u64,
}
impl BootImageLoader for ElfImageLoader {
fn parse(&self) -> Result<BootImageInfo> {
let elf = ElfBytes::<AnyEndian>::minimal_parse(self.data.as_slice())?;
let headers = elf.section_headers().ok_or(Error::ElfInvalidImage)?;
let mut linux_notes: HashMap<u64, Vec<u8>> = HashMap::new();
let mut xen_notes: HashMap<u64, ElfNoteValue> = HashMap::new();
for header in headers {
if header.sh_type != SHT_NOTE {
continue;
}
let notes = elf.section_data_as_notes(&header)?;
for note in notes {
if let Note::Unknown(note) = note {
if note.name == "Linux" {
linux_notes.insert(note.n_type, note.desc.to_vec());
}
if note.name == "Xen" {
for typ in XEN_ELFNOTE_TYPES {
if typ.id != note.n_type {
continue;
}
let value = if !typ.is_string {
xen_note_value_as_u64(elf.ehdr.endianness, note.desc).unwrap_or(0)
} else {
0
};
xen_notes.insert(typ.id, ElfNoteValue { value });
}
continue;
}
}
}
}
if linux_notes.is_empty() {
return Err(Error::ElfInvalidImage);
}
if xen_notes.is_empty() {
return Err(Error::ElfXenSupportMissing);
}
let paddr_offset = xen_notes
.get(&XEN_ELFNOTE_PADDR_OFFSET)
.ok_or(Error::ElfInvalidImage)?
.value;
let virt_base = xen_notes
.get(&XEN_ELFNOTE_VIRT_BASE)
.ok_or(Error::ElfInvalidImage)?
.value;
let entry = xen_notes
.get(&XEN_ELFNOTE_ENTRY)
.ok_or(Error::ElfInvalidImage)?
.value;
let virt_hypercall = xen_notes
.get(&XEN_ELFNOTE_HYPERCALL_PAGE)
.ok_or(Error::ElfInvalidImage)?
.value;
let init_p2m = xen_notes
.get(&XEN_ELFNOTE_INIT_P2M)
.ok_or(Error::ElfInvalidImage)?
.value;
let mod_start_pfn = xen_notes
.get(&XEN_ELFNOTE_MOD_START_PFN)
.ok_or(Error::ElfInvalidImage)?
.value;
let mut start: u64 = u64::MAX;
let mut end: u64 = 0;
let segments = elf.segments().ok_or(Error::ElfInvalidImage)?;
for header in segments {
if (header.p_type != PT_LOAD) || (header.p_flags & (PF_R | PF_W | PF_X)) == 0 {
continue;
}
let paddr = header.p_paddr;
let memsz = header.p_memsz;
if start > paddr {
start = paddr;
}
if end < paddr + memsz {
end = paddr + memsz;
}
}
if paddr_offset != XEN_UNSET_ADDR && virt_base == XEN_UNSET_ADDR {
return Err(Error::ElfInvalidImage);
}
let virt_offset = virt_base - paddr_offset;
let virt_kstart = start + virt_offset;
let virt_kend = end + virt_offset;
let virt_entry = entry;
let image_info = BootImageInfo {
start,
virt_base,
virt_kstart,
virt_kend,
virt_hypercall,
virt_entry,
virt_p2m_base: init_p2m,
unmapped_initrd: mod_start_pfn != 0,
};
Ok(image_info)
}
fn load(&self, image_info: &BootImageInfo, dst: &mut [u8]) -> Result<()> {
let elf = ElfBytes::<AnyEndian>::minimal_parse(self.data.as_slice())?;
let segments = elf.segments().ok_or(Error::ElfInvalidImage)?;
debug!(
"load dst={:#x} segments={}",
dst.as_ptr() as u64,
segments.len()
);
for header in segments {
let paddr = header.p_paddr;
let filesz = header.p_filesz;
let memsz = header.p_memsz;
let base_offset = paddr - image_info.start;
let data = elf.segment_data(&header)?;
let segment_dst = &mut dst[base_offset as usize..];
let copy_slice = &data[0..filesz as usize];
debug!(
"load copy hdr={:?} dst={:#x} len={}",
header,
copy_slice.as_ptr() as u64,
copy_slice.len()
);
copy(segment_dst, copy_slice);
if (memsz - filesz) > 0 {
let remaining = &mut segment_dst[filesz as usize..memsz as usize];
debug!(
"load fill_zero hdr={:?} dst={:#x} len={}",
header.p_offset,
remaining.as_ptr() as u64,
remaining.len()
);
remaining.fill(0);
}
}
Ok(())
}
}

View File

@ -0,0 +1,39 @@
use std::io;
/// Errors produced by this crate.
///
/// Variants carrying `#[from]` are created automatically by `?` from the
/// wrapped error type.
#[derive(thiserror::Error, Debug)]
pub enum Error {
/// An underlying I/O operation failed.
#[error("io issue encountered: {0}")]
Io(#[from] io::Error),
/// A xenstore operation failed.
#[error("xenstore issue encountered: {0}")]
XenStore(#[from] xenstore::error::Error),
/// A xencall operation failed.
#[error("xencall issue encountered: {0}")]
XenCall(#[from] xencall::error::Error),
/// The console tty key could not be read for the domain.
#[error("domain does not have a tty")]
TtyNotFound,
/// The xenstore `introduce_domain` call reported failure.
#[error("introducing the domain failed")]
IntroduceDomainFailed,
/// A filesystem-style path could not be converted to a `&str`.
#[error("string conversion of a path failed")]
PathStringConversion,
/// A path had no parent component.
#[error("parent of path not found")]
PathParentNotFound,
/// The domain has no `vm` key in xenstore.
#[error("domain does not exist")]
DomainNonExistent,
/// The ELF parser rejected the image.
#[error("elf parse failed: {0}")]
ElfParseFailed(#[from] elf::ParseError),
/// Creating or populating a memory mapping failed.
#[error("mmap failed")]
MmapFailed,
/// `munmap` returned a non-zero status.
#[error("munmap failed")]
UnmapFailed,
/// Guest memory bookkeeping was inconsistent or a lookup failed.
#[error("memory setup failed")]
MemorySetupFailed,
/// Populating the physmap returned fewer extents than requested; the fields
/// are, in order, the wanted count, the received count and the input extents.
#[error("populate physmap failed: wanted={0}, received={1}, input_extents={2}")]
PopulatePhysmapFailed(usize, usize, usize),
/// No gzip or xz payload in the kernel file could be decompressed.
#[error("unknown elf compression method")]
ElfCompressionUnknown,
/// The image lacks headers, notes or segments this crate requires.
#[error("expected elf image format not found")]
ElfInvalidImage,
/// The image carries no recognised "Xen" ELF note.
#[error("provided elf image does not contain xen support")]
ElfXenSupportMissing,
}
/// Result alias whose error type is this crate's [`Error`].
pub type Result<T> = std::result::Result<T, Error>;

View File

@ -0,0 +1,774 @@
pub mod boot;
pub mod elfloader;
pub mod error;
pub mod mem;
pub mod sys;
pub mod x86;
use crate::boot::BootSetup;
use crate::elfloader::ElfImageLoader;
use crate::error::{Error, Result};
use crate::x86::X86BootSetup;
use log::{trace, warn};
use std::fs::read;
use std::path::PathBuf;
use std::str::FromStr;
use std::thread;
use std::time::Duration;
use uuid::Uuid;
use xencall::sys::CreateDomain;
use xencall::XenCall;
use xenstore::client::{
XsPermission, XsdClient, XsdInterface, XS_PERM_NONE, XS_PERM_READ, XS_PERM_READ_WRITE,
};
/// Handle used to create and destroy domains; bundles a xenstore client with
/// a `XenCall` handle.
pub struct XenClient {
/// Xenstore connection; public so callers can issue their own requests.
pub store: XsdClient,
/// Handle through which domain-control calls are made.
call: XenCall,
}
/// Identifies a block device that backs a guest disk.
#[derive(Debug)]
pub struct BlockDeviceRef {
/// Written to the backend's `physical-device-path` key.
pub path: String,
/// Device major number; written in hex as the first half of `physical-device`.
pub major: u32,
/// Device minor number; written in hex as the second half of `physical-device`.
pub minor: u32,
}
/// One disk to attach to a domain.
#[derive(Debug)]
pub struct DomainDisk<'a> {
/// Written to the backend's `dev` key.
pub vdev: &'a str,
/// Block device that backs the disk.
pub block: &'a BlockDeviceRef,
/// Selects backend `mode` "w" when true, "r" otherwise.
pub writable: bool,
}
/// One 9pfs share to attach to a domain.
#[derive(Debug)]
pub struct DomainFilesystem<'a> {
/// Written to the backend's `path` key.
pub path: &'a str,
/// Written to the frontend's `tag` key.
pub tag: &'a str,
}
/// One virtual network interface to attach to a domain.
#[derive(Debug)]
pub struct DomainNetworkInterface<'a> {
/// Written to the `mac` key of both frontend and backend.
pub mac: &'a str,
/// Written to the `mtu` key of both frontend and backend.
pub mtu: u32,
/// When set, written to the backend's `bridge` key.
pub bridge: Option<&'a str>,
/// When set, written to the backend's `script` key with an empty
/// `hotplug-status`; when unset, `script` is empty and `hotplug-status` is
/// "connected".
pub script: Option<&'a str>,
}
/// Requests one additional console; carries no settings. Each entry in
/// `DomainConfig::consoles` adds a console after the primary one.
#[derive(Debug)]
pub struct DomainConsole {}
/// Requests an unbound event channel published under `evtchn/<name>` in the
/// domain's xenstore directory.
#[derive(Debug)]
pub struct DomainEventChannel<'a> {
/// Used both as the xenstore directory name and as the `name` value.
pub name: &'a str,
}
/// Everything `XenClient::create` needs to build and start a domain.
#[derive(Debug)]
pub struct DomainConfig<'a> {
/// Domain id that hosts the device backends and is the remote end of the
/// requested event channels.
pub backend_domid: u32,
/// Written to the `name` key under both the domain and the vm path.
pub name: &'a str,
/// Number of vcpus; one `cpu/<n>` node is created per vcpu.
pub max_vcpus: u32,
/// Memory size; multiplied by 1024 before being passed to `set_max_mem` and
/// written to `memory/static-max` and `memory/target`.
pub mem_mb: u64,
/// Kernel file, loaded with `ElfImageLoader::load_file_kernel`.
pub kernel_path: &'a str,
/// Initial ramdisk file, read whole into memory.
pub initrd_path: &'a str,
/// Kernel command line.
pub cmdline: &'a str,
/// Disks to attach, in device-index order.
pub disks: Vec<DomainDisk<'a>>,
/// Additional consoles; the primary console is always created.
pub consoles: Vec<DomainConsole>,
/// Network interfaces to attach, in device-index order.
pub vifs: Vec<DomainNetworkInterface<'a>>,
/// 9pfs shares to attach, in device-index order.
pub filesystems: Vec<DomainFilesystem<'a>>,
/// Event channels to allocate before the domain is unpaused.
pub event_channels: Vec<DomainEventChannel<'a>>,
/// Extra `(key, value)` pairs written relative to the domain path.
pub extra_keys: Vec<(String, String)>,
/// Extra nodes created relative to the domain path with read-write
/// permission for the domain.
pub extra_rw_paths: Vec<String>,
}
impl XenClient {
/// Connects to xenstore and opens the call interface, in that order.
pub async fn open() -> Result<XenClient> {
    Ok(XenClient {
        store: XsdClient::open().await?,
        call: XenCall::open()?,
    })
}
/// Creates a domain from `config` and returns its domain id.
///
/// If initialisation fails after the domain exists, the domain is destroyed
/// again and the initialisation error is returned.
pub async fn create(&mut self, config: &DomainConfig<'_>) -> Result<u32> {
    let domain = CreateDomain {
        max_vcpus: config.max_vcpus,
        ..Default::default()
    };
    let domid = self.call.create_domain(domain)?;
    if let Err(err) = self.init(domid, &domain, config).await {
        // Cleanup is best effort: a failure to destroy must not mask the
        // error that caused it.
        let _ = self.destroy(domid).await;
        return Err(err);
    }
    Ok(domid)
}
/// Populates xenstore, builds the guest's memory image and devices, and
/// finally unpauses the domain.
///
/// Steps, in order: create the domain and vm nodes in one transaction; set
/// vcpu and memory limits; load the kernel and initrd and boot-prepare the
/// guest; publish image, memory, store and cpu keys in a second transaction;
/// introduce the domain to xenstore; add console, disk, 9pfs and vif devices;
/// allocate the requested event channels; unpause.
///
/// Any error aborts the sequence; the caller (`create`) destroys the domain.
///
/// NOTE(review): the vm path is derived from `domain.handle`, and `create`
/// builds `domain` from `Default::default()` — confirm the handle is unique
/// per domain.
async fn init(
&mut self,
domid: u32,
domain: &CreateDomain,
config: &DomainConfig<'_>,
) -> Result<()> {
trace!(
"XenClient init domid={} domain={:?} config={:?}",
domid,
domain,
config
);
let backend_dom_path = self.store.get_domain_path(0).await?;
let dom_path = self.store.get_domain_path(domid).await?;
let uuid_string = Uuid::from_bytes(domain.handle).to_string();
let vm_path = format!("/vm/{}", uuid_string);
// Permission sets: no access for id 0 plus read for the guest, read-write
// for the guest, and no access for id 0 only.
let ro_perm = &[
XsPermission {
id: 0,
perms: XS_PERM_NONE,
},
XsPermission {
id: domid,
perms: XS_PERM_READ,
},
];
let rw_perm = &[XsPermission {
id: domid,
perms: XS_PERM_READ_WRITE,
}];
let no_perm = &[XsPermission {
id: 0,
perms: XS_PERM_NONE,
}];
// First transaction: recreate the domain and vm directories from scratch.
{
let tx = self.store.transaction().await?;
tx.rm(dom_path.as_str()).await?;
tx.mknod(dom_path.as_str(), ro_perm).await?;
tx.rm(vm_path.as_str()).await?;
tx.mknod(vm_path.as_str(), ro_perm).await?;
// NOTE(review): the vm path is created twice, so `no_perm` replaces the
// `ro_perm` set one line earlier — confirm this is intended.
tx.mknod(vm_path.as_str(), no_perm).await?;
tx.mknod(format!("{}/device", vm_path).as_str(), no_perm)
.await?;
tx.write_string(format!("{}/vm", dom_path).as_str(), &vm_path)
.await?;
tx.mknod(format!("{}/cpu", dom_path).as_str(), ro_perm)
.await?;
tx.mknod(format!("{}/memory", dom_path).as_str(), ro_perm)
.await?;
tx.mknod(format!("{}/control", dom_path).as_str(), ro_perm)
.await?;
tx.mknod(format!("{}/control/shutdown", dom_path).as_str(), rw_perm)
.await?;
tx.mknod(
format!("{}/control/feature-poweroff", dom_path).as_str(),
rw_perm,
)
.await?;
tx.mknod(
format!("{}/control/feature-reboot", dom_path).as_str(),
rw_perm,
)
.await?;
tx.mknod(
format!("{}/control/feature-suspend", dom_path).as_str(),
rw_perm,
)
.await?;
tx.mknod(format!("{}/control/sysrq", dom_path).as_str(), rw_perm)
.await?;
tx.mknod(format!("{}/data", dom_path).as_str(), rw_perm)
.await?;
tx.mknod(format!("{}/drivers", dom_path).as_str(), rw_perm)
.await?;
tx.mknod(format!("{}/feature", dom_path).as_str(), rw_perm)
.await?;
tx.mknod(format!("{}/attr", dom_path).as_str(), rw_perm)
.await?;
tx.mknod(format!("{}/error", dom_path).as_str(), rw_perm)
.await?;
tx.write_string(
format!("{}/uuid", vm_path).as_str(),
&Uuid::from_bytes(domain.handle).to_string(),
)
.await?;
tx.write_string(format!("{}/name", dom_path).as_str(), config.name)
.await?;
tx.write_string(format!("{}/name", vm_path).as_str(), config.name)
.await?;
// Caller-supplied keys and writable nodes, relative to the domain path.
for (key, value) in &config.extra_keys {
tx.write_string(format!("{}/{}", dom_path, key).as_str(), value)
.await?;
}
for path in &config.extra_rw_paths {
tx.mknod(format!("{}/{}", dom_path, path).as_str(), rw_perm)
.await?;
}
tx.commit().await?;
}
self.call.set_max_vcpus(domid, config.max_vcpus)?;
// `mem_mb * 1024` is passed on as the memory limit.
self.call.set_max_mem(domid, config.mem_mb * 1024)?;
let image_loader = ElfImageLoader::load_file_kernel(config.kernel_path)?;
// Values produced by the boot setup that are needed after it is dropped.
let console_evtchn: u32;
let xenstore_evtchn: u32;
let console_mfn: u64;
let xenstore_mfn: u64;
{
let mut boot = BootSetup::new(&self.call, domid);
let mut arch = X86BootSetup::new();
let initrd = read(config.initrd_path)?;
let mut state = boot.initialize(
&mut arch,
&image_loader,
initrd.as_slice(),
config.max_vcpus,
config.mem_mb,
)?;
boot.boot(&mut arch, &mut state, config.cmdline)?;
console_evtchn = state.console_evtchn;
xenstore_evtchn = state.store_evtchn;
// Translate the console and xenstore segment pfns through the p2m table.
console_mfn = boot.phys.p2m[state.console_segment.pfn as usize];
xenstore_mfn = boot.phys.p2m[state.xenstore_segment.pfn as usize];
}
// Second transaction: publish image, memory, store and per-vcpu keys.
{
let tx = self.store.transaction().await?;
tx.write_string(format!("{}/image/os_type", vm_path).as_str(), "linux")
.await?;
tx.write_string(
format!("{}/image/kernel", vm_path).as_str(),
config.kernel_path,
)
.await?;
tx.write_string(
format!("{}/image/ramdisk", vm_path).as_str(),
config.initrd_path,
)
.await?;
tx.write_string(
format!("{}/image/cmdline", vm_path).as_str(),
config.cmdline,
)
.await?;
tx.write_string(
format!("{}/memory/static-max", dom_path).as_str(),
&(config.mem_mb * 1024).to_string(),
)
.await?;
tx.write_string(
format!("{}/memory/target", dom_path).as_str(),
&(config.mem_mb * 1024).to_string(),
)
.await?;
tx.write_string(format!("{}/memory/videoram", dom_path).as_str(), "0")
.await?;
tx.write_string(format!("{}/domid", dom_path).as_str(), &domid.to_string())
.await?;
tx.write_string(
format!("{}/store/port", dom_path).as_str(),
&xenstore_evtchn.to_string(),
)
.await?;
tx.write_string(
format!("{}/store/ring-ref", dom_path).as_str(),
&xenstore_mfn.to_string(),
)
.await?;
for i in 0..config.max_vcpus {
let path = format!("{}/cpu/{}", dom_path, i);
tx.mkdir(&path).await?;
tx.set_perms(&path, ro_perm).await?;
let path = format!("{}/cpu/{}/availability", dom_path, i);
tx.write_string(&path, "online").await?;
tx.set_perms(&path, ro_perm).await?;
}
tx.commit().await?;
}
if !self
.store
.introduce_domain(domid, xenstore_mfn, xenstore_evtchn)
.await?
{
return Err(Error::IntroduceDomainFailed);
}
// Console 0 is the primary console and gets the ring and event channel
// created during boot setup.
self.console_device_add(
&dom_path,
&backend_dom_path,
config.backend_domid,
domid,
0,
Some(console_evtchn),
Some(console_mfn),
)
.await?;
// Additional consoles start at index 1 and have no ring or port yet.
for (index, _) in config.consoles.iter().enumerate() {
self.console_device_add(
&dom_path,
&backend_dom_path,
config.backend_domid,
domid,
index + 1,
None,
None,
)
.await?;
}
for (index, disk) in config.disks.iter().enumerate() {
self.disk_device_add(
&dom_path,
&backend_dom_path,
config.backend_domid,
domid,
index,
disk,
)
.await?;
}
for (index, filesystem) in config.filesystems.iter().enumerate() {
self.fs_9p_device_add(
&dom_path,
&backend_dom_path,
config.backend_domid,
domid,
index,
filesystem,
)
.await?;
}
for (index, vif) in config.vifs.iter().enumerate() {
self.vif_device_add(
&dom_path,
&backend_dom_path,
config.backend_domid,
domid,
index,
vif,
)
.await?;
}
// Event channels are written directly, outside any transaction.
for channel in &config.event_channels {
let id = self
.call
.evtchn_alloc_unbound(domid, config.backend_domid)?;
let channel_path = format!("{}/evtchn/{}", dom_path, channel.name);
self.store
.write_string(&format!("{}/name", channel_path), channel.name)
.await?;
self.store
.write_string(&format!("{}/channel", channel_path), &id.to_string())
.await?;
}
// Everything is in place: let the guest run.
self.call.unpause_domain(domid)?;
Ok(())
}
/// Publishes the frontend and backend xenstore entries for one `vbd` device.
///
/// The device id is `(202 << 8) | (index << 4)`; it names the device node
/// and is also written as the frontend's `virtual-device`.
async fn disk_device_add(
    &mut self,
    dom_path: &str,
    backend_dom_path: &str,
    backend_domid: u32,
    domid: u32,
    index: usize,
    disk: &DomainDisk<'_>,
) -> Result<()> {
    let id = (202 << 8) | (index << 4) as u64;
    let mode = if disk.writable { "w" } else { "r" };
    let physical_device = format!("{:02x}:{:02x}", disk.block.major, disk.block.minor);

    let backend_items: Vec<(&str, String)> = vec![
        ("frontend-id", domid.to_string()),
        ("online", String::from("1")),
        ("removable", String::from("0")),
        ("bootable", String::from("1")),
        ("state", String::from("1")),
        ("dev", String::from(disk.vdev)),
        ("type", String::from("phy")),
        ("mode", String::from(mode)),
        ("device-type", String::from("disk")),
        ("discard-enable", String::from("0")),
        ("specification", String::from("xen")),
        ("physical-device-path", disk.block.path.clone()),
        ("physical-device", physical_device),
    ];
    let frontend_items: Vec<(&str, String)> = vec![
        ("backend-id", backend_domid.to_string()),
        ("state", String::from("1")),
        ("virtual-device", id.to_string()),
        ("device-type", String::from("disk")),
        ("trusted", String::from("1")),
        ("protocol", String::from("x86_64-abi")),
    ];

    self.device_add(
        "vbd",
        id,
        dom_path,
        backend_dom_path,
        backend_domid,
        domid,
        frontend_items,
        backend_items,
    )
    .await
}
/// Publishes the frontend and backend xenstore entries for one console.
///
/// Console 0 is typed `xenconsoled`; any other index is typed `ioemu` and
/// gets pty `connection`/`output` backend keys. When both `port` and `mfn`
/// are given they are written as the frontend's `port` and `ring-ref`;
/// otherwise the frontend gets `state` and `protocol` instead.
#[allow(clippy::too_many_arguments)]
async fn console_device_add(
    &mut self,
    dom_path: &str,
    backend_dom_path: &str,
    backend_domid: u32,
    domid: u32,
    index: usize,
    port: Option<u32>,
    mfn: Option<u64>,
) -> Result<()> {
    let mut backend_entries = vec![
        ("frontend-id", domid.to_string()),
        ("online", "1".to_string()),
        ("state", "1".to_string()),
        ("protocol", "vt100".to_string()),
    ];
    let mut frontend_entries = vec![
        ("backend-id", backend_domid.to_string()),
        ("limit", "1048576".to_string()),
        ("output", "pty".to_string()),
        ("tty", "".to_string()),
    ];

    if index == 0 {
        frontend_entries.push(("type", "xenconsoled".to_string()));
    } else {
        frontend_entries.push(("type", "ioemu".to_string()));
        backend_entries.push(("connection", "pty".to_string()));
        backend_entries.push(("output", "pty".to_string()));
    }

    // Matching both options at once replaces the former is_some()/unwrap()
    // pairs, so no lint suppression is needed for them.
    if let (Some(port), Some(mfn)) = (port, mfn) {
        frontend_entries.push(("port", port.to_string()));
        frontend_entries.push(("ring-ref", mfn.to_string()));
    } else {
        frontend_entries.push(("state", "1".to_string()));
        frontend_entries.push(("protocol", "vt100".to_string()));
    }

    self.device_add(
        "console",
        index as u64,
        dom_path,
        backend_dom_path,
        backend_domid,
        domid,
        frontend_entries,
        backend_entries,
    )
    .await?;
    Ok(())
}
/// Publishes the frontend and backend xenstore entries for one `9pfs` share.
///
/// The device id is `90 + index`.
async fn fs_9p_device_add(
    &mut self,
    dom_path: &str,
    backend_dom_path: &str,
    backend_domid: u32,
    domid: u32,
    index: usize,
    filesystem: &DomainFilesystem<'_>,
) -> Result<()> {
    let id = 90 + index as u64;

    let backend_items: Vec<(&str, String)> = vec![
        ("frontend-id", domid.to_string()),
        ("online", String::from("1")),
        ("state", String::from("1")),
        ("path", String::from(filesystem.path)),
        ("security-model", String::from("none")),
    ];
    let frontend_items: Vec<(&str, String)> = vec![
        ("backend-id", backend_domid.to_string()),
        ("state", String::from("1")),
        ("tag", String::from(filesystem.tag)),
    ];

    self.device_add(
        "9pfs",
        id,
        dom_path,
        backend_dom_path,
        backend_domid,
        domid,
        frontend_items,
        backend_items,
    )
    .await
}
/// Publishes the frontend and backend xenstore entries for one `vif` device.
///
/// The device id is `20 + index` and is also written as the backend's
/// `handle`. A configured bridge is written as `bridge`. With a hotplug
/// script the backend gets that `script` and an empty `hotplug-status`;
/// without one, `script` is empty and `hotplug-status` is "connected".
async fn vif_device_add(
    &mut self,
    dom_path: &str,
    backend_dom_path: &str,
    backend_domid: u32,
    domid: u32,
    index: usize,
    vif: &DomainNetworkInterface<'_>,
) -> Result<()> {
    let id = 20 + index as u64;
    let mut backend_items: Vec<(&str, String)> = vec![
        ("frontend-id", domid.to_string()),
        ("online", "1".to_string()),
        ("state", "1".to_string()),
        ("mac", vif.mac.to_string()),
        ("mtu", vif.mtu.to_string()),
        ("type", "vif".to_string()),
        ("handle", id.to_string()),
    ];

    if let Some(bridge) = vif.bridge {
        backend_items.push(("bridge", bridge.to_string()));
    }

    match vif.script {
        Some(script) => {
            backend_items.push(("script", script.to_string()));
            backend_items.push(("hotplug-status", "".to_string()));
        }
        None => {
            backend_items.push(("script", "".to_string()));
            backend_items.push(("hotplug-status", "connected".to_string()));
        }
    }

    let frontend_items: Vec<(&str, String)> = vec![
        ("backend-id", backend_domid.to_string()),
        ("state", "1".to_string()),
        ("mac", vif.mac.to_string()),
        ("trusted", "1".to_string()),
        ("mtu", vif.mtu.to_string()),
    ];

    self.device_add(
        "vif",
        id,
        dom_path,
        backend_dom_path,
        backend_domid,
        domid,
        frontend_items,
        backend_items,
    )
    .await?;
    Ok(())
}
/// Writes one device's frontend and backend directories in a single
/// xenstore transaction.
///
/// The frontend lives at `<dom_path>/device/<typ>/<id>`, except console 0
/// which lives at `<dom_path>/console`. The backend lives at
/// `<backend_dom_path>/backend/<typ>/<domid>/<id>`. Each side additionally
/// receives a key pointing at the other (`backend` / `frontend`).
///
/// Frontend keys get the frontend permission set applied individually,
/// except for console 0, whose keys keep the permissions they are created
/// with.
#[allow(clippy::too_many_arguments)]
async fn device_add(
    &mut self,
    typ: &str,
    id: u64,
    dom_path: &str,
    backend_dom_path: &str,
    backend_domid: u32,
    domid: u32,
    mut frontend_items: Vec<(&str, String)>,
    mut backend_items: Vec<(&str, String)>,
) -> Result<()> {
    let console_zero = typ == "console" && id == 0;
    let frontend_path = if console_zero {
        format!("{}/console", dom_path)
    } else {
        format!("{}/device/{}/{}", dom_path, typ, id)
    };
    let backend_path = format!("{}/backend/{}/{}/{}", backend_dom_path, typ, domid, id);

    // The item lists are owned by this function, so they are extended in
    // place instead of being cloned first.
    backend_items.push(("frontend", frontend_path.clone()));
    frontend_items.push(("backend", backend_path.clone()));

    let frontend_perms = &[
        XsPermission {
            id: domid,
            perms: XS_PERM_NONE,
        },
        XsPermission {
            id: backend_domid,
            perms: XS_PERM_READ,
        },
    ];
    let backend_perms = &[
        XsPermission {
            id: backend_domid,
            perms: XS_PERM_NONE,
        },
        XsPermission {
            id: domid,
            perms: XS_PERM_READ,
        },
    ];

    let tx = self.store.transaction().await?;
    tx.mknod(&frontend_path, frontend_perms).await?;
    for (p, value) in &frontend_items {
        let path = format!("{}/{}", frontend_path, *p);
        tx.write_string(&path, value).await?;
        if !console_zero {
            tx.set_perms(&path, frontend_perms).await?;
        }
    }
    tx.mknod(&backend_path, backend_perms).await?;
    for (p, value) in &backend_items {
        let path = format!("{}/{}", backend_path, *p);
        tx.write_string(&path, value).await?;
    }
    tx.commit().await?;
    Ok(())
}
/// Destroys domain `domid`.
///
/// Xenstore cleanup is attempted first; if it fails the failure is logged
/// and the domain is destroyed regardless. Only a failure of the destroy
/// call itself is returned.
pub async fn destroy(&mut self, domid: u32) -> Result<()> {
    match self.destroy_store(domid).await {
        Ok(()) => {}
        Err(err) => warn!("failed to destroy store for domain {}: {}", domid, err),
    }
    self.call.destroy_domain(domid)?;
    Ok(())
}
/// Shuts down the domain's device backends and removes its xenstore state.
///
/// For every device backend that has a `state` key: write `online = 0`,
/// request state 5 unless the backend is already in state 6, then poll up
/// to 100 times, 100 ms apart, for state 6. Afterwards the parent directory
/// of each backend (including the primary console's), the vm path and the
/// domain path are removed in one transaction.
///
/// # Errors
///
/// `Error::DomainNonExistent` when the domain has no `vm` key; otherwise any
/// xenstore or path-conversion error encountered along the way.
async fn destroy_store(&mut self, domid: u32) -> Result<()> {
    let dom_path = self.store.get_domain_path(domid).await?;
    let Some(vm_path) = self.store.read_string(&format!("{}/vm", dom_path)).await? else {
        return Err(Error::DomainNonExistent);
    };

    let mut backend_paths: Vec<String> = Vec::new();
    let console_frontend_path = format!("{}/console", dom_path);
    let console_backend_path = self
        .store
        .read_string(format!("{}/backend", console_frontend_path).as_str())
        .await?;

    for device_category in self
        .store
        .list(format!("{}/device", dom_path).as_str())
        .await?
    {
        for device_id in self
            .store
            .list(format!("{}/device/{}", dom_path, device_category).as_str())
            .await?
        {
            let device_path = format!("{}/device/{}/{}", dom_path, device_category, device_id);
            let Some(backend_path) = self
                .store
                .read_string(format!("{}/backend", device_path).as_str())
                .await?
            else {
                continue;
            };
            backend_paths.push(backend_path);
        }
    }

    for backend in &backend_paths {
        let state_path = format!("{}/state", backend);
        let online_path = format!("{}/online", backend);
        let tx = self.store.transaction().await?;
        let state = tx.read_string(&state_path).await?.unwrap_or_default();
        if state.is_empty() {
            // This backend has nothing to shut down. Move on to the next
            // one: stopping here would skip the shutdown handshake for every
            // remaining backend. As before, this path leaves the
            // transaction uncommitted.
            continue;
        }
        tx.write_string(&online_path, "0").await?;
        if u32::from_str(&state).unwrap_or(0) != 6 {
            tx.write_string(&state_path, "5").await?;
        }
        tx.commit().await?;

        let mut count: u32 = 0;
        loop {
            if count >= 100 {
                warn!("unable to safely destroy backend: {}", backend);
                break;
            }
            let Some(state) = self.store.read_string(&state_path).await? else {
                break;
            };
            let state = i64::from_str(&state).unwrap_or(-1);
            if state == 6 {
                break;
            }
            // NOTE(review): this blocks the executor thread inside an async
            // fn; an async sleep would be preferable if the crate has an
            // async timer available.
            thread::sleep(Duration::from_millis(100));
            count += 1;
        }
    }

    let tx = self.store.transaction().await?;
    // Device backends first, then the primary console's backend, borrowed
    // rather than copied into a second list.
    for path in backend_paths.iter().chain(console_backend_path.iter()) {
        let path = PathBuf::from(path);
        let parent = path.parent().ok_or(Error::PathParentNotFound)?;
        tx.rm(parent.to_str().ok_or(Error::PathStringConversion)?)
            .await?;
    }
    tx.rm(&vm_path).await?;
    tx.rm(&dom_path).await?;
    tx.commit().await?;
    Ok(())
}
pub async fn get_console_path(&mut self, domid: u32) -> Result<String> {
let dom_path = self.store.get_domain_path(domid).await?;
let console_tty_path = format!("{}/console/tty", dom_path);
let mut tty: Option<String> = None;
for _ in 0..5 {
tty = self.store.read_string(&console_tty_path).await?;
if tty.is_some() {
break;
}
thread::sleep(Duration::from_millis(200));
}
let Some(tty) = tty else {
return Err(Error::TtyNotFound);
};
Ok(tty)
}
}

View File

@ -0,0 +1,186 @@
use crate::error::Result;
use crate::sys::{XEN_PAGE_SHIFT, XEN_PAGE_SIZE};
use crate::Error;
use libc::munmap;
use log::debug;
use std::ffi::c_void;
use crate::x86::X86_PAGE_SHIFT;
use xencall::sys::MmapEntry;
use xencall::XenCall;
/// One mapping of guest memory into this process, as recorded by
/// `PhysicalPages`.
#[derive(Debug)]
pub struct PhysicalPage {
// First guest pfn covered by the mapping; `u64::MAX` for mappings made by
// `map_foreign_pages`, which are addressed by mfn instead.
pfn: u64,
// Address at which the mapping starts in this process.
ptr: u64,
// Number of pages in the mapping.
count: u64,
}
/// Tracks the guest pages of one domain that are currently mapped into this
/// process, together with the domain's pfn-to-mfn table.
pub struct PhysicalPages<'a> {
// Domain whose memory is being mapped.
domid: u32,
// Pfn-to-mfn table, indexed by pfn; empty until `load_p2m` is called.
pub(crate) p2m: Vec<u64>,
// Handle used to create the mappings.
call: &'a XenCall,
// Every live mapping created through this object.
pages: Vec<PhysicalPage>,
}
impl PhysicalPages<'_> {
/// Creates an empty tracker for `domid`: no p2m table loaded, nothing mapped.
pub fn new(call: &XenCall, domid: u32) -> PhysicalPages {
    let p2m = Vec::new();
    let pages = Vec::new();
    PhysicalPages {
        domid,
        p2m,
        call,
        pages,
    }
}
/// Replaces the pfn-to-mfn table with `p2m`.
pub fn load_p2m(&mut self, p2m: Vec<u64>) {
self.p2m = p2m;
}
/// Returns the number of entries in the pfn-to-mfn table.
pub fn p2m_size(&mut self) -> u64 {
self.p2m.len() as u64
}
/// Returns the local address at which guest page `pfn` is mapped, mapping
/// `count` pages starting at `pfn` first if no existing mapping covers it.
///
/// With `count == 0` the call is lookup-only: `pfn` must already lie inside
/// an existing mapping.
///
/// # Errors
///
/// `Error::MemorySetupFailed` when the requested range only partially
/// overlaps an existing mapping, or when `count == 0` and `pfn` is not
/// mapped. Errors from creating a new mapping are propagated.
pub fn pfn_to_ptr(&mut self, pfn: u64, count: u64) -> Result<u64> {
    for page in &self.pages {
        // Mappings made by `map_foreign_pages` are recorded with
        // `pfn == u64::MAX`. Saturating arithmetic keeps their end at
        // `u64::MAX` instead of overflowing, and the checks below then skip
        // them for every real pfn.
        let page_end = page.pfn.saturating_add(page.count);
        if pfn >= page_end {
            continue;
        }
        if count > 0 {
            let request_end = pfn.saturating_add(count);
            if request_end <= page.pfn {
                continue;
            }
            // The request overlaps this mapping; it must fit entirely.
            if pfn < page.pfn || request_end > page_end {
                return Err(Error::MemorySetupFailed);
            }
        } else if pfn < page.pfn {
            continue;
        }
        return Ok(page.ptr + ((pfn - page.pfn) << X86_PAGE_SHIFT));
    }

    if count == 0 {
        return Err(Error::MemorySetupFailed);
    }
    self.pfn_alloc(pfn, count)
}
fn pfn_alloc(&mut self, pfn: u64, count: u64) -> Result<u64> {
let mut entries = vec![MmapEntry::default(); count as usize];
for (i, entry) in entries.iter_mut().enumerate() {
entry.mfn = self.p2m[pfn as usize + i];
}
let chunk_size = 1 << XEN_PAGE_SHIFT;
let num_per_entry = chunk_size >> XEN_PAGE_SHIFT;
let num = num_per_entry * count as usize;
let mut pfns = vec![u64::MAX; num];
for i in 0..count as usize {
for j in 0..num_per_entry {
pfns[i * num_per_entry + j] = entries[i].mfn + j as u64;
}
}
let actual_mmap_len = (num as u64) << XEN_PAGE_SHIFT;
let addr = self
.call
.mmap(0, actual_mmap_len)
.ok_or(Error::MmapFailed)?;
debug!("mapped {:#x} foreign bytes at {:#x}", actual_mmap_len, addr);
let result = self.call.mmap_batch(self.domid, num as u64, addr, pfns)?;
if result != 0 {
return Err(Error::MmapFailed);
}
let page = PhysicalPage {
pfn,
ptr: addr,
count,
};
debug!(
"alloc_pfn {:#x}+{:#x} at {:#x}",
page.pfn, page.count, page.ptr
);
self.pages.push(page);
Ok(addr)
}
pub fn map_foreign_pages(&mut self, mfn: u64, size: u64) -> Result<u64> {
let num = ((size + XEN_PAGE_SIZE - 1) >> XEN_PAGE_SHIFT) as usize;
let mut pfns = vec![u64::MAX; num];
for (i, item) in pfns.iter_mut().enumerate().take(num) {
*item = mfn + i as u64;
}
let actual_mmap_len = (num as u64) << XEN_PAGE_SHIFT;
let addr = self
.call
.mmap(0, actual_mmap_len)
.ok_or(Error::MmapFailed)?;
debug!("mapped {:#x} foreign bytes at {:#x}", actual_mmap_len, addr);
let result = self.call.mmap_batch(self.domid, num as u64, addr, pfns)?;
if result != 0 {
return Err(Error::MmapFailed);
}
let page = PhysicalPage {
pfn: u64::MAX,
ptr: addr,
count: num as u64,
};
debug!(
"alloc_mfn {:#x}+{:#x} at {:#x}",
page.pfn, page.count, page.ptr
);
self.pages.push(page);
Ok(addr)
}
pub fn unmap_all(&mut self) -> Result<()> {
for page in &self.pages {
unsafe {
let err = munmap(
page.ptr as *mut c_void,
(page.count << X86_PAGE_SHIFT) as usize,
);
if err != 0 {
return Err(Error::UnmapFailed);
}
}
}
self.pages.clear();
Ok(())
}
/// Unmaps the recorded mapping whose first pfn is exactly `pfn`.
///
/// # Errors
///
/// * `Error::MemorySetupFailed` when no mapping starts at `pfn`.
/// * `Error::UnmapFailed` when `munmap` fails; the mapping stays recorded.
pub fn unmap(&mut self, pfn: u64) -> Result<()> {
    let Some(index) = self.pages.iter().position(|page| page.pfn == pfn) else {
        return Err(Error::MemorySetupFailed);
    };
    let page = &self.pages[index];
    let len = (page.count << X86_PAGE_SHIFT) as usize;
    // SAFETY: `ptr` and `count` were recorded by `pfn_alloc` or
    // `map_foreign_pages` when the range was mapped, and the entry is removed
    // below once it has been unmapped.
    let err = unsafe { munmap(page.ptr as *mut c_void, len) };
    if err != 0 {
        return Err(Error::UnmapFailed);
    }
    // Logged only after success, so the message is never emitted for a
    // failed unmap.
    debug!("unmapped {:#x} foreign bytes at {:#x}", len, page.ptr);
    self.pages.remove(index);
    Ok(())
}
}

View File

@ -0,0 +1,130 @@
// Note type identifiers for ELF notes whose name is "Xen". The ELF loader
// compares a note's `n_type` against these values.
pub const XEN_ELFNOTE_INFO: u64 = 0;
// Required by the ELF loader; reported as the guest's virtual entry point.
pub const XEN_ELFNOTE_ENTRY: u64 = 1;
// Required by the ELF loader; reported as the virtual hypercall address.
pub const XEN_ELFNOTE_HYPERCALL_PAGE: u64 = 2;
// Required by the ELF loader; together with PADDR_OFFSET it yields the
// physical-to-virtual offset.
pub const XEN_ELFNOTE_VIRT_BASE: u64 = 3;
// Required by the ELF loader; subtracted from VIRT_BASE.
pub const XEN_ELFNOTE_PADDR_OFFSET: u64 = 4;
pub const XEN_ELFNOTE_XEN_VERSION: u64 = 5;
pub const XEN_ELFNOTE_GUEST_OS: u64 = 6;
pub const XEN_ELFNOTE_GUEST_VERSION: u64 = 7;
pub const XEN_ELFNOTE_LOADER: u64 = 8;
pub const XEN_ELFNOTE_PAE_MODE: u64 = 9;
pub const XEN_ELFNOTE_FEATURES: u64 = 10;
pub const XEN_ELFNOTE_BSD_SYMTAB: u64 = 11;
pub const XEN_ELFNOTE_HV_START_LOW: u64 = 12;
pub const XEN_ELFNOTE_L1_MFN_VALID: u64 = 13;
pub const XEN_ELFNOTE_SUSPEND_CANCEL: u64 = 14;
// Required by the ELF loader; reported as the virtual p2m base.
pub const XEN_ELFNOTE_INIT_P2M: u64 = 15;
// Required by the ELF loader; a non-zero value marks the initrd as unmapped.
pub const XEN_ELFNOTE_MOD_START_PFN: u64 = 16;
pub const XEN_ELFNOTE_SUPPORTED_FEATURES: u64 = 17;
pub const XEN_ELFNOTE_PHYS32_ENTRY: u64 = 18;
/// Describes one recognised "Xen" ELF note type.
#[derive(Copy, Clone)]
pub struct ElfNoteXenType {
/// Note type identifier, matched against the note's `n_type`.
pub id: u64,
/// Short name of the note type.
pub name: &'static str,
/// True when the descriptor is text; the ELF loader only decodes a numeric
/// value when this is false.
pub is_string: bool,
}
/// Builds one row of [`XEN_ELFNOTE_TYPES`].
const fn xen_note_type(id: u64, name: &'static str, is_string: bool) -> ElfNoteXenType {
    ElfNoteXenType {
        id,
        name,
        is_string,
    }
}

/// Every "Xen" ELF note type the loader recognises, with a flag telling
/// whether its descriptor is text (`true`) or a number (`false`).
pub const XEN_ELFNOTE_TYPES: &[ElfNoteXenType] = &[
    xen_note_type(XEN_ELFNOTE_ENTRY, "ENTRY", false),
    xen_note_type(XEN_ELFNOTE_HYPERCALL_PAGE, "HYPERCALL_PAGE", false),
    xen_note_type(XEN_ELFNOTE_VIRT_BASE, "VIRT_BASE", false),
    xen_note_type(XEN_ELFNOTE_INIT_P2M, "INIT_P2M", false),
    xen_note_type(XEN_ELFNOTE_PADDR_OFFSET, "PADDR_OFFSET", false),
    xen_note_type(XEN_ELFNOTE_HV_START_LOW, "HV_START_LOW", false),
    xen_note_type(XEN_ELFNOTE_XEN_VERSION, "XEN_VERSION", true),
    xen_note_type(XEN_ELFNOTE_GUEST_OS, "GUEST_OS", true),
    xen_note_type(XEN_ELFNOTE_GUEST_VERSION, "GUEST_VERSION", true),
    xen_note_type(XEN_ELFNOTE_LOADER, "LOADER", true),
    xen_note_type(XEN_ELFNOTE_PAE_MODE, "PAE_MODE", true),
    xen_note_type(XEN_ELFNOTE_FEATURES, "FEATURES", true),
    xen_note_type(XEN_ELFNOTE_SUPPORTED_FEATURES, "SUPPORTED_FEATURES", false),
    xen_note_type(XEN_ELFNOTE_BSD_SYMTAB, "BSD_SYMTAB", true),
    xen_note_type(XEN_ELFNOTE_SUSPEND_CANCEL, "SUSPEND_CANCEL", false),
    xen_note_type(XEN_ELFNOTE_MOD_START_PFN, "MOD_START_PFN", false),
    xen_note_type(XEN_ELFNOTE_PHYS32_ENTRY, "PHYS32_ENTRY", false),
];
// log2 of the page size used when sizing and addressing foreign mappings.
pub const XEN_PAGE_SHIFT: u64 = 12;
// Page size in bytes (1 << XEN_PAGE_SHIFT = 4096).
pub const XEN_PAGE_SIZE: u64 = 1 << XEN_PAGE_SHIFT;
// Mask that clears the in-page offset bits of an address.
pub const XEN_PAGE_MASK: u64 = !(XEN_PAGE_SIZE - 1);
pub const SUPERPAGE_BATCH_SIZE: u64 = 512;
// log2 of the number of pages in one 2 MB superpage.
pub const SUPERPAGE_2MB_SHIFT: u64 = 9;
// Number of pages in one 2 MB superpage (1 << 9 = 512).
pub const SUPERPAGE_2MB_NR_PFNS: u64 = 1u64 << SUPERPAGE_2MB_SHIFT;
// Bit flags; NOTE(review): presumably vcpu guest context flags from the Xen
// public headers — confirm against their definition.
pub const VGCF_IN_KERNEL: u64 = 1 << 2;
pub const VGCF_ONLINE: u64 = 1 << 5;
/// C-layout record of three fields: flags, a domain id and a frame number.
///
/// NOTE(review): presumably mirrors a Xen grant table entry — confirm against
/// the Xen public headers.
#[repr(C)]
pub struct GrantEntry {
pub flags: u16,
pub domid: u16,
pub frame: u32,
}

View File

@ -0,0 +1,627 @@
use crate::boot::{
ArchBootSetup, BootImageInfo, BootSetup, BootState, DomainSegment, XEN_UNSET_ADDR,
};
use crate::error::Result;
use crate::sys::{
SUPERPAGE_2MB_NR_PFNS, SUPERPAGE_2MB_SHIFT, SUPERPAGE_BATCH_SIZE, VGCF_IN_KERNEL, VGCF_ONLINE,
XEN_PAGE_SHIFT,
};
use crate::Error;
use libc::c_char;
use log::{debug, trace};
use slice_copy::copy;
use std::cmp::{max, min};
use std::mem::size_of;
use std::slice;
use xencall::sys::{VcpuGuestContext, MMUEXT_PIN_L4_TABLE};
// log2 of the x86 page size.
pub const X86_PAGE_SHIFT: u64 = 12;
// x86 page size in bytes (1 << 12 = 4096).
pub const X86_PAGE_SIZE: u64 = 1 << X86_PAGE_SHIFT;
// Number of significant virtual address bits.
pub const X86_VIRT_BITS: u64 = 48;
// Mask selecting the low X86_VIRT_BITS bits of an address.
pub const X86_VIRT_MASK: u64 = (1 << X86_VIRT_BITS) - 1;
// Number of page table levels.
pub const X86_PGTABLE_LEVELS: u64 = 4;
// Number of address bits consumed per page table level.
pub const X86_PGTABLE_LEVEL_SHIFT: u64 = 9;
/// Bookkeeping for one address range within a page table mapping: used both
/// for the mapped area as a whole and for each page table level.
#[repr(C)]
#[derive(Debug, Clone, Default)]
pub struct PageTableMappingLevel {
/// Start of the range.
pub from: u64,
/// End of the range.
pub to: u64,
/// First pfn associated with this range.
pub pfn: u64,
/// Number of page tables counted for this range.
pub pgtables: usize,
}
/// One mapped area together with its per-level page table bookkeeping.
#[repr(C)]
#[derive(Debug, Clone, Default)]
pub struct PageTableMapping {
/// The area as a whole.
pub area: PageTableMappingLevel,
/// One entry per page table level (`X86_PGTABLE_LEVELS` entries).
pub levels: [PageTableMappingLevel; X86_PGTABLE_LEVELS as usize],
}
// Capacity of `PageTable::mappings`.
pub const X86_PAGE_TABLE_MAX_MAPPINGS: usize = 2;
/// Fixed-capacity list of page table mappings.
#[repr(C)]
#[derive(Debug, Clone, Default)]
pub struct PageTable {
/// Number of entries of `mappings` currently in use.
pub mappings_count: usize,
/// Storage for up to `X86_PAGE_TABLE_MAX_MAPPINGS` mappings.
pub mappings: [PageTableMapping; X86_PAGE_TABLE_MAX_MAPPINGS],
}
/// Console fields embedded in [`StartInfo`]: a frame number and an event
/// channel.
#[repr(C)]
#[derive(Debug)]
pub struct StartInfoConsole {
pub mfn: u64,
pub evtchn: u32,
}
// Size in bytes of the `cmdline` buffer in `StartInfo`.
pub const MAX_GUEST_CMDLINE: usize = 1024;
/// C-layout block of boot parameters.
///
/// NOTE(review): presumably mirrors Xen's `start_info` ABI structure, so
/// field order and types must not change — confirm against the Xen public
/// headers.
#[repr(C)]
#[derive(Debug)]
pub struct StartInfo {
/// Fixed 32-byte magic string buffer.
pub magic: [c_char; 32],
pub nr_pages: u64,
pub shared_info: u64,
pub flags: u32,
pub store_mfn: u64,
pub store_evtchn: u32,
pub console: StartInfoConsole,
pub pt_base: u64,
pub nr_pt_frames: u64,
pub mfn_list: u64,
pub mod_start: u64,
pub mod_len: u64,
/// Fixed `MAX_GUEST_CMDLINE`-byte command line buffer.
pub cmdline: [c_char; MAX_GUEST_CMDLINE],
pub first_p2m_pfn: u64,
pub nr_p2m_frames: u64,
}
// Magic string identifying the guest type; NOTE(review): presumably the value
// placed in `StartInfo::magic` — confirm at the point of use.
pub const X86_GUEST_MAGIC: &str = "xen-3.0-x86_64";
/// Architecture-specific part of [`VcpuInfo`]; C layout.
#[repr(C)]
#[derive(Debug)]
pub struct ArchVcpuInfo {
pub cr2: u64,
/// Padding.
pub pad: u64,
}
/// Time-keeping part of [`VcpuInfo`]; C layout with explicit padding fields.
#[repr(C)]
#[derive(Debug)]
pub struct VcpuInfoTime {
pub version: u32,
/// Padding.
pub pad0: u32,
pub tsc_timestamp: u64,
pub system_time: u64,
pub tsc_to_system_mul: u32,
pub tsc_shift: i8,
pub flags: u8,
/// Padding.
pub pad1: [u8; 2],
}
/// Per-vcpu record stored in [`SharedInfo`]; C layout.
#[repr(C)]
#[derive(Debug)]
pub struct VcpuInfo {
pub evtchn_upcall_pending: u8,
pub evtchn_upcall_mask: u8,
pub evtchn_pending_sel: u64,
pub arch_vcpu_info: ArchVcpuInfo,
pub vcpu_info_time: VcpuInfoTime,
}
/// C-layout block holding per-vcpu records, event channel bitmaps, wall-clock
/// fields and architecture-specific fields.
///
/// NOTE(review): presumably mirrors Xen's `shared_info` ABI structure, so
/// field order and types must not change — confirm against the Xen public
/// headers.
#[repr(C)]
#[derive(Debug)]
pub struct SharedInfo {
/// One record per vcpu, 32 in total.
pub vcpu_info: [VcpuInfo; 32],
/// 64 words of pending bits (`u64::BITS` entries).
pub evtchn_pending: [u64; u64::BITS as usize],
/// 64 words of mask bits (`u64::BITS` entries).
pub evtchn_mask: [u64; u64::BITS as usize],
pub wc_version: u32,
pub wc_sec: u32,
pub wc_nsec: u32,
pub wc_sec_hi: u32,
// arch shared info
pub max_pfn: u64,
pub pfn_to_mfn_frame_list_list: u64,
pub nmi_reason: u64,
pub p2m_cr3: u64,
pub p2m_vaddr: u64,
pub p2m_generation: u64,
}
/// x86 implementation of the architecture-specific boot setup.
pub struct X86BootSetup {
// Page table mappings registered so far by `count_page_tables` callers.
table: PageTable,
}
/// A contiguous range of guest memory used while populating the physmap.
#[derive(Debug)]
struct VmemRange {
// Start of the range, in bytes.
start: u64,
// End of the range (exclusive), in bytes.
end: u64,
// Currently unused by the code in this file.
_flags: u32,
// Currently unused by the code in this file.
_nid: u32,
}
impl Default for X86BootSetup {
fn default() -> Self {
Self::new()
}
}
impl X86BootSetup {
/// Creates a boot setup with an empty page table description.
pub fn new() -> X86BootSetup {
    let table = PageTable::default();
    X86BootSetup { table }
}
/// Page table entry flag: present.
const PAGE_PRESENT: u64 = 0x001;
/// Page table entry flag: writable.
const PAGE_RW: u64 = 0x002;
/// Page table entry flag: user accessible.
const PAGE_USER: u64 = 0x004;
/// Page table entry flag: accessed.
const PAGE_ACCESSED: u64 = 0x020;
/// Page table entry flag: dirty.
const PAGE_DIRTY: u64 = 0x040;
fn get_pg_prot(&mut self, l: usize, pfn: u64) -> u64 {
let prot = [
X86BootSetup::PAGE_PRESENT | X86BootSetup::PAGE_RW | X86BootSetup::PAGE_ACCESSED,
X86BootSetup::PAGE_PRESENT
| X86BootSetup::PAGE_RW
| X86BootSetup::PAGE_ACCESSED
| X86BootSetup::PAGE_DIRTY
| X86BootSetup::PAGE_USER,
X86BootSetup::PAGE_PRESENT
| X86BootSetup::PAGE_RW
| X86BootSetup::PAGE_ACCESSED
| X86BootSetup::PAGE_DIRTY
| X86BootSetup::PAGE_USER,
X86BootSetup::PAGE_PRESENT
| X86BootSetup::PAGE_RW
| X86BootSetup::PAGE_ACCESSED
| X86BootSetup::PAGE_DIRTY
| X86BootSetup::PAGE_USER,
];
let prot = prot[l];
if l > 0 {
return prot;
}
for m in 0..self.table.mappings_count {
let map = &self.table.mappings[m];
let pfn_s = map.levels[(X86_PGTABLE_LEVELS - 1) as usize].pfn;
let pfn_e = map.area.pgtables as u64 + pfn_s;
if pfn >= pfn_s && pfn < pfn_e {
return prot & !X86BootSetup::PAGE_RW;
}
}
prot
}
/// Computes the page tables needed to map the virtual range `from..=to`
/// starting at page frame `pfn`, and stores the result in the next free
/// mapping slot.
///
/// Returns the index of the slot that was filled. `mappings_count` is NOT
/// incremented here; callers do that once they accept the result.
///
/// Fails with `Error::MemorySetupFailed` when all mapping slots are in use,
/// when the range would reach past the p2m size, or when the range overlaps
/// the area of an already registered mapping.
fn count_page_tables(
&mut self,
setup: &mut BootSetup,
from: u64,
to: u64,
pfn: u64,
) -> Result<usize> {
debug!("counting pgtables from={} to={} pfn={}", from, to, pfn);
if self.table.mappings_count == X86_PAGE_TABLE_MAX_MAPPINGS {
return Err(Error::MemorySetupFailed);
}
let m = self.table.mappings_count;
let pfn_end = pfn + ((to - from) >> X86_PAGE_SHIFT);
if pfn_end >= setup.phys.p2m_size() {
return Err(Error::MemorySetupFailed);
}
// Reject ranges that overlap an existing mapping's area.
for idx in 0..self.table.mappings_count {
if from < self.table.mappings[idx].area.to && to > self.table.mappings[idx].area.from {
return Err(Error::MemorySetupFailed);
}
}
let mut map = PageTableMapping::default();
map.area.from = from & X86_VIRT_MASK;
map.area.to = to & X86_VIRT_MASK;
// Walk from the top level down; each level's tables are placed after the
// tables already counted for the levels above it.
for l in (0usize..X86_PGTABLE_LEVELS as usize).rev() {
map.levels[l].pfn = setup.pfn_alloc_end + map.area.pgtables as u64;
if l as u64 == X86_PGTABLE_LEVELS - 1 {
// Only the first mapping gets a top-level table, covering the whole
// virtual address space.
if self.table.mappings_count == 0 {
map.levels[l].from = 0;
map.levels[l].to = X86_VIRT_MASK;
map.levels[l].pgtables = 1;
map.area.pgtables += 1;
}
continue;
}
// Widen the area to the span covered by one table at this level.
let bits = X86_PAGE_SHIFT + (l + 1) as u64 * X86_PGTABLE_LEVEL_SHIFT;
let mask = BootSetup::bits_to_mask(bits);
map.levels[l].from = map.area.from & !mask;
map.levels[l].to = map.area.to | mask;
// Trim away the parts already covered by tables of existing mappings.
for cmp in &mut self.table.mappings[0..self.table.mappings_count] {
if cmp.levels[l].from == cmp.levels[l].to {
continue;
}
if map.levels[l].from >= cmp.levels[l].from && map.levels[l].to <= cmp.levels[l].to
{
// Fully covered: no new tables at this level.
map.levels[l].from = 0;
map.levels[l].to = 0;
break;
}
if map.levels[l].from >= cmp.levels[l].from
&& map.levels[l].from <= cmp.levels[l].to
{
map.levels[l].from = cmp.levels[l].to + 1;
}
if map.levels[l].to >= cmp.levels[l].from && map.levels[l].to <= cmp.levels[l].to {
map.levels[l].to = cmp.levels[l].from - 1;
}
}
if map.levels[l].from < map.levels[l].to {
map.levels[l].pgtables =
(((map.levels[l].to - map.levels[l].from) >> bits) + 1) as usize;
}
debug!(
"count_pgtables {:#x}/{}: {:#x} -> {:#x}, {} tables",
mask, bits, map.levels[l].from, map.levels[l].to, map.levels[l].pgtables
);
map.area.pgtables += map.levels[l].pgtables;
}
self.table.mappings[m] = map;
Ok(m)
}
}
impl ArchBootSetup for X86BootSetup {
/// Returns the x86 page size (`X86_PAGE_SIZE`).
fn page_size(&mut self) -> u64 {
X86_PAGE_SIZE
}
/// Returns the x86 page shift (`X86_PAGE_SHIFT`).
fn page_shift(&mut self) -> u64 {
X86_PAGE_SHIFT
}
/// Allocates the segment holding the p2m table and the page tables that map it.
///
/// The p2m table needs 8 bytes per p2m entry, rounded up to whole pages, and is
/// mapped at `image_info.virt_p2m_base`. The page tables counted for that range
/// are placed directly after the table, so the segment size is the sum of both.
fn alloc_p2m_segment(
    &mut self,
    setup: &mut BootSetup,
    image_info: &BootImageInfo,
) -> Result<DomainSegment> {
    let page_mask = X86_PAGE_SIZE - 1;
    let mut p2m_alloc_size = ((setup.phys.p2m_size() * 8) + X86_PAGE_SIZE - 1) & !page_mask;
    let virt_start = image_info.virt_p2m_base;
    let virt_end = virt_start + p2m_alloc_size - 1;
    let first_pfn = setup.pfn_alloc_end;
    let index = self.count_page_tables(setup, virt_start, virt_end, first_pfn)?;

    let mapping = &mut self.table.mappings[index];
    mapping.area.pfn = setup.pfn_alloc_end;
    // The page tables live behind the p2m table itself, so shift every level.
    let p2m_pages = p2m_alloc_size >> X86_PAGE_SHIFT;
    for level in mapping.levels.iter_mut() {
        level.pfn += p2m_pages;
    }
    let pgtables = mapping.area.pgtables;

    self.table.mappings_count += 1;
    p2m_alloc_size += (pgtables << X86_PAGE_SHIFT) as u64;
    let segment = setup.alloc_segment(self, 0, p2m_alloc_size)?;
    Ok(segment)
}
/// Allocates the segment for the page tables mapping the guest's virtual space.
///
/// The end of the mapped range depends on how many page tables are needed, which
/// in turn depends on the range, so the count is repeated until the tables plus
/// a fixed reserve (one page plus 512 KiB) fit below the candidate end address.
fn alloc_page_tables(
    &mut self,
    setup: &mut BootSetup,
    image_info: &BootImageInfo,
) -> Result<DomainSegment> {
    let extra_pages = 1 + (512 * 1024) / X86_PAGE_SIZE;
    let mut pages = extra_pages;
    let (index, virt_end) = loop {
        let candidate_end = BootSetup::round_up(
            setup.virt_alloc_end + pages * X86_PAGE_SIZE,
            BootSetup::bits_to_mask(22),
        );
        let index = self.count_page_tables(setup, image_info.virt_base, candidate_end, 0)?;
        pages = self.table.mappings[index].area.pgtables as u64 + extra_pages;
        if setup.virt_alloc_end + pages * X86_PAGE_SIZE <= candidate_end + 1 {
            break (index, candidate_end);
        }
    };

    self.table.mappings[index].area.pfn = 0;
    self.table.mappings_count += 1;
    setup.virt_pgtab_end = virt_end + 1;

    let size = self.table.mappings[index].area.pgtables as u64 * X86_PAGE_SIZE;
    let segment = setup.alloc_segment(self, 0, size)?;
    debug!(
        "alloc_page_tables table={:?} segment={:?}",
        self.table, segment
    );
    Ok(segment)
}
/// Writes the p2m table into the guest and fills in every page table entry
/// for all registered mappings.
fn setup_page_tables(&mut self, setup: &mut BootSetup, state: &mut BootState) -> Result<()> {
// View the guest's p2m segment as a slice of u64 entries.
// NOTE(review): relies on `state.p2m_segment.addr` being a valid, writable
// mapping of at least `p2m_size()` entries — confirm against the allocator.
let p2m_guest = unsafe {
slice::from_raw_parts_mut(
state.p2m_segment.addr as *mut u64,
setup.phys.p2m_size() as usize,
)
};
// `copy` is defined elsewhere; presumably copies the host-side p2m into the
// guest slice — verify.
copy(p2m_guest, &setup.phys.p2m);
// For each level (top down), for each mapping's tables at that level (map1),
// fill in the entries that point at the next lower level's tables, or, at
// level 0, at the mapped area itself (map2).
for l in (0usize..X86_PGTABLE_LEVELS as usize).rev() {
for m1 in 0usize..self.table.mappings_count {
let map1 = &self.table.mappings[m1];
let from = map1.levels[l].from;
let to = map1.levels[l].to;
let pg_ptr = setup.phys.pfn_to_ptr(map1.levels[l].pfn, 0)? as *mut u64;
for m2 in 0usize..self.table.mappings_count {
let map2 = &self.table.mappings[m2];
let lvl = if l > 0 {
&map2.levels[l - 1]
} else {
&map2.area
};
if l > 0 && lvl.pgtables == 0 {
continue;
}
// Skip targets that do not intersect the range of these tables.
if lvl.from >= to || lvl.to <= from {
continue;
}
// First and last entry index (inclusive) to write, relative to `from`.
let p_s = (max(from, lvl.from) - from)
>> (X86_PAGE_SHIFT + l as u64 * X86_PGTABLE_LEVEL_SHIFT);
let p_e = (min(to, lvl.to) - from)
>> (X86_PAGE_SHIFT + l as u64 * X86_PGTABLE_LEVEL_SHIFT);
let rhs = X86_PAGE_SHIFT as usize + l * X86_PGTABLE_LEVEL_SHIFT as usize;
// Page frame the first written entry refers to.
let mut pfn = ((max(from, lvl.from) - lvl.from) >> rhs) + lvl.pfn;
debug!(
"setup_page_tables lvl={} map_1={} map_2={} pfn={:#x} p_s={:#x} p_e={:#x}",
l, m1, m2, pfn, p_s, p_e
);
let pg = unsafe { slice::from_raw_parts_mut(pg_ptr, (p_e + 1) as usize) };
for p in p_s..p_e + 1 {
let prot = self.get_pg_prot(l, pfn);
// Entries hold the machine address (via p2m) combined with the protection bits.
let pfn_paddr = setup.phys.p2m[pfn as usize] << X86_PAGE_SHIFT;
let value = pfn_paddr | prot;
pg[p as usize] = value;
pfn += 1;
}
}
}
}
Ok(())
}
/// Zeroes the start info page and fills in the `StartInfo` structure.
///
/// `cmdline` is copied byte-wise into the fixed-size `cmdline` buffer. Anything
/// beyond `MAX_GUEST_CMDLINE - 1` bytes is truncated so that the buffer always
/// ends with a NUL terminator; previously an over-long command line indexed past
/// the buffer and panicked.
///
/// Returns an error if the start info page cannot be mapped.
fn setup_start_info(
    &mut self,
    setup: &mut BootSetup,
    state: &BootState,
    cmdline: &str,
) -> Result<()> {
    let ptr = setup.phys.pfn_to_ptr(state.start_info_segment.pfn, 1)?;
    let byte_slice =
        unsafe { slice::from_raw_parts_mut(ptr as *mut u8, X86_PAGE_SIZE as usize) };
    byte_slice.fill(0);
    let info = ptr as *mut StartInfo;
    unsafe {
        for (i, byte) in X86_GUEST_MAGIC.bytes().enumerate() {
            (*info).magic[i] = byte as c_char;
        }
        (*info).magic[X86_GUEST_MAGIC.len()] = 0 as c_char;
        (*info).nr_pages = setup.total_pages;
        (*info).shared_info = state.shared_info_frame << X86_PAGE_SHIFT;
        (*info).pt_base = state.page_table_segment.vstart;
        (*info).nr_pt_frames = self.table.mappings[0].area.pgtables as u64;
        (*info).mfn_list = state.p2m_segment.vstart;
        (*info).first_p2m_pfn = state.p2m_segment.pfn;
        (*info).nr_p2m_frames = state.p2m_segment.pages;
        (*info).flags = 0;
        (*info).store_evtchn = state.store_evtchn;
        (*info).store_mfn = setup.phys.p2m[state.xenstore_segment.pfn as usize];
        (*info).console.mfn = setup.phys.p2m[state.console_segment.pfn as usize];
        (*info).console.evtchn = state.console_evtchn;
        (*info).mod_start = state.initrd_segment.vstart;
        (*info).mod_len = state.initrd_segment.size;
        // Copy raw bytes (not chars, which would mangle non-ASCII input) and
        // leave room for the terminator.
        for (i, byte) in cmdline.bytes().take(MAX_GUEST_CMDLINE - 1).enumerate() {
            (*info).cmdline[i] = byte as c_char;
        }
        (*info).cmdline[MAX_GUEST_CMDLINE - 1] = 0;
        trace!("setup_start_info start_info={:?}", *info);
    }
    Ok(())
}
/// Maps the shared info frame, zeroes the `SharedInfo` structure and sets the
/// event channel upcall mask of every vCPU slot to 1.
fn setup_shared_info(&mut self, setup: &mut BootSetup, shared_info_frame: u64) -> Result<()> {
    let mapped = setup
        .phys
        .map_foreign_pages(shared_info_frame, X86_PAGE_SIZE)?;
    let info = mapped as *mut SharedInfo;
    unsafe {
        slice::from_raw_parts_mut(info as *mut u8, size_of::<SharedInfo>()).fill(0);
        let shared = &mut *info;
        for vcpu in shared.vcpu_info.iter_mut() {
            vcpu.evtchn_upcall_mask = 1;
        }
        trace!("setup_shared_info shared_info={:?}", *shared);
    }
    Ok(())
}
/// Initialises the hypercall page if the image declares one.
///
/// Does nothing when `virt_hypercall` is `XEN_UNSET_ADDR`. Otherwise the virtual
/// address is turned into a page frame relative to `virt_base`, translated via
/// the p2m table, and passed to `hypercall_init`.
fn setup_hypercall_page(
    &mut self,
    setup: &mut BootSetup,
    image_info: &BootImageInfo,
) -> Result<()> {
    if image_info.virt_hypercall != XEN_UNSET_ADDR {
        let offset = image_info.virt_hypercall - image_info.virt_base;
        let frame = (offset >> X86_PAGE_SHIFT) as usize;
        let mfn = setup.phys.p2m[frame];
        setup.call.hypercall_init(setup.domid, mfn)?;
    }
    Ok(())
}
/// Populates guest memory for `total_pages` pages and loads the resulting p2m table.
///
/// Memory is claimed up front, populated first with 2MB superpage extents and
/// then page by page for whatever remains, and the claim is released at the end.
fn meminit(&mut self, setup: &mut BootSetup, total_pages: u64) -> Result<()> {
setup.call.claim_pages(setup.domid, total_pages)?;
// A single range covering all of guest memory.
let mut vmemranges: Vec<VmemRange> = Vec::new();
let stub = VmemRange {
start: 0,
end: total_pages << XEN_PAGE_SHIFT,
_flags: 0,
_nid: 0,
};
vmemranges.push(stub);
let mut p2m_size: u64 = 0;
let mut total: u64 = 0;
for range in &vmemranges {
total += (range.end - range.start) >> XEN_PAGE_SHIFT;
p2m_size = p2m_size.max(range.end >> XEN_PAGE_SHIFT);
}
if total != total_pages {
return Err(Error::MemorySetupFailed);
}
setup.total_pages = total;
// Entries start out as u64::MAX and are overwritten below.
let mut p2m = vec![u64::MAX; p2m_size as usize];
for range in &vmemranges {
let mut extents_init = vec![0u64; SUPERPAGE_BATCH_SIZE as usize];
let pages = (range.end - range.start) >> XEN_PAGE_SHIFT;
let pfn_base = range.start >> XEN_PAGE_SHIFT;
// Start with an identity mapping; populate_physmap results replace it.
for pfn in pfn_base..pfn_base + pages {
p2m[pfn as usize] = pfn;
}
// Phase 1: populate as many whole 2MB superpages as possible, in batches.
let mut super_pages = pages >> SUPERPAGE_2MB_SHIFT;
let mut pfn_base_idx: u64 = pfn_base;
while super_pages > 0 {
let count = super_pages.min(SUPERPAGE_BATCH_SIZE);
super_pages -= count;
let mut j: usize = 0;
let mut pfn: u64 = pfn_base_idx;
// Collect the first frame of each superpage in this batch.
loop {
if pfn >= pfn_base_idx + (count << SUPERPAGE_2MB_SHIFT) {
break;
}
extents_init[j] = p2m[pfn as usize];
pfn += SUPERPAGE_2MB_NR_PFNS;
j += 1;
}
let extents_init_slice = extents_init.as_slice();
let extents = setup.call.populate_physmap(
setup.domid,
count,
SUPERPAGE_2MB_SHIFT as u32,
0,
&extents_init_slice[0usize..count as usize],
)?;
// Expand every returned superpage into its individual frames.
// NOTE(review): the number of returned extents is not compared with
// `count` here, unlike the single-page path below — confirm intended.
pfn = pfn_base_idx;
for mfn in extents {
for k in 0..SUPERPAGE_2MB_NR_PFNS {
p2m[pfn as usize] = mfn + k;
pfn += 1;
}
}
pfn_base_idx = pfn;
}
// Phase 2: populate the remaining pages one frame at a time, in chunks of
// at most 1024 * 1024 pages.
let mut j = pfn_base_idx - pfn_base;
loop {
if j >= pages {
break;
}
let allocsz = (1024 * 1024).min(pages - j);
let p2m_idx = (pfn_base + j) as usize;
let p2m_end_idx = p2m_idx + allocsz as usize;
let input_extent_starts = &p2m[p2m_idx..p2m_end_idx];
let result =
setup
.call
.populate_physmap(setup.domid, allocsz, 0, 0, input_extent_starts)?;
if result.len() != allocsz as usize {
return Err(Error::PopulatePhysmapFailed(
allocsz as usize,
result.len(),
input_extent_starts.len(),
));
}
for (i, item) in result.iter().enumerate() {
let p = (pfn_base + j + i as u64) as usize;
let m = *item;
p2m[p] = m;
}
j += allocsz;
}
}
setup.phys.load_p2m(p2m);
// Release the claim made at the start.
setup.call.claim_pages(setup.domid, 0)?;
Ok(())
}
/// Unmaps the page table and p2m segments and pins the top-level page table.
fn bootlate(&mut self, setup: &mut BootSetup, state: &mut BootState) -> Result<()> {
    let page_table_pfn = state.page_table_segment.pfn;
    // Translate before unmapping; the pin operation works on the machine frame.
    let page_table_mfn = setup.phys.p2m[page_table_pfn as usize];
    setup.phys.unmap(page_table_pfn)?;
    setup.phys.unmap(state.p2m_segment.pfn)?;
    let domid = setup.domid;
    setup
        .call
        .mmuext(domid, MMUEXT_PIN_L4_TABLE, page_table_mfn, 0)?;
    Ok(())
}
/// Builds the initial register context for vCPU 0 and hands it to the hypervisor.
///
/// The instruction pointer is the image entry point, the stack pointer is the
/// end of the boot stack page, `rsi` points at the start info page, and `cr3`
/// refers to the machine frame of the page table segment.
fn vcpu(&mut self, setup: &mut BootSetup, state: &mut BootState) -> Result<()> {
    let page_size = self.page_size();
    let virt_base = state.image_info.virt_base;
    let page_table_pfn = state.page_table_segment.pfn;
    let page_table_mfn = setup.phys.p2m[page_table_pfn as usize];

    let mut context = VcpuGuestContext::default();
    context.user_regs.rip = state.image_info.virt_entry;
    context.user_regs.rsp = virt_base + (state.boot_stack_segment.pfn + 1) * page_size;
    context.user_regs.rsi = virt_base + (state.start_info_segment.pfn) * page_size;
    context.user_regs.rflags = 1 << 9;
    context.debugreg[6] = 0xffff0ff0;
    context.debugreg[7] = 0x00000400;
    context.flags = VGCF_IN_KERNEL | VGCF_ONLINE;

    debug!(
        "cr3: pfn {:#x} mfn {:#x}",
        state.page_table_segment.pfn, page_table_mfn
    );
    context.ctrlreg[3] = page_table_mfn << 12;

    context.user_regs.ds = 0x0;
    context.user_regs.es = 0x0;
    context.user_regs.fs = 0x0;
    context.user_regs.gs = 0x0;
    context.user_regs.ss = 0xe02b;
    context.user_regs.cs = 0xe033;
    context.kernel_ss = context.user_regs.ss as u64;
    context.kernel_sp = context.user_regs.rsp;

    trace!("vcpu context: {:?}", context);
    setup.call.set_vcpu_context(setup.domid, 0, &context)?;
    Ok(())
}
}

View File

@ -0,0 +1,18 @@
[package]
name = "xenevtchn"
version.workspace = true
edition = "2021"
resolver = "2"
[dependencies]
log = { workspace = true }
thiserror = { workspace = true }
nix = { workspace = true, features = ["ioctl"] }
tokio = { workspace = true }
[lib]
name = "xenevtchn"
[[example]]
name = "xenevtchn-simple"
path = "examples/simple.rs"

View File

@ -0,0 +1,12 @@
use xenevtchn::error::Result;
use xenevtchn::EventChannel;
/// Opens the event channel device, binds an unbound port for domain 0, prints
/// it and unbinds it again.
#[tokio::main]
async fn main() -> Result<()> {
    let evtchn = EventChannel::open().await?;
    println!("channel opened");

    let local_port = evtchn.bind_unbound_port(0).await?;
    println!("port: {}", local_port);

    evtchn.unbind(local_port).await?;
    Ok(())
}

View File

@ -0,0 +1,13 @@
use std::io;
/// Errors produced by the event channel crate.
#[derive(thiserror::Error, Debug)]
pub enum Error {
/// An ioctl on the event channel device failed.
#[error("kernel error")]
Kernel(#[from] nix::errno::Errno),
/// Reading from or writing to the event channel device failed.
#[error("io issue encountered")]
Io(#[from] io::Error),
/// A wake notification could not be delivered to subscribers.
#[error("failed to send event channel wake")]
WakeSend(tokio::sync::broadcast::error::SendError<u32>),
}
/// Result type whose error is this crate's [`Error`].
pub type Result<T> = std::result::Result<T, Error>;

View File

@ -0,0 +1,171 @@
pub mod error;
pub mod sys;
use crate::error::{Error, Result};
use crate::sys::{BindInterdomain, BindUnboundPort, BindVirq, Notify, UnbindPort};
use log::error;
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::os::fd::AsRawFd;
use std::sync::Arc;
use tokio::fs::{File, OpenOptions};
use tokio::io::{AsyncReadExt, AsyncWriteExt};
use tokio::select;
use tokio::sync::broadcast::{
channel as broadcast_channel, Receiver as BroadcastReceiver, Sender as BroadastSender,
};
use tokio::sync::mpsc::{channel, Receiver, Sender};
use tokio::sync::Mutex;
use tokio::task::JoinHandle;
// Capacity of the mpsc queue carrying ports to be written back to the device.
const UNMASK_CHANNEL_QUEUE_LEN: usize = 30;
// Capacity of each per-port broadcast channel.
const BROADCAST_CHANNEL_QUEUE_LEN: usize = 30;
// Shared map from port number to the broadcast sender that wakes its subscribers.
type WakeMap = Arc<Mutex<HashMap<u32, BroadastSender<u32>>>>;
/// Handle to `/dev/xen/evtchn`.
///
/// Clones share the same device handle, subscriber map and background
/// processing task; the task is aborted when the last clone is dropped.
#[derive(Clone)]
pub struct EventChannel {
// Device file used for ioctls.
handle: Arc<Mutex<File>>,
// Per-port broadcast senders used to wake subscribers.
wakes: WakeMap,
// Queue of ports that the background task writes back to the device.
unmask_sender: Sender<u32>,
// Background task running `EventChannel::process`.
task: Arc<JoinHandle<()>>,
}
impl EventChannel {
/// Opens `/dev/xen/evtchn` for reading and writing and spawns the background
/// task that dispatches incoming events and writes unmask requests.
pub async fn open() -> Result<EventChannel> {
    let device = OpenOptions::new()
        .read(true)
        .write(true)
        .open("/dev/xen/evtchn")
        .await?;
    let wakes = Arc::new(Mutex::new(HashMap::new()));
    let (unmask_sender, unmask_receiver) = channel(UNMASK_CHANNEL_QUEUE_LEN);

    // The processor gets its own duplicate of the device handle.
    let processor_file = device.try_clone().await?;
    let processor_wakes = wakes.clone();
    let task = tokio::task::spawn(async move {
        let outcome =
            EventChannel::process(processor_file, processor_wakes, unmask_receiver).await;
        if let Err(error) = outcome {
            error!("event channel processor failed: {}", error);
        }
    });

    Ok(EventChannel {
        handle: Arc::new(Mutex::new(device)),
        wakes,
        unmask_sender,
        task: Arc::new(task),
    })
}
/// Issues the `bind_virq` ioctl for `virq` and returns the ioctl's result value.
pub async fn bind_virq(&self, virq: u32) -> Result<u32> {
    let file = self.handle.lock().await;
    let mut request = BindVirq { virq };
    let value = unsafe { sys::bind_virq(file.as_raw_fd(), &mut request)? };
    Ok(value as u32)
}
/// Issues the `bind_interdomain` ioctl for the remote `domid`/`port` pair and
/// returns the ioctl's result value.
pub async fn bind_interdomain(&self, domid: u32, port: u32) -> Result<u32> {
    let file = self.handle.lock().await;
    let mut request = BindInterdomain {
        remote_domain: domid,
        remote_port: port,
    };
    let value = unsafe { sys::bind_interdomain(file.as_raw_fd(), &mut request)? };
    Ok(value as u32)
}
/// Issues the `bind_unbound_port` ioctl for the remote `domid` and returns the
/// ioctl's result value.
pub async fn bind_unbound_port(&self, domid: u32) -> Result<u32> {
    let file = self.handle.lock().await;
    let mut request = BindUnboundPort {
        remote_domain: domid,
    };
    let value = unsafe { sys::bind_unbound_port(file.as_raw_fd(), &mut request)? };
    Ok(value as u32)
}
/// Issues the `unbind` ioctl for `port` and returns the ioctl's result value.
pub async fn unbind(&self, port: u32) -> Result<u32> {
    let file = self.handle.lock().await;
    let mut request = UnbindPort { port };
    let value = unsafe { sys::unbind(file.as_raw_fd(), &mut request)? };
    Ok(value as u32)
}
/// Issues the `notify` ioctl for `port` and returns the ioctl's result value.
pub async fn notify(&self, port: u32) -> Result<u32> {
    let file = self.handle.lock().await;
    let mut request = Notify { port };
    let value = unsafe { sys::notify(file.as_raw_fd(), &mut request)? };
    Ok(value as u32)
}
/// Issues the `reset` ioctl and returns the ioctl's result value.
pub async fn reset(&self) -> Result<u32> {
    let file = self.handle.lock().await;
    let value = unsafe { sys::reset(file.as_raw_fd())? };
    Ok(value as u32)
}
/// Subscribes to wake notifications for `port`.
///
/// Returns a broadcast receiver that yields the port number whenever the
/// device reports it, together with a sender for queueing ports to be written
/// back to the device. The per-port broadcast channel is created on first use.
pub async fn subscribe(&self, port: u32) -> Result<(BroadcastReceiver<u32>, Sender<u32>)> {
    let mut wakes = self.wakes.lock().await;
    let receiver = match wakes.entry(port) {
        Entry::Vacant(slot) => {
            let (wake_sender, wake_receiver) =
                broadcast_channel::<u32>(BROADCAST_CHANNEL_QUEUE_LEN);
            slot.insert(wake_sender);
            wake_receiver
        }
        Entry::Occupied(slot) => slot.get().subscribe(),
    };
    let unmask = self.unmask_sender.clone();
    Ok((receiver, unmask))
}
/// Background loop of an [`EventChannel`].
///
/// Reads port numbers from the device and forwards each one to the subscribers
/// of that port, and writes every port received on `unmask_receiver` back to
/// the device. Returns `Ok(())` once all unmask senders are gone, or an I/O
/// error if reading from or writing to the device fails.
///
/// A port whose subscribers have all been dropped is not an error: a broadcast
/// send only fails when there are no live receivers, so the stale sender is
/// removed and processing continues. Previously that failure ended the whole
/// processor, which silenced every other port as well.
async fn process(
    mut file: File,
    wakers: WakeMap,
    mut unmask_receiver: Receiver<u32>,
) -> Result<()> {
    loop {
        select! {
            result = file.read_u32_le() => {
                let port = result?;
                let mut wakers = wakers.lock().await;
                let stale = matches!(
                    wakers.get(&port),
                    Some(sender) if sender.send(port).is_err()
                );
                if stale {
                    // Nobody is listening any more; a later subscribe() recreates it.
                    wakers.remove(&port);
                }
            }
            result = unmask_receiver.recv() => {
                match result {
                    Some(port) => {
                        file.write_u32_le(port).await?;
                    }
                    None => {
                        break;
                    }
                }
            }
        };
    }
    Ok(())
}
}
/// Aborts the background task when the last clone of the channel is dropped.
impl Drop for EventChannel {
    fn drop(&mut self) {
        let other_clones_alive = Arc::strong_count(&self.task) > 1;
        if other_clones_alive {
            return;
        }
        self.task.abort();
    }
}

View File

@ -0,0 +1,35 @@
use nix::{ioctl_none, ioctl_readwrite_bad};
use std::ffi::c_uint;
/// Argument of the `bind_virq` ioctl.
#[repr(C)]
pub struct BindVirq {
pub virq: c_uint,
}
/// Argument of the `bind_interdomain` ioctl.
#[repr(C)]
pub struct BindInterdomain {
pub remote_domain: c_uint,
pub remote_port: c_uint,
}
/// Argument of the `bind_unbound_port` ioctl.
#[repr(C)]
pub struct BindUnboundPort {
pub remote_domain: c_uint,
}
/// Argument of the `unbind` ioctl.
#[repr(C)]
pub struct UnbindPort {
pub port: c_uint,
}
/// Argument of the `notify` ioctl.
#[repr(C)]
pub struct Notify {
pub port: c_uint,
}
// Request numbers follow Linux `include/uapi/xen/evtchn.h`, where every request
// is `_IOC(_IOC_NONE, 'E', nr, sizeof(arg))`:
//   nr 0 = BIND_VIRQ, 1 = BIND_INTERDOMAIN, 2 = BIND_UNBOUND_PORT,
//   nr 3 = UNBIND,    4 = NOTIFY,           5 = RESET (no argument).
// BIND_UNBOUND_PORT and UNBIND were previously swapped (0x44503 / 0x44502).
ioctl_readwrite_bad!(bind_virq, 0x44500, BindVirq);
ioctl_readwrite_bad!(bind_interdomain, 0x84501, BindInterdomain);
ioctl_readwrite_bad!(bind_unbound_port, 0x44502, BindUnboundPort);
ioctl_readwrite_bad!(unbind, 0x44503, UnbindPort);
ioctl_readwrite_bad!(notify, 0x44504, Notify);
// `ioctl_none!` takes the ioctl type byte and the sequence number separately and
// builds the request code itself; passing the full request number 0x4505 as the
// type did not encode to 0x4505.
ioctl_none!(reset, b'E', 5);

View File

@ -0,0 +1,27 @@
[package]
name = "xenstore"
version.workspace = true
edition = "2021"
resolver = "2"
[dependencies]
async-trait = { workspace = true }
byteorder = { workspace = true }
libc = { workspace = true }
log = { workspace = true }
thiserror = { workspace = true }
tokio = { workspace = true }
[lib]
name = "xenstore"
[dev-dependencies]
env_logger = { workspace = true }
[[example]]
name = "xenstore-ls"
path = "examples/list.rs"
[[example]]
name = "xenstore-watch"
path = "examples/watch.rs"

View File

@ -0,0 +1,35 @@
use std::env::args;
use xenstore::client::{XsdClient, XsdInterface};
use xenstore::error::Result;
/// Prints every node below `path` together with its value, depth first.
///
/// The store can change while it is being walked: a node returned by `list`
/// may be gone by the time it is read. Such nodes are skipped instead of
/// aborting the whole listing with a panic.
async fn list_recursive(client: &XsdClient, path: &str) -> Result<()> {
    let mut pending = vec![path.to_string()];
    while let Some(path) = pending.pop() {
        let children = client.list(&path).await?;
        // Avoid a double slash when joining children of the root.
        let prefix = if path == "/" { "" } else { path.as_str() };
        for child in children {
            let full = format!("{}/{}", prefix, child);
            let Some(value) = client.read_string(full.as_str()).await? else {
                // Removed between list and read; nothing to print or descend into.
                continue;
            };
            println!("{} = {:?}", full, value,);
            pending.push(full);
        }
    }
    Ok(())
}
/// Lists the whole store once, or repeatedly forever when the first command
/// line argument is `stress`.
#[tokio::main]
async fn main() -> Result<()> {
    env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("warn")).init();
    let client = XsdClient::open().await?;
    loop {
        list_recursive(&client, "/").await?;
        let stress = matches!(args().nth(1).as_deref(), Some("stress"));
        if !stress {
            break;
        }
    }
    Ok(())
}

View File

@ -0,0 +1,23 @@
use std::env::args;
use xenstore::client::XsdClient;
use xenstore::error::Result;
/// Watches the path given as first argument (default `/local/domain`) and
/// prints the first three events, or fewer if the watch channel closes.
#[tokio::main]
async fn main() -> Result<()> {
    env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("warn")).init();
    let path = args()
        .nth(1)
        .unwrap_or_else(|| "/local/domain".to_string());
    let client = XsdClient::open().await?;
    let mut handle = client.watch(&path).await?;
    let mut remaining = 3;
    while remaining > 0 {
        let Some(event) = handle.receiver.recv().await else {
            break;
        };
        println!("{}", event);
        remaining -= 1;
    }
    Ok(())
}

View File

@ -0,0 +1,358 @@
use std::{collections::HashMap, ffi::CString, io::ErrorKind, sync::Arc, time::Duration};
use libc::O_NONBLOCK;
use log::warn;
use tokio::{
fs::{metadata, File},
io::{unix::AsyncFd, AsyncReadExt, AsyncWriteExt},
select,
sync::{
mpsc::{channel, Receiver, Sender},
oneshot::{self, channel as oneshot_channel},
Mutex,
},
task::JoinHandle,
time::timeout,
};
use crate::{
error::{Error, Result},
sys::{XsdMessageHeader, XSD_ERROR, XSD_UNWATCH, XSD_WATCH_EVENT},
};
// Candidate device paths for the xenstore bus, tried in order.
const XEN_BUS_PATHS: &[&str] = &["/dev/xen/xenbus"];
// Largest payload this client expects in a single message.
const XEN_BUS_MAX_PAYLOAD_SIZE: usize = 4096;
// Size of the receive buffer: one header plus a maximum-size payload.
const XEN_BUS_MAX_PACKET_SIZE: usize = XsdMessageHeader::SIZE + XEN_BUS_MAX_PAYLOAD_SIZE;
/// Returns the first entry of `XEN_BUS_PATHS` whose metadata can be read, if any.
async fn find_bus_path() -> Option<&'static str> {
    for candidate in XEN_BUS_PATHS {
        if metadata(candidate).await.is_ok() {
            return Some(candidate);
        }
    }
    None
}
// State kept per registered watch: where to deliver changed paths.
struct WatchState {
sender: Sender<String>,
}
// State kept per in-flight request: where to deliver its reply.
struct ReplyState {
sender: oneshot::Sender<XsdMessage>,
}
// In-flight requests keyed by request id.
type ReplyMap = Arc<Mutex<HashMap<u32, ReplyState>>>;
// Registered watches keyed by watch id (sent to the store as the watch token).
type WatchMap = Arc<Mutex<HashMap<u32, WatchState>>>;
/// Connection to the xenstore bus.
///
/// Clones share the same background tasks, which are aborted when the last
/// clone is dropped.
#[derive(Clone)]
pub struct XsdSocket {
// Queue of outgoing messages consumed by the processor task.
tx_sender: Sender<XsdMessage>,
// Replies awaited by callers of `send_buf`, keyed by request id.
replies: ReplyMap,
// Registered watches, keyed by watch id.
watches: WatchMap,
// Next request id; shared with the processor task.
next_request_id: Arc<Mutex<u32>>,
// Next watch id.
next_watch_id: Arc<Mutex<u32>>,
// Task that writes requests and dispatches replies and watch events.
processor_task: Arc<JoinHandle<()>>,
// Task that reads messages from the bus.
rx_task: Arc<JoinHandle<()>>,
// Queue of watch ids to be unwatched by the processor task.
unwatch_sender: Sender<u32>,
}
impl XsdSocket {
pub async fn open() -> Result<XsdSocket> {
let path = match find_bus_path().await {
Some(path) => path,
None => return Err(Error::BusNotFound),
};
let file = File::options()
.read(true)
.write(true)
.custom_flags(O_NONBLOCK)
.open(path)
.await?;
XsdSocket::from_handle(file).await
}
/// Builds a socket on top of an already opened bus handle.
///
/// Two tasks are spawned: the processor, which writes requests and dispatches
/// replies and watch events, and the reader, which pulls messages off a
/// duplicate of the handle and forwards them to the processor.
pub async fn from_handle(handle: File) -> Result<XsdSocket> {
    let replies: ReplyMap = Arc::new(Mutex::new(HashMap::new()));
    let watches: WatchMap = Arc::new(Mutex::new(HashMap::new()));
    let next_request_id = Arc::new(Mutex::new(0u32));
    let next_watch_id = Arc::new(Mutex::new(0u32));

    let (rx_sender, rx_receiver) = channel::<XsdMessage>(10);
    let (tx_sender, tx_receiver) = channel::<XsdMessage>(10);
    let (unwatch_sender, unwatch_receiver) = channel::<u32>(1000);

    let reader: File = handle.try_clone().await?;
    let mut processor = XsdSocketProcessor {
        handle,
        replies: replies.clone(),
        watches: watches.clone(),
        next_request_id: next_request_id.clone(),
        tx_receiver,
        rx_receiver,
        unwatch_receiver,
    };

    let processor_task = tokio::task::spawn(async move {
        let outcome = processor.process().await;
        if let Err(error) = outcome {
            warn!("failed to process xen store messages: {}", error);
        }
    });
    let rx_task = tokio::task::spawn(async move {
        let outcome = XsdSocketProcessor::process_rx(reader, rx_sender).await;
        if let Err(error) = outcome {
            warn!("failed to process xen store responses: {}", error);
        }
    });

    Ok(XsdSocket {
        tx_sender,
        replies,
        watches,
        next_request_id,
        next_watch_id,
        processor_task: Arc::new(processor_task),
        rx_task: Arc::new(rx_task),
        unwatch_sender,
    })
}
/// Sends a request with a raw payload and waits for the matching reply.
///
/// * `tx` - transaction id (0 for none).
/// * `typ` - message type.
/// * `payload` - raw request body.
///
/// Returns the reply message. Fails if the outgoing queue is full or closed,
/// if the reply channel is dropped, or with `Error::ResponseError` when the
/// store answers with an error message.
pub async fn send_buf(&self, tx: u32, typ: u32, payload: &[u8]) -> Result<XsdMessage> {
    let req = {
        let mut guard = self.next_request_id.lock().await;
        let req = *guard;
        // Request ids are only correlation tokens, so wrap instead of overflowing.
        *guard = req.wrapping_add(1);
        req
    };
    let (sender, receiver) = oneshot_channel::<XsdMessage>();
    self.replies.lock().await.insert(req, ReplyState { sender });
    let header = XsdMessageHeader {
        typ,
        req,
        tx,
        len: payload.len() as u32,
    };
    let message = XsdMessage {
        header,
        payload: payload.to_vec(),
    };
    if let Err(error) = self.tx_sender.try_send(message) {
        // The request never left, so no reply will arrive: release the slot
        // instead of leaking it in the map.
        self.replies.lock().await.remove(&req);
        return Err(error.into());
    }
    let reply = receiver.await?;
    if reply.header.typ == XSD_ERROR {
        let error = CString::from_vec_with_nul(reply.payload)?;
        return Err(Error::ResponseError(error.into_string()?));
    }
    Ok(reply)
}
/// Sends a request whose payload is the given strings, each followed by a NUL
/// byte, and waits for the reply.
pub async fn send(&self, tx: u32, typ: u32, payload: &[&str]) -> Result<XsdMessage> {
    let encoded: Vec<u8> = payload
        .iter()
        .flat_map(|item| item.bytes().chain(std::iter::once(0)))
        .collect();
    self.send_buf(tx, typ, &encoded).await
}
/// Allocates a new watch id and registers a channel for its events.
///
/// Returns the id, the receiver on which changed paths arrive, and a sender
/// for requesting that a watch id be unwatched.
pub async fn add_watch(&self) -> Result<(u32, Receiver<String>, Sender<u32>)> {
    let id = {
        let mut next = self.next_watch_id.lock().await;
        let current = *next;
        *next = current + 1;
        current
    };
    let (event_sender, event_receiver) = channel(10);
    let state = WatchState {
        sender: event_sender,
    };
    self.watches.lock().await.insert(id, state);
    Ok((id, event_receiver, self.unwatch_sender.clone()))
}
}
// State owned by the background processor task of an `XsdSocket`.
struct XsdSocketProcessor {
// Bus handle used for writing requests.
handle: File,
// In-flight requests awaiting a reply, shared with the socket.
replies: ReplyMap,
// Registered watches, shared with the socket.
watches: WatchMap,
// Request id counter, shared with the socket.
next_request_id: Arc<Mutex<u32>>,
// Outgoing messages queued by `XsdSocket::send_buf`.
tx_receiver: Receiver<XsdMessage>,
// Incoming messages forwarded by the reader task.
rx_receiver: Receiver<XsdMessage>,
// Watch ids queued for unwatching.
unwatch_receiver: Receiver<u32>,
}
impl XsdSocketProcessor {
/// Reader task: waits for the bus handle to become readable, reads one
/// message and forwards it to the processor through `rx_sender`.
///
/// Ends with `Ok(())` once the receiving side of `rx_sender` is closed, and
/// with an error if waiting for readiness, reading, or forwarding fails.
async fn process_rx(read: File, rx_sender: Sender<XsdMessage>) -> Result<()> {
// One reusable buffer large enough for a header plus a maximum-size payload.
let mut buffer: Vec<u8> = vec![0u8; XEN_BUS_MAX_PACKET_SIZE];
let mut fd = AsyncFd::new(read)?;
loop {
select! {
x = fd.readable_mut() => match x {
Ok(mut guard) => {
let future = XsdSocketProcessor::read_message(&mut buffer, guard.get_inner_mut());
// A read that does not complete within one second is abandoned and
// the loop goes back to waiting for readiness.
// NOTE(review): the readiness guard is never cleared, so the fd
// presumably keeps reporting readable between messages — confirm
// this does not busy-loop.
if let Ok(message) = timeout(Duration::from_secs(1), future).await {
rx_sender.send(message?).await?;
}
},
Err(error) => {
return Err(error.into());
}
},
_ = rx_sender.closed() => {
break;
}
};
}
Ok(())
}
/// Reads one message from the bus into `buffer` and decodes it.
///
/// `WouldBlock` errors are retried after yielding to the scheduler. Returns
/// `Error::InvalidBusData` when fewer bytes arrive than the header, or than the
/// payload length declared in the header, requires.
async fn read_message(buffer: &mut [u8], read: &mut File) -> Result<XsdMessage> {
    let size = loop {
        match read.read(buffer).await {
            Ok(size) => break size,
            Err(error) if error.kind() == ErrorKind::WouldBlock => {
                tokio::task::yield_now().await;
            }
            Err(error) => return Err(error.into()),
        }
    };

    let header_len = XsdMessageHeader::SIZE;
    if size < header_len {
        return Err(Error::InvalidBusData);
    }
    let header = XsdMessageHeader::decode(&buffer[0..header_len])?;
    let message_len = header_len + header.len as usize;
    if size < message_len {
        return Err(Error::InvalidBusData);
    }
    let payload = buffer[header_len..message_len].to_vec();
    Ok(XsdMessage { header, payload })
}
/// Processor task: multiplexes outgoing requests, incoming messages and
/// unwatch requests until one of the queues is closed.
///
/// * Outgoing messages are encoded (header followed by payload) and written
///   to the bus.
/// * Incoming watch events are delivered to the watch whose id matches the
///   event token; any other message is delivered to the request waiting on
///   its request id.
/// * Unwatch requests send an `XSD_UNWATCH` message and forget the watch.
///
/// A malformed watch event (missing path or token, or a token that is not a
/// watch id) is ignored. Previously it made this function return `Ok(())`,
/// silently ending all processing for the socket.
async fn process(&mut self) -> Result<()> {
    loop {
        select! {
            x = self.tx_receiver.recv() => match x {
                Some(message) => {
                    let mut composed: Vec<u8> = Vec::new();
                    message.header.encode_to(&mut composed)?;
                    composed.extend_from_slice(&message.payload);
                    self.handle.write_all(&composed).await?;
                }
                None => {
                    break;
                }
            },
            x = self.rx_receiver.recv() => match x {
                Some(message) => {
                    if message.header.typ == XSD_WATCH_EVENT && message.header.req == 0 && message.header.tx == 0 {
                        let strings = message.parse_string_vec()?;
                        let Some(path) = strings.first() else {
                            continue;
                        };
                        let Some(token) = strings.get(1) else {
                            continue;
                        };
                        let Ok(id) = token.parse::<u32>() else {
                            continue;
                        };
                        if let Some(state) = self.watches.lock().await.get(&id) {
                            // Best effort: a slow or gone consumer must not stall the bus.
                            let _ = state.sender.try_send(path.clone());
                        }
                    } else if let Some(state) = self.replies.lock().await.remove(&message.header.req) {
                        let _ = state.sender.send(message);
                    }
                }
                None => {
                    break;
                }
            },
            x = self.unwatch_receiver.recv() => match x {
                Some(id) => {
                    // The handle for this watch is gone; drop its state so the
                    // map does not grow for the lifetime of the socket.
                    self.watches.lock().await.remove(&id);
                    let req = {
                        let mut guard = self.next_request_id.lock().await;
                        let req = *guard;
                        // Request ids are only correlation tokens, so wrap instead of overflowing.
                        *guard = req.wrapping_add(1);
                        req
                    };
                    // NOTE(review): only the token is sent here; the xenstore wire
                    // protocol presumably expects "<path>\0<token>\0" for UNWATCH.
                    // The path is not stored in WatchState — confirm and extend.
                    let mut payload = id.to_string().as_bytes().to_vec();
                    payload.push(0);
                    let header = XsdMessageHeader {
                        typ: XSD_UNWATCH,
                        req,
                        tx: 0,
                        len: payload.len() as u32,
                    };
                    let mut data = header.encode()?;
                    data.extend_from_slice(&payload);
                    self.handle.write_all(&data).await?;
                },
                None => {
                    break;
                }
            }
        };
    }
    Ok(())
}
}
/// A single xenstore bus message: header plus raw payload bytes.
#[derive(Debug)]
pub struct XsdMessage {
pub header: XsdMessageHeader,
pub payload: Vec<u8>,
}
impl XsdMessage {
/// Interprets the payload as exactly one NUL-terminated UTF-8 string.
///
/// Fails if the payload does not end in a single trailing NUL, contains an
/// interior NUL, or is not valid UTF-8.
pub fn parse_string(&self) -> Result<String> {
    let bytes = self.payload.clone();
    let terminated = CString::from_vec_with_nul(bytes)?;
    let text = terminated.into_string()?;
    Ok(text)
}
/// Splits the payload into its NUL-terminated UTF-8 strings.
///
/// Bytes after the last NUL (an unterminated trailing fragment) are ignored.
/// Fails on the first terminated string that is not valid UTF-8.
pub fn parse_string_vec(&self) -> Result<Vec<String>> {
    let mut segments: Vec<&[u8]> = self.payload.split(|byte| *byte == 0).collect();
    // The final piece is whatever follows the last NUL: empty for a well-formed
    // payload, otherwise an unterminated fragment. Either way it is not a string.
    segments.pop();
    let mut strings: Vec<String> = Vec::new();
    for segment in segments {
        strings.push(String::from_utf8(segment.to_vec())?);
    }
    Ok(strings)
}
/// Always returns `Ok(true)`; the payload is not inspected.
pub fn parse_bool(&self) -> Result<bool> {
Ok(true)
}
}
impl Drop for XsdSocket {
fn drop(&mut self) {
if Arc::strong_count(&self.rx_task) <= 1 {
self.rx_task.abort();
}
if Arc::strong_count(&self.processor_task) <= 1 {
self.processor_task.abort();
}
}
}

View File

@ -0,0 +1,302 @@
use crate::bus::XsdSocket;
use crate::error::{Error, Result};
use crate::sys::{
XSD_DIRECTORY, XSD_GET_DOMAIN_PATH, XSD_INTRODUCE, XSD_MKDIR, XSD_READ, XSD_RM, XSD_SET_PERMS,
XSD_TRANSACTION_END, XSD_TRANSACTION_START, XSD_WATCH, XSD_WRITE,
};
use log::trace;
use std::ffi::CString;
use tokio::sync::mpsc::Receiver;
use tokio::sync::mpsc::Sender;
/// No access.
pub const XS_PERM_NONE: u32 = 0x00;
/// Read access.
pub const XS_PERM_READ: u32 = 0x01;
/// Write access.
pub const XS_PERM_WRITE: u32 = 0x02;
/// Read and write access.
pub const XS_PERM_READ_WRITE: u32 = XS_PERM_READ | XS_PERM_WRITE;
/// A single permission entry for a store node.
#[derive(Debug, Copy, Clone)]
pub struct XsPermission {
/// Numeric id the permission applies to (presumably a domain id — confirm).
pub id: u32,
/// One of the `XS_PERM_*` values.
pub perms: u32,
}
/// Client for the xenstore built on top of an [`XsdSocket`].
#[derive(Clone)]
pub struct XsdClient {
/// Underlying bus connection; shared between clones.
pub socket: XsdSocket,
}
impl XsPermission {
    /// Encodes the permission as a one-letter access code followed by the id:
    /// `n` (none), `r` (read), `w` (write) or `b` (both).
    ///
    /// Returns `Error::InvalidPermissions` for any other `perms` value.
    pub fn encode(&self) -> Result<String> {
        let code = match self.perms {
            XS_PERM_NONE => 'n',
            XS_PERM_READ => 'r',
            XS_PERM_WRITE => 'w',
            XS_PERM_READ_WRITE => 'b',
            _ => return Err(Error::InvalidPermissions),
        };
        Ok(format!("{code}{}", self.id))
    }
}
/// Handle to an active watch; dropping it queues an unwatch request.
pub struct XsdWatchHandle {
// Watch id, also used as the watch token.
id: u32,
// Queue on which the id is sent when the handle is dropped.
unwatch_sender: Sender<u32>,
/// Receives the path of every event reported for this watch.
pub receiver: Receiver<String>,
}
impl Drop for XsdWatchHandle {
/// Queues this watch's id for unwatching.
fn drop(&mut self) {
// Best effort: drop cannot await or fail, so a full or closed queue is ignored.
let _ = self.unwatch_sender.try_send(self.id);
}
}
/// Common store operations.
#[allow(async_fn_in_trait)]
pub trait XsdInterface {
/// Lists the children of `path`.
async fn list(&self, path: &str) -> Result<Vec<String>>;
/// Reads the raw value at `path`.
async fn read(&self, path: &str) -> Result<Option<Vec<u8>>>;
/// Reads the value at `path` as a string.
async fn read_string(&self, path: &str) -> Result<Option<String>>;
/// Writes raw `data` to `path`.
async fn write(&self, path: &str, data: Vec<u8>) -> Result<bool>;
/// Writes the string `data` to `path`.
async fn write_string(&self, path: &str, data: &str) -> Result<bool>;
/// Creates a directory at `path`.
async fn mkdir(&self, path: &str) -> Result<bool>;
/// Removes `path`.
async fn rm(&self, path: &str) -> Result<bool>;
/// Sets the permissions of `path`.
async fn set_perms(&self, path: &str, perms: &[XsPermission]) -> Result<bool>;
/// Creates `path` with an empty value and then applies `perms` to it.
///
/// Returns `true` only if both steps report success.
async fn mknod(&self, path: &str, perms: &[XsPermission]) -> Result<bool> {
let result1 = self.write_string(path, "").await?;
let result2 = self.set_perms(path, perms).await?;
Ok(result1 && result2)
}
}
impl XsdClient {
/// Opens a connection to the xenstore bus and wraps it in a client.
pub async fn open() -> Result<XsdClient> {
    XsdSocket::open().await.map(|socket| XsdClient { socket })
}
/// Lists the children of `path` within transaction `tx`.
///
/// A "no entry" response is reported as an empty list.
async fn list(&self, tx: u32, path: &str) -> Result<Vec<String>> {
    trace!("list tx={tx} path={path}");
    match self.socket.send(tx, XSD_DIRECTORY, &[path]).await {
        Ok(response) => response.parse_string_vec(),
        Err(error) if error.is_noent_response() => Ok(vec![]),
        Err(error) => Err(error),
    }
}
/// Reads the raw value at `path` within transaction `tx`.
///
/// A "no entry" response is reported as `None`.
async fn read(&self, tx: u32, path: &str) -> Result<Option<Vec<u8>>> {
    trace!("read tx={tx} path={path}");
    let outcome = self.socket.send(tx, XSD_READ, &[path]).await;
    match outcome {
        Ok(response) => Ok(Some(response.payload)),
        Err(error) if error.is_noent_response() => Ok(None),
        Err(error) => Err(error),
    }
}
/// Writes `data` to `path` within transaction `tx`.
///
/// The request payload is the NUL-terminated path followed by the raw data.
/// Fails if `path` contains an interior NUL byte.
async fn write(&self, tx: u32, path: &str, data: Vec<u8>) -> Result<bool> {
    trace!("write tx={tx} path={path} data={:?}", data);
    let terminated_path = CString::new(path)?;
    let request = [terminated_path.as_bytes_with_nul(), data.as_slice()].concat();
    let response = self.socket.send_buf(tx, XSD_WRITE, &request).await?;
    response.parse_bool()
}
/// Creates a directory at `path` within transaction `tx`.
async fn mkdir(&self, tx: u32, path: &str) -> Result<bool> {
    trace!("mkdir tx={tx} path={path}");
    let response = self.socket.send(tx, XSD_MKDIR, &[path]).await?;
    response.parse_bool()
}
/// Removes `path` within transaction `tx`.
///
/// Removing a path that does not exist counts as success.
async fn rm(&self, tx: u32, path: &str) -> Result<bool> {
    trace!("rm tx={tx} path={path}");
    match self.socket.send(tx, XSD_RM, &[path]).await {
        Ok(response) => response.parse_bool(),
        Err(error) if error.is_noent_response() => Ok(true),
        Err(error) => Err(error),
    }
}
async fn set_perms(&self, tx: u32, path: &str, perms: &[XsPermission]) -> Result<bool> {
trace!("set_perms tx={tx} path={path} perms={:?}", perms);
let mut items: Vec<String> = Vec::new();
items.push(path.to_string());
for perm in perms {
items.push(perm.encode()?);
}
let items_str: Vec<&str> = items.iter().map(|x| x.as_str()).collect();
let response = self.socket.send(tx, XSD_SET_PERMS, &items_str).await?;
response.parse_bool()
}
pub async fn transaction(&self) -> Result<XsdTransaction> {
trace!("transaction start");
let response = self.socket.send(0, XSD_TRANSACTION_START, &[""]).await?;
let str = response.parse_string()?;
let tx = str.parse::<u32>()?;
Ok(XsdTransaction {
client: self.clone(),
tx,
})
}
pub async fn get_domain_path(&mut self, domid: u32) -> Result<String> {
let response = self
.socket
.send(0, XSD_GET_DOMAIN_PATH, &[&domid.to_string()])
.await?;
response.parse_string()
}
pub async fn introduce_domain(&mut self, domid: u32, mfn: u64, evtchn: u32) -> Result<bool> {
trace!("introduce domain domid={domid} mfn={mfn} evtchn={evtchn}");
let response = self
.socket
.send(
0,
XSD_INTRODUCE,
&[
domid.to_string().as_str(),
mfn.to_string().as_str(),
evtchn.to_string().as_str(),
],
)
.await?;
response.parse_bool()
}
pub async fn watch(&self, path: &str) -> Result<XsdWatchHandle> {
let (id, receiver, unwatch_sender) = self.socket.add_watch().await?;
let id_string = id.to_string();
let _ = self.socket.send(0, XSD_WATCH, &[path, &id_string]).await?;
Ok(XsdWatchHandle {
id,
receiver,
unwatch_sender,
})
}
}
/// A transaction handle: a client paired with the transaction id that is
/// passed along with every request issued through this handle.
#[derive(Clone)]
pub struct XsdTransaction {
/// Client used to send the requests.
client: XsdClient,
/// Transaction id supplied with each request.
tx: u32,
}
impl XsdInterface for XsdClient {
async fn list(&self, path: &str) -> Result<Vec<String>> {
self.list(0, path).await
}
async fn read(&self, path: &str) -> Result<Option<Vec<u8>>> {
self.read(0, path).await
}
async fn read_string(&self, path: &str) -> Result<Option<String>> {
match self.read(0, path).await {
Ok(value) => match value {
Some(value) => Ok(Some(String::from_utf8(value)?)),
None => Ok(None),
},
Err(error) => Err(error),
}
}
async fn write(&self, path: &str, data: Vec<u8>) -> Result<bool> {
self.write(0, path, data).await
}
async fn write_string(&self, path: &str, data: &str) -> Result<bool> {
self.write(0, path, data.as_bytes().to_vec()).await
}
async fn mkdir(&self, path: &str) -> Result<bool> {
self.mkdir(0, path).await
}
async fn rm(&self, path: &str) -> Result<bool> {
self.rm(0, path).await
}
async fn set_perms(&self, path: &str, perms: &[XsPermission]) -> Result<bool> {
self.set_perms(0, path, perms).await
}
}
impl XsdInterface for XsdTransaction {
async fn list(&self, path: &str) -> Result<Vec<String>> {
self.client.list(self.tx, path).await
}
async fn read(&self, path: &str) -> Result<Option<Vec<u8>>> {
self.client.read(self.tx, path).await
}
async fn read_string(&self, path: &str) -> Result<Option<String>> {
match self.client.read(self.tx, path).await {
Ok(value) => match value {
Some(value) => Ok(Some(String::from_utf8(value)?)),
None => Ok(None),
},
Err(error) => Err(error),
}
}
async fn write(&self, path: &str, data: Vec<u8>) -> Result<bool> {
self.client.write(self.tx, path, data).await
}
async fn write_string(&self, path: &str, data: &str) -> Result<bool> {
self.client
.write(self.tx, path, data.as_bytes().to_vec())
.await
}
async fn mkdir(&self, path: &str) -> Result<bool> {
self.client.mkdir(self.tx, path).await
}
async fn rm(&self, path: &str) -> Result<bool> {
self.client.rm(self.tx, path).await
}
async fn set_perms(&self, path: &str, perms: &[XsPermission]) -> Result<bool> {
self.client.set_perms(self.tx, path, perms).await
}
}
impl XsdTransaction {
    /// Finishes the transaction by sending `XSD_TRANSACTION_END`.
    ///
    /// The request argument is `"F"` when `abort` is true and `"T"` otherwise.
    /// The reply is parsed as a boolean.
    pub async fn end(&self, abort: bool) -> Result<bool> {
        let flag = match abort {
            true => "F",
            false => "T",
        };
        trace!("transaction end abort={}", abort);
        let response = self
            .client
            .socket
            .send(self.tx, XSD_TRANSACTION_END, &[flag])
            .await?;
        response.parse_bool()
    }

    /// Ends the transaction without aborting it (`end(false)`).
    pub async fn commit(&self) -> Result<bool> {
        let abort = false;
        self.end(abort).await
    }

    /// Ends the transaction with the abort flag set (`end(true)`).
    pub async fn abort(&self) -> Result<bool> {
        let abort = true;
        self.end(abort).await
    }
}

View File

@ -0,0 +1,53 @@
use std::ffi::{FromVecWithNulError, IntoStringError, NulError};
use std::io;
use std::num::ParseIntError;
use std::str::Utf8Error;
use std::string::FromUtf8Error;
use tokio::sync::mpsc::error::{SendError, TrySendError};
use tokio::sync::oneshot::error::RecvError;
use crate::bus::XsdMessage;
/// Errors produced by this crate.
///
/// Variants carrying a `#[from]` source are created automatically when the
/// corresponding underlying error is propagated with `?`.
#[derive(thiserror::Error, Debug)]
pub enum Error {
/// An I/O operation failed.
#[error("io issue encountered: {0}")]
Io(#[from] io::Error),
/// Data received on the bus was not valid.
#[error("invalid data received on bus")]
InvalidBusData,
/// An owned byte buffer was not valid UTF-8.
#[error("utf8 string decode failed: {0}")]
Utf8DecodeString(#[from] FromUtf8Error),
/// A borrowed byte slice was not valid UTF-8.
#[error("utf8 str decode failed: {0}")]
Utf8DecodeStr(#[from] Utf8Error),
/// A `CString` could not be converted into a `String`.
#[error("unable to decode cstring as utf8: {0}")]
Utf8DecodeCstring(#[from] IntoStringError),
/// A string that must not contain NUL bytes contained one.
#[error("nul byte found in string: {0}")]
NulByteFoundString(#[from] NulError),
/// A byte vector could not be interpreted as a NUL-terminated string.
#[error("unable to find nul byte in vec: {0}")]
VecNulByteNotFound(#[from] FromVecWithNulError),
/// A string could not be parsed as an integer.
#[error("unable to parse integer: {0}")]
ParseInt(#[from] ParseIntError),
/// No bus could be found on any of the paths that were tried.
#[error("bus was not found on any available path")]
BusNotFound,
/// The store answered with an error; the payload is the error text
/// (for example `ENOENT`, see `is_noent_response`).
#[error("store responded with error: `{0}`")]
ResponseError(String),
/// The supplied permissions were not valid.
#[error("invalid permissions provided")]
InvalidPermissions,
/// Receiving a reply over a oneshot channel failed.
#[error("failed to receive reply: {0}")]
ReceiverError(#[from] RecvError),
/// Sending a message over an mpsc channel failed.
#[error("failed to send request: {0}")]
SendError(#[from] SendError<XsdMessage>),
/// A non-blocking send of a message over an mpsc channel failed.
#[error("failed to send request: {0}")]
TrySendError(#[from] TrySendError<XsdMessage>),
}
impl Error {
pub fn is_noent_response(&self) -> bool {
match self {
Error::ResponseError(message) => message == "ENOENT",
_ => false,
}
}
}
/// Shorthand for a `std::result::Result` whose error type is this module's [`Error`].
pub type Result<T> = std::result::Result<T, Error>;

View File

@ -0,0 +1,4 @@
// Public modules of the crate.
// `bus` provides the `XsdMessage` type referenced by the error module.
pub mod bus;
// NOTE(review): presumably hosts the XenStore client API — confirm.
pub mod client;
// Crate-wide `Error` enum and `Result` alias.
pub mod error;
// NOTE(review): presumably the handwritten XenStore wire protocol definitions — confirm.
pub mod sys;

View File

@ -0,0 +1,171 @@
/// Handwritten protocol definitions for XenStore.
/// Used xen/include/public/io/xs_wire.h as a reference.
use libc;
use crate::error::Result;
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
use std::io::Cursor;
/// Fixed-size message header made of four `u32` fields, encoded and decoded
/// as little-endian values in the order `typ`, `req`, `tx`, `len`.
#[derive(Copy, Clone, Debug)]
pub struct XsdMessageHeader {
/// Message type; presumably one of the `XSD_*` type constants — confirm.
pub typ: u32,
/// Request identifier; presumably used to match replies to requests — confirm.
pub req: u32,
/// Transaction identifier.
pub tx: u32,
/// Length field; presumably the payload size in bytes — confirm.
pub len: u32,
}
impl XsdMessageHeader {
pub const SIZE: usize = 16;
pub fn decode(bytes: &[u8]) -> Result<XsdMessageHeader> {
let mut cursor = Cursor::new(bytes);
Ok(XsdMessageHeader {
typ: cursor.read_u32::<LittleEndian>()?,
req: cursor.read_u32::<LittleEndian>()?,
tx: cursor.read_u32::<LittleEndian>()?,
len: cursor.read_u32::<LittleEndian>()?,
})
}
pub fn encode_to(&self, buffer: &mut Vec<u8>) -> Result<()> {
buffer.write_u32::<LittleEndian>(self.typ)?;
buffer.write_u32::<LittleEndian>(self.req)?;
buffer.write_u32::<LittleEndian>(self.tx)?;
buffer.write_u32::<LittleEndian>(self.len)?;
Ok(())
}
pub fn encode(&self) -> Result<Vec<u8>> {
let mut buffer = Vec::with_capacity(XsdMessageHeader::SIZE);
self.encode_to(&mut buffer)?;
Ok(buffer)
}
}
// Message type identifiers, following `enum xsd_sockmsg_type` in
// xen/include/public/io/xs_wire.h.
pub const XSD_CONTROL: u32 = 0;
pub const XSD_DIRECTORY: u32 = 1;
pub const XSD_READ: u32 = 2;
pub const XSD_GET_PERMS: u32 = 3;
pub const XSD_WATCH: u32 = 4;
pub const XSD_UNWATCH: u32 = 5;
pub const XSD_TRANSACTION_START: u32 = 6;
pub const XSD_TRANSACTION_END: u32 = 7;
pub const XSD_INTRODUCE: u32 = 8;
pub const XSD_RELEASE: u32 = 9;
pub const XSD_GET_DOMAIN_PATH: u32 = 10;
pub const XSD_WRITE: u32 = 11;
pub const XSD_MKDIR: u32 = 12;
pub const XSD_RM: u32 = 13;
pub const XSD_SET_PERMS: u32 = 14;
pub const XSD_WATCH_EVENT: u32 = 15;
pub const XSD_ERROR: u32 = 16;
pub const XSD_IS_DOMAIN_INTRODUCED: u32 = 17;
pub const XSD_RESUME: u32 = 18;
pub const XSD_SET_TARGET: u32 = 19;
// Value 20 belonged to the removed XS_RESTRICT request, so the numbering
// skips it and resumes at 21.
pub const XSD_RESET_WATCHES: u32 = XSD_SET_TARGET + 2;
// In the reference enum this entry follows XS_RESET_WATCHES, so it is 22.
// The previous value (20) reused the removed XS_RESTRICT slot.
pub const XSD_DIRECTORY_PART: u32 = XSD_RESET_WATCHES + 1;
// Number of valid types: one past the last real message type (23). The
// previous value (21) collided with XSD_RESET_WATCHES.
pub const XSD_TYPE_COUNT: u32 = XSD_DIRECTORY_PART + 1;
// Guaranteed to remain an invalid type.
pub const XSD_INVALID: u32 = 0xffff;
// String flags that mirror the XS_WRITE_* definitions of xs_wire.h.
// NOTE(review): no user of these constants is visible here — confirm usage.
pub const XSD_WRITE_NONE: &str = "NONE";
pub const XSD_WRITE_CREATE: &str = "CREATE";
pub const XSD_WRITE_CREATE_EXCL: &str = "CREATE|EXCL";
/// Pairs a numeric errno value with its symbolic name as a string.
#[repr(C)]
pub struct XsdError<'a> {
/// Numeric errno value (taken from `libc` in the constants below).
pub num: i32,
/// Symbolic name of the error, e.g. `"ENOENT"`.
pub error: &'a str,
}
// Table of known store errors: each constant pairs a `libc` errno value with
// the identically named string.
// NOTE(review): presumably mirrors the `xsd_errors` table of xs_wire.h — confirm.
pub const XSD_ERROR_EINVAL: XsdError = XsdError {
num: libc::EINVAL,
error: "EINVAL",
};
pub const XSD_ERROR_EACCES: XsdError = XsdError {
num: libc::EACCES,
error: "EACCES",
};
pub const XSD_ERROR_EEXIST: XsdError = XsdError {
num: libc::EEXIST,
error: "EEXIST",
};
pub const XSD_ERROR_EISDIR: XsdError = XsdError {
num: libc::EISDIR,
error: "EISDIR",
};
pub const XSD_ERROR_ENOENT: XsdError = XsdError {
num: libc::ENOENT,
error: "ENOENT",
};
pub const XSD_ERROR_ENOMEM: XsdError = XsdError {
num: libc::ENOMEM,
error: "ENOMEM",
};
pub const XSD_ERROR_ENOSPC: XsdError = XsdError {
num: libc::ENOSPC,
error: "ENOSPC",
};
pub const XSD_ERROR_EIO: XsdError = XsdError {
num: libc::EIO,
error: "EIO",
};
pub const XSD_ERROR_ENOTEMPTY: XsdError = XsdError {
num: libc::ENOTEMPTY,
error: "ENOTEMPTY",
};
pub const XSD_ERROR_ENOSYS: XsdError = XsdError {
num: libc::ENOSYS,
error: "ENOSYS",
};
pub const XSD_ERROR_EROFS: XsdError = XsdError {
num: libc::EROFS,
error: "EROFS",
};
pub const XSD_ERROR_EBUSY: XsdError = XsdError {
num: libc::EBUSY,
error: "EBUSY",
};
pub const XSD_ERROR_EAGAIN: XsdError = XsdError {
num: libc::EAGAIN,
error: "EAGAIN",
};
pub const XSD_ERROR_EISCONN: XsdError = XsdError {
num: libc::EISCONN,
error: "EISCONN",
};
pub const XSD_ERROR_E2BIG: XsdError = XsdError {
num: libc::E2BIG,
error: "E2BIG",
};
pub const XSD_ERROR_EPERM: XsdError = XsdError {
num: libc::EPERM,
error: "EPERM",
};
// NOTE(review): presumably the positions of the path and the token within a
// watch event's string vector (xs_wire.h `enum xs_watch_type`) — confirm.
pub const XSD_WATCH_PATH: u32 = 0;
pub const XSD_WATCH_TOKEN: u32 = 1;
/// C-layout structure with two 1024-byte buffers followed by seven `u32`
/// fields.
///
/// NOTE(review): presumably mirrors `struct xenstore_domain_interface` from
/// xs_wire.h (the shared ring page between a domain and the store); the
/// per-field notes below are assumptions to confirm against that header.
#[repr(C)]
pub struct XenDomainInterface {
// Presumably the request ring (towards the store daemon).
req: [i8; 1024],
// Presumably the response ring (replies and watch events).
rsp: [i8; 1024],
// Presumably consumer/producer indices of the request ring.
req_cons: u32,
req_prod: u32,
// Presumably consumer/producer indices of the response ring.
rsp_cons: u32,
rsp_prod: u32,
// Presumably a bitmap built from the XS_SERVER_FEATURE_* values.
server_features: u32,
// Presumably one of XS_CONNECTED / XS_RECONNECT.
connection: u32,
// Presumably one of the XS_ERROR_* values.
error: u32,
}
// Size limits. NOTE(review): presumably in bytes, as in xs_wire.h — confirm.
pub const XS_PAYLOAD_MAX: u32 = 4096;
pub const XS_ABS_PATH_MAX: u32 = 3072;
pub const XS_REL_PATH_MAX: u32 = 2048;
// Server feature values. NOTE(review): presumably bit flags — confirm.
pub const XS_SERVER_FEATURE_RECONNECTION: u32 = 1;
pub const XS_SERVER_FEATURE_ERROR: u32 = 2;
// Connection state values.
pub const XS_CONNECTED: u32 = 0;
pub const XS_RECONNECT: u32 = 1;
// Error state values.
pub const XS_ERROR_NONE: u32 = 0;
pub const XS_ERROR_COMM: u32 = 1;
pub const XS_ERROR_RINGIDX: u32 = 2;
pub const XS_ERROR_PROTO: u32 = 3;