Skip to content

Commit

Permalink
feat(sandbox): Add UhyveFileMap structure and sandbox
Browse files Browse the repository at this point in the history
* Add --mount parameter for "whitelisting" guest_paths and defining
  their respective filesystem paths on the host FS
* Add UhyveFileMap structure
* Add sandbox support to open() syscall

A few points that could be worked on further are unit tests, handling
more of the parsing directly with the clap library, and performance
optimizations.

Helped-by: Çağatay Yiğit Şahin <[email protected]>
  • Loading branch information
n0toose committed Nov 5, 2024
1 parent b981d2d commit a8ff49a
Show file tree
Hide file tree
Showing 9 changed files with 143 additions and 17 deletions.
12 changes: 12 additions & 0 deletions src/bin/uhyve.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,16 @@ struct Args {
#[cfg(target_os = "linux")]
gdb_port: Option<u16>,

/// Paths that the kernel should be able to view, read or write.
///
/// Files and directories are separated using commas.
/// Desired mount paths must be explicitly defined after a colon.
///
/// Example: --mount host_directory:/root/guest_directory,file.txt:/root/my_file.txt
#[arg(value_delimiter = ',')]
#[clap(long, env = "HERMIT_MOUNT")]
mount: Option<Vec<String>>,

/// The kernel to execute
#[clap(value_parser)]
kernel: PathBuf,
Expand Down Expand Up @@ -243,6 +253,7 @@ impl From<Args> for Params {
},
#[cfg(target_os = "linux")]
gdb_port,
mount,
kernel: _,
kernel_args,
} = args;
Expand All @@ -256,6 +267,7 @@ impl From<Args> for Params {
cpu_count,
#[cfg(target_os = "linux")]
pit,
mount,
#[cfg(target_os = "linux")]
gdb_port,
#[cfg(target_os = "macos")]
Expand Down
53 changes: 45 additions & 8 deletions src/hypercall.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use std::{
ffi::OsStr,
ffi::{CStr, CString, OsStr},
io::{self, Error, ErrorKind, Write},
os::unix::ffi::OsStrExt,
};
Expand All @@ -8,6 +8,7 @@ use uhyve_interface::{parameters::*, GuestPhysAddr, Hypercall, HypercallAddress,

use crate::{
consts::BOOT_PML4,
isolation::UhyveFileMap,
mem::{MemoryError, MmapMemory},
virt_to_phys,
};
Expand Down Expand Up @@ -84,13 +85,49 @@ pub fn unlink(mem: &MmapMemory, sysunlink: &mut UnlinkParams) {
}

/// Handles an open syscall by opening a file on the host.
pub fn open(mem: &MmapMemory, sysopen: &mut OpenParams) {
unsafe {
sysopen.ret = libc::open(
mem.host_address(sysopen.name).unwrap() as *const i8,
sysopen.flags,
sysopen.mode,
);
pub fn open(mem: &MmapMemory, sysopen: &mut OpenParams, file_map: &Option<UhyveFileMap>) {
// TODO: We could keep track of the file descriptors internally, in case the kernel doesn't close them.
let requested_path = mem.host_address(sysopen.name).unwrap() as *const i8;

// If the file_map doesn't exist, the provided path will be used instead.
// (i.e. host filesystem access).
if let Some(file_map) = file_map {
// Rust deals in UTF-8. C doesn't provide such a guarantee.
// In that case, converting a CStr to str will return a Utf8Error.
//
// See: https://nrc.github.io/big-book-ffi/reference/strings.html
let guest_path = unsafe { CStr::from_ptr(requested_path) }.to_str();

if let Ok(guest_path) = guest_path {
let paths = file_map;
let host_path_option = paths.get_paths().get_key_value(guest_path);

if let Some((_guest_path, host_path)) = host_path_option {
// This variable has to exist, as pointers don't have a lifetime
// and appending .as_ptr() would lead to the string getting
// immediately deallocated after the statement. Nothing is
// referencing it as far as the type system is concerned".
//
// This is also why we can't just have one unsafe block and
// one path variable, otherwise we'll get a use after free.
let host_path_c_string = CString::new(host_path.as_bytes()).unwrap();
let new_host_path = host_path_c_string.as_c_str().as_ptr();

unsafe {
sysopen.ret = libc::open(new_host_path, sysopen.flags, sysopen.mode);
}
} else {
error!("The kernel requested to open() a non-whitelisted path. Rejecting...");
sysopen.ret = -1;
}
} else {
error!("The kernel requested to open() a path that is not valid UTF-8. Rejecting...");
sysopen.ret = -1;
}
} else {
unsafe {
sysopen.ret = libc::open(requested_path, sysopen.flags, sysopen.mode);
}
}
}

Expand Down
61 changes: 61 additions & 0 deletions src/isolation.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
use std::{collections::HashMap, ffi::OsString, fs, path::PathBuf};

/// Lookup table that maps a path in the guest OS ([String]) to a path in the
/// host OS ([OsString]).
#[derive(Debug, Clone)]
pub struct UhyveFileMap {
    files: HashMap<String, OsString>,
}

impl UhyveFileMap {
    /// Creates a UhyveFileMap.
    ///
    /// Using a list of parameters stored in a slice of [String]s, this function
    /// creates a HashMap that can match a path on the host operating system
    /// given a path on the guest operating system.
    ///
    /// Each host path is canonicalized if possible; if canonicalization fails
    /// (e.g. because the path does not exist), the host path is stored exactly
    /// as it was given.
    ///
    /// See [crate::hypercall::open] to see this in practice.
    ///
    /// * `parameters` - A list of parameters with the format `./host_path.txt:guest.txt`
    ///
    /// Currently always returns `Some`.
    ///
    /// # Panics
    ///
    /// Panics if a parameter does not contain a `:` separator. This fails
    /// loudly on purpose: returning `None` instead would make
    /// [crate::hypercall::open] fall back to unrestricted host filesystem
    /// access.
    pub fn new(parameters: &[String]) -> Option<UhyveFileMap> {
        let files = parameters
            .iter()
            .map(|parameter| {
                let (guest_path, host_path) = Self::split_guest_and_host_path(parameter);
                // Prefer the canonical host path, but keep the path as given
                // when it cannot be resolved.
                let host_path =
                    fs::canonicalize(&host_path).map_or(host_path, PathBuf::into_os_string);
                (guest_path, host_path)
            })
            .collect();

        Some(UhyveFileMap { files })
    }

    /// Separates a string of the format "./host_dir/host_path.txt:guest_path.txt"
    /// into a guest_path (String) and host_path (OsString) respectively.
    ///
    /// Keep in mind that the order of the parameters is the inverse of the one
    /// in the actual HashMap itself, as we want to use the guest_path as a key
    /// to look up the respective host_path, as well as provide an intuitive
    /// interface reminiscent of other tools like Docker.
    ///
    /// Only the first two colon-separated segments are used; anything after a
    /// second colon is ignored.
    ///
    /// `parameter` - A parameter of the format `./host_path.txt:guest.txt`.
    ///
    /// # Panics
    ///
    /// Panics with a message naming the offending parameter if it contains no
    /// `:` separator.
    fn split_guest_and_host_path(parameter: &str) -> (String, OsString) {
        let mut parts = parameter.split(':');

        // Mind the order: the host path comes first, the guest path second.
        // TODO: Do this work using clap.
        match (parts.next(), parts.next()) {
            (Some(host_path), Some(guest_path)) => {
                (guest_path.to_owned(), OsString::from(host_path))
            }
            _ => panic!(
                "invalid mount parameter `{}`: expected the format `host_path:guest_path`",
                parameter
            ),
        }
    }

    /// Returns a reference to the stored HashMap.
    ///
    /// The keys are guest paths and the values are the matching host paths.
    /// [crate::hypercall::open] looks up the path read from the guest's
    /// `open()` request in this map.
    pub fn get_paths(&self) -> &HashMap<String, OsString> {
        &self.files
    }
}
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ pub mod macos;
#[cfg(target_os = "macos")]
pub use macos as os;
mod hypercall;
mod isolation;
pub mod mem;
pub mod paging;
pub mod params;
Expand Down
8 changes: 5 additions & 3 deletions src/linux/x86_64/kvm_cpu.rs
Original file line number Diff line number Diff line change
Expand Up @@ -418,9 +418,11 @@ impl VirtualCPU for KvmCpu {
}
Hypercall::FileClose(sysclose) => hypercall::close(sysclose),
Hypercall::FileLseek(syslseek) => hypercall::lseek(syslseek),
Hypercall::FileOpen(sysopen) => {
hypercall::open(&self.parent_vm.mem, sysopen)
}
Hypercall::FileOpen(sysopen) => hypercall::open(
&self.parent_vm.mem,
sysopen,
&self.parent_vm.file_map,
),
Hypercall::FileRead(sysread) => {
hypercall::read(&self.parent_vm.mem, sysread)
}
Expand Down
8 changes: 5 additions & 3 deletions src/macos/aarch64/vcpu.rs
Original file line number Diff line number Diff line change
Expand Up @@ -190,9 +190,11 @@ impl VirtualCPU for XhyveCpu {
}
Hypercall::FileClose(sysclose) => hypercall::close(sysclose),
Hypercall::FileLseek(syslseek) => hypercall::lseek(syslseek),
Hypercall::FileOpen(sysopen) => {
hypercall::open(&self.parent_vm.mem, sysopen)
}
Hypercall::FileOpen(sysopen) => hypercall::open(
&self.parent_vm.mem,
sysopen,
&self.parent_vm.file_map,
),
Hypercall::FileRead(sysread) => {
hypercall::read(&self.parent_vm.mem, sysread)
}
Expand Down
8 changes: 5 additions & 3 deletions src/macos/x86_64/vcpu.rs
Original file line number Diff line number Diff line change
Expand Up @@ -737,9 +737,11 @@ impl VirtualCPU for XhyveCpu {
}
Hypercall::FileClose(sysclose) => hypercall::close(sysclose),
Hypercall::FileLseek(syslseek) => hypercall::lseek(syslseek),
Hypercall::FileOpen(sysopen) => {
hypercall::open(&self.parent_vm.mem, sysopen)
}
Hypercall::FileOpen(sysopen) => hypercall::open(
&self.parent_vm.mem,
sysopen,
&self.parent_vm.file_map,
),
Hypercall::FileRead(sysread) => {
hypercall::read(&self.parent_vm.mem, sysread)
}
Expand Down
4 changes: 4 additions & 0 deletions src/params.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ pub struct Params {

/// Arguments to forward to the kernel
pub kernel_args: Vec<String>,

/// Paths that should be mounted on-device.
pub mount: Option<Vec<String>>,
}

#[allow(clippy::derivable_impls)]
Expand All @@ -51,6 +54,7 @@ impl Default for Params {
pit: false,
cpu_count: Default::default(),
gdb_port: Default::default(),
mount: Default::default(),
kernel_args: Default::default(),
}
}
Expand Down
5 changes: 5 additions & 0 deletions src/vm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ use crate::{
arch::{self, FrequencyDetectionFailed},
consts::*,
fdt::Fdt,
isolation::*,
mem::MmapMemory,
os::HypervisorError,
params::Params,
Expand Down Expand Up @@ -118,6 +119,7 @@ pub struct UhyveVm<VCpuType: VirtualCPU = VcpuDefault> {
pub virtio_device: Arc<Mutex<VirtioNetPciDevice>>,
#[allow(dead_code)] // gdb is not supported on macos
pub(super) gdb_port: Option<u16>,
pub file_map: Option<UhyveFileMap>,
_vcpu_type: PhantomData<VCpuType>,
}
impl<VCpuType: VirtualCPU> UhyveVm<VCpuType> {
Expand Down Expand Up @@ -149,6 +151,8 @@ impl<VCpuType: VirtualCPU> UhyveVm<VCpuType> {
"gdbstub is only supported with one CPU"
);

let file_map = params.mount.as_deref().and_then(UhyveFileMap::new);

let mut vm = Self {
offset: 0,
entry_point: 0,
Expand All @@ -161,6 +165,7 @@ impl<VCpuType: VirtualCPU> UhyveVm<VCpuType> {
verbose: params.verbose,
virtio_device,
gdb_port: params.gdb_port,
file_map,
_vcpu_type: PhantomData,
};

Expand Down

0 comments on commit a8ff49a

Please sign in to comment.