Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

34 changes: 33 additions & 1 deletion crates/openshell-cli/src/run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2901,6 +2901,30 @@ fn dockerfile_sources_supported_for_gateway(metadata: Option<&GatewayMetadata>)
!metadata.is_some_and(|metadata| metadata.is_remote)
}

/// Read `versions.env` from the `context` directory into a key/value map.
///
/// Each line is trimmed and split at its first `=`; the key and value are
/// trimmed again before being stored. Blank lines, lines starting with `#`,
/// lines without `=`, and lines whose key is empty are ignored. When a key
/// appears more than once, the last occurrence wins.
///
/// If the file is missing or cannot be read, an empty map is returned.
fn load_versions_env(context: &Path) -> HashMap<String, String> {
    let mut entries = HashMap::new();

    let path = context.join("versions.env");
    let text = match std::fs::read_to_string(&path) {
        Ok(text) => text,
        // Best effort: an absent or unreadable file simply yields no entries.
        Err(_) => return entries,
    };

    for raw in text.lines() {
        let entry = raw.trim();
        if entry.is_empty() || entry.starts_with('#') {
            continue;
        }
        if let Some((name, val)) = entry.split_once('=') {
            let name = name.trim();
            if !name.is_empty() {
                entries.insert(name.to_string(), val.trim().to_string());
            }
        }
    }

    entries
}

/// Build a Dockerfile and make the resulting image available to the gateway.
///
/// For local Kubernetes gateways running in Docker, this imports the built image
Expand Down Expand Up @@ -2935,6 +2959,14 @@ async fn build_from_dockerfile(
eprintln!(" {} {}", "Gateway:".dimmed(), gateway_name);
eprintln!();

let build_args = load_versions_env(context);
if !build_args.is_empty() {
for (k, v) in &build_args {
eprintln!(" Build arg (from versions.env): {k}={v}");
}
eprintln!();
}

let mut on_log = |msg: String| {
eprintln!(" {msg}");
};
Expand All @@ -2943,7 +2975,7 @@ async fn build_from_dockerfile(
dockerfile,
&tag,
context,
&HashMap::new(),
&build_args,
&mut on_log,
)
.await?;
Expand Down
3 changes: 3 additions & 0 deletions crates/openshell-driver-vm/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,9 @@ flate2 = "1"
sha2 = "0.10"
zstd = "0.13"

[dev-dependencies]
temp-env = "0.3"

# smol-rs/polling drives the BSD/macOS parent-death detection in
# procguard via kqueue's EVFILT_PROC / NOTE_EXIT filter. We could use
# it on Linux too (via epoll + pidfd) but sticking with
Expand Down
5 changes: 5 additions & 0 deletions crates/openshell-driver-vm/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,11 @@ By default `mise run gateway:vm`:

For GPU passthrough (VFIO), pass `-- --gpu` and run with root privileges:

> **Note:** GPU passthrough requires an **x86_64 host and guest**. The QEMU
> backend uses `qemu-system-x86_64`, and the NVIDIA driver installer /
> kernel module build scripts target x86_64 exclusively. ARM/aarch64 GPU
> passthrough is not yet supported.

```shell
sudo -E env "PATH=$PATH" mise run gateway:vm -- --gpu
```
Expand Down
56 changes: 56 additions & 0 deletions crates/openshell-driver-vm/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ use std::path::{Path, PathBuf};
use std::{env, fs};

fn main() {
emit_guest_kernel_version();

println!("cargo:rerun-if-env-changed=OPENSHELL_VM_RUNTIME_COMPRESSED_DIR");

if let Ok(dir) = env::var("OPENSHELL_VM_RUNTIME_COMPRESSED_DIR") {
Expand Down Expand Up @@ -143,3 +145,57 @@ fn generate_stub_resources(out_dir: &Path, names: &[&str]) {
}
}
}

/// Resolve the guest kernel version and expose it to the crate at compile time.
///
/// Resolution order:
/// 1. A non-blank `GUEST_KERNEL_VERSION` environment variable.
/// 2. The `GUEST_KERNEL_VERSION` entry parsed from `pins.env`.
///
/// The result is emitted through `cargo:rustc-env`, so crate sources can read
/// it with `env!("GUEST_KERNEL_VERSION")`. Cargo is also told to re-run this
/// script when `pins.env` or the environment variable changes.
///
/// # Panics
///
/// Panics if `CARGO_MANIFEST_DIR` is not set, if no override is provided and
/// `pins.env` cannot be read, or if `pins.env` contains no
/// `GUEST_KERNEL_VERSION` entry.
fn emit_guest_kernel_version() {
    let manifest_dir =
        PathBuf::from(env::var("CARGO_MANIFEST_DIR").expect("CARGO_MANIFEST_DIR not set"));
    let pins_path = manifest_dir.join("../../crates/openshell-vm/pins.env");

    println!("cargo:rerun-if-changed={}", pins_path.display());
    println!("cargo:rerun-if-env-changed=GUEST_KERNEL_VERSION");

    // A blank override is treated as unset, mirroring the shell
    // `${GUEST_KERNEL_VERSION:-<default>}` form documented for pins.env, where
    // an empty variable falls back to the default. Accepting it here would
    // bake an empty version string into the binary.
    let env_override = env::var("GUEST_KERNEL_VERSION")
        .ok()
        .filter(|v| !v.trim().is_empty());

    let version = if let Some(v) = env_override {
        v
    } else if let Ok(contents) = fs::read_to_string(&pins_path) {
        parse_guest_kernel_version(&contents).unwrap_or_else(|| {
            panic!(
                "GUEST_KERNEL_VERSION not found in {}",
                pins_path.display()
            )
        })
    } else {
        panic!(
            "Cannot read {} and GUEST_KERNEL_VERSION env var not set",
            pins_path.display()
        );
    };

    println!("cargo:rustc-env=GUEST_KERNEL_VERSION={version}");
}

/// Extract the pinned guest kernel version from the text of `pins.env`.
///
/// The first line (after trimming) that starts with `GUEST_KERNEL_VERSION=`
/// and yields a non-empty value wins. Two forms are understood:
///
/// * Default expansion: `GUEST_KERNEL_VERSION="${GUEST_KERNEL_VERSION:-6.12.76}"`.
///   The text between `:-` and the next `}` is used.
/// * Simple assignment: `GUEST_KERNEL_VERSION="6.12.76"`, with or without
///   quotes. A quoted value ends at its closing quote and an unquoted value
///   ends at the first whitespace, so a trailing `# comment` is not included.
///
/// Surrounding single or double quotes are removed from the result.
///
/// Returns `None` when no line provides a non-empty value, so the caller can
/// report a missing pin instead of emitting an empty version.
fn parse_guest_kernel_version(contents: &str) -> Option<String> {
    const QUOTES: [char; 2] = ['"', '\''];

    contents.lines().find_map(|line| {
        // Comment lines and unrelated keys fail the prefix test and are skipped.
        let rhs = line.trim().strip_prefix("GUEST_KERNEL_VERSION=")?;

        let raw = match rhs
            .split_once(":-")
            .and_then(|(_, after)| after.split_once('}'))
        {
            // Pattern: GUEST_KERNEL_VERSION="${GUEST_KERNEL_VERSION:-6.12.76}"
            Some((default, _)) => default.trim(),
            // Fallback: simple assignment like GUEST_KERNEL_VERSION="6.12.76"
            None => match rhs.chars().next() {
                Some(quote @ ('"' | '\'')) => {
                    // Take everything up to the matching closing quote (or the
                    // end of the line if the quote is never closed).
                    let inner = &rhs[quote.len_utf8()..];
                    inner.split(quote).next().unwrap_or(inner)
                }
                _ => rhs.split_whitespace().next().unwrap_or(""),
            },
        };

        let value = raw.trim_matches(&QUOTES[..]);
        (!value.is_empty()).then(|| value.to_string())
    })
}
18 changes: 18 additions & 0 deletions crates/openshell-driver-vm/scripts/openshell-vm-sandbox-init.sh
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,19 @@ create_gpu_device_nodes_mknod() {
setup_gpu() {
ts "GPU_ENABLED=true — initializing GPU passthrough"

# Kernel modules are built for a specific guest kernel version.
# If the running kernel doesn't match, depmod/modprobe will silently fail.
local expected_kver="${GUEST_KERNEL_VERSION:-}"
local actual_kver
actual_kver="$(uname -r)"
if [ -z "${expected_kver}" ]; then
ts "GUEST_KERNEL_VERSION not set; skipping kernel version check"
elif [ "${actual_kver}" != "${expected_kver}" ]; then
ts "WARNING: kernel version mismatch: expected ${expected_kver}, got ${actual_kver}"
ts " GPU modules are installed under lib/modules/${expected_kver}/"
ts " modprobe may fail to find them"
fi

if ! command -v modprobe >/dev/null 2>&1; then
ts "FATAL: modprobe not found; cannot load nvidia kernel modules"
return 1
Expand All @@ -249,6 +262,11 @@ setup_gpu() {
fi
fi

ts "generating module dependency index"
if ! depmod -a "$(uname -r)" 2>/dev/null; then
ts "WARNING: depmod failed; modprobe may not find modules"
fi

ts "loading nvidia kernel modules"
modprobe nvidia || { ts "FATAL: modprobe nvidia failed"; return 1; }
modprobe nvidia_uvm 2>/dev/null || true
Expand Down
41 changes: 35 additions & 6 deletions crates/openshell-driver-vm/src/driver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ use crate::gpu::{
GpuInventory, SubnetAllocator, allocate_vsock_cid, mac_from_sandbox_id, tap_device_name,
};
use crate::rootfs::{
create_rootfs_archive_from_dir, extract_rootfs_archive_to,
prepare_sandbox_rootfs_from_image_root, sandbox_guest_init_path,
create_rootfs_archive_from_dir, extract_rootfs_archive_to, inject_gpu_modules,
prepare_sandbox_rootfs_from_image_root, refresh_runtime_artifacts, sandbox_guest_init_path,
};
use bollard::Docker;
use bollard::errors::Error as BollardError;
Expand Down Expand Up @@ -419,6 +419,28 @@ impl VmDriver {
return Err(err);
}
};
if is_gpu {
let rootfs_for_gpu = rootfs.clone();
let driver_state_dir = self.config.state_dir.clone();
if let Err(err) = tokio::task::spawn_blocking(move || {
inject_gpu_modules(&rootfs_for_gpu, &driver_state_dir)
})
.await
.map_err(|e| Status::internal(format!("GPU module injection panicked: {e}")))?
{
warn!(
sandbox_id = %sandbox.id,
error = %err,
"vm driver: GPU module injection failed"
);
let _ = tokio::fs::remove_dir_all(&state_dir).await;
return Err(Status::failed_precondition(format!(
"GPU module injection failed: {err}"
)));
}
info!(sandbox_id = %sandbox.id, "vm driver: GPU modules injected into rootfs");
}

if let Some(tls_paths) = tls_paths.as_ref()
&& let Err(err) = prepare_guest_tls_materials(&rootfs, tls_paths).await
{
Expand Down Expand Up @@ -738,10 +760,13 @@ impl VmDriver {
.await?;
let archive_path = image_cache_rootfs_archive(&self.config.state_dir, &image_identity);
let rootfs_dest = rootfs.to_path_buf();
tokio::task::spawn_blocking(move || extract_rootfs_archive_to(&archive_path, &rootfs_dest))
.await
.map_err(|err| Status::internal(format!("sandbox rootfs extraction panicked: {err}")))?
.map_err(|err| Status::internal(format!("extract sandbox rootfs failed: {err}")))?;
tokio::task::spawn_blocking(move || {
extract_rootfs_archive_to(&archive_path, &rootfs_dest)?;
refresh_runtime_artifacts(&rootfs_dest)
})
.await
.map_err(|err| Status::internal(format!("sandbox rootfs extraction panicked: {err}")))?
.map_err(|err| Status::internal(format!("extract sandbox rootfs failed: {err}")))?;

Ok(image_identity)
}
Expand Down Expand Up @@ -2195,6 +2220,10 @@ fn build_guest_environment(
"OPENSHELL_SSH_HANDSHAKE_SECRET".to_string(),
config.ssh_handshake_secret.clone(),
),
(
"GUEST_KERNEL_VERSION".to_string(),
env!("GUEST_KERNEL_VERSION").to_string(),
),
]);
if config.requires_tls_materials() {
environment.extend(HashMap::from([
Expand Down
Loading
Loading