Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions crates/openshell-driver-docker/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,10 +79,11 @@ The Docker driver bind-mounts a host-side Linux `openshell-sandbox` binary into
each sandbox container. Resolution order is:

1. `supervisor_bin` in `[openshell.drivers.docker]`.
2. A sibling `openshell-sandbox` next to the running `openshell-gateway` binary.
3. A local Linux cargo target build for the Docker daemon architecture.
4. `supervisor_image` in `[openshell.drivers.docker]`, or the
release-matched default supervisor image, extracting `/openshell-sandbox`.
2. `supervisor_image` in `[openshell.drivers.docker]`, extracting
`/openshell-sandbox` from that image.
3. A sibling `openshell-sandbox` next to the running `openshell-gateway` binary.
4. A local Linux cargo target build for the Docker daemon architecture.
5. The release-matched default supervisor image, extracting `/openshell-sandbox`.

Release and Docker-image gateway builds bake the matching supervisor image tag
into the binary at compile time. The default Docker supervisor image is not
Expand Down
103 changes: 68 additions & 35 deletions crates/openshell-driver-docker/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,8 @@ const DOCKER_NETWORK_DRIVER: &str = "bridge";

/// Default image holding the Linux `openshell-sandbox` binary. The gateway
/// pulls this image and extracts the binary to a host-side cache when no
/// explicit `supervisor_bin` override or local build is available.
/// explicit `supervisor_bin`, configured `supervisor_image`, sibling binary,
/// or local build is available.
const DEFAULT_DOCKER_SUPERVISOR_IMAGE_REPO: &str = "ghcr.io/nvidia/openshell/supervisor";

/// Return the default `ghcr.io/nvidia/openshell/supervisor:<tag>` reference
Expand Down Expand Up @@ -155,10 +156,9 @@ pub struct DockerComputeConfig {
/// Optional override for the Linux `openshell-sandbox` binary mounted into containers.
pub supervisor_bin: Option<PathBuf>,

/// Optional override for the image the gateway pulls to extract the
/// Linux `openshell-sandbox` binary when no explicit binary path or
/// local build is available. Defaults to
/// `ghcr.io/nvidia/openshell/supervisor:<gateway-image-tag>`.
/// Optional image used to extract the Linux `openshell-sandbox` binary.
/// Ignored when `supervisor_bin` is set. See `resolve_supervisor_bin` for
/// the full resolution order.
Comment thread
elezar marked this conversation as resolved.
pub supervisor_image: Option<String>,

/// Host-side CA certificate for Docker sandbox mTLS.
Expand Down Expand Up @@ -2948,56 +2948,89 @@ fn normalize_docker_arch(arch: &str) -> String {
}
}

pub(crate) async fn resolve_supervisor_bin(
docker: &Docker,
#[derive(Debug, Eq, PartialEq)]
enum SupervisorBinSource {
Binary(PathBuf),
Image(String),
}

fn resolve_supervisor_bin_source(
docker_config: &DockerComputeConfig,
daemon_arch: &str,
) -> CoreResult<PathBuf> {
current_exe: Option<&Path>,
target_candidates: &[PathBuf],
) -> CoreResult<SupervisorBinSource> {
// Tier 1: explicit supervisor_bin in [openshell.drivers.docker].
if let Some(path) = docker_config.supervisor_bin.clone() {
let path = canonicalize_existing_file(&path, "docker supervisor binary")?;
validate_linux_elf_binary(&path)?;
return Ok(path);
return Ok(SupervisorBinSource::Binary(path));
}

// Tier 2: explicit supervisor_image in [openshell.drivers.docker].
// A configured image should be the source of truth even when a local
// developer build is present under target/.
if let Some(image) = docker_config.supervisor_image.clone() {
return Ok(SupervisorBinSource::Image(image));
}
Comment thread
elezar marked this conversation as resolved.

// Tier 2: sibling `openshell-sandbox` next to the running gateway
// Tier 3: sibling `openshell-sandbox` next to the running gateway
// (release artifact layout). Linux-only because the sibling must be a
// Linux ELF to bind-mount into a Linux container.
if cfg!(target_os = "linux") {
let current_exe = std::env::current_exe()
.map_err(|err| Error::config(format!("failed to resolve current executable: {err}")))?;
if let Some(parent) = current_exe.parent() {
let sibling = parent.join("openshell-sandbox");
if sibling.is_file() {
let path = canonicalize_existing_file(&sibling, "docker supervisor binary")?;
if validate_linux_elf_binary(&path).is_ok() {
return Ok(path);
}
if cfg!(target_os = "linux")
&& let Some(current_exe) = current_exe
&& let Some(parent) = current_exe.parent()
{
let sibling = parent.join("openshell-sandbox");
if sibling.is_file() {
let path = canonicalize_existing_file(&sibling, "docker supervisor binary")?;
if validate_linux_elf_binary(&path).is_ok() {
return Ok(SupervisorBinSource::Binary(path));
}
}
}

// Tier 3: local cargo target build (developer workflow). Preferred
// over a registry pull when available because it matches whatever the
// developer just built.
let target_candidates = linux_supervisor_candidates(daemon_arch);
for candidate in &target_candidates {
// Tier 4: local cargo target build (developer workflow). Preferred
// over the default registry image when available because it matches
// whatever the developer just built.
for candidate in target_candidates {
if candidate.is_file() {
let path = canonicalize_existing_file(candidate, "docker supervisor binary")?;
if validate_linux_elf_binary(&path).is_ok() {
return Ok(path);
return Ok(SupervisorBinSource::Binary(path));
}
}
}

// Tier 4: pull the supervisor image from a registry and extract the
// binary to a host-side cache keyed by image content digest. This is
// the default path for released gateway binaries.
let image = docker_config
.supervisor_image
.clone()
.unwrap_or_else(default_docker_supervisor_image);
extract_supervisor_bin_from_image(docker, &image).await
// Tier 5: pull the release-matched default supervisor image and extract
// the binary to a host-side cache keyed by image content digest.
Ok(SupervisorBinSource::Image(default_docker_supervisor_image()))
}

pub(crate) async fn resolve_supervisor_bin(
docker: &Docker,
docker_config: &DockerComputeConfig,
daemon_arch: &str,
) -> CoreResult<PathBuf> {
let current_exe =
if cfg!(target_os = "linux")
&& docker_config.supervisor_bin.is_none()
&& docker_config.supervisor_image.is_none()
{
Some(std::env::current_exe().map_err(|err| {
Error::config(format!("failed to resolve current executable: {err}"))
})?)
} else {
None
};
let target_candidates = linux_supervisor_candidates(daemon_arch);

match resolve_supervisor_bin_source(docker_config, current_exe.as_deref(), &target_candidates)?
{
SupervisorBinSource::Binary(path) => Ok(path),
SupervisorBinSource::Image(image) => {
extract_supervisor_bin_from_image(docker, &image).await
}
}
}

fn linux_supervisor_candidates(daemon_arch: &str) -> Vec<PathBuf> {
Expand Down
30 changes: 30 additions & 0 deletions crates/openshell-driver-docker/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1711,6 +1711,36 @@ fn default_docker_supervisor_image_uses_nvidia_ghcr_repo() {
);
}

#[test]
fn configured_supervisor_image_takes_precedence_over_local_binaries() {
let tempdir = TempDir::new().unwrap();
let bin_dir = tempdir.path().join("bin");
fs::create_dir_all(&bin_dir).unwrap();
let current_exe = bin_dir.join("openshell-gateway");
let sibling = bin_dir.join("openshell-sandbox");
fs::write(&current_exe, b"gateway").unwrap();
fs::write(&sibling, b"\x7fELFsibling").unwrap();

let local_build = tempdir.path().join("target/openshell-sandbox");
fs::create_dir_all(local_build.parent().unwrap()).unwrap();
fs::write(&local_build, b"\x7fELFlocal").unwrap();

let source = resolve_supervisor_bin_source(
&DockerComputeConfig {
supervisor_image: Some("example.com/openshell/supervisor:test".to_string()),
..Default::default()
},
Some(&current_exe),
&[local_build],
)
.unwrap();

assert_eq!(
source,
SupervisorBinSource::Image("example.com/openshell/supervisor:test".to_string())
);
}

#[test]
fn docker_supervisor_image_tag_prefers_explicit_build_tags() {
assert_eq!(
Expand Down
1 change: 1 addition & 0 deletions docs/reference/gateway-config.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,7 @@ sandbox_namespace = "docker-dev"
grpc_endpoint = "https://host.openshell.internal:17670"
# Skip the image-pull-and-extract step by pointing at a locally built binary.
supervisor_bin = "/usr/local/libexec/openshell/openshell-sandbox"
# When supervisor_bin is omitted, Docker extracts /openshell-sandbox from this image.
supervisor_image = "ghcr.io/nvidia/openshell/supervisor:latest"
guest_tls_ca = "/etc/openshell/certs/ca.pem"
guest_tls_cert = "/etc/openshell/certs/client.pem"
Expand Down
101 changes: 29 additions & 72 deletions e2e/with-docker-gateway.sh
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ require_container_engine_lane() {
}

require_container_engine_lane docker Docker
CONTAINER_ENGINE_QUIET="${CONTAINER_ENGINE_QUIET:-1}"
# shellcheck source=tasks/scripts/container-engine.sh
source "${ROOT}/tasks/scripts/container-engine.sh"

github_actions_host_docker_tmpdir() {
if [ "${GITHUB_ACTIONS:-}" != "true" ] \
Expand Down Expand Up @@ -111,7 +114,6 @@ DOCKER_NETWORK_NAME=""
DOCKER_NETWORK_CONNECTED_CONTAINER=""
DOCKER_NETWORK_MANAGED=0
GPU_MODE="${OPENSHELL_E2E_DOCKER_GPU:-0}"
DOCKER_SUPERVISOR_ARGS=()

# Isolate CLI/SDK gateway metadata from the developer's real config.
export XDG_CONFIG_HOME="${WORKDIR}/config"
Expand Down Expand Up @@ -292,25 +294,6 @@ if [ "${GPU_MODE}" = "1" ]; then
fi
fi

normalize_arch() {
case "$1" in
x86_64|amd64) echo "amd64" ;;
aarch64|arm64) echo "arm64" ;;
*) echo "$1" ;;
esac
}

linux_target_triple() {
case "$1" in
amd64) echo "x86_64-unknown-linux-gnu" ;;
arm64) echo "aarch64-unknown-linux-gnu" ;;
*)
echo "ERROR: unsupported Docker daemon architecture '$1'" >&2
exit 2
;;
esac
}

resolve_docker_supervisor_image() {
if [ -n "${OPENSHELL_DOCKER_SUPERVISOR_IMAGE:-}" ]; then
printf '%s\n' "${OPENSHELL_DOCKER_SUPERVISOR_IMAGE}"
Expand All @@ -333,7 +316,7 @@ resolve_docker_supervisor_image() {
return 0
fi

printf '%s\n' ""
printf '%s\n' "openshell/supervisor:dev"
}

docker_pull_with_retry() {
Expand Down Expand Up @@ -362,6 +345,27 @@ docker_pull_with_retry() {
return 1
}

build_local_docker_supervisor_image_if_required() {
local image=$1

if [ "${image}" != "openshell/supervisor:dev" ]; then
return 0
fi

local daemon_arch
daemon_arch="$(ce_info_arch)"

echo "Building local Docker supervisor image ${image} for linux/${daemon_arch}..."
CONTAINER_ENGINE=docker DOCKER_PLATFORM="linux/${daemon_arch}" IMAGE_TAG=dev \
bash "${ROOT}/tasks/scripts/docker-build-image.sh" supervisor
if docker image inspect "${image}" >/dev/null 2>&1; then
return 0
fi

echo "ERROR: expected supervisor image '${image}' after local build." >&2
exit 2
}

ensure_docker_supervisor_image() {
local image=$1

Expand Down Expand Up @@ -414,47 +418,12 @@ ensure_sandbox_image_available() {
docker_pull_with_retry "${image}"
}

DAEMON_ARCH="$(normalize_arch "$(docker info --format '{{.Architecture}}' 2>/dev/null || true)")"
SUPERVISOR_TARGET="$(linux_target_triple "${DAEMON_ARCH}")"
HOST_OS="$(uname -s)"
HOST_ARCH="$(normalize_arch "$(uname -m)")"
SUPERVISOR_OUT_DIR="${WORKDIR}/supervisor/${DAEMON_ARCH}"
SUPERVISOR_BIN="${SUPERVISOR_OUT_DIR}/openshell-sandbox"

CARGO_BUILD_JOBS_ARG=()
if [ -n "${CARGO_BUILD_JOBS:-}" ]; then
CARGO_BUILD_JOBS_ARG=(-j "${CARGO_BUILD_JOBS}")
fi

e2e_build_gateway_binaries "${ROOT}" TARGET_DIR GATEWAY_BIN CLI_BIN

SUPERVISOR_IMAGE="$(resolve_docker_supervisor_image)"
if [ -n "${SUPERVISOR_IMAGE}" ]; then
ensure_docker_supervisor_image "${SUPERVISOR_IMAGE}"
echo "Using Docker supervisor image: ${SUPERVISOR_IMAGE}"
DOCKER_SUPERVISOR_ARGS=(--docker-supervisor-image "${SUPERVISOR_IMAGE}")
else
echo "Building openshell-sandbox for ${SUPERVISOR_TARGET}..."
mkdir -p "${SUPERVISOR_OUT_DIR}"
if [ "${HOST_OS}" = "Linux" ] && [ "${HOST_ARCH}" = "${DAEMON_ARCH}" ]; then
rustup target add "${SUPERVISOR_TARGET}" >/dev/null 2>&1 || true
cargo build ${CARGO_BUILD_JOBS_ARG[@]+"${CARGO_BUILD_JOBS_ARG[@]}"} \
--release -p openshell-sandbox --target "${SUPERVISOR_TARGET}"
cp "${TARGET_DIR}/${SUPERVISOR_TARGET}/release/openshell-sandbox" "${SUPERVISOR_BIN}"
else
CONTAINER_ENGINE=docker \
DOCKER_PLATFORM="linux/${DAEMON_ARCH}" \
DOCKER_OUTPUT="type=local,dest=${SUPERVISOR_OUT_DIR}" \
bash "${ROOT}/tasks/scripts/docker-build-image.sh" supervisor-output
fi

if [ ! -f "${SUPERVISOR_BIN}" ]; then
echo "ERROR: expected supervisor binary at ${SUPERVISOR_BIN}" >&2
exit 1
fi
chmod +x "${SUPERVISOR_BIN}"
DOCKER_SUPERVISOR_ARGS=(--docker-supervisor-bin "${SUPERVISOR_BIN}")
fi
build_local_docker_supervisor_image_if_required "${SUPERVISOR_IMAGE}"
ensure_docker_supervisor_image "${SUPERVISOR_IMAGE}"
echo "Using Docker supervisor image: ${SUPERVISOR_IMAGE}"

DEFAULT_SANDBOX_IMAGE="ghcr.io/nvidia/openshell-community/sandboxes/base:latest"
SANDBOX_IMAGE="${OPENSHELL_E2E_DOCKER_SANDBOX_IMAGE:-${OPENSHELL_SANDBOX_IMAGE:-${DEFAULT_SANDBOX_IMAGE}}}"
Expand Down Expand Up @@ -521,19 +490,7 @@ GATEWAY_CONFIG="${STATE_DIR}/gateway.toml"
printf 'guest_tls_cert = %s\n' "$(toml_string "${PKI_DIR}/client/tls.crt")"
printf 'guest_tls_key = %s\n' "$(toml_string "${PKI_DIR}/client/tls.key")"
printf 'enable_bind_mounts = true\n'
# DOCKER_SUPERVISOR_ARGS holds either ("--docker-supervisor-bin" "<path>")
# or ("--docker-supervisor-image" "<image>"); both map to TOML keys on
# the docker driver config.
for ((i=0; i<${#DOCKER_SUPERVISOR_ARGS[@]}; i+=2)); do
case "${DOCKER_SUPERVISOR_ARGS[$i]}" in
--docker-supervisor-bin)
printf 'supervisor_bin = %s\n' "$(toml_string "${DOCKER_SUPERVISOR_ARGS[$((i+1))]}")"
;;
--docker-supervisor-image)
printf 'supervisor_image = %s\n' "$(toml_string "${DOCKER_SUPERVISOR_ARGS[$((i+1))]}")"
;;
esac
done
printf 'supervisor_image = %s\n' "$(toml_string "${SUPERVISOR_IMAGE}")"
if [ -n "${GATEWAY_HOST_ALIAS_IP}" ]; then
printf 'host_gateway_ip = %s\n' "$(toml_string "${GATEWAY_HOST_ALIAS_IP}")"
fi
Expand Down
Loading