Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
123 changes: 123 additions & 0 deletions crates/openshell-cli/src/ssh.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,16 @@ use tokio::process::Command as TokioCommand;
use tokio_stream::wrappers::ReceiverStream;

const FOREGROUND_FORWARD_STARTUP_GRACE_PERIOD: Duration = Duration::from_secs(2);
/// Names of linker/loader-related environment variables that get special
/// handling when launching host tools.
///
/// `sanitize_host_tool_environment` removes each of these from the `ssh`
/// command, while `proxy_command_with_preserved_environment` re-applies any
/// that are currently set to the proxy command through an `env` prefix.
const HOST_TOOL_LINKER_ENV: &[&str] = &[
"DYLD_FALLBACK_LIBRARY_PATH",
"DYLD_INSERT_LIBRARIES",
"DYLD_LIBRARY_PATH",
"LD_AUDIT",
"LD_LIBRARY_PATH",
"LD_PRELOAD",
"LIBRARY_PATH",
"NIX_LD_LIBRARY_PATH",
];

#[derive(Clone, Copy, Debug)]
pub enum Editor {
Expand Down Expand Up @@ -121,6 +131,7 @@ async fn ssh_session_config(
&session.token,
gateway_name,
);
let proxy_command = proxy_command_with_preserved_environment(proxy_command);

Ok(SshSessionConfig {
proxy_command,
Expand All @@ -137,6 +148,7 @@ fn ssh_base_command(proxy_command: &str) -> Command {
std::env::var("OPENSHELL_SSH_LOG_LEVEL").unwrap_or_else(|_| "ERROR".to_string());

let mut command = Command::new("ssh");
sanitize_host_tool_environment(&mut command);
command
.arg("-o")
.arg(format!("ProxyCommand={proxy_command}"))
Expand All @@ -159,6 +171,30 @@ fn ssh_base_command(proxy_command: &str) -> Command {
command
}

/// Strips every variable named in `HOST_TOOL_LINKER_ENV` from `command`, so
/// the spawned process neither receives an explicit value for it nor inherits
/// one from this process.
fn sanitize_host_tool_environment(command: &mut Command) {
    HOST_TOOL_LINKER_ENV.iter().for_each(|name| {
        command.env_remove(name);
    });
}

/// Prefixes `proxy_command` with `env KEY=value ...` for every variable in
/// `HOST_TOOL_LINKER_ENV` that is set in the current process.
///
/// Values are converted lossily to UTF-8 and passed through `shell_escape`
/// before being embedded. When none of the variables is set, `proxy_command`
/// is returned untouched.
fn proxy_command_with_preserved_environment(proxy_command: String) -> String {
    let mut preserved = Vec::new();
    for name in HOST_TOOL_LINKER_ENV {
        if let Some(raw) = std::env::var_os(name) {
            let text = raw.to_string_lossy();
            preserved.push(format!("{name}={}", shell_escape(&text)));
        }
    }

    // Nothing to forward: leave the command exactly as the caller built it.
    if preserved.is_empty() {
        return proxy_command;
    }

    format!("env {} {proxy_command}", preserved.join(" "))
}

#[cfg(unix)]
const TRANSIENT_TTY_SIGNALS: &[Signal] = &[Signal::SIGINT, Signal::SIGQUIT, Signal::SIGTERM];

Expand Down Expand Up @@ -1508,6 +1544,93 @@ mod tests {
use super::*;
use crate::TEST_ENV_LOCK;

#[test]
fn ssh_base_command_removes_host_linker_environment() {
    let command = ssh_base_command("openshell ssh-proxy");

    // `get_envs` reports an explicitly removed variable as `(key, None)`.
    let mut removed_keys = Vec::new();
    for (name, value) in command.get_envs() {
        if value.is_none() {
            removed_keys.push(name.to_string_lossy().into_owned());
        }
    }

    for key in HOST_TOOL_LINKER_ENV {
        let was_removed = removed_keys.iter().any(|removed| removed == key);
        assert!(was_removed, "expected ssh command to remove {key}");
    }
}

#[test]
#[allow(unsafe_code)] // Test-only: env vars require unsafe in Rust 2024.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You could use temp-env here (the cli crate already depends on it) to remove the `unsafe` blocks and the manual resetting of the env vars.

I think it could also remove the need for the lock, based on their docs.

Avoid interference when running concurrently

// Verifies that a linker variable set in the process environment is forwarded
// to the proxy command as a shell-escaped `env` assignment placed in front of
// the original command.
fn proxy_command_preserves_linker_environment_for_proxy_child() {
// Hold the shared env lock for the whole test. A poisoned lock (left behind by
// a panicking test) is recovered instead of being treated as a failure.
let _guard = TEST_ENV_LOCK
.lock()
.unwrap_or_else(std::sync::PoisonError::into_inner);
// Snapshot the current values so they can be put back afterwards.
let old_env = HOST_TOOL_LINKER_ENV
.iter()
.map(|key| (*key, std::env::var_os(key)))
.collect::<Vec<_>>();

// SAFETY: presumably sound because every test that touches the process
// environment takes TEST_ENV_LOCK, which is held here — TODO confirm.
unsafe {
// Start from a clean slate so that only LD_LIBRARY_PATH gets forwarded.
for key in HOST_TOOL_LINKER_ENV {
std::env::remove_var(key);
}
// The value contains a space so that shell escaping is exercised.
std::env::set_var("LD_LIBRARY_PATH", "/nix/store/z3 lib:/opt/lib");
}

let proxy_command =
proxy_command_with_preserved_environment("openshell ssh-proxy".to_string());
// Evaluate the checks now but assert only after the environment has been
// restored, so a failing assertion cannot leave the process env modified.
let has_assignment = proxy_command.contains("LD_LIBRARY_PATH='/nix/store/z3 lib:/opt/lib'");
let has_env_prefix = proxy_command.starts_with("env ");
let has_command = proxy_command.ends_with(" openshell ssh-proxy");

// SAFETY: same reasoning as above; TEST_ENV_LOCK is still held.
unsafe {
// Restore the snapshot: re-set variables that existed, remove the rest.
for (key, value) in old_env {
match value {
Some(value) => std::env::set_var(key, value),
None => std::env::remove_var(key),
}
}
}

assert!(has_assignment, "unexpected proxy command: {proxy_command}");
assert!(has_env_prefix, "unexpected proxy command: {proxy_command}");
assert!(has_command, "unexpected proxy command: {proxy_command}");
}

// Verifies that the proxy command is returned unmodified when none of the
// `HOST_TOOL_LINKER_ENV` variables is set.
#[test]
#[allow(unsafe_code)] // Test-only: env vars require unsafe in Rust 2024.
fn proxy_command_is_unchanged_without_linker_environment() {
// Hold the shared env lock for the whole test. A poisoned lock (left behind by
// a panicking test) is recovered instead of being treated as a failure.
let _guard = TEST_ENV_LOCK
.lock()
.unwrap_or_else(std::sync::PoisonError::into_inner);
// Snapshot the current values so they can be put back afterwards.
let old_env = HOST_TOOL_LINKER_ENV
.iter()
.map(|key| (*key, std::env::var_os(key)))
.collect::<Vec<_>>();

// SAFETY: presumably sound because every test that touches the process
// environment takes TEST_ENV_LOCK, which is held here — TODO confirm.
unsafe {
// Clear every tracked variable so there is nothing to forward.
for key in HOST_TOOL_LINKER_ENV {
std::env::remove_var(key);
}
}

let proxy_command =
proxy_command_with_preserved_environment("openshell ssh-proxy".to_string());

// SAFETY: same reasoning as above; TEST_ENV_LOCK is still held.
unsafe {
// Restore the snapshot before asserting, so a failing assertion cannot
// leave the process env modified.
for (key, value) in old_env {
match value {
Some(value) => std::env::set_var(key, value),
None => std::env::remove_var(key),
}
}
}

assert_eq!(proxy_command, "openshell ssh-proxy");
}

#[test]
fn upsert_host_block_appends_when_missing() {
let input = "Host existing\n HostName example.com\n";
Expand Down
9 changes: 5 additions & 4 deletions crates/openshell-driver-docker/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,10 +79,11 @@ The Docker driver bind-mounts a host-side Linux `openshell-sandbox` binary into
each sandbox container. Resolution order is:

1. `supervisor_bin` in `[openshell.drivers.docker]`.
2. A sibling `openshell-sandbox` next to the running `openshell-gateway` binary.
3. A local Linux cargo target build for the Docker daemon architecture.
4. `supervisor_image` in `[openshell.drivers.docker]`, or the
release-matched default supervisor image, extracting `/openshell-sandbox`.
2. `supervisor_image` in `[openshell.drivers.docker]`, extracting
`/openshell-sandbox` from that image.
3. A sibling `openshell-sandbox` next to the running `openshell-gateway` binary.
4. A local Linux cargo target build for the Docker daemon architecture.
5. The release-matched default supervisor image, extracting `/openshell-sandbox`.

Release and Docker-image gateway builds bake the matching supervisor image tag
into the binary at compile time. The default Docker supervisor image is not
Expand Down
28 changes: 16 additions & 12 deletions crates/openshell-driver-docker/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,8 @@ const DOCKER_NETWORK_DRIVER: &str = "bridge";

/// Default image holding the Linux `openshell-sandbox` binary. The gateway
/// pulls this image and extracts the binary to a host-side cache when no
/// explicit `supervisor_bin` override or local build is available.
/// explicit `supervisor_bin`, configured `supervisor_image`, sibling binary,
/// or local build is available.
const DEFAULT_DOCKER_SUPERVISOR_IMAGE_REPO: &str = "ghcr.io/nvidia/openshell/supervisor";

/// Return the default `ghcr.io/nvidia/openshell/supervisor:<tag>` reference
Expand Down Expand Up @@ -2960,7 +2961,14 @@ pub(crate) async fn resolve_supervisor_bin(
return Ok(path);
}

// Tier 2: sibling `openshell-sandbox` next to the running gateway
// Tier 2: explicit supervisor_image in [openshell.drivers.docker].
// A configured image should be the source of truth even when a local
// developer build is present under target/.
if let Some(image) = docker_config.supervisor_image.clone() {
return extract_supervisor_bin_from_image(docker, &image).await;
}
Comment on lines +2964 to +2969

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is a comment here:

/// Optional override for the image the gateway pulls to extract the
/// Linux `openshell-sandbox` binary when no explicit binary path or
/// local build is available. Defaults to
/// `ghcr.io/nvidia/openshell/supervisor:<gateway-image-tag>`.
pub supervisor_image: Option<String>,
that should be updated after changing the order in which the supervisor binary is resolved.

Could be something like:

/// Optional image used to extract the Linux `openshell-sandbox` binary.
/// Ignored when `supervisor_bin` is set. See `resolve_supervisor_bin` for
/// the full resolution order.
pub supervisor_image: Option<String>,

It could make sense to validate the config and only allow either supervisor_bin or supervisor_image, so both cannot be set. But that would be a breaking change.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, it would be good to have a test for this new resolution order.


// Tier 3: sibling `openshell-sandbox` next to the running gateway
// (release artifact layout). Linux-only because the sibling must be a
// Linux ELF to bind-mount into a Linux container.
if cfg!(target_os = "linux") {
Expand All @@ -2977,9 +2985,9 @@ pub(crate) async fn resolve_supervisor_bin(
}
}

// Tier 3: local cargo target build (developer workflow). Preferred
// over a registry pull when available because it matches whatever the
// developer just built.
// Tier 4: local cargo target build (developer workflow). Preferred
// over the default registry image when available because it matches
// whatever the developer just built.
let target_candidates = linux_supervisor_candidates(daemon_arch);
for candidate in &target_candidates {
if candidate.is_file() {
Expand All @@ -2990,13 +2998,9 @@ pub(crate) async fn resolve_supervisor_bin(
}
}

// Tier 4: pull the supervisor image from a registry and extract the
// binary to a host-side cache keyed by image content digest. This is
// the default path for released gateway binaries.
let image = docker_config
.supervisor_image
.clone()
.unwrap_or_else(default_docker_supervisor_image);
// Tier 5: pull the release-matched default supervisor image and extract
// the binary to a host-side cache keyed by image content digest.
let image = default_docker_supervisor_image();
extract_supervisor_bin_from_image(docker, &image).await
}

Expand Down
1 change: 1 addition & 0 deletions docs/reference/gateway-config.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,7 @@ sandbox_namespace = "docker-dev"
grpc_endpoint = "https://host.openshell.internal:17670"
# Skip the image-pull-and-extract step by pointing at a locally built binary.
supervisor_bin = "/usr/local/libexec/openshell/openshell-sandbox"
# When supervisor_bin is omitted, Docker extracts /openshell-sandbox from this image.
supervisor_image = "ghcr.io/nvidia/openshell/supervisor:latest"
guest_tls_ca = "/etc/openshell/certs/ca.pem"
guest_tls_cert = "/etc/openshell/certs/client.pem"
Expand Down
91 changes: 19 additions & 72 deletions e2e/with-docker-gateway.sh
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,6 @@ DOCKER_NETWORK_NAME=""
DOCKER_NETWORK_CONNECTED_CONTAINER=""
DOCKER_NETWORK_MANAGED=0
GPU_MODE="${OPENSHELL_E2E_DOCKER_GPU:-0}"
DOCKER_SUPERVISOR_ARGS=()

# Isolate CLI/SDK gateway metadata from the developer's real config.
export XDG_CONFIG_HOME="${WORKDIR}/config"
Expand Down Expand Up @@ -263,25 +262,6 @@ if [ "${GPU_MODE}" = "1" ]; then
fi
fi

# Map a machine architecture name onto the amd64/arm64 spelling.
# Names that are not recognized are echoed back unchanged.
normalize_arch() {
  if [ "$1" = "x86_64" ] || [ "$1" = "amd64" ]; then
    echo "amd64"
  elif [ "$1" = "aarch64" ] || [ "$1" = "arm64" ]; then
    echo "arm64"
  else
    echo "$1"
  fi
}

# Print the Linux GNU Rust target triple for a normalized architecture
# (amd64 or arm64). Any other value prints an error to stderr and exits
# with status 2.
linux_target_triple() {
  if [ "$1" = "amd64" ]; then
    echo "x86_64-unknown-linux-gnu"
    return
  fi

  if [ "$1" = "arm64" ]; then
    echo "aarch64-unknown-linux-gnu"
    return
  fi

  echo "ERROR: unsupported Docker daemon architecture '$1'" >&2
  exit 2
}

resolve_docker_supervisor_image() {
if [ -n "${OPENSHELL_DOCKER_SUPERVISOR_IMAGE:-}" ]; then
printf '%s\n' "${OPENSHELL_DOCKER_SUPERVISOR_IMAGE}"
Expand All @@ -304,7 +284,7 @@ resolve_docker_supervisor_image() {
return 0
fi

printf '%s\n' ""
printf '%s\n' "openshell/supervisor:dev"
}

docker_pull_with_retry() {
Expand Down Expand Up @@ -336,6 +316,21 @@ docker_pull_with_retry() {
ensure_docker_supervisor_image() {
local image=$1

if [ "${image}" = "openshell/supervisor:dev" ] \
&& [ -z "${OPENSHELL_DOCKER_SUPERVISOR_IMAGE:-}" ] \
&& [ -z "${OPENSHELL_SUPERVISOR_IMAGE:-}" ] \
&& [ -z "${CI:-}" ]; then
echo "Building local Docker supervisor image ${image}..."
CONTAINER_ENGINE=docker IMAGE_TAG=dev \
bash "${ROOT}/tasks/scripts/docker-build-image.sh" supervisor
if docker image inspect "${image}" >/dev/null 2>&1; then
return 0
fi

echo "ERROR: expected supervisor image '${image}' after local build." >&2
exit 2
fi

if docker image inspect "${image}" >/dev/null 2>&1; then
return 0
fi
Expand Down Expand Up @@ -385,47 +380,11 @@ ensure_sandbox_image_available() {
docker_pull_with_retry "${image}"
}

DAEMON_ARCH="$(normalize_arch "$(docker info --format '{{.Architecture}}' 2>/dev/null || true)")"
SUPERVISOR_TARGET="$(linux_target_triple "${DAEMON_ARCH}")"
HOST_OS="$(uname -s)"
HOST_ARCH="$(normalize_arch "$(uname -m)")"
SUPERVISOR_OUT_DIR="${WORKDIR}/supervisor/${DAEMON_ARCH}"
SUPERVISOR_BIN="${SUPERVISOR_OUT_DIR}/openshell-sandbox"

CARGO_BUILD_JOBS_ARG=()
if [ -n "${CARGO_BUILD_JOBS:-}" ]; then
CARGO_BUILD_JOBS_ARG=(-j "${CARGO_BUILD_JOBS}")
fi

e2e_build_gateway_binaries "${ROOT}" TARGET_DIR GATEWAY_BIN CLI_BIN

SUPERVISOR_IMAGE="$(resolve_docker_supervisor_image)"
if [ -n "${SUPERVISOR_IMAGE}" ]; then
ensure_docker_supervisor_image "${SUPERVISOR_IMAGE}"
echo "Using Docker supervisor image: ${SUPERVISOR_IMAGE}"
DOCKER_SUPERVISOR_ARGS=(--docker-supervisor-image "${SUPERVISOR_IMAGE}")
else
echo "Building openshell-sandbox for ${SUPERVISOR_TARGET}..."
mkdir -p "${SUPERVISOR_OUT_DIR}"
if [ "${HOST_OS}" = "Linux" ] && [ "${HOST_ARCH}" = "${DAEMON_ARCH}" ]; then
rustup target add "${SUPERVISOR_TARGET}" >/dev/null 2>&1 || true
cargo build ${CARGO_BUILD_JOBS_ARG[@]+"${CARGO_BUILD_JOBS_ARG[@]}"} \
--release -p openshell-sandbox --target "${SUPERVISOR_TARGET}"
cp "${TARGET_DIR}/${SUPERVISOR_TARGET}/release/openshell-sandbox" "${SUPERVISOR_BIN}"
else
CONTAINER_ENGINE=docker \
DOCKER_PLATFORM="linux/${DAEMON_ARCH}" \
DOCKER_OUTPUT="type=local,dest=${SUPERVISOR_OUT_DIR}" \
bash "${ROOT}/tasks/scripts/docker-build-image.sh" supervisor-output
fi

if [ ! -f "${SUPERVISOR_BIN}" ]; then
echo "ERROR: expected supervisor binary at ${SUPERVISOR_BIN}" >&2
exit 1
fi
chmod +x "${SUPERVISOR_BIN}"
DOCKER_SUPERVISOR_ARGS=(--docker-supervisor-bin "${SUPERVISOR_BIN}")
fi
ensure_docker_supervisor_image "${SUPERVISOR_IMAGE}"
echo "Using Docker supervisor image: ${SUPERVISOR_IMAGE}"

DEFAULT_SANDBOX_IMAGE="ghcr.io/nvidia/openshell-community/sandboxes/base:latest"
SANDBOX_IMAGE="${OPENSHELL_E2E_DOCKER_SANDBOX_IMAGE:-${OPENSHELL_SANDBOX_IMAGE:-${DEFAULT_SANDBOX_IMAGE}}}"
Expand Down Expand Up @@ -493,19 +452,7 @@ GATEWAY_CONFIG="${STATE_DIR}/gateway.toml"
printf 'guest_tls_cert = %s\n' "$(toml_string "${PKI_DIR}/client/tls.crt")"
printf 'guest_tls_key = %s\n' "$(toml_string "${PKI_DIR}/client/tls.key")"
printf 'enable_bind_mounts = true\n'
# DOCKER_SUPERVISOR_ARGS holds either ("--docker-supervisor-bin" "<path>")
# or ("--docker-supervisor-image" "<image>"); both map to TOML keys on
# the docker driver config.
for ((i=0; i<${#DOCKER_SUPERVISOR_ARGS[@]}; i+=2)); do
case "${DOCKER_SUPERVISOR_ARGS[$i]}" in
--docker-supervisor-bin)
printf 'supervisor_bin = %s\n' "$(toml_string "${DOCKER_SUPERVISOR_ARGS[$((i+1))]}")"
;;
--docker-supervisor-image)
printf 'supervisor_image = %s\n' "$(toml_string "${DOCKER_SUPERVISOR_ARGS[$((i+1))]}")"
;;
esac
done
printf 'supervisor_image = %s\n' "$(toml_string "${SUPERVISOR_IMAGE}")"
if [ -n "${GATEWAY_HOST_ALIAS_IP}" ]; then
printf 'host_gateway_ip = %s\n' "$(toml_string "${GATEWAY_HOST_ALIAS_IP}")"
fi
Expand Down
Loading