From 5bba1b726711aa7720ce27156692ad8ae3140efb Mon Sep 17 00:00:00 2001 From: Carlos Alexandro Becker Date: Wed, 24 Jun 2026 09:43:23 -0300 Subject: [PATCH 1/9] feat(wxc_common): add the sandbox execution interfaces (SandboxBackend / SandboxProcess / Runner) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce the shared, no-pty execution surface that the containment backends and a future in-process library build on. Purely additive: no existing code path uses these yet, so behavior is unchanged. - `SandboxProcess` — a handle to a running sandboxed child: `take_stdin` / `take_stdout` / `take_stderr`, `try_wait`, `id`, `kill` (process-tree), and `wait` (drains any untaken stdio, honors `scriptTimeout`), plus stdout/stderr closers for abandoning a backgrounded-descendant-held read without a kill. - `SandboxBackend` — `validate` + `spawn(request, logger, StdioMode) -> SandboxProcess` + a `diagnose_exit` hook; `StdioMode::{Pipes, Inherit}`. - `Runner` — the generic adapter bridging any `SandboxBackend` to the run-to-completion `ScriptRunner` (spawn `Inherit`, then `wait`). - `StreamCloser`, `group_kill` (Unix leader-first SIGKILL of the child's group), and `wait_with_timeout` (adaptive 1ms->50ms backoff poll). - `interruptible_reader` (Unix self-pipe + `poll`) and the Windows pipe helpers in `process_util` (`InterruptiblePipeReader` / `PipeReadCanceller` / `create_std_pipes`) for out-of-band-cancelable streaming reads. - `FailurePhase::Timeout` so a timeout is distinguishable from other failures. The library backends and executor binaries are migrated onto this surface in a follow-up PR, and the importable `mxc-sdk` crate is built on top of it. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Signed-off-by: Carlos Alexandro Becker --- src/core/wxc_common/Cargo.toml | 2 +- .../wxc_common/src/interruptible_reader.rs | 293 +++++++++++ src/core/wxc_common/src/lib.rs | 5 + src/core/wxc_common/src/models.rs | 5 + src/core/wxc_common/src/process_util.rs | 227 ++++++++- src/core/wxc_common/src/sandbox_process.rs | 475 ++++++++++++++++++ 6 files changed, 1005 insertions(+), 2 deletions(-) create mode 100644 src/core/wxc_common/src/interruptible_reader.rs create mode 100644 src/core/wxc_common/src/sandbox_process.rs diff --git a/src/core/wxc_common/Cargo.toml b/src/core/wxc_common/Cargo.toml index e6dcf35b8..bca51a80f 100644 --- a/src/core/wxc_common/Cargo.toml +++ b/src/core/wxc_common/Cargo.toml @@ -28,7 +28,7 @@ windows-core = { workspace = true } widestring = { workspace = true } winreg = { workspace = true } -[target.'cfg(target_os = "linux")'.dependencies] +[target.'cfg(unix)'.dependencies] libc = { workspace = true } [dev-dependencies] diff --git a/src/core/wxc_common/src/interruptible_reader.rs b/src/core/wxc_common/src/interruptible_reader.rs new file mode 100644 index 000000000..1cc01bb11 --- /dev/null +++ b/src/core/wxc_common/src/interruptible_reader.rs @@ -0,0 +1,293 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! A Unix pipe reader whose `read` can be cancelled out-of-band. +//! +//! The in-tree Unix backends (Seatbelt, Bubblewrap) hand the caller the child's +//! raw stdout/stderr pipe, where a blocking `read` only ends at EOF — when every +//! write end closes. A backgrounded descendant that inherited the pipe can hold +//! its write end open past the foreground command's exit, leaving such a read +//! parked indefinitely. [`InterruptibleReader`] wraps the pipe so a separate +//! [`ReadCanceller`] (a [`StreamCloser`]) can make that read return EOF +//! (`Ok(0)`) promptly, without killing the child. +//! +//! It uses a self-pipe + `poll(2)`: the read fd is set non-blocking and the +//! reader blocks in `poll` on both the data pipe and the read end of a +//! self-pipe; cancellation writes a byte to the self-pipe (waking the `poll`) +//! and sets a flag so later reads short-circuit to EOF. + +use std::io::{self, Read}; +use std::os::fd::{AsRawFd, FromRawFd, OwnedFd, RawFd}; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Arc; + +use crate::sandbox_process::StreamCloser; + +/// Cancellation state shared between an [`InterruptibleReader`] and the +/// [`ReadCanceller`]s minted from it: the write end of the self-pipe used to +/// wake the reader's `poll`, plus a flag so a read after cancellation returns +/// EOF without touching the data pipe. +struct CancelState { + cancelled: AtomicBool, + /// Write end of the self-pipe; one byte here wakes the reader's `poll`. + wake_w: OwnedFd, +} + +impl CancelState { + /// Mark cancelled (once) and nudge the reader's `poll` awake. + fn cancel(&self) { + // Flag first so a read that wakes observes EOF, then wake the poll. If + // we were already cancelled, do nothing — `close` is idempotent. + if self.cancelled.swap(true, Ordering::Release) { + return; + } + // A single byte makes `poll` return. The self-pipe write end is + // non-blocking, so this never blocks; ignore the result — `EAGAIN` + // (a wake byte is already pending) and `EPIPE` (the reader's end has + // been dropped) are both fine. + let byte = [0u8; 1]; + // SAFETY: `wake_w` is a valid, owned, non-blocking pipe write fd; the + // buffer is a valid 1-byte local. + unsafe { + libc::write(self.wake_w.as_raw_fd(), byte.as_ptr().cast(), 1); + } + } +} + +/// A [`StreamCloser`] for an [`InterruptibleReader`]. Cloneable and `Send + +/// Sync` so several may be held (and fired from any thread); all share one +/// cancellation state, and `close` is idempotent. +#[derive(Clone)] +pub struct ReadCanceller(Arc); + +impl StreamCloser for ReadCanceller { + fn close(&self) { + self.0.cancel(); + } +} + +/// A readable pipe whose `read` can be cancelled via a [`ReadCanceller`]. +/// +/// Implements [`Read`]: it blocks in `poll(2)` on the data pipe and a self-pipe +/// and returns the next chunk, real EOF (`Ok(0)`), or — once a paired +/// [`ReadCanceller::close`] fires — a prompt cancellation EOF (`Ok(0)`). +pub struct InterruptibleReader { + /// The child's stdout/stderr pipe, set non-blocking. + fd: OwnedFd, + /// Read end of the self-pipe; readable once cancellation writes its byte. + wake_r: OwnedFd, + state: Arc, +} + +impl InterruptibleReader { + /// Wrap an owned readable pipe `fd` so its reads can be cancelled + /// out-of-band. Sets `fd` non-blocking and creates the self-pipe used for + /// wakeups. + /// + /// # Errors + /// + /// Returns the underlying [`io::Error`] if the self-pipe cannot be created + /// or either fd cannot be switched to non-blocking mode. + pub fn new(fd: OwnedFd) -> io::Result { + set_nonblocking(fd.as_raw_fd())?; + + // Self-pipe for wakeups: the write end is non-blocking so `cancel` + // never stalls; the read end stays blocking but is only ever polled. + let mut fds = [0 as RawFd; 2]; + // SAFETY: `fds` is a valid 2-element array for `pipe` to fill. + if unsafe { libc::pipe(fds.as_mut_ptr()) } < 0 { + return Err(io::Error::last_os_error()); + } + // SAFETY: `pipe` succeeded, so both fds are freshly owned by us. + let wake_r = unsafe { OwnedFd::from_raw_fd(fds[0]) }; + let wake_w = unsafe { OwnedFd::from_raw_fd(fds[1]) }; + set_nonblocking(wake_w.as_raw_fd())?; + + Ok(Self { + fd, + wake_r, + state: Arc::new(CancelState { + cancelled: AtomicBool::new(false), + wake_w, + }), + }) + } + + /// Mint a closer that EOFs this reader's `read` on demand. Several closers + /// may be minted; they share one cancellation state. + pub fn canceller(&self) -> ReadCanceller { + ReadCanceller(Arc::clone(&self.state)) + } +} + +/// Wrap an optional child pipe end into an [`InterruptibleReader`] plus a +/// [`ReadCanceller`] for its [`StreamCloser`]. `None` (inherited stdio) stays +/// `None` for both. Convenience for the Unix backends, which hold `ChildStdout` +/// / `ChildStderr` (both `Into`). +/// +/// # Errors +/// +/// Propagates any [`io::Error`] from [`InterruptibleReader::new`]. +pub fn wrap_pipe>( + pipe: Option, +) -> io::Result<(Option, Option)> { + let Some(pipe) = pipe else { + return Ok((None, None)); + }; + let reader = InterruptibleReader::new(pipe.into())?; + let canceller = reader.canceller(); + Ok((Some(reader), Some(canceller))) +} + +impl Read for InterruptibleReader { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + // A zero-length read must return `Ok(0)` immediately (the `Read` + // contract), never block in `poll`. + if buf.is_empty() { + return Ok(0); + } + // Already cancelled: report EOF without touching the data pipe. + if self.state.cancelled.load(Ordering::Acquire) { + return Ok(0); + } + loop { + let mut poll_fds = [ + libc::pollfd { + fd: self.fd.as_raw_fd(), + events: libc::POLLIN, + revents: 0, + }, + libc::pollfd { + fd: self.wake_r.as_raw_fd(), + events: libc::POLLIN, + revents: 0, + }, + ]; + // SAFETY: `poll_fds` is a valid 2-element array of pollfds; both + // fds are owned and live for the duration of the call. + let rc = unsafe { libc::poll(poll_fds.as_mut_ptr(), 2, -1) }; + if rc < 0 { + let err = io::Error::last_os_error(); + if err.kind() == io::ErrorKind::Interrupted { + continue; + } + return Err(err); + } + + // Cancellation wins over any pending data so a held-open pipe is + // abandoned promptly. + if self.state.cancelled.load(Ordering::Acquire) || poll_fds[1].revents != 0 { + return Ok(0); + } + + if poll_fds[0].revents != 0 { + // SAFETY: `fd` is owned and `buf` is a valid writable slice. + let n = + unsafe { libc::read(self.fd.as_raw_fd(), buf.as_mut_ptr().cast(), buf.len()) }; + if n >= 0 { + return Ok(n as usize); + } + let err = io::Error::last_os_error(); + match err.raw_os_error() { + // Spurious readiness (e.g. POLLHUP with no buffered bytes): + // loop and re-poll. + Some(libc::EAGAIN) => continue, + _ if err.kind() == io::ErrorKind::Interrupted => continue, + _ => return Err(err), + } + } + } + } +} + +/// Add `O_NONBLOCK` to `fd`'s file-status flags. +fn set_nonblocking(fd: RawFd) -> io::Result<()> { + // SAFETY: `fd` is a valid open fd; `fcntl` with these commands only reads + // and writes its flags. + let flags = unsafe { libc::fcntl(fd, libc::F_GETFL) }; + if flags < 0 { + return Err(io::Error::last_os_error()); + } + if unsafe { libc::fcntl(fd, libc::F_SETFL, flags | libc::O_NONBLOCK) } < 0 { + return Err(io::Error::last_os_error()); + } + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Write; + use std::time::{Duration, Instant}; + + /// Build an `InterruptibleReader` over a fresh pipe, returning it plus the + /// write end so a test can feed it bytes. + fn reader_with_writer() -> (InterruptibleReader, OwnedFd) { + let mut fds = [0 as RawFd; 2]; + assert!(unsafe { libc::pipe(fds.as_mut_ptr()) } == 0, "pipe"); + let read_end = unsafe { OwnedFd::from_raw_fd(fds[0]) }; + let write_end = unsafe { OwnedFd::from_raw_fd(fds[1]) }; + let reader = InterruptibleReader::new(read_end).expect("wrap reader"); + (reader, write_end) + } + + #[test] + fn reads_data_then_eof_on_writer_close() { + let (mut reader, write_end) = reader_with_writer(); + let mut writer = std::fs::File::from(write_end); + writer.write_all(b"hello").expect("write"); + drop(writer); // close write end -> EOF after the data + + let mut buf = [0u8; 16]; + let n = reader.read(&mut buf).expect("read data"); + assert_eq!(&buf[..n], b"hello"); + assert_eq!(reader.read(&mut buf).expect("read eof"), 0); + } + + #[test] + fn zero_length_read_returns_ok_zero_without_blocking() { + // The write end stays open, so a normal read would block; a zero-length + // read must still return Ok(0) immediately per the `Read` contract. + let (mut reader, _write_end) = reader_with_writer(); + let mut empty: [u8; 0] = []; + assert_eq!(reader.read(&mut empty).expect("zero-length read"), 0); + } + + #[test] + fn close_unblocks_a_parked_read_without_writer_close() { + // The write end stays open for the whole test, so a plain read would + // block forever; the canceller must EOF it promptly. + let (reader, _write_end) = reader_with_writer(); + let canceller = reader.canceller(); + let mut reader = reader; + + let handle = std::thread::spawn(move || { + let mut buf = [0u8; 16]; + let start = Instant::now(); + let n = reader.read(&mut buf).expect("read returns"); + (n, start.elapsed()) + }); + + std::thread::sleep(Duration::from_millis(50)); + canceller.close(); + + let (n, elapsed) = handle.join().expect("reader thread"); + assert_eq!(n, 0, "cancelled read reports EOF"); + assert!( + elapsed < Duration::from_secs(5), + "read should return promptly after close, took {elapsed:?}" + ); + } + + #[test] + fn close_is_idempotent_and_reads_stay_eof() { + let (mut reader, _write_end) = reader_with_writer(); + let canceller = reader.canceller(); + canceller.close(); + canceller.close(); // second call is a no-op + + let mut buf = [0u8; 16]; + assert_eq!(reader.read(&mut buf).expect("eof"), 0); + assert_eq!(reader.read(&mut buf).expect("still eof"), 0); + } +} diff --git a/src/core/wxc_common/src/lib.rs b/src/core/wxc_common/src/lib.rs index f6af283b6..9165e7441 100644 --- a/src/core/wxc_common/src/lib.rs +++ b/src/core/wxc_common/src/lib.rs @@ -14,6 +14,7 @@ pub mod logger; pub mod microvm_staging; pub mod models; pub mod mxc_error; +pub mod sandbox_process; pub mod script_runner; pub mod state_aware_backend; pub mod state_aware_dispatch; @@ -40,6 +41,10 @@ pub mod process_util; #[cfg(target_os = "windows")] pub mod string_util; +// Unix-specific modules (shared by the Seatbelt and Bubblewrap backends). +#[cfg(unix)] +pub mod interruptible_reader; + // Linux-specific modules #[cfg(target_os = "linux")] pub mod linux_proxy_coordinator; diff --git a/src/core/wxc_common/src/models.rs b/src/core/wxc_common/src/models.rs index c5ff37e6d..29a41f21e 100644 --- a/src/core/wxc_common/src/models.rs +++ b/src/core/wxc_common/src/models.rs @@ -592,6 +592,11 @@ pub enum FailurePhase { LaunchFailed, /// The process was created but exited with a non-zero code. ProcessExited, + /// The process was force-terminated because it exceeded `scriptTimeout`. + /// Distinct from [`ProcessExited`] (it did not exit on its own) so callers + /// can detect a timeout uniformly across backends rather than inferring it + /// from `exit_code == -1` (which collides with other failures). + Timeout, /// The selected containment backend is unavailable on this host: the API is /// missing, or present but not usable (e.g. feature-disabled). Distinct from /// [`LaunchFailed`] so callers can fall back to a lower tier rather than diff --git a/src/core/wxc_common/src/process_util.rs b/src/core/wxc_common/src/process_util.rs index 14605e059..2c89f56dc 100644 --- a/src/core/wxc_common/src/process_util.rs +++ b/src/core/wxc_common/src/process_util.rs @@ -2,8 +2,11 @@ // Licensed under the MIT License. use std::path::PathBuf; +use std::sync::{Arc, Mutex, Weak}; +use std::time::Duration; use crate::error::WxcError; +use crate::sandbox_process::StreamCloser; use crate::string_util; use windows::Win32::Foundation::{ @@ -11,12 +14,234 @@ use windows::Win32::Foundation::{ HLOCAL, WAIT_OBJECT_0, }; use windows::Win32::Security::{DeriveCapabilitySidsFromName, PSID, SECURITY_ATTRIBUTES}; -use windows::Win32::Storage::FileSystem::ReadFile; +use windows::Win32::Storage::FileSystem::{ReadFile, WriteFile}; use windows::Win32::System::Pipes::CreatePipe; use windows::Win32::System::Threading::WaitForSingleObject; +use windows::Win32::System::IO::CancelIoEx; use windows_core::BOOL; use windows_core::PCWSTR; +/// A readable end of an anonymous pipe (e.g. the child's stdout/stderr), +/// owning the handle and closing it on drop. Implements [`std::io::Read`] +/// via `ReadFile`; a broken pipe (all write ends closed) reads as EOF. +/// `Send` so it can be handed to a reader thread. +pub struct PipeReader(SendOwnedHandle); + +impl PipeReader { + /// Take ownership of `handle` (invalidating the source `OwnedHandle`). + pub fn new(mut handle: OwnedHandle) -> Self { + Self(SendOwnedHandle::take(&mut handle)) + } +} + +impl std::io::Read for PipeReader { + fn read(&mut self, buf: &mut [u8]) -> std::io::Result { + use windows::Win32::Foundation::ERROR_BROKEN_PIPE; + let mut read: u32 = 0; + // SAFETY: `self.0` owns a valid pipe handle for the lifetime of this + // `PipeReader`; `buf`/`read` are valid local out-params for the call. + match unsafe { ReadFile(self.0.get(), Some(buf), Some(&mut read), None) } { + Ok(()) => Ok(read as usize), + // Write ends all closed: normal end-of-stream, report EOF. + Err(e) if e.code() == ERROR_BROKEN_PIPE.to_hresult() => Ok(0), + Err(e) => Err(std::io::Error::other(e)), + } + } +} + +/// Cancellation state shared between an [`InterruptiblePipeReader`] and its +/// [`PipeReadCanceller`]s. Owns the pipe handle — closed only when the **last** +/// reference drops, so a canceller's `CancelIoEx` can never race a closed (and +/// possibly reused) handle — plus the [`ReadGate`] the reader and cancellers +/// hand off through. +struct CancelablePipe { + handle: SendOwnedHandle, + gate: Mutex, +} + +/// Reader/canceller handshake, guarded by [`CancelablePipe::gate`]. +/// +/// `CancelIoEx` is *edge-triggered*: it aborts only I/O already pending when it +/// is called. A bare `cancelled` flag + a single `CancelIoEx` therefore has a +/// lost-wakeup race — a `close` landing between a read's flag check and its +/// `ReadFile` entering the kernel cancels nothing and never retries, parking the +/// read until real EOF. The mutex closes that race by ordering "a read is +/// starting" against "cancel requested": a racing `close` either sees the read +/// has not started (and the read then observes `cancelled`) or sees `reading` +/// and keeps issuing `CancelIoEx` until the read is aborted. +#[derive(Default)] +struct ReadGate { + /// Set once `close` has been called; a read observing it returns EOF instead + /// of issuing (or while abandoning) a `ReadFile`. + cancelled: bool, + /// True while a read is in — or about to enter — its blocking `ReadFile`, so + /// `close` knows to keep issuing `CancelIoEx` until that read is aborted. + reading: bool, +} + +impl CancelablePipe { + /// Lock the gate, tolerating a poisoned mutex (the guarded data is two + /// bools with no broken invariant on panic). + fn lock(&self) -> std::sync::MutexGuard<'_, ReadGate> { + self.gate + .lock() + .unwrap_or_else(|poisoned| poisoned.into_inner()) + } +} + +// SAFETY: the only non-`Sync` field is a process-global Windows HANDLE whose +// value is copied for `ReadFile` / `CancelIoEx`; issuing `CancelIoEx` from one +// thread to abort a `ReadFile` blocked on another is the documented, supported +// way to interrupt synchronous pipe I/O, and the reader/canceller handshake is +// serialised by `gate`. So sharing `&CancelablePipe` across threads is sound. +unsafe impl Sync for CancelablePipe {} + +/// A readable pipe end (e.g. the child's stdout/stderr) whose blocking +/// `ReadFile` can be cancelled out-of-band via a [`PipeReadCanceller`] — +/// reporting EOF — without closing the child or its other streams. A broken +/// pipe (all write ends closed) still reads as EOF as usual. `Send` so it can +/// be handed to a reader thread. Single-reader: at most one thread may `read` it +/// at a time (any number of cancellers may fire concurrently). +pub struct InterruptiblePipeReader(Arc); + +impl InterruptiblePipeReader { + /// Take ownership of `handle` (invalidating the source `OwnedHandle`). + pub fn new(mut handle: OwnedHandle) -> Self { + Self(Arc::new(CancelablePipe { + handle: SendOwnedHandle::take(&mut handle), + gate: Mutex::new(ReadGate::default()), + })) + } + + /// Mint a closer that EOFs this reader's `read` on demand. Several closers + /// may be minted; they share one cancellation state. The closer holds only a + /// [`Weak`] reference, so it never keeps the read handle open past this + /// reader's lifetime. + pub fn canceller(&self) -> PipeReadCanceller { + PipeReadCanceller(Arc::downgrade(&self.0)) + } +} + +impl std::io::Read for InterruptiblePipeReader { + fn read(&mut self, buf: &mut [u8]) -> std::io::Result { + use windows::Win32::Foundation::{ERROR_BROKEN_PIPE, ERROR_OPERATION_ABORTED}; + // Announce the read under the gate: a `close` that already fired makes us + // EOF without touching the pipe; otherwise mark `reading` so a `close` + // racing us keeps issuing `CancelIoEx` until our `ReadFile` is aborted. + { + let mut gate = self.0.lock(); + if gate.cancelled { + return Ok(0); + } + gate.reading = true; + } + let mut read: u32 = 0; + // SAFETY: the `Arc` keeps the pipe handle valid for this call; + // `buf`/`read` are valid local out-params. + let result = unsafe { ReadFile(self.0.handle.get(), Some(buf), Some(&mut read), None) }; + let cancelled = { + let mut gate = self.0.lock(); + gate.reading = false; + gate.cancelled + }; + // If `close` fired while this read was in flight, drop any + // completed-but-undelivered chunk and report EOF — matching the Unix + // reader, which never delivers data once cancelled. + if cancelled { + return Ok(0); + } + match result { + Ok(()) => Ok(read as usize), + // Write ends all closed: normal end-of-stream, report EOF. + Err(e) if e.code() == ERROR_BROKEN_PIPE.to_hresult() => Ok(0), + // A canceller's `CancelIoEx` aborted this read: report EOF. + Err(e) if e.code() == ERROR_OPERATION_ABORTED.to_hresult() => Ok(0), + Err(e) => Err(std::io::Error::other(e)), + } + } +} + +/// A [`StreamCloser`] for an [`InterruptiblePipeReader`]. Cloneable and +/// `Send + Sync` so a watchdog thread can hold and fire it; all clones share +/// one cancellation state and [`close`](StreamCloser::close) is idempotent. +/// Holds a [`Weak`] reference so a stored canceller never keeps the reader's +/// data-pipe handle open after the reader is dropped. +#[derive(Clone)] +pub struct PipeReadCanceller(Weak); + +impl StreamCloser for PipeReadCanceller { + fn close(&self) { + // Upgrade to a temporary strong ref for the duration of the cancel. If + // the reader has already been dropped there is nothing to cancel (and + // its handle is already closed), so this is a no-op. + let Some(pipe) = self.0.upgrade() else { + return; + }; + // Mark cancelled once (so reads short-circuit to EOF), then abort an + // in-flight read. Because `CancelIoEx` is edge-triggered, retry while a + // read is in its announce→`ReadFile` window (`reading == true`): the next + // iteration catches the `ReadFile` once it enters the kernel. The reader + // clears `reading` when its `ReadFile` returns (aborted or with data), + // which bounds the loop; if no read is in progress it exits at once. + { + let mut gate = pipe.lock(); + if gate.cancelled { + return; + } + gate.cancelled = true; + } + loop { + // SAFETY: the upgraded `Arc` keeps the handle valid; `CancelIoEx` + // with a null overlapped aborts all outstanding synchronous I/O on + // it. Ignore the result — a benign no-op (ERROR_NOT_FOUND) when none + // is pending. + unsafe { + let _ = CancelIoEx(pipe.handle.get(), None); + } + if !pipe.lock().reading { + return; + } + std::thread::sleep(Duration::from_millis(1)); + } + } +} + +/// A writable end of an anonymous pipe (e.g. the child's stdin), owning the +/// handle and closing it on drop (which sends EOF to the child). Implements +/// [`std::io::Write`] via `WriteFile`. `Send`. +pub struct PipeWriter(SendOwnedHandle); + +impl PipeWriter { + /// Take ownership of `handle` (invalidating the source `OwnedHandle`). + pub fn new(mut handle: OwnedHandle) -> Self { + Self(SendOwnedHandle::take(&mut handle)) + } +} + +impl std::io::Write for PipeWriter { + fn write(&mut self, buf: &[u8]) -> std::io::Result { + use windows::Win32::Foundation::ERROR_BROKEN_PIPE; + let mut written: u32 = 0; + // SAFETY: `self.0` owns a valid pipe handle for the lifetime of this + // `PipeWriter`; `buf`/`written` are valid local params for the call. + match unsafe { WriteFile(self.0.get(), Some(buf), Some(&mut written), None) } { + Ok(()) => Ok(written as usize), + // The read end is gone (child exited / closed its stdin): surface the + // standard `BrokenPipe` kind so callers' graceful handling fires + // instead of an opaque OS error. + Err(e) if e.code() == ERROR_BROKEN_PIPE.to_hresult() => { + Err(std::io::Error::from(std::io::ErrorKind::BrokenPipe)) + } + Err(e) => Err(std::io::Error::other(e)), + } + } + + fn flush(&mut self) -> std::io::Result<()> { + // Anonymous pipes are not buffered on the writer side. + Ok(()) + } +} + const BUFFER_SIZE: u32 = 4096; const MAX_OUTPUT_CHARS: usize = 1024 * 1024; diff --git a/src/core/wxc_common/src/sandbox_process.rs b/src/core/wxc_common/src/sandbox_process.rs new file mode 100644 index 000000000..957507456 --- /dev/null +++ b/src/core/wxc_common/src/sandbox_process.rs @@ -0,0 +1,475 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! Handle-based ("streaming") sandbox execution. +//! +//! [`ScriptRunner`](crate::script_runner::ScriptRunner) owns the whole +//! lifecycle (spawn → wait → drain → return), which is fine for fire-and- +//! forget runs but cannot expose the running child. This module adds the +//! interface for the other model: spawn the sandboxed process and hand the +//! caller a [`SandboxProcess`] handle they can write to, read from, wait on, +//! and kill while it runs. +//! +//! As with [`ScriptRunner`](crate::script_runner::ScriptRunner), the traits +//! live in `wxc_common` (the cross-platform foundation) while the +//! implementations live in the per-backend crates — `wxc_common` must not +//! depend on any `backends/*` crate. + +use std::io::{Read, Write}; + +use crate::logger::Logger; +use crate::models::{ExecutionRequest, FailurePhase, ScriptResponse}; +use crate::script_runner::ScriptRunner; + +/// A handle to a running sandboxed process. +/// +/// Modelled on [`std::process::Child`]: the caller may `take_*` the std +/// streams to drive them directly (and is then responsible for draining any +/// stream they take, to avoid the child blocking on a full pipe), then +/// [`wait`](SandboxProcess::wait) for exit or [`kill`](SandboxProcess::kill) +/// it. +/// +/// Any stdout/stderr stream the caller does **not** take is drained and +/// discarded internally by [`wait`](SandboxProcess::wait) so the child can +/// never block on a full pipe. +/// +/// No pty is ever allocated; the streams are ordinary pipes. +/// +/// # Abandoning a held-open stream (stdout/stderr closers) +/// +/// A read on a taken stdout/stderr only ends at EOF — when **every** write end +/// closes. A backgrounded descendant that inherited the pipe can hold its write +/// end open long after the foreground command exits, so a caller blocked on +/// such a read would hang until that descendant finally exits. A plain +/// [`kill`](SandboxProcess::kill) would unblock it but also tear the descendant +/// down, defeating any grace window for backgrounded work. +/// +/// [`stdout_closer`](SandboxProcess::stdout_closer) / +/// [`stderr_closer`](SandboxProcess::stderr_closer) hand back a +/// [`StreamCloser`] for exactly this case: calling +/// [`close`](StreamCloser::close) makes an in-flight or subsequent read on that +/// stream return EOF (`Ok(0)`) promptly **without** terminating the child. +/// +/// # Pipe-deadlock contract (read both ends concurrently) +/// +/// stdout and stderr are independent OS pipes with bounded kernel buffers. If +/// one is left undrained while the child keeps writing to it, the child blocks +/// on the full pipe — and if the reader is meanwhile blocked waiting on the +/// *other* stream (or on the child to exit), the two deadlock. So both ends +/// must be consumed **concurrently**, never one fully then the other: +/// +/// - **Implementors** of [`wait`](SandboxProcess::wait) must drain the +/// not-taken stdout and stderr on separate threads (or non-blocking I/O) +/// before/while waiting on the child — not sequentially. The in-tree +/// backends spawn one reader thread per stream. +/// - **Callers** that `take_stdout()` *and* `take_stderr()` and read them to +/// EOF must likewise read them on separate threads; reading one to EOF +/// before touching the other can hang on output-heavy children. Taking only +/// one stream (leaving the other for `wait()` to drain) is always safe. +pub trait SandboxProcess: Send { + /// Take ownership of the child's stdin so the caller can write to it. + /// Returns `None` if already taken. Drop the writer to send EOF. + fn take_stdin(&mut self) -> Option>; + + /// Take ownership of the child's stdout for live reading. Returns `None` + /// if already taken. A taken stream is **not** drained by + /// [`wait`](SandboxProcess::wait). + fn take_stdout(&mut self) -> Option>; + + /// Take ownership of the child's stderr for live reading. Returns `None` + /// if already taken. A taken stream is **not** drained by + /// [`wait`](SandboxProcess::wait). + fn take_stderr(&mut self) -> Option>; + + /// Non-blocking exit check. `Ok(Some(code))` if the child has exited, + /// `Ok(None)` if it is still running. + fn try_wait(&mut self) -> std::io::Result>; + + /// The OS process id of the sandboxed child (its PID on Unix, process id + /// on Windows). Useful for external monitoring or a caller-driven process + /// tree kill. + /// + /// Only meaningful while the child is alive. On Unix the PID may be reused + /// by an unrelated process once the child has been reaped (by + /// [`wait`](SandboxProcess::wait)), so do not act on it after waiting. + fn id(&self) -> u32; + + /// Request termination of the sandboxed process **and its descendants** + /// (a process-tree kill). On Unix the child leads its own process group + /// and this signals the whole group (an immediate `SIGKILL`, no graceful + /// `SIGTERM` first); on Windows it terminates the job + /// object the child is assigned to. Reaping happens in + /// [`wait`](SandboxProcess::wait). + fn kill(&mut self) -> std::io::Result<()>; + + /// Block until the child exits (honouring the request's `scriptTimeout`, + /// where `0` means wait forever) and return its exit code. + /// + /// Any stdout/stderr the caller did not `take_*` is drained and discarded + /// while waiting so the child can never block on a full pipe. If the + /// timeout elapses, the child and its tree are killed and + /// [`ErrorKind::TimedOut`](std::io::ErrorKind::TimedOut) is returned. + /// + /// Implementors must drain the not-taken stdout and stderr **concurrently** + /// (not one then the other) — see the type-level pipe-deadlock contract. + fn wait(&mut self) -> std::io::Result; + + /// A closer that EOFs the stdout stream returned by + /// [`take_stdout`](SandboxProcess::take_stdout), on demand, **without** + /// killing the child — for abandoning a stream a backgrounded descendant is + /// holding open past the foreground command's exit (a plain + /// [`kill`](SandboxProcess::kill) would also take that descendant down). + /// + /// Intended for a stream the caller has **taken** and is reading: + /// [`close`](StreamCloser::close) abandons that read, and may be called + /// concurrently with it. [`wait`](SandboxProcess::wait) already cancels its + /// own internal safety-drain of any *not-taken* stream once the child exits, + /// so a closer is only useful on a taken stream — firing one on a not-taken + /// stream while the child is still producing output would stall the child on + /// a full pipe. Returns `None` when the stream is not interruptible — e.g. + /// inherited stdio ([`StdioMode::Inherit`]), where the caller never reads + /// from a handle stream. The default returns `None`. + fn stdout_closer(&self) -> Option> { + None + } + + /// A closer for the stderr stream — see + /// [`stdout_closer`](SandboxProcess::stdout_closer). The default returns + /// `None`. + fn stderr_closer(&self) -> Option> { + None + } +} + +/// Abandons reads on one of a [`SandboxProcess`]'s standard streams: a call to +/// [`close`](StreamCloser::close) makes an in-flight or subsequent read on the +/// corresponding [`take_stdout`](SandboxProcess::take_stdout) / +/// [`take_stderr`](SandboxProcess::take_stderr) stream return EOF (`Ok(0)`) +/// promptly, **without** terminating the child. +/// +/// Obtained from [`stdout_closer`](SandboxProcess::stdout_closer) / +/// [`stderr_closer`](SandboxProcess::stderr_closer). `Send + Sync` so a +/// watchdog thread (separate from the one blocked on the read) can hold and +/// fire it. +pub trait StreamCloser: Send + Sync { + /// Promptly EOF the stream this closer was minted for. Idempotent and safe + /// to call after the reader has already reached EOF or been dropped. + fn close(&self); +} + +/// Spawn a thread that reads `reader` to EOF and discards it, so a stream the +/// caller did not take can't block the child on a full pipe. Returns `None` +/// when there is nothing to drain. +pub fn spawn_discard( + reader: Option, +) -> Option> { + reader.map(|mut r| { + std::thread::spawn(move || { + let _ = std::io::copy(&mut r, &mut std::io::sink()); + }) + }) +} + +/// Join a [`spawn_discard`] thread (no-op when absent). +pub fn join_discard(handle: Option>) { + if let Some(t) = handle { + let _ = t.join(); + } +} + +/// Take a readable stream out of an `Option` and box it as a trait object, for +/// the [`SandboxProcess::take_stdout`] / [`SandboxProcess::take_stderr`] +/// accessors. Returns `None` if already taken. +pub fn take_boxed_read( + slot: &mut Option, +) -> Option> { + slot.take().map(|r| Box::new(r) as Box) +} + +/// Take a writable stream out of an `Option` and box it as a trait object, for +/// the [`SandboxProcess::take_stdin`] accessor. Returns `None` if already taken. +pub fn take_boxed_write( + slot: &mut Option, +) -> Option> { + slot.take().map(|w| Box::new(w) as Box) +} + +/// Clone a stored stream canceller and box it as a [`StreamCloser`], for the +/// [`SandboxProcess::stdout_closer`] / [`SandboxProcess::stderr_closer`] +/// accessors. Returns `None` when there is no canceller (non-streamed stdio). +pub fn boxed_closer( + canceller: &Option, +) -> Option> { + canceller + .clone() + .map(|c| Box::new(c) as Box) +} + +/// Join a not-taken stdout/stderr discard thread from +/// [`wait`](SandboxProcess::wait), first cancelling its read so the join can't +/// block. When the stream was drained (a [`spawn_discard`] thread exists), fire +/// `canceller` before joining: a backgrounded descendant holding the pipe's +/// write end open past the foreground child's exit would otherwise keep the +/// discard [`io::copy`](std::io::copy) — and thus `wait()` — from ever returning +/// under a wait-forever (`scriptTimeout == 0`) timeout. The drained output is +/// discarded regardless, so cutting it short is harmless. +/// +/// Call *after* the child has exited (so its own output has drained normally). +/// A no-op when the caller took the stream (`drain` is `None`): there is no +/// thread to join, and the canceller must not fire while the caller may still be +/// reading. +pub fn cancel_and_join_discard( + drain: Option>, + canceller: &Option, +) { + if drain.is_some() { + if let Some(canceller) = canceller { + canceller.close(); + } + } + join_discard(drain); +} + +/// SIGKILL a Unix child's process group. The backends make the child a group +/// leader (`setsid()` / `process_group(0)`), so `-pid` targets that group — +/// never the host's — killing the leader and every descendant. +/// +/// No graceful `SIGTERM` first: it's unreliable (a `/bin/sh -c …` wrapper parked +/// in a foreground `wait` defers it and finishes the script) and sandboxed code +/// isn't owed a cleanup window. The **leader is killed before the group**: a +/// `-pid`-only sweep races — the kernel can kill a descendant first, waking the +/// shell to run one more command (seen as post-timeout output on the Inherit +/// path) before its own signal lands — so we make the leader's SIGKILL pending +/// first. The caller reaps the direct child afterwards. +#[cfg(unix)] +pub fn group_kill(child: &mut std::process::Child) -> std::io::Result<()> { + // The child is unreaped, so its pid (== pgid) can't have been recycled. + let pid = child.id() as i32; + // SAFETY: `kill(2)` with a plain pid / negative pgid — just integers. + unsafe { + libc::kill(pid, libc::SIGKILL); // leader first + libc::kill(-pid, libc::SIGKILL); // then its group + } + Ok(()) +} + +/// Outcome of [`wait_with_timeout`]: the child exited, the deadline passed, or +/// the wait itself failed. +#[cfg(unix)] +pub enum WaitError { + Timeout, + Io(std::io::Error), +} + +/// Wait for `child` to exit. With a timeout we poll (rather than add an async +/// runtime), starting at a short interval and backing off to a cap: a quick +/// child is detected within ~a millisecond instead of always paying a full +/// fixed tick, while a long run settles to an inexpensive cadence. Each sleep is +/// clamped to the time remaining so even sub-interval timeouts fire on time. +/// Shared by the Unix run-to-completion backends. +#[cfg(unix)] +pub fn wait_with_timeout( + child: &mut std::process::Child, + timeout: Option, +) -> Result { + use std::time::{Duration, Instant}; + // Poll interval grows from this floor to the cap (doubling each idle tick), + // trading low exit-detection latency for short runs against an inexpensive + // cadence for long ones. + const MIN_POLL: Duration = Duration::from_millis(1); + const MAX_POLL: Duration = Duration::from_millis(50); + + let Some(deadline) = timeout.map(|d| Instant::now() + d) else { + return child.wait().map_err(WaitError::Io); + }; + let mut interval = MIN_POLL; + loop { + match child.try_wait() { + Ok(Some(status)) => return Ok(status), + Ok(None) => { + let now = Instant::now(); + if now >= deadline { + return Err(WaitError::Timeout); + } + std::thread::sleep((deadline - now).min(interval)); + interval = (interval * 2).min(MAX_POLL); + } + Err(error) => return Err(WaitError::Io(error)), + } + } +} + +/// How a [`SandboxBackend`] wires the sandboxed child's standard streams. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum StdioMode { + /// stdin/stdout/stderr are fresh pipes the caller drives via the handle's + /// `take_*` accessors (the `mxc` library / streaming path). The child sees + /// no TTY and leads its own process group so it can be tree-terminated. + Pipes, + /// The child inherits the current process's stdin/stdout/stderr (the CLI + /// executor path): its output goes straight to the binary's own stdio, so + /// the child sees a TTY exactly when the binary does. The returned handle's + /// `take_*` all return `None`; [`wait`](SandboxProcess::wait) just waits. + Inherit, +} + +/// A containment backend that spawns a sandboxed process and hands back a +/// [`SandboxProcess`] handle — the single entry point for starting a sandbox. +/// +/// The caller picks how the child's stdio is wired ([`StdioMode`]) and then +/// drives the handle: stream it ([`StdioMode::Pipes`]) or just +/// [`wait`](SandboxProcess::wait) ([`StdioMode::Inherit`]). The `mxc` library +/// calls this directly with [`StdioMode::Pipes`]; the CLI executor binaries +/// reach it through the [`Runner`] bridge. +pub trait SandboxBackend { + /// Backend-specific validation, run before [`spawn`](SandboxBackend::spawn) + /// and on dry-run. Override to reject unsupported policies; default accepts. + fn validate(&self, _request: &ExecutionRequest) -> Result<(), ScriptResponse> { + Ok(()) + } + + /// Apply this backend's containment and spawn the sandboxed process with + /// stdio wired per `stdio`, returning a handle. On a validation or spawn + /// failure returns a [`ScriptResponse`] carrying the error. + fn spawn( + &mut self, + request: &ExecutionRequest, + logger: &mut Logger, + stdio: StdioMode, + ) -> Result, ScriptResponse>; + + /// Optional post-exit diagnostics for the run-to-completion (binary) path: + /// when the child exits non-zero, return a more actionable error message + /// (e.g. a known AppContainer filesystem-permission failure). Default: none. + /// The streaming/library path does not call this. + fn diagnose_exit(&self, _request: &ExecutionRequest, _exit_code: i32) -> Option { + None + } +} + +/// The single run-to-completion bridge: adapts any [`SandboxBackend`] to the +/// [`ScriptRunner`] contract the executor binaries (`wxc-exec` / `lxc-exec` / +/// `mxc-exec-mac`) dispatch over. +/// +/// It spawns the child with [`StdioMode::Inherit`] — so the sandboxed process +/// reads/writes the binary's own stdio directly (a TTY when the binary has +/// one) — and [`wait`](SandboxProcess::wait)s for exit, mapping the outcome to +/// a [`ScriptResponse`]. Because the child streams straight to the binary's +/// stdio, `standard_out`/`standard_err` stay empty (the binaries already print +/// those, which is then a no-op). +/// +/// This is the *only* run-to-completion logic for these backends; the backends +/// themselves expose just [`SandboxBackend::spawn`]. +pub struct Runner(B); + +impl Runner { + /// Wrap a [`SandboxBackend`] so it can be dispatched as a [`ScriptRunner`]. + pub fn new(backend: B) -> Self { + Self(backend) + } +} + +impl ScriptRunner for Runner { + fn validate_runner(&self, request: &ExecutionRequest) -> Result<(), ScriptResponse> { + self.0.validate(request) + } + + fn execute(&mut self, request: &ExecutionRequest, logger: &mut Logger) -> ScriptResponse { + let mut child = match self.0.spawn(request, logger, StdioMode::Inherit) { + Ok(child) => child, + Err(response) => return response, + }; + match child.wait() { + Ok(exit_code) => { + let mut response = ScriptResponse { + exit_code, + failure_phase: if exit_code == 0 { + FailurePhase::None + } else { + FailurePhase::ProcessExited + }, + ..Default::default() + }; + // Let the backend enrich a non-zero exit with an actionable + // message (the child streamed live, so the response is otherwise + // empty). + if exit_code != 0 { + if let Some(msg) = self.0.diagnose_exit(request, exit_code) { + logger.log_line(&format!("Error: Launch diagnostic: {msg}")); + response.standard_err.push_str(&msg); + response.error_message = msg; + } + } + response + } + Err(e) if e.kind() == std::io::ErrorKind::TimedOut => ScriptResponse { + exit_code: -1, + error_message: format!("script timed out after {}ms", request.script_timeout), + failure_phase: FailurePhase::Timeout, + ..Default::default() + }, + Err(e) => ScriptResponse::error(&format!("wait failed: {e}")), + } + } +} + +#[cfg(all(test, unix))] +mod tests { + use super::{wait_with_timeout, WaitError}; + use std::process::Command; + use std::time::{Duration, Instant}; + + #[test] + fn wait_with_timeout_detects_quick_exit_promptly() { + // A child that exits almost immediately is reaped well before a generous + // deadline -- the adaptive poll starts in the millisecond range, so the + // detection latency is small (the old fixed 50ms tick was the worst case). + let mut child = Command::new("true").spawn().expect("spawn true"); + let start = Instant::now(); + let status = match wait_with_timeout(&mut child, Some(Duration::from_secs(10))) { + Ok(status) => status, + Err(_) => panic!("a quick child must exit, not time out"), + }; + assert!(status.success(), "`true` exits 0"); + assert!( + start.elapsed() < Duration::from_secs(1), + "quick exit should be detected promptly, took {:?}", + start.elapsed() + ); + } + + #[test] + fn wait_with_timeout_fires_at_the_deadline() { + // A long-running child hits the timeout branch at (not before) the + // deadline, even though the deadline is shorter than the poll cap. + let mut child = Command::new("sleep") + .arg("30") + .spawn() + .expect("spawn sleep"); + let start = Instant::now(); + let result = wait_with_timeout(&mut child, Some(Duration::from_millis(200))); + let elapsed = start.elapsed(); + let _ = child.kill(); + let _ = child.wait(); + assert!(matches!(result, Err(WaitError::Timeout)), "should time out"); + assert!( + elapsed >= Duration::from_millis(200), + "must not fire before the deadline, fired at {elapsed:?}" + ); + assert!( + elapsed < Duration::from_secs(2), + "should fire near the deadline, fired at {elapsed:?}" + ); + } + + #[test] + fn wait_with_timeout_without_deadline_waits_for_exit() { + // A `None` timeout blocks until the child exits. + let mut child = Command::new("true").spawn().expect("spawn true"); + let status = match wait_with_timeout(&mut child, None) { + Ok(status) => status, + Err(_) => panic!("blocking wait must return the exit status"), + }; + assert!(status.success()); + } +} From 93eafce4faa4483487d4f2116372558071ba7d32 Mon Sep 17 00:00:00 2001 From: Carlos Alexandro Becker Date: Wed, 24 Jun 2026 09:52:19 -0300 Subject: [PATCH 2/9] refactor(backends): unify the library backends on SandboxBackend + route binaries via Runner MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Migrate the three in-process backends — Seatbelt (macOS), Bubblewrap (Linux), and Windows ProcessContainer (AppContainer + BaseContainer) — onto the SandboxBackend / SandboxProcess interfaces added in the previous PR, and route the executor binaries (wxc-exec, lxc-exec, mxc-exec-mac) through the generic Runner adapter. The old per-backend run-to-completion logic is removed; each backend now exposes only spawn(), and Runner provides the single ScriptRunner the binaries dispatch on (spawn StdioMode::Inherit, then wait). Each backend gains a streaming handle with whole-process-tree termination (Unix process-group SIGKILL; Windows job-object terminate) and a uniform io::ErrorKind::TimedOut on scriptTimeout. Intentional behavior changes for existing binaries (call-outs for review): - Seatbelt now always env_clear()s the child (previously only when process.env was non-empty), aligning the binary with the SDK's documented "host env is not inherited" contract. - Seatbelt resolves an empty process.cwd to a policy read-write path (or "/") instead of the launcher's cwd. - Seatbelt/Bubblewrap inherit the executor's own stdio (StdioMode::Inherit) — Seatbelt no longer allocates a private pty, and Bubblewrap no longer forces stdin to /dev/null or post-exit-captures stdout/stderr (it streams live). - BaseContainer now places the child in a UiJobObject for tree-kill (it had none before); the child is created suspended, assigned to the job, then resumed so no descendant can escape the kill window. - kill() is a no-op once the child has been reaped, so it never signals a recycled pid/process-group. The macOS Seatbelt characterization suite is updated to assert the new env/cwd/ streaming/timeout behavior; the LXC and Seatbelt backend docs are updated to match. The default LXC path keeps its native pty and is unaffected. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Signed-off-by: Carlos Alexandro Becker --- docs/lxc-support/lxc-backend.md | 2 +- docs/macos-support/seatbelt-backend.md | 15 + src/Cargo.lock | 1 - .../common/src/appcontainer_runner.rs | 713 ++++++++++++----- .../common/src/base_container_runner.rs | 695 +++++++++++++---- .../appcontainer/common/src/dispatcher.rs | 15 +- .../appcontainer/common/src/job_object.rs | 20 +- .../common/src/network_manager.rs | 198 ++--- src/backends/appcontainer/common/src/probe.rs | 2 +- .../bubblewrap/common/src/bwrap_runner.rs | 390 ++++++--- src/backends/seatbelt/common/Cargo.toml | 4 - .../seatbelt/common/src/profile_builder.rs | 14 +- .../seatbelt/common/src/seatbelt_runner.rs | 738 +++++++++++------- src/core/lxc/src/main.rs | 4 +- src/core/mxc_darwin/src/main.rs | 3 +- src/core/wxc/src/main.rs | 3 +- .../tests/e2e_seatbelt_characterization.rs | 83 +- 17 files changed, 1961 insertions(+), 939 deletions(-) diff --git a/docs/lxc-support/lxc-backend.md b/docs/lxc-support/lxc-backend.md index 32c20d3b5..b94cd2861 100644 --- a/docs/lxc-support/lxc-backend.md +++ b/docs/lxc-support/lxc-backend.md @@ -91,7 +91,7 @@ The `process.cwd` and `process.env` fields from the standard schema are honored | `process.cwd` | `cd -- "$1" && exec /bin/sh -c "$2"` wrapper prelude, with the cwd passed as a positional argument | Empty string preserves the container default cwd. A nonexistent or non-permitted path surfaces as a generic non-zero exit (typically `1`, from `cd`'s own status); callers needing strong cwd validation should pre-check the path. The positional-arg trick means cwd values with spaces, quotes, `$vars`, or backticks pass through verbatim with no shell escaping. | | `process.env` | Each `KEY=VAL` entry becomes a repeated `--set-var=KEY=VAL` flag to `lxc-attach` | Malformed entries — those without `=` (e.g. `"BADENTRY"`) or with an empty key (e.g. `"=foo"`) — are silently skipped. Embedded `=` in the value (e.g. `"X=a=b=c"`) is preserved. | -**Replace semantics.** When `process.env` is non-empty, `lxc-exec` also passes `--clear-env` to `lxc-attach` so the host environment does **not** leak into the sandbox, regardless of how many entries survive the malformed-skip. This matches the Seatbelt backend's `env_clear()`-on-non-empty contract and is the posture `lxc-attach(1)` recommends for sandbox-spawn callers. If a variable is set in both the host and `process.env`, the `process.env` value wins. +**Replace semantics.** When `process.env` is non-empty, `lxc-exec` also passes `--clear-env` to `lxc-attach` so the host environment does **not** leak into the sandbox, regardless of how many entries survive the malformed-skip. This is the posture `lxc-attach(1)` recommends for sandbox-spawn callers. If a variable is set in both the host and `process.env`, the `process.env` value wins. When `process.env` is empty (or absent), the legacy keep-env behavior is preserved and the host environment is inherited. diff --git a/docs/macos-support/seatbelt-backend.md b/docs/macos-support/seatbelt-backend.md index dd2ad0b69..039dd0761 100644 --- a/docs/macos-support/seatbelt-backend.md +++ b/docs/macos-support/seatbelt-backend.md @@ -207,6 +207,21 @@ SDK rejects it with a clear error, mirroring the Linux behavior. | `ui.clipboard: "none"` (default) | `(deny mach-lookup (global-name "com.apple.pasteboard.1"))` | | `ui.injection: false` (default) | `(deny iokit-open (iokit-user-client-class "IOHIDLibUserClient"))` | +### Process environment + +The host environment is **never** inherited — the sandboxed child always starts +from a cleared environment, so host secrets (cloud credentials, API tokens) can +never leak into untrusted code. `PATH` defaults to `/usr/bin:/bin:/usr/sbin:/sbin`, +and each `process.env` entry adds to / overrides that baseline. (This is +unconditional; it applies whether or not `process.env` is provided.) + +### Working directory + +If `process.cwd` is omitted it resolves to `readwritePaths[0]`, else +`readonlyPaths[0]`, else `/`; a `~`/`~/…` default is tilde-expanded the same way +the sandbox profile expands policy paths. `PWD` is exported to the resolved +directory so the child's `getcwd()` takes its fast `$PWD` path. + ## Usage ### Command line diff --git a/src/Cargo.lock b/src/Cargo.lock index a4397660b..cead9eaad 100644 --- a/src/Cargo.lock +++ b/src/Cargo.lock @@ -1906,7 +1906,6 @@ name = "seatbelt_common" version = "0.7.0" dependencies = [ "libc", - "mxc_pty", "wxc_common", ] diff --git a/src/backends/appcontainer/common/src/appcontainer_runner.rs b/src/backends/appcontainer/common/src/appcontainer_runner.rs index cf7aba8ad..cd2edd00b 100644 --- a/src/backends/appcontainer/common/src/appcontainer_runner.rs +++ b/src/backends/appcontainer/common/src/appcontainer_runner.rs @@ -6,7 +6,7 @@ use std::ptr; use windows::Win32::Foundation::{ CloseHandle, GetLastError, LocalFree, SetHandleInformation, ERROR_ALREADY_EXISTS, HANDLE, - HANDLE_FLAG_INHERIT, HLOCAL, WAIT_FAILED, WAIT_OBJECT_0, WAIT_TIMEOUT, + HANDLE_FLAG_INHERIT, HLOCAL, WAIT_OBJECT_0, WAIT_TIMEOUT, }; use windows::Win32::Security::Authorization::ConvertSidToStringSidW; use windows::Win32::Security::Isolation::{ @@ -35,8 +35,15 @@ use crate::process_mitigation; use wxc_common::error::WxcError; use wxc_common::logger::Logger; use wxc_common::models::{ExecutionRequest, NetworkEnforcementMode, NetworkPolicy, ScriptResponse}; -use wxc_common::process_util::{get_capability_sid_from_name, OwnedHandle, SidAndAttributes}; -use wxc_common::script_runner::{get_timeout_milliseconds, ScriptRunner}; +use wxc_common::process_util::{ + create_std_pipes, get_capability_sid_from_name, InterruptiblePipeReader, OwnedHandle, + PipeReadCanceller, PipeWriter, SendOwnedHandle, SidAndAttributes, +}; +use wxc_common::sandbox_process::{ + boxed_closer, cancel_and_join_discard, spawn_discard, take_boxed_read, take_boxed_write, + SandboxBackend, SandboxProcess, StdioMode, StreamCloser, +}; +use wxc_common::script_runner::get_timeout_milliseconds; use wxc_common::{string_util, ui_policy}; /// `UpdateProcThreadAttribute` value for @@ -147,20 +154,25 @@ fn build_explicit_entries( .collect(); if let Some(addr) = proxy_address { - // Strip existing proxy vars before injecting ours. - entries.retain(|(key, _)| { - !PROXY_VAR_NAMES - .iter() - .any(|name| key.eq_ignore_ascii_case(name)) - }); - let proxy_url = addr.to_url(); - entries.push(("HTTP_PROXY".to_string(), proxy_url.clone())); - entries.push(("HTTPS_PROXY".to_string(), proxy_url)); + inject_proxy_vars(&mut entries, addr); } entries } +/// Strip any pre-existing proxy env vars from `entries`, then inject the +/// configured proxy as `HTTP_PROXY` / `HTTPS_PROXY`. +fn inject_proxy_vars(entries: &mut Vec<(String, String)>, addr: &wxc_common::models::ProxyAddress) { + entries.retain(|(key, _)| { + !PROXY_VAR_NAMES + .iter() + .any(|name| key.eq_ignore_ascii_case(name)) + }); + let proxy_url = addr.to_url(); + entries.push(("HTTP_PROXY".to_string(), proxy_url.clone())); + entries.push(("HTTPS_PROXY".to_string(), proxy_url)); +} + /// RAII guard that frees capability SID pointers via `LocalFree` on drop. /// Ensures SIDs are freed regardless of the error return path. struct CapabilitySidGuard(Vec<*mut core::ffi::c_void>); @@ -415,12 +427,18 @@ impl AppContainerScriptRunner { } } - /// Core implementation of `run_internal`, returning `Result` for ergonomic error handling. - fn run_internal_impl( + /// Set up the AppContainer and create the sandboxed child **suspended**, + /// returning a [`SpawnedChild`] the caller resumes and then either waits on + /// (run-to-completion) or wraps in a streaming handle. When `capture` is set + /// the child's stdio is wired to pipes the caller drives (the streaming + /// path); otherwise the child inherits the parent's std handles / console + /// (the run-to-completion path). + fn spawn_suspended( &self, request: &ExecutionRequest, logger: &mut Logger, - ) -> Result { + capture: bool, + ) -> Result { // --- Validate permissiveLearningMode --- for cap in &request.policy.capabilities { if cap == "permissiveLearningMode" { @@ -434,8 +452,9 @@ impl AppContainerScriptRunner { } #[cfg(not(debug_assertions))] { - return Ok(ScriptResponse::error( - "SECURITY: permissiveLearningMode not allowed in release builds", + return Err(WxcError::Validation( + "SECURITY: permissiveLearningMode not allowed in release builds" + .to_string(), )); } } @@ -495,10 +514,20 @@ impl AppContainerScriptRunner { // we forward our own std handles to the child via STARTF_USESTDHANDLES so the // child's output streams directly to the SDK in real time. Otherwise we use // console sharing (the ConPTY path). - let pipe_mode = !std::io::stdout().is_terminal() || !std::io::stderr().is_terminal(); + // + // In capture mode (`StdioMode::Pipes`) we always take the pipe + // path — but instead of forwarding our own std handles we wire the + // child to capture pipes that the streaming handle reads from. + let pipe_mode = + capture || !std::io::stdout().is_terminal() || !std::io::stderr().is_terminal(); if pipe_mode { - logger.log_line("STDIO mode: passthrough (forwarding parent handles to child)"); + if capture { + logger + .log_line("STDIO mode: capture (piping child output to the streaming handle)"); + } else { + logger.log_line("STDIO mode: passthrough (forwarding parent handles to child)"); + } } // --- Allocate and initialize attribute list --- @@ -609,48 +638,86 @@ impl AppContainerScriptRunner { logger.log_line("Win32k mitigation applied to child process"); } - // --- Setup handle passthrough (pipe mode only) --- - // Forward wxc-exec's own stdin/stdout/stderr handles to the child so the - // child's output streams directly to the SDK caller in real time. - // Handle list for PROC_THREAD_ATTRIBUTE_HANDLE_LIST. Must outlive CreateProcessW. + // --- Setup handle passthrough / capture (pipe mode only) --- + // In passthrough mode we forward wxc-exec's own std handles to the + // child so its output streams to the caller. In capture mode we wire + // the child to fresh capture pipes that the streaming handle reads from + // (the `mxc` library path). Handle list for + // PROC_THREAD_ATTRIBUTE_HANDLE_LIST. Must outlive CreateProcessW. let mut handle_list: Vec = Vec::new(); let h_stdin; let h_stdout; let h_stderr; + // Capture pipe read-ends (parent side): kept alive until after the + // wait, then drained. Child-side ends (stdin read, stdout/stderr + // write): kept alive until after CreateProcessW, then dropped so the + // read-ends observe EOF when the child exits. + let mut capture_reads: Option<(OwnedHandle, OwnedHandle)> = None; + let mut capture_child_ends: Vec = Vec::new(); + // Parent's stdin write-end; in capture mode it is handed to the caller + // so they can write to the child. + let mut captured_stdin_write: Option = None; + if pipe_mode { - h_stdin = unsafe { GetStdHandle(STD_INPUT_HANDLE) } - .map_err(|e| WxcError::Process(format!("GetStdHandle(STDIN): {e}")))?; - h_stdout = unsafe { GetStdHandle(STD_OUTPUT_HANDLE) } - .map_err(|e| WxcError::Process(format!("GetStdHandle(STDOUT): {e}")))?; - h_stderr = unsafe { GetStdHandle(STD_ERROR_HANDLE) } - .map_err(|e| WxcError::Process(format!("GetStdHandle(STDERR): {e}")))?; - - if h_stdin.is_invalid() || h_stdin == HANDLE::default() { - return Err(WxcError::Process( - "GetStdHandle(STDIN) returned null/invalid handle".to_string(), - )); - } - if h_stdout.is_invalid() || h_stdout == HANDLE::default() { - return Err(WxcError::Process( - "GetStdHandle(STDOUT) returned null/invalid handle".to_string(), - )); - } - if h_stderr.is_invalid() || h_stderr == HANDLE::default() { - return Err(WxcError::Process( - "GetStdHandle(STDERR) returned null/invalid handle".to_string(), - )); - } + if capture { + // create_std_pipes(false): read-end inheritable (child stdin), + // write-end non-inheritable (kept for streaming, else dropped). + let (stdin_read, stdin_write) = create_std_pipes(false)?; + // create_std_pipes(true): read-end non-inheritable (parent + // reads it), write-end inheritable (child writes to it). + let (stdout_read, stdout_write) = create_std_pipes(true)?; + let (stderr_read, stderr_write) = create_std_pipes(true)?; + + h_stdin = stdin_read.get(); + h_stdout = stdout_write.get(); + h_stderr = stderr_write.get(); + + capture_child_ends.push(stdin_read); + capture_child_ends.push(stdout_write); + capture_child_ends.push(stderr_write); + captured_stdin_write = Some(stdin_write); + capture_reads = Some((stdout_read, stderr_read)); + } else { + h_stdin = unsafe { GetStdHandle(STD_INPUT_HANDLE) } + .map_err(|e| WxcError::Process(format!("GetStdHandle(STDIN): {e}")))?; + h_stdout = unsafe { GetStdHandle(STD_OUTPUT_HANDLE) } + .map_err(|e| WxcError::Process(format!("GetStdHandle(STDOUT): {e}")))?; + h_stderr = unsafe { GetStdHandle(STD_ERROR_HANDLE) } + .map_err(|e| WxcError::Process(format!("GetStdHandle(STDERR): {e}")))?; + + if h_stdin.is_invalid() || h_stdin == HANDLE::default() { + return Err(WxcError::Process( + "GetStdHandle(STDIN) returned null/invalid handle".to_string(), + )); + } + if h_stdout.is_invalid() || h_stdout == HANDLE::default() { + return Err(WxcError::Process( + "GetStdHandle(STDOUT) returned null/invalid handle".to_string(), + )); + } + if h_stderr.is_invalid() || h_stderr == HANDLE::default() { + return Err(WxcError::Process( + "GetStdHandle(STDERR) returned null/invalid handle".to_string(), + )); + } - // Ensure the handles are inheritable. - unsafe { - SetHandleInformation(h_stdin, HANDLE_FLAG_INHERIT.0, HANDLE_FLAG_INHERIT) - .map_err(|e| WxcError::Process(format!("SetHandleInformation(STDIN): {e}")))?; - SetHandleInformation(h_stdout, HANDLE_FLAG_INHERIT.0, HANDLE_FLAG_INHERIT) - .map_err(|e| WxcError::Process(format!("SetHandleInformation(STDOUT): {e}")))?; - SetHandleInformation(h_stderr, HANDLE_FLAG_INHERIT.0, HANDLE_FLAG_INHERIT) - .map_err(|e| WxcError::Process(format!("SetHandleInformation(STDERR): {e}")))?; + // Ensure the handles are inheritable. + unsafe { + SetHandleInformation(h_stdin, HANDLE_FLAG_INHERIT.0, HANDLE_FLAG_INHERIT) + .map_err(|e| { + WxcError::Process(format!("SetHandleInformation(STDIN): {e}")) + })?; + SetHandleInformation(h_stdout, HANDLE_FLAG_INHERIT.0, HANDLE_FLAG_INHERIT) + .map_err(|e| { + WxcError::Process(format!("SetHandleInformation(STDOUT): {e}")) + })?; + SetHandleInformation(h_stderr, HANDLE_FLAG_INHERIT.0, HANDLE_FLAG_INHERIT) + .map_err(|e| { + WxcError::Process(format!("SetHandleInformation(STDERR): {e}")) + })?; + } } handle_list.push(h_stdin); @@ -721,16 +788,7 @@ impl AppContainerScriptRunner { // Get clean default user env without inheriting process env vars. let mut entries = create_default_env_entries()?; if let Some(addr) = self.proxy_address.as_ref() { - // Strip any pre-existing proxy vars from the default block - // and inject our configured proxy. - entries.retain(|(key, _)| { - !PROXY_VAR_NAMES - .iter() - .any(|name| key.eq_ignore_ascii_case(name)) - }); - let proxy_url = addr.to_url(); - entries.push(("HTTP_PROXY".to_string(), proxy_url.clone())); - entries.push(("HTTPS_PROXY".to_string(), proxy_url)); + inject_proxy_vars(&mut entries, addr); } encode_env_block(&entries) }; @@ -789,13 +847,17 @@ impl AppContainerScriptRunner { pi.dwProcessId )); + // The child has inherited the pipe handles, so close the parent's + // child-side ends now (otherwise the read-ends would never see EOF). + capture_child_ends.clear(); + let process_handle = OwnedHandle::new(pi.hProcess); let thread_handle = OwnedHandle::new(pi.hThread); // CRITICAL: child was created with CREATE_SUSPENDED. We must either - // successfully attach the Job Object and ResumeThread, OR TerminateProcess. - // Anything that returns an error in this block must terminate first. - let _job = match (|| -> Result { + // successfully attach the Job Object, OR TerminateProcess. Anything + // that returns an error in this block must terminate first. + let job = match (|| -> Result { let job = UiJobObject::new()?; let restrictions = ui_policy::resolve_ui_restrictions( &request.policy.ui, @@ -817,56 +879,22 @@ impl AppContainerScriptRunner { } }; - // Resume the child now that UI restrictions are in place. - // ResumeThread returns the previous suspend count (or u32::MAX on failure). - let resume_result = unsafe { ResumeThread(thread_handle.get()) }; - if resume_result == u32::MAX { - let err = unsafe { GetLastError() }; - unsafe { - let _ = TerminateProcess(process_handle.get(), u32::MAX); - } - return Err(WxcError::Process(format!("ResumeThread failed: {:?}", err))); - } - - // --- Wait for child process to exit --- - let timeout_ms = get_timeout_milliseconds(request.script_timeout); - - let wait_result = unsafe { WaitForSingleObject(process_handle.get(), timeout_ms) }; - - match wait_result { - WAIT_OBJECT_0 => {} - WAIT_TIMEOUT => unsafe { - let _ = TerminateProcess(process_handle.get(), u32::MAX); - let _ = WaitForSingleObject(process_handle.get(), u32::MAX); - }, - WAIT_FAILED => { - let err = unsafe { GetLastError() }; - return Err(WxcError::Process(format!( - "WaitForSingleObject failed: {:?}", - err - ))); - } - other => { - return Err(WxcError::Process(format!( - "WaitForSingleObject returned unexpected value: {}", - other.0 - ))); - } - } - - // --- Get exit code --- - let mut exit_code: u32 = 0; - unsafe { - GetExitCodeProcess(process_handle.get(), &mut exit_code) - .map_err(|_| WxcError::Process("GetExitCodeProcess failed".into()))?; - } + let (stdout_read, stderr_read) = match capture_reads { + Some((out, err)) => (Some(out), Some(err)), + None => (None, None), + }; - Ok(ScriptResponse { - exit_code: exit_code as i32, - standard_out: String::new(), - standard_err: String::new(), - error_message: String::new(), - ..Default::default() + // The child is still suspended; the caller resumes it (after starting + // any drain threads, for the run-to-completion path). + Ok(SpawnedChild { + process: process_handle, + thread: thread_handle, + job, + pid: pi.dwProcessId, + stdin_write: captured_stdin_write, + stdout_read, + stderr_read, + timeout_ms: get_timeout_milliseconds(request.script_timeout), }) } @@ -895,13 +923,38 @@ impl AppContainerScriptRunner { } unsafe { string_util::sid_to_string(self.app_container_sid.0, "unknown-sid") } } +} - /// Execute the script inside the AppContainer, converting errors to ScriptResponse. - fn run_internal(&mut self, request: &ExecutionRequest, logger: &mut Logger) -> ScriptResponse { - match self.run_internal_impl(request, logger) { - Ok(response) => response, - Err(e) => ScriptResponse::error(&e.to_string()), +/// A sandboxed AppContainer child created **suspended** by +/// [`AppContainerScriptRunner::spawn_suspended`]. The caller resumes it and +/// then either runs it to completion (blocking) or wraps it in a streaming +/// handle. Owns the process/thread/job handles and the parent-side pipe ends. +struct SpawnedChild { + process: OwnedHandle, + thread: OwnedHandle, + job: UiJobObject, + /// OS process id of the child. + pid: u32, + /// Parent's stdin write-end (Some only when spawned for streaming). + stdin_write: Option, + /// Parent's stdout/stderr read-ends (Some only in streaming mode). + stdout_read: Option, + stderr_read: Option, + timeout_ms: u32, +} + +impl SpawnedChild { + /// Resume the suspended child, terminating it on failure. + fn resume(&self) -> Result<(), WxcError> { + let r = unsafe { ResumeThread(self.thread.get()) }; + if r == u32::MAX { + let err = unsafe { GetLastError() }; + unsafe { + let _ = TerminateProcess(self.process.get(), u32::MAX); + } + return Err(WxcError::Process(format!("ResumeThread failed: {:?}", err))); } + Ok(()) } } @@ -911,28 +964,74 @@ impl Default for AppContainerScriptRunner { } } -impl ScriptRunner for AppContainerScriptRunner { - fn validate_runner(&self, request: &ExecutionRequest) -> Result<(), ScriptResponse> { - if !request.policy.denied_paths.is_empty() && self.filesystem_mode != FilesystemMode::Dacl { - return Err(ScriptResponse::error( - wxc_common::error::DENIED_PATHS_NOT_SUPPORTED_MSG, - )); +/// Delete the AppContainer profile created via [`CreateAppContainerProfile`] +/// and clear any BFS policy registered against it. +/// +/// This is the explicit cleanup entry point used by `wxc-exec --delete`, +/// kept next to the create/setup path on `AppContainerScriptRunner` so +/// both ends of the profile lifecycle live in the same module. +/// +/// The BFS-clear step is best-effort: it delegates to +/// [`FileSystemBfsManager::clear_policy`], which resolves `bfscfg.exe` +/// itself and logs (rather than fails) when the resolver returns no +/// path. The profile delete is still attempted in that case. +pub fn delete_app_container_profile(name: &str, logger: &mut Logger) -> bool { + crate::filesystem_bfs::FileSystemBfsManager::clear_policy(name, logger); + + let wide_name: Vec = name.encode_utf16().chain(std::iter::once(0)).collect(); + let hstring = windows::core::HSTRING::from_wide(&wide_name[..wide_name.len() - 1]); + match unsafe { DeleteAppContainerProfile(&hstring) } { + Ok(()) => { + logger.log_line(&format!("Deleted AppContainer profile: {}", name)); + true } - if !request.policy.allowed_hosts.is_empty() || !request.policy.blocked_hosts.is_empty() { - return Err(ScriptResponse::error( - wxc_common::error::HOST_LISTS_NOT_SUPPORTED_MSG, + Err(e) => { + logger.log_line(&format!( + "Failed to delete AppContainer profile '{}': {}", + name, e )); + false + } + } +} + +impl Drop for AppContainerScriptRunner { + fn drop(&mut self) { + if !self.app_container_sid.0.is_null() { + unsafe { + // AppContainer SIDs from CreateAppContainerProfile / + // DeriveAppContainerSidFromAppContainerName must be freed with FreeSid. + windows::Win32::Security::FreeSid(self.app_container_sid); + } + self.app_container_sid = PSID(ptr::null_mut()); } - Ok(()) } +} + +// ─────────────────────────────────────────────────────────────────────────── +// Shared setup/teardown + streaming (handle-based) execution +// ─────────────────────────────────────────────────────────────────────────── - fn execute(&mut self, request: &ExecutionRequest, logger: &mut Logger) -> ScriptResponse { +/// Per-run resources (firewall + filesystem policy) whose lifetime is tied to +/// the sandboxed child. Created by [`AppContainerScriptRunner::prepare`] and +/// torn down by [`AppContainerScriptRunner::teardown`] after the child exits. +struct Prepared { + network_manager: crate::network_manager::NetworkManager, + bfs_manager: crate::filesystem_bfs::FileSystemBfsManager, +} + +impl AppContainerScriptRunner { + /// Set up the AppContainer for a run: initialise the SID, configure BFS + /// filesystem policy, and start network enforcement. Shared by both stdio + /// modes of [`SandboxBackend::spawn`]. + fn prepare( + &mut self, + request: &ExecutionRequest, + logger: &mut Logger, + ) -> Result { use crate::filesystem_bfs::FileSystemBfsManager; - use crate::launch_diagnostics::diagnose_process_exit; use crate::network_manager::NetworkManager; - use wxc_common::models::FailurePhase; - // Apply experimental features when flag is set if request.experimental_enabled { if let Some(ref test) = request.experimental.test { logger.log_line(&format!( @@ -943,21 +1042,18 @@ impl ScriptRunner for AppContainerScriptRunner { } if let Err(e) = self.initialize(request) { - return ScriptResponse::error(&e.to_string()); + return Err(ScriptResponse::error(&e.to_string())); } let principal_id = self.get_principal_id(); logger.log_line(&format!("AppContainerSID: {principal_id}")); - // Resolve `bfscfg.exe` by absolute path so probe and execution - // agree on the binary — defeats executable-search-order - // hijacking (see `fallback_detector::find_bfscfg_exe`). Only - // resolve when we actually plan to use BFS; Tier 3 (DACL) hosts - // legitimately may not have `bfscfg.exe` installed. + // Resolve `bfscfg.exe` by absolute path (defeats search-order + // hijacking); only needed in BFS mode. let bfscfg_path = if self.filesystem_mode == FilesystemMode::Bfs { match crate::fallback_detector::find_bfscfg_exe() { Ok(p) => p, - Err(e) => return ScriptResponse::error(&e.to_string()), + Err(e) => return Err(ScriptResponse::error(&e.to_string())), } } else { None @@ -981,7 +1077,7 @@ impl ScriptRunner for AppContainerScriptRunner { } else { e.to_string() }; - return ScriptResponse::error(&msg); + return Err(ScriptResponse::error(&msg)); } } @@ -1005,92 +1101,283 @@ impl ScriptRunner for AppContainerScriptRunner { self.proxy_address = network_manager.proxy_address().cloned(); } Err(err) => { - return ScriptResponse::error(&err.to_string()); + return Err(ScriptResponse::error(&err.to_string())); } } - let mut response = match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { - self.run_internal(request, logger) - })) { - Ok(r) => r, - Err(_) => ScriptResponse::error("Unknown error during script execution."), - }; + Ok(Prepared { + network_manager, + bfs_manager, + }) + } - // Post-failure diagnostics: if the child failed, check for known - // environment issues and enrich the error message. - if response.exit_code != 0 { - response.failure_phase = FailurePhase::ProcessExited; - if let Some(diag) = diagnose_process_exit( - &request.script_code, - &request.policy.readonly_paths, - &request.policy.readwrite_paths, - response.exit_code as u32, - ) { - logger.log_line(&format!( - "Error: Launch diagnostic [{}]: {}", - diag.kind, diag.message - )); - if !response.error_message.is_empty() { - response.extended_error = response.error_message.clone(); - } - response.error_message = diag.message.clone(); - response.standard_err.push_str(&diag.message); + /// Tear down the per-run firewall and filesystem policy. Idempotent at the + /// manager level; called once after the child exits. + fn teardown(&self, prepared: &mut Prepared, preserve_policy: bool, logger: &mut Logger) { + prepared.network_manager.stop_all(!preserve_policy, logger); + if self.filesystem_mode == FilesystemMode::Bfs + && prepared.bfs_manager.configured() + && !preserve_policy + { + prepared.bfs_manager.remove_configuration(logger); + } + } +} + +impl SandboxBackend for AppContainerScriptRunner { + fn validate(&self, request: &ExecutionRequest) -> Result<(), ScriptResponse> { + if !request.policy.denied_paths.is_empty() && self.filesystem_mode != FilesystemMode::Dacl { + return Err(ScriptResponse::error( + wxc_common::error::DENIED_PATHS_NOT_SUPPORTED_MSG, + )); + } + if !request.policy.allowed_hosts.is_empty() || !request.policy.blocked_hosts.is_empty() { + return Err(ScriptResponse::error( + wxc_common::error::HOST_LISTS_NOT_SUPPORTED_MSG, + )); + } + Ok(()) + } + + fn spawn( + &mut self, + request: &ExecutionRequest, + logger: &mut Logger, + stdio: StdioMode, + ) -> Result, ScriptResponse> { + use wxc_common::validator::validate_common; + + validate_common(request)?; + self.validate(request)?; + + let mut prepared = self.prepare(request, logger)?; + + // Pipes → capture pipes the caller drives; Inherit → the child inherits + // the binary's own std handles / console (a TTY when the binary has one). + let capture = stdio == StdioMode::Pipes; + let child = match self.spawn_suspended(request, logger, capture) { + Ok(c) => c, + Err(e) => { + self.teardown(&mut prepared, request.lifecycle.preserve_policy, logger); + return Err(ScriptResponse::error(&e.to_string())); } + }; + if let Err(e) = child.resume() { + self.teardown(&mut prepared, request.lifecycle.preserve_policy, logger); + return Err(ScriptResponse::error(&e.to_string())); + } + + Ok(Box::new(AppContainerSandboxProcess::new( + child, + prepared, + self.filesystem_mode, + request, + ))) + } + + fn diagnose_exit(&self, request: &ExecutionRequest, exit_code: i32) -> Option { + crate::launch_diagnostics::diagnose_process_exit( + &request.script_code, + &request.policy.readonly_paths, + &request.policy.readwrite_paths, + exit_code as u32, + ) + .map(|diag| diag.message) + } +} + +/// A running AppContainer-sandboxed process exposed as a [`SandboxProcess`]. +/// Owns the process/job handles, the parent-side pipes, and the per-run +/// firewall/filesystem policy, which it tears down once the child exits. +struct AppContainerSandboxProcess { + process: SendOwnedHandle, + _thread: SendOwnedHandle, + job: crate::job_object::UiJobObject, + pid: u32, + stdin: Option, + stdout: Option, + stderr: Option, + /// Cancellers for the stdout/stderr reads, kept so the `SandboxProcess` + /// closers can mint a [`StreamCloser`] even after the stream is taken. + stdout_canceller: Option, + stderr_canceller: Option, + prepared: Prepared, + filesystem_mode: FilesystemMode, + preserve_policy: bool, + timeout_ms: u32, + teardown_done: bool, +} + +// SAFETY: the fields are Windows HANDLEs / handle-owning managers and owned +// strings. HANDLEs are process-global and safe to use from any single thread, +// and this handle is owned exclusively by the caller (not shared), so it is +// only ever touched from one thread at a time. +// +// The one historically thread-affine field was the `NetworkManager` inside +// `prepared`: it used to cache an STA `INetFwPolicy2` interface plus its +// `CoInitializeEx` state and reuse them at teardown, which is unsound when +// `wait()`/`kill()`/`Drop` run on a different thread (e.g. a tokio +// `spawn_blocking` worker) than `spawn`. That no longer happens: each firewall +// apply/remove is apartment-self-contained (it opens its own COM apartment, +// creates a fresh interface, and uninitializes — all on whichever thread runs +// it), so no COM interface or apartment state is moved across threads. The only +// remaining OS state the manager keeps is the process-global Winsock refcount, +// which is thread-agnostic. Moving this handle across threads is therefore +// sound. +unsafe impl Send for AppContainerSandboxProcess {} + +impl AppContainerSandboxProcess { + fn new( + mut child: SpawnedChild, + prepared: Prepared, + filesystem_mode: FilesystemMode, + request: &ExecutionRequest, + ) -> Self { + let process = SendOwnedHandle::take(&mut child.process); + let thread = SendOwnedHandle::take(&mut child.thread); + let stdin = child.stdin_write.take().map(PipeWriter::new); + let stdout = child.stdout_read.take().map(InterruptiblePipeReader::new); + let stderr = child.stderr_read.take().map(InterruptiblePipeReader::new); + let stdout_canceller = stdout.as_ref().map(InterruptiblePipeReader::canceller); + let stderr_canceller = stderr.as_ref().map(InterruptiblePipeReader::canceller); + Self { + process, + _thread: thread, + job: child.job, + pid: child.pid, + stdin, + stdout, + stderr, + stdout_canceller, + stderr_canceller, + prepared, + filesystem_mode, + preserve_policy: request.lifecycle.preserve_policy, + timeout_ms: child.timeout_ms, + teardown_done: false, } + } - network_manager.stop_all(!request.lifecycle.preserve_policy, logger); + fn run_teardown(&mut self) { + if self.teardown_done { + return; + } + self.teardown_done = true; + let mut logger = Logger::new(wxc_common::logger::Mode::Buffer); + self.prepared + .network_manager + .stop_all(!self.preserve_policy, &mut logger); if self.filesystem_mode == FilesystemMode::Bfs - && bfs_manager.configured() - && !request.lifecycle.preserve_policy + && self.prepared.bfs_manager.configured() + && !self.preserve_policy { - bfs_manager.remove_configuration(logger); + self.prepared.bfs_manager.remove_configuration(&mut logger); } - - response } } -/// Delete the AppContainer profile created via [`CreateAppContainerProfile`] -/// and clear any BFS policy registered against it. -/// -/// This is the explicit cleanup entry point used by `wxc-exec --delete`, -/// kept next to the create/setup path on `AppContainerScriptRunner` so -/// both ends of the profile lifecycle live in the same module. -/// -/// The BFS-clear step is best-effort: it delegates to -/// [`FileSystemBfsManager::clear_policy`], which resolves `bfscfg.exe` -/// itself and logs (rather than fails) when the resolver returns no -/// path. The profile delete is still attempted in that case. -pub fn delete_app_container_profile(name: &str, logger: &mut Logger) -> bool { - crate::filesystem_bfs::FileSystemBfsManager::clear_policy(name, logger); +impl SandboxProcess for AppContainerSandboxProcess { + fn take_stdin(&mut self) -> Option> { + take_boxed_write(&mut self.stdin) + } - let wide_name: Vec = name.encode_utf16().chain(std::iter::once(0)).collect(); - let hstring = windows::core::HSTRING::from_wide(&wide_name[..wide_name.len() - 1]); - match unsafe { DeleteAppContainerProfile(&hstring) } { - Ok(()) => { - logger.log_line(&format!("Deleted AppContainer profile: {}", name)); - true + fn take_stdout(&mut self) -> Option> { + take_boxed_read(&mut self.stdout) + } + + fn take_stderr(&mut self) -> Option> { + take_boxed_read(&mut self.stderr) + } + + fn stdout_closer(&self) -> Option> { + boxed_closer(&self.stdout_canceller) + } + + fn stderr_closer(&self) -> Option> { + boxed_closer(&self.stderr_canceller) + } + + fn try_wait(&mut self) -> std::io::Result> { + match unsafe { WaitForSingleObject(self.process.get(), 0) } { + WAIT_OBJECT_0 => { + let mut code: u32 = 0; + if unsafe { GetExitCodeProcess(self.process.get(), &mut code) }.is_err() { + return Err(std::io::Error::other("GetExitCodeProcess failed")); + } + Ok(Some(code as i32)) + } + WAIT_TIMEOUT => Ok(None), + _ => Err(std::io::Error::other("WaitForSingleObject failed")), } - Err(e) => { - logger.log_line(&format!( - "Failed to delete AppContainer profile '{}': {}", - name, e - )); - false + } + + fn id(&self) -> u32 { + self.pid + } + + fn kill(&mut self) -> std::io::Result<()> { + // Terminate the whole job: the child and every descendant assigned to + // it die together (tree-kill). + self.job.terminate(u32::MAX); + Ok(()) + } + + fn wait(&mut self) -> std::io::Result { + // Close our copy of any not-taken stdin so the child sees EOF and can + // exit reliably (an interactive command would otherwise block waiting + // for input). + self.stdin.take(); + + // Drain (and discard) any not-taken streams concurrently to avoid the + // child blocking on a full pipe buffer. + let stdout_thread = spawn_discard(self.stdout.take()); + let stderr_thread = spawn_discard(self.stderr.take()); + + let result = match unsafe { WaitForSingleObject(self.process.get(), self.timeout_ms) } { + WAIT_OBJECT_0 => { + let mut code: u32 = 0; + if unsafe { GetExitCodeProcess(self.process.get(), &mut code) }.is_err() { + Err(std::io::Error::other("GetExitCodeProcess failed")) + } else { + Ok(code as i32) + } + } + WAIT_TIMEOUT => Err(std::io::Error::new( + std::io::ErrorKind::TimedOut, + format!("script timed out after {}ms", self.timeout_ms), + )), + _ => Err(std::io::Error::other("WaitForSingleObject failed")), + }; + + // Tree-kill the job so any backgrounded descendant dies *before* + // `run_teardown()` removes the firewall / BFS enforcement (keyed to the + // shared AppContainer package SID) — upholding the same invariant as + // `Drop`. The foreground child has already exited on the success path; on + // a timeout or wait failure this also terminates it. Then reap the root + // (immediate once it has exited) before releasing the pipe drains — and + // killing the tree closes the descendant's pipe write-ends, so the drains + // can finish. + let _ = self.kill(); + unsafe { + let _ = WaitForSingleObject(self.process.get(), u32::MAX); } + cancel_and_join_discard(stdout_thread, &self.stdout_canceller); + cancel_and_join_discard(stderr_thread, &self.stderr_canceller); + self.run_teardown(); + result } } -impl Drop for AppContainerScriptRunner { +impl Drop for AppContainerSandboxProcess { fn drop(&mut self) { - if !self.app_container_sid.0.is_null() { - unsafe { - // AppContainer SIDs from CreateAppContainerProfile / - // DeriveAppContainerSidFromAppContainerName must be freed with FreeSid. - windows::Win32::Security::FreeSid(self.app_container_sid); - } - self.app_container_sid = PSID(ptr::null_mut()); + // Kill the tree and reap before tearing down firewall/filesystem + // policy, so an abandoned-but-running sandbox cannot outlive its + // enforcement (or leak as an orphan). `kill()` terminates the job. + let _ = self.kill(); + unsafe { + let _ = WaitForSingleObject(self.process.get(), u32::MAX); } + self.run_teardown(); } } @@ -1272,7 +1559,7 @@ mod tests { use super::{AppContainerScriptRunner, FilesystemMode}; use wxc_common::models::ExecutionRequest; - use wxc_common::script_runner::ScriptRunner; + use wxc_common::sandbox_process::SandboxBackend; #[test] fn validate_runner_rejects_denied_paths_in_bfs_mode() { @@ -1281,7 +1568,7 @@ mod tests { request.policy.denied_paths = vec!["C:\\secret".into()]; let err = runner - .validate_runner(&request) + .validate(&request) .expect_err("BFS mode must reject deniedPaths"); assert!( err.error_message.contains("deniedPaths"), @@ -1297,7 +1584,7 @@ mod tests { request.policy.denied_paths = vec!["C:\\secret".into()]; assert!( - runner.validate_runner(&request).is_ok(), + runner.validate(&request).is_ok(), "DACL mode supports deniedPaths and should not error" ); } @@ -1309,7 +1596,7 @@ mod tests { request.policy.allowed_hosts = vec!["example.com".into()]; let err = runner - .validate_runner(&request) + .validate(&request) .expect_err("allowedHosts is not yet supported"); assert!(err.error_message.contains("allowedHosts")); } @@ -1321,7 +1608,7 @@ mod tests { request.policy.blocked_hosts = vec!["bad.example.com".into()]; let err = runner - .validate_runner(&request) + .validate(&request) .expect_err("blockedHosts is not yet supported"); assert!(err.error_message.contains("blockedHosts")); } @@ -1330,6 +1617,6 @@ mod tests { fn validate_runner_accepts_empty_policy() { let runner = AppContainerScriptRunner::new(); let request = ExecutionRequest::default(); - assert!(runner.validate_runner(&request).is_ok()); + assert!(runner.validate(&request).is_ok()); } } diff --git a/src/backends/appcontainer/common/src/base_container_runner.rs b/src/backends/appcontainer/common/src/base_container_runner.rs index f5e6c2527..073f9be19 100644 --- a/src/backends/appcontainer/common/src/base_container_runner.rs +++ b/src/backends/appcontainer/common/src/base_container_runner.rs @@ -16,7 +16,7 @@ use std::ptr; use windows::Win32::Foundation::{ CloseHandle, GetLastError, SetHandleInformation, ERROR_CALL_NOT_IMPLEMENTED, E_NOTIMPL, HANDLE, - HANDLE_FLAG_INHERIT, WAIT_FAILED, WAIT_TIMEOUT, + HANDLE_FLAG_INHERIT, WAIT_OBJECT_0, WAIT_TIMEOUT, }; use windows::Win32::System::Console::{ GetStdHandle, STD_ERROR_HANDLE, STD_INPUT_HANDLE, STD_OUTPUT_HANDLE, @@ -30,6 +30,7 @@ use windows::Win32::System::Threading::{ }; use windows_core::PCWSTR; +use crate::job_object::UiJobObject; use crate::launch_diagnostics::{ diagnose_create_process_failure, diagnose_environment_not_supported, diagnose_process_exit, is_environment_not_supported, @@ -48,10 +49,20 @@ use wxc_common::models::{ ExecutionRequest, FailurePhase, NetworkEnforcementMode, NetworkPolicy, ProxyAddress, ScriptResponse, }; -use wxc_common::script_runner::{get_timeout_milliseconds, ScriptRunner}; +use wxc_common::process_util::{ + create_std_pipes, InterruptiblePipeReader, OwnedHandle, PipeReadCanceller, PipeWriter, + SendOwnedHandle, +}; +use wxc_common::sandbox_process::{ + boxed_closer, cancel_and_join_discard, spawn_discard, take_boxed_read, take_boxed_write, + SandboxBackend, SandboxProcess, StdioMode, StreamCloser, +}; +use wxc_common::script_runner::get_timeout_milliseconds; use wxc_common::string_util; -use windows::Win32::System::Threading::CREATE_UNICODE_ENVIRONMENT; +use windows::Win32::System::Threading::{ + ResumeThread, CREATE_SUSPENDED, CREATE_UNICODE_ENVIRONMENT, +}; /// Serialize `KEY=VALUE` pairs into a double-null-terminated UTF-16 environment block. /// @@ -101,7 +112,7 @@ const SANDBOX_CAP_CREATE_PROCESS_IN_SANDBOX: u64 = 0x0000_0000_0000_0001; /// True when a Win32 error code signals the BaseContainer feature is not /// enabled on this build (symbol present, capability gated off). -pub(crate) fn is_api_not_implemented(err: u32) -> bool { +fn is_api_not_implemented(err: u32) -> bool { err == ERROR_CALL_NOT_IMPLEMENTED.0 || err == E_NOTIMPL.0 as u32 } @@ -532,51 +543,23 @@ impl BaseContainerRunner { } } -impl ScriptRunner for BaseContainerRunner { - fn validate_runner(&self, request: &ExecutionRequest) -> Result<(), ScriptResponse> { - if !request.policy.denied_paths.is_empty() { - return Err(ScriptResponse::error( - wxc_common::error::DENIED_PATHS_NOT_SUPPORTED_MSG, - )); - } - if !request.policy.allowed_hosts.is_empty() || !request.policy.blocked_hosts.is_empty() { - return Err(ScriptResponse::error( - wxc_common::error::HOST_LISTS_NOT_SUPPORTED_MSG, - )); - } - Self::is_base_container_api_present().map_err(|e| { - let hint = if !request.experimental_enabled { - format!( - "BaseContainer API unavailable: {e}\n\ - Hint: Config schema version '{}' requires the BaseContainer backend, \ - but this OS build does not support it. \ - Use schema version '0.4.0-alpha' to fall back to AppContainer.", - request.schema_version - ) - } else { - format!( - "BaseContainer API unavailable: {e}\n\ - Hint: --experimental requested BaseContainer, but this OS build \ - does not support it. Remove --experimental to use the AppContainer \ - backend, or use an OS build with BaseContainer support." - ) - }; - ScriptResponse { - // Symbol absent: report BackendUnavailable, not a hard error. - failure_phase: FailurePhase::BackendUnavailable, - ..ScriptResponse::error(&hint) - } - }) - } - - fn execute(&mut self, request: &ExecutionRequest, logger: &mut Logger) -> ScriptResponse { +impl BaseContainerRunner { + /// Set up and launch the BaseContainer child, returning a [`BaseChild`] the + /// caller runs to completion (blocking) or wraps in a streaming handle. When + /// `capture` is set the child's stdio is wired to pipes the caller drives + /// (the streaming path); otherwise the child inherits the parent's std + /// handles / console (the run-to-completion path). + fn spawn_base( + &mut self, + request: &ExecutionRequest, + logger: &mut Logger, + capture: bool, + ) -> Result { let _ = writeln!( logger, "{EMOJI_SECTION} SECTION: Backend runner 'BaseContainer'" ); - let run_start = std::time::Instant::now(); - // Launch builtin test proxy if requested (before building spec so we have the port). let mut request = request.clone(); if request.policy.network_proxy.builtin_test_server { @@ -586,9 +569,9 @@ impl ScriptRunner for BaseContainerRunner { request.policy.network_proxy.address = Some(addr); } Err(e) => { - return ScriptResponse::error(&format!( + return Err(ScriptResponse::error(&format!( "Failed to start builtin test proxy: {e}" - )); + ))); } } } @@ -626,7 +609,7 @@ impl ScriptRunner for BaseContainerRunner { // 2. Dynamically load the API from processmodel.dll. let create_process_in_sandbox = match Self::load_api() { Ok(f) => f, - Err(e) => return ScriptResponse::error(&e), + Err(e) => return Err(ScriptResponse::error(&e)), }; let _ = writeln!( logger, @@ -714,60 +697,123 @@ impl ScriptRunner for BaseContainerRunner { // If wxc-exec's stdout or stderr is not a terminal (i.e., piped by the SDK), // we forward our own std handles to the child via STARTF_USESTDHANDLES so the // child's output streams directly to the SDK in real time. - let pipe_mode = !std::io::stdout().is_terminal() || !std::io::stderr().is_terminal(); + // + // In capture mode (`StdioMode::Pipes`) we always take the pipe + // path and wire the child to capture pipes that the streaming handle + // reads from. + let pipe_mode = + capture || !std::io::stdout().is_terminal() || !std::io::stderr().is_terminal(); if pipe_mode { - let _ = writeln!( - logger, - "STDIO mode: passthrough (forwarding parent handles to child)" - ); + if capture { + let _ = writeln!( + logger, + "STDIO mode: capture (piping child output to the streaming handle)" + ); + } else { + let _ = writeln!( + logger, + "STDIO mode: passthrough (forwarding parent handles to child)" + ); + } } - // --- Retrieve parent std handles for passthrough (pipe mode only) --- + // --- Retrieve / create std handles (pipe mode only) --- let mut h_stdin = HANDLE::default(); let mut h_stdout = HANDLE::default(); let mut h_stderr = HANDLE::default(); + // Capture pipe read-ends (parent side) kept alive until after the wait; + // child-side ends kept alive until after process creation. + let mut capture_reads: Option<(OwnedHandle, OwnedHandle)> = None; + let mut capture_child_ends: Vec = Vec::new(); + // Parent's stdin write-end; in capture mode it is handed to the caller + // so they can write to the child. + let mut captured_stdin_write: Option = None; + if pipe_mode { - h_stdin = match unsafe { GetStdHandle(STD_INPUT_HANDLE) } { - Ok(h) => h, - Err(e) => return ScriptResponse::error(&format!("GetStdHandle(STDIN): {e}")), - }; - h_stdout = match unsafe { GetStdHandle(STD_OUTPUT_HANDLE) } { - Ok(h) => h, - Err(e) => return ScriptResponse::error(&format!("GetStdHandle(STDOUT): {e}")), - }; - h_stderr = match unsafe { GetStdHandle(STD_ERROR_HANDLE) } { - Ok(h) => h, - Err(e) => return ScriptResponse::error(&format!("GetStdHandle(STDERR): {e}")), - }; + if capture { + let (stdin_read, stdin_write) = match create_std_pipes(false) { + Ok(p) => p, + Err(e) => return Err(ScriptResponse::error(&format!("stdin pipe: {e}"))), + }; + let (stdout_read, stdout_write) = match create_std_pipes(true) { + Ok(p) => p, + Err(e) => return Err(ScriptResponse::error(&format!("stdout pipe: {e}"))), + }; + let (stderr_read, stderr_write) = match create_std_pipes(true) { + Ok(p) => p, + Err(e) => return Err(ScriptResponse::error(&format!("stderr pipe: {e}"))), + }; - if h_stdin.is_invalid() || h_stdin == HANDLE::default() { - return ScriptResponse::error("GetStdHandle(STDIN) returned null/invalid handle"); - } - if h_stdout.is_invalid() || h_stdout == HANDLE::default() { - return ScriptResponse::error("GetStdHandle(STDOUT) returned null/invalid handle"); - } - if h_stderr.is_invalid() || h_stderr == HANDLE::default() { - return ScriptResponse::error("GetStdHandle(STDERR) returned null/invalid handle"); - } + h_stdin = stdin_read.get(); + h_stdout = stdout_write.get(); + h_stderr = stderr_write.get(); - // Ensure the handles are inheritable. - unsafe { - if let Err(e) = - SetHandleInformation(h_stdin, HANDLE_FLAG_INHERIT.0, HANDLE_FLAG_INHERIT) - { - return ScriptResponse::error(&format!("SetHandleInformation(STDIN): {e}")); + capture_child_ends.push(stdin_read); + capture_child_ends.push(stdout_write); + capture_child_ends.push(stderr_write); + captured_stdin_write = Some(stdin_write); + capture_reads = Some((stdout_read, stderr_read)); + } else { + h_stdin = match unsafe { GetStdHandle(STD_INPUT_HANDLE) } { + Ok(h) => h, + Err(e) => { + return Err(ScriptResponse::error(&format!("GetStdHandle(STDIN): {e}"))) + } + }; + h_stdout = match unsafe { GetStdHandle(STD_OUTPUT_HANDLE) } { + Ok(h) => h, + Err(e) => { + return Err(ScriptResponse::error(&format!("GetStdHandle(STDOUT): {e}"))) + } + }; + h_stderr = match unsafe { GetStdHandle(STD_ERROR_HANDLE) } { + Ok(h) => h, + Err(e) => { + return Err(ScriptResponse::error(&format!("GetStdHandle(STDERR): {e}"))) + } + }; + + if h_stdin.is_invalid() || h_stdin == HANDLE::default() { + return Err(ScriptResponse::error( + "GetStdHandle(STDIN) returned null/invalid handle", + )); } - if let Err(e) = - SetHandleInformation(h_stdout, HANDLE_FLAG_INHERIT.0, HANDLE_FLAG_INHERIT) - { - return ScriptResponse::error(&format!("SetHandleInformation(STDOUT): {e}")); + if h_stdout.is_invalid() || h_stdout == HANDLE::default() { + return Err(ScriptResponse::error( + "GetStdHandle(STDOUT) returned null/invalid handle", + )); } - if let Err(e) = - SetHandleInformation(h_stderr, HANDLE_FLAG_INHERIT.0, HANDLE_FLAG_INHERIT) - { - return ScriptResponse::error(&format!("SetHandleInformation(STDERR): {e}")); + if h_stderr.is_invalid() || h_stderr == HANDLE::default() { + return Err(ScriptResponse::error( + "GetStdHandle(STDERR) returned null/invalid handle", + )); + } + + // Ensure the handles are inheritable. + unsafe { + if let Err(e) = + SetHandleInformation(h_stdin, HANDLE_FLAG_INHERIT.0, HANDLE_FLAG_INHERIT) + { + return Err(ScriptResponse::error(&format!( + "SetHandleInformation(STDIN): {e}" + ))); + } + if let Err(e) = + SetHandleInformation(h_stdout, HANDLE_FLAG_INHERIT.0, HANDLE_FLAG_INHERIT) + { + return Err(ScriptResponse::error(&format!( + "SetHandleInformation(STDOUT): {e}" + ))); + } + if let Err(e) = + SetHandleInformation(h_stderr, HANDLE_FLAG_INHERIT.0, HANDLE_FLAG_INHERIT) + { + return Err(ScriptResponse::error(&format!( + "SetHandleInformation(STDERR): {e}" + ))); + } } } } @@ -805,11 +851,17 @@ impl ScriptRunner for BaseContainerRunner { .as_ref() .map(|b| b.as_ptr() as *const c_void) .unwrap_or(ptr::null()); - let creation_flags = if env_block.is_some() { - CREATE_UNICODE_ENVIRONMENT.0 - } else { - 0 - }; + // Create the child suspended so its main thread cannot spawn any + // descendant before we've assigned it to the job object below; it is + // resumed right after the assignment. If the sandbox create API ignores + // CREATE_SUSPENDED on a given build, the child starts running anyway and + // the later resume is a harmless no-op. + let creation_flags = CREATE_SUSPENDED.0 + | if env_block.is_some() { + CREATE_UNICODE_ENVIRONMENT.0 + } else { + 0 + }; let _ = writeln!(logger, "launching: {}", request.script_code); let _ = writeln!(logger, "identity: {identity}"); @@ -829,13 +881,13 @@ impl ScriptRunner for BaseContainerRunner { "Error: Pre-launch diagnostic [{}]: {}", diag.kind, diag.message ); - return ScriptResponse { + return Err(ScriptResponse { exit_code: -1, error_message: diag.message.clone(), standard_err: diag.message, failure_phase: FailurePhase::LaunchFailed, ..Default::default() - }; + }); } // 4. Call Experimental_CreateProcessInSandbox. @@ -902,9 +954,10 @@ impl ScriptRunner for BaseContainerRunner { diag.kind, diag.message ); - // Retry without the environment block. + // Retry without the environment block, but keep the child + // suspended (resumed after job assignment). current_env_ptr = ptr::null(); - current_creation_flags = 0; + current_creation_flags = CREATE_SUSPENDED.0; continue; } @@ -960,107 +1013,405 @@ impl ScriptRunner for BaseContainerRunner { FailurePhase::LaunchFailed }; - return ScriptResponse { + return Err(ScriptResponse { exit_code: -1, error_message: diag.message.clone(), standard_err: diag.message, extended_error, failure_phase, ..Default::default() - }; + }); } let _ = writeln!(logger, "process created (PID: {})", pi.dwProcessId); - let _ = writeln!(logger, "{EMOJI_SECTION} SECTION: Wait for exit"); + // Child has inherited the pipe handles; close the parent's child-side + // ends so the read-ends observe EOF when the child exits. + capture_child_ends.clear(); - // 5. Wait for the child process to exit. - let timeout_ms = get_timeout_milliseconds(request.script_timeout); - let mut exit_code: u32 = u32::MAX; + let (stdout_read, stderr_read) = match capture_reads { + Some((out, err)) => (Some(out), Some(err)), + None => (None, None), + }; - unsafe { - let wait_result = WaitForSingleObject(pi.hProcess, timeout_ms); - if wait_result == WAIT_FAILED { - let err = GetLastError(); - let _ = CloseHandle(pi.hProcess); - let _ = CloseHandle(pi.hThread); - return ScriptResponse::error(&format!("WaitForSingleObject failed: {err:?}")); - } else if wait_result == WAIT_TIMEOUT { - let _ = writeln!(logger, "process timed out, terminating..."); - let _ = TerminateProcess(pi.hProcess, u32::MAX); - let _ = WaitForSingleObject(pi.hProcess, 5000); + // Assign the child to a job object so the streaming handle's `kill()` + // (and the timeout / `Drop` paths) can tree-kill it — the child plus + // every descendant it spawns after assignment. This backend *is* a + // security boundary, so fail **closed**: if the job cannot be created + // or the process cannot be assigned, terminate the just-launched child + // and reject the spawn rather than run a sandbox that cannot be + // reliably torn down. (Previously this was best-effort: a failed + // assignment left `job = None`, after which `kill()`/timeout/`Drop` + // could only `TerminateProcess` the root and no descendant was + // tree-killed at all.) + // + // The child was created suspended (CREATE_SUSPENDED) and is resumed only + // after this assignment, so no descendant it spawns can escape the job. + // If the create API ignores CREATE_SUSPENDED on a given build the child + // is already running; it is a shell that has not yet run the user + // command, so the pre-assignment window is empty in practice and the + // later resume is a harmless no-op. + let job = match UiJobObject::new().and_then(|job| { + // Pass the raw handle — `assign_process` borrows it and does not + // take ownership. Wrapping it in a temporary `OwnedHandle` here + // would close `pi.hProcess` when the temporary dropped, leaving the + // owned handle on the `BaseChild` below pointing at a closed (and + // possibly reused) handle. Sole ownership stays with that field. + job.assign_process(pi.hProcess)?; + Ok(job) + }) { + Ok(job) => job, + Err(e) => { + let _ = writeln!( + logger, + "Error: BaseContainer job-object setup failed ({e}); terminating \ + the child and failing closed — a sandbox that cannot be \ + tree-killed must not run." + ); + // The child is already running and there is no job to tree-kill + // through, so terminate the root directly and reap it before + // tearing down sandbox / proxy state, upholding the same + // "enforcement never outlives a live child" invariant as the + // normal teardown paths. + unsafe { + let _ = TerminateProcess(pi.hProcess, u32::MAX); + let _ = WaitForSingleObject(pi.hProcess, u32::MAX); + let _ = CloseHandle(pi.hProcess); + let _ = CloseHandle(pi.hThread); + } + if request.lifecycle.destroy_on_exit { + run_sandbox_cleanup( + &identity, + &sid_string, + request.policy.network_proxy.is_enabled(), + logger, + ); + sandbox_tracking::unregister_ctrl_c_cleanup(); + } + self.proxy_coordinator.stop(logger); + + const JOB_SETUP_FAILED_MSG: &str = + "BaseContainer sandbox could not be placed in a job object, so it \ + could not be reliably terminated; the launch was rejected to \ + avoid running an uncontainable sandbox."; + return Err(ScriptResponse { + exit_code: -1, + error_message: JOB_SETUP_FAILED_MSG.to_string(), + standard_err: JOB_SETUP_FAILED_MSG.to_string(), + extended_error: format!("BaseContainer job-object setup failed: {e}"), + failure_phase: FailurePhase::LaunchFailed, + ..Default::default() + }); } + }; - let _ = GetExitCodeProcess(pi.hProcess, &mut exit_code); + // The child was created suspended; now that it is in the job object (so + // every descendant it spawns is captured), resume its main thread. If the + // create API ignored CREATE_SUSPENDED the thread is already running and + // this is a harmless no-op. + // SAFETY: `pi.hThread` is the just-created, still-owned main-thread + // handle; `ResumeThread` only adjusts its suspend count. + unsafe { + ResumeThread(pi.hThread); + } - let _ = CloseHandle(pi.hProcess); - let _ = CloseHandle(pi.hThread); + // Hand ownership to the caller via `BaseChild`, which performs + // sandbox/proxy teardown after the child exits. `job` is always present + // here (we failed closed above); the `Option` and the root-only fallback + // in `kill()` remain purely as defense-in-depth. + Ok(BaseChild { + process: OwnedHandle::new(pi.hProcess), + thread: OwnedHandle::new(pi.hThread), + pid: pi.dwProcessId, + job: Some(job), + stdin_write: captured_stdin_write, + stdout_read, + stderr_read, + timeout_ms: get_timeout_milliseconds(request.script_timeout), + destroy_on_exit: request.lifecycle.destroy_on_exit, + proxy_enabled: request.policy.network_proxy.is_enabled(), + identity, + sid_string, + proxy_coordinator: std::mem::take(&mut self.proxy_coordinator), + }) + } +} + +/// A BaseContainer child launched by [`BaseContainerRunner::spawn_base`]. The +/// child runs immediately (no suspend); this owns the process handle, the +/// parent-side pipe ends, and the per-run proxy/sandbox state it tears down +/// once the child exits. +struct BaseChild { + process: OwnedHandle, + thread: OwnedHandle, + pid: u32, + /// Job object the child is assigned to, used to tree-kill it. Always + /// `Some` on a successfully spawned child (`spawn_base` fails closed when + /// the job cannot be set up); the `Option` is retained so `kill()` can keep + /// a root-only fallback as defense-in-depth. + job: Option, + stdin_write: Option, + stdout_read: Option, + stderr_read: Option, + timeout_ms: u32, + destroy_on_exit: bool, + proxy_enabled: bool, + identity: String, + sid_string: String, + proxy_coordinator: ProxyCoordinator, +} + +impl SandboxBackend for BaseContainerRunner { + fn validate(&self, request: &ExecutionRequest) -> Result<(), ScriptResponse> { + if !request.policy.denied_paths.is_empty() { + return Err(ScriptResponse::error( + wxc_common::error::DENIED_PATHS_NOT_SUPPORTED_MSG, + )); + } + if !request.policy.allowed_hosts.is_empty() || !request.policy.blocked_hosts.is_empty() { + return Err(ScriptResponse::error( + wxc_common::error::HOST_LISTS_NOT_SUPPORTED_MSG, + )); } + Self::is_base_container_api_present().map_err(|e| { + let hint = if !request.experimental_enabled { + format!( + "BaseContainer API unavailable: {e}\n\ + Hint: Config schema version '{}' requires the BaseContainer backend, \ + but this OS build does not support it. \ + Use schema version '0.4.0-alpha' to fall back to AppContainer.", + request.schema_version + ) + } else { + format!( + "BaseContainer API unavailable: {e}\n\ + Hint: --experimental requested BaseContainer, but this OS build \ + does not support it. Remove --experimental to use the AppContainer \ + backend, or use an OS build with BaseContainer support." + ) + }; + ScriptResponse { + // Symbol absent: report BackendUnavailable, not a hard error. + failure_phase: FailurePhase::BackendUnavailable, + ..ScriptResponse::error(&hint) + } + }) + } - let _ = writeln!(logger, "process exited with code {exit_code}"); + fn spawn( + &mut self, + request: &ExecutionRequest, + logger: &mut Logger, + stdio: StdioMode, + ) -> Result, ScriptResponse> { + use wxc_common::validator::validate_common; + + validate_common(request)?; + self.validate(request)?; + + // Pipes → capture pipes the caller drives; Inherit → the child inherits + // the binary's own std handles / console (a TTY when the binary has one). + let capture = stdio == StdioMode::Pipes; + let child = self.spawn_base(request, logger, capture)?; + Ok(Box::new(BaseContainerSandboxProcess::from_child(child))) + } - // 6. Sandbox cleanup: delete AppContainer profile and tracking entry. - // Deferred if a network proxy is configured (proxy state can't be cleaned up yet). - if request.lifecycle.destroy_on_exit { + fn diagnose_exit(&self, request: &ExecutionRequest, exit_code: i32) -> Option { + diagnose_process_exit( + &request.script_code, + &request.policy.readonly_paths, + &request.policy.readwrite_paths, + exit_code as u32, + ) + .map(|diag| diag.message) + } +} + +/// A running BaseContainer-sandboxed process exposed as a [`SandboxProcess`]. +/// Owns the process handle, the parent-side pipes, and the per-run proxy / +/// sandbox state, which it tears down once the child exits. +struct BaseContainerSandboxProcess { + process: SendOwnedHandle, + _thread: SendOwnedHandle, + job: Option, + pid: u32, + stdin: Option, + stdout: Option, + stderr: Option, + /// Cancellers for the stdout/stderr reads, kept so the `SandboxProcess` + /// closers can mint a [`StreamCloser`] even after the stream is taken. + stdout_canceller: Option, + stderr_canceller: Option, + timeout_ms: u32, + destroy_on_exit: bool, + proxy_enabled: bool, + identity: String, + sid_string: String, + proxy_coordinator: ProxyCoordinator, + teardown_done: bool, +} + +// SAFETY: the fields are Windows HANDLEs / handle-owning managers and owned +// strings. HANDLEs are process-global and safe to use from any single thread; +// this handle is owned exclusively by the caller, so moving it across threads +// is sound. +unsafe impl Send for BaseContainerSandboxProcess {} + +impl BaseContainerSandboxProcess { + fn from_child(mut child: BaseChild) -> Self { + let process = SendOwnedHandle::take(&mut child.process); + let thread = SendOwnedHandle::take(&mut child.thread); + let stdin = child.stdin_write.take().map(PipeWriter::new); + let stdout = child.stdout_read.take().map(InterruptiblePipeReader::new); + let stderr = child.stderr_read.take().map(InterruptiblePipeReader::new); + let stdout_canceller = stdout.as_ref().map(InterruptiblePipeReader::canceller); + let stderr_canceller = stderr.as_ref().map(InterruptiblePipeReader::canceller); + Self { + process, + _thread: thread, + job: child.job.take(), + pid: child.pid, + stdin, + stdout, + stderr, + stdout_canceller, + stderr_canceller, + timeout_ms: child.timeout_ms, + destroy_on_exit: child.destroy_on_exit, + proxy_enabled: child.proxy_enabled, + identity: std::mem::take(&mut child.identity), + sid_string: std::mem::take(&mut child.sid_string), + proxy_coordinator: std::mem::take(&mut child.proxy_coordinator), + teardown_done: false, + } + } + + fn run_teardown(&mut self) { + if self.teardown_done { + return; + } + self.teardown_done = true; + let mut logger = Logger::new(wxc_common::logger::Mode::Buffer); + if self.destroy_on_exit { run_sandbox_cleanup( - &identity, - &sid_string, - request.policy.network_proxy.is_enabled(), - logger, + &self.identity, + &self.sid_string, + self.proxy_enabled, + &mut logger, ); - // Unregister so a late Ctrl+C doesn't double-cleanup. sandbox_tracking::unregister_ctrl_c_cleanup(); } + self.proxy_coordinator.stop(&mut logger); + } +} - let _ = writeln!( - logger, - "{EMOJI_SECTION} SECTION: Done ({:.3}s)", - run_start.elapsed().as_secs_f64() - ); +impl SandboxProcess for BaseContainerSandboxProcess { + fn take_stdin(&mut self) -> Option> { + take_boxed_write(&mut self.stdin) + } - // Stop the builtin test proxy if it was started. - self.proxy_coordinator.stop(logger); + fn take_stdout(&mut self) -> Option> { + take_boxed_read(&mut self.stdout) + } - // - // Diagnose the application failure (FailurePhase::ProcessExited). - // - let (error_message, failure_phase) = if exit_code != 0 { - if let Some(diag) = diagnose_process_exit( - &request.script_code, - &request.policy.readonly_paths, - &request.policy.readwrite_paths, - exit_code, - ) { - let _ = writeln!( - logger, - "Error: Launch diagnostic [{}]: {}", - diag.kind, diag.message - ); - (diag.message, FailurePhase::ProcessExited) - } else { - (String::new(), FailurePhase::ProcessExited) + fn take_stderr(&mut self) -> Option> { + take_boxed_read(&mut self.stderr) + } + + fn stdout_closer(&self) -> Option> { + boxed_closer(&self.stdout_canceller) + } + + fn stderr_closer(&self) -> Option> { + boxed_closer(&self.stderr_canceller) + } + + fn try_wait(&mut self) -> std::io::Result> { + match unsafe { WaitForSingleObject(self.process.get(), 0) } { + WAIT_OBJECT_0 => { + let mut code: u32 = 0; + if unsafe { GetExitCodeProcess(self.process.get(), &mut code) }.is_err() { + return Err(std::io::Error::other("GetExitCodeProcess failed")); + } + Ok(Some(code as i32)) } - } else { - (String::new(), FailurePhase::None) - }; + WAIT_TIMEOUT => Ok(None), + _ => Err(std::io::Error::other("WaitForSingleObject failed")), + } + } + + fn id(&self) -> u32 { + self.pid + } - // Merge diagnostic error into stderr field if present. - // In passthrough mode, stdout/stderr already went directly to the SDK caller, - // so standard_out/standard_err in ScriptResponse will be empty. - let final_stderr = if error_message.is_empty() { - String::new() + fn kill(&mut self) -> std::io::Result<()> { + // Tree-kill via the job object when the child was successfully assigned + // to one; otherwise fall back to terminating the root process. + if let Some(job) = &self.job { + job.terminate(u32::MAX); } else { - error_message.clone() + unsafe { + let _ = TerminateProcess(self.process.get(), u32::MAX); + } + } + Ok(()) + } + + fn wait(&mut self) -> std::io::Result { + // Close our copy of any not-taken stdin so the child sees EOF and can + // exit reliably (an interactive command would otherwise block waiting + // for input). + self.stdin.take(); + + // Drain (and discard) any not-taken streams concurrently to avoid the + // child blocking on a full pipe buffer. + let stdout_thread = spawn_discard(self.stdout.take()); + let stderr_thread = spawn_discard(self.stderr.take()); + + let result = match unsafe { WaitForSingleObject(self.process.get(), self.timeout_ms) } { + WAIT_OBJECT_0 => { + let mut code: u32 = 0; + if unsafe { GetExitCodeProcess(self.process.get(), &mut code) }.is_err() { + Err(std::io::Error::other("GetExitCodeProcess failed")) + } else { + Ok(code as i32) + } + } + WAIT_TIMEOUT => Err(std::io::Error::new( + std::io::ErrorKind::TimedOut, + format!("script timed out after {}ms", self.timeout_ms), + )), + _ => Err(std::io::Error::other("WaitForSingleObject failed")), }; - ScriptResponse { - exit_code: exit_code as i32, - standard_out: String::new(), - standard_err: final_stderr, - error_message, - failure_phase, - ..Default::default() + // Tree-kill (the job when assigned, else the root) so any backgrounded + // descendant dies *before* `run_teardown()` stops the proxy / sandbox + // enforcement — upholding the same invariant as `Drop`. The foreground + // child has already exited on the success path; on a timeout or wait + // failure this also terminates it. Then reap the root before releasing + // the pipe drains — and killing the tree closes the descendant's pipe + // write-ends, so the drains can finish. + let _ = self.kill(); + unsafe { + let _ = WaitForSingleObject(self.process.get(), u32::MAX); + } + cancel_and_join_discard(stdout_thread, &self.stdout_canceller); + cancel_and_join_discard(stderr_thread, &self.stderr_canceller); + self.run_teardown(); + result + } +} + +impl Drop for BaseContainerSandboxProcess { + fn drop(&mut self) { + // Kill and reap before tearing down proxy / sandbox state, so an + // abandoned-but-running sandbox cannot outlive its enforcement (or + // leak as an orphan). + let _ = self.kill(); + unsafe { + let _ = WaitForSingleObject(self.process.get(), u32::MAX); } + self.run_teardown(); } } @@ -1319,7 +1670,7 @@ mod tests { // ---- validate_runner: unsupported policy fields surface as errors. ---- - use wxc_common::script_runner::ScriptRunner; + use wxc_common::sandbox_process::SandboxBackend; #[test] fn validate_runner_rejects_denied_paths() { @@ -1328,7 +1679,7 @@ mod tests { request.policy.denied_paths = vec!["C:\\secret".into()]; let err = runner - .validate_runner(&request) + .validate(&request) .expect_err("BaseContainer does not yet support deniedPaths"); assert!( err.error_message.contains("deniedPaths"), @@ -1344,7 +1695,7 @@ mod tests { request.policy.allowed_hosts = vec!["example.com".into()]; let err = runner - .validate_runner(&request) + .validate(&request) .expect_err("allowedHosts is not yet supported"); assert!(err.error_message.contains("allowedHosts")); } @@ -1356,7 +1707,7 @@ mod tests { request.policy.blocked_hosts = vec!["bad.example.com".into()]; let err = runner - .validate_runner(&request) + .validate(&request) .expect_err("blockedHosts is not yet supported"); assert!(err.error_message.contains("blockedHosts")); } @@ -1370,7 +1721,7 @@ mod tests { // the policy-field checks above don't fire. Skip when the host doesn't // expose the API. if BaseContainerRunner::is_base_container_api_present().is_ok() { - assert!(runner.validate_runner(&request).is_ok()); + assert!(runner.validate(&request).is_ok()); } } } diff --git a/src/backends/appcontainer/common/src/dispatcher.rs b/src/backends/appcontainer/common/src/dispatcher.rs index 6e85c09fa..48f62fedb 100644 --- a/src/backends/appcontainer/common/src/dispatcher.rs +++ b/src/backends/appcontainer/common/src/dispatcher.rs @@ -75,6 +75,7 @@ use crate::fallback_detector::{self, FallbackError, IsolationTier}; use wxc_common::error::WxcError; use wxc_common::filesystem_dacl::{DaclError, DaclManager, RO_MASK, RW_MASK}; use wxc_common::models::ExecutionRequest; +use wxc_common::sandbox_process::Runner; use wxc_common::script_runner::ScriptRunner; /// Result of a successful dispatch decision: a phased handle holding a @@ -287,7 +288,7 @@ pub fn dispatch_with_fallback(request: &ExecutionRequest) -> Result = Box::new(BaseContainerRunner::new()); + let runner: Box = Box::new(Runner::new(BaseContainerRunner::new())); (runner, None) } IsolationTier::AppContainerBfs => { @@ -297,9 +298,9 @@ pub fn dispatch_with_fallback(request: &ExecutionRequest) -> Result = Box::new( + let runner: Box = Box::new(Runner::new( AppContainerScriptRunner::with_filesystem_mode(FilesystemMode::Bfs), - ); + )); (runner, None) } else { let sid = @@ -308,12 +309,12 @@ pub fn dispatch_with_fallback(request: &ExecutionRequest) -> Result = Box::new( + let runner: Box = Box::new(Runner::new( AppContainerScriptRunner::with_filesystem_mode_and_sid_string( FilesystemMode::Bfs, sid, ), - ); + )); (runner, mgr) } } @@ -342,12 +343,12 @@ pub fn dispatch_with_fallback(request: &ExecutionRequest) -> Result = Box::new( + let runner: Box = Box::new(Runner::new( AppContainerScriptRunner::with_filesystem_mode_and_sid_string( FilesystemMode::Dacl, sid, ), - ); + )); (runner, Some(mgr)) } }; diff --git a/src/backends/appcontainer/common/src/job_object.rs b/src/backends/appcontainer/common/src/job_object.rs index fa83b48ff..2080d7ade 100644 --- a/src/backends/appcontainer/common/src/job_object.rs +++ b/src/backends/appcontainer/common/src/job_object.rs @@ -19,10 +19,11 @@ use std::sync::OnceLock; use windows::Win32::Foundation::{CloseHandle, HANDLE}; use windows::Win32::System::JobObjects::{ AssignProcessToJobObject, CreateJobObjectW, JobObjectBasicUIRestrictions, - SetInformationJobObject, JOBOBJECT_BASIC_UI_RESTRICTIONS, JOB_OBJECT_UILIMIT, - JOB_OBJECT_UILIMIT_DESKTOP, JOB_OBJECT_UILIMIT_DISPLAYSETTINGS, JOB_OBJECT_UILIMIT_EXITWINDOWS, - JOB_OBJECT_UILIMIT_GLOBALATOMS, JOB_OBJECT_UILIMIT_HANDLES, JOB_OBJECT_UILIMIT_READCLIPBOARD, - JOB_OBJECT_UILIMIT_SYSTEMPARAMETERS, JOB_OBJECT_UILIMIT_WRITECLIPBOARD, + SetInformationJobObject, TerminateJobObject, JOBOBJECT_BASIC_UI_RESTRICTIONS, + JOB_OBJECT_UILIMIT, JOB_OBJECT_UILIMIT_DESKTOP, JOB_OBJECT_UILIMIT_DISPLAYSETTINGS, + JOB_OBJECT_UILIMIT_EXITWINDOWS, JOB_OBJECT_UILIMIT_GLOBALATOMS, JOB_OBJECT_UILIMIT_HANDLES, + JOB_OBJECT_UILIMIT_READCLIPBOARD, JOB_OBJECT_UILIMIT_SYSTEMPARAMETERS, + JOB_OBJECT_UILIMIT_WRITECLIPBOARD, }; use windows::Win32::System::SystemServices::JOB_OBJECT_UILIMIT_IME; use windows_core::PCWSTR; @@ -273,6 +274,17 @@ impl UiJobObject { unsafe { AssignProcessToJobObject(self.handle, process_handle) } .map_err(|e| WxcError::Process(format!("AssignProcessToJobObject: {e}"))) } + + /// Terminate every process currently assigned to this job (the sandboxed + /// child and all of its descendants) with the given exit code. Used to + /// tree-kill a running sandbox. Best-effort: errors are ignored since the + /// processes may already have exited. + pub fn terminate(&self, exit_code: u32) { + // SAFETY: `self.handle` is a valid job handle owned by this struct. + unsafe { + let _ = TerminateJobObject(self.handle, exit_code); + } + } } impl Drop for UiJobObject { diff --git a/src/backends/appcontainer/common/src/network_manager.rs b/src/backends/appcontainer/common/src/network_manager.rs index 1f7c2b732..9abb2c342 100644 --- a/src/backends/appcontainer/common/src/network_manager.rs +++ b/src/backends/appcontainer/common/src/network_manager.rs @@ -28,20 +28,80 @@ pub enum DefaultPolicy { Block, } +/// `RPC_E_CHANGED_MODE`: `CoInitializeEx` returns this when COM is already +/// initialized on the calling thread with a *different* apartment model. The +/// existing initialization is reused and must **not** be balanced by our own +/// `CoUninitialize`. +const RPC_E_CHANGED_MODE: u32 = 0x8001_0106; + +/// RAII guard for a per-call COM apartment on the **current** thread. +/// +/// Every firewall operation creates one of these, does all of its COM work +/// (`CoCreateInstance`, interface use, release) while it is alive, and lets it +/// drop — running the matching `CoUninitialize` — before returning. Because no +/// COM interface or apartment state is ever cached on [`NetworkManager`] across +/// calls, teardown (`remove_firewall_rules`) can run on a *different* thread +/// than setup (`apply_firewall_rules`) without ever using an interface from +/// another apartment or pairing `CoInitializeEx`/`CoUninitialize` across +/// threads. That self-containment is what makes the `unsafe impl Send` on the +/// owning sandbox handle sound. +struct ComApartment { + /// Whether *this* guard performed the initialization that it must balance + /// with `CoUninitialize`. `false` when COM was already initialized on this + /// thread under a different model (`RPC_E_CHANGED_MODE`). + owns_init: bool, +} + +impl ComApartment { + /// Join (or initialize) an apartment-threaded COM apartment for the current + /// thread. `S_OK`/`S_FALSE` both count as an initialization this guard must + /// balance; `RPC_E_CHANGED_MODE` reuses an existing apartment without + /// taking ownership of its teardown. + fn new() -> Result { + // SAFETY: `CoInitializeEx` is always safe to call; the matching + // `CoUninitialize` runs in `Drop` on this same thread when we own it. + let hr = unsafe { CoInitializeEx(None, COINIT_APARTMENTTHREADED) }; + if hr.is_ok() { + Ok(Self { owns_init: true }) + } else if hr.0 as u32 == RPC_E_CHANGED_MODE { + Ok(Self { owns_init: false }) + } else { + Err(WxcError::Firewall(format!( + "CoInitializeEx failed: 0x{:08X}", + hr.0 as u32 + ))) + } + } +} + +impl Drop for ComApartment { + fn drop(&mut self) { + if self.owns_init { + // SAFETY: balances the `CoInitializeEx` in `new` on the same thread. + unsafe { CoUninitialize() }; + } + } +} + pub struct NetworkManager { - fw_policy: Option, created_rule_names: Vec, - com_initialized: bool, wsa_initialized: bool, proxy_coordinator: ProxyCoordinator, } +/// Invariant context for creating firewall rules within a single +/// `apply_firewall_rules` call: the firewall interface (valid only for the +/// current COM apartment / thread) and the AppContainer principal the rules are +/// scoped to. Bundled so the rule helpers stay within the argument-count lint. +struct RuleContext<'a> { + fw_policy: &'a INetFwPolicy2, + principal_id: &'a str, +} + impl NetworkManager { pub fn new() -> Self { Self { - fw_policy: None, created_rule_names: Vec::new(), - com_initialized: false, wsa_initialized: false, proxy_coordinator: ProxyCoordinator::new(), } @@ -84,7 +144,13 @@ impl NetworkManager { return Ok(true); } - self.initialize_firewall(logger)?; + // Open a COM apartment and create the firewall interface for the + // duration of *this* call only — nothing is cached on `self`. See + // [`ComApartment`] for why this self-containment matters. + let _com = ComApartment::new()?; + let fw_policy: INetFwPolicy2 = + unsafe { CoCreateInstance(&NetFwPolicy2, None, CLSCTX_INPROC_SERVER) } + .map_err(|e| WxcError::Firewall(format!("Failed to create NetFwPolicy2: {e}")))?; self.ensure_wsa_initialized(logger)?; let now = std::time::SystemTime::now() @@ -100,43 +166,35 @@ impl NetworkManager { sanitized_principal_id.truncate(MAX_PRINCIPAL_ID_LEN); } let rule_prefix = format!("WXC_{}_{}", sanitized_principal_id, millis); + let ctx = RuleContext { + fw_policy: &fw_policy, + principal_id, + }; if default_policy == DefaultPolicy::Block { let block_all_name = format!("{}_BlockAll", rule_prefix); - if !self.create_rule( - &block_all_name, - principal_id, - NET_FW_ACTION_BLOCK, - "", - logger, - )? { + if !self.create_rule(&ctx, &block_all_name, NET_FW_ACTION_BLOCK, "", logger)? { return Ok(false); } self.created_rule_names.push(block_all_name); self.process_host_list( + &ctx, &policy.allowed_hosts, &rule_prefix, - principal_id, NET_FW_ACTION_ALLOW, "Allow", logger, )?; } else { let allow_all_name = format!("{}_AllowAll", rule_prefix); - if !self.create_rule( - &allow_all_name, - principal_id, - NET_FW_ACTION_ALLOW, - "*", - logger, - )? { + if !self.create_rule(&ctx, &allow_all_name, NET_FW_ACTION_ALLOW, "*", logger)? { return Ok(false); } self.created_rule_names.push(allow_all_name); self.process_host_list( + &ctx, &policy.blocked_hosts, &rule_prefix, - principal_id, NET_FW_ACTION_BLOCK, "Block", logger, @@ -148,9 +206,9 @@ impl NetworkManager { fn process_host_list( &mut self, + ctx: &RuleContext, hosts: &[String], rule_prefix: &str, - principal_id: &str, action: NET_FW_ACTION, action_name: &str, logger: &mut Logger, @@ -169,7 +227,7 @@ impl NetworkManager { }; let rule_name = format!("{}_{}_{}", rule_prefix, action_name, index); - match self.create_rule(&rule_name, principal_id, action, &ip_address, logger) { + match self.create_rule(ctx, &rule_name, action, &ip_address, logger) { Ok(true) => { self.created_rule_names.push(rule_name); } @@ -236,13 +294,19 @@ impl NetworkManager { } pub fn remove_firewall_rules(&mut self, logger: &mut Logger) -> Result { - let fw_policy = match &self.fw_policy { - Some(p) => p.clone(), - None => { - logger.log_line("Firewall policy not initialized"); - return Ok(false); - } - }; + if self.created_rule_names.is_empty() { + return Ok(true); + } + + // Re-acquire a fresh firewall interface in its own apartment on the + // current thread. Windows Firewall rules persist by name independently + // of the COM client that created them, so removal does not need (and + // must not reuse) the interface or apartment `apply_firewall_rules` + // used — which may have run on a different thread. See [`ComApartment`]. + let _com = ComApartment::new()?; + let fw_policy: INetFwPolicy2 = + unsafe { CoCreateInstance(&NetFwPolicy2, None, CLSCTX_INPROC_SERVER) } + .map_err(|e| WxcError::Firewall(format!("Failed to create NetFwPolicy2: {e}")))?; let rules = unsafe { fw_policy.Rules() } .map_err(|e| WxcError::Firewall(format!("Failed to get firewall rules: {}", e)))?; @@ -255,60 +319,10 @@ impl NetworkManager { } } self.created_rule_names.clear(); - Ok(all_success) - } - - fn initialize_firewall(&mut self, _logger: &mut Logger) -> Result<(), WxcError> { - if self.fw_policy.is_some() { - return Ok(()); - } - - let hr = unsafe { CoInitializeEx(None, COINIT_APARTMENTTHREADED) }; - // CoInitializeEx returns HRESULT directly in windows 0.58 - if hr.is_ok() { - self.com_initialized = true; - } else { - // RPC_E_CHANGED_MODE (0x80010106) means COM is already initialized - // with a different threading model — that's acceptable. - let code = hr.0 as u32; - if code == 0x80010106 { - self.com_initialized = false; - } else { - return Err(WxcError::Firewall(format!( - "CoInitializeEx failed: 0x{:08X}", - code - ))); - } - } - - match unsafe { - CoCreateInstance::<_, INetFwPolicy2>(&NetFwPolicy2, None, CLSCTX_INPROC_SERVER) - } { - Ok(policy) => { - self.fw_policy = Some(policy); - Ok(()) - } - Err(e) => { - if self.com_initialized { - unsafe { CoUninitialize() }; - self.com_initialized = false; - } - Err(WxcError::Firewall(format!( - "Failed to create NetFwPolicy2: {}", - e - ))) - } - } - } - - fn cleanup_firewall(&mut self) { - if let Some(policy) = self.fw_policy.take() { - drop(policy); - } - if self.com_initialized { - unsafe { CoUninitialize() }; - self.com_initialized = false; + if !all_success { + logger.log_line("Warning: some firewall rules could not be removed"); } + Ok(all_success) } fn ensure_wsa_initialized(&mut self, _logger: &mut Logger) -> Result<(), WxcError> { @@ -336,18 +350,13 @@ impl NetworkManager { fn create_rule( &self, + ctx: &RuleContext, rule_name: &str, - principal_id: &str, action: NET_FW_ACTION, remote_addresses: &str, _logger: &mut Logger, ) -> Result { - let fw_policy = self - .fw_policy - .as_ref() - .ok_or_else(|| WxcError::Firewall("Firewall policy not initialized".into()))?; - - let rules = unsafe { fw_policy.Rules() } + let rules = unsafe { ctx.fw_policy.Rules() } .map_err(|e| WxcError::Firewall(format!("Failed to get firewall rules: {}", e)))?; let rule: windows::Win32::NetworkManagement::WindowsFirewall::INetFwRule = @@ -366,7 +375,7 @@ impl NetworkManager { .map_err(|e| WxcError::Firewall(format!("put_Description failed: {}", e)))?; rule3 - .SetLocalAppPackageId(&BSTR::from(principal_id)) + .SetLocalAppPackageId(&BSTR::from(ctx.principal_id)) .map_err(|e| WxcError::Firewall(format!("put_LocalAppPackageId failed: {}", e)))?; rule.SetDirection(NET_FW_RULE_DIR_OUT) @@ -411,7 +420,10 @@ impl Default for NetworkManager { impl Drop for NetworkManager { fn drop(&mut self) { - self.cleanup_firewall(); + // No COM state is cached across calls (each firewall op is + // apartment-self-contained), so there is nothing COM-related to undo + // here. Only the process-global Winsock refcount — which is + // thread-agnostic — needs balancing. self.cleanup_wsa(); } } @@ -528,9 +540,7 @@ mod tests { #[test] fn test_default_creates_new_manager() { let mgr = NetworkManager::default(); - assert!(mgr.fw_policy.is_none()); assert!(mgr.created_rule_names.is_empty()); - assert!(!mgr.com_initialized); assert!(!mgr.wsa_initialized); } diff --git a/src/backends/appcontainer/common/src/probe.rs b/src/backends/appcontainer/common/src/probe.rs index c0eb3dec6..8d0be7023 100644 --- a/src/backends/appcontainer/common/src/probe.rs +++ b/src/backends/appcontainer/common/src/probe.rs @@ -63,7 +63,7 @@ pub struct ProbeFacts { } /// Host support for enforcing sandbox UI restrictions. -#[derive(Serialize, Debug)] +#[derive(Serialize, Debug, Clone)] #[serde(rename_all = "camelCase")] pub struct UiCapabilitySupport { /// Whether the host can block reads from the clipboard. diff --git a/src/backends/bubblewrap/common/src/bwrap_runner.rs b/src/backends/bubblewrap/common/src/bwrap_runner.rs index ba888e98f..2c297872b 100644 --- a/src/backends/bubblewrap/common/src/bwrap_runner.rs +++ b/src/backends/bubblewrap/common/src/bwrap_runner.rs @@ -26,20 +26,24 @@ //! without root. use std::fmt::Write as FmtWrite; -use std::process::{Command, Stdio}; -use std::time::{Duration, Instant}; +use std::os::unix::process::CommandExt; +use std::process::{Child, ChildStdin, Command, Stdio}; +use std::time::Duration; use lxc_common::network_iptables::NetworkIptablesManager; +use wxc_common::interruptible_reader::{wrap_pipe, InterruptibleReader, ReadCanceller}; use wxc_common::linux_proxy_coordinator::LinuxProxyCoordinator; use wxc_common::logger::Logger; use wxc_common::models::{ExecutionRequest, NetworkEnforcementMode, ScriptResponse}; -use wxc_common::script_runner::ScriptRunner; +use wxc_common::sandbox_process::{ + boxed_closer, cancel_and_join_discard, group_kill, spawn_discard, take_boxed_read, + take_boxed_write, wait_with_timeout, SandboxBackend, SandboxProcess, StdioMode, StreamCloser, + WaitError, +}; +use wxc_common::validator::validate_common; use crate::bwrap_command; -/// Polling interval for timeout enforcement. -const POLL_INTERVAL_MS: u64 = 500; - /// Bubblewrap sandbox runner. Uses only shared `ContainerPolicy` fields — /// no backend-specific config struct required. #[derive(Default)] @@ -62,8 +66,8 @@ impl BubblewrapScriptRunner { } } -impl ScriptRunner for BubblewrapScriptRunner { - fn validate_runner(&self, request: &ExecutionRequest) -> Result<(), ScriptResponse> { +impl SandboxBackend for BubblewrapScriptRunner { + fn validate(&self, request: &ExecutionRequest) -> Result<(), ScriptResponse> { // User-input validation runs before the environmental `bwrap` // probe so config errors are reported deterministically even on // hosts without bwrap installed. @@ -85,14 +89,6 @@ impl ScriptRunner for BubblewrapScriptRunner { )); } - // Reject timeouts smaller than our polling interval. - if request.script_timeout > 0 && u64::from(request.script_timeout) < POLL_INTERVAL_MS { - return Err(ScriptResponse::error(&format!( - "script_timeout {}ms is below the minimum of {}ms", - request.script_timeout, POLL_INTERVAL_MS - ))); - } - if !Self::is_bwrap_available() { return Err(ScriptResponse::error( "Bubblewrap (bwrap) is not installed or not on PATH. \ @@ -103,16 +99,34 @@ impl ScriptRunner for BubblewrapScriptRunner { Ok(()) } - fn execute(&mut self, request: &ExecutionRequest, logger: &mut Logger) -> ScriptResponse { + fn spawn( + &mut self, + request: &ExecutionRequest, + logger: &mut Logger, + stdio: StdioMode, + ) -> Result, ScriptResponse> { + validate_common(request)?; + self.validate(request)?; + let child = self.spawn_bwrap(request, logger, stdio)?; + Ok(Box::new(BubblewrapSandboxProcess::new(child))) + } +} + +impl BubblewrapScriptRunner { + /// Set up networking and spawn `bwrap`, returning a [`BwrapChild`] wrapped + /// by the [`SandboxProcess`] handle. With [`StdioMode::Pipes`] the child's + /// stdio is piped (the caller drives it); with [`StdioMode::Inherit`] it + /// inherits the binary's stdio (a TTY when the binary has one). bwrap is + /// always placed in its own process group so it can be tree-terminated. + fn spawn_bwrap( + &self, + request: &ExecutionRequest, + logger: &mut Logger, + stdio: StdioMode, + ) -> Result { // 1. Start the network proxy if configured. Must happen before // arg-building so the proxy's loopback address can be injected as // HTTP_PROXY / HTTPS_PROXY into the sandbox environment. - // - // Pass the request's `default_network_policy` through so that a - // config of `{ defaultPolicy: "block", proxy: {...}, allowedHosts: - // [] }` actually denies-by-default at the proxy layer (otherwise - // the empty allow list + no iptables + no --unshare-net would let - // everything through). let mut proxy = LinuxProxyCoordinator::new(); if request.policy.network_proxy.is_enabled() { if let Err(err) = proxy.start( @@ -123,10 +137,10 @@ impl ScriptRunner for BubblewrapScriptRunner { request.policy.default_network_policy.clone(), logger, ) { - return ScriptResponse::error(&format!( + return Err(ScriptResponse::error(&format!( "Bubblewrap: failed to start network proxy: {}", err - )); + ))); } } @@ -148,7 +162,7 @@ impl ScriptRunner for BubblewrapScriptRunner { request.container_id.clone() }; - let mut fw_manager = if needs_iptables { + let fw_manager = if needs_iptables { let _ = writeln!( logger, "Bubblewrap: applying iptables rules for host-level network filtering" @@ -158,16 +172,16 @@ impl ScriptRunner for BubblewrapScriptRunner { Ok(true) => {} Ok(false) => { proxy.stop(logger); - return ScriptResponse::error( + return Err(ScriptResponse::error( "Bubblewrap: failed to apply iptables firewall rules.", - ); + )); } Err(e) => { proxy.stop(logger); - return ScriptResponse::error(&format!( + return Err(ScriptResponse::error(&format!( "Bubblewrap: network policy error: {}", e - )); + ))); } } Some(mgr) @@ -178,77 +192,253 @@ impl ScriptRunner for BubblewrapScriptRunner { // 4. Spawn `bwrap`. let mut command = Command::new("bwrap"); command.args(&args); - command - .stdin(Stdio::null()) - .stdout(Stdio::piped()) - .stderr(Stdio::piped()); + match stdio { + StdioMode::Pipes => { + command + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()); + } + StdioMode::Inherit => { + // The child (bwrap, PID 1 of the sandbox) inherits the binary's + // stdio directly — a TTY when the binary has one. + command + .stdin(Stdio::inherit()) + .stdout(Stdio::inherit()) + .stderr(Stdio::inherit()); + } + } + // Pipes mode: put bwrap in its own process group so a timeout / `kill()` + // can tree-kill it with a single `killpg` without touching the host's + // group. Inherit mode keeps bwrap in the executor's group (so it retains + // the controlling terminal and can't be SIGTTIN-stopped reading it); it's + // PID 1 of the new pid namespace (`--unshare-pid`), so killing the root + // process alone tears the whole sandbox down. + let group = stdio == StdioMode::Pipes; + if group { + command.process_group(0); + } let mut child = match command.spawn() { Ok(process) => process, Err(error) => { + let mut fw_manager = fw_manager; cleanup_iptables(&mut fw_manager, logger); proxy.stop(logger); - return ScriptResponse::error(&format!( + return Err(ScriptResponse::error(&format!( "Bubblewrap: failed to spawn bwrap: {}", error - )); + ))); } }; - // 5. Drain stdout/stderr in background threads to avoid pipe-buffer - // deadlock. - let stdout_handle = child - .stdout - .take() - .map(|r| std::thread::spawn(move || read_to_string(r))); - let stderr_handle = child - .stderr - .take() - .map(|r| std::thread::spawn(move || read_to_string(r))); - - // 6. Wait with optional timeout. + let (stdin, stdout, stderr) = match stdio { + StdioMode::Pipes => (child.stdin.take(), child.stdout.take(), child.stderr.take()), + StdioMode::Inherit => (None, None, None), + }; + // Wrap the pipe reads so the caller can abandon a stream a backgrounded + // descendant is holding open (see `SandboxProcess::stdout_closer`) + // without killing the child. On failure, tear down the per-run network + // state we already set up before returning the error. + let (stdout, stdout_canceller, stderr, stderr_canceller) = + match (wrap_pipe(stdout), wrap_pipe(stderr)) { + (Ok((out, out_canceller)), Ok((err, err_canceller))) => { + (out, out_canceller, err, err_canceller) + } + (out_result, err_result) => { + let _ = child.kill(); + let _ = child.wait(); + let mut fw_manager = fw_manager; + cleanup_iptables(&mut fw_manager, logger); + proxy.stop(logger); + let error = out_result.err().or(err_result.err()); + return Err(ScriptResponse::error(&format!( + "Bubblewrap: failed to wrap stdio pipes: {}", + error.map_or_else(|| "unknown error".to_string(), |e| e.to_string()), + ))); + } + }; let timeout = if request.script_timeout == 0 { None } else { Some(Duration::from_millis(u64::from(request.script_timeout))) }; - let exit_status = match wait_with_timeout(&mut child, timeout) { - Ok(status) => status, + Ok(BwrapChild { + child, + stdin, + stdout, + stderr, + stdout_canceller, + stderr_canceller, + group, + proxy, + fw_manager, + timeout, + }) + } +} + +/// A spawned `bwrap` sandbox: the child process, its parent-side pipe ends, +/// and the per-run network proxy / iptables state torn down once it exits. +struct BwrapChild { + child: Child, + stdin: Option, + stdout: Option, + stderr: Option, + /// Cancellers for the stdout/stderr reads, kept so the `SandboxProcess` + /// closers can mint a [`StreamCloser`] even after the stream is taken. + stdout_canceller: Option, + stderr_canceller: Option, + /// `true` when bwrap leads its own process group (`Pipes` mode), so + /// termination signals the whole group; `false` for `Inherit` mode, where + /// killing bwrap (pid 1 of the namespace) alone tears the sandbox down. + group: bool, + proxy: LinuxProxyCoordinator, + fw_manager: Option, + timeout: Option, +} + +impl BwrapChild { + /// Tear down per-run network state (iptables rules + proxy). Idempotent at + /// the manager level. + fn cleanup(&mut self, logger: &mut Logger) { + cleanup_iptables(&mut self.fw_manager, logger); + self.proxy.stop(logger); + } +} + +/// A running `bwrap` sandbox exposed as a [`SandboxProcess`]. Wraps the spawned +/// [`BwrapChild`] (child, pipes, and per-run network state), tearing the network +/// state down once the child exits. +struct BubblewrapSandboxProcess { + inner: BwrapChild, + teardown_done: bool, +} + +impl BubblewrapSandboxProcess { + fn new(child: BwrapChild) -> Self { + Self { + inner: child, + teardown_done: false, + } + } + + fn run_teardown(&mut self) { + if self.teardown_done { + return; + } + self.teardown_done = true; + let mut logger = Logger::new(wxc_common::logger::Mode::Buffer); + self.inner.cleanup(&mut logger); + } +} + +impl SandboxProcess for BubblewrapSandboxProcess { + fn take_stdin(&mut self) -> Option> { + take_boxed_write(&mut self.inner.stdin) + } + + fn take_stdout(&mut self) -> Option> { + take_boxed_read(&mut self.inner.stdout) + } + + fn take_stderr(&mut self) -> Option> { + take_boxed_read(&mut self.inner.stderr) + } + + fn stdout_closer(&self) -> Option> { + boxed_closer(&self.inner.stdout_canceller) + } + + fn stderr_closer(&self) -> Option> { + boxed_closer(&self.inner.stderr_canceller) + } + + fn try_wait(&mut self) -> std::io::Result> { + Ok(self + .inner + .child + .try_wait()? + .map(|status| status.code().unwrap_or(-1))) + } + + fn id(&self) -> u32 { + self.inner.child.id() + } + + fn kill(&mut self) -> std::io::Result<()> { + // No-op once the child has exited and been reaped: its pid/pgid can be + // recycled, so signaling it could hit an unrelated process (group). A + // reaped `Child` returns its cached status here without a syscall. + if self.inner.child.try_wait()?.is_some() { + return Ok(()); + } + if self.inner.group { + // Pipes mode: bwrap leads its own process group — tree-kill it. + group_kill(&mut self.inner.child) + } else { + // Inherit mode: bwrap shares the executor's group (no + // `process_group(0)`), so a group-kill would hit the executor. + // bwrap is pid 1 of the sandbox pid namespace, so killing the root + // alone tears the whole namespace (every descendant) down. + self.inner.child.kill() + } + } + + fn wait(&mut self) -> std::io::Result { + // Close our copy of any not-taken stdin so the child sees EOF. + self.inner.stdin.take(); + + // Drain (and discard) any not-taken stdout/stderr concurrently so the + // child can't block on a full pipe (taken streams are the caller's + // responsibility). + let stdout_thread = spawn_discard(self.inner.stdout.take()); + let stderr_thread = spawn_discard(self.inner.stderr.take()); + + let result = match wait_with_timeout(&mut self.inner.child, self.inner.timeout) { + Ok(status) => Ok(status.code().unwrap_or(-1)), Err(WaitError::Timeout) => { - let _ = child.kill(); - let _ = child.wait(); - cleanup_iptables(&mut fw_manager, logger); - proxy.stop(logger); - return ScriptResponse { - exit_code: -1, - standard_out: join_reader(stdout_handle), - standard_err: join_reader(stderr_handle), - error_message: format!( - "Bubblewrap: script timed out after {}ms", - request.script_timeout - ), - ..Default::default() - }; + // Tree-kill so descendants die too and release any stdout/stderr + // pipe write-ends (else the drain threads below could block). + // `kill()` group-kills in Pipes mode, and in Inherit mode kills + // bwrap (pid 1 of the namespace), which tears the sandbox down. + let _ = self.kill(); + let _ = self.inner.child.wait(); + Err(std::io::Error::new( + std::io::ErrorKind::TimedOut, + "Bubblewrap: script timed out", + )) } Err(WaitError::Io(error)) => { - cleanup_iptables(&mut fw_manager, logger); - proxy.stop(logger); - return ScriptResponse::error(&format!("Bubblewrap: wait failed: {}", error)); + // The child may still be alive; kill+reap it before + // `run_teardown()` removes the iptables/proxy enforcement out + // from under it. + let _ = self.kill(); + let _ = self.inner.child.wait(); + Err(std::io::Error::other(format!( + "Bubblewrap: wait failed: {error}" + ))) } }; - // 7. Collect output and clean up. - cleanup_iptables(&mut fw_manager, logger); - proxy.stop(logger); + cancel_and_join_discard(stdout_thread, &self.inner.stdout_canceller); + cancel_and_join_discard(stderr_thread, &self.inner.stderr_canceller); + self.run_teardown(); + result + } +} - ScriptResponse { - exit_code: exit_status.code().unwrap_or(-1), - standard_out: join_reader(stdout_handle), - standard_err: join_reader(stderr_handle), - error_message: String::new(), - ..Default::default() - } +impl Drop for BubblewrapSandboxProcess { + fn drop(&mut self) { + // Kill and reap the child *before* removing network enforcement — + // otherwise an abandoned-but-running sandbox would keep egressing after + // its iptables/proxy rules were torn down, and the child would leak as + // a zombie. `kill()` group-kills (bwrap is PID 1 of the pid namespace), + // then we reap. + let _ = self.kill(); + let _ = self.inner.child.wait(); + self.run_teardown(); } } @@ -276,48 +466,6 @@ fn cleanup_iptables(manager: &mut Option, logger: &mut L } } -// -- I/O helpers (mirrors seatbelt_runner) -------------------------------- - -fn read_to_string(mut reader: R) -> String { - let mut buffer = String::new(); - let _ = reader.read_to_string(&mut buffer); - buffer -} - -fn join_reader(handle: Option>) -> String { - match handle { - Some(h) => h.join().unwrap_or_default(), - None => String::new(), - } -} - -enum WaitError { - Timeout, - Io(std::io::Error), -} - -fn wait_with_timeout( - child: &mut std::process::Child, - timeout: Option, -) -> Result { - let Some(deadline) = timeout.map(|d| Instant::now() + d) else { - return child.wait().map_err(WaitError::Io); - }; - - loop { - match child.try_wait() { - Ok(Some(status)) => return Ok(status), - Ok(None) => { - if Instant::now() >= deadline { - return Err(WaitError::Timeout); - } - std::thread::sleep(Duration::from_millis(POLL_INTERVAL_MS)); - } - Err(e) => return Err(WaitError::Io(e)), - } - } -} - #[cfg(test)] mod tests { use super::*; @@ -340,7 +488,7 @@ mod tests { req.experimental_enabled = false; let runner = BubblewrapScriptRunner::new(); - let err = runner.validate_runner(&req).unwrap_err(); + let err = runner.validate(&req).unwrap_err(); assert!( err.error_message.contains("builtinTestServer") && err.error_message.contains("--experimental"), @@ -357,7 +505,7 @@ mod tests { req.script_code = String::new(); let runner = BubblewrapScriptRunner::new(); - let err = runner.validate_runner(&req).unwrap_err(); + let err = runner.validate(&req).unwrap_err(); assert!(err.error_message.contains("script_code is empty")); } } diff --git a/src/backends/seatbelt/common/Cargo.toml b/src/backends/seatbelt/common/Cargo.toml index 378e6fda1..fb7029ae6 100644 --- a/src/backends/seatbelt/common/Cargo.toml +++ b/src/backends/seatbelt/common/Cargo.toml @@ -12,7 +12,3 @@ wxc_common = { workspace = true } [target.'cfg(target_os = "macos")'.dependencies] libc = { workspace = true } -# Pty bridge is shared with the LXC backend via mxc_pty; that crate owns -# the openpty + pre_exec (setsid + TIOCSCTTY) plumbing and the libc/nix -# deps that go with it. -mxc_pty = { workspace = true } diff --git a/src/backends/seatbelt/common/src/profile_builder.rs b/src/backends/seatbelt/common/src/profile_builder.rs index effb4c03d..df4ae160f 100644 --- a/src/backends/seatbelt/common/src/profile_builder.rs +++ b/src/backends/seatbelt/common/src/profile_builder.rs @@ -59,11 +59,11 @@ pub fn build_profile(request: &ExecutionRequest) -> Result { // Filesystem — read-only system paths every process needs. out.push_str(SYSTEM_READ_ALLOW); - // Pseudo-terminal access — the seatbelt runner attaches the inner - // shell to a freshly-allocated pty (see `mxc_pty::run_with_pty`) so - // callers can stream output and the shell sees a real TTY. Without - // these rules, `isatty()` / `tcgetattr()` / `ttyname()` fail with - // EPERM because the kernel calls block on the secondary fd. + // Pseudo-terminal access — when the executor binary runs under a pty + // the sandboxed shell inherits that TTY, so it sees a real terminal + // and calls `isatty()` / `tcgetattr()` / `ttyname()` against it. + // Without these rules, those calls fail with EPERM because the + // kernel calls block on the secondary fd. out.push_str(TTY_ALLOW); // Policy-derived allow rules. @@ -136,7 +136,7 @@ const SYSTEM_READ_ALLOW: &str = "\ /// at startup, and read access to `/dev/fd` for the `/dev/stdout` etc. /// indirection some tools use. const TTY_ALLOW: &str = "\ -;; --- pseudo-terminal access (pty bridge in mxc_pty::run_with_pty) --- +;; --- pseudo-terminal access (inherited TTY when run under a pty) --- (allow file-read* file-write* file-ioctl (literal \"/dev/tty\") (regex #\"^/dev/ttys[0-9]+$\")) @@ -422,7 +422,7 @@ fn write_extra_seatbelt_rules(out: &mut String, request: &ExecutionRequest) { /// Expand a leading `~` or `~/` to the current user's home directory. /// Returns an error if `HOME` is not set and the path requires expansion. -fn expand_tilde(path: &str) -> Result { +pub(crate) fn expand_tilde(path: &str) -> Result { if path == "~" || path.starts_with("~/") { let home = std::env::var("HOME").map_err(|_| { format!("HOME environment variable not set; cannot expand '{path}' in seatbelt profile") diff --git a/src/backends/seatbelt/common/src/seatbelt_runner.rs b/src/backends/seatbelt/common/src/seatbelt_runner.rs index 1c6f63655..49db23aff 100644 --- a/src/backends/seatbelt/common/src/seatbelt_runner.rs +++ b/src/backends/seatbelt/common/src/seatbelt_runner.rs @@ -7,9 +7,11 @@ //! The sandbox is applied via `sandbox_init()` inside `Command::pre_exec`, //! then `/bin/sh` is exec'd directly. The child inherits the parent's //! Mach bootstrap namespace so both CLI commands and GUI applications -//! (when `guiAccess = true`) work correctly. The exec path uses -//! [`mxc_pty::run_with_pty`] so the inner shell sees a real TTY and the -//! host can stream its output as it arrives. +//! (when `guiAccess = true`) work correctly. The exec path returns a +//! `SandboxProcess` whose stdio follows the requested `StdioMode`: +//! `Inherit` gives the child the host's own stdio (a real TTY when the +//! binary runs under a pty), while `Pipes` exposes stdout/stderr/stdin +//! handles the caller can stream. //! //! For apps that require LaunchServices (`launchMethod: "open"`), the runner //! writes a sandbox helper script and launches the target app via `open -n -W`, @@ -23,12 +25,17 @@ use std::fmt::Write as FmtWrite; use std::fs; use std::os::unix::process::CommandExt; use std::process::{Command, Stdio}; -use std::time::{Duration, Instant}; +use std::time::Duration; -use mxc_pty::{run_with_pty, PtyOptions, PtyOutcome}; +use wxc_common::interruptible_reader::{wrap_pipe, InterruptibleReader, ReadCanceller}; use wxc_common::logger::Logger; use wxc_common::models::{ExecutionRequest, LaunchMethod, ScriptResponse}; -use wxc_common::script_runner::ScriptRunner; +use wxc_common::sandbox_process::{ + boxed_closer, cancel_and_join_discard, group_kill, spawn_discard, take_boxed_read, + take_boxed_write, wait_with_timeout, SandboxBackend, SandboxProcess, StdioMode, StreamCloser, + WaitError, +}; +use wxc_common::validator::validate_common; use crate::profile_builder::build_profile; @@ -65,10 +72,8 @@ impl SeatbeltScriptRunner { } } -const POLL_INTERVAL_MS: u64 = 500; - -impl ScriptRunner for SeatbeltScriptRunner { - fn validate_runner(&self, request: &ExecutionRequest) -> Result<(), ScriptResponse> { +impl SandboxBackend for SeatbeltScriptRunner { + fn validate(&self, request: &ExecutionRequest) -> Result<(), ScriptResponse> { // Seatbelt cannot filter network by hostname — reject blockedHosts // rather than silently allowing traffic the user expects to be denied. if !request.policy.blocked_hosts.is_empty() { @@ -80,40 +85,27 @@ impl ScriptRunner for SeatbeltScriptRunner { )); } - // Reject timeouts that are too small for our polling interval to - // enforce accurately. - if request.script_timeout > 0 && u64::from(request.script_timeout) < POLL_INTERVAL_MS { - return Err(error_response(format!( - "scriptTimeout {}ms is below the minimum of {}ms", - request.script_timeout, POLL_INTERVAL_MS - ))); - } - Ok(()) } - fn execute(&mut self, request: &ExecutionRequest, logger: &mut Logger) -> ScriptResponse { - // 1. Build the Seatbelt profile from the policy. - let profile = match build_profile(request) { - Ok(p) => p, - Err(e) => { - return ScriptResponse { - exit_code: -1, - standard_out: String::new(), - standard_err: String::new(), - error_message: e, - ..Default::default() - } - } - }; + fn spawn( + &mut self, + request: &ExecutionRequest, + logger: &mut Logger, + stdio: StdioMode, + ) -> Result, ScriptResponse> { + validate_common(request)?; + self.validate(request)?; - // Determine launch method from seatbelt config. + // Build the Seatbelt profile from the policy. + let profile = build_profile(request).map_err(error_response)?; + + // Determine launch method + GUI access from the seatbelt config. let launch_method = request .seatbelt .as_ref() .map(|s| s.launch_method.clone()) .unwrap_or_default(); - let gui_access = request .seatbelt .as_ref() @@ -121,252 +113,388 @@ impl ScriptRunner for SeatbeltScriptRunner { .unwrap_or(false); match launch_method { - LaunchMethod::Exec => self.execute_exec(&profile, request, gui_access, logger), - LaunchMethod::Open => self.execute_open(&profile, request, logger), + LaunchMethod::Exec => spawn_exec(&profile, request, gui_access, stdio, logger), + LaunchMethod::Open => spawn_open(&profile, request, stdio, logger), } } } -impl SeatbeltScriptRunner { - /// Standard execution path: fork → sandbox_init → exec. - /// When `gui_access` is true, stdio is inherited for GUI app compatibility. - fn execute_exec( - &self, - profile: &str, - request: &ExecutionRequest, - gui_access: bool, - logger: &mut Logger, - ) -> ScriptResponse { - let mut command = match build_sandbox_command(profile, &request.script_code, logger) { - Ok(cmd) => cmd, - Err(resp) => return resp, - }; - - // Environment setup. - if !request.env.is_empty() { - command.env_clear(); - for kv in &request.env { - if let Some((key, value)) = kv.split_once('=') { - command.env(key, value); - } - } - } +/// Exec launch path: fork → sandbox_init → exec `/bin/sh -c