From 5bba1b726711aa7720ce27156692ad8ae3140efb Mon Sep 17 00:00:00 2001 From: Carlos Alexandro Becker Date: Wed, 24 Jun 2026 09:43:23 -0300 Subject: [PATCH] feat(wxc_common): add the sandbox execution interfaces (SandboxBackend / SandboxProcess / Runner) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce the shared, no-pty execution surface that the containment backends and a future in-process library build on. Purely additive: no existing code path uses these yet, so behavior is unchanged. - `SandboxProcess` — a handle to a running sandboxed child: `take_stdin` / `take_stdout` / `take_stderr`, `try_wait`, `id`, `kill` (process-tree), and `wait` (drains any untaken stdio, honors `scriptTimeout`), plus stdout/stderr closers for abandoning a backgrounded-descendant-held read without a kill. - `SandboxBackend` — `validate` + `spawn(request, logger, StdioMode) -> SandboxProcess` + a `diagnose_exit` hook; `StdioMode::{Pipes, Inherit}`. - `Runner` — the generic adapter bridging any `SandboxBackend` to the run-to-completion `ScriptRunner` (spawn `Inherit`, then `wait`). - `StreamCloser`, `group_kill` (Unix leader-first SIGKILL of the child's group), and `wait_with_timeout` (adaptive 1ms->50ms backoff poll). - `interruptible_reader` (Unix self-pipe + `poll`) and the Windows pipe helpers in `process_util` (`InterruptiblePipeReader` / `PipeReadCanceller` / `create_std_pipes`) for out-of-band-cancelable streaming reads. - `FailurePhase::Timeout` so a timeout is distinguishable from other failures. The library backends and executor binaries are migrated onto this surface in a follow-up PR, and the importable `mxc-sdk` crate is built on top of it. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Signed-off-by: Carlos Alexandro Becker --- src/core/wxc_common/Cargo.toml | 2 +- .../wxc_common/src/interruptible_reader.rs | 293 +++++++++++ src/core/wxc_common/src/lib.rs | 5 + src/core/wxc_common/src/models.rs | 5 + src/core/wxc_common/src/process_util.rs | 227 ++++++++- src/core/wxc_common/src/sandbox_process.rs | 475 ++++++++++++++++++ 6 files changed, 1005 insertions(+), 2 deletions(-) create mode 100644 src/core/wxc_common/src/interruptible_reader.rs create mode 100644 src/core/wxc_common/src/sandbox_process.rs diff --git a/src/core/wxc_common/Cargo.toml b/src/core/wxc_common/Cargo.toml index e6dcf35b8..bca51a80f 100644 --- a/src/core/wxc_common/Cargo.toml +++ b/src/core/wxc_common/Cargo.toml @@ -28,7 +28,7 @@ windows-core = { workspace = true } widestring = { workspace = true } winreg = { workspace = true } -[target.'cfg(target_os = "linux")'.dependencies] +[target.'cfg(unix)'.dependencies] libc = { workspace = true } [dev-dependencies] diff --git a/src/core/wxc_common/src/interruptible_reader.rs b/src/core/wxc_common/src/interruptible_reader.rs new file mode 100644 index 000000000..1cc01bb11 --- /dev/null +++ b/src/core/wxc_common/src/interruptible_reader.rs @@ -0,0 +1,293 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! A Unix pipe reader whose `read` can be cancelled out-of-band. +//! +//! The in-tree Unix backends (Seatbelt, Bubblewrap) hand the caller the child's +//! raw stdout/stderr pipe, where a blocking `read` only ends at EOF — when every +//! write end closes. A backgrounded descendant that inherited the pipe can hold +//! its write end open past the foreground command's exit, leaving such a read +//! parked indefinitely. [`InterruptibleReader`] wraps the pipe so a separate +//! [`ReadCanceller`] (a [`StreamCloser`]) can make that read return EOF +//! (`Ok(0)`) promptly, without killing the child. +//! +//! It uses a self-pipe + `poll(2)`: the read fd is set non-blocking and the +//! reader blocks in `poll` on both the data pipe and the read end of a +//! self-pipe; cancellation writes a byte to the self-pipe (waking the `poll`) +//! and sets a flag so later reads short-circuit to EOF. + +use std::io::{self, Read}; +use std::os::fd::{AsRawFd, FromRawFd, OwnedFd, RawFd}; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Arc; + +use crate::sandbox_process::StreamCloser; + +/// Cancellation state shared between an [`InterruptibleReader`] and the +/// [`ReadCanceller`]s minted from it: the write end of the self-pipe used to +/// wake the reader's `poll`, plus a flag so a read after cancellation returns +/// EOF without touching the data pipe. +struct CancelState { + cancelled: AtomicBool, + /// Write end of the self-pipe; one byte here wakes the reader's `poll`. + wake_w: OwnedFd, +} + +impl CancelState { + /// Mark cancelled (once) and nudge the reader's `poll` awake. + fn cancel(&self) { + // Flag first so a read that wakes observes EOF, then wake the poll. If + // we were already cancelled, do nothing — `close` is idempotent. + if self.cancelled.swap(true, Ordering::Release) { + return; + } + // A single byte makes `poll` return. The self-pipe write end is + // non-blocking, so this never blocks; ignore the result — `EAGAIN` + // (a wake byte is already pending) and `EPIPE` (the reader's end has + // been dropped) are both fine. + let byte = [0u8; 1]; + // SAFETY: `wake_w` is a valid, owned, non-blocking pipe write fd; the + // buffer is a valid 1-byte local. + unsafe { + libc::write(self.wake_w.as_raw_fd(), byte.as_ptr().cast(), 1); + } + } +} + +/// A [`StreamCloser`] for an [`InterruptibleReader`]. Cloneable and `Send + +/// Sync` so several may be held (and fired from any thread); all share one +/// cancellation state, and `close` is idempotent. +#[derive(Clone)] +pub struct ReadCanceller(Arc); + +impl StreamCloser for ReadCanceller { + fn close(&self) { + self.0.cancel(); + } +} + +/// A readable pipe whose `read` can be cancelled via a [`ReadCanceller`]. +/// +/// Implements [`Read`]: it blocks in `poll(2)` on the data pipe and a self-pipe +/// and returns the next chunk, real EOF (`Ok(0)`), or — once a paired +/// [`ReadCanceller::close`] fires — a prompt cancellation EOF (`Ok(0)`). +pub struct InterruptibleReader { + /// The child's stdout/stderr pipe, set non-blocking. + fd: OwnedFd, + /// Read end of the self-pipe; readable once cancellation writes its byte. + wake_r: OwnedFd, + state: Arc, +} + +impl InterruptibleReader { + /// Wrap an owned readable pipe `fd` so its reads can be cancelled + /// out-of-band. Sets `fd` non-blocking and creates the self-pipe used for + /// wakeups. + /// + /// # Errors + /// + /// Returns the underlying [`io::Error`] if the self-pipe cannot be created + /// or either fd cannot be switched to non-blocking mode. + pub fn new(fd: OwnedFd) -> io::Result { + set_nonblocking(fd.as_raw_fd())?; + + // Self-pipe for wakeups: the write end is non-blocking so `cancel` + // never stalls; the read end stays blocking but is only ever polled. + let mut fds = [0 as RawFd; 2]; + // SAFETY: `fds` is a valid 2-element array for `pipe` to fill. + if unsafe { libc::pipe(fds.as_mut_ptr()) } < 0 { + return Err(io::Error::last_os_error()); + } + // SAFETY: `pipe` succeeded, so both fds are freshly owned by us. + let wake_r = unsafe { OwnedFd::from_raw_fd(fds[0]) }; + let wake_w = unsafe { OwnedFd::from_raw_fd(fds[1]) }; + set_nonblocking(wake_w.as_raw_fd())?; + + Ok(Self { + fd, + wake_r, + state: Arc::new(CancelState { + cancelled: AtomicBool::new(false), + wake_w, + }), + }) + } + + /// Mint a closer that EOFs this reader's `read` on demand. Several closers + /// may be minted; they share one cancellation state. + pub fn canceller(&self) -> ReadCanceller { + ReadCanceller(Arc::clone(&self.state)) + } +} + +/// Wrap an optional child pipe end into an [`InterruptibleReader`] plus a +/// [`ReadCanceller`] for its [`StreamCloser`]. `None` (inherited stdio) stays +/// `None` for both. Convenience for the Unix backends, which hold `ChildStdout` +/// / `ChildStderr` (both `Into`). +/// +/// # Errors +/// +/// Propagates any [`io::Error`] from [`InterruptibleReader::new`]. +pub fn wrap_pipe>( + pipe: Option, +) -> io::Result<(Option, Option)> { + let Some(pipe) = pipe else { + return Ok((None, None)); + }; + let reader = InterruptibleReader::new(pipe.into())?; + let canceller = reader.canceller(); + Ok((Some(reader), Some(canceller))) +} + +impl Read for InterruptibleReader { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + // A zero-length read must return `Ok(0)` immediately (the `Read` + // contract), never block in `poll`. + if buf.is_empty() { + return Ok(0); + } + // Already cancelled: report EOF without touching the data pipe. + if self.state.cancelled.load(Ordering::Acquire) { + return Ok(0); + } + loop { + let mut poll_fds = [ + libc::pollfd { + fd: self.fd.as_raw_fd(), + events: libc::POLLIN, + revents: 0, + }, + libc::pollfd { + fd: self.wake_r.as_raw_fd(), + events: libc::POLLIN, + revents: 0, + }, + ]; + // SAFETY: `poll_fds` is a valid 2-element array of pollfds; both + // fds are owned and live for the duration of the call. + let rc = unsafe { libc::poll(poll_fds.as_mut_ptr(), 2, -1) }; + if rc < 0 { + let err = io::Error::last_os_error(); + if err.kind() == io::ErrorKind::Interrupted { + continue; + } + return Err(err); + } + + // Cancellation wins over any pending data so a held-open pipe is + // abandoned promptly. + if self.state.cancelled.load(Ordering::Acquire) || poll_fds[1].revents != 0 { + return Ok(0); + } + + if poll_fds[0].revents != 0 { + // SAFETY: `fd` is owned and `buf` is a valid writable slice. + let n = + unsafe { libc::read(self.fd.as_raw_fd(), buf.as_mut_ptr().cast(), buf.len()) }; + if n >= 0 { + return Ok(n as usize); + } + let err = io::Error::last_os_error(); + match err.raw_os_error() { + // Spurious readiness (e.g. POLLHUP with no buffered bytes): + // loop and re-poll. + Some(libc::EAGAIN) => continue, + _ if err.kind() == io::ErrorKind::Interrupted => continue, + _ => return Err(err), + } + } + } + } +} + +/// Add `O_NONBLOCK` to `fd`'s file-status flags. +fn set_nonblocking(fd: RawFd) -> io::Result<()> { + // SAFETY: `fd` is a valid open fd; `fcntl` with these commands only reads + // and writes its flags. + let flags = unsafe { libc::fcntl(fd, libc::F_GETFL) }; + if flags < 0 { + return Err(io::Error::last_os_error()); + } + if unsafe { libc::fcntl(fd, libc::F_SETFL, flags | libc::O_NONBLOCK) } < 0 { + return Err(io::Error::last_os_error()); + } + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Write; + use std::time::{Duration, Instant}; + + /// Build an `InterruptibleReader` over a fresh pipe, returning it plus the + /// write end so a test can feed it bytes. + fn reader_with_writer() -> (InterruptibleReader, OwnedFd) { + let mut fds = [0 as RawFd; 2]; + assert!(unsafe { libc::pipe(fds.as_mut_ptr()) } == 0, "pipe"); + let read_end = unsafe { OwnedFd::from_raw_fd(fds[0]) }; + let write_end = unsafe { OwnedFd::from_raw_fd(fds[1]) }; + let reader = InterruptibleReader::new(read_end).expect("wrap reader"); + (reader, write_end) + } + + #[test] + fn reads_data_then_eof_on_writer_close() { + let (mut reader, write_end) = reader_with_writer(); + let mut writer = std::fs::File::from(write_end); + writer.write_all(b"hello").expect("write"); + drop(writer); // close write end -> EOF after the data + + let mut buf = [0u8; 16]; + let n = reader.read(&mut buf).expect("read data"); + assert_eq!(&buf[..n], b"hello"); + assert_eq!(reader.read(&mut buf).expect("read eof"), 0); + } + + #[test] + fn zero_length_read_returns_ok_zero_without_blocking() { + // The write end stays open, so a normal read would block; a zero-length + // read must still return Ok(0) immediately per the `Read` contract. + let (mut reader, _write_end) = reader_with_writer(); + let mut empty: [u8; 0] = []; + assert_eq!(reader.read(&mut empty).expect("zero-length read"), 0); + } + + #[test] + fn close_unblocks_a_parked_read_without_writer_close() { + // The write end stays open for the whole test, so a plain read would + // block forever; the canceller must EOF it promptly. + let (reader, _write_end) = reader_with_writer(); + let canceller = reader.canceller(); + let mut reader = reader; + + let handle = std::thread::spawn(move || { + let mut buf = [0u8; 16]; + let start = Instant::now(); + let n = reader.read(&mut buf).expect("read returns"); + (n, start.elapsed()) + }); + + std::thread::sleep(Duration::from_millis(50)); + canceller.close(); + + let (n, elapsed) = handle.join().expect("reader thread"); + assert_eq!(n, 0, "cancelled read reports EOF"); + assert!( + elapsed < Duration::from_secs(5), + "read should return promptly after close, took {elapsed:?}" + ); + } + + #[test] + fn close_is_idempotent_and_reads_stay_eof() { + let (mut reader, _write_end) = reader_with_writer(); + let canceller = reader.canceller(); + canceller.close(); + canceller.close(); // second call is a no-op + + let mut buf = [0u8; 16]; + assert_eq!(reader.read(&mut buf).expect("eof"), 0); + assert_eq!(reader.read(&mut buf).expect("still eof"), 0); + } +} diff --git a/src/core/wxc_common/src/lib.rs b/src/core/wxc_common/src/lib.rs index f6af283b6..9165e7441 100644 --- a/src/core/wxc_common/src/lib.rs +++ b/src/core/wxc_common/src/lib.rs @@ -14,6 +14,7 @@ pub mod logger; pub mod microvm_staging; pub mod models; pub mod mxc_error; +pub mod sandbox_process; pub mod script_runner; pub mod state_aware_backend; pub mod state_aware_dispatch; @@ -40,6 +41,10 @@ pub mod process_util; #[cfg(target_os = "windows")] pub mod string_util; +// Unix-specific modules (shared by the Seatbelt and Bubblewrap backends). +#[cfg(unix)] +pub mod interruptible_reader; + // Linux-specific modules #[cfg(target_os = "linux")] pub mod linux_proxy_coordinator; diff --git a/src/core/wxc_common/src/models.rs b/src/core/wxc_common/src/models.rs index c5ff37e6d..29a41f21e 100644 --- a/src/core/wxc_common/src/models.rs +++ b/src/core/wxc_common/src/models.rs @@ -592,6 +592,11 @@ pub enum FailurePhase { LaunchFailed, /// The process was created but exited with a non-zero code. ProcessExited, + /// The process was force-terminated because it exceeded `scriptTimeout`. + /// Distinct from [`ProcessExited`] (it did not exit on its own) so callers + /// can detect a timeout uniformly across backends rather than inferring it + /// from `exit_code == -1` (which collides with other failures). + Timeout, /// The selected containment backend is unavailable on this host: the API is /// missing, or present but not usable (e.g. feature-disabled). Distinct from /// [`LaunchFailed`] so callers can fall back to a lower tier rather than diff --git a/src/core/wxc_common/src/process_util.rs b/src/core/wxc_common/src/process_util.rs index 14605e059..2c89f56dc 100644 --- a/src/core/wxc_common/src/process_util.rs +++ b/src/core/wxc_common/src/process_util.rs @@ -2,8 +2,11 @@ // Licensed under the MIT License. use std::path::PathBuf; +use std::sync::{Arc, Mutex, Weak}; +use std::time::Duration; use crate::error::WxcError; +use crate::sandbox_process::StreamCloser; use crate::string_util; use windows::Win32::Foundation::{ @@ -11,12 +14,234 @@ use windows::Win32::Foundation::{ HLOCAL, WAIT_OBJECT_0, }; use windows::Win32::Security::{DeriveCapabilitySidsFromName, PSID, SECURITY_ATTRIBUTES}; -use windows::Win32::Storage::FileSystem::ReadFile; +use windows::Win32::Storage::FileSystem::{ReadFile, WriteFile}; use windows::Win32::System::Pipes::CreatePipe; use windows::Win32::System::Threading::WaitForSingleObject; +use windows::Win32::System::IO::CancelIoEx; use windows_core::BOOL; use windows_core::PCWSTR; +/// A readable end of an anonymous pipe (e.g. the child's stdout/stderr), +/// owning the handle and closing it on drop. Implements [`std::io::Read`] +/// via `ReadFile`; a broken pipe (all write ends closed) reads as EOF. +/// `Send` so it can be handed to a reader thread. +pub struct PipeReader(SendOwnedHandle); + +impl PipeReader { + /// Take ownership of `handle` (invalidating the source `OwnedHandle`). + pub fn new(mut handle: OwnedHandle) -> Self { + Self(SendOwnedHandle::take(&mut handle)) + } +} + +impl std::io::Read for PipeReader { + fn read(&mut self, buf: &mut [u8]) -> std::io::Result { + use windows::Win32::Foundation::ERROR_BROKEN_PIPE; + let mut read: u32 = 0; + // SAFETY: `self.0` owns a valid pipe handle for the lifetime of this + // `PipeReader`; `buf`/`read` are valid local out-params for the call. + match unsafe { ReadFile(self.0.get(), Some(buf), Some(&mut read), None) } { + Ok(()) => Ok(read as usize), + // Write ends all closed: normal end-of-stream, report EOF. + Err(e) if e.code() == ERROR_BROKEN_PIPE.to_hresult() => Ok(0), + Err(e) => Err(std::io::Error::other(e)), + } + } +} + +/// Cancellation state shared between an [`InterruptiblePipeReader`] and its +/// [`PipeReadCanceller`]s. Owns the pipe handle — closed only when the **last** +/// reference drops, so a canceller's `CancelIoEx` can never race a closed (and +/// possibly reused) handle — plus the [`ReadGate`] the reader and cancellers +/// hand off through. +struct CancelablePipe { + handle: SendOwnedHandle, + gate: Mutex, +} + +/// Reader/canceller handshake, guarded by [`CancelablePipe::gate`]. +/// +/// `CancelIoEx` is *edge-triggered*: it aborts only I/O already pending when it +/// is called. A bare `cancelled` flag + a single `CancelIoEx` therefore has a +/// lost-wakeup race — a `close` landing between a read's flag check and its +/// `ReadFile` entering the kernel cancels nothing and never retries, parking the +/// read until real EOF. The mutex closes that race by ordering "a read is +/// starting" against "cancel requested": a racing `close` either sees the read +/// has not started (and the read then observes `cancelled`) or sees `reading` +/// and keeps issuing `CancelIoEx` until the read is aborted. +#[derive(Default)] +struct ReadGate { + /// Set once `close` has been called; a read observing it returns EOF instead + /// of issuing (or while abandoning) a `ReadFile`. + cancelled: bool, + /// True while a read is in — or about to enter — its blocking `ReadFile`, so + /// `close` knows to keep issuing `CancelIoEx` until that read is aborted. + reading: bool, +} + +impl CancelablePipe { + /// Lock the gate, tolerating a poisoned mutex (the guarded data is two + /// bools with no broken invariant on panic). + fn lock(&self) -> std::sync::MutexGuard<'_, ReadGate> { + self.gate + .lock() + .unwrap_or_else(|poisoned| poisoned.into_inner()) + } +} + +// SAFETY: the only non-`Sync` field is a process-global Windows HANDLE whose +// value is copied for `ReadFile` / `CancelIoEx`; issuing `CancelIoEx` from one +// thread to abort a `ReadFile` blocked on another is the documented, supported +// way to interrupt synchronous pipe I/O, and the reader/canceller handshake is +// serialised by `gate`. So sharing `&CancelablePipe` across threads is sound. +unsafe impl Sync for CancelablePipe {} + +/// A readable pipe end (e.g. the child's stdout/stderr) whose blocking +/// `ReadFile` can be cancelled out-of-band via a [`PipeReadCanceller`] — +/// reporting EOF — without closing the child or its other streams. A broken +/// pipe (all write ends closed) still reads as EOF as usual. `Send` so it can +/// be handed to a reader thread. Single-reader: at most one thread may `read` it +/// at a time (any number of cancellers may fire concurrently). +pub struct InterruptiblePipeReader(Arc); + +impl InterruptiblePipeReader { + /// Take ownership of `handle` (invalidating the source `OwnedHandle`). + pub fn new(mut handle: OwnedHandle) -> Self { + Self(Arc::new(CancelablePipe { + handle: SendOwnedHandle::take(&mut handle), + gate: Mutex::new(ReadGate::default()), + })) + } + + /// Mint a closer that EOFs this reader's `read` on demand. Several closers + /// may be minted; they share one cancellation state. The closer holds only a + /// [`Weak`] reference, so it never keeps the read handle open past this + /// reader's lifetime. + pub fn canceller(&self) -> PipeReadCanceller { + PipeReadCanceller(Arc::downgrade(&self.0)) + } +} + +impl std::io::Read for InterruptiblePipeReader { + fn read(&mut self, buf: &mut [u8]) -> std::io::Result { + use windows::Win32::Foundation::{ERROR_BROKEN_PIPE, ERROR_OPERATION_ABORTED}; + // Announce the read under the gate: a `close` that already fired makes us + // EOF without touching the pipe; otherwise mark `reading` so a `close` + // racing us keeps issuing `CancelIoEx` until our `ReadFile` is aborted. + { + let mut gate = self.0.lock(); + if gate.cancelled { + return Ok(0); + } + gate.reading = true; + } + let mut read: u32 = 0; + // SAFETY: the `Arc` keeps the pipe handle valid for this call; + // `buf`/`read` are valid local out-params. + let result = unsafe { ReadFile(self.0.handle.get(), Some(buf), Some(&mut read), None) }; + let cancelled = { + let mut gate = self.0.lock(); + gate.reading = false; + gate.cancelled + }; + // If `close` fired while this read was in flight, drop any + // completed-but-undelivered chunk and report EOF — matching the Unix + // reader, which never delivers data once cancelled. + if cancelled { + return Ok(0); + } + match result { + Ok(()) => Ok(read as usize), + // Write ends all closed: normal end-of-stream, report EOF. + Err(e) if e.code() == ERROR_BROKEN_PIPE.to_hresult() => Ok(0), + // A canceller's `CancelIoEx` aborted this read: report EOF. + Err(e) if e.code() == ERROR_OPERATION_ABORTED.to_hresult() => Ok(0), + Err(e) => Err(std::io::Error::other(e)), + } + } +} + +/// A [`StreamCloser`] for an [`InterruptiblePipeReader`]. Cloneable and +/// `Send + Sync` so a watchdog thread can hold and fire it; all clones share +/// one cancellation state and [`close`](StreamCloser::close) is idempotent. +/// Holds a [`Weak`] reference so a stored canceller never keeps the reader's +/// data-pipe handle open after the reader is dropped. +#[derive(Clone)] +pub struct PipeReadCanceller(Weak); + +impl StreamCloser for PipeReadCanceller { + fn close(&self) { + // Upgrade to a temporary strong ref for the duration of the cancel. If + // the reader has already been dropped there is nothing to cancel (and + // its handle is already closed), so this is a no-op. + let Some(pipe) = self.0.upgrade() else { + return; + }; + // Mark cancelled once (so reads short-circuit to EOF), then abort an + // in-flight read. Because `CancelIoEx` is edge-triggered, retry while a + // read is in its announce→`ReadFile` window (`reading == true`): the next + // iteration catches the `ReadFile` once it enters the kernel. The reader + // clears `reading` when its `ReadFile` returns (aborted or with data), + // which bounds the loop; if no read is in progress it exits at once. + { + let mut gate = pipe.lock(); + if gate.cancelled { + return; + } + gate.cancelled = true; + } + loop { + // SAFETY: the upgraded `Arc` keeps the handle valid; `CancelIoEx` + // with a null overlapped aborts all outstanding synchronous I/O on + // it. Ignore the result — a benign no-op (ERROR_NOT_FOUND) when none + // is pending. + unsafe { + let _ = CancelIoEx(pipe.handle.get(), None); + } + if !pipe.lock().reading { + return; + } + std::thread::sleep(Duration::from_millis(1)); + } + } +} + +/// A writable end of an anonymous pipe (e.g. the child's stdin), owning the +/// handle and closing it on drop (which sends EOF to the child). Implements +/// [`std::io::Write`] via `WriteFile`. `Send`. +pub struct PipeWriter(SendOwnedHandle); + +impl PipeWriter { + /// Take ownership of `handle` (invalidating the source `OwnedHandle`). + pub fn new(mut handle: OwnedHandle) -> Self { + Self(SendOwnedHandle::take(&mut handle)) + } +} + +impl std::io::Write for PipeWriter { + fn write(&mut self, buf: &[u8]) -> std::io::Result { + use windows::Win32::Foundation::ERROR_BROKEN_PIPE; + let mut written: u32 = 0; + // SAFETY: `self.0` owns a valid pipe handle for the lifetime of this + // `PipeWriter`; `buf`/`written` are valid local params for the call. + match unsafe { WriteFile(self.0.get(), Some(buf), Some(&mut written), None) } { + Ok(()) => Ok(written as usize), + // The read end is gone (child exited / closed its stdin): surface the + // standard `BrokenPipe` kind so callers' graceful handling fires + // instead of an opaque OS error. + Err(e) if e.code() == ERROR_BROKEN_PIPE.to_hresult() => { + Err(std::io::Error::from(std::io::ErrorKind::BrokenPipe)) + } + Err(e) => Err(std::io::Error::other(e)), + } + } + + fn flush(&mut self) -> std::io::Result<()> { + // Anonymous pipes are not buffered on the writer side. + Ok(()) + } +} + const BUFFER_SIZE: u32 = 4096; const MAX_OUTPUT_CHARS: usize = 1024 * 1024; diff --git a/src/core/wxc_common/src/sandbox_process.rs b/src/core/wxc_common/src/sandbox_process.rs new file mode 100644 index 000000000..957507456 --- /dev/null +++ b/src/core/wxc_common/src/sandbox_process.rs @@ -0,0 +1,475 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! Handle-based ("streaming") sandbox execution. +//! +//! [`ScriptRunner`](crate::script_runner::ScriptRunner) owns the whole +//! lifecycle (spawn → wait → drain → return), which is fine for fire-and- +//! forget runs but cannot expose the running child. This module adds the +//! interface for the other model: spawn the sandboxed process and hand the +//! caller a [`SandboxProcess`] handle they can write to, read from, wait on, +//! and kill while it runs. +//! +//! As with [`ScriptRunner`](crate::script_runner::ScriptRunner), the traits +//! live in `wxc_common` (the cross-platform foundation) while the +//! implementations live in the per-backend crates — `wxc_common` must not +//! depend on any `backends/*` crate. + +use std::io::{Read, Write}; + +use crate::logger::Logger; +use crate::models::{ExecutionRequest, FailurePhase, ScriptResponse}; +use crate::script_runner::ScriptRunner; + +/// A handle to a running sandboxed process. +/// +/// Modelled on [`std::process::Child`]: the caller may `take_*` the std +/// streams to drive them directly (and is then responsible for draining any +/// stream they take, to avoid the child blocking on a full pipe), then +/// [`wait`](SandboxProcess::wait) for exit or [`kill`](SandboxProcess::kill) +/// it. +/// +/// Any stdout/stderr stream the caller does **not** take is drained and +/// discarded internally by [`wait`](SandboxProcess::wait) so the child can +/// never block on a full pipe. +/// +/// No pty is ever allocated; the streams are ordinary pipes. +/// +/// # Abandoning a held-open stream (stdout/stderr closers) +/// +/// A read on a taken stdout/stderr only ends at EOF — when **every** write end +/// closes. A backgrounded descendant that inherited the pipe can hold its write +/// end open long after the foreground command exits, so a caller blocked on +/// such a read would hang until that descendant finally exits. A plain +/// [`kill`](SandboxProcess::kill) would unblock it but also tear the descendant +/// down, defeating any grace window for backgrounded work. +/// +/// [`stdout_closer`](SandboxProcess::stdout_closer) / +/// [`stderr_closer`](SandboxProcess::stderr_closer) hand back a +/// [`StreamCloser`] for exactly this case: calling +/// [`close`](StreamCloser::close) makes an in-flight or subsequent read on that +/// stream return EOF (`Ok(0)`) promptly **without** terminating the child. +/// +/// # Pipe-deadlock contract (read both ends concurrently) +/// +/// stdout and stderr are independent OS pipes with bounded kernel buffers. If +/// one is left undrained while the child keeps writing to it, the child blocks +/// on the full pipe — and if the reader is meanwhile blocked waiting on the +/// *other* stream (or on the child to exit), the two deadlock. So both ends +/// must be consumed **concurrently**, never one fully then the other: +/// +/// - **Implementors** of [`wait`](SandboxProcess::wait) must drain the +/// not-taken stdout and stderr on separate threads (or non-blocking I/O) +/// before/while waiting on the child — not sequentially. The in-tree +/// backends spawn one reader thread per stream. +/// - **Callers** that `take_stdout()` *and* `take_stderr()` and read them to +/// EOF must likewise read them on separate threads; reading one to EOF +/// before touching the other can hang on output-heavy children. Taking only +/// one stream (leaving the other for `wait()` to drain) is always safe. +pub trait SandboxProcess: Send { + /// Take ownership of the child's stdin so the caller can write to it. + /// Returns `None` if already taken. Drop the writer to send EOF. + fn take_stdin(&mut self) -> Option>; + + /// Take ownership of the child's stdout for live reading. Returns `None` + /// if already taken. A taken stream is **not** drained by + /// [`wait`](SandboxProcess::wait). + fn take_stdout(&mut self) -> Option>; + + /// Take ownership of the child's stderr for live reading. Returns `None` + /// if already taken. A taken stream is **not** drained by + /// [`wait`](SandboxProcess::wait). + fn take_stderr(&mut self) -> Option>; + + /// Non-blocking exit check. `Ok(Some(code))` if the child has exited, + /// `Ok(None)` if it is still running. + fn try_wait(&mut self) -> std::io::Result>; + + /// The OS process id of the sandboxed child (its PID on Unix, process id + /// on Windows). Useful for external monitoring or a caller-driven process + /// tree kill. + /// + /// Only meaningful while the child is alive. On Unix the PID may be reused + /// by an unrelated process once the child has been reaped (by + /// [`wait`](SandboxProcess::wait)), so do not act on it after waiting. + fn id(&self) -> u32; + + /// Request termination of the sandboxed process **and its descendants** + /// (a process-tree kill). On Unix the child leads its own process group + /// and this signals the whole group (an immediate `SIGKILL`, no graceful + /// `SIGTERM` first); on Windows it terminates the job + /// object the child is assigned to. Reaping happens in + /// [`wait`](SandboxProcess::wait). + fn kill(&mut self) -> std::io::Result<()>; + + /// Block until the child exits (honouring the request's `scriptTimeout`, + /// where `0` means wait forever) and return its exit code. + /// + /// Any stdout/stderr the caller did not `take_*` is drained and discarded + /// while waiting so the child can never block on a full pipe. If the + /// timeout elapses, the child and its tree are killed and + /// [`ErrorKind::TimedOut`](std::io::ErrorKind::TimedOut) is returned. + /// + /// Implementors must drain the not-taken stdout and stderr **concurrently** + /// (not one then the other) — see the type-level pipe-deadlock contract. + fn wait(&mut self) -> std::io::Result; + + /// A closer that EOFs the stdout stream returned by + /// [`take_stdout`](SandboxProcess::take_stdout), on demand, **without** + /// killing the child — for abandoning a stream a backgrounded descendant is + /// holding open past the foreground command's exit (a plain + /// [`kill`](SandboxProcess::kill) would also take that descendant down). + /// + /// Intended for a stream the caller has **taken** and is reading: + /// [`close`](StreamCloser::close) abandons that read, and may be called + /// concurrently with it. [`wait`](SandboxProcess::wait) already cancels its + /// own internal safety-drain of any *not-taken* stream once the child exits, + /// so a closer is only useful on a taken stream — firing one on a not-taken + /// stream while the child is still producing output would stall the child on + /// a full pipe. Returns `None` when the stream is not interruptible — e.g. + /// inherited stdio ([`StdioMode::Inherit`]), where the caller never reads + /// from a handle stream. The default returns `None`. + fn stdout_closer(&self) -> Option> { + None + } + + /// A closer for the stderr stream — see + /// [`stdout_closer`](SandboxProcess::stdout_closer). The default returns + /// `None`. + fn stderr_closer(&self) -> Option> { + None + } +} + +/// Abandons reads on one of a [`SandboxProcess`]'s standard streams: a call to +/// [`close`](StreamCloser::close) makes an in-flight or subsequent read on the +/// corresponding [`take_stdout`](SandboxProcess::take_stdout) / +/// [`take_stderr`](SandboxProcess::take_stderr) stream return EOF (`Ok(0)`) +/// promptly, **without** terminating the child. +/// +/// Obtained from [`stdout_closer`](SandboxProcess::stdout_closer) / +/// [`stderr_closer`](SandboxProcess::stderr_closer). `Send + Sync` so a +/// watchdog thread (separate from the one blocked on the read) can hold and +/// fire it. +pub trait StreamCloser: Send + Sync { + /// Promptly EOF the stream this closer was minted for. Idempotent and safe + /// to call after the reader has already reached EOF or been dropped. + fn close(&self); +} + +/// Spawn a thread that reads `reader` to EOF and discards it, so a stream the +/// caller did not take can't block the child on a full pipe. Returns `None` +/// when there is nothing to drain. +pub fn spawn_discard( + reader: Option, +) -> Option> { + reader.map(|mut r| { + std::thread::spawn(move || { + let _ = std::io::copy(&mut r, &mut std::io::sink()); + }) + }) +} + +/// Join a [`spawn_discard`] thread (no-op when absent). +pub fn join_discard(handle: Option>) { + if let Some(t) = handle { + let _ = t.join(); + } +} + +/// Take a readable stream out of an `Option` and box it as a trait object, for +/// the [`SandboxProcess::take_stdout`] / [`SandboxProcess::take_stderr`] +/// accessors. Returns `None` if already taken. +pub fn take_boxed_read( + slot: &mut Option, +) -> Option> { + slot.take().map(|r| Box::new(r) as Box) +} + +/// Take a writable stream out of an `Option` and box it as a trait object, for +/// the [`SandboxProcess::take_stdin`] accessor. Returns `None` if already taken. +pub fn take_boxed_write( + slot: &mut Option, +) -> Option> { + slot.take().map(|w| Box::new(w) as Box) +} + +/// Clone a stored stream canceller and box it as a [`StreamCloser`], for the +/// [`SandboxProcess::stdout_closer`] / [`SandboxProcess::stderr_closer`] +/// accessors. Returns `None` when there is no canceller (non-streamed stdio). +pub fn boxed_closer( + canceller: &Option, +) -> Option> { + canceller + .clone() + .map(|c| Box::new(c) as Box) +} + +/// Join a not-taken stdout/stderr discard thread from +/// [`wait`](SandboxProcess::wait), first cancelling its read so the join can't +/// block. When the stream was drained (a [`spawn_discard`] thread exists), fire +/// `canceller` before joining: a backgrounded descendant holding the pipe's +/// write end open past the foreground child's exit would otherwise keep the +/// discard [`io::copy`](std::io::copy) — and thus `wait()` — from ever returning +/// under a wait-forever (`scriptTimeout == 0`) timeout. The drained output is +/// discarded regardless, so cutting it short is harmless. +/// +/// Call *after* the child has exited (so its own output has drained normally). +/// A no-op when the caller took the stream (`drain` is `None`): there is no +/// thread to join, and the canceller must not fire while the caller may still be +/// reading. +pub fn cancel_and_join_discard( + drain: Option>, + canceller: &Option, +) { + if drain.is_some() { + if let Some(canceller) = canceller { + canceller.close(); + } + } + join_discard(drain); +} + +/// SIGKILL a Unix child's process group. The backends make the child a group +/// leader (`setsid()` / `process_group(0)`), so `-pid` targets that group — +/// never the host's — killing the leader and every descendant. +/// +/// No graceful `SIGTERM` first: it's unreliable (a `/bin/sh -c …` wrapper parked +/// in a foreground `wait` defers it and finishes the script) and sandboxed code +/// isn't owed a cleanup window. The **leader is killed before the group**: a +/// `-pid`-only sweep races — the kernel can kill a descendant first, waking the +/// shell to run one more command (seen as post-timeout output on the Inherit +/// path) before its own signal lands — so we make the leader's SIGKILL pending +/// first. The caller reaps the direct child afterwards. +#[cfg(unix)] +pub fn group_kill(child: &mut std::process::Child) -> std::io::Result<()> { + // The child is unreaped, so its pid (== pgid) can't have been recycled. + let pid = child.id() as i32; + // SAFETY: `kill(2)` with a plain pid / negative pgid — just integers. + unsafe { + libc::kill(pid, libc::SIGKILL); // leader first + libc::kill(-pid, libc::SIGKILL); // then its group + } + Ok(()) +} + +/// Outcome of [`wait_with_timeout`]: the child exited, the deadline passed, or +/// the wait itself failed. +#[cfg(unix)] +pub enum WaitError { + Timeout, + Io(std::io::Error), +} + +/// Wait for `child` to exit. With a timeout we poll (rather than add an async +/// runtime), starting at a short interval and backing off to a cap: a quick +/// child is detected within ~a millisecond instead of always paying a full +/// fixed tick, while a long run settles to an inexpensive cadence. Each sleep is +/// clamped to the time remaining so even sub-interval timeouts fire on time. +/// Shared by the Unix run-to-completion backends. +#[cfg(unix)] +pub fn wait_with_timeout( + child: &mut std::process::Child, + timeout: Option, +) -> Result { + use std::time::{Duration, Instant}; + // Poll interval grows from this floor to the cap (doubling each idle tick), + // trading low exit-detection latency for short runs against an inexpensive + // cadence for long ones. + const MIN_POLL: Duration = Duration::from_millis(1); + const MAX_POLL: Duration = Duration::from_millis(50); + + let Some(deadline) = timeout.map(|d| Instant::now() + d) else { + return child.wait().map_err(WaitError::Io); + }; + let mut interval = MIN_POLL; + loop { + match child.try_wait() { + Ok(Some(status)) => return Ok(status), + Ok(None) => { + let now = Instant::now(); + if now >= deadline { + return Err(WaitError::Timeout); + } + std::thread::sleep((deadline - now).min(interval)); + interval = (interval * 2).min(MAX_POLL); + } + Err(error) => return Err(WaitError::Io(error)), + } + } +} + +/// How a [`SandboxBackend`] wires the sandboxed child's standard streams. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum StdioMode { + /// stdin/stdout/stderr are fresh pipes the caller drives via the handle's + /// `take_*` accessors (the `mxc` library / streaming path). The child sees + /// no TTY and leads its own process group so it can be tree-terminated. + Pipes, + /// The child inherits the current process's stdin/stdout/stderr (the CLI + /// executor path): its output goes straight to the binary's own stdio, so + /// the child sees a TTY exactly when the binary does. The returned handle's + /// `take_*` all return `None`; [`wait`](SandboxProcess::wait) just waits. + Inherit, +} + +/// A containment backend that spawns a sandboxed process and hands back a +/// [`SandboxProcess`] handle — the single entry point for starting a sandbox. +/// +/// The caller picks how the child's stdio is wired ([`StdioMode`]) and then +/// drives the handle: stream it ([`StdioMode::Pipes`]) or just +/// [`wait`](SandboxProcess::wait) ([`StdioMode::Inherit`]). The `mxc` library +/// calls this directly with [`StdioMode::Pipes`]; the CLI executor binaries +/// reach it through the [`Runner`] bridge. +pub trait SandboxBackend { + /// Backend-specific validation, run before [`spawn`](SandboxBackend::spawn) + /// and on dry-run. Override to reject unsupported policies; default accepts. + fn validate(&self, _request: &ExecutionRequest) -> Result<(), ScriptResponse> { + Ok(()) + } + + /// Apply this backend's containment and spawn the sandboxed process with + /// stdio wired per `stdio`, returning a handle. On a validation or spawn + /// failure returns a [`ScriptResponse`] carrying the error. + fn spawn( + &mut self, + request: &ExecutionRequest, + logger: &mut Logger, + stdio: StdioMode, + ) -> Result, ScriptResponse>; + + /// Optional post-exit diagnostics for the run-to-completion (binary) path: + /// when the child exits non-zero, return a more actionable error message + /// (e.g. a known AppContainer filesystem-permission failure). Default: none. + /// The streaming/library path does not call this. + fn diagnose_exit(&self, _request: &ExecutionRequest, _exit_code: i32) -> Option { + None + } +} + +/// The single run-to-completion bridge: adapts any [`SandboxBackend`] to the +/// [`ScriptRunner`] contract the executor binaries (`wxc-exec` / `lxc-exec` / +/// `mxc-exec-mac`) dispatch over. +/// +/// It spawns the child with [`StdioMode::Inherit`] — so the sandboxed process +/// reads/writes the binary's own stdio directly (a TTY when the binary has +/// one) — and [`wait`](SandboxProcess::wait)s for exit, mapping the outcome to +/// a [`ScriptResponse`]. Because the child streams straight to the binary's +/// stdio, `standard_out`/`standard_err` stay empty (the binaries already print +/// those, which is then a no-op). +/// +/// This is the *only* run-to-completion logic for these backends; the backends +/// themselves expose just [`SandboxBackend::spawn`]. +pub struct Runner(B); + +impl Runner { + /// Wrap a [`SandboxBackend`] so it can be dispatched as a [`ScriptRunner`]. + pub fn new(backend: B) -> Self { + Self(backend) + } +} + +impl ScriptRunner for Runner { + fn validate_runner(&self, request: &ExecutionRequest) -> Result<(), ScriptResponse> { + self.0.validate(request) + } + + fn execute(&mut self, request: &ExecutionRequest, logger: &mut Logger) -> ScriptResponse { + let mut child = match self.0.spawn(request, logger, StdioMode::Inherit) { + Ok(child) => child, + Err(response) => return response, + }; + match child.wait() { + Ok(exit_code) => { + let mut response = ScriptResponse { + exit_code, + failure_phase: if exit_code == 0 { + FailurePhase::None + } else { + FailurePhase::ProcessExited + }, + ..Default::default() + }; + // Let the backend enrich a non-zero exit with an actionable + // message (the child streamed live, so the response is otherwise + // empty). + if exit_code != 0 { + if let Some(msg) = self.0.diagnose_exit(request, exit_code) { + logger.log_line(&format!("Error: Launch diagnostic: {msg}")); + response.standard_err.push_str(&msg); + response.error_message = msg; + } + } + response + } + Err(e) if e.kind() == std::io::ErrorKind::TimedOut => ScriptResponse { + exit_code: -1, + error_message: format!("script timed out after {}ms", request.script_timeout), + failure_phase: FailurePhase::Timeout, + ..Default::default() + }, + Err(e) => ScriptResponse::error(&format!("wait failed: {e}")), + } + } +} + +#[cfg(all(test, unix))] +mod tests { + use super::{wait_with_timeout, WaitError}; + use std::process::Command; + use std::time::{Duration, Instant}; + + #[test] + fn wait_with_timeout_detects_quick_exit_promptly() { + // A child that exits almost immediately is reaped well before a generous + // deadline -- the adaptive poll starts in the millisecond range, so the + // detection latency is small (the old fixed 50ms tick was the worst case). + let mut child = Command::new("true").spawn().expect("spawn true"); + let start = Instant::now(); + let status = match wait_with_timeout(&mut child, Some(Duration::from_secs(10))) { + Ok(status) => status, + Err(_) => panic!("a quick child must exit, not time out"), + }; + assert!(status.success(), "`true` exits 0"); + assert!( + start.elapsed() < Duration::from_secs(1), + "quick exit should be detected promptly, took {:?}", + start.elapsed() + ); + } + + #[test] + fn wait_with_timeout_fires_at_the_deadline() { + // A long-running child hits the timeout branch at (not before) the + // deadline, even though the deadline is shorter than the poll cap. + let mut child = Command::new("sleep") + .arg("30") + .spawn() + .expect("spawn sleep"); + let start = Instant::now(); + let result = wait_with_timeout(&mut child, Some(Duration::from_millis(200))); + let elapsed = start.elapsed(); + let _ = child.kill(); + let _ = child.wait(); + assert!(matches!(result, Err(WaitError::Timeout)), "should time out"); + assert!( + elapsed >= Duration::from_millis(200), + "must not fire before the deadline, fired at {elapsed:?}" + ); + assert!( + elapsed < Duration::from_secs(2), + "should fire near the deadline, fired at {elapsed:?}" + ); + } + + #[test] + fn wait_with_timeout_without_deadline_waits_for_exit() { + // A `None` timeout blocks until the child exits. + let mut child = Command::new("true").spawn().expect("spawn true"); + let status = match wait_with_timeout(&mut child, None) { + Ok(status) => status, + Err(_) => panic!("blocking wait must return the exit status"), + }; + assert!(status.success()); + } +}