diff --git a/API.md b/API.md index 59c1d6e..3aa396a 100644 --- a/API.md +++ b/API.md @@ -10,11 +10,13 @@ import pyisolate as psi PyIsolate supports exactly seven cell operations: `exec source`, `call dotted function`, `import module`, `post messages`, `stream logs`, `emit metrics`, and `request broker actions`. +Isolation mode is explicit in the public API. Use `backend="subinterpreter"` for the execution-cell backend, `backend="process"` for one sandbox per OS process, and `backend="microvm"` for a process placed behind a microVM boundary. The cell contract is the same in every mode, but only the process and microVM modes are intended to represent hard blast-radius boundaries. In this build only `backend="subinterpreter"` is implemented; requesting `"process"` or `"microvm"` raises `NotImplementedError`. + The canonical contract lives in [docs/execution-model.md](docs/execution-model.md). Keep this surface small; production systems win by refusing extra features. | Call | Description | |------|-------------| -| `psi.spawn(name:str, policy:str|dict=None, allowed_imports:list[str]|None=None) → Sandbox` | Create sandbox thread, attach eBPF, return handle with module whitelist. | +| `psi.spawn(name:str, policy:str|dict=None, allowed_imports:list[str]|None=None, backend:str="subinterpreter") → Sandbox` | Create a sandbox with an explicit backend. `"subinterpreter"` is an execution cell; `"process"` and `"microvm"` are explicit hard-boundary modes and fail closed (raise `NotImplementedError`) until their native launchers are available. | | `psi.Supervisor(warm_pool:int=0, rollout_mode:str="dev")` | Build an isolated supervisor with explicit rollout posture (`dev`, `hardened`, `compatibility`). | | `sandbox.close(timeout=0.2)` | Graceful stop → SIGTERM; force‑kill after timeout. | | `with psi.spawn(name, policy)` | Context manager form; sandbox closes on exit. 
| @@ -23,7 +25,7 @@ The canonical contract lives in [docs/execution-model.md](docs/execution-model.m ## 2  Executing code ```python -sb = psi.spawn("guest42", policy="defaults", numa_node=0) +sb = psi.spawn("guest42", policy="defaults", numa_node=0, backend="subinterpreter") sb.exec("from math import sqrt; post(sqrt(2))") result = sb.recv(timeout=0.1) # 1.4142135623 ``` @@ -63,7 +65,7 @@ policy.refresh("/tmp/policy.yml", token="secret") ## 4  High-level helpers ```python -@psi.sandbox(policy="ml-inference", timeout="30s") +@psi.sandbox(policy="ml-inference", timeout="30s", backend="subinterpreter") def run_model(data): ... diff --git a/README.md b/README.md index f051812..5a4b5d5 100644 --- a/README.md +++ b/README.md @@ -200,13 +200,17 @@ Use `pyisolate.policy.refresh("policy/.yml", token="secret")` to hot‑loa A cell is intentionally limited to seven operations: execute source, call a dotted function, import allowed modules, post messages, stream logs, emit metrics, and request broker actions. +The API makes the isolation choice explicit: `backend="subinterpreter"` means an execution cell, `backend="process"` means a separate OS process boundary, and `backend="microvm"` means a process behind a microVM boundary. The cell contract stays the same across modes, but the security boundary does not: sub-interpreters are not treated as a hard boundary. + See [docs/execution-model.md](docs/execution-model.md). We keep this model small on purpose: production systems are safer when they refuse features outside a single contract. --- ## Security model -* **Execution cell** – each guest runs in its own sub‑interpreter, hosted by one sandbox thread. +* **Execution cell** – `backend="subinterpreter"`; each guest runs in its own sub‑interpreter, hosted by one sandbox thread. +* **Process boundary** – `backend="process"`; each guest is intended to run in its own OS process with kernel policy applied outside the Python runtime. 
+* **MicroVM boundary** – `backend="microvm"`; each guest is intended to run inside a process launched behind a microVM boundary for stronger blast-radius isolation. * **Security boundary (authoritative)** – enforcement lives at the kernel/process layer (cgroups + eBPF/LSM), not at the Python sub‑interpreter boundary. * **Kernel boundary** – every sandbox thread enters its own cgroup; CO‑RE eBPF programs enforce FS/net/syscall policy. * **Broker** – sole path to privileged syscalls, sealed with AEAD and strict replay protection. diff --git a/docs/execution-model.md b/docs/execution-model.md index d1962da..f6dc870 100644 --- a/docs/execution-model.md +++ b/docs/execution-model.md @@ -2,6 +2,8 @@ A sandboxed cell has exactly **one** execution contract. +The public API names the isolation backend explicitly: `backend="subinterpreter"` is the execution-cell mode, `backend="process"` is the process-boundary mode, and `backend="microvm"` is the microVM-boundary mode. These modes change the containment boundary, not the seven cell operations below. + ## Allowed operations 1. **`exec(source)`** diff --git a/pyisolate/__init__.py b/pyisolate/__init__.py index 9c314b1..93adcb2 100644 --- a/pyisolate/__init__.py +++ b/pyisolate/__init__.py @@ -20,7 +20,10 @@ try: from .checkpoint import checkpoint, restore -except (ModuleNotFoundError, ImportError) as exc: # pragma: no cover - optional dependency +except ( + ModuleNotFoundError, + ImportError, +) as exc: # pragma: no cover - optional dependency # Trap only dependency-related import failures; let unrelated import-time # bugs in optional modules propagate so they remain visible to developers. 
if ( @@ -58,7 +61,10 @@ def restore(*args, **kwargs): # type: ignore[no-redef] try: from .migration import migrate -except (ModuleNotFoundError, ImportError) as exc: # pragma: no cover - optional dependency +except ( + ModuleNotFoundError, + ImportError, +) as exc: # pragma: no cover - optional dependency # Trap only dependency-related import failures; let unrelated import-time # bugs in optional modules propagate so they remain visible to developers. if ( @@ -78,6 +84,10 @@ def migrate(*args, **kwargs): # type: ignore[no-redef] from .sdk import Pipeline, sandbox # noqa: F401 from .subset import OwnershipError, RestrictedExec # noqa: F401 from .supervisor import ( + BackendMode, + DEFAULT_BACKEND, + IMPLEMENTED_BACKENDS, + SUPPORTED_BACKENDS, Sandbox, Supervisor, list_active, @@ -89,6 +99,10 @@ def migrate(*args, **kwargs): # type: ignore[no-redef] __all__ = [ "spawn", + "BackendMode", + "DEFAULT_BACKEND", + "SUPPORTED_BACKENDS", + "IMPLEMENTED_BACKENDS", "list_active", "Sandbox", "Supervisor", diff --git a/pyisolate/sdk.py b/pyisolate/sdk.py index cc7956e..b6a1974 100644 --- a/pyisolate/sdk.py +++ b/pyisolate/sdk.py @@ -4,11 +4,13 @@ from typing import Any, Callable -from .supervisor import spawn +from .supervisor import BackendMode, DEFAULT_BACKEND, spawn def sandbox( - policy: str | None = None, timeout: float | None = None + policy: str | None = None, + timeout: float | None = None, + backend: BackendMode = DEFAULT_BACKEND, ) -> Callable[[Callable[..., Any]], Callable[..., Any]]: """Decorate a function to run inside a sandbox when called. @@ -19,11 +21,14 @@ def sandbox( timeout: Seconds to wait for the sandboxed call to complete before raising :class:`pyisolate.errors.TimeoutError`. + backend: + Isolation backend: ``"subinterpreter"`` for an execution cell, or + explicit boundary modes ``"process"`` / ``"microvm"`` when available. 
""" def decorator(func: Callable[..., Any]) -> Callable[..., Any]: def wrapper(*args: Any, **kwargs: Any) -> Any: - sb = spawn(func.__name__, policy=policy) + sb = spawn(func.__name__, policy=policy, backend=backend) try: return sb.call( f"{func.__module__}.{func.__name__}", @@ -43,24 +48,27 @@ class Pipeline: """Sequential sandboxed stages.""" def __init__(self) -> None: - self._stages: list[tuple[str, str | None]] = [] + self._stages: list[tuple[str, str | None, BackendMode]] = [] def add_stage( - self, stage: str | Callable[[Any], Any], policy: str | None = None + self, + stage: str | Callable[[Any], Any], + policy: str | None = None, + backend: BackendMode = DEFAULT_BACKEND, ) -> "Pipeline": """Register a stage by dotted path or callable.""" if callable(stage): dotted = f"{stage.__module__}.{stage.__name__}" else: dotted = stage - self._stages.append((dotted, policy)) + self._stages.append((dotted, policy, backend)) return self def run(self, data: Any) -> Any: """Run data through all stages sequentially.""" value = data - for dotted, policy in self._stages: + for dotted, policy, backend in self._stages: name = dotted.rsplit(".", 1)[-1] - with spawn(name, policy=policy) as sb: + with spawn(name, policy=policy, backend=backend) as sb: value = sb.call(dotted, value) return value diff --git a/pyisolate/supervisor.py b/pyisolate/supervisor.py index 7830f2d..3565f7e 100644 --- a/pyisolate/supervisor.py +++ b/pyisolate/supervisor.py @@ -13,7 +13,7 @@ import re import threading from pathlib import Path -from typing import Dict, Optional +from typing import Dict, Literal, Optional from . import cgroup, recovery from .capabilities import ROOT, RootCapability @@ -30,6 +30,32 @@ DEFAULT_NAME_PATTERN = re.compile(r"^[A-Za-z0-9_-]+$") NAME_PATTERN = DEFAULT_NAME_PATTERN +BackendMode = Literal["subinterpreter", "process", "microvm"] +DEFAULT_BACKEND: BackendMode = "subinterpreter" +SUPPORTED_BACKENDS: tuple[BackendMode, ...] 
= ( + "subinterpreter", + "process", + "microvm", +) +IMPLEMENTED_BACKENDS: tuple[BackendMode, ...] = ("subinterpreter",) + + +def _normalize_backend(backend: str) -> BackendMode: + if backend not in SUPPORTED_BACKENDS: + options = ", ".join(repr(item) for item in SUPPORTED_BACKENDS) + raise ValueError(f"backend must be one of: {options}") + return backend # type: ignore[return-value] + + +def _require_implemented_backend(backend: BackendMode) -> None: + if backend in IMPLEMENTED_BACKENDS: + return + raise NotImplementedError( + f"backend={backend!r} is an explicit isolation mode, but this build only " + "implements backend='subinterpreter'. Use an external process or microVM " + "launcher until the native backend is available." + ) + class Sandbox: """Handle to a sandbox thread.""" @@ -109,6 +135,11 @@ def __del__(self): except Exception: pass + @property + def backend(self) -> BackendMode: + """Return the isolation backend used by this sandbox handle.""" + return getattr(self._thread, "_backend", DEFAULT_BACKEND) + @property def stats(self): return self._thread.stats @@ -210,9 +241,18 @@ def spawn( capabilities: Optional[dict[str, object]] = None, tenant: Optional[str] = None, tenant_quota: Optional[int] = None, + backend: BackendMode = DEFAULT_BACKEND, ) -> Sandbox: - """Create and start a sandbox thread.""" + """Create and start a sandbox in the requested isolation backend. + + ``backend="subinterpreter"`` is the execution-cell backend and is not + a hard security boundary by itself. ``backend="process"`` and + ``backend="microvm"`` are explicit boundary modes; they are reserved + API choices and fail closed until native launchers are available. 
+ """ global NAME_PATTERN + backend = _normalize_backend(backend) + _require_implemented_backend(backend) if not isinstance(name, str) or not name: raise ValueError("Sandbox name must be non-empty string") if len(name) > 64: @@ -268,6 +308,7 @@ def spawn( ) thread._on_violation = self._alerts.notify thread._tracer = self._tracer + thread._backend = backend else: thread = SandboxThread( name=name, @@ -276,6 +317,7 @@ def spawn( tracer=self._tracer, cgroup_path=cg_path, ) + thread._backend = backend thread.start() thread._temp_dir = temp_dir self._sandboxes[name] = thread @@ -321,7 +363,9 @@ def get_active_threads(self) -> list[SandboxThread]: with self._lock: return [t for t in self._sandboxes.values() if t.is_alive()] - def _authorize_control(self, token: str | RootCapability, op: str) -> ControlRequest: + def _authorize_control( + self, token: str | RootCapability, op: str + ) -> ControlRequest: """Validate an authenticated control-plane operation request.""" if token is ROOT: @@ -444,6 +488,10 @@ def _get_supervisor() -> Supervisor: # Public API def spawn(*args, **kwargs): + if "backend" in kwargs: + backend = _normalize_backend(kwargs["backend"]) + _require_implemented_backend(backend) + kwargs["backend"] = backend return _get_supervisor().spawn(*args, **kwargs) diff --git a/tests/test_supervisor.py b/tests/test_supervisor.py index 886ee3e..294d265 100644 --- a/tests/test_supervisor.py +++ b/tests/test_supervisor.py @@ -20,7 +20,9 @@ def fake_watchdog_start(self): calls.append("watchdog") monkeypatch.setattr(BPFManager, "load", fake_load) - monkeypatch.setattr("pyisolate.watchdog.ResourceWatchdog.start", fake_watchdog_start) + monkeypatch.setattr( + "pyisolate.watchdog.ResourceWatchdog.start", fake_watchdog_start + ) sup_mod = iso.supervisor sup_mod._supervisor = None @@ -134,6 +136,27 @@ def test_spawn_valid_name_regex(name): sb.close() +def test_spawn_backend_is_explicit_subinterpreter(): + sb = iso.spawn("backend-sub", backend="subinterpreter") + try: + 
assert sb.backend == "subinterpreter" + assert iso.SUPPORTED_BACKENDS == ("subinterpreter", "process", "microvm") + assert iso.IMPLEMENTED_BACKENDS == ("subinterpreter",) + finally: + sb.close() + + +@pytest.mark.parametrize("backend", ["process", "microvm"]) +def test_spawn_explicit_boundary_backends_fail_closed(backend): + with pytest.raises(NotImplementedError, match=backend): + iso.spawn(f"backend-{backend}", backend=backend) + + +def test_spawn_rejects_unknown_backend(): + with pytest.raises(ValueError, match="backend must be one of"): + iso.spawn("backend-bad", backend="thread") + + @pytest.mark.parametrize("name", ["bad name", "name!", "foo/bar"]) def test_spawn_invalid_name_regex(name): with pytest.raises(ValueError): @@ -259,7 +282,9 @@ def fail_start(self): sup_replay.shutdown() -def test_spawn_registry_failure_rolls_back_tenant_usage_and_ledger(tmp_path, monkeypatch): +def test_spawn_registry_failure_rolls_back_tenant_usage_and_ledger( + tmp_path, monkeypatch +): ledger = tmp_path / "quota.log" monkeypatch.setenv("PYISOLATE_QUOTA_LEDGER", str(ledger))