diff --git a/POLICY.md b/POLICY.md index cb75484..ba401dc 100644 --- a/POLICY.md +++ b/POLICY.md @@ -118,12 +118,30 @@ register_plugin(IpcLimiter) ## 7  Policy templates -Several ready-to-use policies are included under the `policy/` directory: +Named policies live as YAML files in the repository-level `policy/` directory. +Sandbox creation APIs resolve public string names with `pyisolate.policy.resolve_policy()` +before a `SandboxThread` is constructed. Unknown names fail closed with +`PolicyCompilerError` rather than falling back to an unconstrained sandbox. -| File | Intended use | -|------|--------------| -| `ml.yml` | Machine learning jobs with outbound HTTPS and generous quotas | -| `web_scraper.yml` | Basic web scraping with only HTTP/HTTPS access | +Supported public names are: -Load any template with `pyisolate.policy.refresh("policy/.yml", token)` and the -new limits take effect instantly. +| Public name | File | Intended use | +|-------------|------|--------------| +| `stdlib.readonly` | `policy/stdlib.readonly.yml` | Standard-library-oriented sandbox with a small import allow-list, `/tmp` filesystem access, and no outbound network. | +| `ml-inference` | `policy/ml-inference.yml` | Offline ML inference workloads with model/data paths and no outbound network by default. | +| `readonly-fs` | `policy/readonly-fs.yml` | Filesystem-focused sandbox rooted at `/tmp` with no imports or outbound network by default. | +| `ml` | `policy/ml.yml` | Legacy machine learning template loaded by filename stem. | +| `web_scraper` | `policy/web_scraper.yml` | Legacy web scraping template loaded by filename stem. 
| + +Use a public name directly when spawning a sandbox: + +```python +import pyisolate as iso + +with iso.spawn("worker", policy="stdlib.readonly") as sb: + sb.exec("import math; post(math.sqrt(16))") +``` + +You can still hot-reload a template with +`pyisolate.policy.refresh("policy/<name>.yml", token)` when updating live eBPF maps; +the resolver is for sandbox construction-time policy selection. diff --git a/policy/ml-inference.yml b/policy/ml-inference.yml new file mode 100644 index 0000000..0795ecf --- /dev/null +++ b/policy/ml-inference.yml @@ -0,0 +1,12 @@ +# Offline ML inference sandbox template. +version: 1.0 +imports: + - math + - json + - statistics + - pathlib +fs: + - allow: "/tmp" + - allow: "/srv/models" + - allow: "/srv/data" +net: [] diff --git a/policy/readonly-fs.yml b/policy/readonly-fs.yml new file mode 100644 index 0000000..050d07b --- /dev/null +++ b/policy/readonly-fs.yml @@ -0,0 +1,6 @@ +# Filesystem-focused template with no imports or outbound network by default. +version: 1.0 +fs: + - allow: "/tmp" +net: [] +imports: [] diff --git a/policy/stdlib.readonly.yml b/policy/stdlib.readonly.yml new file mode 100644 index 0000000..ecef97b --- /dev/null +++ b/policy/stdlib.readonly.yml @@ -0,0 +1,11 @@ +# Standard-library-only sandbox template. 
+version: 1.0 +imports: + - math + - json + - pathlib + - statistics + - socket +fs: + - allow: "/tmp" +net: [] diff --git a/pyisolate/__init__.py b/pyisolate/__init__.py index d620d12..8d90276 100644 --- a/pyisolate/__init__.py +++ b/pyisolate/__init__.py @@ -88,7 +88,7 @@ def migrate(*args, **kwargs): # type: ignore[no-redef] raise ModuleNotFoundError("cryptography is required for migration support") -from .policy import refresh_remote # noqa: F401 +from .policy import refresh_remote, resolve_policy # noqa: F401 from .sdk import Pipeline, sandbox # noqa: F401 from .subset import OwnershipError, RestrictedExec # noqa: F401 from .nogil import no_gil_readiness_report, warn_if_unsafe_native_extensions # noqa: F401 @@ -160,6 +160,7 @@ def migrate(*args, **kwargs): # type: ignore[no-redef] "restore", "migrate", "refresh_remote", + "resolve_policy", "setup_structured_logging", "DenialEvent", "no_gil_readiness_report", diff --git a/pyisolate/policy/__init__.py b/pyisolate/policy/__init__.py index 03c713d..8467e3f 100644 --- a/pyisolate/policy/__init__.py +++ b/pyisolate/policy/__init__.py @@ -68,8 +68,14 @@ def safe_load(stream): yaml = _MiniYaml() +from .compiler import ( + CompiledPolicy, + PolicyCompilerError, + SandboxPolicy, + compile_policy, +) # noqa: F401 + from ..capabilities import ConnectTCP, CpuBudget, Import, ReadPath, WritePath -from .compiler import PolicyCompilerError, compile_policy # noqa: F401 from .model import ( # noqa: F401 FilesystemRule, NetworkRule, @@ -300,6 +306,163 @@ def refresh_remote( os.unlink(tmp_path) +def _policy_root() -> Path: + """Return the repository-level directory that stores named policy YAML files.""" + + return Path(__file__).resolve().parents[2] / "policy" + + +NAMED_POLICIES: dict[str, str] = { + "stdlib.readonly": "stdlib.readonly.yml", + "ml-inference": "ml-inference.yml", + "readonly-fs": "readonly-fs.yml", +} + + +def _select_sandbox_policy(compiled, selector: str | None = None): + sandboxes = compiled.sandboxes + if 
selector and selector in sandboxes: + return sandboxes[selector] + if "default" in sandboxes: + return sandboxes["default"] + if len(sandboxes) == 1: + return next(iter(sandboxes.values())) + available = ", ".join(sorted(sandboxes)) + raise PolicyCompilerError( + "policy document contains multiple sandboxes; " f"select one of: {available}" + ) + + +def _runtime_policy_from_sandbox(sandbox_policy: SandboxPolicy) -> Policy: + runtime = Policy() + for rule in sandbox_policy.fs: + if rule.action == "allow": + runtime.allow_fs(rule.path) + for rule in sandbox_policy.tcp: + if rule.action == "connect": + runtime.allow_tcp(rule.addr) + for module in sandbox_policy.imports: + runtime.allow_import(module) + return runtime + + +def _runtime_policy_from_dict(data: dict) -> Policy: + if "sandboxes" in data: + sandboxes = data.get("sandboxes") + if not isinstance(sandboxes, dict): + raise PolicyCompilerError("missing or invalid 'sandboxes' section") + selector = "default" if "default" in sandboxes else None + if selector is None and len(sandboxes) == 1: + selector = next(iter(sandboxes)) + if selector is None: + available = ", ".join(sorted(str(k) for k in sandboxes)) + raise PolicyCompilerError( + "policy mapping contains multiple sandboxes; " + f"select one of: {available}" + ) + selected = sandboxes[selector] + if not isinstance(selected, dict): + raise PolicyCompilerError(f"sandbox '{selector}' must be a mapping") + merged = dict(data.get("defaults") or {}) + merged.update(selected) + data = merged + + runtime = Policy() + fs_rules = data.get("fs", []) or [] + if not isinstance(fs_rules, list): + raise PolicyCompilerError("'fs' must be a list") + for rule in fs_rules: + if isinstance(rule, str): + runtime.allow_fs(rule) + elif isinstance(rule, dict) and len(rule) == 1: + action, path = next(iter(rule.items())) + if action == "allow" and isinstance(path, str): + runtime.allow_fs(path) + elif action not in {"allow", "deny"}: + raise PolicyCompilerError(f"invalid fs action 
'{action}'") + else: + raise PolicyCompilerError(f"invalid fs rule: {rule!r}") + + net_rules = data.get("net", data.get("tcp", [])) or [] + if not isinstance(net_rules, list): + raise PolicyCompilerError("'net' must be a list") + for rule in net_rules: + if isinstance(rule, str): + runtime.allow_tcp(rule) + elif isinstance(rule, dict) and len(rule) == 1: + action, addr = next(iter(rule.items())) + if action == "connect": + addresses = addr if isinstance(addr, list) else [addr] + for address in addresses: + if not isinstance(address, str): + raise PolicyCompilerError( + f"net addresses must be strings: {address!r}" + ) + runtime.allow_tcp(address) + elif action != "deny": + raise PolicyCompilerError(f"invalid net action '{action}'") + else: + raise PolicyCompilerError(f"invalid net rule: {rule!r}") + + imports = data.get("imports", []) or [] + if not isinstance(imports, list): + raise PolicyCompilerError("'imports' must be a list") + for module in imports: + if not isinstance(module, str): + raise PolicyCompilerError(f"import rules must be strings: {module!r}") + runtime.allow_import(module) + return runtime + + +def _resolve_policy_path(name: str) -> Path: + candidate = Path(name) + if candidate.exists(): + return candidate + + policy_root = _policy_root() + mapped = NAMED_POLICIES.get(name) + if mapped is not None: + path = policy_root / mapped + if path.exists(): + return path + raise PolicyCompilerError( + f"named policy '{name}' is registered but {path} does not exist" + ) + + for suffix in (".yml", ".yaml"): + path = policy_root / f"{name}{suffix}" + if path.exists(): + return path + + supported = ", ".join(sorted(NAMED_POLICIES)) + raise PolicyCompilerError( + f"unknown policy '{name}'. Supported named policies: {supported}" + ) + + +def resolve_policy(policy: str | Policy | SandboxPolicy | CompiledPolicy | dict | None): + """Resolve public policy inputs to the runtime policy applied by a sandbox. 
+ + String inputs are fail-closed: they must name an existing file in ``policy/`` + or a supported named policy, otherwise :class:`PolicyCompilerError` is raised. + """ + + if policy is None or isinstance(policy, Policy): + return policy + if isinstance(policy, SandboxPolicy): + return _runtime_policy_from_sandbox(policy) + if isinstance(policy, CompiledPolicy): + return _runtime_policy_from_sandbox(_select_sandbox_policy(policy)) + if isinstance(policy, dict): + return _runtime_policy_from_dict(policy) + if isinstance(policy, str): + path = _resolve_policy_path(policy) + compiled = compile_policy(path) + selector = path.stem if path.stem in compiled.sandboxes else policy + return _runtime_policy_from_sandbox(_select_sandbox_policy(compiled, selector)) + raise ValueError(f"unsupported policy type: {type(policy).__name__}") + + __all__ = [ "Policy", "ReadPath", @@ -311,4 +474,8 @@ def refresh_remote( "compile_policy", "PolicyCompilerError", "refresh_remote", + "resolve_policy", + "NAMED_POLICIES", + "SandboxPolicy", + "CompiledPolicy", ] diff --git a/pyisolate/sdk.py b/pyisolate/sdk.py index b6a1974..6b5c0b8 100644 --- a/pyisolate/sdk.py +++ b/pyisolate/sdk.py @@ -4,13 +4,12 @@ from typing import Any, Callable -from .supervisor import BackendMode, DEFAULT_BACKEND, spawn +from .policy import Policy, resolve_policy +from .supervisor import spawn def sandbox( - policy: str | None = None, - timeout: float | None = None, - backend: BackendMode = DEFAULT_BACKEND, + policy: str | Policy | dict | None = None, timeout: float | None = None ) -> Callable[[Callable[..., Any]], Callable[..., Any]]: """Decorate a function to run inside a sandbox when called. 
@@ -28,7 +27,8 @@ def sandbox( def decorator(func: Callable[..., Any]) -> Callable[..., Any]: def wrapper(*args: Any, **kwargs: Any) -> Any: - sb = spawn(func.__name__, policy=policy, backend=backend) + resolved_policy = resolve_policy(policy) + sb = spawn(func.__name__, policy=resolved_policy) try: return sb.call( f"{func.__module__}.{func.__name__}", @@ -48,13 +48,12 @@ class Pipeline: """Sequential sandboxed stages.""" def __init__(self) -> None: - self._stages: list[tuple[str, str | None, BackendMode]] = [] + self._stages: list[tuple[str, str | Policy | dict | None]] = [] def add_stage( self, stage: str | Callable[[Any], Any], - policy: str | None = None, - backend: BackendMode = DEFAULT_BACKEND, + policy: str | Policy | dict | None = None, ) -> "Pipeline": """Register a stage by dotted path or callable.""" if callable(stage): @@ -69,6 +68,7 @@ def run(self, data: Any) -> Any: value = data for dotted, policy, backend in self._stages: name = dotted.rsplit(".", 1)[-1] - with spawn(name, policy=policy, backend=backend) as sb: + resolved_policy = resolve_policy(policy) + with spawn(name, policy=resolved_policy) as sb: value = sb.call(dotted, value) return value diff --git a/pyisolate/supervisor.py b/pyisolate/supervisor.py index 3ac7766..2f740c9 100644 --- a/pyisolate/supervisor.py +++ b/pyisolate/supervisor.py @@ -21,6 +21,7 @@ from .errors import PolicyAuthError, TenantQuotaExceeded from .observability.alerts import AlertManager from .observability.trace import Tracer +from .policy import resolve_policy from .runtime.protocol import CapabilityHandle, ControlRequest from .runtime.thread import SandboxThread from .telemetry import DenialEvent @@ -296,6 +297,8 @@ def spawn( raise ValueError("Sandbox name contains invalid characters") self._cleanup() + policy = resolve_policy(policy) + if policy is not None and getattr(policy, "imports", None): imports = set(policy.imports) if allowed_imports is not None: diff --git a/tests/test_policy.py b/tests/test_policy.py index 
2bea289..6cd5332 100644 --- a/tests/test_policy.py +++ b/tests/test_policy.py @@ -170,7 +170,9 @@ def test_validation_bad_section_type(tmp_path): ("version: 0.1\nsandboxes: []\n", "sandboxes"), ], ) -def test_refresh_validation_fails_before_compile_or_reload(monkeypatch, tmp_path, doc, msg): +def test_refresh_validation_fails_before_compile_or_reload( + monkeypatch, tmp_path, doc, msg +): policy = load_policy(no_yaml=True) import pyisolate as iso @@ -264,11 +266,7 @@ def fake_reload(*_args, **_kwargs): iso.set_policy_token("tok") path = tmp_path / "p.yml" path.write_text( - "version: 1\n" - "sandboxes:\n" - " sb:\n" - " fs:\n" - ' - allow: "/tmp"\n' + "version: 1\n" "sandboxes:\n" " sb:\n" " fs:\n" ' - allow: "/tmp"\n' ) compiled = policy.refresh(str(path), token="tok", dry_run=True) @@ -305,7 +303,10 @@ def test_compile_policy_supports_inheritance_and_defaults(tmp_path): assert [r.path for r in child.fs] == ["/srv/base", "/srv/child"] assert [r.addr for r in child.tcp] == ["10.0.0.0/8"] assert child.imports == ["math", "json"] - assert compiled.deny_log == ["sandbox=base net=10.0.0.0/8", "sandbox=child net=10.0.0.0/8"] + assert compiled.deny_log == [ + "sandbox=base net=10.0.0.0/8", + "sandbox=child net=10.0.0.0/8", + ] def test_refresh_logs_explicit_deny_rules(tmp_path, caplog, monkeypatch): @@ -317,11 +318,7 @@ def test_refresh_logs_explicit_deny_rules(tmp_path, caplog, monkeypatch): ) iso.set_policy_token("tok") path = tmp_path / "deny.yml" - path.write_text( - "version: 0.1\n" - "net:\n" - ' - deny: "10.0.0.0/8"\n' - ) + path.write_text("version: 0.1\n" "net:\n" ' - deny: "10.0.0.0/8"\n') with caplog.at_level("WARNING"): policy.refresh(str(path), token="tok") assert "policy deny rule active" in caplog.text @@ -376,6 +373,43 @@ def test_reload_policy_rejects_non_canonical_root(monkeypatch, tmp_path, caplog) assert "invalid token" in caplog.text +def test_resolve_unknown_policy_fails_closed(): + import pyisolate.policy as policy + + with 
pytest.raises(policy.PolicyCompilerError, match="unknown policy"): + policy.resolve_policy("does-not-exist") + + +def test_named_policy_applies_runtime_restrictions(tmp_path): + import pyisolate as iso + + allowed = tmp_path / "allowed.txt" + allowed.write_text("ok") + + sb = iso.spawn("named-policy", policy="stdlib.readonly") + try: + sb.exec("import math; post(math.sqrt(25))") + assert sb.recv(timeout=1) == 5.0 + + sb.exec("import random") + with pytest.raises(iso.PolicyError): + sb.recv(timeout=1) + + sb.exec(f"post(open({str(allowed)!r}).read())") + assert sb.recv(timeout=1) == "ok" + + sb.exec("post(open('/etc/hosts').read())") + with pytest.raises(iso.PolicyError): + sb.recv(timeout=1) + + sb.exec( + "import socket\n" + "s = socket.socket()\n" + "try:\n" + " s.connect(('127.0.0.1', 9))\n" + "finally:\n" + " s.close()\n" + ) def test_compile_policy_emits_first_class_capabilities(tmp_path): import pyisolate.policy as policy from pyisolate.capabilities import ConnectTCP, CpuBudget, Import, ReadPath, WritePath