diff --git a/README.md b/README.md index b4ef483..75f4bb9 100644 --- a/README.md +++ b/README.md @@ -86,19 +86,19 @@ Choose a supervisor rollout profile based on where you are deploying: ```python import pyisolate as iso -# default: fast local iteration -dev = iso.Supervisor(rollout_mode="dev") - -# experimental fail-closed gate; requires pyisolate-doctor --mode hardened to pass +# production default: fail closed if the BPF toolchain, verifier, load, or attach fails hardened = iso.Supervisor(rollout_mode="hardened") -# compatibility testing only; it deliberately skips stricter filters and is not enforcement +# explicitly acknowledge weaker enforcement for local iteration +dev = iso.Supervisor(rollout_mode="dev") + +# explicitly acknowledge reduced enforcement for ecosystem validation compat = iso.Supervisor(rollout_mode="compatibility") ``` -* `dev`: lightweight, low-friction development mode. BPF/cgroup setup failures are reported through per-sandbox `quota_enforcement` status and logs, but sandbox creation continues so local development remains unblocked. CPU/RSS quota tests should be treated as best-effort unless the status reports the relevant controller as enforced. -* `hardened`: production fail-closed mode. BPF compile/load failures and cgroup controller failures raise during supervisor start or sandbox spawn; CPU/RSS quotas must be enforced by cgroups/eBPF and watchdog breach events terminate or quarantine the sandbox. Python `tracemalloc` values are exposed only as debugging telemetry. -* `compatibility`: ecosystem validation mode. Baseline BPF loading is attempted while stricter filters/guards may be skipped; cgroup quota status is still surfaced, and missing controllers degrade to explicit status/logs rather than silent `None`. Use this mode to find package compatibility issues, not as the authoritative security boundary. +* `hardened`: documented production default with kernel LSM/cgroup enforcement; any eBPF compile/load/attach failure raises. 
+* `dev`: caller-acknowledged local development mode; tooling failures are logged and kernel enforcement can be absent. +* `compatibility`: caller-acknowledged reduced enforcement to maximize third-party compatibility; strict filters are skipped. ### Hello World diff --git a/pyisolate/bpf/manager.py b/pyisolate/bpf/manager.py index e60fa39..760423b 100644 --- a/pyisolate/bpf/manager.py +++ b/pyisolate/bpf/manager.py @@ -33,11 +33,16 @@ def __init__(self): self._obj = Path(__file__).with_name("dummy.bpf.o") self._skel = Path(__file__).with_name("dummy.skel.h") self.skeleton = "" + self._compiled_skeleton = False self._filter_src = Path(__file__).with_name("syscall_filter.bpf.c") self._filter_obj = Path(__file__).with_name("syscall_filter.bpf.o") self._guard_src = Path(__file__).with_name("resource_guard.bpf.c") self._guard_obj = Path(__file__).with_name("resource_guard.bpf.o") - self._skel_cache: dict[Path, str] = {} + self._bpffs_root = Path("/sys/fs/bpf/pyisolate") + self._dummy_pin = Path("/sys/fs/bpf/dummy") + self._filter_pin_dir = self._bpffs_root / "syscall_filter" + self._guard_pin_dir = self._bpffs_root / "resource_guard" + self._skel_cache = self._SKEL_CACHE # internal helper def _run(self, cmd: list[str], *, raise_on_error: bool = False) -> bool: @@ -81,9 +86,12 @@ def load( Rollout modes: * ``dev``: low-friction mode; tolerate missing tooling and keep running. - * ``hardened``: strict mode; any failure raises a ``RuntimeError``. - * ``compatibility``: looser enforcement for ecosystem testing. Loads the - baseline program but skips stricter filter/guard attachments. + Use only for local development because BPF enforcement can be absent. + * ``hardened``: production default; any failure raises a ``RuntimeError`` + and leaves the manager unloaded so callers fail closed. + * ``compatibility``: caller-acknowledged reduced enforcement for ecosystem + testing. Loads the baseline program but skips stricter filter/guard + attachments. 
The legacy ``strict`` argument is still honored. When provided it overrides ``mode``. @@ -127,7 +135,9 @@ def load( ] ok = True compile_cmd = dummy_compile - if self._src not in self._skel_cache: + if self._src not in self._skel_cache or ( + self._skel_cache.get(self._src) == "" and not self._compiled_skeleton + ): ok &= self._run(compile_cmd, raise_on_error=strict_mode) skel_cmd = [ "sh", @@ -140,11 +150,13 @@ def load( self._skel_cache[self._src] = self._skel.read_text() except OSError: self._skel_cache[self._src] = "" - else: - # Cache a placeholder so repeated loads in tool-less test - # environments do not repeat compile/skeleton steps. + elif ok: + # Cache a placeholder when the build path was exercised but no + # skeleton was emitted (for example under a mocked bpftool). self._skel_cache.setdefault(self._src, "") self.skeleton = self._skel_cache.get(self._src, "") + if ok: + self._compiled_skeleton = True else: self.skeleton = self._skel_cache[self._src] @@ -152,7 +164,7 @@ def load( ["llvm-objdump", "-d", str(self._obj)], raise_on_error=strict_mode ) ok &= self._run( - ["bpftool", "prog", "load", str(self._obj), "/sys/fs/bpf/dummy"], + ["bpftool", "prog", "load", str(self._obj), str(self._dummy_pin)], raise_on_error=strict_mode, ) if mode != "compatibility": @@ -170,9 +182,14 @@ def load( [ "bpftool", "prog", - "load", + "loadall", str(self._filter_obj), - "/sys/fs/bpf/syscall_filter", + str(self._filter_pin_dir), + "type", + "lsm", + "pinmaps", + str(self._bpffs_root), + "autoattach", ], raise_on_error=strict_mode, ) @@ -180,16 +197,45 @@ def load( [ "bpftool", "prog", - "load", + "loadall", str(self._guard_obj), - "/sys/fs/bpf/resource_guard", + str(self._guard_pin_dir), + "pinmaps", + str(self._bpffs_root), + "autoattach", ], raise_on_error=strict_mode, ) + ok &= self._attach_loaded_programs(raise_on_error=strict_mode) self.loaded = ok if strict_mode and not ok: raise RuntimeError("BPF load failed; see logs for details") + def 
_attach_loaded_programs(self, *, raise_on_error: bool = False) -> bool: + """Attach programs that cannot rely solely on pinned objects. + + ``bpftool prog loadall ... autoattach`` creates BPF links for LSM and + tracepoint programs on modern kernels. The explicit cgroup-skb attach is + retained for kernels/tools that require a concrete cgroup attach point. + """ + + ok = True + cgroup_root = Path("/sys/fs/cgroup") + egress_prog = self._guard_pin_dir / "account_cgroup_egress" + ok &= self._run( + [ + "bpftool", + "cgroup", + "attach", + str(cgroup_root), + "egress", + "pinned", + str(egress_prog), + ], + raise_on_error=raise_on_error, + ) + return ok + def hot_reload(self, policy_path: str) -> None: """Refresh maps based on a policy JSON file.""" if not self.loaded: diff --git a/pyisolate/bpf/resource_guard.bpf.c b/pyisolate/bpf/resource_guard.bpf.c index a7dc33d..dca990c 100644 --- a/pyisolate/bpf/resource_guard.bpf.c +++ b/pyisolate/bpf/resource_guard.bpf.c @@ -1,5 +1,76 @@ #define SEC(NAME) __attribute__((section(NAME), used)) +/* Per-cgroup resource accounting and quota breach events for PyIsolate. 
*/ + +typedef unsigned int __u32; +typedef unsigned long long __u64; + +#define BPF_MAP_TYPE_HASH 1 +#define BPF_MAP_TYPE_PERCPU_HASH 5 +#define BPF_MAP_TYPE_RINGBUF 27 + +#define PYI_RESOURCE_CPU 1U +#define PYI_RESOURCE_RSS 2U +#define PYI_RESOURCE_NET 3U + +#define __uint(name, val) int (*name)[val] +#define __type(name, val) val *name + +struct resource_account { + __u64 cpu_time_ns; + __u64 rss_bytes; + __u64 net_bytes; + __u64 last_seen_ns; +}; + +struct resource_quota { + __u64 cpu_time_ns; + __u64 rss_bytes; + __u64 net_bytes; +}; + +struct resource_event { + __u64 cgroup_id; + __u64 pid_tgid; + __u64 observed; + __u64 quota; + __u32 resource; + __u32 breached; +}; + +struct sched_switch_args { + unsigned long long pad; + char prev_comm[16]; + int prev_pid; + int prev_prio; + long long prev_state; + char next_comm[16]; + int next_pid; + int next_prio; +}; + +struct page_fault_args { + unsigned long long pad; + unsigned long address; + unsigned long ip; + int error_code; +}; + +struct __sk_buff { + __u32 len; +}; + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 1 << 22); +} resource_events SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_HASH); + __uint(max_entries, 16384); + __type(key, __u64); + __type(value, struct resource_account); +} cgroup_accounting SEC(".maps"); /* Resource guard event consumed by pyisolate.watchdog.ResourceWatchdog. 
* The supervisor resolves cgroup_id/name to a SandboxThread and performs the * userspace kill/quarantine path immediately; Python tracemalloc accounting is @@ -41,9 +112,92 @@ struct { } usage SEC(".maps"); struct { - int dummy; -} events SEC(".maps"); + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 16384); + __type(key, __u64); + __type(value, struct resource_quota); +} cgroup_quotas SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 65536); + __type(key, __u64); + __type(value, __u64); +} task_cpu_start SEC(".maps"); +static void *(*bpf_map_lookup_elem)(void *map, const void *key) = (void *)1; +static long (*bpf_map_update_elem)(void *map, const void *key, const void *value, __u64 flags) = (void *)2; +static __u64 (*bpf_ktime_get_ns)(void) = (void *)5; +static __u64 (*bpf_get_current_pid_tgid)(void) = (void *)14; +static __u64 (*bpf_get_current_cgroup_id)(void) = (void *)80; +static long (*bpf_ringbuf_output)(void *ringbuf, void *data, __u64 size, __u64 flags) = (void *)130; + +static void emit_if_breached(__u64 cg, struct resource_account *account) +{ + struct resource_quota *quota = bpf_map_lookup_elem(&cgroup_quotas, &cg); + struct resource_event event = {}; + + if (!quota) + return; + + event.cgroup_id = cg; + event.pid_tgid = bpf_get_current_pid_tgid(); + if (quota->cpu_time_ns && account->cpu_time_ns > quota->cpu_time_ns) { + event.observed = account->cpu_time_ns; + event.quota = quota->cpu_time_ns; + event.resource = PYI_RESOURCE_CPU; + event.breached = 1; + bpf_ringbuf_output(&resource_events, &event, sizeof(event), 0); + } + if (quota->rss_bytes && account->rss_bytes > quota->rss_bytes) { + event.observed = account->rss_bytes; + event.quota = quota->rss_bytes; + event.resource = PYI_RESOURCE_RSS; + event.breached = 1; + bpf_ringbuf_output(&resource_events, &event, sizeof(event), 0); + } + if (quota->net_bytes && account->net_bytes > quota->net_bytes) { + event.observed = account->net_bytes; + event.quota = 
quota->net_bytes; + event.resource = PYI_RESOURCE_NET; + event.breached = 1; + bpf_ringbuf_output(&resource_events, &event, sizeof(event), 0); + } +} + +static struct resource_account *account_for_current_cgroup(__u64 *cg_out) +{ + __u64 cg = bpf_get_current_cgroup_id(); + struct resource_account zero = {}; + struct resource_account *account; + + account = bpf_map_lookup_elem(&cgroup_accounting, &cg); + if (!account) { + zero.last_seen_ns = bpf_ktime_get_ns(); + bpf_map_update_elem(&cgroup_accounting, &cg, &zero, 0); + account = bpf_map_lookup_elem(&cgroup_accounting, &cg); + } + *cg_out = cg; + return account; +} + +SEC("tracepoint/sched/sched_switch") +int account_sched_switch(struct sched_switch_args *ctx) +{ + __u64 cg; + __u64 now = bpf_ktime_get_ns(); + __u64 pid_tgid = bpf_get_current_pid_tgid(); + __u64 *started = bpf_map_lookup_elem(&task_cpu_start, &pid_tgid); + struct resource_account *account = account_for_current_cgroup(&cg); + + if (account && started && now > *started) { + account->cpu_time_ns += now - *started; + account->last_seen_ns = now; + emit_if_breached(cg, account); + } + + bpf_map_update_elem(&task_cpu_start, &pid_tgid, &now, 0); + return 0; +} static __inline int emit_breach(unsigned long cgroup_id, unsigned long cpu_time_ns, unsigned long rss_bytes,
- */ - (void)ctx; - return emit_breach(0, 0, 0, BREACH_RSS); + __u64 cg; + struct resource_account *account = account_for_current_cgroup(&cg); + + if (account) { + account->rss_bytes += 4096; + account->last_seen_ns = bpf_ktime_get_ns(); + emit_if_breached(cg, account); + } + return 0; +} + +SEC("cgroup_skb/egress") +int account_cgroup_egress(struct __sk_buff *skb) +{ + __u64 cg; + struct resource_account *account = account_for_current_cgroup(&cg); + + if (account) { + account->net_bytes += skb->len; + account->last_seen_ns = bpf_ktime_get_ns(); + emit_if_breached(cg, account); + } + return 1; } char _license[] SEC("license") = "GPL"; diff --git a/pyisolate/bpf/syscall_filter.bpf.c b/pyisolate/bpf/syscall_filter.bpf.c index c5d8073..295a689 100644 --- a/pyisolate/bpf/syscall_filter.bpf.c +++ b/pyisolate/bpf/syscall_filter.bpf.c @@ -1,13 +1,212 @@ #define SEC(NAME) __attribute__((section(NAME), used)) -/* Minimal syscall filter program. Returns 0 to allow all syscalls. - * Real implementation would inspect arguments and decide. +/* + * Kernel policy filter for PyIsolate sandboxes. + * + * The supervisor pins and updates these maps under /sys/fs/bpf/pyisolate. + * Every decision is keyed by bpf_get_current_cgroup_id(), so enforcement follows + * the sandbox cgroup even when guest code bypasses Python wrappers and performs + * syscalls directly through libc or native extensions. 
*/ +typedef unsigned char __u8; +typedef unsigned int __u32; +typedef unsigned long long __u64; + +#define EPERM 1 +#define AF_INET 2 +#define AF_INET6 10 + +#define BPF_MAP_TYPE_HASH 1 +#define BPF_MAP_TYPE_LRU_HASH 9 +#define BPF_MAP_TYPE_RINGBUF 27 + +#define PYI_DENY_FS (1U << 0) +#define PYI_DENY_NET (1U << 1) +#define PYI_DENY_PROCESS (1U << 2) +#define PYI_DENY_RISKY (1U << 3) + +#define PYI_OP_FILE_OPEN 1U +#define PYI_OP_FILE_TRUNCATE 2U +#define PYI_OP_SOCKET_CONNECT 3U +#define PYI_OP_SOCKET_CREATE 4U +#define PYI_OP_TASK_ALLOC 5U +#define PYI_OP_EXEC 6U +#define PYI_OP_PTRACE 7U +#define PYI_OP_MOUNT 8U +#define PYI_OP_BPF 9U + +#define __uint(name, val) int (*name)[val] +#define __type(name, val) val *name + +union bpf_attr; + +struct sockaddr { + unsigned short sa_family; + char sa_data[14]; +}; + +struct pyisolate_policy { + __u32 deny_mask; + __u32 audit_only; +}; + +struct pyisolate_decision_key { + __u64 cgroup_id; + __u32 op; + __u32 aux; +}; + +struct pyisolate_decision { + __u64 cgroup_id; + __u64 pid_tgid; + __u32 op; + __u32 denied; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 16384); + __type(key, __u64); + __type(value, struct pyisolate_policy); +} sandbox_policy SEC(".maps"); + +/* Optional per-operation overrides used for hot reload tests and staged rollout. 
*/ +struct { + __uint(type, BPF_MAP_TYPE_LRU_HASH); + __uint(max_entries, 65536); + __type(key, struct pyisolate_decision_key); + __type(value, __u32); +} syscall_policy SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 1 << 20); +} syscall_events SEC(".maps"); + +static void *(*bpf_map_lookup_elem)(void *map, const void *key) = (void *)1; +static long (*bpf_ringbuf_output)(void *ringbuf, void *data, __u64 size, __u64 flags) = (void *)130; +static __u64 (*bpf_get_current_cgroup_id)(void) = (void *)80; +static __u64 (*bpf_get_current_pid_tgid)(void) = (void *)14; + +static __u32 policy_mask_for_op(__u32 op) +{ + if (op == PYI_OP_FILE_OPEN || op == PYI_OP_FILE_TRUNCATE) + return PYI_DENY_FS; + if (op == PYI_OP_SOCKET_CONNECT || op == PYI_OP_SOCKET_CREATE) + return PYI_DENY_NET; + if (op == PYI_OP_TASK_ALLOC || op == PYI_OP_EXEC) + return PYI_DENY_PROCESS; + return PYI_DENY_RISKY; +} + +static int pyisolate_check(__u32 op, __u32 aux) +{ + __u64 cg = bpf_get_current_cgroup_id(); + struct pyisolate_policy *policy; + struct pyisolate_decision_key key = {}; + __u32 *override; + __u32 denied = 0; + + key.cgroup_id = cg; + key.op = op; + key.aux = aux; + override = bpf_map_lookup_elem(&syscall_policy, &key); + if (override) + denied = *override; + else { + policy = bpf_map_lookup_elem(&sandbox_policy, &cg); + if (policy && (policy->deny_mask & policy_mask_for_op(op))) + denied = policy->audit_only ? 
0 : 1; + } + + if (denied) { + struct pyisolate_decision event = {}; + event.cgroup_id = cg; + event.pid_tgid = bpf_get_current_pid_tgid(); + event.op = op; + event.denied = 1; + bpf_ringbuf_output(&syscall_events, &event, sizeof(event), 0); + return -EPERM; + } + return 0; +} + SEC("lsm/file_open") -int filter_file_open(void *ctx) +int BPF_PROG_filter_file_open(void *file, int ret) { + if (ret) + return ret; + return pyisolate_check(PYI_OP_FILE_OPEN, 0); +} + +SEC("lsm/file_truncate") +int BPF_PROG_filter_file_truncate(void *file, int ret) +{ + if (ret) + return ret; + return pyisolate_check(PYI_OP_FILE_TRUNCATE, 0); +} + +SEC("lsm/socket_create") +int BPF_PROG_filter_socket_create(int family, int type, int protocol, int kern, int ret) +{ + if (ret) + return ret; + if (family == AF_INET || family == AF_INET6) + return pyisolate_check(PYI_OP_SOCKET_CREATE, (__u32)family); + return 0; +} + +SEC("lsm/socket_connect") +int BPF_PROG_filter_socket_connect(void *sock, struct sockaddr *address, int addrlen, int ret) +{ + if (ret) + return ret; + if (address && (address->sa_family == AF_INET || address->sa_family == AF_INET6)) + return pyisolate_check(PYI_OP_SOCKET_CONNECT, (__u32)address->sa_family); return 0; } +SEC("lsm/task_alloc") +int BPF_PROG_filter_task_alloc(void *task, unsigned long clone_flags, int ret) +{ + if (ret) + return ret; + return pyisolate_check(PYI_OP_TASK_ALLOC, 0); +} + +SEC("lsm/bprm_check_security") +int BPF_PROG_filter_exec(void *bprm, int ret) +{ + if (ret) + return ret; + return pyisolate_check(PYI_OP_EXEC, 0); +} + +SEC("lsm/ptrace_access_check") +int BPF_PROG_filter_ptrace(void *child, unsigned int mode, int ret) +{ + if (ret) + return ret; + return pyisolate_check(PYI_OP_PTRACE, mode); +} + +SEC("lsm/sb_mount") +int BPF_PROG_filter_mount(const char *dev_name, const void *path, const char *type, + unsigned long flags, void *data, int ret) +{ + if (ret) + return ret; + return pyisolate_check(PYI_OP_MOUNT, 0); +} + +SEC("lsm/bpf") +int 
BPF_PROG_filter_bpf(int cmd, union bpf_attr *attr, unsigned int size, int ret) +{ + if (ret) + return ret; + return pyisolate_check(PYI_OP_BPF, (__u32)cmd); +} + char _license[] SEC("license") = "GPL"; diff --git a/pyisolate/supervisor.py b/pyisolate/supervisor.py index 2f740c9..099f932 100644 --- a/pyisolate/supervisor.py +++ b/pyisolate/supervisor.py @@ -184,6 +184,11 @@ def __init__( try: self._bpf.load(mode=rollout_mode) except TypeError as exc: + if "unexpected keyword argument 'mode'" not in str(exc): + raise + # Backward-compatible path for tests or integrations that provide a + # legacy BPFManager.load(strict=...) shim. + self._bpf.load(strict=rollout_mode == "hardened") # Test and compatibility shims may still expose the legacy # load(strict=False) signature. Real BPFManager validates rollout # modes itself. diff --git a/tests/test_bpf_kernel_enforcement.py b/tests/test_bpf_kernel_enforcement.py new file mode 100644 index 0000000..626980a --- /dev/null +++ b/tests/test_bpf_kernel_enforcement.py @@ -0,0 +1,104 @@ +import os +import shutil +import socket +import subprocess +from pathlib import Path + +import pytest + +from pyisolate.bpf.manager import BPFManager + +ROOT = Path(__file__).resolve().parents[1] +SYSCALL_FILTER = ROOT / "pyisolate" / "bpf" / "syscall_filter.bpf.c" +RESOURCE_GUARD = ROOT / "pyisolate" / "bpf" / "resource_guard.bpf.c" + + +def test_syscall_filter_uses_lsm_hooks_and_cgroup_policy_maps(): + src = SYSCALL_FILTER.read_text() + + assert 'SEC("lsm/file_open")' in src + assert 'SEC("lsm/socket_connect")' in src + assert 'SEC("lsm/socket_create")' in src + assert 'SEC("lsm/task_alloc")' in src + assert 'SEC("lsm/bprm_check_security")' in src + assert 'SEC("lsm/ptrace_access_check")' in src + assert 'SEC("lsm/sb_mount")' in src + assert 'SEC("lsm/bpf")' in src + assert "bpf_get_current_cgroup_id" in src + assert "sandbox_policy" in src + assert "syscall_policy" in src + assert "return -EPERM" in src + + +def 
test_resource_guard_uses_ringbuf_and_per_cgroup_accounting_maps(): + src = RESOURCE_GUARD.read_text() + + assert "BPF_MAP_TYPE_RINGBUF" in src + assert "resource_events" in src + assert "cgroup_accounting" in src + assert "cgroup_quotas" in src + assert 'SEC("tracepoint/sched/sched_switch")' in src + assert 'SEC("tracepoint/exceptions/page_fault_user")' in src + assert 'SEC("cgroup_skb/egress")' in src + assert "emit_if_breached" in src + + +def test_manager_loads_and_attaches_kernel_programs(monkeypatch): + calls = [] + + def record(self, cmd, *, raise_on_error=False): + calls.append(cmd) + return True + + monkeypatch.setattr(BPFManager, "_run", record) + mgr = BPFManager() + + mgr.load(mode="hardened") + + assert any(cmd[:3] == ["bpftool", "prog", "loadall"] and "autoattach" in cmd for cmd in calls) + assert any(cmd[:3] == ["bpftool", "cgroup", "attach"] for cmd in calls) + assert mgr.loaded is True + + +@pytest.mark.skipif( + os.environ.get("PYISOLATE_LIVE_BPF_TESTS") != "1" + or os.geteuid() != 0 + or shutil.which("bpftool") is None, + reason="live kernel-enforcement test requires root, bpftool, and PYISOLATE_LIVE_BPF_TESTS=1", +) +def test_live_kernel_policy_blocks_unwrapped_file_network_and_process_actions(tmp_path): + """Exercise kernel policy directly; no PyIsolate Python wrappers are used.""" + + mgr = BPFManager() + mgr.load(mode="hardened") + + cgroup_id = os.stat("/sys/fs/cgroup").st_ino + key = cgroup_id.to_bytes(8, "little") + value = (15).to_bytes(4, "little") + (0).to_bytes(4, "little") + policy_map = "/sys/fs/bpf/pyisolate/sandbox_policy" + subprocess.run( + [ + "bpftool", + "map", + "update", + "pinned", + policy_map, + "key", + "hex", + *[f"{byte:02x}" for byte in key], + "value", + "hex", + *[f"{byte:02x}" for byte in value], + "any", + ], + check=True, + ) + + with pytest.raises(PermissionError): + (tmp_path / "blocked.txt").write_text("blocked by LSM") + + with pytest.raises(OSError): + socket.create_connection(("127.0.0.1", 9), timeout=0.05) 
+ + with pytest.raises(PermissionError): + subprocess.run(["/bin/true"], check=True) diff --git a/tests/test_bpf_manager.py b/tests/test_bpf_manager.py index 1273a8e..4a8ef1a 100644 --- a/tests/test_bpf_manager.py +++ b/tests/test_bpf_manager.py @@ -85,20 +85,37 @@ def fake_run(self, cmd, *, raise_on_error=False): assert ["llvm-objdump", "-d", str(mgr._obj)] in calls assert ["llvm-objdump", "-d", str(mgr._filter_obj)] in calls assert ["llvm-objdump", "-d", str(mgr._guard_obj)] in calls - assert ["bpftool", "prog", "load", str(mgr._obj), "/sys/fs/bpf/dummy"] in calls + assert ["bpftool", "prog", "load", str(mgr._obj), str(mgr._dummy_pin)] in calls assert [ "bpftool", "prog", - "load", + "loadall", str(mgr._filter_obj), - "/sys/fs/bpf/syscall_filter", + str(mgr._filter_pin_dir), + "type", + "lsm", + "pinmaps", + str(mgr._bpffs_root), + "autoattach", ] in calls assert [ "bpftool", "prog", - "load", + "loadall", str(mgr._guard_obj), - "/sys/fs/bpf/resource_guard", + str(mgr._guard_pin_dir), + "pinmaps", + str(mgr._bpffs_root), + "autoattach", + ] in calls + assert [ + "bpftool", + "cgroup", + "attach", + "/sys/fs/cgroup", + "egress", + "pinned", + str(mgr._guard_pin_dir / "account_cgroup_egress"), ] in calls assert mgr.loaded @@ -242,7 +259,7 @@ def record(self, cmd, *, raise_on_error=False): "prog", "load", str(mgr._obj), - "/sys/fs/bpf/dummy", + str(mgr._dummy_pin), ] in calls assert ["llvm-objdump", "-d", str(mgr._filter_obj)] not in calls assert ["llvm-objdump", "-d", str(mgr._guard_obj)] not in calls diff --git a/tests/test_metrics.py b/tests/test_metrics.py index b677672..ae98235 100644 --- a/tests/test_metrics.py +++ b/tests/test_metrics.py @@ -6,9 +6,38 @@ import types + +class _StubBPFManager: + def __init__(self): + self.loaded = False + self.policy_maps = {} + + def load(self, strict: bool = False) -> None: # pragma: no cover - stub + self.loaded = False + + def hot_reload(self, policy_path: str) -> None: # pragma: no cover - stub + raise RuntimeError("BPF 
disabled") + + def _run(self, *_, **__): # pragma: no cover - stub + return True + + def open_ring_buffer(self): # pragma: no cover - stub + return iter(()) + + +bpf_stub = types.ModuleType("pyisolate.bpf.manager") +bpf_stub.BPFManager = _StubBPFManager # type: ignore[attr-defined] +_original_bpf_manager = sys.modules.get("pyisolate.bpf.manager") +sys.modules["pyisolate.bpf.manager"] = bpf_stub + import pyisolate as iso from pyisolate.observability.metrics import MetricsExporter +if _original_bpf_manager is None: + sys.modules.pop("pyisolate.bpf.manager", None) +else: + sys.modules["pyisolate.bpf.manager"] = _original_bpf_manager + def test_export_contains_metrics(): sb = iso.spawn("metrics")