diff --git a/README.md b/README.md index 464e4c0..7cb1264 100644 --- a/README.md +++ b/README.md @@ -43,12 +43,12 @@ mount/ └── github/ └── repos/ ├── _index.json - ├── acme/api/ # lazily materialized on first stat + ├── acme/api/ │ └── pulls/42__bump-deps/meta.json └── by-name/acme__api.json ``` -Four alias views ship out of the box: `by-title/` (slug lookups), `by-id/` (identifier lookups), `by-name/` (human-readable name lookups), and `by-state/` (grouped by issue/PR state). GitHub repo subtrees materialize lazily — the first `ls`/`stat` triggers a one-time fetch instead of paying upfront sync cost on workspaces with hundreds of repos. +Four alias views ship out of the box: `by-title/` (slug lookups), `by-id/` (identifier lookups), `by-name/` (human-readable name lookups), and `by-state/` (grouped by issue/PR state). GitHub repo subtrees can be materialized lazily (opt-in via `--lazy-repos`) for huge-org workspaces. ## Why files diff --git a/cmd/relayfile-mount/fuse_mount.go b/cmd/relayfile-mount/fuse_mount.go index 4a30500..bda1500 100644 --- a/cmd/relayfile-mount/fuse_mount.go +++ b/cmd/relayfile-mount/fuse_mount.go @@ -23,6 +23,7 @@ func runFuseMount(ctx context.Context, cfg mountConfig) error { Client: httpClient, WorkspaceID: cfg.workspaceID, RemoteRoot: cfg.remotePath, + LazyRepos: cfg.lazyRepos, Logger: log.Default(), } diff --git a/cmd/relayfile-mount/main.go b/cmd/relayfile-mount/main.go index 635904e..48acab5 100644 --- a/cmd/relayfile-mount/main.go +++ b/cmd/relayfile-mount/main.go @@ -42,6 +42,7 @@ type mountConfig struct { intervalJitter float64 timeout time.Duration websocketEnabled bool + lazyRepos bool scopes []string once bool mode string @@ -66,6 +67,7 @@ func main() { intervalJitter := flag.Float64("interval-jitter", floatEnv("RELAYFILE_MOUNT_INTERVAL_JITTER", 0.2), "sync interval jitter ratio (0.0-1.0)") timeout := flag.Duration("timeout", durationEnv("RELAYFILE_MOUNT_TIMEOUT", 15*time.Second), "per-sync timeout") websocketEnabled := flag.Bool("websocket", boolEnv("RELAYFILE_MOUNT_WEBSOCKET", true), "enable websocket event streaming when available") + lazyRepos := flag.Bool("lazy-repos", lazyReposEnv(), "lazily materialize GitHub repo subtrees on first access") mode := flag.String("mode", envOrDefault("RELAYFILE_MOUNT_MODE", mountModePoll), "mount mode: poll (synced mirror, recommended) or fuse") fuse := flag.Bool("fuse", boolEnv("RELAYFILE_MOUNT_FUSE", false), "shortcut for --mode=fuse") once := flag.Bool("once", false, "run one sync cycle and exit") @@ -107,6 +109,7 @@ func main() { intervalJitter: *intervalJitter, timeout: *timeout, websocketEnabled: *websocketEnabled, + lazyRepos: *lazyRepos, scopes: parseTokenScopes(strings.TrimSpace(*token)), once: *once, mode: resolvedMode, @@ -161,6 +164,7 @@ func runPollingMount(rootCtx context.Context, cfg mountConfig) error { Logger: log.Default(), Mode: cfg.mode, Interval: cfg.interval, + LazyRepos: boolPtr(cfg.lazyRepos), }) if err != nil { return fmt.Errorf("initialize mount syncer: %w", err) @@ -270,6 +274,10 @@ func boolEnv(name string, fallback bool) bool { return value } +func lazyReposEnv() bool { + return boolEnv("RELAYFILE_LAZY_REPOS", boolEnv("RELAYFILE_MOUNT_LAZY_GITHUB_REPOS", false)) +} + func boolPtr(value bool) *bool { return &value } diff --git a/cmd/relayfile-mount/main_test.go b/cmd/relayfile-mount/main_test.go index ef5d710..5200bd1 100644 --- a/cmd/relayfile-mount/main_test.go +++ b/cmd/relayfile-mount/main_test.go @@ -23,6 +23,33 @@ func TestFloatEnvFallsBackOnInvalid(t *testing.T) { } } +func TestLazyReposEnvDefaultsFalse(t *testing.T) { + t.Setenv("RELAYFILE_LAZY_REPOS", "") + t.Setenv("RELAYFILE_MOUNT_LAZY_GITHUB_REPOS", "") + + if lazyReposEnv() { + t.Fatal("expected lazy repos to default false") + } +} + +func TestLazyReposEnvParsesOptIn(t *testing.T) { + t.Setenv("RELAYFILE_LAZY_REPOS", "true") + t.Setenv("RELAYFILE_MOUNT_LAZY_GITHUB_REPOS", "") + + if !lazyReposEnv() { + t.Fatal("expected RELAYFILE_LAZY_REPOS=true to opt in") + } +} + +func TestLazyReposEnvSupportsLegacyName(t *testing.T) { + t.Setenv("RELAYFILE_LAZY_REPOS", "") + t.Setenv("RELAYFILE_MOUNT_LAZY_GITHUB_REPOS", "true") + + if !lazyReposEnv() { + t.Fatal("expected legacy lazy repos env var to opt in") + } +} + func TestClampJitterRatio(t *testing.T) { if got := clampJitterRatio(-0.1); got != 0 { t.Fatalf("expected clamp to 0, got %f", got) diff --git a/evals/suites/integrations/cases.md b/evals/suites/integrations/cases.md index 586b1b5..08d1333 100644 --- a/evals/suites/integrations/cases.md +++ b/evals/suites/integrations/cases.md @@ -37,7 +37,8 @@ List the open issues in the AgentWorkforce/relay GitHub repo through the relayfi ```json [ { "op": "list", "path": "/github/repos/AgentWorkforce/relay/issues" }, - { "op": "grep", "path": "/github/repos/AgentWorkforce/relay/issues", "pattern": "\"state\": \"open\"" } + { "op": "read", "path": "/github/repos/AgentWorkforce/relay/issues/805.json" }, + { "op": "read", "path": "/github/repos/AgentWorkforce/relay/issues/815.json" } ] ``` @@ -47,9 +48,12 @@ contentIncludes: - 805.json - 815.json - state +- Stabilize relayfile comparison evals +- evals +- Measure token cost against MCP fileExists: - /github/repos/AgentWorkforce/relay/issues/805.json -maxToolCalls: 2 +maxToolCalls: 3 ### Must - Answer from the filesystem-shaped GitHub data, not by invoking a GitHub MCP. diff --git a/internal/mountfuse/fs.go b/internal/mountfuse/fs.go index 3446816..3eae393 100644 --- a/internal/mountfuse/fs.go +++ b/internal/mountfuse/fs.go @@ -6,10 +6,8 @@ import ( "hash/fnv" "log" "mime" - "os" "path" "sort" - "strconv" "strings" "sync" "syscall" @@ -41,6 +39,7 @@ type Config struct { NegativeTimeout time.Duration UID uint32 GID uint32 + LazyRepos bool Logger *log.Logger } @@ -164,7 +163,7 @@ func newFSState(cfg Config) *fsState { inodeByPath: map[string]uint64{normalizeRemotePath(cfg.RemoteRoot): 1}, pathByInode: map[uint64]string{1: normalizeRemotePath(cfg.RemoteRoot)}, } - if lazyGithubReposEnabled() { + if cfg.LazyRepos { state.lazyRepos = NewLazyMaterializeCache() } return state @@ -698,12 +697,3 @@ func contentTypeForPath(remotePath string) string { } return "text/plain; charset=utf-8" } - -func lazyGithubReposEnabled() bool { - raw := strings.TrimSpace(os.Getenv("RELAYFILE_MOUNT_LAZY_GITHUB_REPOS")) - if raw == "" { - return false - } - enabled, err := strconv.ParseBool(raw) - return err == nil && enabled -} diff --git a/internal/mountfuse/fuse_test.go b/internal/mountfuse/fuse_test.go index 177b123..f2fa459 100644 --- a/internal/mountfuse/fuse_test.go +++ b/internal/mountfuse/fuse_test.go @@ -730,11 +730,10 @@ func TestFuseAliasReaddirRefreshesAfterInvalidation(t *testing.T) { } func TestLazyMaterializeFiresOnceOnRepoStat(t *testing.T) { - t.Setenv("RELAYFILE_MOUNT_LAZY_GITHUB_REPOS", "true") t.Run("repo stat and repeated readdir", func(t *testing.T) { remote := newLazyGithubRepoRemote() - root := newMountTestRoot(t, remote, "ws_lazy_once") + root := newLazyMountTestRoot(t, remote, "ws_lazy_once") repo := lookupDir(t, lookupDir(t, lookupDir(t, lookupDir(t, root, "github"), "repos"), "octocat"), "hello-world") var out fuse.AttrOut @@ -759,7 +758,7 @@ func TestLazyMaterializeFiresOnceOnRepoStat(t *testing.T) { t.Run("missing owner or repo segments do not trigger", func(t *testing.T) { remote := newLazyGithubRepoRemote() - root := newMountTestRoot(t, remote, "ws_lazy_missing_segments") + root := newLazyMountTestRoot(t, remote, "ws_lazy_missing_segments") repos := lookupDir(t, lookupDir(t, root, "github"), "repos") owner := lookupDir(t, repos, "octocat") @@ -777,7 +776,7 @@ func TestLazyMaterializeFiresOnceOnRepoStat(t *testing.T) { t.Run("multiple repos under same owner are independent", func(t *testing.T) { remote := newLazyGithubRepoRemote() - root := newMountTestRoot(t, remote, "ws_lazy_multi_repo") + root := newLazyMountTestRoot(t, remote, "ws_lazy_multi_repo") owner := lookupDir(t, lookupDir(t, lookupDir(t, root, "github"), "repos"), "octocat") helloWorld := lookupDir(t, owner, "hello-world") spoonKnife := lookupDir(t, owner, "spoon-knife") @@ -806,7 +805,7 @@ func TestLazyMaterializeFiresOnceOnRepoStat(t *testing.T) { remote.setMaterialized(owner, repo) return nil } - root := newMountTestRoot(t, remote, "ws_lazy_race") + root := newLazyMountTestRoot(t, remote, "ws_lazy_race") repo := lookupDir(t, lookupDir(t, lookupDir(t, lookupDir(t, root, "github"), "repos"), "octocat"), "hello-world") errnos := make(chan syscall.Errno, 2) @@ -833,8 +832,16 @@ func TestLazyMaterializeFiresOnceOnRepoStat(t *testing.T) { }) } +func TestLazyReposConfigOverridesEnv(t *testing.T) { + t.Setenv("RELAYFILE_LAZY_REPOS", "true") + + root := newMountTestRoot(t, newLazyGithubRepoRemote(), "ws_lazy_env_override") + if root.state.lazyRepos != nil { + t.Fatal("expected explicit Config.LazyRepos=false to ignore lazy repos env fallback") + } +} + func TestLazyMaterializeRetriesAfterError(t *testing.T) { - t.Setenv("RELAYFILE_MOUNT_LAZY_GITHUB_REPOS", "true") remote := newLazyGithubRepoRemote() remote.lazyMaterializeFunc = func(_ context.Context, _ string, owner, repo string) error { @@ -844,7 +851,7 @@ func TestLazyMaterializeRetriesAfterError(t *testing.T) { remote.setMaterialized(owner, repo) return nil } - root := newMountTestRoot(t, remote, "ws_lazy_retry") + root := newLazyMountTestRoot(t, remote, "ws_lazy_retry") repo := lookupDir(t, lookupDir(t, lookupDir(t, lookupDir(t, root, "github"), "repos"), "octocat"), "hello-world") if _, errno := repo.Readdir(context.Background()); errno != syscall.EIO { @@ -859,7 +866,6 @@ func TestLazyMaterializeRetriesAfterError(t *testing.T) { } func TestLazyMaterializeNoOpWhenRemoteDoesNotImplement(t *testing.T) { - t.Setenv("RELAYFILE_MOUNT_LAZY_GITHUB_REPOS", "true") remote := &fakeRemoteClient{ trees: map[string]mountsync.TreeResponse{ @@ -884,7 +890,7 @@ func TestLazyMaterializeNoOpWhenRemoteDoesNotImplement(t *testing.T) { }, }, } - root := newMountTestRoot(t, remote, "ws_lazy_noop") + root := newLazyMountTestRoot(t, remote, "ws_lazy_noop") repo := lookupDir(t, lookupDir(t, lookupDir(t, lookupDir(t, root, "github"), "repos"), "octocat"), "hello-world") if errno := repo.Getattr(context.Background(), nil, &fuse.AttrOut{}); errno != 0 { @@ -896,7 +902,6 @@ func TestLazyMaterializeNoOpWhenRemoteDoesNotImplement(t *testing.T) { } func TestLazyMaterializeAllowsEmptyRepoTree(t *testing.T) { - t.Setenv("RELAYFILE_MOUNT_LAZY_GITHUB_REPOS", "true") remote := &fakeLazyRemoteClient{ fakeRemoteClient: &fakeRemoteClient{}, @@ -926,7 +931,7 @@ func TestLazyMaterializeAllowsEmptyRepoTree(t *testing.T) { return mountsync.TreeResponse{}, &mountsync.HTTPError{StatusCode: 404, Code: "not_found", Message: "tree not found"} } - root := newMountTestRoot(t, remote, "ws_lazy_empty_repo") + root := newLazyMountTestRoot(t, remote, "ws_lazy_empty_repo") repo := lookupDir(t, lookupDir(t, lookupDir(t, lookupDir(t, root, "github"), "repos"), "octocat"), "empty-repo") if errno := repo.Getattr(context.Background(), nil, &fuse.AttrOut{}); errno != 0 { @@ -1030,8 +1035,8 @@ func TestByStateOutsideIssuesPathRoundTrips(t *testing.T) { remote := &fakeRemoteClient{ trees: map[string]mountsync.TreeResponse{ - "/": {Path: "/", Entries: []mountsync.TreeEntry{{Path: "/notion", Type: "directory"}}}, - "/notion": {Path: "/notion", Entries: []mountsync.TreeEntry{{Path: "/notion/by-state", Type: "directory"}}}, + "/": {Path: "/", Entries: []mountsync.TreeEntry{{Path: "/notion", Type: "directory"}}}, + "/notion": {Path: "/notion", Entries: []mountsync.TreeEntry{{Path: "/notion/by-state", Type: "directory"}}}, "/notion/by-state": { Path: "/notion/by-state", Entries: []mountsync.TreeEntry{ @@ -1073,6 +1078,17 @@ func newMountTestRoot(t *testing.T, remote mountsync.RemoteClient, workspaceID s return root } +func newLazyMountTestRoot(t *testing.T, remote mountsync.RemoteClient, workspaceID string) *DirNode { + t.Helper() + + root, err := New(Config{Client: remote, WorkspaceID: workspaceID, RemoteRoot: "/", LazyRepos: true}) + if err != nil { + t.Fatalf("New() failed: %v", err) + } + _ = gofusefs.NewNodeFS(root, &gofusefs.Options{}) + return root +} + func newLazyGithubRepoRemote() *fakeLazyRemoteClient { remote := &fakeLazyRemoteClient{ fakeRemoteClient: &fakeRemoteClient{ diff --git a/internal/mountfuse/layout.go b/internal/mountfuse/layout.go index 619a8f2..a11ffea 100644 --- a/internal/mountfuse/layout.go +++ b/internal/mountfuse/layout.go @@ -49,7 +49,7 @@ Entity files use the ` + "`__`" + ` filename convention. Rec ## Lazy materialization -When lazy mode is enabled, the ` + "`github/repos//`" + ` subtree is populated on first read via ` + "`LazyMaterialize`" + `. The first stat or directory read may incur one-time latency while the repo content is materialized. +GitHub repo subtrees are synced eagerly by default. For huge-org workspaces, opt in to lazy mode with ` + "`--lazy-repos`" + ` or ` + "`RELAYFILE_LAZY_REPOS=true`" + ` to populate ` + "`github/repos//`" + ` on first read via ` + "`LazyMaterialize`" + `. The first stat or directory read may incur one-time latency while the repo content is materialized. ## Integration-specific layouts diff --git a/internal/mountsync/syncer.go b/internal/mountsync/syncer.go index e914871..151f503 100644 --- a/internal/mountsync/syncer.go +++ b/internal/mountsync/syncer.go @@ -424,7 +424,8 @@ type SyncerOptions struct { // the default (defaultFullPullEvery, ~10 min at 30s intervals). A // negative value disables the periodic full pull entirely. FullPullEvery int - LazyRepos bool + // LazyRepos controls lazy GitHub repo subtree hydration. nil falls back to env. + LazyRepos *bool } type Logger interface { @@ -629,8 +630,16 @@ func NewSyncer(client RemoteClient, opts SyncerOptions) (*Syncer, error) { fullPullEvery = defaultFullPullEvery } } - lazyRepos := opts.LazyRepos - if raw := strings.TrimSpace(os.Getenv("RELAYFILE_MOUNT_LAZY_GITHUB_REPOS")); raw != "" { + lazyRepos := false + if opts.LazyRepos != nil { + lazyRepos = *opts.LazyRepos + } else if raw := strings.TrimSpace(os.Getenv("RELAYFILE_LAZY_REPOS")); raw != "" { + if parsed, perr := strconv.ParseBool(raw); perr == nil { + lazyRepos = parsed + } else if opts.Logger != nil { + opts.Logger.Printf("ignoring invalid RELAYFILE_LAZY_REPOS=%q: %v", raw, perr) + } + } else if raw := strings.TrimSpace(os.Getenv("RELAYFILE_MOUNT_LAZY_GITHUB_REPOS")); raw != "" { if parsed, perr := strconv.ParseBool(raw); perr == nil { lazyRepos = parsed } else if opts.Logger != nil { diff --git a/internal/mountsync/syncer_test.go b/internal/mountsync/syncer_test.go index edea10a..0f6c368 100644 --- a/internal/mountsync/syncer_test.go +++ b/internal/mountsync/syncer_test.go @@ -30,6 +30,10 @@ import ( "github.com/fsnotify/fsnotify" ) +func boolPtr(value bool) *bool { + return &value +} + func TestSyncOncePullsRemoteAndPushesLocalEdits(t *testing.T) { client := &fakeClient{ files: map[string]RemoteFile{ @@ -123,6 +127,9 @@ func TestHandleLocalChangeIgnoresAlreadyTrackedContent(t *testing.T) { } func TestLazyReposSkipsEagerFetchOfIssuesOnStartup(t *testing.T) { + t.Setenv("RELAYFILE_LAZY_REPOS", "") + t.Setenv("RELAYFILE_MOUNT_LAZY_GITHUB_REPOS", "") + client := &fakeClient{ files: map[string]RemoteFile{ "/github/repos/octocat/hello-world/_index.json": { @@ -145,7 +152,7 @@ func TestLazyReposSkipsEagerFetchOfIssuesOnStartup(t *testing.T) { WorkspaceID: "ws_lazy_repos_on", RemoteRoot: "/", LocalRoot: localDir, - LazyRepos: true, + LazyRepos: boolPtr(true), }) if err != nil { t.Fatalf("new syncer failed: %v", err) @@ -163,7 +170,90 @@ func TestLazyReposSkipsEagerFetchOfIssuesOnStartup(t *testing.T) { } } +func TestLazyReposDefaultsToEagerFetchOfIssues(t *testing.T) { + t.Setenv("RELAYFILE_LAZY_REPOS", "") + t.Setenv("RELAYFILE_MOUNT_LAZY_GITHUB_REPOS", "") + + client := &fakeClient{ + files: map[string]RemoteFile{ + "/github/repos/octocat/hello-world/_index.json": { + Path: "/github/repos/octocat/hello-world/_index.json", + Revision: "rev_index", + ContentType: "application/json", + Content: `{"repo":"hello-world"}`, + }, + "/github/repos/octocat/hello-world/issues/issue-1.json": { + Path: "/github/repos/octocat/hello-world/issues/issue-1.json", + Revision: "rev_issue_1", + ContentType: "application/json", + Content: `{"id":1}`, + }, + }, + revisionCounter: 2, + } + localDir := t.TempDir() + syncer, err := NewSyncer(client, SyncerOptions{ + WorkspaceID: "ws_lazy_repos_default", + RemoteRoot: "/", + LocalRoot: localDir, + }) + if err != nil { + t.Fatalf("new syncer failed: %v", err) + } + + if err := syncer.SyncOnce(context.Background()); err != nil { + t.Fatalf("default sync failed: %v", err) + } + + if got := client.readFileCallsByPath["/github/repos/octocat/hello-world/issues/issue-1.json"]; got < 1 { + t.Fatalf("expected eager issue reads by default, got %d", got) + } +} + +func TestLazyReposEnvFallbackStillSkipsEagerFetchOfIssues(t *testing.T) { + t.Setenv("RELAYFILE_LAZY_REPOS", "true") + t.Setenv("RELAYFILE_MOUNT_LAZY_GITHUB_REPOS", "") + + client := &fakeClient{ + files: map[string]RemoteFile{ + "/github/repos/octocat/hello-world/_index.json": { + Path: "/github/repos/octocat/hello-world/_index.json", + Revision: "rev_index", + ContentType: "application/json", + Content: `{"repo":"hello-world"}`, + }, + "/github/repos/octocat/hello-world/issues/issue-1.json": { + Path: "/github/repos/octocat/hello-world/issues/issue-1.json", + Revision: "rev_issue_1", + ContentType: "application/json", + Content: `{"id":1}`, + }, + }, + revisionCounter: 2, + } + localDir := t.TempDir() + syncer, err := NewSyncer(client, SyncerOptions{ + WorkspaceID: "ws_lazy_repos_env", + RemoteRoot: "/", + LocalRoot: localDir, + }) + if err != nil { + t.Fatalf("new syncer failed: %v", err) + } + + if err := syncer.SyncOnce(context.Background()); err != nil { + t.Fatalf("env lazy sync failed: %v", err) + } + + if got := client.readFileCallsByPath["/github/repos/octocat/hello-world/issues/issue-1.json"]; got != 0 { + t.Fatalf("expected zero eager issue reads when env opts into lazy mode, got %d", got) + } +} + func TestLazyReposOffStillFetchesIssues(t *testing.T) { + t.Setenv("RELAYFILE_LAZY_REPOS", "") + t.Setenv("RELAYFILE_MOUNT_LAZY_GITHUB_REPOS", "") + client := &fakeClient{ files: map[string]RemoteFile{ "/github/repos/octocat/hello-world/_index.json": { @@ -186,7 +276,7 @@ func TestLazyReposOffStillFetchesIssues(t *testing.T) { WorkspaceID: "ws_lazy_repos_off", RemoteRoot: "/", LocalRoot: localDir, - LazyRepos: false, + LazyRepos: boolPtr(false), }) if err != nil { t.Fatalf("new syncer failed: %v", err) @@ -201,6 +291,47 @@ func TestLazyReposOffStillFetchesIssues(t *testing.T) { } } +func TestLazyReposExplicitFalseOverridesEnv(t *testing.T) { + t.Setenv("RELAYFILE_LAZY_REPOS", "true") + t.Setenv("RELAYFILE_MOUNT_LAZY_GITHUB_REPOS", "") + + client := &fakeClient{ + files: map[string]RemoteFile{ + "/github/repos/octocat/hello-world/_index.json": { + Path: "/github/repos/octocat/hello-world/_index.json", + Revision: "rev_index", + ContentType: "application/json", + Content: `{"repo":"hello-world"}`, + }, + "/github/repos/octocat/hello-world/issues/issue-1.json": { + Path: "/github/repos/octocat/hello-world/issues/issue-1.json", + Revision: "rev_issue_1", + ContentType: "application/json", + Content: `{"id":1}`, + }, + }, + revisionCounter: 2, + } + localDir := t.TempDir() + syncer, err := NewSyncer(client, SyncerOptions{ + WorkspaceID: "ws_lazy_repos_explicit_false", + RemoteRoot: "/", + LocalRoot: localDir, + LazyRepos: boolPtr(false), + }) + if err != nil { + t.Fatalf("new syncer failed: %v", err) + } + + if err := syncer.SyncOnce(context.Background()); err != nil { + t.Fatalf("explicit non-lazy sync failed: %v", err) + } + + if got := client.readFileCallsByPath["/github/repos/octocat/hello-world/issues/issue-1.json"]; got < 1 { + t.Fatalf("expected eager issue reads when explicit lazy mode is off, got %d", got) + } +} + func TestIsUnderLazyGithubRepoSubtree(t *testing.T) { t.Parallel() @@ -3981,7 +4112,7 @@ func TestPullRestartFastPathPeriodicFullPullStillSkipsLazyGithubRepos(t *testing WorkspaceID: "ws_restart_lazy_periodic", RemoteRoot: "/", LocalRoot: localDir, - LazyRepos: true, + LazyRepos: boolPtr(true), FullPullEvery: 2, }) if err != nil { diff --git a/package.json b/package.json index 71740e9..1ba4b76 100644 --- a/package.json +++ b/package.json @@ -16,7 +16,8 @@ "evals:compile": "node scripts/evals/compile-cases.mjs", "evals": "npm run evals:compile && node scripts/evals/run-relayfile-evals.mjs", "evals:list": "npm run evals:compile && node scripts/evals/run-relayfile-evals.mjs --list", - "evals:offline": "npm run evals:compile && node scripts/evals/run-relayfile-evals.mjs --mode offline" + "evals:offline": "npm run evals:compile && node scripts/evals/run-relayfile-evals.mjs --mode offline", + "evals:provider": "npm run evals:compile && node scripts/evals/run-relayfile-evals.mjs --provider" }, "devDependencies": { "@agent-assistant/telemetry": "^0.4.31", diff --git a/scripts/evals/run-relayfile-evals.mjs b/scripts/evals/run-relayfile-evals.mjs index d68e628..a102203 100644 --- a/scripts/evals/run-relayfile-evals.mjs +++ b/scripts/evals/run-relayfile-evals.mjs @@ -9,6 +9,7 @@ import { assertHumanEvalExpected, createDefaultHumanEvalExecutors, createHumanEvalRunRecord, + createSkippedEvalError, defaultRedactActual, humanEvalNeedsReview, loadDotenv, @@ -25,6 +26,8 @@ import { createRelayfileExecutor } from "./relayfile-executor.mjs"; const ROOT = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "../.."); const SUITES_DIR = path.join(ROOT, "evals", "suites"); const RUNS_DIR = path.join(ROOT, ".relayfile", "evals", "runs"); +const DEFAULT_OPENROUTER_MODEL = "openai/gpt-oss-120b:free"; +const OPENROUTER_CHAT_COMPLETIONS_ENDPOINT = "https://openrouter.ai/api/v1/chat/completions"; loadDotenv(path.join(ROOT, ".env")); @@ -95,7 +98,11 @@ for (const testCase of selectedCases) { ? [] : [...assertHumanEvalExpected(testCase, actual), ...assertRelayfileExpected(testCase, actual)]; const deterministicPassed = args.reviewOnly || checks.every((check) => check.passed); - const needsHuman = humanEvalNeedsReview(testCase); + let needsHuman = humanEvalNeedsReview(testCase); + if (deterministicPassed && needsHuman && providerMode) { + checks.push(await reviewWithOpenRouter(testCase, actual, checks)); + needsHuman = !checks.every((check) => check.passed); + } run.tests.push({ ...trial, @@ -164,12 +171,132 @@ Options: --trials N Override trial count for every case. --executor NAME Override selected cases to run with this executor. --mode MODE Run mode label, usually offline or provider. - --provider Alias for --mode provider. + --provider Alias for --mode provider; also reviews human cases with OpenRouter. --fail-on-skipped Treat skipped cases as a non-zero exit condition. --review-only Do not execute cases; create human review worksheets. `); } +async function reviewWithOpenRouter(testCase, actual, checks) { + const apiKey = process.env.OPENROUTER_API_KEY; + if (!apiKey) { + throw createSkippedEvalError("openrouter review skipped; OPENROUTER_API_KEY is missing"); + } + + const model = process.env.RELAYFILE_EVAL_OPENROUTER_MODEL + ?? process.env.HUMAN_EVAL_OPENROUTER_MODEL + ?? DEFAULT_OPENROUTER_MODEL; + const timeoutMs = readPositiveInt(process.env.RELAYFILE_EVAL_OPENROUTER_TIMEOUT_MS, 120_000); + const maxTokens = readPositiveInt(process.env.RELAYFILE_EVAL_OPENROUTER_MAX_TOKENS, 700); + const controller = new AbortController(); + const timeout = setTimeout(() => controller.abort(), timeoutMs); + + try { + const response = await fetch(OPENROUTER_CHAT_COMPLETIONS_ENDPOINT, { + method: "POST", + signal: controller.signal, + headers: { + authorization: `Bearer ${apiKey}`, + "content-type": "application/json", + "http-referer": process.env.GITHUB_SERVER_URL + ? `${process.env.GITHUB_SERVER_URL}/${process.env.GITHUB_REPOSITORY ?? ""}` + : "https://github.com/AgentWorkforce/relayfile", + "x-title": "Relayfile Evals", + }, + body: JSON.stringify({ + model, + temperature: 0, + max_tokens: maxTokens, + messages: [ + { + role: "system", + content: [ + "You are a strict evaluator for relayfile human-review evals.", + "Return only JSON with keys pass:boolean and reason:string.", + "Mark pass true only when the actual output satisfies every Must and violates no Must Not.", + ].join(" "), + }, + { + role: "user", + content: buildReviewPrompt(testCase, actual, checks), + }, + ], + }), + }); + + const payload = await response.json().catch(() => ({})); + if (!response.ok) { + const detail = typeof payload?.error?.message === "string" ? payload.error.message : JSON.stringify(payload); + throw new Error(`OpenRouter review failed: ${response.status} ${detail}`); + } + + const content = contentFromOpenRouterChoice(payload?.choices?.[0]); + const verdict = parseOpenRouterVerdict(content); + return { + name: "openrouterReview", + passed: verdict.pass === true, + message: `model=${model}; ${verdict.reason || "no reason returned"}`, + }; + } catch (error) { + if (error instanceof Error && error.name === "AbortError") { + throw new Error(`OpenRouter review timed out after ${timeoutMs}ms`); + } + throw error; + } finally { + clearTimeout(timeout); + } +} + +function buildReviewPrompt(testCase, actual, checks) { + return JSON.stringify({ + id: testCase.id, + suite: testCase.suite, + message: testCase.input?.message, + must: testCase.expected?.must ?? [], + mustNot: testCase.expected?.mustNot ?? [], + deterministicChecks: checks.map((check) => ({ + name: check.name, + passed: check.passed, + message: check.message, + })), + actual: redactRelayfileActual(actual), + }, null, 2); +} + +function contentFromOpenRouterChoice(choice) { + const message = choice?.message; + const direct = typeof message?.content === "string" ? message.content.trim() : ""; + if (direct) return direct; + + const contentParts = Array.isArray(message?.content) ? message.content : []; + return contentParts + .map((part) => { + if (typeof part === "string") return part; + if (typeof part?.text === "string") return part.text; + if (typeof part?.content === "string") return part.content; + return ""; + }) + .join("\n") + .trim(); +} + +function parseOpenRouterVerdict(content) { + const trimmed = String(content ?? "").trim(); + const unfenced = trimmed.replace(/^```(?:json)?\s*/i, "").replace(/\s*```$/i, "").trim(); + try { + const parsed = JSON.parse(unfenced); + return { + pass: parsed?.pass === true, + reason: typeof parsed?.reason === "string" ? parsed.reason : "", + }; + } catch { + return { + pass: false, + reason: `OpenRouter did not return JSON verdict: ${trimmed.slice(0, 300)}`, + }; + } +} + function listCases(cases) { if (cases.length === 0) { console.log("No eval cases found.");