Skip to content

Commit 26e24f1

Browse files
feat(secrets): ingest env secrets at container runtime instead of fanning into ECS taskdef
The app/socket ECS taskdefs were ~42KB, ~93% of which was the secrets[] array: 268 pointer entries each restating the full ~78-char secret ARN, marching toward the 64KB taskdef limit and growing ~150 bytes per hosted key added. The secret blob itself is only ~18KB/268 keys. Move secret delivery to container boot: new @sim/runtime-secrets loadRuntimeSecrets() reads SIM_ENV_SECRET_ID, fetches the combined secret once, and hydrates process.env (no-clobber, no-op when unset, fail-fast). Bootstrap entrypoints for app + realtime await it before importing the real server (env-flags reads env at module load). The app bootstrap is bun-bundled in the Dockerfile builder stage since it runs outside the Next standalone bundle; realtime keeps full node_modules and runs the TS entry. Backward-compatible: with the current fan-out taskdef the loader no-ops and the app reads the injected env vars unchanged. The matching infra change (empty secrets[] + SIM_ENV_SECRET_ID) ships separately, after this image is live.
1 parent c20d5fc commit 26e24f1

12 files changed

Lines changed: 287 additions & 2 deletions

File tree

apps/realtime/package.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,14 @@
2020
"test:watch": "vitest"
2121
},
2222
"dependencies": {
23+
"@aws-sdk/client-secrets-manager": "3.1032.0",
2324
"@sim/audit": "workspace:*",
2425
"@sim/auth": "workspace:*",
2526
"@sim/db": "workspace:*",
2627
"@sim/logger": "workspace:*",
2728
"@sim/platform-authz": "workspace:*",
2829
"@sim/realtime-protocol": "workspace:*",
30+
"@sim/runtime-secrets": "workspace:*",
2931
"@sim/security": "workspace:*",
3032
"@sim/utils": "workspace:*",
3133
"@sim/workflow-persistence": "workspace:*",

apps/realtime/src/bootstrap.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
/**
2+
* Container entrypoint. Hydrates `process.env` from the runtime secret before
3+
* loading the Socket.IO server, whose modules (`@/env`, DB preflight) read env
4+
* at import time. See `@sim/runtime-secrets`.
5+
*/
6+
import { loadRuntimeSecrets } from '@sim/runtime-secrets'
7+
8+
await loadRuntimeSecrets()
9+
await import('@/index')

apps/sim/bootstrap.ts

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
/**
2+
* Container entrypoint. Hydrates `process.env` from the runtime secret before
3+
* loading the Next.js standalone server, so application modules that read env at
4+
* import time see the full configuration. See `@sim/runtime-secrets`.
5+
*/
6+
import { loadRuntimeSecrets } from '@sim/runtime-secrets'
7+
8+
await loadRuntimeSecrets()
9+
// `server.js` is the Next standalone build artifact, a sibling of this file in
10+
// the image; it does not exist at type-check time, so the specifier is held in a
11+
// variable to keep it out of static module resolution.
12+
const standaloneServer = './server.js'
13+
await import(standaloneServer)

apps/sim/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@
9999
"@sim/logger": "workspace:*",
100100
"@sim/platform-authz": "workspace:*",
101101
"@sim/realtime-protocol": "workspace:*",
102+
"@sim/runtime-secrets": "workspace:*",
102103
"@sim/security": "workspace:*",
103104
"@sim/utils": "workspace:*",
104105
"@sim/workflow-persistence": "workspace:*",

bun.lock

Lines changed: 25 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docker/app.Dockerfile

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,13 @@ RUN --mount=type=cache,id=next-cache-${TARGETPLATFORM},target=/app/apps/sim/.nex
8181
--mount=type=cache,id=turbo-cache-${TARGETPLATFORM},target=/app/.turbo \
8282
bun run build
8383

84+
# Bundle the secrets-loading bootstrap into a self-contained entrypoint. It runs
85+
# before (and outside) the Next standalone server, so its dependencies
86+
# (@sim/runtime-secrets, AWS SDK) are inlined here rather than resolved from the
87+
# pruned standalone node_modules. The dynamic import of ./server.js stays a
88+
# runtime import.
89+
RUN bun build apps/sim/bootstrap.ts --target=bun --outfile=apps/sim/bootstrap.js
90+
8491
# ========================================
8592
# Runner Stage: Run the actual app
8693
# ========================================
@@ -100,6 +107,10 @@ COPY --from=builder --chown=nextjs:nodejs /app/apps/sim/public ./apps/sim/public
100107
COPY --from=builder --chown=nextjs:nodejs /app/apps/sim/.next/standalone ./
101108
COPY --from=builder --chown=nextjs:nodejs /app/apps/sim/.next/static ./apps/sim/.next/static
102109

110+
# Self-contained secrets-loading bootstrap (bundled in the builder stage). Runs
111+
# before the standalone server.js to hydrate process.env from the runtime secret.
112+
COPY --from=builder --chown=nextjs:nodejs /app/apps/sim/bootstrap.js ./apps/sim/bootstrap.js
113+
103114
# Copy blog/author content for runtime filesystem reads (not part of the JS bundle)
104115
COPY --from=builder --chown=nextjs:nodejs /app/apps/sim/content ./apps/sim/content
105116

@@ -128,4 +139,4 @@ EXPOSE 3000
128139
ENV PORT=3000 \
129140
HOSTNAME="0.0.0.0"
130141

131-
CMD ["bun", "apps/sim/server.js"]
142+
CMD ["bun", "apps/sim/bootstrap.js"]

docker/realtime.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,4 +49,4 @@ USER nextjs
4949

5050
EXPOSE 3002
5151

52-
CMD ["bun", "apps/realtime/src/index.ts"]
52+
CMD ["bun", "apps/realtime/src/bootstrap.ts"]
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
{
2+
"name": "@sim/runtime-secrets",
3+
"version": "0.1.0",
4+
"private": true,
5+
"sideEffects": false,
6+
"type": "module",
7+
"license": "Apache-2.0",
8+
"engines": {
9+
"bun": ">=1.2.13",
10+
"node": ">=20.0.0"
11+
},
12+
"exports": {
13+
".": {
14+
"types": "./src/index.ts",
15+
"default": "./src/index.ts"
16+
}
17+
},
18+
"scripts": {
19+
"type-check": "tsc --noEmit",
20+
"lint": "biome check --write --unsafe .",
21+
"lint:check": "biome check .",
22+
"format": "biome format --write .",
23+
"format:check": "biome format .",
24+
"test": "vitest run",
25+
"test:watch": "vitest"
26+
},
27+
"dependencies": {
28+
"@aws-sdk/client-secrets-manager": "3.1032.0",
29+
"@sim/logger": "workspace:*",
30+
"@sim/utils": "workspace:*"
31+
},
32+
"devDependencies": {
33+
"@sim/tsconfig": "workspace:*",
34+
"@types/node": "24.2.1",
35+
"typescript": "^5.7.3",
36+
"vitest": "^4.1.0"
37+
}
38+
}
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
2+
3+
const { mockSend } = vi.hoisted(() => ({ mockSend: vi.fn() }))
4+
5+
vi.mock('@aws-sdk/client-secrets-manager', () => ({
6+
SecretsManagerClient: class SecretsManagerClient {
7+
send = mockSend
8+
},
9+
GetSecretValueCommand: class GetSecretValueCommand {
10+
constructor(public input: unknown) {}
11+
},
12+
}))
13+
14+
vi.mock('@sim/logger', () => ({
15+
createLogger: () => ({ info: vi.fn(), warn: vi.fn(), error: vi.fn() }),
16+
}))
17+
18+
vi.mock('@sim/utils/helpers', () => ({
19+
sleep: vi.fn().mockResolvedValue(undefined),
20+
}))
21+
22+
import { loadRuntimeSecrets } from './index'
23+
24+
const TOUCHED = ['SIM_ENV_SECRET_ID', 'FOO', 'BAZ'] as const
25+
26+
describe('loadRuntimeSecrets', () => {
27+
beforeEach(() => {
28+
vi.clearAllMocks()
29+
for (const key of TOUCHED) delete process.env[key]
30+
})
31+
32+
afterEach(() => {
33+
for (const key of TOUCHED) delete process.env[key]
34+
})
35+
36+
it('no-ops when SIM_ENV_SECRET_ID is unset', async () => {
37+
await loadRuntimeSecrets()
38+
expect(mockSend).not.toHaveBeenCalled()
39+
})
40+
41+
it('hydrates process.env from the parsed secret JSON', async () => {
42+
process.env.SIM_ENV_SECRET_ID = '/test/sim/env-vars'
43+
mockSend.mockResolvedValue({ SecretString: JSON.stringify({ FOO: 'bar', BAZ: 'qux' }) })
44+
45+
await loadRuntimeSecrets()
46+
47+
expect(process.env.FOO).toBe('bar')
48+
expect(process.env.BAZ).toBe('qux')
49+
})
50+
51+
it('never overwrites an already-set env var', async () => {
52+
process.env.SIM_ENV_SECRET_ID = '/test/sim/env-vars'
53+
process.env.FOO = 'existing'
54+
mockSend.mockResolvedValue({ SecretString: JSON.stringify({ FOO: 'new', BAZ: 'qux' }) })
55+
56+
await loadRuntimeSecrets()
57+
58+
expect(process.env.FOO).toBe('existing')
59+
expect(process.env.BAZ).toBe('qux')
60+
})
61+
62+
it('throws when the secret is not valid JSON', async () => {
63+
process.env.SIM_ENV_SECRET_ID = '/test/sim/env-vars'
64+
mockSend.mockResolvedValue({ SecretString: 'not json' })
65+
66+
await expect(loadRuntimeSecrets()).rejects.toThrow(/not valid JSON/)
67+
})
68+
69+
it('throws when the secret JSON is not an object', async () => {
70+
process.env.SIM_ENV_SECRET_ID = '/test/sim/env-vars'
71+
mockSend.mockResolvedValue({ SecretString: JSON.stringify(['a', 'b']) })
72+
73+
await expect(loadRuntimeSecrets()).rejects.toThrow(/must be a JSON object/)
74+
})
75+
76+
it('retries then throws when the fetch keeps failing', async () => {
77+
process.env.SIM_ENV_SECRET_ID = '/test/sim/env-vars'
78+
mockSend.mockRejectedValue(new Error('boom'))
79+
80+
await expect(loadRuntimeSecrets()).rejects.toThrow(/Failed to fetch runtime secrets/)
81+
expect(mockSend).toHaveBeenCalledTimes(3)
82+
})
83+
})
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
import type { GetSecretValueCommandOutput } from '@aws-sdk/client-secrets-manager'
2+
import { GetSecretValueCommand, SecretsManagerClient } from '@aws-sdk/client-secrets-manager'
3+
import { createLogger } from '@sim/logger'
4+
import { getErrorMessage } from '@sim/utils/errors'
5+
import { sleep } from '@sim/utils/helpers'
6+
import { backoffWithJitter } from '@sim/utils/retry'
7+
8+
const logger = createLogger('RuntimeSecrets')
9+
10+
/** Plaintext env var (set in the ECS task definition) naming the secret to ingest. */
11+
const SECRET_ID_ENV = 'SIM_ENV_SECRET_ID'
12+
13+
const MAX_ATTEMPTS = 3
14+
15+
/**
16+
* Fetches the combined `/{env}/sim/env-vars` secret once at container boot and
17+
* hydrates `process.env`, so secrets no longer have to be fanned out into the
18+
* ECS task definition (which is approaching the 64 KB rendered-document limit).
19+
*
20+
* Must run before any application module that reads env at import time. No-ops
21+
* when {@link SECRET_ID_ENV} is unset (local dev / self-hosted keep using their
22+
* own env). Existing `process.env` keys are never overwritten, so explicit
23+
* task-definition `environment` entries win. Throws on any fetch/parse failure
24+
* so a misconfigured container crashes instead of booting without its config.
25+
*/
26+
export async function loadRuntimeSecrets(): Promise<void> {
27+
const secretId = process.env[SECRET_ID_ENV]
28+
if (!secretId) {
29+
logger.info(`${SECRET_ID_ENV} not set; skipping runtime secret ingestion`)
30+
return
31+
}
32+
33+
const client = new SecretsManagerClient(
34+
process.env.AWS_REGION ? { region: process.env.AWS_REGION } : {}
35+
)
36+
37+
const secretString = await fetchSecretString(client, secretId)
38+
const entries = parseSecretJson(secretString)
39+
40+
let loaded = 0
41+
let skipped = 0
42+
for (const [key, value] of Object.entries(entries)) {
43+
if (key in process.env) {
44+
skipped++
45+
continue
46+
}
47+
process.env[key] = typeof value === 'string' ? value : JSON.stringify(value)
48+
loaded++
49+
}
50+
51+
logger.info('Runtime secrets ingested', { secretId, loaded, skipped })
52+
}
53+
54+
/**
 * Fetches the secret's string payload, retrying failed requests.
 *
 * Sends up to `MAX_ATTEMPTS` `GetSecretValueCommand` requests, sleeping for a
 * `backoffWithJitter` delay after each failed attempt except the last. A
 * response that arrives without a `SecretString` cannot be fixed by asking
 * again, so it fails immediately rather than going through the retry/backoff
 * path.
 *
 * @param client - Secrets Manager client used to send the request.
 * @param secretId - Identifier passed as `SecretId` to `GetSecretValueCommand`.
 * @returns The response's `SecretString`.
 * @throws Error when every attempt rejects, or when the response has no
 *   `SecretString`.
 */
async function fetchSecretString(client: SecretsManagerClient, secretId: string): Promise<string> {
  let lastError: unknown
  for (let attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) {
    let response: GetSecretValueCommandOutput
    try {
      response = await client.send(new GetSecretValueCommand({ SecretId: secretId }))
    } catch (error) {
      lastError = error
      if (attempt < MAX_ATTEMPTS) {
        const delay = backoffWithJitter(attempt, null, { baseMs: 200, maxMs: 2000 })
        logger.warn(
          `Failed to fetch runtime secrets (attempt ${attempt}/${MAX_ATTEMPTS}), retrying`,
          { error: getErrorMessage(error) }
        )
        await sleep(delay)
      }
      continue
    }

    // Checked outside the try block so this non-retryable condition is not
    // swallowed by the retry handler above.
    if (!response.SecretString) {
      throw new Error(
        `Failed to fetch runtime secrets from ${secretId}: Secret has no SecretString (binary secrets are not supported)`
      )
    }
    return response.SecretString
  }
  throw new Error(`Failed to fetch runtime secrets from ${secretId}: ${getErrorMessage(lastError)}`)
}
79+
80+
/**
 * Parses the secret payload and verifies that its top-level value is a JSON
 * object.
 *
 * The JSON parser's own error message is deliberately left out of the thrown
 * error: engines commonly quote part of the offending input in that message,
 * and here the input is secret material that would otherwise surface in crash
 * logs. Only the payload length is reported.
 *
 * @param secretString - Raw secret payload to parse.
 * @returns The parsed top-level object.
 * @throws Error when the payload is not valid JSON, or when the parsed value is
 *   `null`, an array, or not an object.
 */
function parseSecretJson(secretString: string): Record<string, unknown> {
  let parsed: unknown
  try {
    parsed = JSON.parse(secretString)
  } catch {
    throw new Error(
      `Runtime secret is not valid JSON: parser details withheld to avoid logging secret contents (payload length ${secretString.length})`
    )
  }
  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
    throw new Error('Runtime secret must be a JSON object of key/value pairs')
  }
  return parsed as Record<string, unknown>
}

0 commit comments

Comments
 (0)