Skip to content

Commit 1381736

Browse files
committed
Fix so tool results occur right after tool calls!
1 parent 99b9f41 commit 1381736

File tree

4 files changed

+316
-97
lines changed

4 files changed

+316
-97
lines changed

packages/agent-runtime/src/__tests__/tool-stream-parser.test.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ describe('processStreamWithTags', () => {
7171
defaultProcessor,
7272
onError,
7373
onResponseChunk,
74+
executeXmlToolCall: async () => {},
7475
})) {
7576
if (chunk.type === 'text') {
7677
result.push(chunk.text)
@@ -137,6 +138,7 @@ describe('processStreamWithTags', () => {
137138
defaultProcessor,
138139
onError,
139140
onResponseChunk,
141+
executeXmlToolCall: async () => {},
140142
})) {
141143
if (chunk.type === 'text') {
142144
result.push(chunk.text)
@@ -213,6 +215,7 @@ describe('processStreamWithTags', () => {
213215
defaultProcessor,
214216
onError,
215217
onResponseChunk,
218+
executeXmlToolCall: async () => {},
216219
})) {
217220
if (chunk.type === 'text') {
218221
result.push(chunk.text)
@@ -293,6 +296,7 @@ describe('processStreamWithTags', () => {
293296
defaultProcessor,
294297
onError,
295298
onResponseChunk,
299+
executeXmlToolCall: async () => {},
296300
})) {
297301
// consume stream
298302
}
@@ -361,6 +365,7 @@ describe('processStreamWithTags', () => {
361365
defaultProcessor,
362366
onError,
363367
onResponseChunk,
368+
executeXmlToolCall: async () => {},
364369
})) {
365370
if (chunk.type === 'text') {
366371
result.push(chunk.text)
@@ -433,6 +438,7 @@ describe('processStreamWithTags', () => {
433438
defaultProcessor,
434439
onError,
435440
onResponseChunk,
441+
executeXmlToolCall: async () => {},
436442
})) {
437443
if (chunk.type === 'text') {
438444
result.push(chunk.text)
@@ -486,6 +492,7 @@ describe('processStreamWithTags', () => {
486492
defaultProcessor,
487493
onError,
488494
onResponseChunk,
495+
executeXmlToolCall: async () => {},
489496
})) {
490497
if (chunk.type === 'text') {
491498
result.push(chunk.text)
@@ -532,6 +539,7 @@ describe('processStreamWithTags', () => {
532539
defaultProcessor,
533540
onError,
534541
onResponseChunk,
542+
executeXmlToolCall: async () => {},
535543
})) {
536544
if (chunk.type === 'text') {
537545
result.push(chunk.text)
Lines changed: 226 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,226 @@
1+
import { TEST_AGENT_RUNTIME_IMPL } from '@codebuff/common/testing/impl/agent-runtime'
2+
import { beforeEach, describe, expect, it } from 'bun:test'
3+
4+
import { processStreamWithTools } from '../tool-stream-parser'
5+
6+
import type { AgentRuntimeDeps } from '@codebuff/common/types/contracts/agent-runtime'
7+
import type { StreamChunk } from '@codebuff/common/types/contracts/llm'
8+
9+
describe('XML tool result ordering', () => {
10+
async function* createMockStream(chunks: StreamChunk[]) {
11+
for (const chunk of chunks) {
12+
yield chunk
13+
}
14+
return 'mock-message-id'
15+
}
16+
17+
function textChunk(text: string): StreamChunk {
18+
return { type: 'text' as const, text }
19+
}
20+
21+
let agentRuntimeImpl: AgentRuntimeDeps
22+
23+
beforeEach(() => {
24+
agentRuntimeImpl = { ...TEST_AGENT_RUNTIME_IMPL }
25+
})
26+
27+
it('should call executeXmlToolCall synchronously and track execution order', async () => {
28+
// This test verifies the execution order when XML tool calls are parsed
29+
const executionOrder: string[] = []
30+
31+
// Stream with XML tool call embedded in text
32+
const xmlToolCall = `<codebuff_tool_call>
33+
{"cb_tool_name": "test_tool", "param1": "value1"}
34+
</codebuff_tool_call>`
35+
36+
const streamChunks: StreamChunk[] = [
37+
textChunk('Text before tool call\n'),
38+
textChunk(xmlToolCall),
39+
textChunk('\nText after tool call'),
40+
]
41+
42+
const stream = createMockStream(streamChunks)
43+
const responseChunks: any[] = []
44+
45+
function onResponseChunk(chunk: any) {
46+
responseChunks.push(chunk)
47+
}
48+
49+
function defaultProcessor(toolName: string) {
50+
return {
51+
onTagStart: () => {},
52+
onTagEnd: () => {},
53+
}
54+
}
55+
56+
for await (const chunk of processStreamWithTools({
57+
...agentRuntimeImpl,
58+
stream,
59+
processors: {},
60+
defaultProcessor,
61+
onError: () => {},
62+
onResponseChunk,
63+
executeXmlToolCall: async ({ toolName, input }) => {
64+
executionOrder.push(`executeXmlToolCall:${toolName}`)
65+
// Simulate some async work (like tool execution)
66+
await new Promise((resolve) => setTimeout(resolve, 10))
67+
executionOrder.push(`executeXmlToolCall:${toolName}:done`)
68+
},
69+
})) {
70+
if (chunk.type === 'text') {
71+
executionOrder.push(`text:${chunk.text.trim().slice(0, 20)}`)
72+
} else if (chunk.type === 'tool-call') {
73+
executionOrder.push(`tool-call:${chunk.toolName}`)
74+
}
75+
}
76+
77+
// The key assertion: executeXmlToolCall should complete BEFORE "Text after" is yielded
78+
// because the stream should wait for the tool to finish
79+
console.log('Execution order:', executionOrder)
80+
81+
const executeStartIndex = executionOrder.findIndex((e) =>
82+
e.startsWith('executeXmlToolCall:test_tool'),
83+
)
84+
const executeDoneIndex = executionOrder.findIndex((e) =>
85+
e.includes(':done'),
86+
)
87+
const textAfterIndex = executionOrder.findIndex((e) =>
88+
e.includes('Text after'),
89+
)
90+
91+
expect(executeStartIndex).toBeGreaterThan(-1)
92+
expect(executeDoneIndex).toBeGreaterThan(-1)
93+
94+
// The tool execution should complete before "Text after" is processed
95+
if (textAfterIndex > -1) {
96+
expect(executeDoneIndex).toBeLessThan(textAfterIndex)
97+
}
98+
})
99+
100+
it('should track tool_call and tool_result events in correct order', async () => {
101+
// This test simulates what happens in the full processStream flow
102+
// where we capture both tool_call and tool_result events
103+
104+
const events: { type: string; toolName?: string; order: number }[] = []
105+
let eventCounter = 0
106+
107+
const xmlToolCall = `<codebuff_tool_call>
108+
{"cb_tool_name": "read_files", "paths": ["test.ts"]}
109+
</codebuff_tool_call>`
110+
111+
const streamChunks: StreamChunk[] = [
112+
textChunk('Before\n'),
113+
textChunk(xmlToolCall),
114+
textChunk('\nAfter'),
115+
]
116+
117+
const stream = createMockStream(streamChunks)
118+
119+
function defaultProcessor(toolName: string) {
120+
return {
121+
onTagStart: () => {},
122+
onTagEnd: () => {},
123+
}
124+
}
125+
126+
// Simulate the xmlToolResponseHandler behavior
127+
function onResponseChunk(chunk: any) {
128+
if (chunk.type === 'text') {
129+
events.push({ type: 'text', order: eventCounter++ })
130+
}
131+
}
132+
133+
for await (const chunk of processStreamWithTools({
134+
...agentRuntimeImpl,
135+
stream,
136+
processors: {},
137+
defaultProcessor,
138+
onError: () => {},
139+
onResponseChunk,
140+
executeXmlToolCall: async ({ toolName }) => {
141+
// Simulate tool_call event
142+
events.push({ type: 'tool_call', toolName, order: eventCounter++ })
143+
144+
// Simulate async tool execution
145+
await new Promise((resolve) => setTimeout(resolve, 5))
146+
147+
// Simulate tool_result event
148+
events.push({ type: 'tool_result', toolName, order: eventCounter++ })
149+
},
150+
})) {
151+
// Consume stream
152+
}
153+
154+
// Find the indices
155+
const toolCallEvent = events.find((e) => e.type === 'tool_call')
156+
const toolResultEvent = events.find((e) => e.type === 'tool_result')
157+
const textAfterEvents = events.filter(
158+
(e) => e.type === 'text' && e.order > (toolCallEvent?.order ?? 0),
159+
)
160+
161+
expect(toolCallEvent).toBeDefined()
162+
expect(toolResultEvent).toBeDefined()
163+
164+
// The tool_result should come immediately after tool_call,
165+
// before any subsequent text events
166+
if (toolResultEvent && textAfterEvents.length > 0) {
167+
const firstTextAfter = textAfterEvents[0]
168+
expect(toolResultEvent.order).toBeLessThan(firstTextAfter.order)
169+
}
170+
})
171+
172+
it('should not deadlock when executeXmlToolCall awaits tool execution', async () => {
173+
// This test verifies that awaiting inside executeXmlToolCall doesn't cause a deadlock.
174+
// The fix: pass Promise.resolve() instead of previousToolCallFinished for XML mode,
175+
// so the tool can execute immediately without waiting for the stream to finish.
176+
177+
const xmlToolCall = `<codebuff_tool_call>
178+
{"cb_tool_name": "test_tool", "param": "value"}
179+
</codebuff_tool_call>`
180+
181+
const streamChunks: StreamChunk[] = [
182+
textChunk('Before\n'),
183+
textChunk(xmlToolCall),
184+
textChunk('\nAfter'),
185+
]
186+
187+
const stream = createMockStream(streamChunks)
188+
let toolExecuted = false
189+
190+
// This test should complete within a reasonable time.
191+
// Before the fix, it would deadlock because:
192+
// 1. executeXmlToolCall awaits toolPromise
193+
// 2. toolPromise chains on previousToolCallFinished (streamDonePromise)
194+
// 3. streamDonePromise only resolves when stream ends
195+
// 4. Stream can't end because it's waiting for executeXmlToolCall
196+
// => Deadlock!
197+
198+
const timeoutPromise = new Promise<'timeout'>((resolve) =>
199+
setTimeout(() => resolve('timeout'), 1000),
200+
)
201+
202+
const streamPromise = (async () => {
203+
for await (const chunk of processStreamWithTools({
204+
...agentRuntimeImpl,
205+
stream,
206+
processors: {},
207+
defaultProcessor: () => ({ onTagStart: () => {}, onTagEnd: () => {} }),
208+
onError: () => {},
209+
onResponseChunk: () => {},
210+
executeXmlToolCall: async () => {
211+
// Simulate tool execution with async work
212+
await new Promise((resolve) => setTimeout(resolve, 50))
213+
toolExecuted = true
214+
},
215+
})) {
216+
// Consume stream
217+
}
218+
return 'completed'
219+
})()
220+
221+
const result = await Promise.race([streamPromise, timeoutPromise])
222+
223+
expect(result).toBe('completed')
224+
expect(toolExecuted).toBe(true)
225+
})
226+
})

0 commit comments

Comments
 (0)