36 changes: 36 additions & 0 deletions src/uipath/_cli/_evals/_conversational_mapper.py
@@ -0,0 +1,36 @@
from typing import Any, Dict, List


def to_conversational_eval_output_schema(
    messages: List[Dict[str, Any]],
) -> Dict[str, Any]:
    """Convert a list of agent messages to the conversational eval output schema.

    Args:
        messages: Message dictionaries with "type", "content", "tool_calls", etc.

    Returns:
        Dict with the structure {"agentResponse": [{"text": str, "toolCalls": [...]}]}.
    """
    agent_messages = []

    for message in messages:
        if message.get("type") == "ai":
            tool_calls = []
            if message.get("tool_calls"):
                tool_calls = [
                    {
                        "name": tc.get("name") or tc.get("function", {}).get("name"),
                        "arguments": tc.get("arguments")
                        or tc.get("function", {}).get("arguments"),
                    }
                    for tc in message["tool_calls"]
                ]

            agent_message = {
                "text": message.get("content") or "",
                "toolCalls": tool_calls if tool_calls else None,
            }
            agent_messages.append(agent_message)

    return {"agentResponse": agent_messages}
54 changes: 54 additions & 0 deletions src/uipath/_cli/_evals/_runtime.py
@@ -47,6 +47,10 @@
from uipath.runtime.logging import UiPathRuntimeExecutionLogHandler
from uipath.runtime.schema import UiPathRuntimeSchema

from uipath._cli._evals._conversational_mapper import (
    to_conversational_eval_output_schema,
)

from uipath._cli._evals._span_utils import (
configure_eval_set_run_span,
configure_evaluation_span,
@@ -309,6 +313,7 @@ async def initiate_evaluation(
)

async def execute(self) -> UiPathRuntimeResult:
print("EXECUTEE!!!")
logger.info("=" * 80)
logger.info("EVAL RUNTIME: Starting evaluation execution")
logger.info(f"EVAL RUNTIME: Execution ID: {self.execution_id}")
@@ -848,6 +853,31 @@ async def execute_runtime(
eval_id=eval_item.id,
)

# TODO: map the eval input type to this conversational message structure, e.g.:
# inputs_with_overrides = {
# "messages": [
# {
# "messageId": "E6928DF4-AA36-46BE-B4FC-52ADA2B636D0",
# "role": "user",
# "contentParts": [
# {
# "contentPartId": "E75CBEA6-7A2C-442B-B0B6-39FFBF17E986",
# "mimeType": "text/plain",
# "data": {"inline": "Hi what can you do"},
# "citations": [],
# "createdAt": "2026-01-18T05:32:39.620Z",
# "updatedAt": "2026-01-18T05:32:39.620Z",
# }
# ],
# "toolCalls": [],
# "interrupts": [],
# "spanId": "0f32ee22-0def-4906-9cde-dbb9860c050f",
# "createdAt": "2026-01-18T05:32:38.807Z",
# "updatedAt": "2026-01-18T05:32:38.807Z",
# }
# ]
# }

# In resume mode, pass None as input
# The UiPathResumableRuntime wrapper will automatically:
# 1. Fetch triggers from storage
@@ -887,6 +917,30 @@

        if result is None:
            raise ValueError("Execution result cannot be None for eval runs")

        schema = await self.get_schema()
        is_conversational = False

        if schema.metadata and isinstance(schema.metadata, dict):
            # The engine name is expected under metadata["settings"]["engine"]; guard
            # against missing keys so non-conversational agents pass through untouched.
            engine = (schema.metadata.get("settings") or {}).get("engine") or ""
            is_conversational = "conversational" in engine

        if is_conversational and result.output:
            converted_output = to_conversational_eval_output_schema(
                result.output.get("messages") or []
            )
            result = UiPathRuntimeResult(
                output=converted_output,
                status=result.status,
                error=result.error,
                trigger=result.trigger,
                triggers=result.triggers,
            )

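        # Assumed metadata shape (inferred from the engine check above):
        #     {"settings": {"engine": "conversational-agent", ...}}
        # Any engine name containing "conversational" routes the raw "messages"
        # output through to_conversational_eval_output_schema before evaluation.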
return UiPathEvalRunExecutionOutput(
execution_time=end_time - start_time,