36 changes: 36 additions & 0 deletions src/uipath/_cli/_evals/_conversational_mapper.py
@@ -0,0 +1,36 @@
from typing import Any, Dict, List


def to_conversational_eval_output_schema(
    messages: List[Dict[str, Any]],
) -> Dict[str, Any]:
    """Convert a list of agent messages to the conversational eval output schema.

    Args:
        messages: Message dictionaries with "type", "content", "tool_calls", etc.

    Returns:
        Dict with the structure {"agentResponse": [{"text": str, "toolCalls": [...]}]}.
    """
    agent_messages = []

    for message in messages:
        if message.get("type") == "ai":
            tool_calls = []
            if message.get("tool_calls"):
                tool_calls = [
                    {
                        "name": tc.get("name") or tc.get("function", {}).get("name"),
                        "arguments": tc.get("arguments")
                        or tc.get("function", {}).get("arguments"),
                    }
                    for tc in message["tool_calls"]
                ]

            agent_message = {
                "text": message.get("content") or "",
                "toolCalls": tool_calls if tool_calls else None,
            }
            agent_messages.append(agent_message)

    return {"agentResponse": agent_messages}
54 changes: 54 additions & 0 deletions src/uipath/_cli/_evals/_runtime.py
@@ -47,6 +47,10 @@
from uipath.runtime.logging import UiPathRuntimeExecutionLogHandler
from uipath.runtime.schema import UiPathRuntimeSchema

from uipath._cli._evals._conversational_mapper import (
    to_conversational_eval_output_schema,
)

from uipath._cli._evals._span_utils import (
configure_eval_set_run_span,
configure_evaluation_span,
@@ -309,6 +313,7 @@ async def initiate_evaluation(
)

async def execute(self) -> UiPathRuntimeResult:
print("EXECUTEE!!!")
logger.info("=" * 80)
logger.info("EVAL RUNTIME: Starting evaluation execution")
logger.info(f"EVAL RUNTIME: Execution ID: {self.execution_id}")
@@ -848,6 +853,31 @@ async def execute_runtime(
eval_id=eval_item.id,
)

# TODO: map the eval input type to this conversational message structure, e.g.:
# inputs_with_overrides = {
# "messages": [
# {
# "messageId": "E6928DF4-AA36-46BE-B4FC-52ADA2B636D0",
# "role": "user",
# "contentParts": [
# {
# "contentPartId": "E75CBEA6-7A2C-442B-B0B6-39FFBF17E986",
# "mimeType": "text/plain",
# "data": {"inline": "Hi what can you do"},
# "citations": [],
# "createdAt": "2026-01-18T05:32:39.620Z",
# "updatedAt": "2026-01-18T05:32:39.620Z",
# }
# ],
# "toolCalls": [],
# "interrupts": [],
# "spanId": "0f32ee22-0def-4906-9cde-dbb9860c050f",
# "createdAt": "2026-01-18T05:32:38.807Z",
# "updatedAt": "2026-01-18T05:32:38.807Z",
# }
# ]
# }

# In resume mode, pass None as input
# The UiPathResumableRuntime wrapper will automatically:
# 1. Fetch triggers from storage
@@ -887,6 +917,30 @@

        if result is None:
            raise ValueError("Execution result cannot be None for eval runs")

        schema = await self.get_schema()
        is_conversational = False

        if schema.metadata and isinstance(schema.metadata, dict):
            # The engine name is expected under metadata["settings"]["engine"]; guard
            # against missing keys so non-conversational agents pass through untouched.
            engine = (schema.metadata.get("settings") or {}).get("engine") or ""
            is_conversational = "conversational" in engine

        if is_conversational and result.output:
            converted_output = to_conversational_eval_output_schema(
                result.output.get("messages") or []
            )
            result = UiPathRuntimeResult(
                output=converted_output,
                status=result.status,
                error=result.error,
                trigger=result.trigger,
                triggers=result.triggers,
            )

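        # Assumed metadata shape (inferred from the engine check above):
        #     {"settings": {"engine": "conversational-agent", ...}}
        # Any engine name containing "conversational" routes the raw "messages"
        # output through to_conversational_eval_output_schema before evaluation.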
return UiPathEvalRunExecutionOutput(
execution_time=end_time - start_time,