From 7f78738efba5da83b2a350903d26b478d3c0442e Mon Sep 17 00:00:00 2001 From: sipercai <53717475+sipercai@users.noreply.github.com> Date: Mon, 22 Jun 2026 09:00:28 +0800 Subject: [PATCH] feat(claude-agent-sdk): capture gen_ai.skill.* on Skill load execute_tool span Attach gen_ai.skill.name/id/description/version to the execute_tool span of the built-in Skill tool. Telemetry is bound to the ToolUseBlock(name="Skill") tool span (not the SKILL.md-injecting UserMessage TextBlock). - skill.name from SKILL.md frontmatter name (ToolUseBlock.input.skill fallback) - skill.id = claude:project:<name> - skill.description/version read best-effort from <cwd>/.claude/skills/<name>/SKILL.md frontmatter (cwd from SystemMessage.data.cwd) - fallback to UserMessage.tool_use_result.commandName when start info incomplete - metadata read failures never propagate to the SDK call Co-Authored-By: Claude Opus 4.8 --- .../CHANGELOG.md | 9 + .../instrumentation/claude_agent_sdk/patch.py | 172 ++++++++++++- .../tests/cassettes/test_skill_load.yaml | 76 ++++++ .../tests/test_span_validation.py | 236 ++++++++++++++++++ 4 files changed, 487 insertions(+), 6 deletions(-) create mode 100644 instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/cassettes/test_skill_load.yaml diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/CHANGELOG.md b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/CHANGELOG.md index 59b05e2b8..aad7f6cfd 100644 --- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/CHANGELOG.md +++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/CHANGELOG.md @@ -7,6 +7,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased +### Added + +- Capture `gen_ai.skill.name`, `gen_ai.skill.id`, `gen_ai.skill.description` + and `gen_ai.skill.version` on the `execute_tool` span of the built-in + `Skill` tool. 
Skill metadata is read best-effort from the project-level + `SKILL.md` frontmatter (located via `SystemMessage.data.cwd`); `skill.id` + is reported as `claude:project:<name>`. Metadata read failures never + affect the SDK call. + ## Version 0.6.0 (2026-06-03) There are no changelog entries for this release. diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py index 8477b6950..176819e90 100644 --- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py +++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py @@ -15,9 +15,12 @@ """Patch functions for Claude Agent SDK instrumentation.""" import logging +import os import time from typing import Any, Dict, List, Optional +import yaml + from opentelemetry import context as otel_context from opentelemetry.instrumentation.claude_agent_sdk.utils import ( extract_usage_from_result_message, @@ -86,6 +89,115 @@ def _clear_client_managed_runs() -> None: _client_managed_runs.clear() +# The name of the Claude Agent SDK built-in tool that loads a Skill. +_SKILL_TOOL_NAME = "Skill" + +# skill id prefix for project-scoped Claude Agent SDK skills. +_SKILL_ID_PREFIX = "claude:project:" + + +def _read_skill_metadata(skill_md_path: str) -> Dict[str, str]: + """Best-effort read of a Skill's SKILL.md frontmatter. + + Returns a dict with any of ``name``/``description``/``version`` keys that + were present in the YAML frontmatter. On any error (missing file, parse + failure, ...) returns an empty dict so telemetry never breaks the SDK call. 
+ """ + try: + with open(skill_md_path, "r", encoding="utf-8") as f: + content = f.read() + except Exception: + # Missing or unreadable SKILL.md is expected for non-project skills. + return {} + + return _parse_skill_frontmatter(content) + + +def _parse_skill_frontmatter(content: str) -> Dict[str, str]: + """Parse the YAML frontmatter (``---`` delimited) of a SKILL.md body.""" + try: + stripped = content.lstrip() + if not stripped.startswith("---"): + return {} + # Split off the leading ``---``; the next ``---`` closes the block. + after_open = stripped[3:] + end_index = after_open.find("\n---") + if end_index == -1: + # Frontmatter never closed; treat the remainder as the block. + frontmatter_text = after_open + else: + frontmatter_text = after_open[:end_index] + + parsed = yaml.safe_load(frontmatter_text) + if not isinstance(parsed, dict): + return {} + except Exception: + return {} + + metadata: Dict[str, str] = {} + for key in ("name", "description", "version"): + value = parsed.get(key) + if value is not None: + metadata[key] = str(value) + return metadata + + +def _apply_skill_metadata( + tool_invocation: ExecuteToolInvocation, + skill_name: str, + cwd: Optional[str], +) -> None: + """Attach ``gen_ai.skill.*`` attributes to a Skill load tool span. + + Reads the project-level ``SKILL.md`` frontmatter best-effort and fills in + ``skill_name``/``skill_id``/``skill_description``/``skill_version`` on the + invocation. Any failure is swallowed so the SDK call is never affected. + """ + if not skill_name: + return + + metadata: Dict[str, str] = {} + if cwd: + skill_md_path = os.path.join( + cwd, ".claude", "skills", skill_name, "SKILL.md" + ) + metadata = _read_skill_metadata(skill_md_path) + + # gen_ai.skill.name: prefer frontmatter, fall back to the requested name. 
+ name = metadata.get("name") or skill_name + tool_invocation.skill_name = name + tool_invocation.skill_id = f"{_SKILL_ID_PREFIX}{name}" + + description = metadata.get("description") + if description: + tool_invocation.skill_description = description + version = metadata.get("version") + if version: + tool_invocation.skill_version = version + + +def _apply_skill_fallback( + tool_invocation: ExecuteToolInvocation, + tool_use_result: Any, +) -> None: + """Best-effort fallback to recover skill_name before closing a Skill span. + + If ``skill_name`` was not captured at span start (e.g. cwd was unavailable + so SKILL.md could not be read), try ``UserMessage.tool_use_result.commandName`` + per the SDK's Skill tool result format. + """ + if tool_invocation.skill_name: + return + if not isinstance(tool_use_result, dict): + return + command_name = tool_use_result.get("commandName") + if command_name: + tool_invocation.skill_name = str(command_name) + tool_invocation.skill_id = ( + f"{_SKILL_ID_PREFIX}{command_name}" + ) + + def _extract_message_parts(msg: Any) -> List[Any]: """Extract parts (text + tool calls) from an AssistantMessage.""" parts = [] @@ -113,12 +225,17 @@ def _create_tool_spans_from_message( agent_invocation: InvokeAgentInvocation, active_task_stack: List[Any], exclude_tool_names: Optional[List[str]] = None, + cwd: Optional[str] = None, ) -> None: """Create tool execution spans from ToolUseBlocks in an AssistantMessage. Tool spans are children of the active SubAgent span (if any), otherwise agent span. When a Task tool is created, it's pushed onto active_task_stack along with a SubAgent span. + For the built-in ``Skill`` tool, ``gen_ai.skill.*`` attributes are read + best-effort from the project-level ``SKILL.md`` frontmatter (located via + ``cwd``) and attached to the tool span. + The stack structure is: [{"task": ExecuteToolInvocation, "subagent": InvokeAgentInvocation}, ...] 
""" if not hasattr(msg, "content"): @@ -163,6 +280,26 @@ def _create_tool_spans_from_message( tool_call_arguments=tool_input, tool_description=tool_name, ) + + # Skill load: attach gen_ai.skill.* attributes best-effort + # from the project SKILL.md frontmatter. Failures here must + # never propagate to break the SDK call. + if tool_name == _SKILL_TOOL_NAME: + try: + skill_name = "" + if isinstance(tool_input, dict): + skill_name = str( + tool_input.get("skill") or "" + ) + _apply_skill_metadata( + tool_invocation, skill_name, cwd + ) + except Exception as e: + logger.warning( + f"Failed to read Skill metadata for " + f"'{tool_input}': {e}" + ) + handler.start_execute_tool(tool_invocation) _client_managed_runs[tool_use_id] = tool_invocation @@ -271,6 +408,7 @@ def _process_assistant_message( handler: ExtendedTelemetryHandler, collected_messages: List[Dict[str, Any]], active_task_stack: List[Any], + cwd: Optional[str] = None, ) -> None: """Process AssistantMessage: create LLM turn, extract parts, create tool spans.""" parts = _extract_message_parts(msg) @@ -353,7 +491,7 @@ def _process_assistant_message( turn_tracker.close_llm_turn() _create_tool_spans_from_message( - msg, handler, agent_invocation, active_task_stack + msg, handler, agent_invocation, active_task_stack, cwd=cwd ) @@ -474,6 +612,18 @@ def _process_user_message( Error(message=error_msg, type=RuntimeError), ) else: + # Skill load: best-effort fallback to fill skill_name + # from the tool result if it wasn't captured at start. 
+ if tool_invocation.tool_name == _SKILL_TOOL_NAME: + try: + _apply_skill_fallback( + tool_invocation, tool_use_result + ) + except Exception as e: + logger.warning( + f"Failed to apply Skill metadata " + f"fallback: {e}" + ) handler.stop_execute_tool(tool_invocation) if tool_use_id: @@ -522,18 +672,23 @@ def _process_user_message( def _process_system_message( msg: Any, agent_invocation: InvokeAgentInvocation, -) -> None: - """Process SystemMessage: extract session_id early in the stream. +) -> Optional[str]: + """Process SystemMessage: extract session_id and cwd early in the stream. SystemMessage appears at the beginning of the message stream and contains - the session_id in its data field. We extract it here so that it's available - for all subsequent LLM spans. + the session_id and cwd in its data field. We extract them here so they are + available for all subsequent spans (cwd is needed to locate project-level + SKILL.md files for Skill tool telemetry). + + Returns the cwd if present, otherwise ``None``. """ if hasattr(msg, "subtype") and msg.subtype == "init": if hasattr(msg, "data") and isinstance(msg.data, dict): session_id = msg.data.get("session_id") if session_id: agent_invocation.conversation_id = session_id + return msg.data.get("cwd") + return None def _process_result_message( @@ -590,12 +745,16 @@ async def _process_agent_invocation_stream( # When its ToolResultBlock is received, it's popped active_task_stack: List[Any] = [] + # cwd captured from SystemMessage.data.cwd, used to locate project-level + # SKILL.md files for Skill tool telemetry. 
+ session_cwd: Optional[str] = None + try: async for msg in wrapped_stream: msg_type = type(msg).__name__ if msg_type == "SystemMessage": - _process_system_message(msg, agent_invocation) + session_cwd = _process_system_message(msg, agent_invocation) elif msg_type == "AssistantMessage": _process_assistant_message( msg, @@ -606,6 +765,7 @@ async def _process_agent_invocation_stream( handler, collected_messages, active_task_stack, + cwd=session_cwd, ) elif msg_type == "UserMessage": _process_user_message( diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/cassettes/test_skill_load.yaml b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/cassettes/test_skill_load.yaml new file mode 100644 index 000000000..241302fd8 --- /dev/null +++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/cassettes/test_skill_load.yaml @@ -0,0 +1,76 @@ +description: 'Skill load: project-level probe-skill loaded via Skill tool' +prompt: Use the probe-skill Skill tool first. Then answer exactly PROBE_SKILL_MARKER and nothing else. 
+messages: +- type: SystemMessage + subtype: init + data: + type: system + subtype: init + cwd: __SKILL_CWD__ + session_id: skill-session-0001 + tools: + - Skill + - Bash + - Read + skills: + - probe-skill + mcp_servers: [] + model: qwen-plus + permissionMode: bypassPermissions + apiKeySource: ANTHROPIC_API_KEY + claude_code_version: 2.1.1 + output_style: default + agents: [] + slash_commands: [] + plugins: [] + uuid: skill-init-uuid +- type: AssistantMessage + model: qwen-plus + content: + - type: ToolUseBlock + id: call_skill_load_probe + name: Skill + input: + skill: probe-skill + parent_tool_use_id: null + error: null +- type: UserMessage + content: + - type: ToolResultBlock + tool_use_id: call_skill_load_probe + content: 'Launching skill: probe-skill' + is_error: false + uuid: skill-result-uuid + parent_tool_use_id: null + tool_use_result: + success: true + commandName: probe-skill +- type: AssistantMessage + model: qwen-plus + content: + - type: TextBlock + text: PROBE_SKILL_MARKER + parent_tool_use_id: null + error: null +- type: ResultMessage + subtype: success + duration_ms: 3210 + duration_api_ms: 9000 + is_error: false + num_turns: 2 + session_id: skill-session-0001 + total_cost_usd: 0.012 + usage: + input_tokens: 1024 + cache_creation_input_tokens: 0 + cache_read_input_tokens: 0 + output_tokens: 32 + server_tool_use: + web_search_requests: 0 + web_fetch_requests: 0 + service_tier: standard + cache_creation: + ephemeral_1h_input_tokens: 0 + ephemeral_5m_input_tokens: 0 + result: PROBE_SKILL_MARKER + structured_output: null diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_span_validation.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_span_validation.py index e53f84fe0..af53ad719 100644 --- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_span_validation.py +++ 
b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_span_validation.py @@ -522,3 +522,239 @@ async def test_span_hierarchy_correctness( assert tool_span.parent.span_id != llm_span.context.span_id, ( "Tool span should not be a child of LLM span" ) + + +# ============================================================================ +# Tests - Skill Tool Span (gen_ai.skill.* attributes) +# ============================================================================ + + +def _write_probe_skill_md(project_dir: Path) -> str: + """Create a project-level probe-skill SKILL.md and return its dir.""" + skill_dir = project_dir / ".claude" / "skills" / "probe-skill" + skill_dir.mkdir(parents=True, exist_ok=True) + skill_md = skill_dir / "SKILL.md" + skill_md.write_text( + "---\n" + "name: probe-skill\n" + "description: Skill telemetry probe that must be loaded before " + "returning PROBE_SKILL_MARKER.\n" + "version: 1.2.3\n" + "---\n\n" + "When this skill is loaded, answer exactly: PROBE_SKILL_MARKER\n", + encoding="utf-8", + ) + return str(project_dir) + + +def _skill_load_messages(cwd: str) -> List[Dict[str, Any]]: + """Message sequence for a Skill load, modelled on the SDK message stream.""" + return [ + { + "type": "SystemMessage", + "subtype": "init", + "data": { + "type": "system", + "subtype": "init", + "cwd": cwd, + "session_id": "skill-session-0001", + "tools": ["Skill", "Bash", "Read"], + "skills": ["probe-skill"], + "model": "qwen-plus", + "permissionMode": "bypassPermissions", + "apiKeySource": "ANTHROPIC_API_KEY", + "claude_code_version": "2.1.1", + "output_style": "default", + "agents": [], + "slash_commands": [], + "plugins": [], + "mcp_servers": [], + "uuid": "skill-init-uuid", + }, + }, + { + "type": "AssistantMessage", + "model": "qwen-plus", + "content": [ + { + "type": "ToolUseBlock", + "id": "call_skill_load_probe", + "name": "Skill", + "input": {"skill": "probe-skill"}, + } + ], + "parent_tool_use_id": None, + "error": None, 
+ }, + { + "type": "UserMessage", + "content": [ + { + "type": "ToolResultBlock", + "tool_use_id": "call_skill_load_probe", + "content": "Launching skill: probe-skill", + "is_error": False, + } + ], + "uuid": "skill-result-uuid", + "parent_tool_use_id": None, + }, + { + "type": "AssistantMessage", + "model": "qwen-plus", + "content": [ + {"type": "TextBlock", "text": "PROBE_SKILL_MARKER"} + ], + "parent_tool_use_id": None, + "error": None, + }, + { + "type": "ResultMessage", + "subtype": "success", + "duration_ms": 3210, + "duration_api_ms": 9000, + "is_error": False, + "num_turns": 2, + "session_id": "skill-session-0001", + "total_cost_usd": 0.012, + "usage": { + "input_tokens": 1024, + "cache_creation_input_tokens": 0, + "cache_read_input_tokens": 0, + "output_tokens": 32, + "server_tool_use": { + "web_search_requests": 0, + "web_fetch_requests": 0, + }, + "service_tier": "standard", + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 0, + }, + }, + "result": "PROBE_SKILL_MARKER", + "structured_output": None, + }, + ] + + +@pytest.mark.asyncio +async def test_skill_tool_span_attributes( + instrument, span_exporter, tracer_provider, tmp_path +): + """Verify gen_ai.skill.* attributes on a Skill load execute_tool span. + + Validates per the Skill telemetry spec: + 1. Exactly one gen_ai.tool.name=Skill execute_tool span exists. + 2. That span carries gen_ai.skill.name/id/description/version. + 3. skill id is ``claude:project:``. + 4. Metadata is read best-effort from the project SKILL.md frontmatter. 
+ """ + from opentelemetry.instrumentation.claude_agent_sdk.patch import ( # noqa: PLC0415 + _process_agent_invocation_stream, + ) + from opentelemetry.semconv._incubating.attributes import ( # noqa: PLC0415 + gen_ai_attributes as GenAIAttributes, + ) + from opentelemetry.util.genai.extended_handler import ( # noqa: PLC0415 + ExtendedTelemetryHandler, + ) + + cwd = _write_probe_skill_md(tmp_path) + handler = ExtendedTelemetryHandler(tracer_provider=tracer_provider) + mock_stream = create_mock_stream_from_messages(_skill_load_messages(cwd)) + + async for _ in _process_agent_invocation_stream( + wrapped_stream=mock_stream, + handler=handler, + model="qwen-plus", + prompt=( + "Use the probe-skill Skill tool first. Then answer exactly " + "PROBE_SKILL_MARKER and nothing else." + ), + ): + pass + + spans = span_exporter.get_finished_spans() + + skill_tool_spans = [ + s + for s in spans + if dict(s.attributes or {}).get(GenAIAttributes.GEN_AI_OPERATION_NAME) + == "execute_tool" + and dict(s.attributes or {}).get(GenAIAttributes.GEN_AI_TOOL_NAME) + == "Skill" + ] + + # Pass criterion 2: exactly one gen_ai.tool.name=Skill execute_tool span. + assert len(skill_tool_spans) == 1, ( + f"Should capture exactly one Skill execute_tool span, got " + f"{len(skill_tool_spans)}" + ) + + tool_span = skill_tool_spans[0] + attrs = dict(tool_span.attributes or {}) + + # Pass criterion 3: span carries all four gen_ai.skill.* attributes. + assert attrs.get("gen_ai.skill.name") == "probe-skill" + assert attrs.get("gen_ai.skill.id") == "claude:project:probe-skill" + assert attrs.get("gen_ai.skill.description") == ( + "Skill telemetry probe that must be loaded before returning " + "PROBE_SKILL_MARKER." + ) + assert attrs.get("gen_ai.skill.version") == "1.2.3" + + # Tool span still carries the standard tool attributes. 
+ assert attrs.get(GenAIAttributes.GEN_AI_TOOL_CALL_ID) == ( + "call_skill_load_probe" + ) + + +@pytest.mark.asyncio +async def test_skill_metadata_read_failure_does_not_break_sdk( + instrument, span_exporter, tracer_provider, tmp_path +): + """Skill metadata read failures must not affect the SDK call (best-effort). + + When cwd points nowhere useful (no SKILL.md), the Skill tool span is still + created with skill.name/id derived from the tool input; no exception escapes. + """ + from opentelemetry.instrumentation.claude_agent_sdk.patch import ( # noqa: PLC0415 + _process_agent_invocation_stream, + ) + from opentelemetry.semconv._incubating.attributes import ( # noqa: PLC0415 + gen_ai_attributes as GenAIAttributes, + ) + from opentelemetry.util.genai.extended_handler import ( # noqa: PLC0415 + ExtendedTelemetryHandler, + ) + + # cwd with no .claude/skills tree -> SKILL.md read returns empty best-effort + cwd = str(tmp_path) + handler = ExtendedTelemetryHandler(tracer_provider=tracer_provider) + mock_stream = create_mock_stream_from_messages(_skill_load_messages(cwd)) + + async for _ in _process_agent_invocation_stream( + wrapped_stream=mock_stream, + handler=handler, + model="qwen-plus", + prompt="Use the probe-skill Skill tool.", + ): + pass + + spans = span_exporter.get_finished_spans() + skill_tool_spans = [ + s + for s in spans + if dict(s.attributes or {}).get(GenAIAttributes.GEN_AI_OPERATION_NAME) + == "execute_tool" + and dict(s.attributes or {}).get(GenAIAttributes.GEN_AI_TOOL_NAME) + == "Skill" + ] + assert len(skill_tool_spans) == 1 + attrs = dict(skill_tool_spans[0].attributes or {}) + # name/id fall back to the requested skill; description/version absent. + assert attrs.get("gen_ai.skill.name") == "probe-skill" + assert attrs.get("gen_ai.skill.id") == "claude:project:probe-skill" + assert "gen_ai.skill.description" not in attrs + assert "gen_ai.skill.version" not in attrs