Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions src/anthropic/_utils/_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,9 @@ def _transform_typeddict(

type_ = annotations.get(key)
if type_ is None:
if key == "caller":
continue

# we do not have a type annotation for this field, leave it as is
result[key] = value
else:
Expand Down Expand Up @@ -440,6 +443,9 @@ async def _async_transform_typeddict(

type_ = annotations.get(key)
if type_ is None:
if key == "caller":
continue

# we do not have a type annotation for this field, leave it as is
result[key] = value
else:
Expand Down
10 changes: 6 additions & 4 deletions src/anthropic/lib/bedrock/_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from ._stream_decoder import AWSEventStreamDecoder
from ...resources.messages import Messages, AsyncMessages
from ...resources.completions import Completions, AsyncCompletions
from ._messages import BedrockMessages, AsyncBedrockMessages

log: logging.Logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -61,8 +62,7 @@ def _prepare_options(input_options: FinalRequestOptions) -> FinalRequestOptions:
if options.url.startswith("/v1/messages/batches"):
raise AnthropicError("The Batch API is not supported in Bedrock yet")

if options.url == "/v1/messages/count_tokens":
raise AnthropicError("Token counting is not supported in Bedrock yet")


return options

Expand Down Expand Up @@ -183,7 +183,9 @@ def __init__(
)

self.beta = Beta(self)
self.messages = Messages(self)
self.beta = Beta(self)
self.messages = BedrockMessages(self)
self.completions = Completions(self)
self.completions = Completions(self)

@override
Expand Down Expand Up @@ -324,7 +326,7 @@ def __init__(
_strict_response_validation=_strict_response_validation,
)

self.messages = AsyncMessages(self)
self.messages = AsyncBedrockMessages(self)
self.completions = AsyncCompletions(self)
self.beta = AsyncBeta(self)

Expand Down
206 changes: 206 additions & 0 deletions src/anthropic/lib/bedrock/_messages.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
from __future__ import annotations

from typing import Iterable, Union, cast

import httpx

from ..._base_client import make_request_options
from ..._compat import cached_property
from ..._types import NOT_GIVEN, Body, Headers, NotGiven, Omit, Query, omit
from ..._utils import async_maybe_transform, is_given, maybe_transform
from ...resources.messages import AsyncMessages, Messages
from ...types import Message, ModelParam, TextBlockParam, ToolChoiceParam
from ...types.message_count_tokens_params import MessageCountTokensParams
from ...types.message_count_tokens_tool_param import MessageCountTokensToolParam
from ...types.message_param import MessageParam
from ...types.message_tokens_count import MessageTokensCount
from ...types.thinking_config_param import ThinkingConfigParam

class BedrockMessages(Messages):
    """Messages resource for the Bedrock client.

    Overrides endpoints whose URL structure or response shape differ from the
    first-party Anthropic API so callers can keep using the standard interface.
    """

    def count_tokens(
        self,
        *,
        messages: Iterable[MessageParam],
        model: ModelParam,
        system: Union[str, Iterable[TextBlockParam]] | Omit = omit,
        thinking: ThinkingConfigParam | Omit = omit,
        tool_choice: ToolChoiceParam | Omit = omit,
        tools: Iterable[MessageCountTokensToolParam] | Omit = omit,
        # Standard request options
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> MessageTokensCount:
        """Count the number of tokens in a Message.

        Bedrock exposes token counting at ``POST /model/{model}/count-tokens``
        and accepts the model-native (Anthropic-format) request body directly,
        mirroring how ``/model/{model}/invoke`` works — no wrapper object.

        Args:
          messages: Input messages in the standard Anthropic format.
          model: The model to count tokens for; becomes part of the URL path.
          system: Optional system prompt.
          thinking: Optional extended-thinking configuration.
          tool_choice: Optional constraint on how the model may use tools.
          tools: Optional tool definitions to include in the count.

        Returns:
          A ``MessageTokensCount`` with the number of input tokens.
        """
        # Only forward parameters the caller actually supplied — `omit`
        # sentinels must not leak into the serialized request body.
        params: dict[str, object] = {"messages": messages, "model": model}
        if not isinstance(system, Omit):
            params["system"] = system
        if not isinstance(thinking, Omit):
            params["thinking"] = thinking
        if not isinstance(tool_choice, Omit):
            params["tool_choice"] = tool_choice
        if not isinstance(tools, Omit):
            params["tools"] = tools

        json_data = maybe_transform(params, MessageCountTokensParams)

        options = make_request_options(
            extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
        )

        # `cast_to=object` yields the raw decoded JSON: Bedrock's response key
        # (`inputTokens`) does not match `MessageTokensCount.input_tokens`, so
        # the mapping is done by hand below. Going through `self._client.post`
        # keeps the client's auth/signing logic in play.
        response = self._client.post(
            f"/model/{model}/count-tokens",
            body=json_data,
            options=options,
            cast_to=object,
        )

        raw = cast(dict, response)
        # Prefer Bedrock's camelCase key; fall back to snake_case in case the
        # service ever returns the Anthropic-native shape.
        input_tokens = raw.get("inputTokens", raw.get("input_tokens", 0))
        return MessageTokensCount(input_tokens=input_tokens)


class AsyncBedrockMessages(AsyncMessages):
    """Async Messages resource for the Bedrock client.

    Overrides endpoints whose URL structure or response shape differ from the
    first-party Anthropic API so callers can keep using the standard interface.
    """

    async def count_tokens(
        self,
        *,
        messages: Iterable[MessageParam],
        model: ModelParam,
        system: Union[str, Iterable[TextBlockParam]] | Omit = omit,
        thinking: ThinkingConfigParam | Omit = omit,
        tool_choice: ToolChoiceParam | Omit = omit,
        tools: Iterable[MessageCountTokensToolParam] | Omit = omit,
        # Standard request options
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> MessageTokensCount:
        """Count the number of tokens in a Message.

        Bedrock exposes token counting at ``POST /model/{model}/count-tokens``
        and accepts the model-native (Anthropic-format) request body directly,
        mirroring how ``/model/{model}/invoke`` works — no wrapper object.

        Args:
          messages: Input messages in the standard Anthropic format.
          model: The model to count tokens for; becomes part of the URL path.
          system: Optional system prompt.
          thinking: Optional extended-thinking configuration.
          tool_choice: Optional constraint on how the model may use tools.
          tools: Optional tool definitions to include in the count.

        Returns:
          A ``MessageTokensCount`` with the number of input tokens.
        """
        # Only forward parameters the caller actually supplied — `omit`
        # sentinels must not leak into the serialized request body.
        params: dict[str, object] = {"messages": messages, "model": model}
        if not isinstance(system, Omit):
            params["system"] = system
        if not isinstance(thinking, Omit):
            params["thinking"] = thinking
        if not isinstance(tool_choice, Omit):
            params["tool_choice"] = tool_choice
        if not isinstance(tools, Omit):
            params["tools"] = tools

        json_data = await async_maybe_transform(params, MessageCountTokensParams)

        options = make_request_options(
            extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
        )

        # `cast_to=object` yields the raw decoded JSON: Bedrock's response key
        # (`inputTokens`) does not match `MessageTokensCount.input_tokens`, so
        # the mapping is done by hand below. Going through `self._client.post`
        # keeps the client's auth/signing logic in play.
        response = await self._client.post(
            f"/model/{model}/count-tokens",
            body=json_data,
            options=options,
            cast_to=object,
        )

        raw = cast(dict, response)
        # Prefer Bedrock's camelCase key; fall back to snake_case in case the
        # service ever returns the Anthropic-native shape.
        input_tokens = raw.get("inputTokens", raw.get("input_tokens", 0))
        return MessageTokensCount(input_tokens=input_tokens)
15 changes: 15 additions & 0 deletions src/anthropic/lib/streaming/_beta_messages.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import json
import builtins
from types import TracebackType
from typing import TYPE_CHECKING, Any, Type, Generic, Callable, cast
Expand Down Expand Up @@ -133,6 +134,7 @@ def __stream__(self) -> Iterator[ParsedBetaMessageStreamEvent[ResponseFormatT]]:
event=sse_event,
current_snapshot=self.__final_message_snapshot,
request_headers=self.response.request.headers,
request_body=self.response.request.content,
output_format=self.__output_format,
)

Expand Down Expand Up @@ -282,6 +284,7 @@ async def __stream__(self) -> AsyncIterator[ParsedBetaMessageStreamEvent[Respons
event=sse_event,
current_snapshot=self.__final_message_snapshot,
request_headers=self.response.request.headers,
request_body=self.response.request.content,
output_format=self.__output_format,
)

Expand Down Expand Up @@ -440,6 +443,7 @@ def accumulate_event(
event: BetaRawMessageStreamEvent,
current_snapshot: ParsedBetaMessage[ResponseFormatT] | None,
request_headers: httpx.Headers,
request_body: bytes | None = None,
output_format: ResponseFormatT | NotGiven = NOT_GIVEN,
) -> ParsedBetaMessage[ResponseFormatT]:
if not isinstance(cast(Any, event), BaseModel):
Expand Down Expand Up @@ -489,6 +493,17 @@ def accumulate_event(
if json_buf:
try:
anthropic_beta = request_headers.get("anthropic-beta", "") if request_headers else ""
if not anthropic_beta and request_body:
try:
body = json.loads(request_body)
if isinstance(body, dict):
beta = body.get("anthropic_beta")
if isinstance(beta, list):
anthropic_beta = ",".join(beta)
elif isinstance(beta, str):
anthropic_beta = beta
except Exception:
pass

if "fine-grained-tool-streaming-2025-05-14" in anthropic_beta:
content.input = from_json(json_buf, partial_mode="trailing-strings")
Expand Down
9 changes: 9 additions & 0 deletions src/anthropic/lib/tools/_beta_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,15 @@ def kw_arguments_schema(
if not properties or not is_dict(properties):
return schema

# Filter out 'self' and 'cls'
for key in ["self", "cls"]:
if key in properties:
del properties[key]

required = schema.get("required")
if isinstance(required, list):
schema["required"] = [r for r in required if r not in ["self", "cls"]]

# Add parameter descriptions from docstring
for param in self._parsed_docstring.params:
prop_schema = properties.get(param.arg_name)
Expand Down
5 changes: 5 additions & 0 deletions src/anthropic/lib/vertex/_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,11 @@ def _prepare_options(input_options: FinalRequestOptions, *, project_id: str | No
if is_dict(options.json_data):
options.json_data.setdefault("anthropic_version", DEFAULT_VERSION)

if is_given(options.headers):
betas = options.headers.get("anthropic-beta")
if betas:
options.json_data.setdefault("anthropic_beta", betas.split(","))

if options.url in {"/v1/messages", "/v1/messages?beta=true"} and options.method == "post":
if project_id is None:
raise RuntimeError(
Expand Down
2 changes: 2 additions & 0 deletions src/anthropic/types/beta/beta_server_tool_use_block.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ class BetaServerToolUseBlock(BaseModel):
caller: Caller
"""Tool invocation directly from the model."""

__api_exclude__ = {"caller"}

input: Dict[str, object]

name: Literal[
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,3 @@ class BetaServerToolUseBlockParam(TypedDict, total=False):

cache_control: Optional[BetaCacheControlEphemeralParam]
"""Create a cache control breakpoint at this content block."""

caller: Caller
"""Tool invocation directly from the model."""
2 changes: 2 additions & 0 deletions src/anthropic/types/beta/beta_tool_use_block.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,5 @@ class BetaToolUseBlock(BaseModel):

caller: Optional[Caller] = None
"""Tool invocation directly from the model."""

__api_exclude__ = {"caller"}
3 changes: 0 additions & 3 deletions src/anthropic/types/beta/beta_tool_use_block_param.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,3 @@ class BetaToolUseBlockParam(TypedDict, total=False):

cache_control: Optional[BetaCacheControlEphemeralParam]
"""Create a cache control breakpoint at this content block."""

caller: Caller
"""Tool invocation directly from the model."""