diff --git a/docs/reference.md b/docs/reference.md index 504d7639dd..77a957d941 100644 --- a/docs/reference.md +++ b/docs/reference.md @@ -1033,8 +1033,7 @@ with `MyConfig.model_validate(...)` or use the typed object directly. class ClientConfig(BaseModel): client_idx: int = 0 client_type: ClientType = "openai_chat_completions" - preserve_all_thinking: bool = False - preserve_thinking_between_tool_calls: bool = False + renderer_config: RendererConfig | None = None api_key_var: str = "PRIME_API_KEY" api_base_url: str = "https://api.pinference.ai/api/v1" endpoint_configs: list[EndpointClientConfig] = [] @@ -1051,7 +1050,7 @@ class ClientConfig(BaseModel): `client_type` selects which `Client` implementation to instantiate (see [Client Classes](#client-classes)). Use `endpoint_configs` for multi-endpoint round-robin. In grouped scoring mode, groups are distributed round-robin across endpoint configs. -`preserve_all_thinking` and `preserve_thinking_between_tool_calls` are forwarded to the underlying renderer when `client_type == "renderer"`. They control whether past-assistant `reasoning_content` is re-emitted on subsequent renders — `preserve_all_thinking` keeps every past-assistant turn's thinking, and `preserve_thinking_between_tool_calls` keeps thinking only inside the in-flight assistant→tool→…→assistant block after the most recent user turn (when that block contains at least one tool response). Both default to `False` (template default applies). +`renderer_config` is a typed `renderers.RendererConfig` that drives the renderer pool when `client_type == "renderer"` (`None` is treated as `AutoRendererConfig()`). Its shared `thinking_retention` field controls whether past-assistant `reasoning_content` is re-emitted on subsequent renders: `"template"` (default) defers to the chat template, `"tool_cycle"` additionally keeps thinking inside the in-flight assistant→tool→…→assistant block after the most recent user turn (when that block contains at least one tool response), and `"all"` keeps every past-assistant turn's thinking. When `api_key_var` is `"PRIME_API_KEY"` (the default), credentials are loaded with the following precedence: - **API key**: `PRIME_API_KEY` env var > `~/.prime/config.json` > `"EMPTY"` diff --git a/tests/test_renderer_client.py b/tests/test_renderer_client.py index 24c6d4d8ca..e19f5e9c3e 100644 --- a/tests/test_renderer_client.py +++ b/tests/test_renderer_client.py @@ -91,7 +91,7 @@ def test_renderer_client_threads_chat_template_kwargs_into_pool(): bases = [ Qwen3RendererConfig(enable_thinking=True), - AutoRendererConfig(preserve_all_thinking=True), + AutoRendererConfig(thinking_retention="all"), None, ] for base in bases: @@ -134,16 +134,16 @@ async def _fake_generate(**kwargs): ) ) - expected_preserve_all = ( - base.preserve_all_thinking + expected_retention = ( + base.thinking_retention if isinstance(base, AutoRendererConfig) - else False + else "template" ) create_pool_mock.assert_called_once_with( "Qwen/Qwen3-8B", Qwen3RendererConfig( enable_thinking=False, - preserve_all_thinking=expected_preserve_all, + thinking_retention=expected_retention, ), size=1, ) diff --git a/verifiers/clients/renderer_client.py b/verifiers/clients/renderer_client.py index 90cc423f95..b6c93b179a 100644 --- a/verifiers/clients/renderer_client.py +++ b/verifiers/clients/renderer_client.py @@ -392,8 +392,8 @@ def _resolve_renderer_config( inside ``renderers.create_renderer``), we pull resolution forward via ``MODEL_RENDERER_MAP`` so kwargs land on the concrete config variant and pydantic validates them against the actual renderer's schema — - ``AutoRendererConfig`` intentionally carries only ``preserve_*`` and - would reject template kwargs like ``enable_thinking``. ``renderer_model`` + ``AutoRendererConfig`` intentionally carries only ``thinking_retention`` + and would reject template kwargs like ``enable_thinking``. ``renderer_model`` must match what the pool will tokenize with (i.e. ``ClientConfig.renderer_model_name`` when set, else the request model), so resolution agrees with the tokenizer the renderer will hold. @@ -408,7 +408,7 @@ def _resolve_renderer_config( # Resolve auto → concrete (mirrors ``renderers._resolve_auto``) so # ``enable_thinking`` etc. validate against the right schema instead of - # ``AutoRendererConfig``'s minimal one. Carries ``preserve_*`` across. + # ``AutoRendererConfig``'s minimal one. Carries ``thinking_retention`` across. if base is None or isinstance(base, AutoRendererConfig): renderer_name = MODEL_RENDERER_MAP.get(renderer_model, "default") # ``config_from_name`` returns ``None`` only for ``"auto"``, which @@ -417,10 +417,7 @@ def _resolve_renderer_config( assert concrete is not None if isinstance(base, AutoRendererConfig): concrete = concrete.model_copy( - update={ - "preserve_all_thinking": base.preserve_all_thinking, - "preserve_thinking_between_tool_calls": base.preserve_thinking_between_tool_calls, - } + update={"thinking_retention": base.thinking_retention} ) base = cast(RendererConfig, concrete)