Certora · naftali-g · May 19, 2026 · May 19, 2026 · May 20, 2026 · May 20, 2026
diff --git a/graphcore/graph.py b/graphcore/graph.py
@@ -13,6 +13,7 @@
 #      You should have received a copy of the GNU General Public License
 #      along with this program.  If not, see <https://www.gnu.org/licenses/>.
 
+import logging
 from typing import Optional, List, Annotated, Literal, TypeVar, Type, Protocol, cast, Any, Tuple, NotRequired, Iterable, Generic, Callable, Generator, Awaitable, Coroutine
 from typing_extensions import TypedDict
 from langchain_core.messages import ToolMessage, AnyMessage, SystemMessage, HumanMessage, BaseMessage, AIMessage, RemoveMessage
@@ -29,9 +30,22 @@
 from langgraph.prebuilt.tool_node import ToolInvocationError
 from langchain_anthropic import ChatAnthropic
 from pydantic import BaseModel, ValidationError
-from .utils import cached_invoke, acached_invoke
+from .utils import cached_invoke, acached_invoke, current_prompt_tokens, default_max_prompt_tokens, get_token_usage
 from .summary import SummaryConfig
 
+logger = logging.getLogger(__name__)
+
+
+def _log_usage(msg: BaseMessage) -> None:
+    """Emit a one-line per-call token-usage record. No-op if msg lacks usage metadata."""
+    if not isinstance(msg, AIMessage):
+        return
+    u = get_token_usage(msg)
+    model = u["model_name"] or "?"
+    logger.info(
+        f"LLM call ({model}): input={u['input_tokens']} output={u['output_tokens']} cache_read={u['cache_read_input_tokens']} cache_write={u['cache_creation_input_tokens']}",
+    )
+
 """
 This provides the framework for building applications which loop with an LLM,
 using tools to refine the LLM output.
@@ -167,13 +181,18 @@ async def impl(
         s: list[AnyMessage]
     ) -> BaseMessage:
         res = await acached_invoke(llm, s)
+        _log_usage(res)
         return res
     return impl
 
 def _sync_llm(
     llm: LLM
 ) -> SyncLLM:
-    return lambda m: cached_invoke(llm, m)
+    def impl(m: list[AnyMessage]) -> BaseMessage:
+        res = cached_invoke(llm, m)
+        _log_usage(res)
+        return res
+    return impl
 
 IN = TypeVar("IN")
 OUT = TypeVar("OUT")
@@ -261,7 +280,7 @@ def to_return(state: StateT) -> PureFunctionGenerator:
         summary_prompt = config.get_summarization_prompt(state)
 
         messages = state["messages"].copy()
-        assert len(messages) >= config.max_messages
+        assert messages, "summarizer invoked with empty message history"
 
         try:
             msg = yield(messages + [HumanMessage(content=summary_prompt, display_tag="summarization")])
@@ -348,7 +367,7 @@ def impl(
             to_ret[k] = v
         return cast(O, to_ret)
     return impl
-        
+
 
 def get_summarizer(
     llm: LLM,
@@ -496,14 +515,14 @@ def with_context(self, t: type[_BContextBind]) -> "Builder[_BStateT, _BContextBi
         to_ret._summary_config = self._summary_config
         to_ret._conversation_handler = self._conversation_handler
         return to_ret
-    
+
     def with_checkpointer(self, checkpointer: Checkpointer) -> "Builder[_BStateT, _BContextT, _BInputT]":
         to_ret : "Builder[_BStateT, _BContextT, _BInputT]" = Builder()
         self._copy_typed_to(to_ret)
         self._copy_untyped_to_(to_ret)
         to_ret._checkpointer = checkpointer
         return to_ret
-    
+
     def inject[OInput: FlowInput|None, OState: MessagesState | None, OCtxt: StateLike | None](
         self,
         f: Callable[["Builder[_BStateT, _BContextT, _BInputT]"], "Builder[OState, OCtxt, OInput]"]
@@ -572,8 +591,8 @@ def with_summary_config(self, config: SummaryConfig[_BStateT]) -> "Builder[_BSta
         to_ret._summary_config = config
         return to_ret
 
-    def with_default_summarizer(self, *, max_messages: int = 20, enabled: bool = True) -> "Builder[_BStateT, _BContextT, _BInputT]":
-        return self.with_summary_config(SummaryConfig(max_messages=max_messages, enabled=enabled))
+    def with_default_summarizer(self, *, enabled: bool = True) -> "Builder[_BStateT, _BContextT, _BInputT]":
+        return self.with_summary_config(SummaryConfig(enabled=enabled))
 
     def with_tools(self, l: Iterable[BaseTool | SplitTool]) -> "Builder[_BStateT, _BContextT, _BInputT]":
         to_ret: "Builder[_BStateT, _BContextT, _BInputT]" = Builder()
@@ -638,7 +657,7 @@ def build_async(self) -> Tuple["StateGraph[_BStateT, _BContextT, _BInputT, Any]"
             i=async_initial_node,
             r=async_tool_result_generator,
         )
-    
+
     def compile_async(
         self, *,
         checkpointer: Checkpointer = None
@@ -822,10 +841,14 @@ def ai_message_router(state: StateT) -> Literal["tools", "no_tools"]:
     builder.add_edge(NO_TOOLS_NODE, TOOL_RESULT_NODE)
 
     if summary_config is not None:
+        model_name = getattr(unbound_llm, "model", "")
+        threshold = default_max_prompt_tokens(model_name)
+        logger.info(f"Summarization threshold: {threshold} prompt tokens (model={model_name})")
+
         def routing(state: StateT) -> Literal["summarize", "tool_result", "__end__"]:
             if state.get(output_key, None) is not None:
                 return "__end__"
-            elif len(state["messages"]) > summary_config.max_messages:
+            elif current_prompt_tokens(state["messages"]) > threshold:
                 return "summarize"
             else:
                 return "tool_result"

diff --git a/graphcore/summary.py b/graphcore/summary.py
@@ -22,8 +22,7 @@
 logger = logging.getLogger(__name__)
 
 class SummaryConfig(Generic[StateT]):
-    def __init__(self, max_messages: int = 20, enabled: bool = True):
-        self.max_messages = max_messages
+    def __init__(self, enabled: bool = True):
         self.enabled = enabled
 
     def get_summarization_prompt(self, state: StateT) -> str:

diff --git a/graphcore/tools/vfs.py b/graphcore/tools/vfs.py
@@ -125,7 +125,16 @@ class _GetFileSchemaBase(BaseModel):
     If the path doesn't exist, this function returns "File not found".
     """
     path: str = Field(description="The relative path of the file on the VFS. IMPORTANT: Do NOT include a leading `./` it is implied")
-    range: FileRange | None = Field(description="If set, (start, end) indicates to return lines starting from line `start` (lines are 1 indexed) until `end` (exclusive). If unset, the entire file is returned.", default=None)
+    range: FileRange | None = Field(
+        description=(
+            "Optional line range. By DEFAULT leave this unset to read the entire file — partial reads "
+            "routinely miss surrounding context (imports, related definitions, modifiers) and force "
+            "wasteful re-reads. Only set this for exceptionally large files where you are certain no "
+            "other part will be relevant. When set, (start, end) returns lines from `start` (1-indexed) "
+            "until `end` (exclusive)."
+        ),
+        default=None,
+    )
 
 
 class _ListFileSchemaBase(BaseModel):

diff --git a/graphcore/utils.py b/graphcore/utils.py
@@ -136,3 +136,40 @@ def get_token_usage(m: AIMessage) -> TokenUsageDict:
             continue # be cool
         to_ret[k] = to_ret[k] + tok
     return to_ret
+
+
+def current_prompt_tokens(messages: List[AnyMessage]) -> int:
+    """
+    Effective context size of the most recent LLM call, used to decide when to summarize.
+
+    Returns input + cache-read + cache-creation tokens from the latest AIMessage. ToolMessages
+    appended after that AIMessage are not counted (router fires after TOOLS_NODE) and the
+    summarizer's own AIMessage is discarded before reaching state. Both are small enough that
+    the threshold should be set with headroom anyway.
+    """
+    for m in reversed(messages):
+        if isinstance(m, AIMessage):
+            usage = get_token_usage(m)
+            return (
+                usage["input_tokens"]
+                + usage["cache_read_input_tokens"]
+                + usage["cache_creation_input_tokens"]
+            )
+    return 0
+
+
+def default_max_prompt_tokens(model_name: str) -> int:
+    """
+    Prompt-token threshold at which to compact history. Keep this conservatively below the model's
+    context window to leave room for output, thinking budget, and the next batch of tool results.
+    Add a new case here when introducing a new model.
+    """
+    match model_name:
+        case "claude-opus-4-6":
+            return 500_000   # 1M context window
+        case "claude-sonnet-4-6":
+            return 500_000   # 1M context window
+        case "claude-opus-4-7":
+            return 500_000   # 1M context window
+        case _:
+            return 100_000   # fallback for unknown models