def _log_usage(msg: BaseMessage) -> None:
    """Emit a one-line, per-call token-usage record at INFO level.

    Only ``AIMessage`` instances are logged; any other message type is
    silently ignored. The counts come from ``get_token_usage(msg)``, and a
    falsy model name is rendered as ``"?"``.
    """
    if not isinstance(msg, AIMessage):
        return
    usage = get_token_usage(msg)
    model = usage["model_name"] or "?"
    # Pass values as lazy %-style arguments so the string is only built when
    # an INFO handler is actually going to emit the record.
    logger.info(
        "LLM call (%s): input=%s output=%s cache_read=%s cache_write=%s",
        model,
        usage["input_tokens"],
        usage["output_tokens"],
        usage["cache_read_input_tokens"],
        usage["cache_creation_input_tokens"],
    )
def _sync_llm(
    llm: LLM
) -> SyncLLM:
    """Wrap ``llm`` in a synchronous callable that logs token usage.

    The returned callable forwards its message list to
    ``cached_invoke(llm, m)``, hands the reply to ``_log_usage``, and then
    returns that reply unchanged.
    """
    def invoke_and_log(m: list[AnyMessage]) -> BaseMessage:
        reply = cached_invoke(llm, m)
        _log_usage(reply)
        return reply

    return invoke_and_log
def routing(state: StateT) -> Literal["summarize", "tool_result", "__end__"]:
    """Choose the next node after a tool step.

    Returns ``"__end__"`` once the state holds a non-None value under
    ``output_key``. Otherwise returns ``"summarize"`` when the prompt-token
    count reported by ``current_prompt_tokens`` exceeds ``threshold``, and
    ``"tool_result"`` in every other case.
    """
    # A populated output slot takes priority over any summarization check.
    if state.get(output_key, None) is not None:
        return "__end__"
    over_budget = current_prompt_tokens(state["messages"]) > threshold
    return "summarize" if over_budget else "tool_result"
def current_prompt_tokens(messages: List[AnyMessage]) -> int:
    """
    Effective context size of the most recent LLM call, used to decide when to summarize.

    Scans `messages` from newest to oldest and, for the first AIMessage found, returns the sum of
    its input, cache-read and cache-creation token counts as reported by `get_token_usage`.
    Messages appended after that AIMessage (e.g. ToolMessages) are not counted, so any threshold
    compared against this value should leave headroom. Returns 0 when there is no AIMessage.
    """
    latest_ai = next(
        (m for m in reversed(messages) if isinstance(m, AIMessage)),
        None,
    )
    if latest_ai is None:
        return 0
    usage = get_token_usage(latest_ai)
    return (
        usage["input_tokens"]
        + usage["cache_read_input_tokens"]
        + usage["cache_creation_input_tokens"]
    )


def default_max_prompt_tokens(model_name: str) -> int:
    """
    Prompt-token threshold at which to compact history.

    Keep this conservatively below the model's context window to leave room for output, thinking
    budget, and the next batch of tool results. Add a new entry to the tuple below when
    introducing a new model.
    """
    # Models with a 1M context window.
    large_context_models = (
        "claude-opus-4-6",
        "claude-sonnet-4-6",
        "claude-opus-4-7",
    )
    if model_name in large_context_models:
        return 500_000
    # Fallback for unknown models.
    return 100_000