Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 33 additions & 10 deletions graphcore/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

import logging
from typing import Optional, List, Annotated, Literal, TypeVar, Type, Protocol, cast, Any, Tuple, NotRequired, Iterable, Generic, Callable, Generator, Awaitable, Coroutine
from typing_extensions import TypedDict
from langchain_core.messages import ToolMessage, AnyMessage, SystemMessage, HumanMessage, BaseMessage, AIMessage, RemoveMessage
Expand All @@ -29,9 +30,22 @@
from langgraph.prebuilt.tool_node import ToolInvocationError
from langchain_anthropic import ChatAnthropic
from pydantic import BaseModel, ValidationError
from .utils import cached_invoke, acached_invoke
from .utils import cached_invoke, acached_invoke, current_prompt_tokens, default_max_prompt_tokens, get_token_usage
from .summary import SummaryConfig

logger = logging.getLogger(__name__)


def _log_usage(msg: BaseMessage) -> None:
    """Log a one-line token-usage summary for a single LLM response.

    Only ``AIMessage`` instances are logged; any other message type is
    ignored. The model name is rendered as ``"?"`` when the usage record
    has no truthy ``model_name``.
    """
    if isinstance(msg, AIMessage):
        u = get_token_usage(msg)
        # Fall back to a placeholder so the log line keeps a stable shape.
        model = u["model_name"] or "?"
        logger.info(
            f"LLM call ({model}): input={u['input_tokens']} output={u['output_tokens']} cache_read={u['cache_read_input_tokens']} cache_write={u['cache_creation_input_tokens']}",
        )

"""
This provides the framework for building applications which loop with an LLM,
using tools to refine the LLM output.
Expand Down Expand Up @@ -167,13 +181,18 @@ async def impl(
s: list[AnyMessage]
) -> BaseMessage:
res = await acached_invoke(llm, s)
_log_usage(res)
return res
return impl

def _sync_llm(
    llm: LLM
) -> SyncLLM:
    """Wrap ``llm`` as a synchronous callable that logs token usage per call.

    The returned function forwards the message list to ``cached_invoke``,
    hands the response to ``_log_usage``, and then returns that response.
    """
    # A bare ``lambda`` returned ahead of ``impl`` would bypass the usage
    # logging entirely, so ``impl`` is the only thing returned here.
    def impl(m: list[AnyMessage]) -> BaseMessage:
        res = cached_invoke(llm, m)
        _log_usage(res)
        return res
    return impl

IN = TypeVar("IN")
OUT = TypeVar("OUT")
Expand Down Expand Up @@ -261,7 +280,7 @@ def to_return(state: StateT) -> PureFunctionGenerator:
summary_prompt = config.get_summarization_prompt(state)

messages = state["messages"].copy()
assert len(messages) >= config.max_messages
assert messages, "summarizer invoked with empty message history"

try:
msg = yield(messages + [HumanMessage(content=summary_prompt, display_tag="summarization")])
Expand Down Expand Up @@ -348,7 +367,7 @@ def impl(
to_ret[k] = v
return cast(O, to_ret)
return impl


def get_summarizer(
llm: LLM,
Expand Down Expand Up @@ -496,14 +515,14 @@ def with_context(self, t: type[_BContextBind]) -> "Builder[_BStateT, _BContextBi
to_ret._summary_config = self._summary_config
to_ret._conversation_handler = self._conversation_handler
return to_ret

def with_checkpointer(self, checkpointer: Checkpointer) -> "Builder[_BStateT, _BContextT, _BInputT]":
to_ret : "Builder[_BStateT, _BContextT, _BInputT]" = Builder()
self._copy_typed_to(to_ret)
self._copy_untyped_to_(to_ret)
to_ret._checkpointer = checkpointer
return to_ret

def inject[OInput: FlowInput|None, OState: MessagesState | None, OCtxt: StateLike | None](
self,
f: Callable[["Builder[_BStateT, _BContextT, _BInputT]"], "Builder[OState, OCtxt, OInput]"]
Expand Down Expand Up @@ -572,8 +591,8 @@ def with_summary_config(self, config: SummaryConfig[_BStateT]) -> "Builder[_BSta
to_ret._summary_config = config
return to_ret

def with_default_summarizer(self, *, max_messages: int = 20, enabled: bool = True) -> "Builder[_BStateT, _BContextT, _BInputT]":
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why not let the user configure the token threshold here?

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why yes? the threshold is a function of the model being used, not of how long we expect the agent to run.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't completely agree with that; folks might want to manage their context more aggressively (for context-drift reasons or simply to save money). But I agree no one needs it now, so we can punt.

return self.with_summary_config(SummaryConfig(max_messages=max_messages, enabled=enabled))
def with_default_summarizer(self, *, enabled: bool = True) -> "Builder[_BStateT, _BContextT, _BInputT]":
    """Return a builder that uses a default ``SummaryConfig``.

    ``enabled`` is forwarded unchanged to ``SummaryConfig``; the resulting
    config is installed via ``with_summary_config``.
    """
    default_config = SummaryConfig(enabled=enabled)
    return self.with_summary_config(default_config)

def with_tools(self, l: Iterable[BaseTool | SplitTool]) -> "Builder[_BStateT, _BContextT, _BInputT]":
to_ret: "Builder[_BStateT, _BContextT, _BInputT]" = Builder()
Expand Down Expand Up @@ -638,7 +657,7 @@ def build_async(self) -> Tuple["StateGraph[_BStateT, _BContextT, _BInputT, Any]"
i=async_initial_node,
r=async_tool_result_generator,
)

def compile_async(
self, *,
checkpointer: Checkpointer = None
Expand Down Expand Up @@ -822,10 +841,14 @@ def ai_message_router(state: StateT) -> Literal["tools", "no_tools"]:
builder.add_edge(NO_TOOLS_NODE, TOOL_RESULT_NODE)

if summary_config is not None:
model_name = getattr(unbound_llm, "model", "")
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do I not understand python? Can we not use None as the default here? I guess it doesn't matter, but I'd still prefer to use the type system to our advantage if we can.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You could, but then to keep pyright happy you'd need to make the function on the next line accept str | None which seemed wrong to me

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems more wrong to pretend "" (or "?" for that matter) is a valid model name, when it is really representing "we don't have a model name" (which I'd argue None should do). At least that way, having get_model_default (or whatever the function is called) accept None indicates "we return something sensible if you don't have a model name", which just so happens to also be the "we don't understand your model name" case.

threshold = default_max_prompt_tokens(model_name)
logger.info(f"Summarization threshold: {threshold} prompt tokens (model={model_name})")

def routing(state: StateT) -> Literal["summarize", "tool_result", "__end__"]:
    """Choose the next step from the current state.

    Returns ``"__end__"`` once ``output_key`` holds a non-``None`` value,
    ``"summarize"`` when the prompt-token count of the message history
    exceeds ``threshold``, and ``"tool_result"`` otherwise.
    """
    if state.get(output_key, None) is not None:
        return "__end__"
    # Summarization is driven by prompt tokens rather than message count;
    # ``SummaryConfig`` no longer carries a ``max_messages`` attribute.
    elif current_prompt_tokens(state["messages"]) > threshold:
        return "summarize"
    else:
        return "tool_result"
Expand Down
3 changes: 1 addition & 2 deletions graphcore/summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,7 @@
logger = logging.getLogger(__name__)

class SummaryConfig(Generic[StateT]):
def __init__(self, max_messages: int = 20, enabled: bool = True):
self.max_messages = max_messages
def __init__(self, enabled: bool = True):
self.enabled = enabled

def get_summarization_prompt(self, state: StateT) -> str:
Expand Down
11 changes: 10 additions & 1 deletion graphcore/tools/vfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,16 @@ class _GetFileSchemaBase(BaseModel):
If the path doesn't exist, this function returns "File not found".
"""
path: str = Field(description="The relative path of the file on the VFS. IMPORTANT: Do NOT include a leading `./` it is implied")
range: FileRange | None = Field(description="If set, (start, end) indicates to return lines starting from line `start` (lines are 1 indexed) until `end` (exclusive). If unset, the entire file is returned.", default=None)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This ... shouldn't be removed. The agent has the option to read the entire file if it needs to. There are genuine reasons to avoid splatting the whole file into the context, and to let the agent read only selected parts of said file. If you're seeing an agent trying to read files incrementally with ranges, that should be addressed at the prompt level not by forcing the agent to read entire files every time.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To wit, we can certainly copy some of the instructions used from claude code's system prompt:

Tool description body:

▎ Reads a file from the local filesystem. You can access any file directly by using this tool.
▎ Assume this tool is able to read all files on the machine. If the User provides a path to a file assume that path is valid. It is okay to read a file that does not exist; an error will be returned.

▎ Usage:
▎ - The file_path parameter must be an absolute path, not a relative path
▎ - By default, it reads up to 2000 lines starting from the beginning of the file
▎ - When you already know which part of the file you need, only read that part. This can be important for larger files.
▎ - Results are returned using cat -n format, with line numbers starting at 1
▎ - This tool allows Claude Code to read images (eg PNG, JPG, etc). When reading an image file the contents are presented visually as Claude Code is a multimodal LLM.
▎ - This tool can read PDF files (.pdf). For large PDFs (more than 10 pages), you MUST provide the pages parameter to read specific page ranges (e.g., pages: "1-5"). Reading a large PDF without the pages parameter will fail. Maximum 20
▎ pages per request.
▎ - This tool can read Jupyter notebooks (.ipynb files) and returns all cells with their outputs, combining code, text, and visualizations.
▎ - This tool can only read files, not directories. To list files in a directory, use the registered shell tool.
▎ - You will regularly be asked to read screenshots. If the user provides a path to a screenshot, ALWAYS use this tool to view the file at the path. This tool will work with all temporary file paths.
▎ - If you read a file that exists but has empty contents you will receive a system reminder warning in place of file contents.
▎ - Do NOT re-read a file you just edited to verify — Edit/Write would have errored if the change failed, and the harness tracks file state for you.

Parameter descriptions (the key bit):

  • offset: "The line number to start reading from. Only provide if the file is too large to read at once"
  • limit: "The number of lines to read. Only provide if the file is too large to read at once."

We likely need to emphasize that the range parameter should only be used when the relevant range of the file is already known; present it as an optimization as opposed to the "happy path" of passing in null.

range: FileRange | None = Field(
description=(
"Optional line range. By DEFAULT leave this unset to read the entire file — partial reads "
"routinely miss surrounding context (imports, related definitions, modifiers) and force "
"wasteful re-reads. Only set this for exceptionally large files where you are certain no "
"other part will be relevant. When set, (start, end) returns lines from `start` (1-indexed) "
"until `end` (exclusive)."
),
default=None,
)


class _ListFileSchemaBase(BaseModel):
Expand Down
37 changes: 37 additions & 0 deletions graphcore/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,3 +136,40 @@ def get_token_usage(m: AIMessage) -> TokenUsageDict:
continue # be cool
to_ret[k] = to_ret[k] + tok
return to_ret


def current_prompt_tokens(messages: List[AnyMessage]) -> int:
    """
    Report the effective context size of the most recent LLM call.

    The result is the sum of input, cache-read, and cache-creation tokens taken from the last
    AIMessage in `messages`; it is 0 when the history contains no AIMessage at all.

    Messages that follow that AIMessage (e.g. ToolMessages appended afterwards) are not included
    in the count, so the summarization threshold should be chosen with some headroom.
    """
    # Walk backwards and stop at the first (i.e. latest) AIMessage.
    latest_ai = next(
        (candidate for candidate in reversed(messages) if isinstance(candidate, AIMessage)),
        None,
    )
    if latest_ai is None:
        return 0

    usage = get_token_usage(latest_ai)
    fresh_input = usage["input_tokens"]
    cache_reads = usage["cache_read_input_tokens"]
    cache_writes = usage["cache_creation_input_tokens"]
    return fresh_input + cache_reads + cache_writes


def default_max_prompt_tokens(model_name: str) -> int:
    """
    Return the prompt-token count at which history should be compacted for `model_name`.

    The value is kept conservatively below the model's context window so that room remains for
    output, thinking budget, and the next batch of tool results. Register a model in
    `long_context_models` (or add another tier) when introducing it.
    """
    # Models with a 1M context window all share the same threshold.
    long_context_models = (
        "claude-opus-4-6",
        "claude-sonnet-4-6",
        "claude-opus-4-7",
    )
    if model_name in long_context_models:
        return 500_000
    # Fallback for unknown models.
    return 100_000
Loading