-
Notifications
You must be signed in to change notification settings - Fork 0
Use token usage as threshold for summarization #17
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
5d29c4b
36569ea
070b80a
67fb324
ef139ab
8578a5f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -13,6 +13,7 @@ | |
| # You should have received a copy of the GNU General Public License | ||
| # along with this program. If not, see <https://www.gnu.org/licenses/>. | ||
|
|
||
| import logging | ||
| from typing import Optional, List, Annotated, Literal, TypeVar, Type, Protocol, cast, Any, Tuple, NotRequired, Iterable, Generic, Callable, Generator, Awaitable, Coroutine | ||
| from typing_extensions import TypedDict | ||
| from langchain_core.messages import ToolMessage, AnyMessage, SystemMessage, HumanMessage, BaseMessage, AIMessage, RemoveMessage | ||
|
|
@@ -29,9 +30,22 @@ | |
| from langgraph.prebuilt.tool_node import ToolInvocationError | ||
| from langchain_anthropic import ChatAnthropic | ||
| from pydantic import BaseModel, ValidationError | ||
| from .utils import cached_invoke, acached_invoke | ||
| from .utils import cached_invoke, acached_invoke, current_prompt_tokens, default_max_prompt_tokens, get_token_usage | ||
| from .summary import SummaryConfig | ||
|
|
||
| logger = logging.getLogger(__name__) | ||
|
|
||
|
|
||
| def _log_usage(msg: BaseMessage) -> None: | ||
| """Emit a one-line per-call token-usage record. No-op if msg lacks usage metadata.""" | ||
| if not isinstance(msg, AIMessage): | ||
| return | ||
| u = get_token_usage(msg) | ||
| model = u["model_name"] or "?" | ||
| logger.info( | ||
| f"LLM call ({model}): input={u['input_tokens']} output={u['output_tokens']} cache_read={u['cache_read_input_tokens']} cache_write={u['cache_creation_input_tokens']}", | ||
| ) | ||
|
|
||
| """ | ||
| This provides the framework for building applications which loop with an LLM, | ||
| using tools to refine the LLM output. | ||
|
|
@@ -167,13 +181,18 @@ async def impl( | |
| s: list[AnyMessage] | ||
| ) -> BaseMessage: | ||
| res = await acached_invoke(llm, s) | ||
| _log_usage(res) | ||
| return res | ||
| return impl | ||
|
|
||
| def _sync_llm( | ||
| llm: LLM | ||
| ) -> SyncLLM: | ||
| return lambda m: cached_invoke(llm, m) | ||
| def impl(m: list[AnyMessage]) -> BaseMessage: | ||
| res = cached_invoke(llm, m) | ||
| _log_usage(res) | ||
| return res | ||
| return impl | ||
|
|
||
| IN = TypeVar("IN") | ||
| OUT = TypeVar("OUT") | ||
|
|
@@ -261,7 +280,7 @@ def to_return(state: StateT) -> PureFunctionGenerator: | |
| summary_prompt = config.get_summarization_prompt(state) | ||
|
|
||
| messages = state["messages"].copy() | ||
| assert len(messages) >= config.max_messages | ||
| assert messages, "summarizer invoked with empty message history" | ||
|
|
||
| try: | ||
| msg = yield(messages + [HumanMessage(content=summary_prompt, display_tag="summarization")]) | ||
|
|
@@ -348,7 +367,7 @@ def impl( | |
| to_ret[k] = v | ||
| return cast(O, to_ret) | ||
| return impl | ||
|
|
||
|
|
||
| def get_summarizer( | ||
| llm: LLM, | ||
|
|
@@ -496,14 +515,14 @@ def with_context(self, t: type[_BContextBind]) -> "Builder[_BStateT, _BContextBi | |
| to_ret._summary_config = self._summary_config | ||
| to_ret._conversation_handler = self._conversation_handler | ||
| return to_ret | ||
|
|
||
| def with_checkpointer(self, checkpointer: Checkpointer) -> "Builder[_BStateT, _BContextT, _BInputT]": | ||
| to_ret : "Builder[_BStateT, _BContextT, _BInputT]" = Builder() | ||
| self._copy_typed_to(to_ret) | ||
| self._copy_untyped_to_(to_ret) | ||
| to_ret._checkpointer = checkpointer | ||
| return to_ret | ||
|
|
||
| def inject[OInput: FlowInput|None, OState: MessagesState | None, OCtxt: StateLike | None]( | ||
| self, | ||
| f: Callable[["Builder[_BStateT, _BContextT, _BInputT]"], "Builder[OState, OCtxt, OInput]"] | ||
|
|
@@ -572,8 +591,8 @@ def with_summary_config(self, config: SummaryConfig[_BStateT]) -> "Builder[_BSta | |
| to_ret._summary_config = config | ||
| return to_ret | ||
|
|
||
| def with_default_summarizer(self, *, max_messages: int = 20, enabled: bool = True) -> "Builder[_BStateT, _BContextT, _BInputT]": | ||
| return self.with_summary_config(SummaryConfig(max_messages=max_messages, enabled=enabled)) | ||
| def with_default_summarizer(self, *, enabled: bool = True) -> "Builder[_BStateT, _BContextT, _BInputT]": | ||
| return self.with_summary_config(SummaryConfig(enabled=enabled)) | ||
|
|
||
| def with_tools(self, l: Iterable[BaseTool | SplitTool]) -> "Builder[_BStateT, _BContextT, _BInputT]": | ||
| to_ret: "Builder[_BStateT, _BContextT, _BInputT]" = Builder() | ||
|
|
@@ -638,7 +657,7 @@ def build_async(self) -> Tuple["StateGraph[_BStateT, _BContextT, _BInputT, Any]" | |
| i=async_initial_node, | ||
| r=async_tool_result_generator, | ||
| ) | ||
|
|
||
| def compile_async( | ||
| self, *, | ||
| checkpointer: Checkpointer = None | ||
|
|
@@ -822,10 +841,14 @@ def ai_message_router(state: StateT) -> Literal["tools", "no_tools"]: | |
| builder.add_edge(NO_TOOLS_NODE, TOOL_RESULT_NODE) | ||
|
|
||
| if summary_config is not None: | ||
| model_name = getattr(unbound_llm, "model", "") | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. do I not understand python? Can we not use
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You could, but then to keep pyright happy you'd need to make the function on the next line accept
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It seems more wrong to pretend |
||
| threshold = default_max_prompt_tokens(model_name) | ||
| logger.info(f"Summarization threshold: {threshold} prompt tokens (model={model_name})") | ||
|
|
||
| def routing(state: StateT) -> Literal["summarize", "tool_result", "__end__"]: | ||
| if state.get(output_key, None) is not None: | ||
| return "__end__" | ||
| elif len(state["messages"]) > summary_config.max_messages: | ||
| elif current_prompt_tokens(state["messages"]) > threshold: | ||
| return "summarize" | ||
| else: | ||
| return "tool_result" | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -125,7 +125,16 @@ class _GetFileSchemaBase(BaseModel): | |
| If the path doesn't exist, this function returns "File not found". | ||
| """ | ||
| path: str = Field(description="The relative path of the file on the VFS. IMPORTANT: Do NOT include a leading `./` it is implied") | ||
| range: FileRange | None = Field(description="If set, (start, end) indicates to return lines starting from line `start` (lines are 1 indexed) until `end` (exclusive). If unset, the entire file is returned.", default=None) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This ... shouldn't be removed. The agent has the option to read the entire file if it needs to. There are genuine reasons to avoid splatting the whole file into the context, and to let the agent read only selected parts of said file. If you're seeing an agent trying to read files incrementally with ranges, that should be addressed at the prompt level not by forcing the agent to read entire files every time.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. To wit, we can certainly copy some of the instructions used from claude code's system prompt: Tool description body: ▎ Reads a file from the local filesystem. You can access any file directly by using this tool. Parameter descriptions (the key bit):
We likely need to emphasize that the range parameter should only be used to select a known range of the file when that range is already known; present it as an optimization as opposed to the "happy path" of passing in null. |
||
| range: FileRange | None = Field( | ||
| description=( | ||
| "Optional line range. By DEFAULT leave this unset to read the entire file — partial reads " | ||
| "routinely miss surrounding context (imports, related definitions, modifiers) and force " | ||
| "wasteful re-reads. Only set this for exceptionally large files where you are certain no " | ||
| "other part will be relevant. When set, (start, end) returns lines from `start` (1-indexed) " | ||
| "until `end` (exclusive)." | ||
| ), | ||
| default=None, | ||
| ) | ||
|
|
||
|
|
||
| class _ListFileSchemaBase(BaseModel): | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why not let the user configure the token threshold here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why yes? the threshold is a function of the model being used, not of how long we expect the agent to run.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't completely agree with that, folks might want to more aggressively manage their context (for context drift reasons or simply to save money). But I agree no one needs it now so we can punt.