diff --git a/README.md b/README.md deleted file mode 100644 index ca4d96b..0000000 --- a/README.md +++ /dev/null @@ -1 +0,0 @@ -# langgraph_docs diff --git a/add-memory.mdx b/add-memory.mdx index 25bf670..80b5f3e 100644 --- a/add-memory.mdx +++ b/add-memory.mdx @@ -3,7 +3,6 @@ title: Memory --- - AI applications need [memory](/oss/concepts/memory) to share context across multiple interactions. In LangGraph, you can add two types of memory: * [Add short-term memory](#add-short-term-memory) as a part of your agent's [state](/oss/langgraph/graph-api#state) to enable multi-turn conversations. @@ -62,15 +61,31 @@ with PostgresSaver.from_conn_string(DB_URI) as checkpointer: # [!code highlight ::: :::js -```typescript -import { PostgresSaver } from "@langchain/langgraph-checkpoint-postgres"; + + + ```typescript + import { PostgresSaver } from "@langchain/langgraph-checkpoint-postgres"; -const DB_URI = "postgresql://postgres:postgres@localhost:5442/postgres?sslmode=disable"; -const checkpointer = PostgresSaver.fromConnString(DB_URI); + const DB_URI = "postgresql://postgres:postgres@localhost:5442/postgres?sslmode=disable"; + const checkpointer = PostgresSaver.fromConnString(DB_URI); -const builder = new StateGraph(...); -const graph = builder.compile({ checkpointer }); -``` + const builder = new StateGraph(...); + const graph = builder.compile({ checkpointer }); + ``` + + + ```typescript + import { MongoClient } from "mongodb"; + import { MongoDBSaver } from "@langchain/langgraph-checkpoint-mongodb"; + + const client = new MongoClient("mongodb://user:password@localhost:27017"); + const checkpointer = new MongoDBSaver({ client }); + + const builder = new StateGraph(...); + const graph = builder.compile({ checkpointer }); + ``` + + ::: @@ -231,16 +246,16 @@ const graph = builder.compile({ checkpointer }); ::: -:::python + :::python ``` pip install -U pymongo langgraph langgraph-checkpoint-mongodb ``` - + **Setup** To use the [MongoDB 
checkpointer](https://pypi.org/project/langgraph-checkpoint-mongodb/), you will need a MongoDB cluster. Follow [this guide](https://www.mongodb.com/docs/guides/atlas/cluster/) to create a cluster if you don't already have one. - + @@ -251,8 +266,8 @@ const graph = builder.compile({ checkpointer }); model = init_chat_model(model="claude-haiku-4-5-20251001") - DB_URI = "localhost:27017" - with MongoDBSaver.from_conn_string(DB_URI) as checkpointer: # [!code highlight] + MONGODB_URI = "localhost:27017" + with MongoDBSaver.from_conn_string(MONGODB_URI) as checkpointer: # [!code highlight] def call_model(state: MessagesState): response = model.invoke(state["messages"]) @@ -293,8 +308,8 @@ const graph = builder.compile({ checkpointer }); model = init_chat_model(model="claude-haiku-4-5-20251001") - DB_URI = "localhost:27017" - async with AsyncMongoDBSaver.from_conn_string(DB_URI) as checkpointer: # [!code highlight] + MONGODB_URI = "localhost:27017" + async with AsyncMongoDBSaver.from_conn_string(MONGODB_URI) as checkpointer: # [!code highlight] async def call_model(state: MessagesState): response = await model.ainvoke(state["messages"]) @@ -328,8 +343,64 @@ const graph = builder.compile({ checkpointer }); ``` + ::: + + :::js + ``` + npm install @langchain/langgraph-checkpoint-mongodb + ``` + + + **Setup** + To use `MongoDBSaver`, you will need a MongoDB cluster. Follow [this guide](https://www.mongodb.com/docs/guides/atlas/cluster/) to create a cluster if you don't already have one. 
+ + + ```typescript + import { ChatAnthropic } from "@langchain/anthropic"; + import { StateGraph, StateSchema, MessagesValue, GraphNode, START } from "@langchain/langgraph"; + import { MongoDBSaver } from "@langchain/langgraph-checkpoint-mongodb"; + import { MongoClient } from "mongodb"; + + const State = new StateSchema({ + messages: MessagesValue, + }); + + const model = new ChatAnthropic({ model: "claude-haiku-4-5-20251001" }); + + const client = new MongoClient("mongodb://user:password@localhost:27017"); + const checkpointer = new MongoDBSaver({ client, dbName: "langgraph" }); + + const callModel: GraphNode = async (state) => { + const response = await model.invoke(state.messages); + return { messages: [response] }; + }; + + const builder = new StateGraph(State) + .addNode("call_model", callModel) + .addEdge(START, "call_model"); + + const graph = builder.compile({ checkpointer }); + + const config = { configurable: { thread_id: "1" } }; + + for await (const chunk of await graph.stream( + { messages: [{ role: "user", content: "hi! I'm bob" }] }, + { ...config, streamMode: "values" } + )) { + console.log(chunk.messages.at(-1)?.content); + } + + for await (const chunk of await graph.stream( + { messages: [{ role: "user", content: "what's my name?" }] }, + { ...config, streamMode: "values" } + )) { + console.log(chunk.messages.at(-1)?.content); + } + ``` + ::: +:::python ``` pip install -U langgraph langgraph-checkpoint-redis @@ -381,7 +452,7 @@ const graph = builder.compile({ checkpointer }); stream_mode="values" ): chunk["messages"][-1].pretty_print() -``` + ``` ```python @@ -428,6 +499,110 @@ const graph = builder.compile({ checkpointer }); + + + ``` + pip install -U langgraph langgraph-oracledb + ``` + + + **Setup** + To use the [Oracle checkpointer](https://pypi.org/project/langgraph-oracledb/), you will need an Oracle AI Database instance. A local container (for example `gvenzl/oracle-free:23-slim`) or an Oracle Autonomous Database in OCI both work. 
+ + + + You need to call `checkpointer.setup()` the first time you're using the Oracle checkpointer. + + + + + ```python + from langchain.chat_models import init_chat_model + from langgraph.graph import StateGraph, MessagesState, START + from langgraph_oracledb.checkpoint.oracle import OracleSaver # [!code highlight] + + model = init_chat_model(model="claude-haiku-4-5-20251001") + + DB_URI = "user/password@localhost:1521/FREEPDB1" + with OracleSaver.from_conn_string(DB_URI) as checkpointer: # [!code highlight] + # checkpointer.setup() + + def call_model(state: MessagesState): + response = model.invoke(state["messages"]) + return {"messages": response} + + builder = StateGraph(MessagesState) + builder.add_node(call_model) + builder.add_edge(START, "call_model") + + graph = builder.compile(checkpointer=checkpointer) # [!code highlight] + + config = { + "configurable": { + "thread_id": "1" # [!code highlight] + } + } + + for chunk in graph.stream( + {"messages": [{"role": "user", "content": "hi! 
I'm bob"}]}, + config, # [!code highlight] + stream_mode="values" + ): + chunk["messages"][-1].pretty_print() + + for chunk in graph.stream( + {"messages": [{"role": "user", "content": "what's my name?"}]}, + config, # [!code highlight] + stream_mode="values" + ): + chunk["messages"][-1].pretty_print() + ``` + + + ```python + from langchain.chat_models import init_chat_model + from langgraph.graph import StateGraph, MessagesState, START + from langgraph_oracledb.checkpoint.oracle import AsyncOracleSaver # [!code highlight] + + model = init_chat_model(model="claude-haiku-4-5-20251001") + + DB_URI = "user/password@localhost:1521/FREEPDB1" + async with AsyncOracleSaver.from_conn_string(DB_URI) as checkpointer: # [!code highlight] + # await checkpointer.setup() + + async def call_model(state: MessagesState): + response = await model.ainvoke(state["messages"]) + return {"messages": response} + + builder = StateGraph(MessagesState) + builder.add_node(call_model) + builder.add_edge(START, "call_model") + + graph = builder.compile(checkpointer=checkpointer) # [!code highlight] + + config = { + "configurable": { + "thread_id": "1" # [!code highlight] + } + } + + async for chunk in graph.astream( + {"messages": [{"role": "user", "content": "hi! I'm bob"}]}, + config, # [!code highlight] + stream_mode="values" + ): + chunk["messages"][-1].pretty_print() + + async for chunk in graph.astream( + {"messages": [{"role": "user", "content": "what's my name?"}]}, + config, # [!code highlight] + stream_mode="values" + ): + chunk["messages"][-1].pretty_print() + ``` + + + ::: ### Use in subgraphs @@ -579,7 +754,6 @@ graph.invoke( :::js ```typescript import { StateGraph, StateSchema, MessagesValue, GraphNode, START } from "@langchain/langgraph"; -import { v4 as uuidv4 } from "uuid"; const State = new StateSchema({ messages: MessagesValue, @@ -599,7 +773,7 @@ const callModel: GraphNode = async (state, runtime) => { // ... 
Use memories in model call // Store a new memory - await runtime.store?.put(namespace, uuidv4(), { data: "User prefers dark mode" }); + await runtime.store?.put(namespace, crypto.randomUUID(), { data: "User prefers dark mode" }); }; const builder = new StateGraph(State) @@ -632,15 +806,33 @@ with PostgresStore.from_conn_string(DB_URI) as store: # [!code highlight] ::: :::js -```typescript -import { PostgresStore } from "@langchain/langgraph-checkpoint-postgres/store"; + + + ```typescript + import { PostgresStore } from "@langchain/langgraph-checkpoint-postgres/store"; -const DB_URI = "postgresql://postgres:postgres@localhost:5442/postgres?sslmode=disable"; -const store = PostgresStore.fromConnString(DB_URI); + const DB_URI = "postgresql://postgres:postgres@localhost:5442/postgres?sslmode=disable"; + const store = PostgresStore.fromConnString(DB_URI); -const builder = new StateGraph(...); -const graph = builder.compile({ store }); -``` + const builder = new StateGraph(...); + const graph = builder.compile({ store }); + ``` + + + ```typescript + import { MongoDBStore } from "@langchain/langgraph-checkpoint-mongodb"; + + const MONGODB_URI = "mongodb://user:password@localhost:27017"; + const store = await MongoDBStore.fromConnString(MONGODB_URI, { + dbName: "langgraph", + collectionName: "store", + }); + + const builder = new StateGraph(...); + const graph = builder.compile({ store }); + ``` + + ::: @@ -819,7 +1011,6 @@ const graph = builder.compile({ store }); import { StateGraph, StateSchema, MessagesValue, GraphNode, START } from "@langchain/langgraph"; import { PostgresSaver } from "@langchain/langgraph-checkpoint-postgres"; import { PostgresStore } from "@langchain/langgraph-checkpoint-postgres/store"; - import { v4 as uuidv4 } from "uuid"; const State = new StateSchema({ messages: MessagesValue, @@ -838,7 +1029,7 @@ const graph = builder.compile({ store }); const lastMessage = state.messages.at(-1); if (lastMessage?.content?.toLowerCase().includes("remember")) { 
const memory = "User name is Bob"; - await runtime.store?.put(namespace, uuidv4(), { data: memory }); + await runtime.store?.put(namespace, crypto.randomUUID(), { data: memory }); } const response = await model.invoke([ @@ -881,6 +1072,76 @@ const graph = builder.compile({ store }); ::: + + :::js + ``` + npm install @langchain/langgraph-checkpoint-mongodb + ``` + + ```typescript + import { ChatAnthropic } from "@langchain/anthropic"; + import { MemorySaver, StateGraph, StateSchema, MessagesValue, GraphNode, START } from "@langchain/langgraph"; + import { MongoDBStore } from "@langchain/langgraph-checkpoint-mongodb"; + + const State = new StateSchema({ + messages: MessagesValue, + }); + + const model = new ChatAnthropic({ model: "claude-sonnet-4-6" }); + + const callModel: GraphNode = async (state, runtime) => { + const userId = runtime.context?.userId; + const namespace = ["memories", userId]; + const memories = await runtime.store?.search(namespace); + const info = memories?.map(d => d.value.data).join("\n") || "n/a"; + const systemMsg = `You are a helpful assistant talking to the user. 
User info: ${info}`; + + // Store new memories if the user asks the model to remember + const lastMessage = state.messages.at(-1); + if (lastMessage?.content?.toLowerCase().includes("remember")) { + const memory = "User name is Bob"; + await runtime.store?.put(namespace, crypto.randomUUID(), { data: memory }); + } + + const response = await model.invoke([ + { role: "system", content: systemMsg }, + ...state.messages + ]); + return { messages: [response] }; + }; + + const MONGODB_URI = "mongodb://user:password@localhost:27017"; + + const store = await MongoDBStore.fromConnString(MONGODB_URI, { + dbName: "langgraph", + collectionName: "store", + }); + + const checkpointer = new MemorySaver(); + + const builder = new StateGraph(State) + .addNode("call_model", callModel) + .addEdge(START, "call_model"); + + const graph = builder.compile({ checkpointer, store }); + + for await (const chunk of await graph.stream( + { messages: [{ role: "user", content: "Hi! Remember: my name is Bob" }] }, + { configurable: { thread_id: "1" }, context: { userId: "1" }, streamMode: "values" } + )) { + console.log(chunk.messages.at(-1)?.content); + } + + for await (const chunk of await graph.stream( + { messages: [{ role: "user", content: "what is my name?" }] }, + { configurable: { thread_id: "2" }, context: { userId: "1" }, streamMode: "values" } + )) { + console.log(chunk.messages.at(-1)?.content); + } + ``` + ::: + + :::python @@ -1043,6 +1304,199 @@ const graph = builder.compile({ store }); + + + + ``` + pip install -U langgraph langgraph-oracledb langchain-openai + ``` + + + **Setup** + To use the [Oracle store](https://pypi.org/project/langgraph-oracledb/), you will need an Oracle AI Database instance — the vector index used for semantic `search` requires [Oracle AI Vector Search](https://docs.oracle.com/en/database/oracle/oracle-database/23/vecse/). + + + + You need to call `store.setup()` and `checkpointer.setup()` the first time you're using the Oracle store and checkpointer. 
+ + + + + ```python + import uuid + + from langchain.chat_models import init_chat_model + from langchain.embeddings import init_embeddings + from langchain_core.runnables import RunnableConfig + from langgraph.graph import StateGraph, MessagesState, START + from langgraph.store.base import BaseStore + from langgraph_oracledb.checkpoint.oracle import OracleSaver + from langgraph_oracledb.store.oracle import OracleStore # [!code highlight] + + model = init_chat_model(model="claude-haiku-4-5-20251001") + embeddings = init_embeddings("openai:text-embedding-3-small") + + DB_URI = "user/password@localhost:1521/FREEPDB1" + + with ( + OracleStore.from_conn_string( # [!code highlight] + DB_URI, + index={"embed": embeddings, "dims": 1536}, # [!code highlight] + ) as store, + OracleSaver.from_conn_string(DB_URI) as checkpointer, + ): + store.setup() + checkpointer.setup() + + def call_model( + state: MessagesState, + config: RunnableConfig, + *, + store: BaseStore, # [!code highlight] + ): + user_id = config["configurable"]["user_id"] + namespace = ("memories", user_id) + memories = store.search(namespace, query=str(state["messages"][-1].content)) # [!code highlight] + info = "\n".join([d.value["data"] for d in memories]) + system_msg = f"You are a helpful assistant talking to the user. 
User info: {info}" + + # Store new memories if the user asks the model to remember + last_message = state["messages"][-1] + if "remember" in last_message.content.lower(): + memory = "User name is Bob" + store.put(namespace, str(uuid.uuid4()), {"data": memory}) # [!code highlight] + + response = model.invoke( + [{"role": "system", "content": system_msg}] + state["messages"] + ) + return {"messages": response} + + builder = StateGraph(MessagesState) + builder.add_node(call_model) + builder.add_edge(START, "call_model") + + graph = builder.compile( + checkpointer=checkpointer, + store=store, # [!code highlight] + ) + + config = { + "configurable": { + "thread_id": "1", # [!code highlight] + "user_id": "1", # [!code highlight] + } + } + for chunk in graph.stream( + {"messages": [{"role": "user", "content": "Hi! Remember: my name is Bob"}]}, + config, # [!code highlight] + stream_mode="values", + ): + chunk["messages"][-1].pretty_print() + + config = { + "configurable": { + "thread_id": "2", # [!code highlight] + "user_id": "1", + } + } + + for chunk in graph.stream( + {"messages": [{"role": "user", "content": "what is my name?"}]}, + config, # [!code highlight] + stream_mode="values", + ): + chunk["messages"][-1].pretty_print() + ``` + + + ```python + import uuid + + from langchain.chat_models import init_chat_model + from langchain.embeddings import init_embeddings + from langchain_core.runnables import RunnableConfig + from langgraph.graph import StateGraph, MessagesState, START + from langgraph.store.base import BaseStore + from langgraph_oracledb.checkpoint.oracle import AsyncOracleSaver + from langgraph_oracledb.store.oracle import AsyncOracleStore # [!code highlight] + + model = init_chat_model(model="claude-haiku-4-5-20251001") + embeddings = init_embeddings("openai:text-embedding-3-small") + + DB_URI = "user/password@localhost:1521/FREEPDB1" + + async with ( + AsyncOracleStore.from_conn_string( # [!code highlight] + DB_URI, + index={"embed": embeddings, "dims": 
1536}, # [!code highlight] + ) as store, + AsyncOracleSaver.from_conn_string(DB_URI) as checkpointer, + ): + await store.setup() + await checkpointer.setup() + + async def call_model( + state: MessagesState, + config: RunnableConfig, + *, + store: BaseStore, # [!code highlight] + ): + user_id = config["configurable"]["user_id"] + namespace = ("memories", user_id) + memories = await store.asearch(namespace, query=str(state["messages"][-1].content)) # [!code highlight] + info = "\n".join([d.value["data"] for d in memories]) + system_msg = f"You are a helpful assistant talking to the user. User info: {info}" + + # Store new memories if the user asks the model to remember + last_message = state["messages"][-1] + if "remember" in last_message.content.lower(): + memory = "User name is Bob" + await store.aput(namespace, str(uuid.uuid4()), {"data": memory}) # [!code highlight] + + response = await model.ainvoke( + [{"role": "system", "content": system_msg}] + state["messages"] + ) + return {"messages": response} + + builder = StateGraph(MessagesState) + builder.add_node(call_model) + builder.add_edge(START, "call_model") + + graph = builder.compile( + checkpointer=checkpointer, + store=store, # [!code highlight] + ) + + config = { + "configurable": { + "thread_id": "1", # [!code highlight] + "user_id": "1", # [!code highlight] + } + } + async for chunk in graph.astream( + {"messages": [{"role": "user", "content": "Hi! 
Remember: my name is Bob"}]}, + config, # [!code highlight] + stream_mode="values", + ): + chunk["messages"][-1].pretty_print() + + config = { + "configurable": { + "thread_id": "2", # [!code highlight] + "user_id": "1", + } + } + + async for chunk in graph.astream( + {"messages": [{"role": "user", "content": "what is my name?"}]}, + config, # [!code highlight] + stream_mode="values", + ): + chunk["messages"][-1].pretty_print() + ``` + + + ::: ### Use semantic search @@ -1096,6 +1550,12 @@ const items = await store.search(["user_123", "memories"], { ``` ::: +:::js + +`InMemoryStore` is suitable for development. For production, use a persistent store like `PostgresStore`, `MongoDBStore`, or `RedisStore`. + +::: + :::python @@ -1107,7 +1567,7 @@ const items = await store.search(["user_123", "memories"], { from langgraph.graph import START, MessagesState, StateGraph from langgraph.runtime import Runtime # [!code highlight] - model = init_chat_model("gpt-4.1-mini") + model = init_chat_model("gpt-5.4-mini") # Create store with semantic search enabled embeddings = init_embeddings("openai:text-embedding-3-small") @@ -1151,60 +1611,188 @@ const items = await store.search(["user_123", "memories"], { ::: :::js - - ```typescript - import { OpenAIEmbeddings, ChatOpenAI } from "@langchain/openai"; - import { StateGraph, StateSchema, MessagesValue, GraphNode, START, InMemoryStore } from "@langchain/langgraph"; - - const State = new StateSchema({ - messages: MessagesValue, - }); - - const model = new ChatOpenAI({ model: "gpt-4.1-mini" }); - - // Create store with semantic search enabled - const embeddings = new OpenAIEmbeddings({ model: "text-embedding-3-small" }); - const store = new InMemoryStore({ - index: { - embeddings, - dims: 1536, - } - }); - - await store.put(["user_123", "memories"], "1", { text: "I love pizza" }); - await store.put(["user_123", "memories"], "2", { text: "I am a plumber" }); - - const chat: GraphNode = async (state, runtime) => { - // Search based on 
user's last message - const items = await runtime.store.search( - ["user_123", "memories"], - { query: state.messages.at(-1)?.content, limit: 2 } - ); - const memories = items.map(item => item.value.text).join("\n"); - const memoriesText = memories ? `## Memories of user\n${memories}` : ""; - - const response = await model.invoke([ - { role: "system", content: `You are a helpful assistant.\n${memoriesText}` }, - ...state.messages, - ]); - - return { messages: [response] }; - }; - - const builder = new StateGraph(State) - .addNode("chat", chat) - .addEdge(START, "chat"); - const graph = builder.compile({ store }); - - for await (const [message, metadata] of await graph.stream( - { messages: [{ role: "user", content: "I'm hungry" }] }, - { streamMode: "messages" } - )) { - if (message.content) { - console.log(message.content); - } - } - ``` + + + ```typescript + import { OpenAIEmbeddings, ChatOpenAI } from "@langchain/openai"; + import { StateGraph, StateSchema, MessagesValue, GraphNode, START, InMemoryStore } from "@langchain/langgraph"; + + const State = new StateSchema({ + messages: MessagesValue, + }); + + const model = new ChatOpenAI({ model: "gpt-5.4-mini" }); + + // Create store with semantic search enabled + const embeddings = new OpenAIEmbeddings({ model: "text-embedding-3-small" }); + const store = new InMemoryStore({ + index: { + embeddings, + dims: 1536, + } + }); + + await store.put(["user_123", "memories"], "1", { text: "I love pizza" }); + await store.put(["user_123", "memories"], "2", { text: "I am a plumber" }); + + const chat: GraphNode = async (state, runtime) => { + // Search based on user's last message + const items = await runtime.store.search( + ["user_123", "memories"], + { query: state.messages.at(-1)?.content, limit: 2 } + ); + const memories = items.map(item => item.value.text).join("\n"); + const memoriesText = memories ? 
`## Memories of user\n${memories}` : ""; + + const response = await model.invoke([ + { role: "system", content: `You are a helpful assistant.\n${memoriesText}` }, + ...state.messages, + ]); + + return { messages: [response] }; + }; + + const builder = new StateGraph(State) + .addNode("chat", chat) + .addEdge(START, "chat"); + const graph = builder.compile({ store }); + + for await (const [message, metadata] of await graph.stream( + { messages: [{ role: "user", content: "I'm hungry" }] }, + { streamMode: "messages" } + )) { + if (message.content) { + console.log(message.content); + } + } + ``` + + + ```typescript + import { ChatOpenAI, OpenAIEmbeddings } from "@langchain/openai"; + import { MongoDBStore } from "@langchain/langgraph-checkpoint-mongodb"; + import { StateGraph, StateSchema, MessagesValue, GraphNode, START } from "@langchain/langgraph"; + + const State = new StateSchema({ + messages: MessagesValue, + }); + + const model = new ChatOpenAI({ model: "gpt-5.4-mini" }); + + // Create store with semantic search enabled + const MONGODB_URI = "mongodb://user:password@localhost:27017"; + const store = await MongoDBStore.fromConnString(MONGODB_URI, { + dbName: "langgraph", + collectionName: "store", + embeddings: new OpenAIEmbeddings({ model: "text-embedding-3-small" }), + indexConfig: { + name: "store_vector_index", + dims: 1536, + embeddingKey: "text", + }, + }); + + await store.put(["user_123", "memories"], "1", { text: "I love pizza" }); + await store.put(["user_123", "memories"], "2", { text: "I am a plumber" }); + + const chat: GraphNode = async (state, runtime) => { + // Search based on user's last message + const items = await runtime.store.search( + ["user_123", "memories"], + { query: state.messages.at(-1)?.content, limit: 2 } + ); + const memories = items.map(item => item.value.text).join("\n"); + const memoriesText = memories ? 
`## Memories of user\n${memories}` : ""; + + const response = await model.invoke([ + { role: "system", content: `You are a helpful assistant.\n${memoriesText}` }, + ...state.messages, + ]); + + return { messages: [response] }; + }; + + const builder = new StateGraph(State) + .addNode("chat", chat) + .addEdge(START, "chat"); + const graph = builder.compile({ store }); + + for await (const [message, metadata] of await graph.stream( + { messages: [{ role: "user", content: "I'm hungry" }] }, + { streamMode: "messages" } + )) { + if (message.content) { + console.log(message.content); + } + } + ``` + + + + Auto embedding requires MongoDB Atlas. MongoDB generates embeddings server-side via Voyage AI. See the [Automated Embedding documentation](https://www.mongodb.com/docs/atlas/atlas-vector-search/automated-embedding/) for more information. + + + ```typescript + import { StateGraph, StateSchema, MessagesValue, GraphNode, START } from "@langchain/langgraph"; + import { MongoDBStore } from "@langchain/langgraph-checkpoint-mongodb"; + import { ChatOpenAI } from "@langchain/openai"; + + const State = new StateSchema({ + messages: MessagesValue, + }); + + const model = new ChatOpenAI({ model: "gpt-5.4-mini" }); + + // Auto embedding: no embeddings instance needed. + // Configure the Voyage AI model and the field path MongoDB will read server-side. 
+ const MONGODB_URI = "mongodb://user:password@localhost:27017"; + const store = await MongoDBStore.fromConnString(MONGODB_URI, { + dbName: "langgraph", + collectionName: "store", + indexConfig: { + name: "store_vector_index", + path: "value.content", // MongoDB reads this field and embeds it server-side + model: "voyage-4", // Voyage AI model used by MongoDB Atlas + }, + }); + + // Values must have the content field matching the configured path (value.content) + await store.put(["user_123", "memories"], "1", { content: "I love pizza" }); + await store.put(["user_123", "memories"], "2", { content: "I am a plumber" }); + + const chat: GraphNode = async (state, runtime) => { + // MongoDB generates the query embedding server-side + const items = await runtime.store.search( + ["user_123", "memories"], + { query: state.messages.at(-1)?.content, limit: 2 } + ); + const memories = items.map(item => item.value.content).join("\n"); + const memoriesText = memories ? `## Memories of user\n${memories}` : ""; + + const response = await model.invoke([ + { role: "system", content: `You are a helpful assistant.\n${memoriesText}` }, + ...state.messages, + ]); + + return { messages: [response] }; + }; + + const builder = new StateGraph(State) + .addNode("chat", chat) + .addEdge(START, "chat"); + const graph = builder.compile({ store }); + + for await (const [message, metadata] of await graph.stream( + { messages: [{ role: "user", content: "I'm hungry" }] }, + { streamMode: "messages" } + )) { + if (message.content) { + console.log(message.content); + } + } + ``` + + ::: @@ -1732,7 +2320,6 @@ const summarizeConversation: GraphNode = async (state) => { MemorySaver, } from "@langchain/langgraph"; import * as z from "zod"; - import { v4 as uuidv4 } from "uuid"; const memory = new MemorySaver(); @@ -1752,7 +2339,7 @@ const summarizeConversation: GraphNode = async (state) => { let { messages } = state; if (summary) { const systemMessage = new SystemMessage({ - id: uuidv4(), + id: 
crypto.randomUUID(), content: `Summary of conversation earlier: ${summary}`, }); messages = [systemMessage, ...messages]; @@ -1789,7 +2376,7 @@ const summarizeConversation: GraphNode = async (state) => { const allMessages = [ ...messages, - new HumanMessage({ id: uuidv4(), content: summaryMessage }), + new HumanMessage({ id: crypto.randomUUID(), content: summaryMessage }), ]; const response = await model.invoke(allMessages); @@ -2160,7 +2747,7 @@ await checkpointer.deleteThread(threadId); ## Database management -If you are using any database-backed persistence implementation (such as Postgres or Redis) to store short and/or long-term memory, you will need to run migrations to set up the required schema before you can use it with your database. +If you are using any database-backed persistence implementation (such as Postgres, Redis, or Oracle) to store short-term and/or long-term memory, you will need to run migrations to set up the required schema before you can use it with your database. By convention, most database-specific libraries define a `setup()` method on the checkpointer or store instance that runs the required migrations. However, you should check with your specific implementation of @[`BaseCheckpointSaver`] or @[`BaseStore`] to confirm the exact method name and usage. diff --git a/agentic-rag.mdx b/agentic-rag.mdx index e3f55a6..477f41e 100644 --- a/agentic-rag.mdx +++ b/agentic-rag.mdx @@ -3,6 +3,8 @@ title: Build a custom RAG agent with LangGraph sidebarTitle: Custom RAG agent --- +import AgenticRagAssembleGraphPy from '/snippets/code-samples/agentic-rag-assemble-graph-py.mdx'; + ## Overview In this tutorial we will build a [retrieval](/oss/langchain/retrieval) agent using LangGraph. 
@@ -30,7 +32,7 @@ Let's download the required packages and set our API keys: :::python ```python -pip install -U langgraph "langchain[openai]" langchain-community langchain-text-splitters bs4 +pip install -U langgraph "langchain[openai]" langchain-text-splitters bs4 requests ``` ```python @@ -50,19 +52,19 @@ _set_env("OPENAI_API_KEY") :::js ```bash npm -npm install @langchain/langgraph @langchain/openai @langchain/community @langchain/textsplitters +npm install @langchain/langgraph @langchain/openai @langchain/textsplitters cheerio ``` ```bash pnpm -pnpm install @langchain/langgraph @langchain/openai @langchain/community @langchain/textsplitters +pnpm install @langchain/langgraph @langchain/openai @langchain/textsplitters cheerio ``` ```bash yarn -yarn add @langchain/langgraph @langchain/openai @langchain/community @langchain/textsplitters +yarn add @langchain/langgraph @langchain/openai @langchain/textsplitters cheerio ``` ```bash bun -bun add @langchain/langgraph @langchain/openai @langchain/community @langchain/textsplitters +bun add @langchain/langgraph @langchain/openai @langchain/textsplitters cheerio ``` @@ -75,9 +77,20 @@ bun add @langchain/langgraph @langchain/openai @langchain/community @langchain/t ## 1. Preprocess documents :::python -1. Fetch documents to use in our RAG system. We will use three of the most recent pages from [Lilian Weng's excellent blog](https://lilianweng.github.io/). We'll start by fetching the content of the pages using `WebBaseLoader` utility: +1. Fetch documents to use in our RAG system. We will use three of the most recent pages from [Lilian Weng's excellent blog](https://lilianweng.github.io/). We'll start by fetching the content of the pages with a minimal helper built on `requests` and `BeautifulSoup`: ```python - from langchain_community.document_loaders import WebBaseLoader + import bs4 + import requests + from langchain_core.documents import Document + + + # Below is a minimal helper for demonstration purposes. 
+ def load_web_page(url: str, bs_kwargs: dict | None = None) -> list[Document]: + response = requests.get(url) + response.raise_for_status() + soup = bs4.BeautifulSoup(response.text, "html.parser", **(bs_kwargs or {})) + return [Document(page_content=soup.get_text(), metadata={"source": url})] + urls = [ "https://lilianweng.github.io/posts/2024-11-28-reward-hacking/", @@ -85,7 +98,7 @@ bun add @langchain/langgraph @langchain/openai @langchain/community @langchain/t "https://lilianweng.github.io/posts/2024-04-12-diffusion-video/", ] - docs = [WebBaseLoader(url).load() for url in urls] + docs = [load_web_page(url) for url in urls] ``` ```python docs[0][0].page_content.strip()[:1000] @@ -107,9 +120,26 @@ bun add @langchain/langgraph @langchain/openai @langchain/community @langchain/t ::: :::js -1. Fetch documents to use in our RAG system. We will use three of the most recent pages from [Lilian Weng's excellent blog](https://lilianweng.github.io/). We'll start by fetching the content of the pages using `CheerioWebBaseLoader`: +1. Fetch documents to use in our RAG system. We will use three of the most recent pages from [Lilian Weng's excellent blog](https://lilianweng.github.io/). We'll start by fetching the content of the pages with a minimal helper built on `fetch` and `cheerio`: ```typescript - import { CheerioWebBaseLoader } from "@langchain/community/document_loaders/web/cheerio"; + import * as cheerio from "cheerio"; + import { Document } from "@langchain/core/documents"; + + // Below is a minimal helper for demonstration purposes. 
+ async function loadWebPage( + url: string, + selector: string = "body" + ): Promise<Document[]> { + const response = await fetch(url); + const html = await response.text(); + const $ = cheerio.load(html); + return [ + new Document({ + pageContent: $(selector).text(), + metadata: { source: url }, + }), + ]; + } const urls = [ "https://lilianweng.github.io/posts/2023-06-23-agent/", @@ -117,9 +147,7 @@ bun add @langchain/langgraph @langchain/openai @langchain/community @langchain/t "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/", ]; - const docs = await Promise.all( - urls.map((url) => new CheerioWebBaseLoader(url).load()), - ); + const docs = await Promise.all(urls.map((url) => loadWebPage(url))); ``` 2. Split the fetched documents into smaller chunks for indexing into our vectorstore: ```typescript @@ -209,7 +237,7 @@ Note that the components will operate on the [`MessagesState`](/oss/langgraph/gr from langgraph.graph import MessagesState from langchain.chat_models import init_chat_model - response_model = init_chat_model("gpt-4.1", temperature=0) + response_model = init_chat_model("gpt-5.4", temperature=0) def generate_query_or_respond(state: MessagesState): @@ -264,7 +292,7 @@ Note that the components will operate on the [`MessagesState`](/oss/langgraph/gr const generateQueryOrRespond: GraphNode = async (state) => { const model = new ChatOpenAI({ - model: "gpt-4.1", + model: "gpt-5.4", temperature: 0, }).bindTools(tools); // [!code highlight] @@ -340,7 +368,7 @@ Note that the components will operate on the [`MessagesState`](/oss/langgraph/gr ) - grader_model = init_chat_model("gpt-4.1", temperature=0) + grader_model = init_chat_model("gpt-5.4", temperature=0) def grade_documents( @@ -452,7 +480,7 @@ Note that the components will operate on the [`MessagesState`](/oss/langgraph/gr const gradeDocuments: GraphNode = async (state) => { const model = new ChatOpenAI({ - model: "gpt-4.1", + model: "gpt-5.4", temperature: 0, 
}).withStructuredOutput(gradeDocumentsSchema); @@ -596,7 +624,7 @@ Note that the components will operate on the [`MessagesState`](/oss/langgraph/gr const question = state.messages.at(0)?.content; const model = new ChatOpenAI({ - model: "gpt-4.1", + model: "gpt-5.4", temperature: 0, }); @@ -721,7 +749,7 @@ Note that the components will operate on the [`MessagesState`](/oss/langgraph/gr ); const llm = new ChatOpenAI({ - model: "gpt-4.1", + model: "gpt-5.4", temperature: 0, }); @@ -777,51 +805,14 @@ Now we'll assemble all the nodes and edges into a complete graph: :::python * Start with a `generate_query_or_respond` and determine if we need to call `retriever_tool` -* Route to next step using `tools_condition`: +* Route to next step based on whether the model made tool calls: * If `generate_query_or_respond` returned `tool_calls`, call `retriever_tool` to retrieve context * Otherwise, respond directly to the user * Grade retrieved document content for relevance to the question (`grade_documents`) and route to next step: * If not relevant, rewrite the question using `rewrite_question` and then call `generate_query_or_respond` again * If relevant, proceed to `generate_answer` and generate final response using the @[`ToolMessage`] with the retrieved document context -```python -from langgraph.graph import StateGraph, START, END -from langgraph.prebuilt import ToolNode, tools_condition - -workflow = StateGraph(MessagesState) - -# Define the nodes we will cycle between -workflow.add_node(generate_query_or_respond) -workflow.add_node("retrieve", ToolNode([retriever_tool])) -workflow.add_node(rewrite_question) -workflow.add_node(generate_answer) - -workflow.add_edge(START, "generate_query_or_respond") - -# Decide whether to retrieve -workflow.add_conditional_edges( - "generate_query_or_respond", - # Assess LLM decision (call `retriever_tool` tool or respond to the user) - tools_condition, - { - # Translate the condition outputs to nodes in our graph - "tools": "retrieve", - 
END: END, - }, -) - -# Edges taken after the `action` node is called. -workflow.add_conditional_edges( - "retrieve", - # Assess agent decision - grade_documents, -) -workflow.add_edge("generate_answer", END) -workflow.add_edge("rewrite_question", "generate_query_or_respond") - -# Compile -graph = workflow.compile() -``` + Visualize the graph: @@ -850,7 +841,7 @@ display(Image(graph.get_graph().draw_mermaid_png())) ```typescript import { StateGraph, START, END, ConditionalEdgeRouter } from "@langchain/langgraph"; import { ToolNode } from "@langchain/langgraph/prebuilt"; -import { AIMessage } from "@langchain/core/messages"; +import { AIMessage } from "langchain"; // Create a ToolNode for the retriever const toolNode = new ToolNode(tools); @@ -865,7 +856,7 @@ const shouldRetrieve: ConditionalEdgeRouter = (state) } // Define the graph -const builder = new StateGraph(State) +const builder = new StateGraph(GraphState) .addNode("generateQueryOrRespond", generateQueryOrRespond) .addNode("retrieve", toolNode) .addNode("gradeDocuments", gradeDocuments) @@ -928,7 +919,6 @@ Tool Calls: query: types of reward hacking - Update from node retrieve ================================= Tool Message ================================== Name: retrieve_blog_posts @@ -944,7 +934,6 @@ Let's Define Reward Hacking# Reward shaping in RL is challenging. Reward hacking occurs when an RL agent exploits flaws or ambiguities in the reward function to obtain high rewards without genuinely learning the intended behaviors or completing the task as designed. 
In recent years, several related concepts have been proposed, all referring to some form of reward hacking: - Update from node generate_answer ================================== Ai Message ================================== diff --git a/backward-compatibility.mdx b/backward-compatibility.mdx new file mode 100644 index 0000000..9125628 --- /dev/null +++ b/backward-compatibility.mdx @@ -0,0 +1,167 @@ +--- +title: Backward compatibility +description: Update LangGraph graph code in production without breaking in-flight runs. +--- + +Software needs to change in production. New requirements, bug fixes, and refactors all eventually land in your graph code. Because LangGraph runs the latest deployed graph against state that has been [persisted](/oss/langgraph/persistence) for existing threads, every change you ship is effectively an API change that must stay backward compatible with your existing checkpoints. + +Unlike workflow engines that pin a run to the version of code it started with, LangGraph applies the latest graph immediately to *every* thread, both new threads and threads that resume from a checkpoint. This is convenient: bug fixes propagate to in-flight conversations and agents without ceremony. It also means you must reason about how each change interacts with runs that started under the previous version of the code. + +There are three categories of compatibility issues to watch for, in roughly the order you will encounter them: + +1. [Technical compatibility](#technical-compatibility): The most common; the new code must still load and execute against existing State. +2. [Business compatibility](#business-compatibility): Less common; existing runs should keep following the old business logic even though the code has changed. +3. [Non-determinism](#non-determinism): Only applies to the [Functional API](/oss/langgraph/functional-api) and to tasks or `interrupt` calls inside a Graph API node. 
+ + +For a short summary of which graph topology and state changes the runtime supports by default, see [Graph migrations](/oss/langgraph/graph-api#graph-migrations). The rest of this page covers the patterns you can apply when a change falls outside that supported set. + + +## Technical compatibility + +Technical compatibility is the equivalent of an API breaking change in a microservice. The "API" here is the contract between your graph code and the data already persisted by the [checkpointer](/oss/langgraph/persistence#checkpointer-libraries) for existing threads. When a thread resumes, LangGraph deserializes the saved state, dispatches it to a node by name, and expects the node to return values that fit the state schema. + +Common technical breakages: + +- **Renaming or removing a node** while threads are paused at or about to enter that node, for example at an @[`interrupt`] or via a checkpointed conditional edge that still routes to the old name. On resume, LangGraph cannot find the node by its saved name and the run fails. The starting point for resuming a run is the beginning of the node where execution stopped, so a missing node has nowhere to resume from. +- **Renaming or removing a State key** that older checkpoints still contain or that downstream nodes still read. +- **Tightening a State field**, such as making an `Optional` field required, narrowing a type, or adding a new required field with no default. Existing checkpoints will not satisfy the new schema. + +Edge topology itself is *not* persisted in the checkpoint. Adding, removing, or rerouting edges between nodes that still exist is safe for in-flight threads. Per the [Graph migrations](/oss/langgraph/graph-api#graph-migrations) summary, the only topology change that can break an interrupted thread is renaming or removing a node. + +### Recommended patterns + +:::python + +- Add new state fields as `NotRequired` (or `Optional[...] 
= None`) so old checkpoints still validate: + + ```python + from typing import NotRequired + from typing_extensions import TypedDict + + class State(TypedDict): + messages: list + summary: NotRequired[str] # [!code ++] + ``` + +- Treat removals as deprecations. Keep the field defined on the state for at least one drain cycle, even if no node reads it, so existing checkpoints continue to load. +- Rename through *add-then-remove*. Add the new field or node alongside the old one, dual-write or route to both for a deprecation window, then remove the old one once you have confirmed no in-flight thread depends on it. +- Keep node functions tolerant of unknown keys. `TypedDict` ignores extra keys at runtime, so leftover state from an older code version will not raise unless a node explicitly reads a missing key. +- Use [time travel](/oss/langgraph/use-time-travel) and @[`graph.get_state`][get_state] to spot-check existing threads against the new code in a staging deployment before rolling out. + +::: + +:::js + +- Mark new state fields as optional (`z.string().optional()` or `.nullish()`) so old checkpoints still validate. +- Treat removals as deprecations: keep the field on the schema for at least one drain cycle so existing checkpoints continue to load. +- Rename via add-then-remove: add the new field or node alongside the old one, dual-write or route to both for a deprecation window, then remove the old one once no in-flight thread depends on it. +- Use [time travel](/oss/langgraph/use-time-travel) and @[`graph.getState`][get_state] to spot-check existing threads against the new code in a staging deployment before rolling out. + +::: + +### Detecting in-flight threads + +Before you remove a node, rename a State key, or otherwise make a change that older threads cannot tolerate, you want to know whether any threads are currently parked on the version of the code you are about to drop. 
LangGraph itself does not maintain a search index over thread state, so the answer depends on where your graph runs. + +**If you deploy to [LangSmith](/langsmith/deployment).** Use the Agent Server's thread search to filter by status. The `status` field accepts `idle`, `busy`, `interrupted`, and `error`, so you can bulk-query for `interrupted` or `busy` threads, optionally narrowed with metadata filters. See [Filter by thread status](/langsmith/use-threads#filter-by-thread-status) and [List threads](/langsmith/use-threads#list-threads). + +**Anywhere LangGraph runs.** Use [LangSmith tracing](/oss/langgraph/observability) to monitor which nodes are being entered and exited in production. This is the most reliable signal that a node or state field is no longer reachable in any active code path. + +**When you already have a `thread_id`.** Inspect that single thread directly: + +:::python + +- @[`graph.get_state(config)`][get_state] returns the latest checkpoint, including which node the thread is paused at and any pending interrupts. +- @[`graph.get_state_history(config)`][get_state_history] returns the full chronological list of checkpoints for the thread. + +::: + +:::js + +- @[`graph.getState(config)`][get_state] returns the latest checkpoint, including which node the thread is paused at and any pending interrupts. +- @[`graph.getStateHistory(config)`][get_state_history] returns the full chronological list of checkpoints for the thread. + +::: + +When in doubt, keep the deprecated node or field in place until both the Agent Server thread list and tracing show no further activity on it. + +## Business compatibility + +Sometimes a change is technically valid (every existing checkpoint still loads and every node still resolves), but the *meaning* of the new graph differs from the old one. The new behavior is correct for new threads, and you do not want to retroactively apply it to threads that started under the old logic. 
+ +For example, suppose your graph runs `intake → triage → respond`, and you decide to insert a new `policy_check` step between `triage` and `respond`: + +- Threads that have already passed `triage` should continue straight to `respond` (the old flow). +- New threads should run the full new flow. + +The recommended pattern is to record the relevant *behavioral version* on the state at thread start, then branch on it with a [conditional edge](/oss/langgraph/graph-api#conditional-edges): + +:::python + +```python +from typing import NotRequired +from typing_extensions import TypedDict + +from langgraph.graph import END, START, StateGraph + + +class State(TypedDict): + request: str + flow_version: NotRequired[int] + response: NotRequired[str] + + +def intake(state: State) -> dict: + # Stamp new threads with the current flow version. Existing threads + # that resume past `intake` keep whatever value was already saved. + return {"flow_version": state.get("flow_version", 2)} + + +def triage(state: State) -> dict: ... +def policy_check(state: State) -> dict: ... +def respond(state: State) -> dict: ... + + +def after_triage(state: State) -> str: + if state.get("flow_version", 1) >= 2: + return "policy_check" + return "respond" + + +builder = StateGraph(State) +builder.add_node("intake", intake) +builder.add_node("triage", triage) +builder.add_node("policy_check", policy_check) +builder.add_node("respond", respond) +builder.add_edge(START, "intake") +builder.add_edge("intake", "triage") +builder.add_conditional_edges("triage", after_triage, ["policy_check", "respond"]) +builder.add_edge("policy_check", "respond") +builder.add_edge("respond", END) + +graph = builder.compile() +``` + +::: + +Old threads that resume after `triage` read `flow_version` from their saved state (or fall through to the v1 default) and skip `policy_check`. New threads start at `intake`, are stamped with `flow_version=2`, and run the new path. 
Once all v1 threads have completed, you can remove the version flag and the conditional edge. + +This pattern only works if you set the version *at thread start*, before any branch that needs to be versioned. Setting it later means existing threads will not have it set when they need it. + +## Non-determinism + +This category only applies to the [Functional API](/oss/langgraph/functional-api) and to [**tasks**](/oss/langgraph/functional-api#task) or @[`interrupt`] calls inside a [Graph API](/oss/langgraph/graph-api) **node**. Plain Graph API **nodes** [re-run from the start of the node function](/oss/langgraph/graph-api#re-execution-and-idempotency) on resume; design side effects to be idempotent, but you do not need to preserve task call order unless you use **tasks** or @[`interrupt`] in that **node**. + +A Functional API **entrypoint** compiles to a single **node** that replays the entrypoint body from the beginning when a run resumes, using cached @[`@task`][task] results to skip work that has already been done. Two kinds of changes break this model: + +- **Adding, removing, or reordering `@task` calls or @[`interrupt`] calls** that come *before* the resume point. LangGraph matches cached results and resume values to calls by their position in the replay, so shifting that position can cause the wrong cached value to be replayed against a different call. +- **Introducing non-deterministic operations outside of a `@task`**, such as `time.time()`, `random.random()`, or a network call inlined in the entrypoint body. On replay these produce different values than they did on the first run, which can change the control flow. + +For a deeper treatment with examples, see [Determinism](/oss/langgraph/functional-api#determinism) and [Common pitfalls](/oss/langgraph/functional-api#common-pitfalls) in the Functional API guide. 
+ +If you need to make non-trivial code changes to an `@entrypoint` that has in-flight runs, the safest options are: + +- Let in-flight runs drain before deploying the change. +- Wrap any new logic in a new `@task` so its results are checkpointed independently. +- Register a new entrypoint under a new graph name in `langgraph.json` for the new behavior, and route new threads to it. diff --git a/case-studies.mdx b/case-studies.mdx index 9c191a1..a0db9f3 100644 --- a/case-studies.mdx +++ b/case-studies.mdx @@ -3,7 +3,6 @@ title: Case studies --- - This list of companies using LangGraph and their success stories is compiled from public sources. If your company uses LangGraph, we'd love for you to share your story and add it to the list. You’re also welcome to contribute updates based on publicly available information from other companies, such as blog posts or press releases. | Company | Industry | Use case | Reference | diff --git a/deploy.mdx b/deploy.mdx index a6c1846..7b75353 100644 --- a/deploy.mdx +++ b/deploy.mdx @@ -7,7 +7,7 @@ This guide shows you how to deploy your agent to **[LangSmith Cloud](/langsmith/ Traditional hosting platforms are built for stateless, short-lived web applications. LangSmith Cloud is **purpose-built for stateful, long-running agents** that require persistent state and background execution. -LangSmith offers multiple deployment options beyond Cloud, including deploying with a [control plane (hybrid/self-hosted)](/langsmith/deploy-with-control-plane) or as [standalone servers](/langsmith/deploy-standalone-server). For more information, refer to the [Deployment overview](/langsmith/deployment). +LangSmith offers multiple deployment options beyond Cloud, including [hybrid](/langsmith/hybrid), [standalone servers](/langsmith/deploy-standalone-server), and [self-hosted with control plane](/langsmith/deploy-with-control-plane). For more information, refer to the [Deployment overview](/langsmith/deployment). 
## Prerequisites @@ -15,7 +15,7 @@ LangSmith offers multiple deployment options beyond Cloud, including deploying w Before you begin, ensure you have the following: - A [GitHub account](https://github.com/) -- A [LangSmith account](https://smith.langchain.com/) (free to sign up) +- A [LangSmith account](https://smith.langchain.com) (free to sign up) ## Deploy your agent @@ -28,7 +28,7 @@ Your application's code must reside in a GitHub repository to be deployed on Lan - Log in to [LangSmith](https://smith.langchain.com/). In the left sidebar, select **Deployments**. + Log in to [LangSmith](https://smith.langchain.com). In the left sidebar, select **Deployments**. Click the **+ New Deployment** button. A pane will open where you can fill in the required fields. @@ -125,9 +125,9 @@ for await (const chunk of streamResponse) { curl -s --request POST \ --url /runs/stream \ --header 'Content-Type: application/json' \ - --header "X-Api-Key: " \ + --header "X-Api-Key: " \ --data "{ - \"assistant_id\": \"agent\", + \"assistant_id\": \"agent\", `# Name of agent. Defined in langgraph.json.` \"input\": { \"messages\": [ { diff --git a/durable-execution.mdx b/durable-execution.mdx deleted file mode 100644 index 388a477..0000000 --- a/durable-execution.mdx +++ /dev/null @@ -1,284 +0,0 @@ ---- -title: Durable execution ---- - - - -**Durable execution** is a technique in which a process or workflow saves its progress at key points, allowing it to pause and later resume exactly where it left off. This is particularly useful in scenarios that require [human-in-the-loop](/oss/langgraph/interrupts), where users can inspect, validate, or modify the process before continuing, and in long-running tasks that might encounter interruptions or errors (e.g., calls to an LLM timing out). By preserving completed work, durable execution enables a process to resume without reprocessing previous steps -- even after a significant delay (e.g., a week later). 
- -LangGraph's built-in [persistence](/oss/langgraph/persistence) layer provides durable execution for workflows, ensuring that the state of each execution step is saved to a durable store. This capability guarantees that if a workflow is interrupted -- whether by a system failure or for [human-in-the-loop](/oss/langgraph/interrupts) interactions -- it can be resumed from its last recorded state. - - -If you are using LangGraph with a checkpointer, you already have durable execution enabled. You can pause and resume workflows at any point, even after interruptions or failures. -To make the most of durable execution, ensure that your workflow is designed to be [deterministic](#determinism-and-consistent-replay) and [idempotent](#determinism-and-consistent-replay) and wrap any side effects or non-deterministic operations inside [tasks](/oss/langgraph/functional-api#task). You can use [tasks](/oss/langgraph/functional-api#task) from both the [StateGraph (Graph API)](/oss/langgraph/graph-api) and the [Functional API](/oss/langgraph/functional-api). - - -## Requirements - -To leverage durable execution in LangGraph, you need to: - -1. Enable [persistence](/oss/langgraph/persistence) in your workflow by specifying a [checkpointer](/oss/langgraph/persistence#checkpointer-libraries) that will save workflow progress. -2. Specify a [thread identifier](/oss/langgraph/persistence#threads) when executing a workflow. This will track the execution history for a particular instance of the workflow. - -3. Wrap any non-deterministic operations (e.g., random number generation) or operations with side effects (e.g., file writes, API calls) inside @[tasks][task] to ensure that when a workflow is resumed, these operations are not repeated for the particular run, and instead their results are retrieved from the persistence layer. For more information, see [Determinism and Consistent Replay](#determinism-and-consistent-replay). 
- -## Determinism and consistent replay - -When you resume a workflow run, the code does **NOT** resume from the **same line of code** where execution stopped; instead, it will identify an appropriate [starting point](#starting-points-for-resuming-workflows) from which to pick up where it left off. This means that the workflow will replay all steps from the [starting point](#starting-points-for-resuming-workflows) until it reaches the point where it was stopped. - -As a result, when you are writing a workflow for durable execution, you must wrap any non-deterministic operations (e.g., random number generation) and any operations with side effects (e.g., file writes, API calls) inside [tasks](/oss/langgraph/functional-api#task) or [nodes](/oss/langgraph/graph-api#nodes). - -To ensure that your workflow is deterministic and can be consistently replayed, follow these guidelines: - -* **Avoid Repeating Work**: If a [node](/oss/langgraph/graph-api#nodes) contains multiple operations with side effects (e.g., logging, file writes, or network calls), wrap each operation in a separate **task**. This ensures that when the workflow is resumed, the operations are not repeated, and their results are retrieved from the persistence layer. -* **Encapsulate Non-Deterministic Operations:** Wrap any code that might yield non-deterministic results (e.g., random number generation) inside **tasks** or **nodes**. This ensures that, upon resumption, the workflow follows the exact recorded sequence of steps with the same outcomes. -* **Use Idempotent Operations**: When possible ensure that side effects (e.g., API calls, file writes) are idempotent. This means that if an operation is retried after a failure in the workflow, it will have the same effect as the first time it was executed. This is particularly important for operations that result in data writes. 
In the event that a **task** starts but fails to complete successfully, the workflow's resumption will re-run the **task**, relying on recorded outcomes to maintain consistency. Use idempotency keys or verify existing results to avoid unintended duplication, ensuring a smooth and predictable workflow execution. - -For some examples of pitfalls to avoid, see the [Common Pitfalls](/oss/langgraph/functional-api#common-pitfalls) section in the functional API, which shows -how to structure your code using **tasks** to avoid these issues. The same principles apply to the @[StateGraph (Graph API)][StateGraph]. - -## Durability modes - -LangGraph supports three durability modes that allow you to balance performance and data consistency based on your application's requirements. A higher durability mode adds more overhead to the workflow execution. You can specify the durability mode when calling any graph execution method: - -:::python -```python -graph.stream( - {"input": "test"}, - durability="sync" -) -``` -::: - -:::js -```typescript -await graph.stream( - { input: "test" }, - { durability: "sync" } -) -``` -::: - -The durability modes, from least to most durable, are as follows: - -* `"exit"`: LangGraph persists changes only when graph execution exits either successfully, with an error, or due to a human in the loop interrupt. This provides the best performance for long-running graphs but means intermediate state is not saved, so you cannot recover from system failures (like process crashes) that occur mid-execution. -* `"async"`: LangGraph persists changes asynchronously while the next step executes. This provides good performance and durability, but there's a small risk that LangGraph does not write checkpoints if the process crashes during execution. -* `"sync"`: LangGraph persists changes synchronously before the next step starts. 
This ensures that LangGraph writes every checkpoint before continuing execution, providing high durability at the cost of some performance overhead. - -## Using tasks in nodes - -If a [node](/oss/langgraph/graph-api#nodes) contains multiple operations, you may find it easier to convert each operation into a **task** rather than refactor the operations into individual nodes. - -:::python - - - ```python - from typing import NotRequired - from typing_extensions import TypedDict - from langchain_core.utils.uuid import uuid7 - - from langgraph.checkpoint.memory import InMemorySaver - from langgraph.graph import StateGraph, START, END - import requests - - # Define a TypedDict to represent the state - class State(TypedDict): - url: str - result: NotRequired[str] - - def call_api(state: State): - """Example node that makes an API request.""" - result = requests.get(state['url']).text[:100] # Side-effect # [!code highlight] - return { - "result": result - } - - # Create a StateGraph builder and add a node for the call_api function - builder = StateGraph(State) - builder.add_node("call_api", call_api) - - # Connect the start and end nodes to the call_api node - builder.add_edge(START, "call_api") - builder.add_edge("call_api", END) - - # Specify a checkpointer - checkpointer = InMemorySaver() - - # Compile the graph with the checkpointer - graph = builder.compile(checkpointer=checkpointer) - - # Define a config with a thread ID. 
- thread_id = str(uuid7()) - config = {"configurable": {"thread_id": thread_id}} - - # Invoke the graph - graph.invoke({"url": "https://www.example.com"}, config) -``` - - - ```python - from typing import NotRequired - from typing_extensions import TypedDict - from langchain_core.utils.uuid import uuid7 - - from langgraph.checkpoint.memory import InMemorySaver - from langgraph.func import task - from langgraph.graph import StateGraph, START, END - import requests - - # Define a TypedDict to represent the state - class State(TypedDict): - urls: list[str] - result: NotRequired[list[str]] - - - @task - def _make_request(url: str): - """Make a request.""" - return requests.get(url).text[:100] # [!code highlight] - - def call_api(state: State): - """Example node that makes an API request.""" - requests = [_make_request(url) for url in state['urls']] # [!code highlight] - results = [request.result() for request in requests] - return { - "results": results - } - - # Create a StateGraph builder and add a node for the call_api function - builder = StateGraph(State) - builder.add_node("call_api", call_api) - - # Connect the start and end nodes to the call_api node - builder.add_edge(START, "call_api") - builder.add_edge("call_api", END) - - # Specify a checkpointer - checkpointer = InMemorySaver() - - # Compile the graph with the checkpointer - graph = builder.compile(checkpointer=checkpointer) - - # Define a config with a thread ID. 
- thread_id = str(uuid7()) - config = {"configurable": {"thread_id": thread_id}} - - # Invoke the graph - graph.invoke({"urls": ["https://www.example.com"]}, config) -``` - - -::: - -:::js - - - ```typescript - import { StateGraph, StateSchema, GraphNode, START, END, MemorySaver } from "@langchain/langgraph"; - import { v7 as uuid7 } from "uuid"; - import * as z from "zod"; - - // Define a StateSchema to represent the state - const State = new StateSchema({ - url: z.string(), - result: z.string().optional(), - }); - - const callApi: GraphNode = async (state) => { - const response = await fetch(state.url); // [!code highlight] - const text = await response.text(); - const result = text.slice(0, 100); // Side-effect - return { - result, - }; - }; - - // Create a StateGraph builder and add a node for the callApi function - const builder = new StateGraph(State) - .addNode("callApi", callApi) - .addEdge(START, "callApi") - .addEdge("callApi", END); - - // Specify a checkpointer - const checkpointer = new MemorySaver(); - - // Compile the graph with the checkpointer - const graph = builder.compile({ checkpointer }); - - // Define a config with a thread ID. 
- const threadId = uuid7(); - const config = { configurable: { thread_id: threadId } }; - - // Invoke the graph - await graph.invoke({ url: "https://www.example.com" }, config); -``` - - - ```typescript - import { StateGraph, StateSchema, GraphNode, START, END, MemorySaver, task } from "@langchain/langgraph"; - import { v7 as uuid7 } from "uuid"; - import * as z from "zod"; - - // Define a StateSchema to represent the state - const State = new StateSchema({ - urls: z.array(z.string()), - results: z.array(z.string()).optional(), - }); - - const makeRequest = task("makeRequest", async (url: string) => { - const response = await fetch(url); // [!code highlight] - const text = await response.text(); - return text.slice(0, 100); - }); - - const callApi: GraphNode = async (state) => { - const requests = state.urls.map((url) => makeRequest(url)); // [!code highlight] - const results = await Promise.all(requests); - return { - results, - }; - }; - - // Create a StateGraph builder and add a node for the callApi function - const builder = new StateGraph(State) - .addNode("callApi", callApi) - .addEdge(START, "callApi") - .addEdge("callApi", END); - - // Specify a checkpointer - const checkpointer = new MemorySaver(); - - // Compile the graph with the checkpointer - const graph = builder.compile({ checkpointer }); - - // Define a config with a thread ID. - const threadId = uuid7(); - const config = { configurable: { thread_id: threadId } }; - - // Invoke the graph - await graph.invoke({ urls: ["https://www.example.com"] }, config); -``` - - -::: - -## Resuming workflows - -Once you have enabled durable execution in your workflow, you can resume execution for the following scenarios: - -:::python -* **Pausing and Resuming Workflows:** Use the @[interrupt][interrupt] function to pause a workflow at specific points and the @[`Command`] primitive to resume it with updated state. See [**Interrupts**](/oss/langgraph/interrupts) for more details. 
-* **Recovering from Failures:** Automatically resume workflows from the last successful checkpoint after an exception (e.g., LLM provider outage). This involves executing the workflow with the same thread identifier by providing it with a `None` as the input value (see this [example](/oss/langgraph/use-functional-api#resuming-after-an-error) with the functional API). -::: - -:::js -* **Pausing and Resuming Workflows:** Use the @[interrupt][interrupt] function to pause a workflow at specific points and the @[`Command`] primitive to resume it with updated state. See [**Interrupts**](/oss/langgraph/interrupts) for more details. -* **Recovering from Failures:** Automatically resume workflows from the last successful checkpoint after an exception (e.g., LLM provider outage). This involves executing the workflow with the same thread identifier by providing it with a `null` as the input value (see this [example](/oss/langgraph/use-functional-api#resuming-after-an-error) with the functional API). -::: - -## Starting points for resuming workflows - -* If you're using a [StateGraph (Graph API)](/oss/langgraph/graph-api), the starting point is the beginning of the [**node**](/oss/langgraph/graph-api#nodes) where execution stopped. -* If you're making a subgraph call inside a node, the starting point will be the **parent** node that called the subgraph that was halted. - Inside the subgraph, the starting point will be the specific [**node**](/oss/langgraph/graph-api#nodes) where execution stopped. -* If you're using the Functional API, the starting point is the beginning of the [**entrypoint**](/oss/langgraph/functional-api#entrypoint) where execution stopped. diff --git a/errors/GRAPH_RECURSION_LIMIT.mdx b/errors/GRAPH_RECURSION_LIMIT.mdx index d91c73f..fbd64be 100644 --- a/errors/GRAPH_RECURSION_LIMIT.mdx +++ b/errors/GRAPH_RECURSION_LIMIT.mdx @@ -51,7 +51,7 @@ However, complex graphs may hit the default limit naturally. 
* If you have a complex graph, you can pass in a higher `recursion_limit` value into your `config` object when invoking your graph like this: ```python -graph.invoke({...}, {"recursion_limit": 100}) +graph.invoke({...}, {"recursion_limit": 1000}) ``` ::: @@ -59,6 +59,6 @@ graph.invoke({...}, {"recursion_limit": 100}) * If you have a complex graph, you can pass in a higher `recursionLimit` value into your `config` object when invoking your graph like this: ```typescript -await graph.invoke({...}, { recursionLimit: 100 }); +await graph.invoke({...}, { recursionLimit: 1000 }); ``` ::: diff --git a/event-streaming.mdx b/event-streaming.mdx new file mode 100644 index 0000000..e2b811b --- /dev/null +++ b/event-streaming.mdx @@ -0,0 +1,825 @@ +--- +title: Event streaming +description: Stream LangGraph runs with typed projections for messages, state, subgraphs, output, and extensions. +--- + +Event streaming is the recommended in-process streaming model for most LangGraph application code. It returns a run stream object that can be consumed in multiple ways at the same time. + +## Quickstart + +:::python +```py +stream = graph.stream_events({ + "messages": [{"role": "user", "content": "What is 42 * 17?"}], +}, version="v3") + +for message in stream.messages: + for token in message.text: + print(token, end="", flush=True) + +final_state = stream.output +``` +::: + +:::js +```ts +const stream = await graph.streamEvents( + { messages: [{ role: "user", content: "What is 42 * 17?" }] }, + { version: "v3" } +); + +for await (const message of stream.messages) { + for await (const token of message.text) { + process.stdout.write(token); + } +} + +const finalState = await stream.output; +``` +::: + +To stream against a graph deployed behind an Agent Server, see the [LangSmith Streaming API](/langsmith/streaming). + +## How the pieces fit together + +The streaming stack has two main layers: + +1. **Streaming** emits raw graph execution events from the Pregel engine. +2. 
**Event streaming** normalizes those events, runs them through stream transformers, and exposes typed projections. + +
+
+
+
Pregel engine
+
Runs graph steps
+
+
emits
+
+
Raw Pregel events
+
updates, values, messages, custom, checkpoints, tasks, debug
+
+
sent to
+
+
Event router
+
Routes each event through the transformer pipeline
+
+
cascades through
+
+
Stream transformers
+
+
ValuesTransformer
+
MessagesTransformer
+
...
+
Custom transformers
+
+
+
produces
+
+
Event Stream
+
Projected events for application code
+
+
+
+ +The event router is the bridge between the two layers. It receives normalized Pregel events and passes each event through the registered stream transformers. Built-in transformers create standard projections such as `stream.messages`, `stream.values`, `stream.subgraphs`, and `stream.output`. Custom transformers can add application-specific projections under `stream.extensions`. + +## What event streaming provides + +The run stream exposes typed projections over one underlying event flow: + +| Projection | Use | +| ---------- | --- | +| `stream` | Iterate every protocol event. | +| `stream.messages` | Stream chat model messages and token deltas. | +| `stream.values` | Iterate state snapshots and await the final value. | +| `stream.output` | Await the final output. | +| `stream.subgraphs` | Discover and observe nested graph executions. | +| `stream.interrupts` | Inspect human-in-the-loop interrupt payloads. | +| `stream.interrupted` | Check whether the run paused for human input. | +| `stream.extensions` | Consume custom stream transformer projections. | + +Multiple consumers can read these projections concurrently. Reading `stream.messages` does not consume events needed by `stream.values`, `stream.subgraphs`, or `stream.output`. + +Event streaming sits one level above [streaming](/oss/langgraph/streaming), which exposes raw graph execution events through `stream_mode` modes such as `updates`, `values`, `messages`, `custom`, `checkpoints`, `tasks`, and `debug`. Use streaming when you need low-level access to those modes; use event streaming when application code benefits from typed projections. 
+ +## Stream messages + +Use `stream.messages` for chat model output: + +:::python +```py +stream = graph.stream_events(input, version="v3") + +for message in stream.messages: + text = str(message.text) + usage = message.output.usage_metadata + + print(text) + print(usage) +``` +::: + +:::js +```ts +const stream = await graph.streamEvents(input, { version: "v3" }); + +for await (const message of stream.messages) { + const text = await message.text; + const usage = await message.usage; + + console.log(text); + console.log(usage); +} +``` +::: + +:::python +`message.text` is iterable in synchronous code. Iterate it for token-by-token output, or call `str(message.text)` for the complete text. + +`message.reasoning` exposes reasoning deltas, and `message.tool_calls` exposes tool-call argument chunks. If you need text, reasoning, and tool-call chunks in exact arrival order, iterate the message stream's raw events instead of each projection separately. +::: + +:::js +`message.text` is both an async iterable and a promise-like value. Iterate it for token-by-token output, or await it for the complete text. +::: + +## Stream subgraphs + +Use `stream.subgraphs` to observe nested graph work without parsing namespace strings: + +:::python +```py +stream = graph.stream_events(input, version="v3") + +for subgraph in stream.subgraphs: + print(subgraph.graph_name, subgraph.path) + + for message in subgraph.messages: + print(message.text) +``` +::: + +:::js +```ts +const stream = await graph.streamEvents(input, { version: "v3" }); + +for await (const subgraph of stream.subgraphs) { + console.log(subgraph.name, subgraph.path); + + for await (const message of subgraph.messages) { + console.log(await message.text); + } +} +``` +::: + +For product-specific streams, see [Deep Agents streaming](/oss/deepagents/event-streaming) for subagent streams and [LangChain agent streaming](/oss/langchain/streaming) for tool calls and middleware events. 
+ +## Stream state + +Use `stream.values` to stream full state snapshots after each step: + +:::python +```py +stream = graph.stream_events(input, version="v3") + +for snapshot in stream.values: + print(snapshot) + +final_state = stream.output +``` +::: + +:::js +```ts +const stream = await graph.streamEvents(input, { version: "v3" }); + +for await (const snapshot of stream.values) { + console.log(snapshot); +} + +const finalState = await stream.output; +``` +::: + +## Stream multiple projections + +:::python +For concurrent consumption in async code, use `astream_events` with `asyncio.gather`: + +```py +import asyncio + +stream = await graph.astream_events(input, version="v3") + +async def consume_messages(): + async for message in stream.messages: + print(f"[llm] node={message.node}") + +async def consume_subgraphs(): + async for subgraph in stream.subgraphs: + print(f"[subgraph] path={subgraph.path}") + +await asyncio.gather(consume_messages(), consume_subgraphs()) +``` + +For synchronous code, use `stream.interleave(...)` to consume multiple projections in strict arrival order: + +```py +stream = graph.stream_events(input, version="v3") + +for name, item in stream.interleave("values", "messages", "subgraphs"): + if name == "values": + print(f"[state] keys={list(item)}") + elif name == "messages": + print(f"[llm] node={item.node}") + elif name == "subgraphs": + print(f"[subgraph] path={item.path}") +``` +::: + +:::js +Use concurrent consumers when you need multiple projections in JavaScript: + +```ts +await Promise.all([ + (async () => { + for await (const message of stream.messages) { + console.log(await message.text); + } + })(), + (async () => { + for await (const subgraph of stream.subgraphs) { + console.log(subgraph.path); + } + })(), +]); +``` +::: + +## Resume after an interrupt + +When a graph pauses for human input, inspect `stream.interrupted` and `stream.interrupts`, then resume by calling `stream_events(..., version="v3")` again with `Command`. 
+ +Resume requires a graph compiled with a checkpointer and a config carrying a thread ID — see [persistence](/oss/langgraph/persistence). + +:::python +```py +from langgraph.types import Command + +stream = graph.stream_events(input, version="v3") + +for message in stream.messages: + print(message.text) + +if stream.interrupted: + print(stream.interrupts) + +stream = graph.stream_events( + Command(resume={"decisions": [{"type": "approve"}]}), + version="v3", +) +final_state = stream.output +``` +::: + +:::js +```ts +import { Command } from "@langchain/langgraph"; + +let stream = await graph.streamEvents(input, { version: "v3" }); + +for await (const message of stream.messages) { + console.log(await message.text); +} + +if (stream.interrupted) { + console.log(stream.interrupts); +} + +stream = await graph.streamEvents( + new Command({ resume: { decisions: [{ type: "approve" }] } }), + { version: "v3" } +); +const finalState = await stream.output; +``` +::: + +## Stream all protocol events + +Use the run object itself when you want the raw protocol event stream: + +:::python +```py +stream = graph.stream_events({ + "messages": [{"role": "user", "content": "What is 42 * 17?"}], +}, version="v3") + +for event in stream: + namespace = event["params"]["namespace"] + print(namespace, event["method"], event["params"]["data"]) +``` +::: + +:::js +```ts +const stream = await graph.streamEvents( + { messages: [{ role: "user", content: "What is 42 * 17?" }] }, + { version: "v3" } +); + +for await (const event of stream) { + const namespace = event.params.namespace; + console.log(namespace, event.method, event.params.data); +} +``` +::: + +Each event is a `ProtocolEvent` envelope wrapping a channel-specific payload. The same shape is what a transformer's `process(event)` receives. 
+ +:::python +```py +class ProtocolEvent(TypedDict): + seq: int # strictly increasing within a run; use for ordering + method: str # channel name: "messages", "values", "updates", "custom", "tools", "lifecycle", ... + params: ProtocolEventParams + + +class ProtocolEventParams(TypedDict): + namespace: list[str] # path of "name:runtime_id" segments from the root graph; [] is the root + timestamp: int # wall-clock milliseconds; can drift, don't rely on for ordering + data: Any # channel-specific payload; shape depends on `method` +``` +::: + +:::js +```ts +interface ProtocolEvent { + readonly seq: number; // strictly increasing within a run; use for ordering + readonly method: string; // channel name: "messages", "values", "updates", "custom", "tools", "lifecycle", ... + readonly params: { + readonly namespace: string[]; // path of "name:runtime_id" segments from the root graph; [] is the root + readonly timestamp: number; // wall-clock milliseconds; can drift, don't rely on for ordering + readonly node?: string; // graph node that emitted this event, when applicable + readonly data: unknown; // channel-specific payload; shape depends on `method` + }; +} +``` +::: + +The `namespace` is a path from the root graph to the scope that emitted the event. The root is the empty array `[]`. Each child execution adds one `"name:runtime_id"` segment, so a nested tool call inside a subgraph looks like `["researcher:6f4d", "tools:91ac"]`. The name before `:` is the stable graph or node name; the suffix is a per-invocation runtime ID. Filter raw events by namespace yourself when you only care about a specific subtree — `stream.subgraphs` already does this for nested graph executions. + +## Channels and event lifecycle + +Raw events flow on channels. The channel name appears as the event's `method`; each channel emits a specific event shape. + +| Channel | Purpose | +| ------- | ------- | +| `values` | Full graph state snapshots. | +| `updates` | Per-node state deltas. 
| +| `messages` | Content-block-centric chat model output. | +| `tools` | Tool call start, streamed output, finish, and error events. | +| `lifecycle` | Run, subgraph, and subagent status changes. | +| `checkpoints` | Lightweight checkpoint envelopes for branching and time travel. | +| `input` | Human-in-the-loop input requests and responses. | +| `tasks` | Pregel task creation and result events. | +| `custom` | User-defined payloads from graph code. | +| `custom:<name>` | Application-defined stream transformer output. | + +The typed projections (`stream.messages`, `stream.values`, etc.) are built from these channels. The channel name appears as the `method` field on raw events when you iterate the run object directly. + +### Messages + +The `messages` channel models output as content blocks. The data's `event` field is one of: + +- `message-start` +- `content-block-start` +- `content-block-delta` +- `content-block-finish` +- `message-finish` + +Content blocks have explicit boundaries: a block starts, emits zero or more deltas, and finishes before the next block in the same message starts. This makes token streaming, reasoning blocks, tool-call blocks, and multimodal content explicit without requiring provider-specific formats. `message-finish` may include token usage; unrecoverable model-call failures arrive as message error events. 
+ +To consume raw content-block events directly instead of using the `stream.messages` projection: + +:::python +```py +for event in stream: + if event["method"] != "messages": + continue + + data = event["params"]["data"][0] + if not isinstance(data, dict): + continue + if data.get("event") != "content-block-delta": + continue + + block = data.get("delta") or {} + if block.get("type") == "text-delta": + print(block.get("text", ""), end="", flush=True) + elif block.get("type") == "reasoning-delta": + print(f"[thinking]{block.get('reasoning', '')}", end="", flush=True) +``` +::: + +:::js +```ts +for await (const event of stream) { + if (event.method !== "messages") continue; + + const data = event.params.data; + if (data.event !== "content-block-delta") continue; + + const block = data.delta ?? {}; + if (block.type === "text-delta") { + process.stdout.write(block.text ?? ""); + } else if (block.type === "reasoning-delta") { + process.stdout.write(`[thinking]${block.reasoning ?? ""}`); + } +} +``` +::: + +### Tools + +The `tools` channel exposes tool execution. The data's `event` field is one of: + +- `tool-started` +- `tool-output-delta` +- `tool-finished` +- `tool-error` + +Tool events are correlated by tool call ID, so a tool execution can be joined back to its originating tool-call content block on the `messages` channel. + +### Lifecycle + +The `lifecycle` channel tracks root run, subgraph, and subagent status. The data's `event` field is one of: + +- `started` +- `running` +- `completed` +- `failed` +- `interrupted` + +Beyond `event`, lifecycle data may include an optional `graph_name`, `error`, and `cause` describing why a child scope started (parent tool call, fan-out send, edge transition). + +## Build your own projection + +Stream transformers are the projection layer in event streaming. 
They observe protocol events, keep their own state, and expose derived views of a run — things like tool activity, token totals, progress events, artifacts, or messages for another protocol. `StreamChannel` is the projection primitive transformers use to publish those views. + +Built-in projections (`stream.messages`, `stream.values`, `stream.subgraphs`, `stream.output`) and product-specific projections (LangChain's `stream.tool_calls`, Deep Agents' `stream.subagents`) are themselves transformers using this same contract. User transformers stack on top via compile-time or call-time registration, and their projections appear under `stream.extensions`. + +Write one when the existing projections don't match the shape an application needs. + +### How transformers work + +Event streaming starts with streaming output from the LangGraph Pregel engine. The runtime normalizes those chunks into protocol events, then a stream handler routes each event through a stack of stream transformers. + +```mermaid +flowchart TD + A[Pregel modes] --> B[Events] + B --> C[Built-in projections] + C --> D[User transformers] + D --> E[Run projections] +``` + +The stream handler is the central dispatcher for one stream. For every protocol event, it: + +1. Calls each registered transformer's `process(event)` hook in order. +2. Wires named `StreamChannel` pushes back onto the protocol event stream. +3. Stores the event in the run stream unless a transformer suppresses it. +4. Calls `finalize()` or `fail()` on every transformer when the run ends. + +Transformers are observational. They do not call back into the graph runtime. Instead, they consume events and push derived values into `StreamChannel`, promises, or other projection objects. + +### Transformer shape + +A transformer implements the `StreamTransformer` interface: + +:::python +```py +from langgraph.stream import ProtocolEvent, StreamTransformer + + +class MyTransformer(StreamTransformer): + def init(self) -> dict: + ... 
+ + def process(self, event: ProtocolEvent) -> bool: + ... + + def finalize(self) -> None: + ... + + def fail(self, err: BaseException) -> None: + ... +``` +::: + +:::js +```ts +interface StreamTransformer<TProjection> { + init(): TProjection; + process(event: ProtocolEvent): boolean; + finalize?(): void | PromiseLike<void>; + fail?(err: unknown): void; +} +``` +::: + +- `init()` creates the projection object. User transformer projections appear under `stream.extensions`. +- `process()` observes each protocol event. See [Stream all protocol events](#stream-all-protocol-events) for the `ProtocolEvent` shape. Return `false` only when you intentionally want to suppress the original event. +- `finalize()` closes or resolves non-channel projections after a successful stream. +- `fail()` propagates errors to non-channel projections. + +### Declaring required stream modes + +`required_stream_modes` controls which Pregel stream modes the underlying graph emits during the stream. The runtime takes the union of every registered transformer's `required_stream_modes` and passes that union as the `stream_mode` argument to the graph's `.stream()` call. **Modes that no transformer requests are never emitted** — declaring `("custom",)` is what causes `custom` events to flow through the run at all. + +:::python +```py +class CustomTransformer(StreamTransformer): + required_stream_modes = ("custom",) # [!code highlight] + + def process(self, event: ProtocolEvent) -> bool: + if event["method"] == "custom": + ... + return True +``` +::: + +`process()` receives every event the graph emits and is responsible for filtering by `event["method"]`. The declaration turns on upstream emission; it does not narrow what `process()` sees. Valid values are the Pregel stream modes: `"messages"`, `"tools"`, `"custom"`, `"values"`, `"updates"`, `"checkpoints"`, `"tasks"`, `"debug"`. Each transformer must declare every mode it acts on — an omitted mode is not emitted by the graph and never reaches `process()`. 
+ +### StreamChannel + +`StreamChannel` is the projection primitive a transformer uses for streaming values. It always exposes an iterable stream on `stream.extensions.<name>`. The constructor argument decides whether each `push()` also flows into the run's main event stream as a `custom:<name>` event—that is, whether the projection's values show up when iterating raw protocol events. + +:::js +| Need | Use | +| ---- | --- | +| Side-channel projection only | `new StreamChannel()` | +| Also flow each push into the main event stream | `new StreamChannel(name)` | +::: + +:::python +| Need | Use | +| ---- | --- | +| Side-channel projection only | `StreamChannel()` | +| Also flow each push into the main event stream | `StreamChannel(name)` | +::: + +Named channel payloads must be serializable, because each pushed value also becomes a `custom:<name>` protocol event in the main stream. Keep promises, async iterables, class instances, and other in-process handles in unnamed channels. + +The stream handler owns channel lifecycle. Once `init()` returns a channel, the handler closes or fails it for you when the run ends. Transformers only push values. + +### Example: named channel + +Pass a string name to `StreamChannel` to expose a streaming projection through `stream.extensions` *and* forward each pushed value into the run's main event stream as a `custom:<name>` protocol event: + +:::python +```py +from typing import TypedDict + +from langgraph.stream import ProtocolEvent, StreamChannel, StreamTransformer + + +class ToolActivity(TypedDict): + name: str + status: str + + +class ToolActivityTransformer(StreamTransformer): + required_stream_modes = ("tools",) + + def __init__(self, scope: tuple[str, ...] 
= ()) -> None: + super().__init__(scope) + self.activity = StreamChannel[ToolActivity]("tool_activity") + + def init(self) -> dict: + return {"tool_activity": self.activity} + + def process(self, event: ProtocolEvent) -> bool: + if event["method"] != "tools": + return True + + data = event["params"]["data"] + if isinstance(data, dict) and data.get("tool_name") and data.get("event"): + status = "error" if data["event"] == "tool-error" else "started" + self.activity.push({"name": data["tool_name"], "status": status}) + return True +``` +::: + +:::js +```ts +import { StreamChannel } from "@langchain/langgraph"; + +const toolActivityTransformer = () => { + const activity = new StreamChannel<{ + name: string; + status: "started" | "finished" | "error"; + }>("toolActivity"); + + return { + init: () => ({ toolActivity: activity }), + process(event) { + if (event.method === "tools") { + const data = event.params.data as { tool_name?: string; event?: string }; + if (data.tool_name && data.event) { + activity.push({ + name: data.tool_name, + status: data.event === "tool-error" ? "error" : "started", + }); + } + } + return true; + }, + }; +}; +``` +::: + +### Example: unnamed channel + +Without a name, the channel is a side-channel projection only — accessible on `stream.extensions` but not visible to consumers iterating raw events. This is the right choice for projections that hold in-process handles (promises, async iterables, class instances) that can't be serialized onto the main event stream. 
+ +The example below pairs an unnamed channel with `get_stream_writer`, which lets graph nodes emit `custom`-channel events that the transformer then drains into the projection: + +:::python +```py +from langgraph.config import get_stream_writer +from langgraph.stream import ProtocolEvent, StreamChannel, StreamTransformer + + +def node(state): + writer = get_stream_writer() + writer({"kind": "progress", "message": "retrieving context"}) + return state + + +class CustomTransformer(StreamTransformer): + required_stream_modes = ("custom",) + + def __init__(self, scope: tuple[str, ...] = ()) -> None: + super().__init__(scope) + self.log = StreamChannel() + + def init(self) -> dict: + return {"custom": self.log} + + def process(self, event: ProtocolEvent) -> bool: + if event["method"] == "custom": + self.log.push(event["params"]["data"]) + return True + + +stream = graph.stream_events(input, version="v3", transformers=[CustomTransformer]) + +for item in stream.extensions["custom"]: + print(item) +``` +::: + +:::js +```ts +import { StreamChannel } from "@langchain/langgraph"; + +const customTransformer = () => { + const custom = new StreamChannel(); + + return { + init: () => ({ custom }), + process(event) { + if (event.method === "custom") { + custom.push(event.params.data); + } + return true; + }, + }; +}; +``` +::: + +### Example: final-value projection + +Use unnamed streams, promises, or other in-process objects when the projection should not flow into the main event stream: + +:::python +```py +from langgraph.stream import ProtocolEvent, StreamChannel, StreamTransformer + + +class StatsTransformer(StreamTransformer): + required_stream_modes = ("messages",) + + def __init__(self, scope: tuple[str, ...] 
= ()) -> None: + super().__init__(scope) + self.total_tokens = 0 + self.total_tokens_log = StreamChannel[int]() + + def init(self) -> dict: + return {"total_tokens": self.total_tokens_log} + + def process(self, event: ProtocolEvent) -> bool: + data = event["params"]["data"] + if isinstance(data, dict): + usage = data.get("usage") or {} + self.total_tokens += usage.get("output_tokens") or 0 + return True + + def finalize(self) -> None: + self.total_tokens_log.push(self.total_tokens) + self.total_tokens_log.close() +``` +::: + +:::js +```ts +const statsTransformer = () => { + let totalTokens = 0; + let resolveTotal!: (value: number) => void; + const totalTokensPromise = new Promise((resolve) => { + resolveTotal = resolve; + }); + + return { + init: () => ({ totalTokens: totalTokensPromise }), + process(event) { + if (event.method === "messages") { + const data = event.params.data as { usage?: { output_tokens?: number } }; + totalTokens += data.usage?.output_tokens ?? 0; + } + return true; + }, + finalize: () => resolveTotal(totalTokens), + }; +}; +``` +::: + +### Register at call time or compile time + +Pass transformers at call time for local experimentation: + +:::python +```py +stream = graph.stream_events( + input, + version="v3", + transformers=[StatsTransformer, ToolActivityTransformer], +) +``` +::: + +:::js +```ts +const stream = await graph.streamEvents(input, { + version: "v3", + transformers: [statsTransformer, toolActivityTransformer], +}); +``` +::: + +Compile transformers into the graph when every run of that graph should produce the projection: + +:::python +```py +graph = builder.compile( + transformers=[StatsTransformer, ToolActivityTransformer], +) +``` +::: + +:::js +```ts +const graph = builder.compile({ + transformers: [statsTransformer, toolActivityTransformer], +}); +``` +::: + +### Built-in: `ToolCallTransformer` + +:::python +LangGraph ships `ToolCallTransformer` as a built-in. 
Register it to expose `stream.tool_calls` on a plain `StateGraph`: + +```py +from langgraph.prebuilt import ToolCallTransformer + +stream = graph.stream_events(input, version="v3", transformers=[ToolCallTransformer]) + +for tool_call in stream.tool_calls: + print(tool_call.tool_name, tool_call.input) +``` +::: + +## Related + +LangGraph defines the streaming primitives. For using streaming with LangChain or Deep Agents, review the relevant product docs: + +- [LangChain agent streaming](/oss/langchain/event-streaming) covers ReAct-style agent messages, tool calls, and middleware updates. +- [Deep Agents streaming](/oss/deepagents/event-streaming) covers subagents, nested messages, and subagent tool calls. +- [LangChain frontend patterns](/oss/langchain/frontend/overview) and [LangGraph frontend patterns](/oss/langgraph/frontend/overview) show UI use cases built on top of streamed state. +- [LangSmith Streaming API](/langsmith/streaming) covers streaming against a graph deployed behind an Agent Server. + +The wire-level event and command formats are defined in the [Agent Protocol](https://github.com/langchain-ai/agent-protocol) repository and consumable as [`langchain-protocol`](https://pypi.org/project/langchain-protocol/) on PyPI and [`@langchain/protocol`](https://www.npmjs.com/package/@langchain/protocol) on npm. diff --git a/fault-tolerance.mdx b/fault-tolerance.mdx new file mode 100644 index 0000000..ecde14d --- /dev/null +++ b/fault-tolerance.mdx @@ -0,0 +1,652 @@ +--- +title: Fault tolerance +description: Configure per-node timeouts, retries, and error handlers in LangGraph. 
+--- + +:::python + +When a node fails—from a slow external API, a transient network error, or an unhandled exception—LangGraph gives you three composable mechanisms to respond: + +- [**Retries**](#retries) — automatically re-run failed attempts based on exception type and backoff settings +- [**Timeouts**](#timeouts) — cap how long a single attempt may run +- [**Error handling**](#error-handling) — run a recovery function after all retries are exhausted + +Use [**`set_node_defaults`**](#graph-defaults) to configure these mechanisms once for all nodes instead of repeating them on every `add_node` call. + +These compose in a fixed order: when a node attempt raises any exception (including @[`NodeTimeoutError`] from a timeout), the retry policy decides whether to retry. Only after retries are exhausted does the error handler run. + +For stopping a run cleanly at a superstep boundary and resuming later, see [Graceful shutdown](#graceful-shutdown). + + +Per-node timeouts and node-level error handlers require `langgraph>=1.2`. + + +```mermaid +%%{init:{'theme':'base','themeVariables':{'lineColor':'#40668D','primaryColor':'#E5F4FF','primaryTextColor':'#030710','primaryBorderColor':'#006DDD'}}}%% +flowchart LR + start([Attempt starts]) --> exec[Run node] + exec -->|"success"| done([Continue graph]) + exec -->|"any exception<br/>including NodeTimeoutError"| retry{retry_policy<br/>matches?} + retry -->|"yes, attempts left"| exec + retry -->|"exhausted or absent"| handler{error_handler?} + handler -->|"yes"| run_handler["Invoke handler<br/>with NodeError"] + run_handler --> route([Update state +<br/>Command goto]) + handler -->|"no"| bubble([Exception<br/>bubbles up]) + + classDef process fill:#E5F4FF,stroke:#006DDD,stroke-width:2px,color:#030710 + classDef decision fill:#FDF3FF,stroke:#7E65AE,stroke-width:2px,color:#504B5F + classDef alert fill:#F8E8E6,stroke:#B27D75,stroke-width:2px,color:#634643 + classDef output fill:#EBD0F0,stroke:#885270,stroke-width:2px,color:#441E33 + + class exec,run_handler process + class retry,handler decision + class bubble alert + class done,route,start output +``` + +## Retries + +A retry policy automatically re-runs a failed node attempt based on exception type and backoff settings. Pass `retry_policy=` to @[`add_node`]: + +```python +from langgraph.types import RetryPolicy + +builder.add_node( + "call_api", + call_api, + retry_policy=RetryPolicy(max_attempts=3), +) +``` + +### Default behavior + +By default, `retry_on` uses `default_retry_on`, which retries on **any** exception except the following (and their subclasses): + +- `ValueError` +- `TypeError` +- `ArithmeticError` +- `ImportError` +- `LookupError` +- `NameError` +- `SyntaxError` +- `RuntimeError` +- `ReferenceError` +- `StopIteration` +- `StopAsyncIteration` +- `OSError` + +For exceptions from popular HTTP libraries such as `requests` and `httpx`, it only retries on 5xx status codes. @[`NodeTimeoutError`] is retryable by default. + +### Parameters + +| Parameter | Type | Default | Description | +| --------- | ---- | ------- | ----------- | +| `max_attempts` | `int` | `3` | Maximum number of attempts, including the first. | +| `initial_interval` | `float` | `0.5` | Seconds before the first retry. | +| `backoff_factor` | `float` | `2.0` | Multiplier applied to the interval after each retry. | +| `max_interval` | `float` | `128.0` | Maximum seconds between retries. | +| `jitter` | `bool` | `True` | Add random jitter to the interval.
| +| `retry_on` | `type[Exception] \| Sequence[type[Exception]] \| Callable[[Exception], bool]` | `default_retry_on` | Exceptions to retry on, or a callable returning `True` for retryable exceptions. | + +### Custom retry logic + +Pass a callable or exception type to `retry_on`. Import `default_retry_on` to extend the default behavior: + +```python +from langgraph.types import RetryPolicy, default_retry_on + +def custom_retry_on(exc: BaseException) -> bool: + if isinstance(exc, MyCustomError): + return False + return default_retry_on(exc) + +builder.add_node( + "call_api", + call_api, + retry_policy=RetryPolicy(max_attempts=3, retry_on=custom_retry_on), +) +``` + +### Inspect retry state + +Use `runtime.execution_info` inside a node to inspect the current attempt number. This is useful for switching to a fallback when the primary call keeps failing: + +```python +from langgraph.graph import StateGraph, START, END +from langgraph.runtime import Runtime +from langgraph.types import RetryPolicy +from typing_extensions import TypedDict + +class State(TypedDict): + result: str + +def my_node(state: State, runtime: Runtime) -> State: + if runtime.execution_info.node_attempt > 1: # [!code highlight] + return {"result": call_fallback_api()} + return {"result": call_primary_api()} + +builder = StateGraph(State) +builder.add_node("my_node", my_node, retry_policy=RetryPolicy(max_attempts=3)) +builder.add_edge(START, "my_node") +builder.add_edge("my_node", END) +``` + +`execution_info` exposes the following fields: + +| Attribute | Type | Description | +| --------- | ---- | ----------- | +| `node_attempt` | `int` | Current attempt number (1-indexed). `1` on the first try, `2` on the first retry, etc. | +| `node_first_attempt_time` | `float \| None` | Unix timestamp of when the first attempt started. Constant across retries. | +| `thread_id` | `str \| None` | Thread ID for the current execution. `None` without a checkpointer. 
| +| `run_id` | `str \| None` | Run ID for the current execution. `None` when not provided in config. | +| `checkpoint_id` | `str` | Checkpoint ID for the current execution. | +| `task_id` | `str` | Task ID for the current execution. | + +`execution_info` is available even without a retry policy—`node_attempt` defaults to `1`. + +## Timeouts + + +Requires `langgraph>=1.2`. + + +The `timeout=` parameter on @[`add_node`] caps how long a single node attempt may run. Pass a number (seconds), a `timedelta`, or a @[`TimeoutPolicy`] for separate run and idle limits: + +```python +from datetime import timedelta +from langgraph.types import TimeoutPolicy + +# Simple wall-clock cap +builder.add_node("call_model", call_model, timeout=60) +builder.add_node("call_model", call_model, timeout=timedelta(minutes=2)) + +# Separate run and idle limits +builder.add_node( + "call_model", + call_model, + timeout=TimeoutPolicy(run_timeout=120, idle_timeout=30), +) +``` + + +Node timeouts only apply to **async** nodes. Sync nodes with a `timeout` are rejected at compile time. To wrap blocking I/O, use `asyncio.to_thread` inside an async node. + + +### Run timeout + +`run_timeout` is a hard wall-clock cap on a single attempt. It is never refreshed, regardless of node activity: + +```python +from langgraph.types import TimeoutPolicy + +builder.add_node( + "call_model", + call_model, + timeout=TimeoutPolicy(run_timeout=120), +) +``` + +When the limit is exceeded, LangGraph raises @[`NodeTimeoutError`], clears any writes from the failed attempt, and lets the retry policy decide whether to retry. + +### Idle timeout + +`idle_timeout` is a progress-resetting cap. 
It fires only when the node stops making observable progress for the specified duration—unlike `run_timeout`, the clock resets whenever the node produces a progress signal: + +```python +builder.add_node( + "call_model", + call_model, + timeout=TimeoutPolicy(idle_timeout=30), +) +``` + +You can set `run_timeout` and `idle_timeout` together. Whichever fires first cancels the attempt. + +#### Progress signals + +Under the default `refresh_on="auto"`, the idle clock resets on any of the following: + +- State writes via `CONFIG_KEY_SEND` +- Stream output (yielded async stream chunks) +- Child-task scheduling +- Runtime stream-writer calls +- Any LangChain callback event from the node or its descendants (LLM tokens, tool calls, chain start/end, etc.) + +#### Heartbeat mode + +Set `refresh_on="heartbeat"` to narrow the refresh source to explicit `runtime.heartbeat()` calls only. This is useful when you want a strict idle definition that isn't reset by chatty subordinates: + +```python +builder.add_node( + "call_model", + call_model, + timeout=TimeoutPolicy(idle_timeout=30, refresh_on="heartbeat"), +) +``` + +#### Manual heartbeats + +For long-running async work that doesn't naturally emit progress signals, call `runtime.heartbeat()` to manually reset the idle clock: + +```python +from langgraph.graph import StateGraph, START, END +from langgraph.runtime import Runtime +from langgraph.types import TimeoutPolicy +from typing_extensions import TypedDict + +class State(TypedDict): + result: str + +async def long_running_node(state: State, runtime: Runtime) -> State: + for batch in fetch_batches(): + process(batch) + runtime.heartbeat() # [!code highlight] + return {"result": "done"} + +builder = StateGraph(State) +builder.add_node( + "long_running_node", + long_running_node, + timeout=TimeoutPolicy(idle_timeout=30, refresh_on="heartbeat"), +) +builder.add_edge(START, "long_running_node") +builder.add_edge("long_running_node", END) +``` + +`runtime.heartbeat()` is a no-op 
outside an idle-timed attempt, so you can call it unconditionally. + +### NodeTimeoutError + +When a timeout fires, LangGraph raises @[`NodeTimeoutError`] with structured context about which limit was hit: + +| Attribute | Type | Description | +| --------- | ---- | ----------- | +| `node` | `str` | Name of the node whose execution timed out. | +| `elapsed` | `float` | Seconds elapsed before the timeout fired. | +| `kind` | `Literal["idle", "run"]` | Which timeout fired. | +| `idle_timeout` | `float \| None` | The configured idle timeout (seconds), if any. | +| `run_timeout` | `float \| None` | The configured run timeout (seconds), if any. | + +`NodeTimeoutError` is retryable by default. Combining `timeout=` with `retry_policy=` works out of the box—the timeout clock resets on each new attempt, and writes from a timed-out attempt are cleared before the next retry: + +```python +from langgraph.types import RetryPolicy, TimeoutPolicy + +builder.add_node( + "call_model", + call_model, + timeout=TimeoutPolicy(idle_timeout=30), + retry_policy=RetryPolicy(max_attempts=3), +) +``` + +### Dynamic timeouts with Send + +When using @[`Send`] to dispatch nodes dynamically (for example, in map-reduce patterns), you can pass a `timeout=` directly on the `Send` to override the target node's static timeout for that specific push: + +```python +from langgraph.types import Send, TimeoutPolicy + +def fan_out(state: OverallState): + return [ + Send("process_item", {"item": item}, timeout=TimeoutPolicy(idle_timeout=15)) + for item in state["items"] + ] +``` + +If `timeout=` is omitted on the `Send`, the target node's timeout (set at `add_node` time) applies. This lets you set a default timeout on the node and tighten it for individual calls. + +## Error handling + + +Requires `langgraph>=1.2`. + + +An error handler runs after a node fails and all retries are exhausted. It receives the current state and can update it or route to a different node using @[`Command`]. 
This is useful for compensation flows (Saga patterns) where you want to recover gracefully rather than abort the entire graph. + +Pass `error_handler=` to @[`add_node`]: + +```python +from langgraph.errors import NodeError +from langgraph.types import Command, RetryPolicy +from langgraph.graph import StateGraph, START +from typing_extensions import TypedDict + +class State(TypedDict): + status: str + +def charge_payment(state: State) -> State: + raise RuntimeError("payment gateway timeout") + +def payment_error_handler(state: State, error: NodeError) -> Command: + return Command( + update={"status": f"compensated: {error.error}"}, + goto="finalize", + ) + +def finalize(state: State) -> State: + return state + +graph = ( + StateGraph(State) + .add_node( + "charge_payment", + charge_payment, + retry_policy=RetryPolicy(max_attempts=3, retry_on=ConnectionError), + error_handler=payment_error_handler, + ) + .add_node("finalize", finalize) + .add_edge(START, "charge_payment") + .compile() +) +``` + +The handler fires only after `retry_policy` is exhausted, or immediately if no retry policy is configured. The retry policy and the error handler stay decoupled: configure when to retry and when to compensate independently. + +### NodeError + +Error handlers receive failure context through a typed `error: NodeError` parameter, injected by type annotation (the same pattern as `runtime: Runtime`): + +```python +from langgraph.errors import NodeError + +def my_handler(state: State, error: NodeError) -> Command: + print(f"Node {error.node} failed with: {error.error}") + return Command(update={"status": "recovered"}, goto="next_step") +``` + +@[`NodeError`] is a frozen dataclass with two fields: + +| Attribute | Type | Description | +| --------- | ---- | ----------- | +| `node` | `str` | Name of the node whose execution failed. | +| `error` | `BaseException` | The exception raised by the failed node. | + +The `error: NodeError` parameter is opt-in. 
Handlers that don't need failure context can use simpler signatures like `(state)` or `(state, runtime)`. + +### Route with Command + +Error handlers can return a @[`Command`] to update state and route to a specific node, enabling Saga / compensation patterns: + +```python +from langgraph.errors import NodeError +from langgraph.types import Command, RetryPolicy +from langgraph.graph import StateGraph, START +from typing_extensions import TypedDict + +class State(TypedDict): + status: str + +def reserve_inventory(state: State) -> State: + return {"status": "reserved"} + +def charge_payment(state: State) -> State: + raise RuntimeError("payment timeout") + +def payment_error_handler(state: State, error: NodeError) -> Command: + return Command( + update={"status": f"compensated_after_{error.node}: {error.error}"}, + goto="finalize", + ) + +def finalize(state: State) -> State: + return state + +graph = ( + StateGraph(State) + .add_node("reserve_inventory", reserve_inventory) + .add_node( + "charge_payment", + charge_payment, + retry_policy=RetryPolicy(max_attempts=3, retry_on=ConnectionError), + error_handler=payment_error_handler, + ) + .add_node("finalize", finalize) + .add_edge(START, "reserve_inventory") + .add_edge("reserve_inventory", "charge_payment") + .compile() +) +``` + +`charge_payment` retries on `ConnectionError` up to 3 times. If retries are exhausted (or the error isn't a `ConnectionError`), the handler compensates by updating state and routing to `finalize` instead of aborting the graph. + +### Resume-safe failures + + +Failure provenance is checkpointed. If the graph is interrupted or the process crashes after a node fails but before the handler completes, the handler sees the same `NodeError` context when the graph resumes from its checkpoint. + + +### Behavior with `interrupt()` + + +`interrupt()` raised inside a node is **not** routed to the error handler. 
Interrupts use the `GraphBubbleUp` mechanism to pause graph execution for human-in-the-loop workflows, bypassing both retry policies and error handlers. The graph pauses as usual. + + +### Subgraph failures + +If a node wraps a subgraph and the subgraph raises an unhandled exception, that exception surfaces to the parent node. If the parent node has an `error_handler`, the handler fires with the subgraph's exception in `error.error`. + +## Graph defaults + + +Requires `langgraph>=1.2`. + + +Instead of repeating the same `retry_policy=`, `error_handler=`, `timeout=`, or `cache_policy=` on every `add_node` call, use `set_node_defaults()` to configure graph-wide defaults in one place: + +```python +from langgraph.errors import NodeError +from langgraph.types import RetryPolicy, TimeoutPolicy +from langgraph.graph import StateGraph, START +from typing_extensions import TypedDict + +class State(TypedDict): + status: str + +def default_error_handler(state: State, error: NodeError) -> State: + return {"status": f"handled: {error.error}"} + +graph = ( + StateGraph(State) + .set_node_defaults( + retry_policy=RetryPolicy(max_attempts=3), + error_handler=default_error_handler, + timeout=TimeoutPolicy(run_timeout=30), + ) + .add_node("step_a", step_a) + .add_node("step_b", step_b) + .add_edge(START, "step_a") + .compile() +) +``` + +Both `step_a` and `step_b` now share the same retry policy, error handler, and timeout without any duplication. + +### Precedence + +Per-node values passed directly to `add_node()` always override the defaults set by `set_node_defaults()`. 
Defaults are resolved at `compile()` time, so you can call `set_node_defaults()` before or after `add_node()` in any order: + +```python +graph = ( + StateGraph(State) + .set_node_defaults(error_handler=default_error_handler) + .add_node("step_a", step_a) # uses default_error_handler + .add_node("step_b", step_b, error_handler=custom_error_handler) # uses custom_error_handler + .add_edge(START, "step_a") + .compile() +) +``` + +### Default error handler + +The `error_handler` default is particularly valuable when you want a single catch-all recovery function for any node that fails without its own handler. The handler accepts the same `(state, error: NodeError)` signature described in [Error handling](#error-handling): + +```python +from langgraph.errors import NodeError +from langgraph.graph import StateGraph, START +from langgraph.types import RetryPolicy +from typing_extensions import TypedDict + +class State(TypedDict): + status: str + +def always_failing(state: State) -> State: + raise ValueError("something went wrong") + +def default_handler(state: State, error: NodeError) -> State: + return {"status": f"recovered from {error.node}: {error.error}"} + +graph = ( + StateGraph(State) + .set_node_defaults( + retry_policy=RetryPolicy(max_attempts=2), + error_handler=default_handler, + ) + .add_node("always_failing", always_failing) + .add_edge(START, "always_failing") + .compile() +) +``` + +`ValueError` is excluded from the default `retry_on`, so the node is not retried and `default_handler` runs as soon as the first attempt fails. For a retryable exception, `max_attempts=2` (which counts the first attempt) would allow one retry before the handler runs. The default handler also accepts `RunnableConfig` as an optional third argument if you need access to config values such as `thread_id`: + +```python +from langchain_core.runnables import RunnableConfig + +def default_handler(state: State, error: NodeError, config: RunnableConfig) -> State: + thread_id = config["configurable"].get("thread_id") + return {"status": f"handled on thread {thread_id}"} +``` + +### Applicability matrix + +Not all defaults apply to all node types.
Error-handler nodes (those registered via `add_node(error_handler=...)`) are excluded from certain defaults to prevent unsafe behavior: + +| `set_node_defaults` parameter | Applies to regular nodes | Applies to error-handler nodes | Reason | +| ----------------------------- | ------------------------ | ------------------------------ | ------ | +| `retry_policy` | ✅ | ✅ | Handlers should be retried on transient failures | +| `timeout` | ✅ | ✅ | Stuck handlers should be cancelled like stuck regular nodes | +| `error_handler` | ✅ | ❌ | Handlers must never catch themselves | +| `cache_policy` | ✅ | ❌ | Caching handler results is unsafe | + +### Scope + +Defaults set on a parent graph are **not** inherited by subgraphs. Each graph maintains its own defaults. + +## Functional API + +The same `timeout=` and `retry_policy=` parameters are available on `@task` and `@entrypoint` in the functional API: + +```python +from langgraph.func import entrypoint, task +from langgraph.types import RetryPolicy, TimeoutPolicy + +@task( + timeout=TimeoutPolicy(idle_timeout=30), + retry_policy=RetryPolicy(max_attempts=3), +) +async def call_api(url: str) -> str: + response = await fetch(url) + return response.text + +@entrypoint(timeout=60) +async def my_workflow(inputs: dict) -> str: + result = await call_api("https://api.example.com/data") + return result +``` + +The behavior is identical to `add_node`: `NodeTimeoutError` is raised on timeout, buffered writes are cleared, and the retry policy decides whether to retry. + +## Graceful shutdown + + +Requires `langgraph>=1.2`. + + +Graceful shutdown lets you stop an in-flight graph run cooperatively—after the current superstep completes—and save a resumable checkpoint. This is useful for handling SIGTERM signals or any external supervisor that needs to reclaim resources without losing work. + +Create a @[`RunControl`] and pass it as `control=` to `invoke` or `stream`. 
Call `request_drain()` from any thread to signal that the run should stop: + +```python +from langgraph.runtime import RunControl +from langgraph.errors import GraphDrained + +control = RunControl() + +# In a signal handler or supervisor: +# control.request_drain("sigterm") + +try: + result = graph.invoke(inputs, config, control=control) +except GraphDrained as e: + # The graph stopped early and saved a checkpoint. + # Resume later with the same config. + print(f"Drained: {e.reason}") +``` + +### Semantics + +Drain is cooperative and operates between supersteps, never preempting work that is already running: + +| Scenario | Behavior | +| -------- | -------- | +| Node mid-execution | Runs to completion. Drain takes effect on the next superstep. | +| Node with a retry policy currently retrying | Retry loop runs to exhaustion or success. Drain takes effect after. | +| Graph finishes naturally on the same tick as drain | Returns normally. Inspect `control.drain_requested` to distinguish from a normal run. | +| More supersteps remain | Raises `GraphDrained(reason)`. Checkpoint is saved and resumable. | +| Subgraph requests drain | `GraphDrained` bubbles up through the parent and stops it at its own next superstep boundary. 
| + +### Resume after drain + +Resume a drained run with `invoke(None, config)` using the same `thread_id`: + +```python +result = graph.invoke(None, config) +``` + +### Read drain state inside a node + +Access drain state through the `runtime` parameter to adjust node behavior before the superstep boundary is reached: + +```python +from langgraph.runtime import Runtime + +async def my_node(state: State, runtime: Runtime) -> State: + if runtime.drain_requested: + # Skip expensive work and return a minimal result + return {"status": "skipped", "reason": runtime.drain_reason} + return {"status": await do_work()} +``` + +### SIGTERM hook pattern + +The recommended pattern for handling process shutdown: + +```python +import signal +from langgraph.runtime import RunControl +from langgraph.errors import GraphDrained + +control = RunControl() +signal.signal(signal.SIGTERM, lambda *_: control.request_drain("sigterm")) + +try: + result = graph.invoke(inputs, config, control=control) +except GraphDrained as e: + log.info("graph drained: %s", e.reason) + # Resume on next startup with the same config +``` + + +`request_drain()` does not cancel running asyncio tasks or kill threads. For a hard upper bound, pair drain with a graceful timeout and task cancellation. + + +## Limitations + +- **Python only**: timeouts and error handlers are not available in the JavaScript/TypeScript SDK. Retry policies work in both Python and TypeScript. +- **Timeouts are async-only**: sync nodes with a `timeout` are rejected at compile time. +- **One handler per node**: each node can have at most one `error_handler`. +- **Handler failures bubble up**: if the error handler itself raises, that exception propagates as if the node had no handler. +- **`set_node_defaults` is not inherited by subgraphs**: each graph manages its own defaults independently. 
+ +::: diff --git a/frontend/graph-execution.mdx b/frontend/graph-execution.mdx index 8395954..f781f33 100644 --- a/frontend/graph-execution.mdx +++ b/frontend/graph-execution.mdx @@ -45,7 +45,7 @@ Wire up `useStream` as usual. The key properties you'll use are `messages` (for streaming content routing), `values` (for completed node outputs), and `getMessagesMetadata` (for identifying which node produced each token). -:::js +:::python Define a TypeScript interface matching your agent's state schema and pass it as a type parameter to `useStream` for type-safe access to state values, including custom state keys for each pipeline node. In the examples below, replace `typeof myAgent` with your interface name: diff --git a/functional-api.mdx b/functional-api.mdx index db0006a..eda3203 100644 --- a/functional-api.mdx +++ b/functional-api.mdx @@ -3,8 +3,6 @@ title: Functional API overview sidebarTitle: Functional API --- - - The **Functional API** allows you to add LangGraph's key features ([persistence](/oss/langgraph/persistence), [memory](/oss/langgraph/add-memory), [human-in-the-loop](/oss/langgraph/interrupts), and [streaming](/oss/langgraph/streaming)) to your applications with minimal changes to your existing code. It is designed to integrate these features into existing code that may use standard language primitives for branching and control flow, such as `if` statements, `for` loops, and function calls. Unlike many data orchestration frameworks that require restructuring code into an explicit pipeline or DAG, the Functional API allows you to incorporate these capabilities without enforcing a rigid execution model. @@ -901,15 +899,25 @@ Providing non-serializable inputs or outputs will result in a runtime error when ## Determinism -To utilize features like **human-in-the-loop**, any randomness should be encapsulated inside of **tasks**. 
This guarantees that when execution is halted (e.g., for human in the loop) and then resumed, it will follow the same _sequence of steps_, even if **task** results are non-deterministic. +When you resume a workflow run, the code does **NOT** resume from the **same line of code** where execution stopped. Execution returns to a checkpoint boundary, and the workflow **replays** forward until it reaches the pause again. + +For the Functional API, replay starts at the beginning of the **entrypoint** while LangGraph restores completed [**task**](/oss/langgraph/functional-api#task) and [**subgraph**](/oss/langgraph/use-subgraphs) results from the checkpointer instead of recomputing them. That preserves the recorded order of steps across pauses, including for long-running or non-deterministic **task** outputs. + +To use features like **human-in-the-loop**, you must place non-deterministic work (for example, random values) and side effects (for example, file writes or API calls) in [**tasks**](/oss/langgraph/functional-api#task). + +Different runs of a workflow can produce different results, but resuming a **specific** thread should replay the same persisted **task** and **subgraph** results. -LangGraph achieves this behavior by persisting **task** and [**subgraph**](/oss/langgraph/use-subgraphs) results as they execute. A well-designed workflow ensures that resuming execution follows the _same sequence of steps_, allowing previously computed results to be retrieved correctly without having to re-execute them. This is particularly useful for long-running **tasks** or **tasks** with non-deterministic results, as it avoids repeating previously done work and allows resuming from essentially the same. +To ensure that your workflow is deterministic and can be consistently replayed, follow these guidelines: -While different runs of a workflow can produce different results, resuming a **specific** run should always follow the same sequence of recorded steps. 
This allows LangGraph to efficiently look up **task** and **subgraph** results that were executed prior to the graph being interrupted and avoid recomputing them. +* **Avoid repeating work**: In an **entrypoint**, if you chain several side effects (for example, logging, file writes, or network calls), give each its own **task** so resume restores their outputs from the checkpointer instead of running them again. +* **Encapsulate non-deterministic operations**: Keep values that can change between attempts (for example, random numbers or wall-clock reads) inside **tasks**, so replay lines up with what was checkpointed. +* **Use idempotent operations**: For partial task failures and retries, see [Idempotency](#idempotency). ## Idempotency -Idempotency ensures that running the same operation multiple times produces the same result. This helps prevent duplicate API calls and redundant processing if a step is rerun due to a failure. Always place API calls inside **tasks** functions for checkpointing, and design them to be idempotent in case of re-execution. Re-execution can occur if a **task** starts, but does not complete successfully. Then, if the workflow is resumed, the **task** will run again. Use idempotency keys or verify existing results to avoid duplication. +Idempotency ensures that running the same operation multiple times produces the same result. This helps prevent duplicate API calls and redundant processing if a step is rerun due to a failure. Always place API calls inside **task** functions for checkpointing, and design them to be idempotent in case of re-execution. +This is particularly important for operations that result in data writes. +When a workflow resumes, LangGraph replays completed **task** results from the checkpoint. A **task** that started but did not finish may run again on that resume, so design side effects to be idempotent. Use idempotency keys or verify existing results to avoid unintended duplication.
## Common pitfalls diff --git a/graph-api.mdx b/graph-api.mdx index b3a6d2b..f6d897c 100644 --- a/graph-api.mdx +++ b/graph-api.mdx @@ -3,7 +3,11 @@ title: Graph API overview sidebarTitle: Graph API --- - +import GraphApiUsingTasksOriginalJs from '/snippets/code-samples/graph-api-using-tasks-original-js.mdx'; +import GraphApiUsingTasksOriginalPy from '/snippets/code-samples/graph-api-using-tasks-original-py.mdx'; +import GraphApiUsingTasksTaskJs from '/snippets/code-samples/graph-api-using-tasks-task-js.mdx'; +import GraphApiUsingTasksTaskPy from '/snippets/code-samples/graph-api-using-tasks-task-py.mdx'; +import LanggraphGraphApiResumeV2Py from '/snippets/code-samples/langgraph-graph-api-resume-v2-py.mdx'; ## Graphs @@ -627,7 +631,7 @@ In LangGraph, nodes are Python functions (either synchronous or asynchronous) th 1. `state`—The [state](#state) of the graph 2. `config`—A @[`RunnableConfig`] object that contains configuration information like `thread_id` and tracing information like `tags` -3. `runtime`—A `Runtime` object that contains [runtime `context`](#runtime-context) and other information like `store`, `stream_writer`, `execution_info`, and `server_info` +3. `runtime`—A `Runtime` object that contains [runtime `context`](#runtime-context) and other information like `store`, `stream_writer`, `execution_info`, `server_info`, `heartbeat` (for idle timeout refresh), and `control` (for [graceful shutdown](/oss/langgraph/fault-tolerance#graceful-shutdown)) Similar to `NetworkX`, you add these nodes to a graph using the @[`add_node`] method: @@ -727,6 +731,58 @@ builder.addNode(myNode); ::: +### Re-execution and idempotency + +:::python + +When you compile with a [checkpointer](/oss/langgraph/persistence), LangGraph saves checkpoints at [super-step](#graphs) boundaries, not mid-function inside a node. 
If execution stops and later resumes (for example after an [interrupt](/oss/langgraph/interrupts) or a [retry](/oss/langgraph/fault-tolerance#retries)), the affected **node** runs again from the start of its function. Code and side effects before the pause run again. + +**Idempotency.** Design **node** logic so re-execution does not corrupt state. If a node inserts a database row, running it twice should not create duplicate rows unless that is intentional. Use idempotency keys, upserts, or read-before-write checks. For effects around `interrupt()`, see [Side effects called before `interrupt` must be idempotent](/oss/langgraph/interrupts#side-effects-called-before-interrupt-must-be-idempotent). + +**Graph changes.** [Determinism](/oss/langgraph/functional-api#determinism) rules about code changes do not apply to graph structure. You can add or remove **nodes** and edges without breaking resume for existing threads. Resumed runs use saved state and execute whatever graph you compile now. + +**Tasks and interrupts inside a node.** If a **node** calls [**tasks**](/oss/langgraph/functional-api#task) or @[`interrupt`], stricter determinism rules apply on resume. LangGraph restores completed **task** results from the checkpointer, but changing **task** or @[`interrupt`] order in code before the resume point can mismatch cached values. A [Functional API](/oss/langgraph/functional-api) **entrypoint** compiles to a single **node** that runs the whole entrypoint method this way. See [Determinism](/oss/langgraph/functional-api#determinism), [Idempotency](/oss/langgraph/functional-api#idempotency), and [Using tasks in nodes](#using-tasks-in-nodes). + +::: + +:::js + +When you compile with a [checkpointer](/oss/langgraph/persistence), LangGraph saves checkpoints at [super-step](#graphs) boundaries, not mid-function inside a node. 
If execution stops and later resumes (for example after an [interrupt](/oss/langgraph/interrupts) or a retry), the affected **node** runs again from the start of its function. Code and side effects before the pause run again. + +**Idempotency.** Design **node** logic so re-execution does not corrupt state. If a node inserts a database row, running it twice should not create duplicate rows unless that is intentional. Use idempotency keys, upserts, or read-before-write checks. For effects around `interrupt()`, see [Side effects called before `interrupt` must be idempotent](/oss/langgraph/interrupts#side-effects-called-before-interrupt-must-be-idempotent). + +**Graph changes.** [Determinism](/oss/langgraph/functional-api#determinism) rules about code changes do not apply to graph structure. You can add or remove **nodes** and edges without breaking resume for existing threads. Resumed runs use saved state and execute whatever graph you compile now. + +**Tasks and interrupts inside a node.** If a **node** calls [**tasks**](/oss/langgraph/functional-api#task) or @[`interrupt`], stricter determinism rules apply on resume. LangGraph restores completed **task** results from the checkpointer, but changing **task** or @[`interrupt`] order in code before the resume point can mismatch cached values. A [Functional API](/oss/langgraph/functional-api) **entrypoint** compiles to a single **node** that runs the whole entrypoint method this way. See [Determinism](/oss/langgraph/functional-api#determinism), [Idempotency](/oss/langgraph/functional-api#idempotency), and [Using tasks in nodes](#using-tasks-in-nodes). + +::: + +### Using tasks in nodes + +If a [node](#nodes) contains multiple operations, you may find it easier to implement each operation as a [**task**](/oss/langgraph/functional-api#task) instead of splitting the logic across multiple nodes. 
Task results are checkpointed when the graph uses a checkpointer, so resuming a thread can skip completed **task** work inside the node. + +:::python + + + + + + + + +::: + +:::js + + + + + + + + +::: + ### `START` node The @[`START`] Node is a special node that represents the node that sends user input to the graph. The main purpose for referencing this node is to determine which nodes should be called first. @@ -873,6 +929,10 @@ Edges define how the logic is routed and how the graph decides to stop. This is A node can have multiple outgoing edges. If a node has multiple outgoing edges, **all** of those destination nodes will be executed in parallel as a part of the next superstep. + +For each node, choose one routing mechanism: use normal edges for static routing, or use conditional edges / @[`Command`] for dynamic routing. Do not mix normal edges and dynamic routing from the same node, because both paths can execute and make graph behavior harder to reason about. + + ### Normal edges :::python @@ -1013,6 +1073,8 @@ By default, `Nodes` and `Edges` are defined ahead of time and operate on the sam To support this design pattern, LangGraph supports returning @[`Send`] objects from conditional edges. `Send` takes two arguments: first is the name of the node, and second is the state to pass to that node. ```python +from langgraph.types import Send + def continue_to_jokes(state: OverallState): return [Send("generate_joke", {"subject": s}) for s in state['subjects']] @@ -1131,7 +1193,7 @@ builder.addNode("myNode", myNode, { -@[`Command`] only adds dynamic edges—static edges defined with `add_edge` / `addEdge` still execute. For example, if `node_a` returns `Command(goto="my_other_node")` and you also have `graph.add_edge("node_a", "node_b")`, both `node_b` and `my_other_node` will run. +@[`Command`] only adds dynamic edges—static edges defined with `add_edge` / `addEdge` still execute. 
For example, if `node_a` returns `Command(goto="my_other_node")` and you also have `graph.add_edge("node_a", "node_b")`, both `node_b` and `my_other_node` will run. For each node, use either @[`Command`] or static edges to route to the next nodes, not both. @@ -1238,20 +1300,7 @@ await graph.invoke({ messages: [{ role: "user", content: "follow up" }] }, confi Use `Command(resume=...)` to provide a value and resume graph execution after an [interrupt](/oss/langgraph/interrupts). The value passed to `resume` becomes the return value of the `interrupt()` call inside the paused node: -```python -from langgraph.types import Command, interrupt - -def human_review(state: State): - # Pauses the graph and waits for a value - answer = interrupt("Do you approve?") - return {"messages": [{"role": "user", "content": answer}]} - -# First invocation - hits the interrupt and pauses -result = graph.invoke({"messages": [...]}, config) - -# Resume with a value - the interrupt() call returns "yes" -result = graph.invoke(Command(resume="yes"), config) -``` + Check out the [interrupts conceptual guide](/oss/langgraph/interrupts) for full details on interrupt patterns, including multiple interrupts and validation loops. @@ -1287,7 +1336,7 @@ You can return @[`Command`] from tools to update graph state and control flow. U -When used inside tools, `goto` adds a dynamic edge—any static edges already defined on the node that called the tool will still execute. +When used inside tools, `goto` adds a dynamic edge—any static edges already defined on the node that called the tool will still execute. For each node, use either tool-driven dynamic routing or static edges to route to the next nodes, not both. @@ -1436,13 +1485,13 @@ The current step counter is accessible in `config.metadata.langgraph_step` withi :::python -The step counter is stored in `config["metadata"]["langgraph_step"]`. The recursion limit check follows the logic: `step > stop` where `stop = step + recursion_limit + 1`. 
When the limit is exceeded, LangGraph raises a `GraphRecursionError`. +The step counter is stored in `config["metadata"]["langgraph_step"]`. LangGraph increments this counter as the graph executes and raises a `GraphRecursionError` once the configured `recursion_limit` is exceeded. ::: :::js -The step counter is stored in `config.metadata.langgraph_step`. The recursion limit check follows the logic: `step > stop` where `stop = step + recursionLimit + 1`. When the limit is exceeded, LangGraph raises a `GraphRecursionError`. +The step counter is stored in `config.metadata.langgraph_step`. LangGraph increments this counter as the graph executes and raises a `GraphRecursionError` once the configured `recursionLimit` is exceeded. ::: diff --git a/install.mdx b/install.mdx index aa321a6..dbb0879 100644 --- a/install.mdx +++ b/install.mdx @@ -4,7 +4,6 @@ sidebarTitle: Install --- - To install the base LangGraph package: :::python diff --git a/interrupts.mdx b/interrupts.mdx index a0b49eb..57b09eb 100644 --- a/interrupts.mdx +++ b/interrupts.mdx @@ -2,6 +2,13 @@ title: Interrupts --- +import LanggraphInterruptsResumeV2Py from '/snippets/code-samples/langgraph-interrupts-resume-v2-py.mdx'; +import LanggraphInterruptsMultiplePy from '/snippets/code-samples/langgraph-interrupts-multiple-py.mdx'; +import LanggraphInterruptsHitlStreamPy from '/snippets/code-samples/langgraph-interrupts-hitl-stream-py.mdx'; +import LanggraphInterruptsApprovalPy from '/snippets/code-samples/langgraph-interrupts-approval-py.mdx'; +import LanggraphInterruptsReviewPy from '/snippets/code-samples/langgraph-interrupts-review-py.mdx'; +import LanggraphInterruptsValidatePy from '/snippets/code-samples/langgraph-interrupts-validate-py.mdx'; + Interrupts allow you to pause graph execution at specific points and wait for external input before continuing. This enables human-in-the-loop patterns where you need external input to proceed. 
When an interrupt is triggered, LangGraph saves the graph state using its [persistence](/oss/langgraph/persistence) layer and waits indefinitely until you resume execution. Interrupts work by calling the `interrupt()` function at any point in your graph nodes. The function accepts any JSON-serializable value which is surfaced to the caller. When you're ready to continue, you resume execution by re-invoking the graph using `Command`, which then becomes the return value of the `interrupt()` call from inside the node. @@ -61,7 +68,12 @@ When you call @[`interrupt`], here's what happens: 1. **Graph execution gets suspended** at the exact point where @[`interrupt`] is called 2. **State is saved** using the checkpointer so execution can be resumed later. In production, this should be a persistent checkpointer (e.g. backed by a database) +:::python +3. **Value is returned** to the caller on `result.interrupts` when using `version="v2"` (or under `__interrupt__` with the default v1 invoke API); it can be any JSON-serializable value (string, object, array, etc.) +::: +:::js 3. **Value is returned** to the caller under `__interrupt__`; it can be any JSON-serializable value (string, object, array, etc.) +::: 4. **Graph waits indefinitely** until you resume execution with a response 5. 
**Response is passed back** into the node when you resume, becoming the return value of the `interrupt()` call @@ -72,23 +84,7 @@ After an interrupt pauses execution, you resume the graph by invoking it again w :::python - ```python - from langgraph.types import Command - - # Initial run - hits the interrupt and pauses - # thread_id is the persistent pointer (stores a stable ID in production) - config = {"configurable": {"thread_id": "thread-1"}} - result = graph.invoke({"input": "data"}, config=config, version="v2") - - # result is a GraphOutput with .value and .interrupts - # .interrupts contains the payloads passed to interrupt() - print(result.interrupts) - # > (Interrupt(value='Do you approve this action?'),) - - # Resume with the human's response - # The resume payload becomes the return value of interrupt() inside the node - graph.invoke(Command(resume=True), config=config, version="v2") - ``` + ```python @@ -170,39 +166,16 @@ The key thing that interrupts unlock is the ability to pause execution and wait When building interactive agents with human-in-the-loop workflows, you can stream both message chunks and node updates simultaneously to provide real-time feedback while handling interrupts. 
-Use multiple stream modes (`"messages"` and `"updates"`) with `subgraphs=True` (if subgraphs are present) to: +Use multiple stream modes (`"messages"`, `"updates"`, and `"values"`) with `subgraphs=True` (if subgraphs are present) to: - Stream AI responses in real-time as they're generated - Detect when the graph encounters an interrupt - Handle user input and resume execution seamlessly :::python -```python -async for chunk in graph.astream( - initial_input, - stream_mode=["messages", "updates"], - subgraphs=True, - config=config, - version="v2", -): - if chunk["type"] == "messages": - # Handle streaming message content - msg, _ = chunk["data"] - if isinstance(msg, AIMessageChunk) and msg.content: - display_streaming_content(msg.content) - - elif chunk["type"] == "updates": - # Check for interrupts in the updates data - if "__interrupt__" in chunk["data"]: - interrupt_info = chunk["data"]["__interrupt__"][0].value - user_response = get_user_input(interrupt_info) - initial_input = Command(resume=user_response) - break - else: - current_node = list(chunk["data"].keys())[0] -``` + -- **`version="v2"`**: All chunks are `StreamPart` dicts with `type`, `ns`, and `data` keys -- **`chunk["type"]`**: Narrow on the stream mode (`"messages"`, `"updates"`, etc.) for type inference +- **`version="v2"`**: All chunks are `StreamPart` dicts with `type`, `ns`, and `data` keys; pending interrupts appear on `chunk["interrupts"]` for `"values"` parts +- **`chunk["type"]`**: Narrow on the stream mode (`"messages"`, `"updates"`, `"values"`, etc.) for type inference - **`chunk["ns"]`**: Identifies the source graph (empty tuple for root, populated for subgraphs) - **`subgraphs=True`**: Required for interrupt detection in nested graphs ::: @@ -217,65 +190,7 @@ This ensures each response is paired with the correct interrupt at runtime. 
:::python -```python -from typing import Annotated, TypedDict -import operator - -from langgraph.checkpoint.memory import InMemorySaver -from langgraph.graph import START, END, StateGraph -from langgraph.types import Command, interrupt - - -class State(TypedDict): - vals: Annotated[list[str], operator.add] - - -def node_a(state): - answer = interrupt("question_a") - return {"vals": [f"a:{answer}"]} - - -def node_b(state): - answer = interrupt("question_b") - return {"vals": [f"b:{answer}"]} - - -graph = ( - StateGraph(State) - .add_node("a", node_a) - .add_node("b", node_b) - .add_edge(START, "a") - .add_edge(START, "b") - .add_edge("a", END) - .add_edge("b", END) - .compile(checkpointer=InMemorySaver()) -) - -config = {"configurable": {"thread_id": "1"}} - -# Step 1: invoke - both parallel nodes hit interrupt() and pause -interrupted_result = graph.invoke({"vals": []}, config) -print(interrupted_result) -""" -{ - 'vals': [], - '__interrupt__': [ - Interrupt(value='question_a', id='bd4f3183600f2c41dddafbf8f0f7be7b'), - Interrupt(value='question_b', id='29963e3d3585f0cef025dd0f14323f55') - ] -} -""" - -# Step 2: resume all pending interrupts at once -resume_map = { - i.id: f"answer for {i.value}" - for i in interrupted_result["__interrupt__"] -} -result = graph.invoke(Command(resume=resume_map), config) - -print("Final state:", result) -#> Final state: {'vals': ['a:answer for question_a', 'b:answer for question_b']} -``` + ::: @@ -365,7 +280,7 @@ from typing import Literal from langgraph.types import interrupt, Command def approval_node(state: State) -> Command[Literal["proceed", "cancel"]]: - # Pause execution; payload shows up under result["__interrupt__"] + # Pause execution; payload shows up in result.interrupts (v2) or result["__interrupt__"] (v1) is_approved = interrupt({ "question": "Do you want to proceed with this action?", "details": state["action_details"] @@ -404,10 +319,10 @@ const approvalNode: typeof State.Node = (state) => { When you resume the 
graph, pass `True` to approve or `False` to reject: ```python # To approve -graph.invoke(Command(resume=True), config=config) +graph.invoke(Command(resume=True), config=config, version="v2") # To reject -graph.invoke(Command(resume=False), config=config) +graph.invoke(Command(resume=False), config=config, version="v2") ``` ::: @@ -425,61 +340,8 @@ await graph.invoke(new Command({ resume: false }), config); :::python - ```python - from typing import Literal, Optional, TypedDict - - from langgraph.checkpoint.memory import MemorySaver - from langgraph.graph import StateGraph, START, END - from langgraph.types import Command, interrupt - - - class ApprovalState(TypedDict): - action_details: str - status: Optional[Literal["pending", "approved", "rejected"]] - - - def approval_node(state: ApprovalState) -> Command[Literal["proceed", "cancel"]]: - # Expose details so the caller can render them in a UI - decision = interrupt({ - "question": "Approve this action?", - "details": state["action_details"], - }) - - # Route to the appropriate node after resume - return Command(goto="proceed" if decision else "cancel") - - - def proceed_node(state: ApprovalState): - return {"status": "approved"} - - - def cancel_node(state: ApprovalState): - return {"status": "rejected"} - + - builder = StateGraph(ApprovalState) - builder.add_node("approval", approval_node) - builder.add_node("proceed", proceed_node) - builder.add_node("cancel", cancel_node) - builder.add_edge(START, "approval") - builder.add_edge("proceed", END) - builder.add_edge("cancel", END) - - # Use a more durable checkpointer in production - checkpointer = MemorySaver() - graph = builder.compile(checkpointer=checkpointer) - - config = {"configurable": {"thread_id": "approval-123"}} - initial = graph.invoke( - {"action_details": "Transfer $500", "status": "pending"}, - config=config, - ) - print(initial["__interrupt__"]) # -> [Interrupt(value={'question': ..., 'details': ...})] - - # Resume with the decision; True routes 
to proceed, False to cancel - resumed = graph.invoke(Command(resume=True), config=config) - print(resumed["status"]) # -> "approved" - ``` ::: :::js @@ -545,7 +407,7 @@ Sometimes you want to let a human review and edit part of the graph state before from langgraph.types import interrupt def review_node(state: State): - # Pause and show the current content for review (surfaces in result["__interrupt__"]) + # Pause and show the current content for review (surfaces in result.interrupts with v2) edited_content = interrupt({ "instruction": "Review and edit this content", "content": state["generated_text"] @@ -579,7 +441,8 @@ When resuming, provide the edited content: ```python graph.invoke( Command(resume="The edited and improved text"), # Value becomes the return from interrupt() - config=config + config=config, + version="v2", ) ``` ::: @@ -596,47 +459,8 @@ await graph.invoke( :::python - ```python - import sqlite3 - from typing import TypedDict - - from langgraph.checkpoint.memory import MemorySaver - from langgraph.graph import StateGraph, START, END - from langgraph.types import Command, interrupt - - - class ReviewState(TypedDict): - generated_text: str - - - def review_node(state: ReviewState): - # Ask a reviewer to edit the generated content - updated = interrupt({ - "instruction": "Review and edit this content", - "content": state["generated_text"], - }) - return {"generated_text": updated} - - - builder = StateGraph(ReviewState) - builder.add_node("review", review_node) - builder.add_edge(START, "review") - builder.add_edge("review", END) - - checkpointer = MemorySaver() - graph = builder.compile(checkpointer=checkpointer) - - config = {"configurable": {"thread_id": "review-42"}} - initial = graph.invoke({"generated_text": "Initial draft"}, config=config) - print(initial["__interrupt__"]) # -> [Interrupt(value={'instruction': ..., 'content': ...})] + - # Resume with the edited text from the reviewer - final_state = graph.invoke( - Command(resume="Improved 
draft after review"), - config=config, - ) - print(final_state["generated_text"]) # -> "Improved draft after review" - ``` ::: :::js @@ -703,7 +527,7 @@ from langgraph.types import interrupt def send_email(to: str, subject: str, body: str): """Send an email to a recipient.""" - # Pause before sending; payload surfaces in result["__interrupt__"] + # Pause before sending; payload surfaces in result.interrupts (v2) response = interrupt({ "action": "send_email", "to": to, @@ -785,7 +609,7 @@ This approach is useful when you want the approval logic to live with the tool i def send_email(to: str, subject: str, body: str): """Send an email to a recipient.""" - # Pause before sending; payload surfaces in result["__interrupt__"] + # Pause before sending; payload surfaces in result.interrupts (v2) response = interrupt({ "action": "send_email", "to": to, @@ -831,15 +655,17 @@ This approach is useful when you want the approval logic to live with the tool i ] }, config=config, + version="v2", ) - print(initial["__interrupt__"]) # -> [Interrupt(value={'action': 'send_email', ...})] + print(initial.interrupts) # -> (Interrupt(value={'action': 'send_email', ...}),) # Resume with approval and optionally edited arguments resumed = graph.invoke( Command(resume={"action": "approve", "subject": "Updated subject"}), config=config, + version="v2", ) - print(resumed["messages"][-1]) # -> Tool result returned by send_email + print(resumed.value["messages"][-1]) # -> Tool result returned by send_email ``` ::: @@ -948,7 +774,7 @@ def get_age_node(state: State): prompt = "What is your age?" 
while True: - answer = interrupt(prompt) # payload surfaces in result["__interrupt__"] + answer = interrupt(prompt) # payload surfaces in result.interrupts (v2) # Validate the input if isinstance(answer, int) and answer > 0: @@ -990,51 +816,8 @@ Each time you resume the graph with invalid input, it will ask again with a clea :::python - ```python - import sqlite3 - from typing import TypedDict - - from langgraph.checkpoint.sqlite import SqliteSaver - from langgraph.graph import StateGraph, START, END - from langgraph.types import Command, interrupt - - - class FormState(TypedDict): - age: int | None - + - def get_age_node(state: FormState): - prompt = "What is your age?" - - while True: - answer = interrupt(prompt) # payload surfaces in result["__interrupt__"] - - if isinstance(answer, int) and answer > 0: - return {"age": answer} - - prompt = f"'{answer}' is not a valid age. Please enter a positive number." - - - builder = StateGraph(FormState) - builder.add_node("collect_age", get_age_node) - builder.add_edge(START, "collect_age") - builder.add_edge("collect_age", END) - - checkpointer = SqliteSaver(sqlite3.connect("forms.db")) - graph = builder.compile(checkpointer=checkpointer) - - config = {"configurable": {"thread_id": "form-1"}} - first = graph.invoke({"age": None}, config=config) - print(first["__interrupt__"]) # -> [Interrupt(value='What is your age?', ...)] - - # Provide invalid data; the node re-prompts - retry = graph.invoke(Command(resume="thirty"), config=config) - print(retry["__interrupt__"]) # -> [Interrupt(value="'thirty' is not a valid age...", ...)] - - # Provide valid data; loop exits and state updates - final = graph.invoke(Command(resume=30), config=config) - print(final["age"]) # -> 30 - ``` ::: :::js diff --git a/local-server.mdx b/local-server.mdx index 4d9d3b8..9b7f8cf 100644 --- a/local-server.mdx +++ b/local-server.mdx @@ -4,7 +4,6 @@ sidebarTitle: Local server --- - This guide shows you how to run a LangGraph application locally. 
## Prerequisites diff --git a/memory.mdx b/memory.mdx deleted file mode 100644 index ac44a23..0000000 --- a/memory.mdx +++ /dev/null @@ -1,274 +0,0 @@ ---- -title: Memory overview ---- - - - -[Memory](/oss/langgraph/add-memory) is a system that remembers information about previous interactions. For AI agents, memory is crucial because it lets them remember previous interactions, learn from feedback, and adapt to user preferences. As agents tackle more complex tasks with numerous user interactions, this capability becomes essential for both efficiency and user satisfaction. - -This conceptual guide covers two types of memory, based on their recall scope: - -* [Short-term memory](#short-term-memory), or [thread](/oss/langgraph/persistence#threads)-scoped memory, tracks the ongoing conversation by maintaining message history within a session. LangGraph manages short-term memory as a part of your agent's [state](/oss/langgraph/graph-api#state). State is persisted to a database using a [checkpointer](/oss/langgraph/persistence#checkpoints) so the thread can be resumed at any time. Short-term memory updates when the graph is invoked or a step is completed, and the State is read at the start of each step. -* [Long-term memory](#long-term-memory) stores user-specific or application-level data across sessions and is shared _across_ conversational threads. It can be recalled _at any time_ and _in any thread_. Memories are scoped to any custom namespace, not just within a single thread ID. LangGraph provides [stores](/oss/langgraph/persistence#memory-store) ([reference doc](https://langchain-ai.github.io/langgraph/reference/store/#langgraph.store.base.BaseStore)) to let you save and recall long-term memories. 
- -![Short vs long](/oss/images/short-vs-long.png) - -## Short-term memory - -[Short-term memory](/oss/langgraph/add-memory#add-short-term-memory) lets your application remember previous interactions within a single [thread](/oss/langgraph/persistence#threads) or conversation. A [thread](/oss/langgraph/persistence#threads) organizes multiple interactions in a session, similar to the way email groups messages in a single conversation. - -LangGraph manages short-term memory as part of the agent's state, persisted via thread-scoped checkpoints. This state can normally include the conversation history along with other stateful data, such as uploaded files, retrieved documents, or generated artifacts. By storing these in the graph's state, the bot can access the full context for a given conversation while maintaining separation between different threads. - -### Manage short-term memory - -Conversation history is the most common form of short-term memory, and long conversations pose a challenge to today's LLMs. A full history may not fit inside an LLM's context window, resulting in an irrecoverable error. Even if your LLM supports the full context length, most LLMs still perform poorly over long contexts. They get "distracted" by stale or off-topic content, all while suffering from slower response times and higher costs. - -Chat models accept context using messages, which include developer provided instructions (a system message) and user inputs (human messages). In chat applications, messages alternate between human inputs and model responses, resulting in a list of messages that grows longer over time. Because context windows are limited and token-rich message lists can be costly, many applications can benefit from using techniques to manually remove or forget stale information. 
- -![Filter](/oss/images/filter.png) - -For more information on common techniques for managing messages, see the [Add and manage memory](/oss/langgraph/add-memory#manage-short-term-memory) guide. - -## Long-term memory - -[Long-term memory](/oss/langgraph/add-memory#add-long-term-memory) in LangGraph allows systems to retain information across different conversations or sessions. Unlike short-term memory, which is **thread-scoped**, long-term memory is saved within custom "namespaces." - -Long-term memory is a complex challenge without a one-size-fits-all solution. However, the following questions provide a framework to help you navigate the different techniques: - -* What is the type of memory? Humans use memories to remember facts ([semantic memory](#semantic-memory)), experiences ([episodic memory](#episodic-memory)), and rules ([procedural memory](#procedural-memory)). AI agents can use memory in the same ways. For example, AI agents can use memory to remember specific facts about a user to accomplish a task. -* [When do you want to update memories?](#writing-memories) Memory can be updated as part of an agent's application logic (e.g., "on the hot path"). In this case, the agent typically decides to remember facts before responding to a user. Alternatively, memory can be updated as a background task (logic that runs in the background / asynchronously and generates memories). We explain the tradeoffs between these approaches in the [section below](#writing-memories). - -Different applications require various types of memory. Although the analogy isn't perfect, examining [human memory types](https://www.psychologytoday.com/us/basics/memory/types-of-memory?ref=blog.langchain.dev) can be insightful. Some research (e.g., the [CoALA paper](https://arxiv.org/pdf/2309.02427)) has even mapped these human memory types to those used in AI agents. 
- -| Memory Type | What is Stored | Human Example | Agent Example | -|-------------|----------------|---------------|---------------| -| [Semantic](#semantic-memory) | Facts | Things I learned in school | Facts about a user | -| [Episodic](#episodic-memory) | Experiences | Things I did | Past agent actions | -| [Procedural](#procedural-memory) | Instructions | Instincts or motor skills | Agent system prompt | - -### Semantic memory - -[Semantic memory](https://en.wikipedia.org/wiki/Semantic_memory), both in humans and AI agents, involves the retention of specific facts and concepts. In humans, it can include information learned in school and the understanding of concepts and their relationships. For AI agents, semantic memory is often used to personalize applications by remembering facts or concepts from past interactions. - - -Semantic memory is different from "semantic search," which is a technique for finding similar content using "meaning" (usually as embeddings). Semantic memory is a term from psychology, referring to storing facts and knowledge, while semantic search is a method for retrieving information based on meaning rather than exact matches. - - -#### Profile - -Semantic memories can be managed in different ways. For example, memories can be a single, continuously updated "profile" of well-scoped and specific information about a user, organization, or other entity (including the agent itself). A profile is generally just a JSON document with various key-value pairs you've selected to represent your domain. - -When remembering a profile, you will want to make sure that you are **updating** the profile each time. As a result, you will want to pass in the previous profile and [ask the model to generate a new profile](https://github.com/langchain-ai/memory-template) (or some [JSON patch](https://github.com/hinthornw/trustcall) to apply to the old profile). 
This can become error-prone as the profile gets larger, and may benefit from splitting a profile into multiple documents or **strict** decoding when generating documents to ensure the memory schemas remain valid. - -![Update profile](/oss/images/update-profile.png) - -#### Collection - -Alternatively, memories can be a collection of documents that are continuously updated and extended over time. Each individual memory can be more narrowly scoped and easier to generate, which means that you're less likely to **lose** information over time. It's easier for an LLM to generate _new_ objects for new information than reconcile new information with an existing profile. As a result, a document collection tends to lead to [higher recall downstream](https://en.wikipedia.org/wiki/Precision_and_recall). - -However, this shifts some complexity to memory updating. The model must now _delete_ or _update_ existing items in the list, which can be tricky. In addition, some models may default to over-inserting and others may default to over-updating. See the [Trustcall](https://github.com/hinthornw/trustcall) package for one way to manage this and consider evaluation (e.g., with a tool like [LangSmith](/langsmith/evaluate-chatbot-tutorial)) to help you tune the behavior. - -Working with document collections also shifts complexity to memory **search** over the list. The `Store` currently supports both [semantic search](https://langchain-ai.github.io/langgraph/reference/store/#langgraph.store.base.SearchOp.query) and [filtering by content](https://langchain-ai.github.io/langgraph/reference/store/#langgraph.store.base.SearchOp.filter). - -Finally, using a collection of memories can make it challenging to provide comprehensive context to the model. While individual memories may follow a specific schema, this structure might not capture the full context or relationships between memories. 
As a result, when using these memories to generate responses, the model may lack important contextual information that would be more readily available in a unified profile approach. - -![Update list](/oss/images/update-list.png) - -Regardless of memory management approach, the central point is that the agent will use the semantic memories to [ground its responses](https://python.langchain.com/docs/concepts/rag/), which often leads to more personalized and relevant interactions. - -### Episodic memory - -[Episodic memory](https://en.wikipedia.org/wiki/Episodic_memory), in both humans and AI agents, involves recalling past events or actions. The [CoALA paper](https://arxiv.org/pdf/2309.02427) frames this well: facts can be written to semantic memory, whereas *experiences* can be written to episodic memory. For AI agents, episodic memory is often used to help an agent remember how to accomplish a task. - -:::python -In practice, episodic memories are often implemented through [few-shot example prompting](/langsmith/create-few-shot-evaluators), where agents learn from past sequences to perform tasks correctly. Sometimes it's easier to "show" than "tell" and LLMs learn well from examples. Few-shot learning lets you ["program"](https://x.com/karpathy/status/1627366413840322562) your LLM by updating the prompt with input-output examples to illustrate the intended behavior. While various [best-practices](https://python.langchain.com/docs/concepts/#1-generating-examples) can be used to generate few-shot examples, often the challenge lies in selecting the most relevant examples based on user input. -::: - -:::js -In practice, episodic memories are often implemented through few-shot example prompting, where agents learn from past sequences to perform tasks correctly. Sometimes it's easier to "show" than "tell" and LLMs learn well from examples. 
Few-shot learning lets you ["program"](https://x.com/karpathy/status/1627366413840322562) your LLM by updating the prompt with input-output examples to illustrate the intended behavior. While various best-practices can be used to generate few-shot examples, often the challenge lies in selecting the most relevant examples based on user input. -::: - -:::python -Note that the memory [store](/oss/langgraph/persistence#memory-store) is just one way to store data as few-shot examples. If you want to have more developer involvement, or tie few-shots more closely to your evaluation harness, you can also use a [LangSmith Dataset](/langsmith/manage-datasets) to store your data and implement your own retrieval logic to select the most relevant examples based on user input. - -See this [blog post](https://blog.langchain.dev/few-shot-prompting-to-improve-tool-calling-performance/) showcasing few-shot prompting to improve tool calling performance and this [blog post](https://blog.langchain.dev/aligning-llm-as-a-judge-with-human-preferences/) using few-shot examples to align an LLM to human preferences. -::: - -:::js -Note that the memory [store](/oss/langgraph/persistence#memory-store) is just one way to store data as few-shot examples. If you want to have more developer involvement, or tie few-shots more closely to your evaluation harness, you can also use a LangSmith Dataset to store your data and implement your own retrieval logic to select the most relevant examples based on user input. - -See this [blog post](https://blog.langchain.dev/few-shot-prompting-to-improve-tool-calling-performance/) showcasing few-shot prompting to improve tool calling performance and this [blog post](https://blog.langchain.dev/aligning-llm-as-a-judge-with-human-preferences/) using few-shot examples to align an LLM to human preferences. 
-::: - -### Procedural memory - -[Procedural memory](https://en.wikipedia.org/wiki/Procedural_memory), in both humans and AI agents, involves remembering the rules used to perform tasks. In humans, procedural memory is like the internalized knowledge of how to perform tasks, such as riding a bike via basic motor skills and balance. Episodic memory, on the other hand, involves recalling specific experiences, such as the first time you successfully rode a bike without training wheels or a memorable bike ride through a scenic route. For AI agents, procedural memory is a combination of model weights, agent code, and agent's prompt that collectively determine the agent's functionality. - -In practice, it is fairly uncommon for agents to modify their model weights or rewrite their code. However, it is more common for agents to modify their own prompts. - -One effective approach to refining an agent's instructions is through ["Reflection"](https://blog.langchain.dev/reflection-agents/) or meta-prompting. This involves prompting the agent with its current instructions (e.g., the system prompt) along with recent conversations or explicit user feedback. The agent then refines its own instructions based on this input. This method is particularly useful for tasks where instructions are challenging to specify upfront, as it allows the agent to learn and adapt from its interactions. - -For example, we built a [Tweet generator](https://www.youtube.com/watch?v=Vn8A3BxfplE) using external feedback and prompt re-writing to produce high-quality paper summaries for Twitter. In this case, the specific summarization prompt was difficult to specify *a priori*, but it was fairly easy for a user to critique the generated Tweets and provide feedback on how to improve the summarization process. 
- -The below pseudo-code shows how you might implement this with the LangGraph memory [store](/oss/langgraph/persistence#memory-store), using the store to save a prompt, the `update_instructions` node to get the current prompt (as well as feedback from the conversation with the user captured in `state["messages"]`), update the prompt, and save the new prompt back to the store. Then, the `call_model` node gets the updated prompt from the store and uses it to generate a response. - -:::python -```python -# Node that *uses* the instructions -def call_model(state: State, store: BaseStore): - namespace = ("agent_instructions", ) - instructions = store.get(namespace, key="agent_a")[0] - # Application logic - prompt = prompt_template.format(instructions=instructions.value["instructions"]) - ... - -# Node that updates instructions -def update_instructions(state: State, store: BaseStore): - namespace = ("instructions",) - instructions = store.search(namespace)[0] - # Memory logic - prompt = prompt_template.format(instructions=instructions.value["instructions"], conversation=state["messages"]) - output = llm.invoke(prompt) - new_instructions = output['new_instructions'] - store.put(("agent_instructions",), "agent_a", {"instructions": new_instructions}) - ... -``` -::: - -:::js -```typescript -// Node that *uses* the instructions -const callModel: GraphNode = async (state, config) => { - const namespace = ["agent_instructions"]; - const instructions = await config.store?.get(namespace, "agent_a"); - // Application logic - const prompt = promptTemplate.format({ - instructions: instructions[0].value.instructions - }); - // ... 
-}; - -// Node that updates instructions -const updateInstructions: GraphNode = async (state, config) => { - const namespace = ["instructions"]; - const currentInstructions = await config.store?.search(namespace); - // Memory logic - const prompt = promptTemplate.format({ - instructions: currentInstructions[0].value.instructions, - conversation: state.messages - }); - const output = await llm.invoke(prompt); - const newInstructions = output.new_instructions; - await store.put(["agent_instructions"], "agent_a", { - instructions: newInstructions - }); - // ... -}; -``` -::: - -![Update instructions](/oss/images/update-instructions.png) - -### Writing memories - -There are two primary methods for agents to write memories: ["in the hot path"](#in-the-hot-path) and ["in the background"](#in-the-background). - -![Hot path vs background](/oss/images/hot_path_vs_background.png) - -#### In the hot path - -Creating memories during runtime offers both advantages and challenges. On the positive side, this approach allows for real-time updates, making new memories immediately available for use in subsequent interactions. It also enables transparency, as users can be notified when memories are created and stored. - -However, this method also presents challenges. It may increase complexity if the agent requires a new tool to decide what to commit to memory. In addition, the process of reasoning about what to save to memory can impact agent latency. Finally, the agent must multitask between memory creation and its other responsibilities, potentially affecting the quantity and quality of memories created. - -As an example, ChatGPT uses a [save_memories](https://openai.com/index/memory-and-new-controls-for-chatgpt/) tool to upsert memories as content strings, deciding whether and how to use this tool with each user message. See our [memory-agent](https://github.com/langchain-ai/memory-agent) template as a reference implementation. 
- -#### In the background - -Creating memories as a separate background task offers several advantages. It eliminates latency in the primary application, separates application logic from memory management, and allows for more focused task completion by the agent. This approach also provides flexibility in timing memory creation to avoid redundant work. - -However, this method has its own challenges. Determining the frequency of memory writing becomes crucial, as infrequent updates may leave other threads without new context. Deciding when to trigger memory formation is also important. Common strategies include scheduling after a set time period (with rescheduling if new events occur), using a cron schedule, or allowing manual triggers by users or the application logic. - -See our [memory-service](https://github.com/langchain-ai/memory-template) template as a reference implementation. - -### Memory storage - -LangGraph stores long-term memories as JSON documents in a [store](/oss/langgraph/persistence#memory-store). Each memory is organized under a custom `namespace` (similar to a folder) and a distinct `key` (like a file name). Namespaces often include user or org IDs or other labels that make it easier to organize information. This structure enables hierarchical organization of memories. Cross-namespace searching is then supported through content filters. - -:::python -```python -from langgraph.store.memory import InMemoryStore - - -def embed(texts: list[str]) -> list[list[float]]: - # Replace with an actual embedding function or LangChain embeddings object - return [[1.0, 2.0] * len(texts)] - - -# InMemoryStore saves data to an in-memory dictionary. Use a DB-backed store in production use. 
-store = InMemoryStore(index={"embed": embed, "dims": 2}) -user_id = "my-user" -application_context = "chitchat" -namespace = (user_id, application_context) -store.put( - namespace, - "a-memory", - { - "rules": [ - "User likes short, direct language", - "User only speaks English & python", - ], - "my-key": "my-value", - }, -) -# get the "memory" by ID -item = store.get(namespace, "a-memory") -# search for "memories" within this namespace, filtering on content equivalence, sorted by vector similarity -items = store.search( - namespace, filter={"my-key": "my-value"}, query="language preferences" -) -``` -::: - -:::js -```typescript -import { InMemoryStore } from "@langchain/langgraph"; - -const embed = (texts: string[]): number[][] => { - // Replace with an actual embedding function or LangChain embeddings object - return texts.map(() => [1.0, 2.0]); -}; - -// InMemoryStore saves data to an in-memory dictionary. Use a DB-backed store in production use. -const store = new InMemoryStore({ index: { embed, dims: 2 } }); -const userId = "my-user"; -const applicationContext = "chitchat"; -const namespace = [userId, applicationContext]; - -await store.put( - namespace, - "a-memory", - { - rules: [ - "User likes short, direct language", - "User only speaks English & TypeScript", - ], - "my-key": "my-value", - } -); - -// get the "memory" by ID -const item = await store.get(namespace, "a-memory"); - -// search for "memories" within this namespace, filtering on content equivalence, sorted by vector similarity -const items = await store.search( - namespace, - { - filter: { "my-key": "my-value" }, - query: "language preferences" - } -); -``` -::: - -For more information about the memory store, see the [Persistence](/oss/langgraph/persistence#memory-store) guide. 
diff --git a/observability.mdx b/observability.mdx index 58f7c6c..fb5644e 100644 --- a/observability.mdx +++ b/observability.mdx @@ -2,7 +2,7 @@ title: LangSmith Observability --- -Traces are a series of steps that your application takes to go from input to output. Each of these individual steps is represented by a run. You can use [LangSmith](https://smith.langchain.com/) to visualize these execution steps. To use it, [enable tracing for your application](/langsmith/trace-with-langgraph). This enables you to do the following: +Traces are a series of steps that your application takes to go from input to output. Each of these individual steps is represented by a run. You can use [LangSmith](https://smith.langchain.com) to visualize these execution steps. To use it, [enable tracing for your application](/langsmith/trace-with-langgraph). This enables you to do the following: * [Debug a locally running application](/langsmith/observability-studio#debug-langsmith-traces). * [Evaluate the application performance](/oss/langchain/test/evals). @@ -13,13 +13,13 @@ Traces are a series of steps that your application takes to go from input to out Before you begin, ensure you have the following: - **A LangSmith account**: Sign up (for free) or log in at [smith.langchain.com](https://smith.langchain.com). -- **A LangSmith API key**: Follow the [Create an API key](/langsmith/create-account-api-key#create-an-api-key) guide. +- **A LangSmith API key**: Follow the [Create an API key](/langsmith/create-account-api-key) guide. 
## Enable tracing To enable tracing for your application, set the following environment variables: -```bash +```bash export LANGSMITH_TRACING=true export LANGSMITH_API_KEY= ``` @@ -150,7 +150,7 @@ await agent.invoke( { messages: [{role: "user", content: "Send a test email to alice@example.com"}] }, - { + config: { tags: ["production", "email-assistant", "v1.0"], metadata: { userId: "user123", diff --git a/overview.mdx b/overview.mdx index eae819a..32c3736 100644 --- a/overview.mdx +++ b/overview.mdx @@ -12,6 +12,18 @@ We will commonly use [LangChain](/oss/langchain/overview) components throughout LangGraph is focused on the underlying capabilities important for agent orchestration: durable execution, streaming, human-in-the-loop, and more. + + +- [Deep Agents](/oss/deepagents/overview) is an [agent harness](/oss/concepts/products#agent-harnesses-like-the-deep-agents-sdk): planning, subagents, filesystem tools, and context management on top of LangGraph. +- [LangChain](/oss/langchain/overview) is the agent framework: abstractions and integrations for models, tools, and agent loops. +- [LangGraph](/oss/langgraph/overview) is the orchestration runtime: durable execution, streaming, human-in-the-loop, and persistence. +- [LangSmith](/langsmith/home) is the platform for tracing, evaluation, prompts, and deployment across frameworks. +- [LangSmith Engine](/langsmith/engine) detects issues in your LangGraph agent traces and proposes fixes. You can open a pull request with the proposed fix directly from the Engine tab. +- [LangSmith Fleet](/langsmith/fleet/index) is the no-code agent builder for templates, integrations, and routine automation. + +Read [Frameworks, runtimes, and harnesses](/oss/concepts/products) for a comparison of the open source stack. 
+ + ## Install :::python @@ -95,7 +107,7 @@ Use [LangSmith](/langsmith/home) to trace requests, debug agent behavior, and ev LangGraph provides low-level supporting infrastructure for *any* long-running, stateful workflow or agent. LangGraph does not abstract prompts or architecture, and provides the following central benefits: -* [Durable execution](/oss/langgraph/durable-execution): Build agents that persist through failures and can run for extended periods, resuming from where they left off. +* [Persistence](/oss/langgraph/persistence): Build agents that persist through failures and can run for extended periods, resuming from where they left off. * [Human-in-the-loop](/oss/langgraph/interrupts): Incorporate human oversight by inspecting and modifying agent state at any point. * [Comprehensive memory](/oss/concepts/memory): Create stateful agents with both short-term working memory for ongoing reasoning and long-term memory across sessions. * [Debugging with LangSmith](/langsmith/home): Gain deep visibility into complex agent behavior with visualization tools that trace execution paths, capture state transitions, and provide detailed runtime metrics. 
diff --git a/persistence.mdx b/persistence.mdx index 8fd752d..553d28b 100644 --- a/persistence.mdx +++ b/persistence.mdx @@ -2,7 +2,12 @@ title: Persistence --- - +import StoreListNamespaceSearchPy from '/snippets/code-samples/store-list-namespace-search-py.mdx'; +import StoreListNamespaceSearchJs from '/snippets/code-samples/store-list-namespace-search-js.mdx'; +import StoreListNamespacePaginatePy from '/snippets/code-samples/store-list-namespace-paginate-py.mdx'; +import StoreListNamespacePaginateJs from '/snippets/code-samples/store-list-namespace-paginate-js.mdx'; +import StoreListNamespaceListPy from '/snippets/code-samples/store-list-namespace-list-py.mdx'; +import StoreListNamespaceListJs from '/snippets/code-samples/store-list-namespace-list-js.mdx'; LangGraph has a built-in persistence layer that saves graph state as checkpoints. When you compile a graph with a checkpointer, a snapshot of the graph state is saved at every step of execution, organized into threads. This enables human-in-the-loop workflows, conversational memory, time travel debugging, and fault-tolerant execution. @@ -13,6 +18,10 @@ LangGraph has a built-in persistence layer that saves graph state as checkpoints When using the [Agent Server](/langsmith/agent-server), you don't need to implement or configure checkpointers manually. The server handles all persistence infrastructure for you behind the scenes. + +Trace checkpointed state and debug how your agent resumes across sessions with [LangSmith](https://smith.langchain.com). Follow the [tracing quickstart](/langsmith/trace-with-langgraph) to get set up. + + ## Why use persistence Persistence is required for the following features: @@ -58,7 +67,11 @@ The state of a thread at a particular point in time is called a checkpoint. A ch #### Super-steps -LangGraph created a checkpoint at each **super-step** boundary. A super-step is a single "tick" of the graph where all nodes scheduled for that step execute (potentially in parallel). 
For a sequential graph like `START -> A -> B -> END`, there are separate super-steps for the input, node A, and node B — producing a checkpoint after each one. Understanding super-step boundaries is important for [time travel](/oss/langgraph/use-time-travel), because you can only resume execution from a checkpoint (i.e., a super-step boundary). +LangGraph creates a checkpoint at each **super-step** boundary. A super-step is a single "tick" of the graph where all nodes scheduled for that step execute (potentially in parallel). For a sequential graph like `START -> A -> B -> END`, there are separate super-steps for the input, node A, and node B — producing a checkpoint after each one. Understanding super-step boundaries is important for [time travel](/oss/langgraph/use-time-travel), because you can only resume execution from a checkpoint (i.e., a super-step boundary). + +In addition to super-step checkpoints, LangGraph also persists writes at the **node (task) level**. As each node within a super-step finishes, its outputs are written to the checkpointer's `checkpoint_writes` table as task entries linked to the in-progress checkpoint. These per-task writes are what enable [pending writes](#pending-writes) recovery: if another node in the same super-step fails, the successful nodes' writes are already durable and don't need to be re-run on resume. The full state snapshot is then committed once the super-step completes. + +LangGraph also persists writes from individual node executions within a super-step. These writes are stored as tasks and used for fault tolerance: if another node in the same super-step fails, successful node writes do not need to be recomputed when you resume. These task writes are not full `StateSnapshot` checkpoints, so time travel resumes from full checkpoints at super-step boundaries. Checkpoints are persisted and can be used to restore the state of a thread at a later time. 
@@ -583,7 +596,7 @@ When using the LangGraph API, you don't need to implement or configure stores ma -@[InMemoryStore] is suitable for development and testing. For production, use a persistent store like `PostgresStore` or `RedisStore`. All implementations extend @[BaseStore], which is the type annotation to use in node function signatures. +@[InMemoryStore] is suitable for development and testing. For production, use a persistent store like `PostgresStore`, `MongoDBStore`, or `RedisStore`. All implementations extend @[BaseStore], which is the type annotation to use in node function signatures. ### Basic usage @@ -633,15 +646,13 @@ store.put(namespace_for_memory, memory_id, memory) :::js ```typescript -import { v4 as uuidv4 } from "uuid"; - -const memoryId = uuidv4(); +const memoryId = crypto.randomUUID(); const memory = { food_preference: "I like pizza" }; await memoryStore.put(namespaceForMemory, memoryId, memory); ``` ::: -We can read out memories in our namespace using the `store.search` method, which will return all memories for a given user as a list. The most recent memory is the last in the list. +We can read out memories in our namespace using the `store.search` method, which will return memories for a given user as a list, up to the `limit` argument (default `10`). With `InMemoryStore`, items are returned in insertion order, so the most recent memory is last in the list; other backends may order differently (see [Listing items in a namespace](#listing-items-in-a-namespace)). :::python ```python @@ -700,6 +711,62 @@ The attributes it has are: ::: +### Listing items in a namespace + +:::python +Calling [`store.search`](https://reference.langchain.com/python/langgraph/store/#langgraph.store.base.BaseStore.search) (or the async [`store.asearch`](https://reference.langchain.com/python/langgraph/store/#langgraph.store.base.BaseStore.asearch)) with no `query` and no `filter` returns the items stored under `namespace_prefix`, up to `limit`. 
Use this to enumerate everything in a namespace when you don't need semantic ranking. +::: + +:::js +Calling `store.search` with no `query` and no `filter` returns the items stored under the namespace prefix, up to `limit`. Use this to enumerate everything in a namespace when you don't need semantic ranking. +::: + +:::python + + + +::: + +:::js + + + +::: + +Three behaviors to keep in mind: + +- **`namespace_prefix` matches by prefix, not exactly.** `("alice",)` also returns items under `("alice", "memories")`, `("alice", "preferences")`, and so on. To restrict to a single level, pass the full namespace or filter the returned items client-side on `item.namespace`. +- **Results past `limit` are silently truncated.** There is no overflow signal—set `limit` above your expected maximum, or paginate with `offset`. +- **Default ordering depends on the store backend.** `PostgresStore` and `AsyncPostgresStore` return results ordered by `updated_at` descending (most recently updated first). `InMemoryStore` returns results in insertion order (most recently inserted last). Do not rely on a specific order across implementations—sort client-side on `item.updated_at` if order matters. 
+ +:::python +To page through a large namespace: + + + +::: + +:::js +To page through a large namespace: + + + +::: + +:::python +To discover which namespaces exist (for example, to iterate over every user before listing their memories), use [`store.list_namespaces`](https://reference.langchain.com/python/langgraph/store/#langgraph.store.base.BaseStore.list_namespaces) or [`store.alist_namespaces`](https://reference.langchain.com/python/langgraph/store/#langgraph.store.base.BaseStore.alist_namespaces): + + + +::: + +:::js +To discover which namespaces exist (for example, to iterate over every user before listing their memories), use `store.listNamespaces`: + + + +::: + ### Semantic search Beyond simple retrieval, the store also supports semantic search, allowing you to find memories based on meaning rather than exact matches. To enable this, configure the store with an embedding model: @@ -787,7 +854,7 @@ store.put( // Store with specific fields to embed await store.put( namespaceForMemory, - uuidv4(), + crypto.randomUUID(), { food_preference: "I love Italian cuisine", context: "Discussing dinner plans", @@ -798,7 +865,7 @@ await store.put( // Store without embedding (still retrievable, but not searchable) await store.put( namespaceForMemory, - uuidv4(), + crypto.randomUUID(), { system_info: "Last updated: 2024-01-01" }, { index: false } ); @@ -917,7 +984,6 @@ You can access the store and the `userId` in _any node_ with the `runtime` argum ```typescript import { StateSchema, MessagesValue, Runtime } from "@langchain/langgraph"; -import { v4 as uuidv4 } from "uuid"; const MessagesState = new StateSchema({ messages: MessagesValue, @@ -935,7 +1001,7 @@ const updateMemory: GraphNode = async (state, runtime) => const memory = "Some memory content"; // Create a new memory ID - const memoryId = uuidv4(); + const memoryId = crypto.randomUUID(); // We create a new memory await runtime.store?.put(namespace, memoryId, { memory }); @@ -1070,6 +1136,46 @@ When we use the 
LangSmith, either locally (e.g., in [Studio](/langsmith/studio)) See the [deployment guide](/langsmith/semantic-search) for more details and configuration options. +## Durability modes + +LangGraph supports three durability modes that allow you to balance performance and data consistency based on your application's requirements. A higher durability mode adds more overhead to the workflow execution. You can specify the durability mode when calling any graph execution method: + +:::python +```python +graph.stream( + {"input": "test"}, + durability="sync" +) +``` +::: + +:::js +```typescript +await graph.stream( + { input: "test" }, + { durability: "sync" } +) +``` +::: + +The durability modes, from least to most durable, are as follows: + +* `"exit"`: LangGraph persists changes only when graph execution exits either successfully, with an error, or due to a human in the loop interrupt. This provides the best performance for long-running graphs but means intermediate state is not saved, so you cannot recover from system failures (like process crashes) that occur mid-execution. +* `"async"`: LangGraph persists changes asynchronously while the next step executes. This provides good performance and durability, but there's a small risk that LangGraph does not write checkpoints if the process crashes during execution. +* `"sync"`: LangGraph persists changes synchronously before the next step starts. This ensures that LangGraph writes every checkpoint before continuing execution, providing high durability at the cost of some performance overhead. + +## Optimize checkpoint storage + +:::python +By default, LangGraph checkpoints write the full value of every state channel at each super-step. For long-running threads with large accumulations—such as multi-turn conversations—this can produce significant storage growth over time. 
+ +@[`DeltaChannel`] stores only incremental deltas instead of the full accumulated value, substantially reducing checkpoint size for append-heavy channels. See [DeltaChannel](/oss/langgraph/pregel#deltachannel-beta) for usage and the storage-vs-latency tradeoff. + + +`DeltaChannel` requires `langgraph>=1.2` and is currently in beta. The API may change in future releases. + +::: + ## Checkpointer libraries Under the hood, checkpointing is powered by checkpointer objects that conform to @[`BaseCheckpointSaver`] interface. LangGraph provides several checkpointer implementations, all implemented via standalone, installable libraries. @@ -1082,14 +1188,14 @@ See [checkpointer integrations](/oss/integrations/checkpointers/index) for avail * `langgraph-checkpoint`: The base interface for checkpointer savers (@[`BaseCheckpointSaver`]) and serialization/deserialization interface (@[`SerializerProtocol`]). Includes in-memory checkpointer implementation (@[`InMemorySaver`]) for experimentation. LangGraph comes with `langgraph-checkpoint` included. * `langgraph-checkpoint-sqlite`: An implementation of LangGraph checkpointer that uses SQLite database (@[`SqliteSaver`] / @[`AsyncSqliteSaver`]). Ideal for experimentation and local workflows. Needs to be installed separately. * `langgraph-checkpoint-postgres`: An advanced checkpointer that uses Postgres database (@[`PostgresSaver`] / @[`AsyncPostgresSaver`]), used in LangSmith. Ideal for using in production. Needs to be installed separately. -* `langgraph-checkpoint-cosmosdb`: An implementation of LangGraph checkpointer that uses Azure Cosmos DB (`CosmosDBSaver` / `AsyncCosmosDBSaver`). Ideal for using in production with Azure. Supports both sync and async operations. Needs to be installed separately. +* `langchain-azure-cosmosdb`: An implementation of LangGraph checkpointer that uses Azure Cosmos DB for NoSQL (@[`CosmosDBSaverSync`] / @[`CosmosDBSaver`]). Ideal for using in production with Azure. 
Supports both sync and async operations, with Microsoft Entra ID authentication. Needs to be installed separately. ::: :::js * `@langchain/langgraph-checkpoint`: The base interface for checkpointer savers (@[`BaseCheckpointSaver`]) and serialization/deserialization interface (@[`SerializerProtocol`]). Includes in-memory checkpointer implementation (@[`MemorySaver`]) for experimentation. LangGraph comes with `@langchain/langgraph-checkpoint` included. * `@langchain/langgraph-checkpoint-sqlite`: An implementation of LangGraph checkpointer that uses SQLite database (@[`SqliteSaver`]). Ideal for experimentation and local workflows. Needs to be installed separately. * `@langchain/langgraph-checkpoint-postgres`: An advanced checkpointer that uses Postgres database (@[`PostgresSaver`]), used in LangSmith. Ideal for using in production. Needs to be installed separately. -* `@langchain/langgraph-checkpoint-mongodb`: An advanced checkpointer that uses MongoDB database (`MongoDBSaver`). Ideal for using in production. Needs to be installed separately. +* `@langchain/langgraph-checkpoint-mongodb`: An advanced checkpointer (`MongoDBSaver`) and long-term memory store (`MongoDBStore`) backed by MongoDB. The store supports cross-thread persistence with optional integrated vector search. Ideal for production use. Needs to be installed separately. * `@langchain/langgraph-checkpoint-redis`: An advanced checkpointer that uses Redis database (`RedisSaver`). Ideal for using in production. Needs to be installed separately. ::: diff --git a/pregel.mdx b/pregel.mdx index 3923876..b266d8d 100644 --- a/pregel.mdx +++ b/pregel.mdx @@ -4,7 +4,6 @@ sidebarTitle: Runtime --- - :::python @[`Pregel`] implements LangGraph's runtime, managing the execution of LangGraph applications. @@ -45,18 +44,168 @@ An **actor** is a `PregelNode`. It subscribes to channels, reads data from them, ## Channels -Channels are used to communicate between actors (PregelNodes). 
Each channel has a value type, an update type, and an update function—which takes a sequence of updates and modifies the stored value. Channels can be used to send data from one chain to another, or to send data from a chain to itself in a future step. LangGraph provides a number of built-in channels: +Channels are used to communicate between actors (PregelNodes). Each channel has a value type, an update type, and an update function—which takes a sequence of updates and modifies the stored value. Channels can be used to send data from one chain to another, or to send data from a chain to itself in a future step. + +### LastValue + +@[`LastValue`] is the default channel type. It stores the last value written to it, overwriting any previous value. Use it for input and output values, or for passing data from one step to the next. + +:::python +```python +from langgraph.channels import LastValue + +channel: LastValue[int] = LastValue(int) +``` +::: + +:::js +```typescript +import { LastValue } from "@langchain/langgraph/channels"; + +const channel = new LastValue(); +``` +::: + +### Topic + +@[`Topic`] is a configurable PubSub channel useful for sending multiple values between actors or accumulating output across steps. It can be configured to deduplicate values or to accumulate all values written during a run. + +:::python +```python +from langgraph.channels import Topic + +# Accumulate all values written across steps +channel: Topic[str] = Topic(str, accumulate=True) +``` +::: + +:::js +```typescript +import { Topic } from "@langchain/langgraph/channels"; + +// Accumulate all values written across steps +const channel = new Topic({ accumulate: true }); +``` +::: + +### BinaryOperatorAggregate + +@[`BinaryOperatorAggregate`] stores a persistent value that is updated by applying a binary operator to the current value and each new update. Use it to compute running aggregates across steps. 
:::python -* @[`LastValue`]: The default channel, stores the last value sent to the channel, useful for input and output values, or for sending data from one step to the next. -* @[`Topic`]: A configurable PubSub Topic, useful for sending multiple values between **actors**, or for accumulating output. Can be configured to deduplicate values or to accumulate values over the course of multiple steps. -* @[`BinaryOperatorAggregate`]: stores a persistent value, updated by applying a binary operator to the current value and each update sent to the channel, useful for computing aggregates over multiple steps; e.g.,`total = BinaryOperatorAggregate(int, operator.add)` +```python +import operator +from langgraph.channels import BinaryOperatorAggregate + +# Running total: each write adds to the current value +total = BinaryOperatorAggregate(int, operator.add) +``` ::: :::js -* @[`LastValue`]: The default channel, stores the last value sent to the channel, useful for input and output values, or for sending data from one step to the next. -* @[`Topic`]: A configurable PubSub Topic, useful for sending multiple values between **actors**, or for accumulating output. Can be configured to deduplicate values or to accumulate values over the course of multiple steps. -* @[`BinaryOperatorAggregate`]: stores a persistent value, updated by applying a binary operator to the current value and each update sent to the channel, useful for computing aggregates over multiple steps; e.g.,`total = BinaryOperatorAggregate(int, operator.add)` +```typescript +import { BinaryOperatorAggregate } from "@langchain/langgraph/channels"; + +// Running total: each write adds to the current value +const total = new BinaryOperatorAggregate({ operator: (a, b) => a + b }); +``` +::: + +:::python +### DeltaChannel (beta) + + +`DeltaChannel` requires `langgraph>=1.2` and is currently in beta. The API may change in future releases. 
+ + +@[`DeltaChannel`] stores only the incremental delta at each step rather than the full accumulated value. This is most useful for channels that are written frequently and accumulate large values over time—for example, a conversation message list in a long-running thread. Without delta storage, the full list is re-serialized into every checkpoint; with `DeltaChannel`, only the new messages written at each step are stored. + + +Consider `DeltaChannel` when a channel is both written to frequently and grows large over time. A good signal: if you notice checkpoint sizes growing linearly with thread length for a particular channel, `DeltaChannel` is likely a good fit. + + +Use `DeltaChannel` in an `Annotated` type annotation the same way you would use a plain reducer: + +```python +from typing import Annotated, Sequence +from typing_extensions import TypedDict +from langgraph.channels import DeltaChannel + + +def my_reducer(state: list[str], writes: Sequence[list[str]]) -> list[str]: + result = list(state) + for write in writes: + result.extend(write) + return result + + +class State(TypedDict): + messages: Annotated[list[str], DeltaChannel(my_reducer)] +``` + +#### Bulk reducer requirement + +The `reducer` passed to `DeltaChannel` is a **bulk reducer**: it receives the current state and a *sequence* of all writes from the current step in a single call—not pairwise like a standard reducer. This differs from the per-key reducers used with `Annotated` in a `StateGraph`, where the reducer is called once per update. + + +The bulk reducer **must be associative** (batching-invariant): + +``` +reducer(reducer(state, [xs]), [ys]) == reducer(state, [xs, ys]) +``` + +If your reducer is not associative, the reconstructed state may differ depending on how LangGraph batches writes across steps, producing inconsistent behavior. 
+ + + +**The reducer runs on reconstruction, not on write.** Unlike a @[`BinaryOperatorAggregate`], whose reducer is invoked at write time so the combined value is what gets serialized into the checkpoint, a `DeltaChannel` reducer is invoked when the channel value is *rebuilt* from its persisted writes. The raw per-step writes are what get serialized; the reducer is only called when the value is materialized—on the next read, on the next step's actors, or when replaying history. + +Practical consequences when designing a reducer: + +- **Make it a pure function of `(state, writes)`.** Any side effects, randomness, or wall-clock reads (e.g., `uuid.uuid4()`, `datetime.now()`) execute every time the value is reconstructed and produce different results on each replay. They are *not* baked into the persisted writes. +- **Do not rely on mutations to incoming writes being persisted.** If your reducer mutates a write object (for example, assigning a stable ID to an item that arrived without one), that mutation lives only in the reconstructed value. The stored write still has the original shape, so the next reconstruction will see the un-mutated input again. +- **Attach identity and other stable metadata upstream.** If downstream code needs to reference an item by ID across turns (e.g., to update or remove it later), assign that ID before the value is written to the channel—not inside the reducer. 
+ + +Here are bulk reducers for the two most common cases: + +```python +from typing import Any, Sequence + + +# List: append all writes in order +def list_reducer(state: list[Any], writes: Sequence[list[Any]]) -> list[Any]: + result = list(state) + for write in writes: + result.extend(write) + return result + + +# Dict: merge all writes, last write wins on key conflicts +def dict_reducer( + state: dict[str, Any], writes: Sequence[dict[str, Any]] +) -> dict[str, Any]: + result = dict(state) + for write in writes: + result.update(write) + return result +``` + +Both are associative: applying batches one at a time produces the same result as applying them together. + +#### Use snapshot_frequency for bounded read latency + +Without snapshots, reading a `DeltaChannel` value requires replaying the full write history—O(N) for a thread with N steps. Setting `snapshot_frequency=K` writes a full snapshot every K pregel steps, bounding read depth to at most K steps: + +```python +class State(TypedDict): + messages: Annotated[ + list[str], + DeltaChannel(my_reducer, snapshot_frequency=5), + ] +``` + +Higher values of `snapshot_frequency` reduce storage overhead but increase read latency. Lower values bound latency more tightly at the cost of larger checkpoints. `None` (the default) skips snapshots entirely—appropriate when reads are rare or threads are short. ::: ## Examples diff --git a/quickstart.mdx b/quickstart.mdx index 90da63e..d640d71 100644 --- a/quickstart.mdx +++ b/quickstart.mdx @@ -3,7 +3,6 @@ title: Quickstart --- - This quickstart demonstrates how to build a calculator agent using the LangGraph Graph API or the Functional API. @@ -27,7 +26,7 @@ For this example, you will need to set up a [Claude (Anthropic)](https://www.ant ## 1. Define tools and model -In this example, we'll use the Claude Sonnet 4.6 model and define tools for addition, multiplication, and division. 
+In this example, we'll use the Claude Sonnet 4.5 model and define tools for addition, multiplication, and division. :::python ```python @@ -400,7 +399,7 @@ for (const message of result.messages) { ::: - To learn how to trace your agent with LangSmith, see the [LangSmith documentation](/langsmith/trace-with-langgraph). +Trace and debug your agent with [LangSmith](https://smith.langchain.com). Follow the [tracing quickstart](/langsmith/trace-with-langgraph) to get set up. When ready for production, see [Deploy](/langsmith/deployment) for hosting options. Congratulations! You've built your first agent using the LangGraph Graph API. @@ -724,7 +723,7 @@ for (const message of result.messages) { ## 1. Define tools and model -In this example, we'll use the Claude Sonnet 4.6 model and define tools for addition, multiplication, and division. +In this example, we'll use the Claude Sonnet 4.5 model and define tools for addition, multiplication, and division. :::python @@ -982,7 +981,7 @@ for (const message of result) { ::: - To learn how to trace your agent with LangSmith, see the [LangSmith documentation](/langsmith/trace-with-langgraph). +Trace and debug your agent with [LangSmith](https://smith.langchain.com). Follow the [tracing quickstart](/langsmith/trace-with-langgraph) to get set up. When ready for production, see [Deploy](/langsmith/deployment) for hosting options. Congratulations! You've built your first agent using the LangGraph Functional API. 
diff --git a/sql-agent.mdx b/sql-agent.mdx index 177f60e..0365212 100644 --- a/sql-agent.mdx +++ b/sql-agent.mdx @@ -34,20 +34,20 @@ We will cover the following concepts: :::python ```bash pip - pip install langchain langgraph langchain-community + pip install langchain langgraph ``` ::: :::js ```bash npm - npm i langchain @langchain/core @langchain/classic @langchain/langgraph @langchain/openai typeorm sqlite3 zod + npm i langchain @langchain/core @langchain/langgraph @langchain/openai sqlite3 zod ``` ```bash yarn - yarn add langchain @langchain/core @langchain/classic @langchain/langgraph @langchain/openai typeorm sqlite3 zod + yarn add langchain @langchain/core @langchain/langgraph @langchain/openai sqlite3 zod ``` ```bash pnpm - pnpm add langchain @langchain/core @langchain/classic @langchain/langgraph @langchain/openai typeorm sqlite3 zod + pnpm add langchain @langchain/core @langchain/langgraph @langchain/openai sqlite3 zod ``` ::: @@ -97,16 +97,23 @@ else: print(f"Failed to download the file. Status code: {response.status_code}") ``` -We will use a handy SQL database wrapper available in the `langchain_community` package to interact with the database. 
The wrapper provides a simple interface to execute SQL queries and fetch results: +We will use Python's built-in `sqlite3` module to interact with the database: ```python -from langchain_community.utilities import SQLDatabase +import sqlite3 -db = SQLDatabase.from_uri("sqlite:///Chinook.db") +con = sqlite3.connect("Chinook.db") +cursor = con.cursor() -print(f"Dialect: {db.dialect}") -print(f"Available tables: {db.get_usable_table_names()}") -print(f'Sample output: {db.run("SELECT * FROM Artist LIMIT 5;")}') +cursor.execute("SELECT name FROM sqlite_master WHERE type='table';") +tables = [row[0] for row in cursor.fetchall() if not row[0].startswith("sqlite_")] + +print("Dialect: sqlite") +print(f"Available tables: {tables}") + +cursor.execute("SELECT * FROM Artist LIMIT 5;") +print(f"Sample output: {cursor.fetchall()}") +con.close() ``` ``` Dialect: sqlite @@ -137,22 +144,31 @@ async function resolveDbPath() { } ``` -We will use a handy SQL database wrapper available in the `@langchain/classic/sql_db` module to interact with the database. 
The wrapper provides a simple interface to execute SQL queries and fetch results: +We will use the `sqlite3` library to interact with the database: ```typescript -import { SqlDatabase } from "@langchain/classic/sql_db"; -import { DataSource } from "typeorm"; - -const dbPath = await resolveDbPath(); -const datasource = new DataSource({ type: "sqlite", database: dbPath }); -const db = await SqlDatabase.fromDataSourceParams({ appDataSource: datasource }); -const dialect = db.appDataSourceOptions.type; +import sqlite3 from "sqlite3"; + +const dialect = "sqlite"; + +async function runQuery(query: string, params: unknown[] = []): Promise { + const dbPath = await resolveDbPath(); + const db = new sqlite3.Database(dbPath); + return new Promise((resolve, reject) => { + db.all(query, params, (err, rows) => { + db.close(); + if (err) reject(err); + else resolve(rows); + }); + }); +} +const tableRows = await runQuery("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%';"); +const tableNames = tableRows.map((row) => String(row.name)); console.log(`Dialect: ${dialect}`); -const tableNames = db.allTables.map(t => t.tableName); console.log(`Available tables: ${tableNames.join(", ")}`); -const sampleResults = await db.run("SELECT * FROM Artist LIMIT 5;"); -console.log(`Sample output: ${sampleResults}`); +const sampleResults = await runQuery("SELECT * FROM Artist LIMIT 5;"); +console.log(`Sample output: ${JSON.stringify(sampleResults)}`); ``` ``` Dialect: sqlite @@ -163,27 +179,97 @@ Sample output: [{"ArtistId":1,"Name":"AC/DC"},{"ArtistId":2,"Name":"Accept"},{"A ## 3. Add tools for database interactions + +The following database tools are minimal wrappers for demonstration purposes only. They are not intended to be secure or used in production. Use narrowly scoped database permissions and add application-specific validation before executing model-generated SQL. 
+ + :::python -Use the `SQLDatabase` wrapper available in the `langchain_community` package to interact with the database. The wrapper provides a simple interface to execute SQL queries and fetch results: +We can implement database [tools](/oss/langchain/tools) as thin wrappers using the `@tool` decorator from `langchain.tools`: ```python -from langchain_community.agent_toolkits import SQLDatabaseToolkit - -toolkit = SQLDatabaseToolkit(db=db, llm=model) +import sqlite3 +from langchain.tools import tool -tools = toolkit.get_tools() +# Below are minimal tools for demonstration purposes. + +@tool +def sql_db_list_tables() -> str: + """Input is an empty string, output is a comma-separated list of tables in the database.""" + con = sqlite3.connect("Chinook.db") + try: + cursor = con.cursor() + cursor.execute("SELECT name FROM sqlite_master WHERE type='table';") + tables = [row[0] for row in cursor.fetchall() if not row[0].startswith("sqlite_")] + return ", ".join(tables) + finally: + con.close() + +@tool +def sql_db_schema(table_names: str) -> str: + """Input to this tool is a comma-separated list of tables, output is the schema and sample rows for those tables. + Be sure that the tables actually exist by calling sql_db_list_tables first! 
+ Example Input: table1, table2, table3""" + con = sqlite3.connect("Chinook.db") + try: + cursor = con.cursor() + cursor.execute("SELECT name FROM sqlite_master WHERE type='table';") + valid_tables = {row[0] for row in cursor.fetchall() if not row[0].startswith("sqlite_")} + results = [] + for table in table_names.split(","): + table = table.strip() + if table not in valid_tables: + results.append(f"Error: table_names {{{table!r}}} not found in database") + continue + cursor.execute("SELECT sql FROM sqlite_master WHERE type='table' AND name=?;", (table,)) + schema_row = cursor.fetchone() + if schema_row: + results.append(schema_row[0]) + try: + quoted_table = '"' + table.replace('"', '""') + '"' + cursor.execute(f"SELECT * FROM {quoted_table} LIMIT 3;") + rows = cursor.fetchall() + if rows: + col_names = [description[0] for description in cursor.description] + results.append(f"/*\n3 rows from {table} table:\n" + "\t".join(col_names) + "\n" + "\n".join("\t".join(str(x) for x in row) for row in rows) + "\n*/") + except Exception as e: + results.append(f"Error fetching sample rows: {e}") + return "\n\n".join(results) + finally: + con.close() + +@tool +def sql_db_query(query: str) -> str: + """Input to this tool is a detailed and correct SQL query, output is a result from the database. + If the query is not correct, an error message will be returned. + If an error is returned, rewrite the query, check the query, and try again. 
+ If you encounter an issue with Unknown column 'xxxx' in 'field list', use sql_db_schema to query the correct table fields.""" + con = sqlite3.connect("Chinook.db") + try: + cursor = con.cursor() + cursor.execute(query) + res = cursor.fetchall() + return str(res) + except Exception as e: + return f"Error: {e}" + finally: + con.close() + +tools = [sql_db_list_tables, sql_db_schema, sql_db_query] for tool in tools: print(f"{tool.name}: {tool.description}\n") ``` ``` -sql_db_query: Input to this tool is a detailed and correct SQL query, output is a result from the database. If the query is not correct, an error message will be returned. If an error is returned, rewrite the query, check the query, and try again. If you encounter an issue with Unknown column 'xxxx' in 'field list', use sql_db_schema to query the correct table fields. - -sql_db_schema: Input to this tool is a comma-separated list of tables, output is the schema and sample rows for those tables. Be sure that the tables actually exist by calling sql_db_list_tables first! Example Input: table1, table2, table3 - sql_db_list_tables: Input is an empty string, output is a comma-separated list of tables in the database. -sql_db_query_checker: Use this tool to double check if your query is correct before executing it. Always use this tool before executing a query with sql_db_query! +sql_db_schema: Input to this tool is a comma-separated list of tables, output is the schema and sample rows for those tables. + Be sure that the tables actually exist by calling sql_db_list_tables first! + Example Input: table1, table2, table3 + +sql_db_query: Input to this tool is a detailed and correct SQL query, output is a result from the database. + If the query is not correct, an error message will be returned. + If an error is returned, rewrite the query, check the query, and try again. + If you encounter an issue with Unknown column 'xxxx' in 'field list', use sql_db_schema to query the correct table fields. 
``` ::: :::js @@ -193,10 +279,18 @@ We'll create custom tools to interact with the database: import { tool } from "langchain"; import * as z from "zod"; -// Tool to list all tables +async function getTableNames() { + const rows = await runQuery("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%';"); + return rows.map((row) => String(row.name)); +} + +function quoteSqliteIdentifier(identifier: string) { + return `"${identifier.replaceAll('"', '""')}"`; +} + const listTablesTool = tool( async () => { - const tableNames = db.allTables.map(t => t.tableName); + const tableNames = await getTableNames(); return tableNames.join(", "); }, { @@ -206,11 +300,38 @@ const listTablesTool = tool( } ); -// Tool to get schema for specific tables const getSchemaTool = tool( async ({ table_names }) => { - const tables = table_names.split(",").map(t => t.trim()); - return await db.getTableInfo(tables); + const validTables = new Set(await getTableNames()); + const results: string[] = []; + for (const table of table_names.split(",").map((t) => t.trim())) { + if (!validTables.has(table)) { + results.push(`Error: table_names {'${table}'} not found in database`); + continue; + } + const schemaRows = await runQuery( + "SELECT sql FROM sqlite_master WHERE type='table' AND name=?;", + [table] + ); + const schema = schemaRows[0]?.sql; + if (schema) { + results.push(String(schema)); + try { + const rows = await runQuery(`SELECT * FROM ${quoteSqliteIdentifier(table)} LIMIT 3;`); + if (rows.length > 0) { + const colNames = Object.keys(rows[0]); + results.push( + `/*\n3 rows from ${table} table:\n${colNames.join("\t")}\n` + + rows.map((row) => colNames.map((col) => String(row[col])).join("\t")).join("\n") + + "\n*/" + ); + } + } catch (e) { + results.push(`Error fetching sample rows: ${e}`); + } + } + } + return results.join("\n\n"); }, { name: "sql_db_schema", @@ -221,14 +342,13 @@ const getSchemaTool = tool( } ); -// Tool to execute SQL query const queryTool = tool( 
async ({ query }) => { try { - const result = await db.run(query); - return typeof result === "string" ? result : JSON.stringify(result); + const result = await runQuery(query); + return JSON.stringify(result); } catch (error) { - return `Error: ${error.message}`; + return `Error: ${error instanceof Error ? error.message : String(error)}`; } }, { @@ -323,7 +443,7 @@ only ask for the relevant columns given the question. DO NOT make any DML statements (INSERT, UPDATE, DELETE, DROP etc.) to the database. """.format( - dialect=db.dialect, + dialect="sqlite", top_k=5, ) @@ -357,7 +477,7 @@ If there are any of the above mistakes, rewrite the query. If there are no mista just reproduce the original query. You will call the appropriate tool to execute the query after running this check. -""".format(dialect=db.dialect) +""".format(dialect="sqlite") def check_query(state: MessagesState): diff --git a/streaming.mdx b/streaming.mdx index 8e33c37..e865a30 100644 --- a/streaming.mdx +++ b/streaming.mdx @@ -5,7 +5,11 @@ title: Streaming import NostreamTagPy from '/snippets/code-samples/nostream-tag-py.mdx'; import NostreamTagJs from '/snippets/code-samples/nostream-tag-js.mdx'; -LangGraph implements a streaming system to surface real-time updates. Streaming is crucial for enhancing the responsiveness of applications built on LLMs. By displaying output progressively, even before a complete response is ready, streaming significantly improves user experience (UX), particularly when dealing with the latency of LLMs. + +For new applications, we recommend [event streaming](/oss/langgraph/event-streaming)—the typed-projection API introduced in LangGraph v1.2. Event streaming gives you separate iterators per projection (messages, values, subgraphs, output) so you can consume them independently instead of branching on `stream_mode` chunks. + + +This page covers LangGraph's stream-mode API. 
It exposes graph execution through stream modes such as `updates`, `values`, `messages`, `custom`, `checkpoints`, `tasks`, and `debug`. Use it when you need direct access to graph-runtime events or specific stream-mode output. ## Get started @@ -88,6 +92,10 @@ for await (const chunk of await graph.stream(inputs, { ``` ::: + +Debug streaming events, inspect token-by-token LLM output, and monitor latency with [LangSmith](https://smith.langchain.com). Follow the [tracing quickstart](/langsmith/trace-with-langgraph) to get set up. + + :::python ### Stream output format (v2) @@ -338,7 +346,7 @@ class MyState: joke: str = "" -model = init_chat_model(model="gpt-4.1-mini") +model = init_chat_model(model="gpt-5.4-mini") def call_model(state: MyState): """Call the LLM to generate a joke about a topic""" @@ -389,7 +397,7 @@ const MyState = new StateSchema({ joke: z.string().default(""), }); -const model = new ChatOpenAI({ model: "gpt-4.1-mini" }); +const model = new ChatOpenAI({ model: "gpt-5.4-mini" }); const callModel: GraphNode = async (state) => { // Call the LLM to generate a joke about a topic @@ -428,9 +436,9 @@ You can associate `tags` with LLM invocations to filter the streamed tokens by L from langchain.chat_models import init_chat_model # model_1 is tagged with "joke" -model_1 = init_chat_model(model="gpt-4.1-mini", tags=['joke']) +model_1 = init_chat_model(model="gpt-5.4-mini", tags=['joke']) # model_2 is tagged with "poem" -model_2 = init_chat_model(model="gpt-4.1-mini", tags=['poem']) +model_2 = init_chat_model(model="gpt-5.4-mini", tags=['poem']) graph = ... 
# define a graph that uses these LLMs @@ -456,12 +464,12 @@ import { ChatOpenAI } from "@langchain/openai"; // model1 is tagged with "joke" const model1 = new ChatOpenAI({ - model: "gpt-4.1-mini", + model: "gpt-5.4-mini", tags: ['joke'] }); // model2 is tagged with "poem" const model2 = new ChatOpenAI({ - model: "gpt-4.1-mini", + model: "gpt-5.4-mini", tags: ['poem'] }); @@ -491,9 +499,9 @@ for await (const [msg, metadata] of await graph.stream( from langgraph.graph import START, StateGraph # The joke_model is tagged with "joke" - joke_model = init_chat_model(model="gpt-4.1-mini", tags=["joke"]) + joke_model = init_chat_model(model="gpt-5.4-mini", tags=["joke"]) # The poem_model is tagged with "poem" - poem_model = init_chat_model(model="gpt-4.1-mini", tags=["poem"]) + poem_model = init_chat_model(model="gpt-5.4-mini", tags=["poem"]) class State(TypedDict): @@ -550,12 +558,12 @@ for await (const [msg, metadata] of await graph.stream( // The jokeModel is tagged with "joke" const jokeModel = new ChatOpenAI({ - model: "gpt-4.1-mini", + model: "gpt-5.4-mini", tags: ["joke"] }); // The poemModel is tagged with "poem" const poemModel = new ChatOpenAI({ - model: "gpt-4.1-mini", + model: "gpt-5.4-mini", tags: ["poem"] }); @@ -673,7 +681,7 @@ for await (const [msg, metadata] of await graph.stream( from langgraph.graph import START, StateGraph from langchain_openai import ChatOpenAI - model = ChatOpenAI(model="gpt-4.1-mini") + model = ChatOpenAI(model="gpt-5.4-mini") class State(TypedDict): @@ -730,7 +738,7 @@ for await (const [msg, metadata] of await graph.stream( import { StateGraph, StateSchema, GraphNode, START } from "@langchain/langgraph"; import * as z from "zod"; - const model = new ChatOpenAI({ model: "gpt-4.1-mini" }); + const model = new ChatOpenAI({ model: "gpt-5.4-mini" }); const State = new StateSchema({ topic: z.string(), @@ -1618,7 +1626,7 @@ for await (const chunk of await graph.stream( from openai import AsyncOpenAI openai_client = AsyncOpenAI() - 
model_name = "gpt-4.1-mini" + model_name = "gpt-5.4-mini" async def stream_tokens(model_name: str, messages: list[dict]): @@ -1735,7 +1743,7 @@ for await (const chunk of await graph.stream( import OpenAI from "openai"; const openaiClient = new OpenAI(); - const modelName = "gpt-4.1-mini"; + const modelName = "gpt-5.4-mini"; async function* streamTokens(modelName: string, messages: any[]) { const response = await openaiClient.chat.completions.create({ @@ -2000,7 +2008,7 @@ This limits LangGraph ability to automatically propagate context, and affects La from langgraph.graph import START, StateGraph from langchain.chat_models import init_chat_model - model = init_chat_model(model="gpt-4.1-mini") + model = init_chat_model(model="gpt-5.4-mini") class State(TypedDict): topic: str diff --git a/studio.mdx b/studio.mdx index 435dbc8..dca5b51 100644 --- a/studio.mdx +++ b/studio.mdx @@ -6,14 +6,14 @@ When building agents with LangChain locally, it's helpful to visualize what's ha Studio connects to your locally running agent to show you each step your agent takes: the prompts sent to the model, tool calls and their results, and the final output. You can test different inputs, inspect intermediate states, and iterate on your agent's behavior without additional code or deployment. -This page describes how to set up Studio with your local LangChain agent. +This page describes how to set up Studio with your local LangChain agent. ## Prerequisites Before you begin, ensure you have the following: - **A LangSmith account**: Sign up (for free) or log in at [smith.langchain.com](https://smith.langchain.com). - **A LangSmith API key**: Follow the [Create an API key](/langsmith/create-account-api-key#create-an-api-key) guide. +- **A LangSmith API key**: Follow the [Create an API key](/langsmith/create-account-api-key) guide. - If you don't want data [traced](/langsmith/observability-concepts#traces) to LangSmith, set `LANGSMITH_TRACING=false` in your application's `.env` file.
With tracing disabled, no data leaves your local server. ## Set up local Agent server @@ -55,7 +55,7 @@ def send_email(to: str, subject: str, body: str): return f"Email sent to {to}" agent = create_agent( - "gpt-4.1", + "gpt-5.4", tools=[send_email], system_prompt="You are an email assistant. Always use the send_email tool.", ) @@ -79,7 +79,7 @@ function sendEmail(to: string, subject: string, body: string): string { } const agent = createAgent({ - model: "gpt-4.1", + model: "gpt-5.4", tools: [sendEmail], systemPrompt: "You are an email assistant. Always use the sendEmail tool.", }); diff --git a/test.mdx b/test.mdx index 5fc5a75..891835e 100644 --- a/test.mdx +++ b/test.mdx @@ -3,7 +3,6 @@ title: Test --- - After you've prototyped your LangGraph agent, a natural next step is to add tests. This guide covers some useful patterns you can use when writing unit tests. :::python diff --git a/thinking-in-langgraph.mdx b/thinking-in-langgraph.mdx index b763243..9ddfe6e 100644 --- a/thinking-in-langgraph.mdx +++ b/thinking-in-langgraph.mdx @@ -3,6 +3,8 @@ title: Thinking in LangGraph description: Learn how to think about building agents with LangGraph --- +import LanggraphThinkingHitlV2Py from '/snippets/code-samples/langgraph-thinking-hitl-v2-py.mdx'; + When you build an agent with LangGraph, you will first break it apart into discrete steps called **nodes**. Then, you will describe the different decisions and transitions from each of your nodes. Finally, you connect nodes together through a shared **state** that each node can read from and write to. In this walkthrough, we'll guide you through the thought process of building a customer support email agent with LangGraph. 
@@ -55,7 +57,7 @@ flowchart TD H --> J[END] I --> J[END] - classDef process fill:#DBEAFE,stroke:#2563EB,stroke-width:2px,color:#1E3A8A + classDef process fill:#E5F4FF,stroke:#006DDD,stroke-width:2px,color:#030710 class A,B,C,D,E,F,G,H,I,J process ``` @@ -295,14 +297,17 @@ Different errors need different handling strategies: | Transient errors (network issues, rate limits) | System (automatic) | Retry policy | Temporary failures that usually resolve on retry | | LLM-recoverable errors (tool failures, parsing issues) | LLM | Store error in state and loop back | LLM can see the error and adjust its approach | | User-fixable errors (missing information, unclear instructions) | Human | Pause with `interrupt()` | Need user input to proceed | +| Recoverable failure after retries | Developer (declarative) | `error_handler` | Run a compensation/recovery branch after retry exhaustion | | Unexpected errors | Developer | Let them bubble up | Unknown issues that need debugging | - Add a retry policy to automatically retry network issues and rate limits: + Add a retry policy to automatically retry network issues and rate limits. :::python + Combine with `timeout=` to cap each attempt. See [Fault tolerance](/oss/langgraph/fault-tolerance) for the full lifecycle. + ```python from langgraph.types import RetryPolicy @@ -469,6 +474,39 @@ Different errors need different handling strategies: ::: + + + After retries are exhausted, run a recovery function that updates state and routes to a compensation branch. + + :::python + + See [Fault tolerance](/oss/langgraph/fault-tolerance#error-handling) for the full pattern. + + + `error_handler` requires `langgraph>=1.2`. 
+ + + ```python + from langgraph.errors import NodeError + from langgraph.types import Command, RetryPolicy + + def payment_error_handler(state: State, error: NodeError) -> Command: + return Command( + update={"status": f"compensated: {error.error}"}, + goto="finalize", + ) + + workflow.add_node( + "charge_payment", + charge_payment, + retry_policy=RetryPolicy(max_attempts=3, retry_on=ConnectionError), + error_handler=payment_error_handler, + ) + ``` + + ::: + + @@ -967,35 +1005,7 @@ Let's run our agent with an urgent billing issue that needs human review: :::python -```python -# Test with an urgent billing issue -initial_state = { - "email_content": "I was charged twice for my subscription! This is urgent!", - "sender_email": "customer@example.com", - "email_id": "email_123", - "messages": [] -} - -# Run with a thread_id for persistence -config = {"configurable": {"thread_id": "customer_123"}} -result = app.invoke(initial_state, config) -# The graph will pause at human_review -print(f"human review interrupt:{result['__interrupt__']}") - -# When ready, provide human input to resume -from langgraph.types import Command - -human_response = Command( - resume={ - "approved": True, - "edited_response": "We sincerely apologize for the double charge. I've initiated an immediate refund..." - } -) - -# Resume execution -final_result = app.invoke(human_response, config) -print(f"Email sent successfully!") -``` + ::: @@ -1082,7 +1092,7 @@ Or why separate Doc Search from Draft Reply? The answer involves trade-offs between resilience and observability. -**The resilience consideration:** LangGraph's [durable execution](/oss/langgraph/durable-execution) creates checkpoints at node boundaries. When a workflow resumes after an interruption or failure, it starts from the beginning of the node where execution stopped. Smaller nodes mean more frequent checkpoints, which means less work to repeat if something goes wrong. 
If you combine multiple operations into one large node, a failure near the end means re-executing everything from the start of that node. +**The resilience consideration:** LangGraph's [persistence layer](/oss/langgraph/persistence) creates checkpoints at node boundaries. When a workflow resumes after an interruption or failure, it starts from the beginning of the node where execution stopped. Smaller nodes mean more frequent checkpoints, which means less work to repeat if something goes wrong. If you combine multiple operations into one large node, a failure near the end means re-executing everything from the start of that node. Why we chose this breakdown for the email agent: @@ -1098,7 +1108,7 @@ A different valid approach: You could combine `Read Email` and `Classify Intent` Application-level concerns: The caching discussion in Step 2 (whether to cache search results) is an application-level decision, not a LangGraph framework feature. You implement caching within your node functions based on your specific requirements—LangGraph doesn't prescribe this. -Performance considerations: More nodes doesn't mean slower execution. LangGraph writes checkpoints in the background by default ([async durability mode](/oss/langgraph/durable-execution#durability-modes)), so your graph continues running without waiting for checkpoints to complete. This means you get frequent checkpoints with minimal performance impact. You can adjust this behavior if needed—use `"exit"` mode to checkpoint only at completion, or `"sync"` mode to block execution until each checkpoint is written. +Performance considerations: More nodes doesn't mean slower execution. LangGraph writes checkpoints in the background by default ([async durability mode](/oss/langgraph/persistence#durability-modes)), so your graph continues running without waiting for checkpoints to complete. This means you get frequent checkpoints with minimal performance impact. 
You can adjust this behavior if needed—use `"exit"` mode to checkpoint only at completion, or `"sync"` mode to block execution until each checkpoint is written. ### Where to go from here diff --git a/ui.mdx b/ui.mdx index b7c0ee2..b4d631b 100644 --- a/ui.mdx +++ b/ui.mdx @@ -2,9 +2,9 @@ title: Agent Chat UI --- -import AgentChatUi from '/snippets/oss/agent-chat-ui.mdx'; +import agent_chat_ui from '/snippets/oss/agent-chat-ui.mdx'; - + ### Connect to your agent diff --git a/use-functional-api.mdx b/use-functional-api.mdx index 4e6a7ea..2b48faa 100644 --- a/use-functional-api.mdx +++ b/use-functional-api.mdx @@ -4,7 +4,6 @@ sidebarTitle: Use the Functional API --- - The [**Functional API**](/oss/langgraph/functional-api) allows you to add LangGraph's key features ([persistence](/oss/langgraph/persistence), [memory](/oss/langgraph/add-memory), [human-in-the-loop](/oss/langgraph/interrupts), and [streaming](/oss/langgraph/streaming)) to your applications with minimal changes to your existing code. @@ -730,6 +729,46 @@ await main.invoke({ any_input: "foobar" }, config); ``` ::: +:::python + +## Set task and entrypoint timeouts + +Use the `timeout` parameter with `@task` or `@entrypoint` to limit how long a single async attempt can run. Provide the timeout in seconds or as a `datetime.timedelta`. + +```python +import asyncio + +from langgraph.errors import NodeTimeoutError +from langgraph.func import entrypoint, task +from langgraph.types import RetryPolicy + + +@task( + timeout=1.0, + retry_policy=RetryPolicy(retry_on=NodeTimeoutError), +) +async def call_api(url: str) -> str: + await asyncio.sleep(2) + return f"result from {url}" + + +@entrypoint(timeout=5.0) +async def workflow(inputs: dict) -> str: + return await call_api(inputs["url"]) + + +try: + await workflow.ainvoke({"url": "https://example.com"}) +except NodeTimeoutError: + print("Task timed out") +``` + +Timeouts are supported only for async tasks and entrypoints. 
If you set `timeout` on a sync function, LangGraph raises an error when the task or entrypoint is declared. + +When a task or entrypoint exceeds its timeout, LangGraph raises `NodeTimeoutError`, which subclasses Python's built-in `TimeoutError`. If a retry policy retries `TimeoutError` or `NodeTimeoutError`, the timed-out attempt is retried. The timeout applies to each attempt independently, so the timer resets for every retry. + +::: + ## Caching tasks :::python diff --git a/use-graph-api.mdx b/use-graph-api.mdx index 8b5cdad..4b99039 100644 --- a/use-graph-api.mdx +++ b/use-graph-api.mdx @@ -3,8 +3,6 @@ title: Use the graph API sidebarTitle: Use the graph API --- - - import ChatModelTabs from '/snippets/chat-model-tabs.mdx'; This guide demonstrates the basics of LangGraph's Graph API. It walks through [state](#define-and-update-state), as well as composing common graph structures such as [sequences](#create-a-sequence-of-steps), [branches](#create-branches), and [loops](#create-and-control-loops). It also covers LangGraph's control features, including the [Send API](#map-reduce-and-the-send-api) for map-reduce workflows and the [Command API](#combine-control-flow-and-state-updates-with-command) for combining state updates with "hops" across nodes. 
@@ -1209,7 +1207,7 @@ console.log(await graph.invoke({}, { context: { myRuntimeValue: "b" } })); // [ MODELS = { "anthropic": init_chat_model("claude-haiku-4-5-20251001"), - "openai": init_chat_model("gpt-4.1-mini"), + "openai": init_chat_model("gpt-5.4-mini"), } def call_model(state: MessagesState, runtime: Runtime[ContextSchema]): @@ -1237,7 +1235,7 @@ console.log(await graph.invoke({}, { context: { myRuntimeValue: "b" } })); // [ ``` claude-haiku-4-5-20251001 - gpt-4.1-mini-2025-04-14 + gpt-5.4-mini ``` ::: @@ -1260,7 +1258,7 @@ console.log(await graph.invoke({}, { context: { myRuntimeValue: "b" } })); // [ const MODELS = { anthropic: new ChatAnthropic({ model: "claude-haiku-4-5-20251001" }), - openai: new ChatOpenAI({ model: "gpt-4.1-mini" }), + openai: new ChatOpenAI({ model: "gpt-5.4-mini" }), }; const callModel: GraphNode = async (state, config) => { @@ -1292,7 +1290,7 @@ console.log(await graph.invoke({}, { context: { myRuntimeValue: "b" } })); // [ ``` claude-haiku-4-5-20251001 - gpt-4.1-mini-2025-04-14 + gpt-5.4-mini ``` ::: @@ -1316,7 +1314,7 @@ console.log(await graph.invoke({}, { context: { myRuntimeValue: "b" } })); // [ MODELS = { "anthropic": init_chat_model("claude-haiku-4-5-20251001"), - "openai": init_chat_model("gpt-4.1-mini"), + "openai": init_chat_model("gpt-5.4-mini"), } def call_model(state: MessagesState, runtime: Runtime[ContextSchema]): @@ -1372,7 +1370,7 @@ console.log(await graph.invoke({}, { context: { myRuntimeValue: "b" } })); // [ const MODELS = { anthropic: new ChatAnthropic({ model: "claude-haiku-4-5-20251001" }), - openai: new ChatOpenAI({ model: "gpt-4.1-mini" }), + openai: new ChatOpenAI({ model: "gpt-5.4-mini" }), }; const callModel: GraphNode = async (state, config) => { @@ -1483,14 +1481,15 @@ By default, the retry policy retries on any exception except for the following: from langchain.chat_models import init_chat_model from langgraph.graph import END, MessagesState, StateGraph, START from langgraph.types import RetryPolicy 
- from langchain_community.utilities import SQLDatabase from langchain.messages import AIMessage - db = SQLDatabase.from_uri("sqlite:///:memory:") + con = sqlite3.connect(":memory:") model = init_chat_model("claude-haiku-4-5-20251001") def query_database(state: MessagesState): - query_result = db.run("SELECT * FROM Artist LIMIT 10;") + cursor = con.cursor() + cursor.execute("SELECT * FROM Artist LIMIT 10;") + query_result = str(cursor.fetchall()) return {"messages": [AIMessage(content=query_result)]} def call_model(state: MessagesState): @@ -1568,6 +1567,97 @@ By default, the retry policy retries on any exception except for the following: :::python +## Set node timeouts + +Use the `timeout` parameter with @[`add_node`] to limit how long a single async node invocation can run. Provide the timeout in seconds or as a `datetime.timedelta`. + +```python +import asyncio +from typing_extensions import TypedDict + +from langgraph.errors import NodeTimeoutError +from langgraph.graph import END, START, StateGraph + + +class State(TypedDict): + value: str + + +async def call_model(state: State) -> State: + await asyncio.sleep(2) + return {"value": "done"} + + +builder = StateGraph(State) +builder.add_node("model", call_model, timeout=1.0) +builder.add_edge(START, "model") +builder.add_edge("model", END) +graph = builder.compile() + +try: + await graph.ainvoke({"value": "start"}) +except NodeTimeoutError: + print("Node timed out") +``` + +Node timeouts are supported only for async nodes. If you set `timeout` on a sync node, LangGraph raises an error when the graph is compiled because sync Python execution cannot be safely canceled in-process. + +When a node exceeds its timeout, LangGraph raises `NodeTimeoutError`, which subclasses Python's built-in `TimeoutError`. If the node has a `retry_policy` that retries `TimeoutError` or `NodeTimeoutError`, the timed-out attempt is retried. The timeout applies to each attempt independently, so the timer resets for every retry. 
+ +Timed-out attempts do not commit their buffered writes. This prevents state updates or child-task scheduling from leaking out after the timeout boundary. + +## Configure node timeouts + +The `timeout=` parameter on @[`add_node`] caps how long a single async node attempt may run. Pass a number (seconds), a `timedelta`, or a @[`TimeoutPolicy`] for finer control over run and idle timeouts. When the limit is exceeded, LangGraph raises @[`NodeTimeoutError`] and lets the retry policy decide whether to retry. + + +Per-node timeouts require `langgraph>=1.2`. + + +```python +from langgraph.types import TimeoutPolicy + +builder.add_node( + "call_model", + call_model, + timeout=TimeoutPolicy(run_timeout=120, idle_timeout=30), +) +``` + +See [Fault tolerance](/oss/langgraph/fault-tolerance#timeouts) for the full timeout lifecycle, idle-timeout refresh sources, and `runtime.heartbeat()`. + +## Handle node errors + +The `error_handler=` parameter on @[`add_node`] registers a function that runs after a node fails and all retries are exhausted. The handler receives the current state and a typed @[`NodeError`] with failure context, and can route to a recovery branch via @[`Command`]: + + +Node-level error handlers require `langgraph>=1.2`. + + +```python +from langgraph.errors import NodeError +from langgraph.types import Command, RetryPolicy + +def payment_error_handler(state: State, error: NodeError) -> Command: + return Command( + update={"status": f"compensated: {error.error}"}, + goto="finalize", + ) + +builder.add_node( + "charge_payment", + charge_payment, + retry_policy=RetryPolicy(max_attempts=3, retry_on=ConnectionError), + error_handler=payment_error_handler, +) +``` + +See [Fault tolerance](/oss/langgraph/fault-tolerance#error-handling) for compensation patterns and `Command` routing. + +::: + +:::python + ### Access execution info inside a node You can access execution identity and retry information via `runtime.execution_info`. 
This surfaces thread, run, and checkpoint identifiers as well as retry state, without needing to read from `config` directly. @@ -1674,6 +1764,28 @@ graph = builder.compile() Requires `deepagents>=0.5.0` (or `langgraph>=1.1.5`) for `runtime.execution_info` and `runtime.server_info`. +### Access drain state inside a node + +When a [graceful shutdown](/oss/langgraph/fault-tolerance#graceful-shutdown) has been requested, `runtime.drain_requested` is `True`. Read this inside a node to skip expensive work before the next superstep boundary: + +```python +from langgraph.runtime import Runtime + +def my_node(state: State, runtime: Runtime) -> State: + if runtime.drain_requested: # [!code highlight] + return {"status": "skipped", "reason": runtime.drain_reason} + return {"status": do_work()} +``` + +| Property | Type | Description | +| -------- | ---- | ----------- | +| `drain_requested` | `bool` | `True` if `RunControl.request_drain()` has been called for this run. | +| `drain_reason` | `str \| None` | The reason string passed to `request_drain()`, or `None` if drain was not requested. | + + +Requires `langgraph>=1.2`. See [Graceful shutdown](/oss/langgraph/fault-tolerance#graceful-shutdown) for the full `RunControl` API. + + ::: :::js @@ -2725,9 +2837,8 @@ const graph = new StateGraph(State) ``` ::: -To control the recursion limit, specify `"recursionLimit"` in the config. This will raise a `GraphRecursionError`, which you can catch and handle: - :::python +To control the recursion limit, specify `"recursion_limit"` in the config. If the graph exceeds this limit, LangGraph raises a `GraphRecursionError`, which you can catch and handle: ```python from langgraph.errors import GraphRecursionError @@ -2739,6 +2850,7 @@ except GraphRecursionError: ::: :::js +To control the recursion limit, specify `"recursionLimit"` in the config. 
If the graph exceeds this limit, LangGraph raises a `GraphRecursionError`, which you can catch and handle: ```typescript import { GraphRecursionError } from "@langchain/langgraph"; diff --git a/use-subgraphs.mdx b/use-subgraphs.mdx index ff625c1..9ebb346 100644 --- a/use-subgraphs.mdx +++ b/use-subgraphs.mdx @@ -3,6 +3,8 @@ title: Subgraphs sidebarTitle: Subgraphs --- +import LanggraphSubgraphsInterruptV2Py from '/snippets/code-samples/langgraph-subgraphs-interrupt-v2-py.mdx'; + This guide explains the mechanics of using subgraphs. A subgraph is a [graph](/oss/langgraph/graph-api#graphs) that is used as a [node](/oss/langgraph/graph-api#nodes) in another graph. Subgraphs are useful for: @@ -603,7 +605,7 @@ The `checkpointer` parameter on `.compile()` controls subgraph persistence: | Mode | `checkpointer=` | Behavior | |------|-----------------|----------| -| [Per-invocation](#per-invocation-default) | `None` (default) | Each call starts fresh and inherits the parent's checkpointer to support [interrupts](/oss/langgraph/interrupts) and [durable execution](/oss/langgraph/durable-execution) within a single call. | +| [Per-invocation](#per-invocation-default) | `None` (default) | Each call starts fresh and inherits the parent's checkpointer to support [interrupts](/oss/langgraph/interrupts) and [durable execution](/oss/langgraph/persistence) within a single call. | | [Per-thread](#per-thread) | `True` | State accumulates across calls on the same thread. Each call picks up where the last one left off. | | [Stateless](#stateless) | `False` | No checkpointing at all—runs like a plain function call. No interrupts or durable execution. | @@ -619,12 +621,12 @@ The examples below use LangChain's @[`create_agent`], which is a common way to b ### Stateful -Stateful subgraphs inherit the parent graph's checkpointer, which enables [interrupts](/oss/langgraph/interrupts), [durable execution](/oss/langgraph/durable-execution), and state inspection. The two stateful modes differ in how long state is retained. 
+Stateful subgraphs inherit the parent graph's checkpointer, which enables [interrupts](/oss/langgraph/interrupts), [persistence](/oss/langgraph/persistence), and state inspection. The two stateful modes differ in how long state is retained. #### Per-invocation (default) -This is the recommended mode for most applications, including [multi-agent](/oss/langchain/multi-agent) systems where subagents are invoked as tools. It supports interrupts, [durable execution](/oss/langgraph/durable-execution), and parallel calls while keeping each invocation isolated. +This is the recommended mode for most applications, including [multi-agent](/oss/langchain/multi-agent) systems where subagents are invoked as tools. It supports [interrupts](/oss/langgraph/interrupts), [persistence](/oss/langgraph/persistence), and parallel calls while keeping each invocation isolated. Use per-invocation persistence when each call to the subgraph is independent and the subagent doesn't need to remember anything from previous calls. This is the most common pattern, especially for [multi-agent](/oss/langchain/multi-agent) systems where subagents handle one-off requests like "look up this customer's order" or "summarize this document." @@ -653,13 +655,13 @@ def veggie_info(veggie_name: str) -> str: # Subagents - no checkpointer setting (inherits parent) fruit_agent = create_agent( - model="gpt-4.1-mini", + model="gpt-5.4-mini", tools=[fruit_info], prompt="You are a fruit expert. Use the fruit_info tool. Respond in one sentence.", ) veggie_agent = create_agent( - model="gpt-4.1-mini", + model="gpt-5.4-mini", tools=[veggie_info], prompt="You are a veggie expert. Use the veggie_info tool. Respond in one sentence.", ) @@ -683,7 +685,7 @@ def ask_veggie_expert(question: str) -> str: # Outer agent with checkpointer agent = create_agent( - model="gpt-4.1-mini", + model="gpt-5.4-mini", tools=[ask_fruit_expert, ask_veggie_expert], prompt=( "You have two experts: ask_fruit_expert and ask_veggie_expert. 
" @@ -705,20 +707,7 @@ agent = create_agent( return f"Info about {fruit_name}" ``` - ```python - config = {"configurable": {"thread_id": "1"}} - - # Invoke - the subagent's tool calls interrupt() - response = agent.invoke( - {"messages": [{"role": "user", "content": "Tell me about apples"}]}, - config=config, - ) - # response contains __interrupt__ - - # Resume - approve the interrupt - response = agent.invoke(Command(resume=True), config=config) # [!code highlight] - # Subagent message count: 4 - ``` + Each invocation starts with a fresh subagent state. The subagent does not remember previous calls: @@ -786,13 +775,13 @@ const veggieInfo = tool( // Subagents - no checkpointer setting (inherits parent) const fruitAgent = createAgent({ - model: "gpt-4.1-mini", + model: "gpt-5.4-mini", tools: [fruitInfo], prompt: "You are a fruit expert. Use the fruit_info tool. Respond in one sentence.", }); const veggieAgent = createAgent({ - model: "gpt-4.1-mini", + model: "gpt-5.4-mini", tools: [veggieInfo], prompt: "You are a veggie expert. Use the veggie_info tool. Respond in one sentence.", }); @@ -828,7 +817,7 @@ const askVeggieExpert = tool( // Outer agent with checkpointer const agent = createAgent({ - model: "gpt-4.1-mini", + model: "gpt-5.4-mini", tools: [askFruitExpert, askVeggieExpert], prompt: "You have two experts: ask_fruit_expert and ask_veggie_expert. " + @@ -939,7 +928,7 @@ def fruit_info(fruit_name: str) -> str: # Subagent with checkpointer=True for persistent state fruit_agent = create_agent( - model="gpt-4.1-mini", + model="gpt-5.4-mini", tools=[fruit_info], prompt="You are a fruit expert. Use the fruit_info tool. Respond in one sentence.", checkpointer=True, # [!code highlight] @@ -958,7 +947,7 @@ def ask_fruit_expert(question: str) -> str: # Use ToolCallLimitMiddleware to prevent parallel calls to per-thread subagents, # which would cause checkpoint conflicts. 
agent = create_agent( - model="gpt-4.1-mini", + model="gpt-5.4-mini", tools=[ask_fruit_expert], prompt="You have a fruit expert. ALWAYS delegate fruit questions to ask_fruit_expert.", middleware=[ # [!code highlight] @@ -980,20 +969,7 @@ agent = create_agent( return f"Info about {fruit_name}" ``` - ```python - config = {"configurable": {"thread_id": "1"}} - - # Invoke - the subagent's tool calls interrupt() - response = agent.invoke( - {"messages": [{"role": "user", "content": "Tell me about apples"}]}, - config=config, - ) - # response contains __interrupt__ - - # Resume - approve the interrupt - response = agent.invoke(Command(resume=True), config=config) # [!code highlight] - # Subagent message count: 4 - ``` + State accumulates across invocations—the subagent remembers past conversations: @@ -1036,11 +1012,11 @@ agent = create_agent( ) fruit_agent = create_sub_agent( - "gpt-4.1-mini", name="fruit_agent", + "gpt-5.4-mini", name="fruit_agent", tools=[fruit_info], prompt="...", checkpointer=True, ) veggie_agent = create_sub_agent( - "gpt-4.1-mini", name="veggie_agent", + "gpt-5.4-mini", name="veggie_agent", tools=[veggie_info], prompt="...", checkpointer=True, ) @@ -1086,7 +1062,7 @@ const fruitInfo = tool( // Subagent with checkpointer=true for persistent state const fruitAgent = createAgent({ - model: "gpt-4.1-mini", + model: "gpt-5.4-mini", tools: [fruitInfo], prompt: "You are a fruit expert. Use the fruit_info tool. Respond in one sentence.", checkpointer: true, // [!code highlight] @@ -1111,7 +1087,7 @@ const askFruitExpert = tool( // Use toolCallLimitMiddleware to prevent parallel calls to per-thread subagents, // which would cause checkpoint conflicts. const agent = createAgent({ - model: "gpt-4.1-mini", + model: "gpt-5.4-mini", tools: [askFruitExpert], prompt: "You have a fruit expert. 
ALWAYS delegate fruit questions to ask_fruit_expert.", middleware: [ // [!code highlight] @@ -1192,10 +1168,10 @@ const agent = createAgent({ .compile(); } - const fruitAgent = createSubAgent("gpt-4.1-mini", { + const fruitAgent = createSubAgent("gpt-5.4-mini", { name: "fruit_agent", tools: [fruitInfo], prompt: "...", checkpointer: true, }); - const veggieAgent = createSubAgent("gpt-4.1-mini", { + const veggieAgent = createSubAgent("gpt-5.4-mini", { name: "veggie_agent", tools: [veggieInfo], prompt: "...", checkpointer: true, }); const config = { configurable: { thread_id: "1" } }; diff --git a/workflows-agents.mdx b/workflows-agents.mdx index 4def587..73961c2 100644 --- a/workflows-agents.mdx +++ b/workflows-agents.mdx @@ -4,7 +4,6 @@ sidebarTitle: Workflows + agents --- - This guide reviews common workflow and agent patterns. - Workflows have predetermined code paths and are designed to operate in a certain order. @@ -14,6 +13,10 @@ This guide reviews common workflow and agent patterns. LangGraph offers several benefits when building agents and workflows, including [persistence](/oss/langgraph/persistence), [streaming](/oss/langgraph/streaming), and support for debugging as well as [deployment](/oss/langgraph/deploy). + +Trace and compare these workflow patterns with [LangSmith](https://smith.langchain.com). Follow the [tracing quickstart](/langsmith/trace-with-langgraph) to see how data flows through each step. + + ## Setup To build a workflow or agent, you can use [any chat model](/oss/integrations/chat) that supports structured outputs and tool calling. The following example uses Anthropic: @@ -2177,3 +2180,64 @@ for await (const step of stream) { ``` ::: + +### ToolNode + +@[`ToolNode`] is a prebuilt node that executes tools in LangGraph workflows. It handles parallel tool execution, error handling, and state injection automatically. + +Use @[`ToolNode`] when you need fine-grained control over how your graph executes tools. 
This is the building block that powers tool execution in many LangGraph agent patterns. + +:::python + +```python +from langchain.tools import tool +from langgraph.prebuilt import ToolNode +from langgraph.graph import MessagesState, StateGraph + +@tool +def search(query: str) -> str: + """Search for information.""" + return f"Results for: {query}" + +@tool +def calculator(expression: str) -> str: + """Evaluate a math expression.""" + return str(eval(expression)) + +builder = StateGraph(MessagesState) +builder.add_node("tools", ToolNode([search, calculator])) +# ... add other nodes and edges +graph = builder.compile() +``` + +::: + +:::js + +```typescript +import { ToolNode } from "@langchain/langgraph/prebuilt"; +import { tool } from "@langchain/core/tools"; +import * as z from "zod"; + +const search = tool( + ({ query }) => `Results for: ${query}`, + { + name: "search", + description: "Search for information.", + schema: z.object({ query: z.string() }), + } +); + +const calculator = tool( + ({ expression }) => String(eval(expression)), + { + name: "calculator", + description: "Evaluate a math expression.", + schema: z.object({ expression: z.string() }), + } +); + +const toolNode = new ToolNode([search, calculator]); +``` + +:::