Implement LangGraph integration: refactor agent-tool interaction, add graph compilation, and enhance state management
Build and Push Agent API / build (push) Successful in 22s

This commit is contained in:
2026-05-24 10:18:59 +02:00
parent 1d821d18fe
commit 2f7f94f1ce
8 changed files with 534 additions and 242 deletions
+53 -177
View File
@@ -1,13 +1,12 @@
from fastapi import APIRouter, Depends
from fastapi import APIRouter, Depends, Request
from fastapi.responses import StreamingResponse
from openai import OpenAI
from pydantic import BaseModel
import json
import asyncio
from api.dependencies import get_llm_client
from api.dependencies import get_llm_client, get_agent_graph
from agents import get as get_agent, list_all as list_all_agents
from skills import get_all_tools, execute_tool
from core.state import AgentState
router = APIRouter()
@@ -42,166 +41,65 @@ def _resolve_agent(agent_id: str | None = None, model: str | None = None):
# ---------------------------------------------------------------------------
# Tool-calling loop (non-streaming)
# LangGraph helpers
# ---------------------------------------------------------------------------
async def _invoke_graph(graph, messages: list[dict]) -> str:
"""Run the graph synchronously (non-streaming) and return the final text."""
state: AgentState = {"messages": messages}
result = await graph.ainvoke(state)
last_msg = result["messages"][-1]
return last_msg.content or ""
async def _stream_graph(graph, messages: list[dict]):
"""
Run the graph and stream the final response token-by-token.
LangGraph's astream_events would require langchain-openai's ChatOpenAI
to intercept LLM chunks. Instead we run the graph to completion (tools
execute silently) and then stream the final text content character by
character — this gives the client a real SSE stream without adding new
dependencies.
"""
state: AgentState = {"messages": messages}
result = await graph.ainvoke(state)
content = result["messages"][-1].content or ""
# Yield token-by-token so the SSE client sees incremental output
for token in content:
yield token
# ---------------------------------------------------------------------------
# Non-streaming run (kept for /chat/sync and sync completions)
# ---------------------------------------------------------------------------
async def run_agent_with_tools(
client: OpenAI,
request: Request,
messages: list[dict],
agent_id: str | None = None,
model: str | None = None,
max_turns: int = 5,
) -> str:
"""Send messages to the LLM with tool definitions. Tool-calling loop."""
"""Send messages through the agent's LangGraph. Non-streaming."""
agent = _resolve_agent(agent_id, model)
tools = get_all_tools(agent.skills)
system_prompt = agent.build_system_prompt()
full_messages: list[dict] = [{"role": "system", "content": system_prompt}]
full_messages.extend(messages)
loop = asyncio.get_running_loop()
for _ in range(max_turns):
resp = await loop.run_in_executor(
None,
lambda: client.chat.completions.create(
model="deepseek-chat",
messages=full_messages,
tools=tools if tools else None,
tool_choice="auto" if tools else None,
),
)
choice = resp.choices[0]
if choice.finish_reason == "stop" and choice.message.content:
return choice.message.content
if choice.message.tool_calls:
full_messages.append(choice.message.model_dump(exclude_none=True))
for tc in choice.message.tool_calls:
fn_name = tc.function.name
fn_args = json.loads(tc.function.arguments)
tr = await execute_tool(agent.skills, fn_name, fn_args)
result = tr.content if tr else f"Tool '{fn_name}' is not available."
full_messages.append({
"role": "tool", "tool_call_id": tc.id, "content": result,
})
continue
return choice.message.content or "I'm not sure how to help with that."
return "I've taken several actions but still need more information. Could you clarify?"
graph = get_agent_graph(agent.agent_id, request)
return await _invoke_graph(graph, messages)
# ---------------------------------------------------------------------------
# Streaming generators
# Streaming generator (kept for /chat and stream completions)
# ---------------------------------------------------------------------------
async def _stream_with_tools(
client: OpenAI,
messages: list[dict],
agent_id: str | None = None,
model: str | None = None,
max_turns: int = 5,
):
"""Streaming tool-calling loop. Tools run silently, final text is streamed."""
agent = _resolve_agent(agent_id, model)
tools = get_all_tools(agent.skills)
system_prompt = agent.build_system_prompt()
full_messages: list[dict] = [{"role": "system", "content": system_prompt}]
full_messages.extend(messages)
loop = asyncio.get_running_loop()
for turn in range(max_turns):
resp = await loop.run_in_executor(
None,
lambda: client.chat.completions.create(
model="deepseek-chat",
messages=full_messages,
tools=tools if tools else None,
tool_choice="auto" if tools else None,
),
)
choice = resp.choices[0]
if choice.message.tool_calls:
full_messages.append(choice.message.model_dump(exclude_none=True))
for tc in choice.message.tool_calls:
fn_name = tc.function.name
fn_args = json.loads(tc.function.arguments)
tr = await execute_tool(agent.skills, fn_name, fn_args)
result = tr.content if tr else f"Tool '{fn_name}' is not available."
full_messages.append({
"role": "tool",
"tool_call_id": tc.id,
"content": result,
})
continue
if choice.finish_reason == "stop" and choice.message.content:
for token in choice.message.content:
yield token
await asyncio.sleep(0)
return
def _sync_stream():
stream = client.chat.completions.create(
model="deepseek-chat", messages=full_messages, stream=True,
)
for chunk in stream:
delta = chunk.choices[0].delta
if delta and delta.content:
yield delta.content
gen = _sync_stream()
while True:
token = await loop.run_in_executor(None, next, gen, None)
if token is None:
return
yield token
yield "\u2026"
async def run_agent_stream(
client: OpenAI,
request: Request,
messages: list[dict],
agent_id: str | None = None,
model: str | None = None,
):
"""Async generator — yields tokens. Uses tool-loop when skills have tools."""
"""Async generator — yields tokens via the agent's LangGraph."""
agent = _resolve_agent(agent_id, model)
tools = get_all_tools(agent.skills)
if tools:
async for token in _stream_with_tools(client, messages, agent_id, model):
yield token
return
# No tools — simple streaming
system_prompt = agent.build_system_prompt()
full_messages: list[dict] = [{"role": "system", "content": system_prompt}]
full_messages.extend(messages)
loop = asyncio.get_running_loop()
def _sync_stream():
stream = client.chat.completions.create(
model="deepseek-chat", messages=full_messages, stream=True,
)
for chunk in stream:
delta = chunk.choices[0].delta
if delta and delta.content:
yield delta.content
gen = _sync_stream()
while True:
token = await loop.run_in_executor(None, next, gen, None)
if token is None:
break
graph = get_agent_graph(agent.agent_id, request)
async for token in _stream_graph(graph, messages):
yield token
@@ -217,13 +115,14 @@ def root():
@router.post("/chat")
async def chat(
req: ChatRequest,
request: Request,
client: OpenAI = Depends(get_llm_client),
):
"""Streaming chat — single message, no history."""
messages = [{"role": "user", "content": req.message}]
async def event_stream():
async for token in run_agent_stream(client, messages, req.agent_id):
async for token in run_agent_stream(request, messages, req.agent_id):
payload = json.dumps({"token": token, "session_id": req.session_id})
yield f"data: {payload}\n\n"
yield f"data: {json.dumps({'done': True, 'session_id': req.session_id})}\n\n"
@@ -242,26 +141,12 @@ async def chat(
@router.post("/chat/sync")
async def chat_sync(
req: ChatRequest,
request: Request,
client: OpenAI = Depends(get_llm_client),
):
"""Non-streaming chat — single message."""
agent = _resolve_agent(req.agent_id)
tools = get_all_tools(agent.skills)
messages = [{"role": "user", "content": req.message}]
if tools:
response = await run_agent_with_tools(client, messages, req.agent_id)
else:
agent_obj = _resolve_agent(req.agent_id)
resp = client.chat.completions.create(
model="deepseek-chat",
messages=[
{"role": "system", "content": agent_obj.build_system_prompt()},
{"role": "user", "content": req.message},
],
)
response = resp.choices[0].message.content
response = await run_agent_with_tools(request, messages, req.agent_id)
return {"response": response, "session_id": req.session_id}
@@ -300,6 +185,7 @@ def list_models():
@router.post("/chat/completions")
async def chat_completions(
req: ChatCompletionRequest,
request: Request,
client: OpenAI = Depends(get_llm_client),
):
"""OpenAI-compatible /chat/completions — supports stream=True.
@@ -311,7 +197,7 @@ async def chat_completions(
if req.stream:
async def sse_stream():
async for token in run_agent_stream(
client, req.messages, agent_id=agent.agent_id,
request, req.messages, agent_id=agent.agent_id,
):
chunk = {
"id": "chatcmpl-local",
@@ -335,20 +221,10 @@ async def chat_completions(
headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
)
# Non-streaming — full history, tool-calling
tools = get_all_tools(agent.skills)
if tools:
response = await run_agent_with_tools(
client, req.messages, agent_id=agent.agent_id,
)
else:
system_prompt = agent.build_system_prompt()
full_msgs: list[dict] = [{"role": "system", "content": system_prompt}]
full_msgs.extend(req.messages)
resp = client.chat.completions.create(
model="deepseek-chat", messages=full_msgs,
)
response = resp.choices[0].message.content
# Non-streaming — full history, LangGraph agent
response = await run_agent_with_tools(
request, req.messages, agent_id=agent.agent_id,
)
return {
"id": "chatcmpl-local",