Implement LangGraph integration: refactor agent-tool interaction, add graph compilation, and enhance state management
Build and Push Agent API / build (push) Successful in 22s
Build and Push Agent API / build (push) Successful in 22s
This commit is contained in:
+53
-177
@@ -1,13 +1,12 @@
|
||||
from fastapi import APIRouter, Depends
|
||||
from fastapi import APIRouter, Depends, Request
|
||||
from fastapi.responses import StreamingResponse
|
||||
from openai import OpenAI
|
||||
from pydantic import BaseModel
|
||||
import json
|
||||
import asyncio
|
||||
|
||||
from api.dependencies import get_llm_client
|
||||
from api.dependencies import get_llm_client, get_agent_graph
|
||||
from agents import get as get_agent, list_all as list_all_agents
|
||||
from skills import get_all_tools, execute_tool
|
||||
from core.state import AgentState
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
@@ -42,166 +41,65 @@ def _resolve_agent(agent_id: str | None = None, model: str | None = None):
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tool-calling loop (non-streaming)
|
||||
# LangGraph helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
async def _invoke_graph(graph, messages: list[dict]) -> str:
|
||||
"""Run the graph synchronously (non-streaming) and return the final text."""
|
||||
state: AgentState = {"messages": messages}
|
||||
result = await graph.ainvoke(state)
|
||||
last_msg = result["messages"][-1]
|
||||
return last_msg.content or ""
|
||||
|
||||
|
||||
async def _stream_graph(graph, messages: list[dict]):
|
||||
"""
|
||||
Run the graph and stream the final response token-by-token.
|
||||
|
||||
LangGraph's astream_events would require langchain-openai's ChatOpenAI
|
||||
to intercept LLM chunks. Instead we run the graph to completion (tools
|
||||
execute silently) and then stream the final text content character by
|
||||
character — this gives the client a real SSE stream without adding new
|
||||
dependencies.
|
||||
"""
|
||||
state: AgentState = {"messages": messages}
|
||||
result = await graph.ainvoke(state)
|
||||
content = result["messages"][-1].content or ""
|
||||
# Yield token-by-token so the SSE client sees incremental output
|
||||
for token in content:
|
||||
yield token
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Non-streaming run (kept for /chat/sync and sync completions)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
async def run_agent_with_tools(
|
||||
client: OpenAI,
|
||||
request: Request,
|
||||
messages: list[dict],
|
||||
agent_id: str | None = None,
|
||||
model: str | None = None,
|
||||
max_turns: int = 5,
|
||||
) -> str:
|
||||
"""Send messages to the LLM with tool definitions. Tool-calling loop."""
|
||||
"""Send messages through the agent's LangGraph. Non-streaming."""
|
||||
agent = _resolve_agent(agent_id, model)
|
||||
tools = get_all_tools(agent.skills)
|
||||
system_prompt = agent.build_system_prompt()
|
||||
|
||||
full_messages: list[dict] = [{"role": "system", "content": system_prompt}]
|
||||
full_messages.extend(messages)
|
||||
|
||||
loop = asyncio.get_running_loop()
|
||||
|
||||
for _ in range(max_turns):
|
||||
resp = await loop.run_in_executor(
|
||||
None,
|
||||
lambda: client.chat.completions.create(
|
||||
model="deepseek-chat",
|
||||
messages=full_messages,
|
||||
tools=tools if tools else None,
|
||||
tool_choice="auto" if tools else None,
|
||||
),
|
||||
)
|
||||
choice = resp.choices[0]
|
||||
|
||||
if choice.finish_reason == "stop" and choice.message.content:
|
||||
return choice.message.content
|
||||
|
||||
if choice.message.tool_calls:
|
||||
full_messages.append(choice.message.model_dump(exclude_none=True))
|
||||
for tc in choice.message.tool_calls:
|
||||
fn_name = tc.function.name
|
||||
fn_args = json.loads(tc.function.arguments)
|
||||
tr = await execute_tool(agent.skills, fn_name, fn_args)
|
||||
result = tr.content if tr else f"Tool '{fn_name}' is not available."
|
||||
full_messages.append({
|
||||
"role": "tool", "tool_call_id": tc.id, "content": result,
|
||||
})
|
||||
continue
|
||||
|
||||
return choice.message.content or "I'm not sure how to help with that."
|
||||
|
||||
return "I've taken several actions but still need more information. Could you clarify?"
|
||||
graph = get_agent_graph(agent.agent_id, request)
|
||||
return await _invoke_graph(graph, messages)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Streaming generators
|
||||
# Streaming generator (kept for /chat and stream completions)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
async def _stream_with_tools(
|
||||
client: OpenAI,
|
||||
messages: list[dict],
|
||||
agent_id: str | None = None,
|
||||
model: str | None = None,
|
||||
max_turns: int = 5,
|
||||
):
|
||||
"""Streaming tool-calling loop. Tools run silently, final text is streamed."""
|
||||
agent = _resolve_agent(agent_id, model)
|
||||
tools = get_all_tools(agent.skills)
|
||||
system_prompt = agent.build_system_prompt()
|
||||
|
||||
full_messages: list[dict] = [{"role": "system", "content": system_prompt}]
|
||||
full_messages.extend(messages)
|
||||
|
||||
loop = asyncio.get_running_loop()
|
||||
|
||||
for turn in range(max_turns):
|
||||
resp = await loop.run_in_executor(
|
||||
None,
|
||||
lambda: client.chat.completions.create(
|
||||
model="deepseek-chat",
|
||||
messages=full_messages,
|
||||
tools=tools if tools else None,
|
||||
tool_choice="auto" if tools else None,
|
||||
),
|
||||
)
|
||||
choice = resp.choices[0]
|
||||
|
||||
if choice.message.tool_calls:
|
||||
full_messages.append(choice.message.model_dump(exclude_none=True))
|
||||
for tc in choice.message.tool_calls:
|
||||
fn_name = tc.function.name
|
||||
fn_args = json.loads(tc.function.arguments)
|
||||
tr = await execute_tool(agent.skills, fn_name, fn_args)
|
||||
result = tr.content if tr else f"Tool '{fn_name}' is not available."
|
||||
full_messages.append({
|
||||
"role": "tool",
|
||||
"tool_call_id": tc.id,
|
||||
"content": result,
|
||||
})
|
||||
continue
|
||||
|
||||
if choice.finish_reason == "stop" and choice.message.content:
|
||||
for token in choice.message.content:
|
||||
yield token
|
||||
await asyncio.sleep(0)
|
||||
return
|
||||
|
||||
def _sync_stream():
|
||||
stream = client.chat.completions.create(
|
||||
model="deepseek-chat", messages=full_messages, stream=True,
|
||||
)
|
||||
for chunk in stream:
|
||||
delta = chunk.choices[0].delta
|
||||
if delta and delta.content:
|
||||
yield delta.content
|
||||
|
||||
gen = _sync_stream()
|
||||
while True:
|
||||
token = await loop.run_in_executor(None, next, gen, None)
|
||||
if token is None:
|
||||
return
|
||||
yield token
|
||||
|
||||
yield "\u2026"
|
||||
|
||||
|
||||
async def run_agent_stream(
|
||||
client: OpenAI,
|
||||
request: Request,
|
||||
messages: list[dict],
|
||||
agent_id: str | None = None,
|
||||
model: str | None = None,
|
||||
):
|
||||
"""Async generator — yields tokens. Uses tool-loop when skills have tools."""
|
||||
"""Async generator — yields tokens via the agent's LangGraph."""
|
||||
agent = _resolve_agent(agent_id, model)
|
||||
tools = get_all_tools(agent.skills)
|
||||
|
||||
if tools:
|
||||
async for token in _stream_with_tools(client, messages, agent_id, model):
|
||||
yield token
|
||||
return
|
||||
|
||||
# No tools — simple streaming
|
||||
system_prompt = agent.build_system_prompt()
|
||||
full_messages: list[dict] = [{"role": "system", "content": system_prompt}]
|
||||
full_messages.extend(messages)
|
||||
|
||||
loop = asyncio.get_running_loop()
|
||||
|
||||
def _sync_stream():
|
||||
stream = client.chat.completions.create(
|
||||
model="deepseek-chat", messages=full_messages, stream=True,
|
||||
)
|
||||
for chunk in stream:
|
||||
delta = chunk.choices[0].delta
|
||||
if delta and delta.content:
|
||||
yield delta.content
|
||||
|
||||
gen = _sync_stream()
|
||||
while True:
|
||||
token = await loop.run_in_executor(None, next, gen, None)
|
||||
if token is None:
|
||||
break
|
||||
graph = get_agent_graph(agent.agent_id, request)
|
||||
async for token in _stream_graph(graph, messages):
|
||||
yield token
|
||||
|
||||
|
||||
@@ -217,13 +115,14 @@ def root():
|
||||
@router.post("/chat")
|
||||
async def chat(
|
||||
req: ChatRequest,
|
||||
request: Request,
|
||||
client: OpenAI = Depends(get_llm_client),
|
||||
):
|
||||
"""Streaming chat — single message, no history."""
|
||||
messages = [{"role": "user", "content": req.message}]
|
||||
|
||||
async def event_stream():
|
||||
async for token in run_agent_stream(client, messages, req.agent_id):
|
||||
async for token in run_agent_stream(request, messages, req.agent_id):
|
||||
payload = json.dumps({"token": token, "session_id": req.session_id})
|
||||
yield f"data: {payload}\n\n"
|
||||
yield f"data: {json.dumps({'done': True, 'session_id': req.session_id})}\n\n"
|
||||
@@ -242,26 +141,12 @@ async def chat(
|
||||
@router.post("/chat/sync")
|
||||
async def chat_sync(
|
||||
req: ChatRequest,
|
||||
request: Request,
|
||||
client: OpenAI = Depends(get_llm_client),
|
||||
):
|
||||
"""Non-streaming chat — single message."""
|
||||
agent = _resolve_agent(req.agent_id)
|
||||
tools = get_all_tools(agent.skills)
|
||||
messages = [{"role": "user", "content": req.message}]
|
||||
|
||||
if tools:
|
||||
response = await run_agent_with_tools(client, messages, req.agent_id)
|
||||
else:
|
||||
agent_obj = _resolve_agent(req.agent_id)
|
||||
resp = client.chat.completions.create(
|
||||
model="deepseek-chat",
|
||||
messages=[
|
||||
{"role": "system", "content": agent_obj.build_system_prompt()},
|
||||
{"role": "user", "content": req.message},
|
||||
],
|
||||
)
|
||||
response = resp.choices[0].message.content
|
||||
|
||||
response = await run_agent_with_tools(request, messages, req.agent_id)
|
||||
return {"response": response, "session_id": req.session_id}
|
||||
|
||||
|
||||
@@ -300,6 +185,7 @@ def list_models():
|
||||
@router.post("/chat/completions")
|
||||
async def chat_completions(
|
||||
req: ChatCompletionRequest,
|
||||
request: Request,
|
||||
client: OpenAI = Depends(get_llm_client),
|
||||
):
|
||||
"""OpenAI-compatible /chat/completions — supports stream=True.
|
||||
@@ -311,7 +197,7 @@ async def chat_completions(
|
||||
if req.stream:
|
||||
async def sse_stream():
|
||||
async for token in run_agent_stream(
|
||||
client, req.messages, agent_id=agent.agent_id,
|
||||
request, req.messages, agent_id=agent.agent_id,
|
||||
):
|
||||
chunk = {
|
||||
"id": "chatcmpl-local",
|
||||
@@ -335,20 +221,10 @@ async def chat_completions(
|
||||
headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
|
||||
)
|
||||
|
||||
# Non-streaming — full history, tool-calling
|
||||
tools = get_all_tools(agent.skills)
|
||||
if tools:
|
||||
response = await run_agent_with_tools(
|
||||
client, req.messages, agent_id=agent.agent_id,
|
||||
)
|
||||
else:
|
||||
system_prompt = agent.build_system_prompt()
|
||||
full_msgs: list[dict] = [{"role": "system", "content": system_prompt}]
|
||||
full_msgs.extend(req.messages)
|
||||
resp = client.chat.completions.create(
|
||||
model="deepseek-chat", messages=full_msgs,
|
||||
)
|
||||
response = resp.choices[0].message.content
|
||||
# Non-streaming — full history, LangGraph agent
|
||||
response = await run_agent_with_tools(
|
||||
request, req.messages, agent_id=agent.agent_id,
|
||||
)
|
||||
|
||||
return {
|
||||
"id": "chatcmpl-local",
|
||||
|
||||
Reference in New Issue
Block a user