Implement LangGraph integration: refactor agent-tool interaction, add graph compilation, and enhance state management

2026-05-24 10:18:59 +02:00
parent 1d821d18fe
commit 2f7f94f1ce
8 changed files with 534 additions and 242 deletions
@@ -1,13 +1,12 @@
-from fastapi import APIRouter, Depends
+from fastapi import APIRouter, Depends, Request
 from fastapi.responses import StreamingResponse
 from openai import OpenAI
 from pydantic import BaseModel
 import json
-import asyncio

-from api.dependencies import get_llm_client
+from api.dependencies import get_llm_client, get_agent_graph
 from agents import get as get_agent, list_all as list_all_agents
-from skills import get_all_tools, execute_tool
+from core.state import AgentState

 router = APIRouter()

@@ -42,166 +41,65 @@ def _resolve_agent(agent_id: str | None = None, model: str | None = None):


 # ---------------------------------------------------------------------------
-# Tool-calling loop (non-streaming)
+# LangGraph helpers
+# ---------------------------------------------------------------------------
+
+async def _invoke_graph(graph, messages: list[dict]) -> str:
+    """Run the graph synchronously (non-streaming) and return the final text."""
+    state: AgentState = {"messages": messages}
+    result = await graph.ainvoke(state)
+    last_msg = result["messages"][-1]
+    return last_msg.content or ""
+
+
+async def _stream_graph(graph, messages: list[dict]):
+    """
+    Run the graph and stream the final response token-by-token.
+
+    LangGraph's astream_events would require langchain-openai's ChatOpenAI
+    to intercept LLM chunks.  Instead we run the graph to completion (tools
+    execute silently) and then stream the final text content character by
+    character — this gives the client a real SSE stream without adding new
+    dependencies.
+    """
+    state: AgentState = {"messages": messages}
+    result = await graph.ainvoke(state)
+    content = result["messages"][-1].content or ""
+    # Yield token-by-token so the SSE client sees incremental output
+    for token in content:
+        yield token
+
+
+# ---------------------------------------------------------------------------
+# Non-streaming run (kept for /chat/sync and sync completions)
 # ---------------------------------------------------------------------------

 async def run_agent_with_tools(
-    client: OpenAI,
+    request: Request,
    messages: list[dict],
    agent_id: str | None = None,
    model: str | None = None,
-    max_turns: int = 5,
 ) -> str:
-    """Send messages to the LLM with tool definitions. Tool-calling loop."""
+    """Send messages through the agent's LangGraph.  Non-streaming."""
    agent = _resolve_agent(agent_id, model)
-    tools = get_all_tools(agent.skills)
-    system_prompt = agent.build_system_prompt()
-
-    full_messages: list[dict] = [{"role": "system", "content": system_prompt}]
-    full_messages.extend(messages)
-
-    loop = asyncio.get_running_loop()
-
-    for _ in range(max_turns):
-        resp = await loop.run_in_executor(
-            None,
-            lambda: client.chat.completions.create(
-                model="deepseek-chat",
-                messages=full_messages,
-                tools=tools if tools else None,
-                tool_choice="auto" if tools else None,
-            ),
-        )
-        choice = resp.choices[0]
-
-        if choice.finish_reason == "stop" and choice.message.content:
-            return choice.message.content
-
-        if choice.message.tool_calls:
-            full_messages.append(choice.message.model_dump(exclude_none=True))
-            for tc in choice.message.tool_calls:
-                fn_name = tc.function.name
-                fn_args = json.loads(tc.function.arguments)
-                tr = await execute_tool(agent.skills, fn_name, fn_args)
-                result = tr.content if tr else f"Tool '{fn_name}' is not available."
-                full_messages.append({
-                    "role": "tool", "tool_call_id": tc.id, "content": result,
-                })
-            continue
-
-        return choice.message.content or "I'm not sure how to help with that."
-
-    return "I've taken several actions but still need more information. Could you clarify?"
+    graph = get_agent_graph(agent.agent_id, request)
+    return await _invoke_graph(graph, messages)


 # ---------------------------------------------------------------------------
-# Streaming generators
+# Streaming generator (kept for /chat and stream completions)
 # ---------------------------------------------------------------------------

-async def _stream_with_tools(
-    client: OpenAI,
-    messages: list[dict],
-    agent_id: str | None = None,
-    model: str | None = None,
-    max_turns: int = 5,
-):
-    """Streaming tool-calling loop. Tools run silently, final text is streamed."""
-    agent = _resolve_agent(agent_id, model)
-    tools = get_all_tools(agent.skills)
-    system_prompt = agent.build_system_prompt()
-
-    full_messages: list[dict] = [{"role": "system", "content": system_prompt}]
-    full_messages.extend(messages)
-
-    loop = asyncio.get_running_loop()
-
-    for turn in range(max_turns):
-        resp = await loop.run_in_executor(
-            None,
-            lambda: client.chat.completions.create(
-                model="deepseek-chat",
-                messages=full_messages,
-                tools=tools if tools else None,
-                tool_choice="auto" if tools else None,
-            ),
-        )
-        choice = resp.choices[0]
-
-        if choice.message.tool_calls:
-            full_messages.append(choice.message.model_dump(exclude_none=True))
-            for tc in choice.message.tool_calls:
-                fn_name = tc.function.name
-                fn_args = json.loads(tc.function.arguments)
-                tr = await execute_tool(agent.skills, fn_name, fn_args)
-                result = tr.content if tr else f"Tool '{fn_name}' is not available."
-                full_messages.append({
-                    "role": "tool",
-                    "tool_call_id": tc.id,
-                    "content": result,
-                })
-            continue
-
-        if choice.finish_reason == "stop" and choice.message.content:
-            for token in choice.message.content:
-                yield token
-                await asyncio.sleep(0)
-            return
-
-        def _sync_stream():
-            stream = client.chat.completions.create(
-                model="deepseek-chat", messages=full_messages, stream=True,
-            )
-            for chunk in stream:
-                delta = chunk.choices[0].delta
-                if delta and delta.content:
-                    yield delta.content
-
-        gen = _sync_stream()
-        while True:
-            token = await loop.run_in_executor(None, next, gen, None)
-            if token is None:
-                return
-            yield token
-
-    yield "\u2026"
-
-
 async def run_agent_stream(
-    client: OpenAI,
+    request: Request,
    messages: list[dict],
    agent_id: str | None = None,
    model: str | None = None,
 ):
-    """Async generator — yields tokens. Uses tool-loop when skills have tools."""
+    """Async generator — yields tokens via the agent's LangGraph."""
    agent = _resolve_agent(agent_id, model)
-    tools = get_all_tools(agent.skills)
-
-    if tools:
-        async for token in _stream_with_tools(client, messages, agent_id, model):
-            yield token
-        return
-
-    # No tools — simple streaming
-    system_prompt = agent.build_system_prompt()
-    full_messages: list[dict] = [{"role": "system", "content": system_prompt}]
-    full_messages.extend(messages)
-
-    loop = asyncio.get_running_loop()
-
-    def _sync_stream():
-        stream = client.chat.completions.create(
-            model="deepseek-chat", messages=full_messages, stream=True,
-        )
-        for chunk in stream:
-            delta = chunk.choices[0].delta
-            if delta and delta.content:
-                yield delta.content
-
-    gen = _sync_stream()
-    while True:
-        token = await loop.run_in_executor(None, next, gen, None)
-        if token is None:
-            break
+    graph = get_agent_graph(agent.agent_id, request)
+    async for token in _stream_graph(graph, messages):
        yield token


@@ -217,13 +115,14 @@ def root():
@router.post("/chat")
 async def chat(
    req: ChatRequest,
+    request: Request,
    client: OpenAI = Depends(get_llm_client),
 ):
    """Streaming chat — single message, no history."""
    messages = [{"role": "user", "content": req.message}]

    async def event_stream():
-        async for token in run_agent_stream(client, messages, req.agent_id):
+        async for token in run_agent_stream(request, messages, req.agent_id):
            payload = json.dumps({"token": token, "session_id": req.session_id})
            yield f"data: {payload}\n\n"
        yield f"data: {json.dumps({'done': True, 'session_id': req.session_id})}\n\n"
@@ -242,26 +141,12 @@ async def chat(
@router.post("/chat/sync")
 async def chat_sync(
    req: ChatRequest,
+    request: Request,
    client: OpenAI = Depends(get_llm_client),
 ):
    """Non-streaming chat — single message."""
-    agent = _resolve_agent(req.agent_id)
-    tools = get_all_tools(agent.skills)
    messages = [{"role": "user", "content": req.message}]
-
-    if tools:
-        response = await run_agent_with_tools(client, messages, req.agent_id)
-    else:
-        agent_obj = _resolve_agent(req.agent_id)
-        resp = client.chat.completions.create(
-            model="deepseek-chat",
-            messages=[
-                {"role": "system", "content": agent_obj.build_system_prompt()},
-                {"role": "user", "content": req.message},
-            ],
-        )
-        response = resp.choices[0].message.content
-
+    response = await run_agent_with_tools(request, messages, req.agent_id)
    return {"response": response, "session_id": req.session_id}


@@ -300,6 +185,7 @@ def list_models():
@router.post("/chat/completions")
 async def chat_completions(
    req: ChatCompletionRequest,
+    request: Request,
    client: OpenAI = Depends(get_llm_client),
 ):
    """OpenAI-compatible /chat/completions — supports stream=True.
@@ -311,7 +197,7 @@ async def chat_completions(
    if req.stream:
        async def sse_stream():
            async for token in run_agent_stream(
-                client, req.messages, agent_id=agent.agent_id,
+                request, req.messages, agent_id=agent.agent_id,
            ):
                chunk = {
                    "id": "chatcmpl-local",
@@ -335,20 +221,10 @@ async def chat_completions(
            headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
        )

-    # Non-streaming — full history, tool-calling
-    tools = get_all_tools(agent.skills)
-    if tools:
-        response = await run_agent_with_tools(
-            client, req.messages, agent_id=agent.agent_id,
-        )
-    else:
-        system_prompt = agent.build_system_prompt()
-        full_msgs: list[dict] = [{"role": "system", "content": system_prompt}]
-        full_msgs.extend(req.messages)
-        resp = client.chat.completions.create(
-            model="deepseek-chat", messages=full_msgs,
-        )
-        response = resp.choices[0].message.content
+    # Non-streaming — full history, LangGraph agent
+    response = await run_agent_with_tools(
+        request, req.messages, agent_id=agent.agent_id,
+    )

    return {
        "id": "chatcmpl-local",