small refactor of the structure

2026-05-25 12:16:24 +02:00
parent 51e099acdd
commit b0f10b6bb1
26 changed files with 37 additions and 37 deletions
@@ -0,0 +1,241 @@
+from fastapi import APIRouter, Depends, Request
+from fastapi.responses import StreamingResponse
+from openai import OpenAI
+from pydantic import BaseModel
+import json
+
+from gateway.dependencies import get_llm_client, get_agent_graph
+from agents import get as get_agent, list_all as list_all_agents
+from src.state import AgentState
+
+router = APIRouter()
+
+
+class ChatRequest(BaseModel):
+    message: str
+    session_id: str | None = None
+    agent_id: str | None = None
+
+
+class ChatCompletionRequest(BaseModel):
+    messages: list[dict]
+    stream: bool = False
+    model: str = "deepseek-chat"
+
+
+# ---------------------------------------------------------------------------
+# Agent resolution
+# ---------------------------------------------------------------------------
+
+def _resolve_agent(agent_id: str | None = None, model: str | None = None):
+    """
+    1. explicit agent_id
+    2. model field (OpenWebUI sends this — maps to agent_id if registered)
+    3. fallback to "naked"
+    """
+    lookup = agent_id or model
+    if lookup is None:
+        return get_agent("naked")
+    agent = get_agent(lookup)
+    return agent if agent else get_agent("naked")
+
+
+# ---------------------------------------------------------------------------
+# LangGraph helpers
+# ---------------------------------------------------------------------------
+
+async def _invoke_graph(graph, messages: list[dict]) -> str:
+    """Run the graph synchronously (non-streaming) and return the final text."""
+    state: AgentState = {"messages": messages}
+    result = await graph.ainvoke(state)
+    last_msg = result["messages"][-1]
+    return last_msg.content or ""
+
+
+async def _stream_graph(graph, messages: list[dict]):
+    """
+    Run the graph and stream the final response token-by-token.
+
+    LangGraph's astream_events would require langchain-openai's ChatOpenAI
+    to intercept LLM chunks.  Instead we run the graph to completion (tools
+    execute silently) and then stream the final text content character by
+    character — this gives the client a real SSE stream without adding new
+    dependencies.
+    """
+    state: AgentState = {"messages": messages}
+    result = await graph.ainvoke(state)
+    content = result["messages"][-1].content or ""
+    # Yield token-by-token so the SSE client sees incremental output
+    for token in content:
+        yield token
+
+
+# ---------------------------------------------------------------------------
+# Non-streaming run (kept for /chat/sync and sync completions)
+# ---------------------------------------------------------------------------
+
+async def run_agent_with_tools(
+    request: Request,
+    messages: list[dict],
+    agent_id: str | None = None,
+    model: str | None = None,
+) -> str:
+    """Send messages through the agent's LangGraph.  Non-streaming."""
+    agent = _resolve_agent(agent_id, model)
+    graph = get_agent_graph(agent.agent_id, request)
+    return await _invoke_graph(graph, messages)
+
+
+# ---------------------------------------------------------------------------
+# Streaming generator (kept for /chat and stream completions)
+# ---------------------------------------------------------------------------
+
+async def run_agent_stream(
+    request: Request,
+    messages: list[dict],
+    agent_id: str | None = None,
+    model: str | None = None,
+):
+    """Async generator — yields tokens via the agent's LangGraph."""
+    agent = _resolve_agent(agent_id, model)
+    graph = get_agent_graph(agent.agent_id, request)
+    async for token in _stream_graph(graph, messages):
+        yield token
+
+
+# ---------------------------------------------------------------------------
+# Endpoints
+# ---------------------------------------------------------------------------
+
+@router.get("/")
+def root():
+    return {"status": "ok"}
+
+
+@router.post("/chat")
+async def chat(
+    req: ChatRequest,
+    request: Request,
+    client: OpenAI = Depends(get_llm_client),
+):
+    """Streaming chat — single message, no history."""
+    messages = [{"role": "user", "content": req.message}]
+
+    async def event_stream():
+        async for token in run_agent_stream(request, messages, req.agent_id):
+            payload = json.dumps({"token": token, "session_id": req.session_id})
+            yield f"data: {payload}\n\n"
+        yield f"data: {json.dumps({'done': True, 'session_id': req.session_id})}\n\n"
+
+    return StreamingResponse(
+        event_stream(),
+        media_type="text/event-stream",
+        headers={
+            "Cache-Control": "no-cache",
+            "Connection": "keep-alive",
+            "X-Accel-Buffering": "no",
+        },
+    )
+
+
+@router.post("/chat/sync")
+async def chat_sync(
+    req: ChatRequest,
+    request: Request,
+    client: OpenAI = Depends(get_llm_client),
+):
+    """Non-streaming chat — single message."""
+    messages = [{"role": "user", "content": req.message}]
+    response = await run_agent_with_tools(request, messages, req.agent_id)
+    return {"response": response, "session_id": req.session_id}
+
+
+@router.get("/agents")
+def list_agents():
+    """Return all registered agents."""
+    return {
+        "agents": [
+            {
+                "agent_id": a.agent_id,
+                "description": a.description,
+                "skills": a.skills,
+            }
+            for a in list_all_agents().values()
+        ]
+    }
+
+
+@router.get("/models")
+def list_models():
+    """Return agents as selectable models for OpenWebUI."""
+    return {
+        "object": "list",
+        "data": [
+            {
+                "id": a.agent_id,
+                "object": "model",
+                "created": 0,
+                "owned_by": "local-agent",
+            }
+            for a in list_all_agents().values()
+        ],
+    }
+
+
+@router.post("/chat/completions")
+async def chat_completions(
+    req: ChatCompletionRequest,
+    request: Request,
+    client: OpenAI = Depends(get_llm_client),
+):
+    """OpenAI-compatible /chat/completions — supports stream=True.
+    Multi-turn: req.messages contains the FULL conversation history.
+    Agent resolved from the model field (OpenWebUI sends this).
+    """
+    agent = _resolve_agent(model=req.model)
+
+    if req.stream:
+        async def sse_stream():
+            async for token in run_agent_stream(
+                request, req.messages, agent_id=agent.agent_id,
+            ):
+                chunk = {
+                    "id": "chatcmpl-local",
+                    "object": "chat.completion.chunk",
+                    "choices": [
+                        {"index": 0, "delta": {"content": token}, "finish_reason": None}
+                    ],
+                }
+                yield f"data: {json.dumps(chunk)}\n\n"
+            final_chunk = {
+                "id": "chatcmpl-local",
+                "object": "chat.completion.chunk",
+                "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
+            }
+            yield f"data: {json.dumps(final_chunk)}\n\n"
+            yield "data: [DONE]\n\n"
+
+        return StreamingResponse(
+            sse_stream(),
+            media_type="text/event-stream",
+            headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
+        )
+
+    # Non-streaming — full history, LangGraph agent
+    response = await run_agent_with_tools(
+        request, req.messages, agent_id=agent.agent_id,
+    )
+
+    return {
+        "id": "chatcmpl-local",
+        "object": "chat.completion",
+        "created": 0,
+        "model": req.model,
+        "choices": [
+            {
+                "index": 0,
+                "message": {"role": "assistant", "content": response},
+                "finish_reason": "stop",
+            }
+        ],
+    }