# Source: Agents/api/v1/chat.py (file-viewer header: 237 lines, 7.0 KiB, Python)

from fastapi import APIRouter, Body, Depends
from fastapi.responses import StreamingResponse
from openai import OpenAI
from pydantic import BaseModel
import json
import asyncio
from api.dependencies import get_llm_client
from agents import get as get_agent, list_all as list_all_agents
# Router on which the endpoints defined in this module are registered.
router = APIRouter()
class ChatRequest(BaseModel):
    """Request body for the /chat and /chat/sync endpoints."""

    # Text of the user's message; sent to the LLM as the single "user" turn.
    message: str
    # Optional caller-supplied identifier; echoed back in the responses.
    session_id: str | None = None
    agent_id: str | None = None  # which agent to use ("naked", "media-agent", …)
class ChatCompletionRequest(BaseModel):
    """Request body for the OpenAI-compatible /chat/completions endpoint."""

    # Conversation messages as dicts; the endpoint reads the last entry's "content".
    messages: list[dict]
    # When true, the endpoint answers with a Server-Sent Events stream.
    stream: bool = False
    # Used by the endpoint to pick the agent; the "naked" agent is used when
    # no agent is registered under this name.
    model: str = "deepseek-chat"
# ---------------------------------------------------------------------------
# Core helpers
# ---------------------------------------------------------------------------
def _resolve_agent(agent_id: str | None = None, model: str | None = None):
    """
    Look up the agent to use for a request.

    Resolution order:
    1. explicit agent_id
    2. model name (OpenWebUI sends this — maps to agent_id if registered)
    3. fallback to "naked"

    Each candidate is tried in turn: an ``agent_id`` that is not registered
    falls through to ``model`` before the "naked" fallback is used.

    Args:
        agent_id: Explicitly requested agent id, if any.
        model: Model name from the request, treated as an agent id.

    Returns:
        The first registered agent among the candidates, otherwise whatever
        ``get_agent("naked")`` returns.
    """
    for candidate in (agent_id, model):
        # Missing or empty identifiers cannot name an agent; skip them.
        if not candidate:
            continue
        agent = get_agent(candidate)
        # get_agent signals "not registered" with None (the fallback relies on this).
        if agent is not None:
            return agent
    return get_agent("naked")
def run_agent(
    client: OpenAI,
    message: str,
    session_id: str | None = None,
    agent_id: str | None = None,
    model: str | None = None,
) -> str:
    """Non-streaming: uses the chosen agent's system prompt.

    Args:
        client: OpenAI-compatible client used for the completion call.
        message: User message, sent as the single "user" turn.
        session_id: Currently unused by this function.
        agent_id: Preferred agent id, forwarded to ``_resolve_agent``.
        model: Name used only for agent resolution; the completion call
            itself always targets "deepseek-chat".

    Returns:
        The assistant's reply text, or "" when the API returns no content.
    """
    agent = _resolve_agent(agent_id, model)
    response = client.chat.completions.create(
        model="deepseek-chat",
        messages=[
            {"role": "system", "content": agent.build_system_prompt()},
            {"role": "user", "content": message},
        ],
    )
    # The API types message.content as nullable; honour the declared `-> str`.
    content = response.choices[0].message.content
    return content if content is not None else ""
async def run_agent_stream(
    client: OpenAI,
    message: str,
    session_id: str | None = None,
    agent_id: str | None = None,
    model: str | None = None,
):
    """Async generator — yields tokens using the chosen agent's system prompt.

    Args:
        client: OpenAI-compatible (synchronous) client used for the call.
        message: User message, sent as the single "user" turn.
        session_id: Currently unused by this function.
        agent_id: Preferred agent id, forwarded to ``_resolve_agent``.
        model: Name used only for agent resolution; the completion call
            itself always targets "deepseek-chat".

    Yields:
        Non-empty content fragments in the order the API streams them.
    """
    agent = _resolve_agent(agent_id, model)
    system_prompt = agent.build_system_prompt()
    loop = asyncio.get_running_loop()

    def _sync_stream():
        # Blocking generator over the streamed completion; it is only ever
        # advanced through the executor call below.
        stream = client.chat.completions.create(
            model="deepseek-chat",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": message},
            ],
            stream=True,
        )
        for chunk in stream:
            # Chunks may arrive with an empty `choices` list (e.g. usage-only
            # chunks); skip them instead of raising IndexError.
            if not chunk.choices:
                continue
            delta = chunk.choices[0].delta
            if delta and delta.content:
                yield delta.content

    gen = _sync_stream()
    while True:
        # Advance the blocking generator in the default executor so the event
        # loop stays responsive. None marks exhaustion; real tokens are always
        # non-empty strings, so the sentinel cannot collide with one.
        token = await loop.run_in_executor(None, next, gen, None)
        if token is None:
            break
        yield token
# ---------------------------------------------------------------------------
# Endpoints
# ---------------------------------------------------------------------------
@router.get("/")
def root():
    """Return a static status payload."""
    status_payload = {"status": "ok"}
    return status_payload
@router.post("/chat")
async def chat(req: ChatRequest, client: OpenAI = Depends(get_llm_client)):
    """Stream the agent's reply to the caller as Server-Sent Events.

    Each token is sent as a ``data:`` frame holding a JSON object with the
    token and the request's session id; a final frame carries ``done: true``.
    """
    sse_headers = {
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "X-Accel-Buffering": "no",
    }

    def _frame(body: dict) -> str:
        # One SSE frame: "data: <json>" terminated by a blank line.
        return "data: " + json.dumps(body) + "\n\n"

    async def _events():
        tokens = run_agent_stream(
            client, req.message, req.session_id, req.agent_id,
        )
        async for piece in tokens:
            yield _frame({"token": piece, "session_id": req.session_id})
        yield _frame({"done": True, "session_id": req.session_id})

    return StreamingResponse(
        _events(),
        media_type="text/event-stream",
        headers=sse_headers,
    )
@router.post("/chat/sync")
def chat_sync(req: ChatRequest, client: OpenAI = Depends(get_llm_client)):
    """Non-streaming variant of /chat: reply with the complete answer in one body."""
    reply = run_agent(
        client,
        req.message,
        session_id=req.session_id,
        agent_id=req.agent_id,
    )
    return dict(response=reply, session_id=req.session_id)
@router.get("/agents")
def list_agents():
    """List every registered agent with its id, description, and skills."""
    entries = []
    for agent in list_all_agents().values():
        entries.append(
            {
                "agent_id": agent.agent_id,
                "description": agent.description,
                "skills": agent.skills,
            }
        )
    return {"agents": entries}
@router.get("/models")
def list_models():
    """Expose every registered agent as a selectable model entry for OpenWebUI."""

    def _as_model(agent):
        # One model-list entry; the agent id doubles as the model id.
        return {
            "id": agent.agent_id,
            "object": "model",
            "created": 0,
            "owned_by": "local-agent",
        }

    models = list(map(_as_model, list_all_agents().values()))
    return {"object": "list", "data": models}
@router.post("/chat/completions")
async def chat_completions(
    req: ChatCompletionRequest,
    client: OpenAI = Depends(get_llm_client),
):
    """OpenAI-compatible /chat/completions — supports stream=True.

    The last message's content is used as the user prompt; defaults to 'naked' agent.

    Args:
        req: Parsed request body (messages, stream flag, model name).
        client: LLM client injected through ``get_llm_client``.

    Returns:
        A ``StreamingResponse`` of ``chat.completion.chunk`` SSE frames ending
        with ``data: [DONE]`` when ``req.stream`` is true, otherwise a
        ``chat.completion`` dict holding the full reply.

    Raises:
        HTTPException: 400 when ``messages`` is empty or its last entry has
            no "content".
    """
    # Imported locally so this block does not rely on a module-level import.
    from fastapi import HTTPException

    # Reject unusable input explicitly instead of failing with IndexError /
    # KeyError (which would surface as an HTTP 500).
    if not req.messages:
        raise HTTPException(status_code=400, detail="messages must not be empty")
    user_message = req.messages[-1].get("content")
    if user_message is None:
        raise HTTPException(status_code=400, detail="last message has no content")

    # Resolve agent from the model field (OpenWebUI sends this); done once
    # for both the streaming and the non-streaming path.
    agent = _resolve_agent(model=req.model)

    if req.stream:
        def _chunk(delta: dict, finish_reason: str | None) -> str:
            # One SSE frame carrying a chat.completion.chunk object; it has the
            # same id/created/model fields as the non-streaming response.
            body = {
                "id": "chatcmpl-local",
                "object": "chat.completion.chunk",
                "created": 0,
                "model": req.model,
                "choices": [
                    {
                        "index": 0,
                        "delta": delta,
                        "finish_reason": finish_reason,
                    }
                ],
            }
            return f"data: {json.dumps(body)}\n\n"

        async def sse_stream():
            async for token in run_agent_stream(client, user_message, agent_id=agent.agent_id):
                yield _chunk({"content": token}, None)
            # Empty delta with finish_reason "stop", then the [DONE] sentinel.
            yield _chunk({}, "stop")
            yield "data: [DONE]\n\n"

        return StreamingResponse(
            sse_stream(),
            media_type="text/event-stream",
            headers={
                "Cache-Control": "no-cache",
                "Connection": "keep-alive",
            },
        )

    # Non-streaming path. run_agent blocks on the LLM call, so run it in a
    # worker thread to keep this async endpoint from stalling the event loop.
    response = await asyncio.to_thread(
        run_agent, client, user_message, agent_id=agent.agent_id
    )
    return {
        "id": "chatcmpl-local",
        "object": "chat.completion",
        "created": 0,
        "model": req.model,
        "choices": [
            {
                "index": 0,
                "message": {"role": "assistant", "content": response},
                "finish_reason": "stop",
            }
        ],
    }