Files
Agents/api/v1/chat.py
T
2026-05-10 18:54:46 +02:00

182 lines
5.6 KiB
Python

import asyncio
import json

from fastapi import APIRouter, Body, Depends, HTTPException
from fastapi.responses import StreamingResponse
from openai import OpenAI
from pydantic import BaseModel

from api.dependencies import get_llm_client

# Router on which every endpoint in this module is registered.
router = APIRouter()
# Request body accepted by the /chat and /chat/sync endpoints.
# Documented with comments instead of a docstring: pydantic copies a class
# docstring into the generated JSON schema as the model description.
class ChatRequest(BaseModel):
    # Text of the user's message (required).
    message: str

    # Optional caller-supplied identifier; the endpoints echo it back
    # unchanged in their responses.
    session_id: str | None = None
# Request body accepted by the /chat/completions endpoint.
# Documented with comments instead of a docstring: pydantic copies a class
# docstring into the generated JSON schema as the model description.
class ChatCompletionRequest(BaseModel):
    # Conversation as a list of message dicts; the endpoint reads the
    # "content" entry of the last one.
    messages: list[dict]

    # When true the endpoint answers with a Server-Sent Events stream
    # instead of a single JSON document.
    stream: bool = False

    # Model name supplied by the caller; reported back in the response.
    model: str = "deepseek-chat"
# ---------------------------------------------------------------------------
# Core helpers
# ---------------------------------------------------------------------------
def run_agent(
    client: OpenAI,
    message: str,
    session_id: str | None = None,
    *,
    model: str = "deepseek-chat",
) -> str:
    """Send one user message to the LLM and return the complete reply.

    Non-streaming: the call blocks until the whole completion is available,
    so it must not be invoked directly on an event-loop thread.

    Args:
        client: OpenAI-compatible client used to issue the request.
        message: User text, sent after a fixed system prompt.
        session_id: Accepted for interface compatibility but currently
            unused; no conversation history is attached to the request.
        model: Name of the model to query.

    Returns:
        The assistant's reply text, or ``""`` when the API returns a
        message without text content.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are a helpful agent."},
            {"role": "user", "content": message},
        ],
    )
    # `content` is optional in the API response; coalesce it so the declared
    # `str` return type always holds for callers.
    return response.choices[0].message.content or ""
async def run_agent_stream(
    client: OpenAI,
    message: str,
    session_id: str | None = None,
    *,
    model: str = "deepseek-chat",
):
    """Yield the LLM's reply piece by piece as it arrives.

    Async generator. The client's streaming iterator is synchronous, so each
    step of it is executed in the event loop's default executor to keep the
    loop responsive while waiting on the network.

    Args:
        client: OpenAI-compatible client used to issue the request.
        message: User text, sent after a fixed system prompt.
        session_id: Accepted for interface compatibility but currently
            unused; no conversation history is attached to the request.
        model: Name of the model to query.

    Yields:
        Each non-empty text fragment of the reply, in arrival order.
    """
    loop = asyncio.get_running_loop()
    # Private end-of-stream marker: cannot be confused with any yielded value.
    exhausted = object()

    def _sync_stream():
        # The request itself is issued lazily, on the first `next()` call,
        # which already happens inside the executor thread.
        stream = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "You are a helpful agent."},
                {"role": "user", "content": message},
            ],
            stream=True,
        )
        for chunk in stream:
            # Some chunks carry no choices at all (e.g. usage-only chunks);
            # indexing them would raise IndexError and abort the stream.
            if not chunk.choices:
                continue
            delta = chunk.choices[0].delta
            if delta and delta.content:
                yield delta.content

    gen = _sync_stream()
    # Advance the sync generator one step at a time off the event loop and
    # hand each fragment back to the async consumer.
    while (token := await loop.run_in_executor(None, next, gen, exhausted)) is not exhausted:
        yield token
# ---------------------------------------------------------------------------
# Endpoints
# ---------------------------------------------------------------------------
# Status endpoint: always answers with the same constant payload.
# (No docstring on purpose, so the published endpoint description is unchanged.)
@router.get("/")
def root():
    status_payload = {"status": "ok"}
    return status_payload
@router.post("/chat")
async def chat(req: ChatRequest, client: OpenAI = Depends(get_llm_client)):
    """Streaming chat endpoint — returns Server-Sent Events."""
    # Every event carries the caller's session id so the client can
    # correlate frames with its own conversation state.
    sid = req.session_id

    def _frame(event: dict) -> str:
        # One SSE frame: a `data:` line followed by a blank line.
        return f"data: {json.dumps(event)}\n\n"

    async def event_stream():
        # One frame per text fragment produced by the agent...
        async for piece in run_agent_stream(client, req.message, sid):
            yield _frame({"token": piece, "session_id": sid})
        # ...then a final frame telling the client the reply is complete.
        yield _frame({"done": True, "session_id": sid})

    sse_headers = {
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        # Disable nginx buffering if behind a proxy
        "X-Accel-Buffering": "no",
    }
    return StreamingResponse(
        event_stream(),
        media_type="text/event-stream",
        headers=sse_headers,
    )
@router.post("/chat/sync")
def chat_sync(req: ChatRequest, client: OpenAI = Depends(get_llm_client)):
    """Non-streaming fallback — returns the full response at once."""
    sid = req.session_id
    # Blocks until the agent has produced its whole reply.
    reply = run_agent(client, req.message, sid)
    # The session id is echoed back unchanged alongside the reply text.
    return {"response": reply, "session_id": sid}
# Returns a fixed, single-entry model list shaped as an object of type "list"
# whose "data" array holds one "model" object.
# (No docstring on purpose, so the published endpoint description is unchanged.)
@router.get("/models")
def list_models():
    agent_entry = {
        "id": "agent-model",
        "object": "model",
        "created": 0,
        "owned_by": "local-agent",
    }
    return {"object": "list", "data": [agent_entry]}
def _completion_chunk_frame(model: str, delta: dict, finish_reason: str | None) -> str:
    """Serialize one ``chat.completion.chunk`` object as a single SSE frame."""
    chunk = {
        "id": "chatcmpl-local",
        "object": "chat.completion.chunk",
        # Same placeholder/echo values as the non-streaming response, so both
        # paths expose the same top-level fields.
        "created": 0,
        "model": model,
        "choices": [
            {
                "index": 0,
                "delta": delta,
                "finish_reason": finish_reason,
            }
        ],
    }
    return f"data: {json.dumps(chunk)}\n\n"


# Behaviour notes (kept out of the docstring, which FastAPI publishes as the
# endpoint description):
#   * Only the "content" of the LAST entry in `req.messages` is forwarded to
#     the agent; earlier messages are ignored.
#   * `req.model` is reported back in the response; it is not passed to the
#     agent helpers.
#   * Responds with HTTP 400 when `messages` is empty or its last entry has
#     no "content", instead of failing with an unhandled IndexError/KeyError.
@router.post("/chat/completions")
async def chat_completions(
    req: ChatCompletionRequest,
    client: OpenAI = Depends(get_llm_client),
):
    """OpenAI-compatible /chat/completions — supports stream=True."""
    if not req.messages:
        raise HTTPException(
            status_code=400,
            detail="'messages' must contain at least one message",
        )
    user_message = req.messages[-1].get("content")
    if user_message is None:
        raise HTTPException(
            status_code=400,
            detail="the last message in 'messages' has no 'content'",
        )

    if req.stream:

        async def sse_stream():
            # One chunk per text fragment produced by the agent.
            async for token in run_agent_stream(client, user_message):
                yield _completion_chunk_frame(req.model, {"content": token}, None)
            # Final chunk with finish_reason, then the end-of-stream marker.
            yield _completion_chunk_frame(req.model, {}, "stop")
            yield "data: [DONE]\n\n"

        return StreamingResponse(
            sse_stream(),
            media_type="text/event-stream",
            headers={
                "Cache-Control": "no-cache",
                "Connection": "keep-alive",
                # Disable nginx buffering if behind a proxy (same as /chat).
                "X-Accel-Buffering": "no",
            },
        )

    # Non-streaming path. `run_agent` is a blocking call; run it in a worker
    # thread so this coroutine does not stall the event loop meanwhile.
    response = await asyncio.to_thread(run_agent, client, user_message)
    return {
        "id": "chatcmpl-local",
        "object": "chat.completion",
        "created": 0,
        "model": req.model,
        "choices": [
            {
                "index": 0,
                "message": {"role": "assistant", "content": response},
                "finish_reason": "stop",
            }
        ],
    }