Fixed API calls with Seerr, added full context for models, beginning to standardize a single ID as the source of truth for future tools
Build and Push Agent API / build (push) Successful in 14s
Build and Push Agent API / build (push) Successful in 14s
This commit is contained in:
@@ -0,0 +1,221 @@
|
||||
# API Architecture — Agent + Skill + Tool Pipeline
|
||||
|
||||
This document explains how the API routes user messages through the agent/skill/tool pipeline to produce responses.
|
||||
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────┐
|
||||
│ OpenWebUI / Client │
|
||||
│ POST /v1/chat/completions { model, messages, stream } │
|
||||
└──────────────────────────────┬──────────────────────────────────┘
|
||||
│
|
||||
▼
|
||||
┌──────────────────────────────────────────────────────────────────┐
|
||||
│ api/v1/chat.py — chat_completions() │
|
||||
│ │
|
||||
│ 1. _resolve_agent(model=req.model) → Agent │
|
||||
│ 2. agent.build_system_prompt() → system prompt │
|
||||
│ 3. Build full_messages = [system] + req.messages │
|
||||
│ 4. run_agent_with_tools(client, messages, agent_id) │
|
||||
└──────────────────────────────┬───────────────────────────────────┘
|
||||
│
|
||||
▼
|
||||
┌──────────────────────────────────────────────────────────────────┐
|
||||
│ Tool-Calling Loop (run_agent_with_tools / run_agent_stream) │
|
||||
│ │
|
||||
│ while turns < max_turns: │
|
||||
│ response = LLM.chat(messages, tools=agent_tools) │
|
||||
│ if response has tool_calls: │
|
||||
│ for each tool_call: │
|
||||
│ result = execute_tool(skills, name, args) │
|
||||
│ append result to messages │
|
||||
│ else: │
|
||||
│ return response.text (stream tokens if streaming) │
|
||||
└──────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Key Concepts
|
||||
|
||||
### 1. Agent
|
||||
|
||||
An **Agent** is a persona + skill bundle. Defined in `agents/`.
|
||||
|
||||
```python
|
||||
# agents/media_agent.py
|
||||
Agent(
|
||||
agent_id="media-agent",
|
||||
description="Media assistant with Seerr integration",
|
||||
skills=["media_info", "seerr", "triage"],
|
||||
base_prompt="You are a media assistant...",
|
||||
)
|
||||
```
|
||||
|
||||
- `agent_id` — unique name, exposed as a model in OpenWebUI
|
||||
- `skills` — list of skill names to load
|
||||
- `base_prompt` — starting system prompt, combined with skill fragments
|
||||
- `build_system_prompt()` — merges base_prompt + all skill prompt fragments
|
||||
|
||||
Agents self-register at import time via `agents/__init__.py`'s `register()`.
|
||||
`main.py` calls `load_all_agents()` at startup to import all agent/skill modules.
|
||||
|
||||
### 2. Skill
|
||||
|
||||
A **Skill** is a capability bundle. Defined in `skills/`.
|
||||
|
||||
```python
|
||||
# skills/seerr.py
|
||||
Skill(
|
||||
name="seerr",
|
||||
description="Seerr integration — trending, discover, request media, submit issues",
|
||||
prompt_fragment="## Seerr Media Tools\n...",
|
||||
tools=[...], # OpenAI function-calling schema
|
||||
execute=_execute, # async handler: tool_name + args → ToolResult
|
||||
)
|
||||
```
|
||||
|
||||
- `prompt_fragment` — injected into the agent's system prompt. Teaches the LLM what tools are available and when to use them.
|
||||
- `tools` — list of OpenAI function definitions (name, description, parameters).
|
||||
- `execute` — async callable that routes tool calls to API handlers.
|
||||
|
||||
### 3. Tool
|
||||
|
||||
A **Tool** is a single function the LLM can call. Defined as part of a skill's `tools` list.
|
||||
|
||||
```python
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "seerr_trending",
|
||||
"description": "Get trending movies and TV shows from Seerr...",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"kind": {"type": "string", "enum": ["movie", "tv", "all"]},
|
||||
"language": {"type": "string"},
|
||||
},
|
||||
"required": ["kind"],
|
||||
},
|
||||
},
|
||||
}
|
||||
```
|
||||
|
||||
When the LLM responds with a tool call, the loop:
|
||||
1. Extracts `function.name` (e.g. `"seerr_trending"`) and parses `function.arguments`, which arrives as a JSON-encoded string (e.g. `'{"kind": "movie"}'`), into a dict
|
||||
2. Calls `execute_tool(agent.skills, name, args)` which finds the owning skill and runs it
|
||||
3. Appends the result text to the message history
|
||||
4. Sends back to the LLM for a follow-up response
|
||||
|
||||
---
|
||||
|
||||
## Full Request Flow
|
||||
|
||||
### Step-by-step: "What are trending movies?"
|
||||
|
||||
```
|
||||
1. OpenWebUI sends:
|
||||
POST /v1/chat/completions
|
||||
{
|
||||
"model": "media-agent",
|
||||
"messages": [
|
||||
{"role": "user", "content": "What are trending movies?"}
|
||||
],
|
||||
"stream": false
|
||||
}
|
||||
|
||||
2. chat_completions():
|
||||
→ _resolve_agent(model="media-agent")
|
||||
→ get_agent("media-agent") → Agent(skills=["media_info", "seerr", "triage"])
|
||||
→ tools = get_all_tools(["media_info", "seerr", "triage"])
|
||||
→ Returns 7 tool definitions from seerr.py
|
||||
→ system_prompt = agent.build_system_prompt()
|
||||
→ base_prompt + media_info fragment + seerr fragment + triage fragment
|
||||
|
||||
3. run_agent_with_tools() — Turn 1:
|
||||
→ LLM receives: [system prompt with tools] + [user: "What are trending movies?"]
|
||||
→ LLM responds: tool_calls = [{"function": {"name": "seerr_trending", "arguments": {"kind": "movie"}}}]
|
||||
|
||||
4. Execute tool:
|
||||
→ execute_tool(["media_info", "seerr", "triage"], "seerr_trending", {"kind": "movie"})
|
||||
→ Finds seerr skill → calls _execute("seerr_trending", ...) → _trending(args)
|
||||
→ GET /api/v1/discover/trending?mediaType=movie
|
||||
→ Returns formatted list with [tmdb:IDs]
|
||||
|
||||
5. run_agent_with_tools() — Turn 2:
|
||||
→ LLM receives: previous messages + [tool: "Found 20 trending movies..."]
|
||||
→ LLM responds: text = "Here are the top trending movies! 🎬 ..."
|
||||
→ finish_reason="stop" → return the text
|
||||
|
||||
6. chat_completions() returns:
|
||||
{ "choices": [{"message": {"content": "Here are the top trending movies!..."}}] }
|
||||
```
|
||||
|
||||
### Step-by-step: "Request the 2026 one" (multi-turn context)
|
||||
|
||||
```
|
||||
1. OpenWebUI sends the FULL history:
|
||||
{
|
||||
"model": "media-agent",
|
||||
"messages": [
|
||||
{"role": "user", "content": "What are trending movies?"},
|
||||
{"role": "assistant", "content": "Here are the top 10 trending movies!
|
||||
1. **Mortal Kombat II** (2026) [tmdb:931285] — ..."},
|
||||
{"role": "user", "content": "could you request the mortal kombat one?"},
|
||||
{"role": "assistant", "content": "There are several Mortal Kombat entries! ..."},
|
||||
{"role": "user", "content": "the 2026 one"}
|
||||
]
|
||||
}
|
||||
|
||||
2. chat_completions():
|
||||
→ req.messages contains the ENTIRE conversation history
|
||||
→ System prompt prepended → full_messages = [system] + 5 history messages
|
||||
→ LLM sees everything: the trending list with [tmdb:931285], the disambiguation, "the 2026 one"
|
||||
|
||||
3. LLM reasons:
|
||||
- I previously listed Mortal Kombat II (2026) with [tmdb:931285]
|
||||
- The user said "request the mortal kombat one" → I searched and showed 4 options
|
||||
- Now they say "the 2026 one" → that matches Mortal Kombat II (2026) [tmdb:931285]
|
||||
- I should call seerr_request_media(kind="movie", title="Mortal Kombat II", tmdb_id=931285)
|
||||
|
||||
4. Tool executes the request → ✅ Success
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## File Map
|
||||
|
||||
```
|
||||
main.py # FastAPI app entry point, creates singletons
|
||||
├── core/
|
||||
│ ├── config.py # .env loader, config constants
|
||||
│ └── llm.py # create_client() factory for OpenAI client
|
||||
├── api/
|
||||
│ ├── dependencies.py # FastAPI Depends: get_llm_client()
|
||||
│ └── v1/
|
||||
│ └── chat.py # APIRouter, endpoints, tool-calling loop
|
||||
├── agents/
|
||||
│ ├── __init__.py # Agent dataclass, registry, load_all_agents()
|
||||
│ ├── naked.py # Agent: bare-bones LLM, no skills
|
||||
│ └── media_agent.py # Agent: media assistant with Seerr skills
|
||||
└── skills/
|
||||
├── __init__.py # Skill dataclass, ToolResult, registry, execution
|
||||
├── media_info.py # Skill: base media assistant persona (prompt-only)
|
||||
├── seerr.py # Skill: Seerr API tools (7 tools, real API calls)
|
||||
└── triage.py # Skill: fallback for unsupported actions (prompt-only)
|
||||
```
|
||||
|
||||
## Key Design Decisions
|
||||
|
||||
1. **Full multi-turn history**: `req.messages` passes through unchanged. The LLM has access to its own previous responses (including `[tmdb:IDs]`). No external state management needed.
|
||||
|
||||
2. **No deterministic pre-processing**: No affirmation detectors, reference resolvers, or hardcoded rules. The LLM interprets user intent naturally from full conversation context.
|
||||
|
||||
3. **Agent selection via `model` field**: OpenWebUI sends `model` in the request. `_resolve_agent()` maps it to a registered agent. The `/v1/models` endpoint lists all agents as selectable models.
|
||||
|
||||
4. **Skills = prompts + tools**: Skills inject prompt fragments AND optionally expose OpenAI function-calling tools. Prompt-only skills (like `triage`) just shape behavior. Tool-enabled skills (like `seerr`) let the LLM take real actions.
|
||||
|
||||
5. **Singleton LLM client**: Created once in `main.py`, stored on `app.state.llm_client`, accessed via FastAPI `Depends(get_llm_client)`.
|
||||
+1
-1
@@ -3,5 +3,5 @@ from openai import OpenAI
|
||||
|
||||
|
||||
def get_llm_client(request: Request) -> OpenAI:
|
||||
"""FastAPI dependency - returns the singleton OpenAI client from app.state."""
|
||||
"""FastAPI dependency — returns the singleton OpenAI client from app.state."""
|
||||
return request.app.state.llm_client
|
||||
|
||||
+73
-112
@@ -7,7 +7,7 @@ import asyncio
|
||||
|
||||
from api.dependencies import get_llm_client
|
||||
from agents import get as get_agent, list_all as list_all_agents
|
||||
from skills import get_all_tools, execute_tool, ToolResult
|
||||
from skills import get_all_tools, execute_tool
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
@@ -15,7 +15,7 @@ router = APIRouter()
|
||||
class ChatRequest(BaseModel):
|
||||
message: str
|
||||
session_id: str | None = None
|
||||
agent_id: str | None = None # which agent to use ("naked", "media-agent", …)
|
||||
agent_id: str | None = None
|
||||
|
||||
|
||||
class ChatCompletionRequest(BaseModel):
|
||||
@@ -30,7 +30,6 @@ class ChatCompletionRequest(BaseModel):
|
||||
|
||||
def _resolve_agent(agent_id: str | None = None, model: str | None = None):
|
||||
"""
|
||||
Resolution order:
|
||||
1. explicit agent_id
|
||||
2. model field (OpenWebUI sends this — maps to agent_id if registered)
|
||||
3. fallback to "naked"
|
||||
@@ -48,23 +47,18 @@ def _resolve_agent(agent_id: str | None = None, model: str | None = None):
|
||||
|
||||
async def run_agent_with_tools(
|
||||
client: OpenAI,
|
||||
message: str,
|
||||
messages: list[dict],
|
||||
agent_id: str | None = None,
|
||||
model: str | None = None,
|
||||
max_turns: int = 5,
|
||||
) -> str:
|
||||
"""Send the user message to the LLM with tool definitions.
|
||||
Loop: if the LLM responds with tool_calls, execute them and feed
|
||||
results back until the LLM produces a final text answer.
|
||||
"""
|
||||
"""Send messages to the LLM with tool definitions. Tool-calling loop."""
|
||||
agent = _resolve_agent(agent_id, model)
|
||||
tools = get_all_tools(agent.skills)
|
||||
system_prompt = agent.build_system_prompt()
|
||||
|
||||
messages: list[dict] = [
|
||||
{"role": "system", "content": system_prompt},
|
||||
{"role": "user", "content": message},
|
||||
]
|
||||
full_messages: list[dict] = [{"role": "system", "content": system_prompt}]
|
||||
full_messages.extend(messages)
|
||||
|
||||
loop = asyncio.get_running_loop()
|
||||
|
||||
@@ -73,129 +67,89 @@ async def run_agent_with_tools(
|
||||
None,
|
||||
lambda: client.chat.completions.create(
|
||||
model="deepseek-chat",
|
||||
messages=messages,
|
||||
messages=full_messages,
|
||||
tools=tools if tools else None,
|
||||
tool_choice="auto" if tools else None,
|
||||
),
|
||||
)
|
||||
choice = resp.choices[0]
|
||||
|
||||
# If the model sends a final text answer, return it
|
||||
if choice.finish_reason == "stop" and choice.message.content:
|
||||
return choice.message.content
|
||||
|
||||
# If the model wants to call tools
|
||||
if choice.message.tool_calls:
|
||||
# Append the assistant message with tool_calls
|
||||
messages.append(choice.message.model_dump(exclude_none=True))
|
||||
|
||||
full_messages.append(choice.message.model_dump(exclude_none=True))
|
||||
for tc in choice.message.tool_calls:
|
||||
fn_name = tc.function.name
|
||||
fn_args = json.loads(tc.function.arguments)
|
||||
tr = await execute_tool(agent.skills, fn_name, fn_args)
|
||||
result = tr.content if tr else f"Tool '{fn_name}' is not available right now."
|
||||
messages.append({
|
||||
"role": "tool",
|
||||
"tool_call_id": tc.id,
|
||||
"content": result,
|
||||
result = tr.content if tr else f"Tool '{fn_name}' is not available."
|
||||
full_messages.append({
|
||||
"role": "tool", "tool_call_id": tc.id, "content": result,
|
||||
})
|
||||
continue
|
||||
|
||||
# Fallback — should not normally happen
|
||||
return choice.message.content or "I'm not sure how to help with that."
|
||||
|
||||
return "I've taken several actions but still need more information. Could you clarify?"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Non-streaming helper (no tools — used by sync endpoint if tools are absent)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def run_agent_simple(
|
||||
client: OpenAI,
|
||||
message: str,
|
||||
agent_id: str | None = None,
|
||||
model: str | None = None,
|
||||
) -> str:
|
||||
"""Plain LLM call — no tools. Used when the agent has no tool-enabled skills."""
|
||||
agent = _resolve_agent(agent_id, model)
|
||||
response = client.chat.completions.create(
|
||||
model="deepseek-chat",
|
||||
messages=[
|
||||
{"role": "system", "content": agent.build_system_prompt()},
|
||||
{"role": "user", "content": message},
|
||||
],
|
||||
)
|
||||
return response.choices[0].message.content
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Streaming generators
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
async def _stream_with_tools(
|
||||
client: OpenAI,
|
||||
message: str,
|
||||
messages: list[dict],
|
||||
agent_id: str | None = None,
|
||||
model: str | None = None,
|
||||
max_turns: int = 5,
|
||||
):
|
||||
"""Streaming version with tool-calling loop.
|
||||
Yields tokens from the final text response (tools run silently in the background).
|
||||
"""
|
||||
"""Streaming tool-calling loop. Tools run silently, final text is streamed."""
|
||||
agent = _resolve_agent(agent_id, model)
|
||||
tools = get_all_tools(agent.skills)
|
||||
system_prompt = agent.build_system_prompt()
|
||||
|
||||
messages: list[dict] = [
|
||||
{"role": "system", "content": system_prompt},
|
||||
{"role": "user", "content": message},
|
||||
]
|
||||
full_messages: list[dict] = [{"role": "system", "content": system_prompt}]
|
||||
full_messages.extend(messages)
|
||||
|
||||
loop = asyncio.get_running_loop()
|
||||
|
||||
for turn in range(max_turns):
|
||||
# Non-streaming call to check for tool_calls
|
||||
resp = await loop.run_in_executor(
|
||||
None,
|
||||
lambda: client.chat.completions.create(
|
||||
model="deepseek-chat",
|
||||
messages=messages,
|
||||
messages=full_messages,
|
||||
tools=tools if tools else None,
|
||||
tool_choice="auto" if tools else None,
|
||||
),
|
||||
)
|
||||
choice = resp.choices[0]
|
||||
|
||||
# Tool calls? Execute them and loop
|
||||
if choice.message.tool_calls:
|
||||
messages.append(choice.message.model_dump(exclude_none=True))
|
||||
full_messages.append(choice.message.model_dump(exclude_none=True))
|
||||
for tc in choice.message.tool_calls:
|
||||
fn_name = tc.function.name
|
||||
fn_args = json.loads(tc.function.arguments)
|
||||
tr = await execute_tool(agent.skills, fn_name, fn_args)
|
||||
result = tr.content if tr else f"Tool '{fn_name}' is not available right now."
|
||||
messages.append({
|
||||
result = tr.content if tr else f"Tool '{fn_name}' is not available."
|
||||
full_messages.append({
|
||||
"role": "tool",
|
||||
"tool_call_id": tc.id,
|
||||
"content": result,
|
||||
})
|
||||
continue
|
||||
|
||||
# Final text answer — stream it
|
||||
if choice.finish_reason == "stop" and choice.message.content:
|
||||
# Already have a non-streaming answer — yield it token-by-token
|
||||
for token in choice.message.content:
|
||||
yield token
|
||||
await asyncio.sleep(0)
|
||||
return
|
||||
|
||||
# Last resort: stream the final response
|
||||
def _sync_stream():
|
||||
stream = client.chat.completions.create(
|
||||
model="deepseek-chat",
|
||||
messages=messages,
|
||||
stream=True,
|
||||
model="deepseek-chat", messages=full_messages, stream=True,
|
||||
)
|
||||
for chunk in stream:
|
||||
delta = chunk.choices[0].delta
|
||||
@@ -209,12 +163,12 @@ async def _stream_with_tools(
|
||||
return
|
||||
yield token
|
||||
|
||||
yield "…"
|
||||
yield "\u2026"
|
||||
|
||||
|
||||
async def run_agent_stream(
|
||||
client: OpenAI,
|
||||
message: str,
|
||||
messages: list[dict],
|
||||
agent_id: str | None = None,
|
||||
model: str | None = None,
|
||||
):
|
||||
@@ -223,22 +177,20 @@ async def run_agent_stream(
|
||||
tools = get_all_tools(agent.skills)
|
||||
|
||||
if tools:
|
||||
async for token in _stream_with_tools(client, message, agent_id, model):
|
||||
async for token in _stream_with_tools(client, messages, agent_id, model):
|
||||
yield token
|
||||
return
|
||||
|
||||
# No tools — simple streaming
|
||||
system_prompt = agent.build_system_prompt()
|
||||
full_messages: list[dict] = [{"role": "system", "content": system_prompt}]
|
||||
full_messages.extend(messages)
|
||||
|
||||
loop = asyncio.get_running_loop()
|
||||
|
||||
def _sync_stream():
|
||||
stream = client.chat.completions.create(
|
||||
model="deepseek-chat",
|
||||
messages=[
|
||||
{"role": "system", "content": system_prompt},
|
||||
{"role": "user", "content": message},
|
||||
],
|
||||
stream=True,
|
||||
model="deepseek-chat", messages=full_messages, stream=True,
|
||||
)
|
||||
for chunk in stream:
|
||||
delta = chunk.choices[0].delta
|
||||
@@ -263,15 +215,17 @@ def root():
|
||||
|
||||
|
||||
@router.post("/chat")
|
||||
async def chat(req: ChatRequest, client: OpenAI = Depends(get_llm_client)):
|
||||
"""Streaming chat endpoint — returns Server-Sent Events."""
|
||||
async def chat(
|
||||
req: ChatRequest,
|
||||
client: OpenAI = Depends(get_llm_client),
|
||||
):
|
||||
"""Streaming chat — single message, no history."""
|
||||
messages = [{"role": "user", "content": req.message}]
|
||||
|
||||
async def event_stream():
|
||||
async for token in run_agent_stream(
|
||||
client, req.message, req.agent_id,
|
||||
):
|
||||
async for token in run_agent_stream(client, messages, req.agent_id):
|
||||
payload = json.dumps({"token": token, "session_id": req.session_id})
|
||||
yield f"data: {payload}\n\n"
|
||||
|
||||
yield f"data: {json.dumps({'done': True, 'session_id': req.session_id})}\n\n"
|
||||
|
||||
return StreamingResponse(
|
||||
@@ -286,24 +240,34 @@ async def chat(req: ChatRequest, client: OpenAI = Depends(get_llm_client)):
|
||||
|
||||
|
||||
@router.post("/chat/sync")
|
||||
async def chat_sync(req: ChatRequest, client: OpenAI = Depends(get_llm_client)):
|
||||
"""Non-streaming endpoint — uses tool-calling when the agent has tools."""
|
||||
async def chat_sync(
|
||||
req: ChatRequest,
|
||||
client: OpenAI = Depends(get_llm_client),
|
||||
):
|
||||
"""Non-streaming chat — single message."""
|
||||
agent = _resolve_agent(req.agent_id)
|
||||
tools = get_all_tools(agent.skills)
|
||||
messages = [{"role": "user", "content": req.message}]
|
||||
|
||||
if tools:
|
||||
response = await run_agent_with_tools(
|
||||
client, req.message, req.agent_id,
|
||||
)
|
||||
response = await run_agent_with_tools(client, messages, req.agent_id)
|
||||
else:
|
||||
response = run_agent_simple(client, req.message, req.agent_id)
|
||||
agent_obj = _resolve_agent(req.agent_id)
|
||||
resp = client.chat.completions.create(
|
||||
model="deepseek-chat",
|
||||
messages=[
|
||||
{"role": "system", "content": agent_obj.build_system_prompt()},
|
||||
{"role": "user", "content": req.message},
|
||||
],
|
||||
)
|
||||
response = resp.choices[0].message.content
|
||||
|
||||
return {"response": response, "session_id": req.session_id}
|
||||
|
||||
|
||||
@router.get("/agents")
|
||||
def list_agents():
|
||||
"""Return all registered agents with their ids, descriptions, and skills."""
|
||||
"""Return all registered agents."""
|
||||
return {
|
||||
"agents": [
|
||||
{
|
||||
@@ -318,7 +282,7 @@ def list_agents():
|
||||
|
||||
@router.get("/models")
|
||||
def list_models():
|
||||
"""Return all registered agents as selectable models for OpenWebUI."""
|
||||
"""Return agents as selectable models for OpenWebUI."""
|
||||
return {
|
||||
"object": "list",
|
||||
"data": [
|
||||
@@ -339,36 +303,28 @@ async def chat_completions(
|
||||
client: OpenAI = Depends(get_llm_client),
|
||||
):
|
||||
"""OpenAI-compatible /chat/completions — supports stream=True.
|
||||
Resolves the agent from the model field (OpenWebUI sends this).
|
||||
Multi-turn: req.messages contains the FULL conversation history.
|
||||
Agent resolved from the model field (OpenWebUI sends this).
|
||||
"""
|
||||
user_message = req.messages[-1]["content"]
|
||||
agent = _resolve_agent(model=req.model)
|
||||
|
||||
if req.stream:
|
||||
async def sse_stream():
|
||||
async for token in run_agent_stream(client, user_message, agent_id=agent.agent_id):
|
||||
async for token in run_agent_stream(
|
||||
client, req.messages, agent_id=agent.agent_id,
|
||||
):
|
||||
chunk = {
|
||||
"id": "chatcmpl-local",
|
||||
"object": "chat.completion.chunk",
|
||||
"choices": [
|
||||
{
|
||||
"index": 0,
|
||||
"delta": {"content": token},
|
||||
"finish_reason": None,
|
||||
}
|
||||
{"index": 0, "delta": {"content": token}, "finish_reason": None}
|
||||
],
|
||||
}
|
||||
yield f"data: {json.dumps(chunk)}\n\n"
|
||||
final_chunk = {
|
||||
"id": "chatcmpl-local",
|
||||
"object": "chat.completion.chunk",
|
||||
"choices": [
|
||||
{
|
||||
"index": 0,
|
||||
"delta": {},
|
||||
"finish_reason": "stop",
|
||||
}
|
||||
],
|
||||
"choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
|
||||
}
|
||||
yield f"data: {json.dumps(final_chunk)}\n\n"
|
||||
yield "data: [DONE]\n\n"
|
||||
@@ -376,18 +332,23 @@ async def chat_completions(
|
||||
return StreamingResponse(
|
||||
sse_stream(),
|
||||
media_type="text/event-stream",
|
||||
headers={
|
||||
"Cache-Control": "no-cache",
|
||||
"Connection": "keep-alive",
|
||||
},
|
||||
headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
|
||||
)
|
||||
|
||||
# Non-streaming path
|
||||
# Non-streaming — full history, tool-calling
|
||||
tools = get_all_tools(agent.skills)
|
||||
if tools:
|
||||
response = await run_agent_with_tools(client, user_message, agent_id=agent.agent_id)
|
||||
response = await run_agent_with_tools(
|
||||
client, req.messages, agent_id=agent.agent_id,
|
||||
)
|
||||
else:
|
||||
response = run_agent_simple(client, user_message, agent_id=agent.agent_id)
|
||||
system_prompt = agent.build_system_prompt()
|
||||
full_msgs: list[dict] = [{"role": "system", "content": system_prompt}]
|
||||
full_msgs.extend(req.messages)
|
||||
resp = client.chat.completions.create(
|
||||
model="deepseek-chat", messages=full_msgs,
|
||||
)
|
||||
response = resp.choices[0].message.content
|
||||
|
||||
return {
|
||||
"id": "chatcmpl-local",
|
||||
@@ -401,4 +362,4 @@ async def chat_completions(
|
||||
"finish_reason": "stop",
|
||||
}
|
||||
],
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user