This commit is contained in:
+192
-25
@@ -1,4 +1,4 @@
|
||||
from fastapi import APIRouter, Body, Depends
|
||||
from fastapi import APIRouter, Depends
|
||||
from fastapi.responses import StreamingResponse
|
||||
from openai import OpenAI
|
||||
from pydantic import BaseModel
|
||||
@@ -7,6 +7,7 @@ import asyncio
|
||||
|
||||
from api.dependencies import get_llm_client
|
||||
from agents import get as get_agent, list_all as list_all_agents
|
||||
from skills import get_all_tools, execute_tool, ToolResult
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
@@ -24,34 +25,99 @@ class ChatCompletionRequest(BaseModel):
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Core helpers
|
||||
# Agent resolution
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _resolve_agent(agent_id: str | None = None, model: str | None = None):
    """Return the agent that should handle a request.

    Resolution order:
      1. explicit agent_id
      2. model field (OpenWebUI sends this — maps to agent_id if registered)
      3. fallback to "naked"

    Each candidate is tried in turn, so an ``agent_id`` that is not
    registered does not hide a ``model`` that is.

    Args:
        agent_id: Explicit agent identifier, if the caller supplied one.
        model: Model name from the request, looked up as an agent id.

    Returns:
        Whatever ``get_agent`` returns for the first candidate that
        resolves, otherwise ``get_agent("naked")``.
    """
    for candidate in (agent_id, model):
        if not candidate:
            # None / empty string: nothing to look up.
            continue
        agent = get_agent(candidate)
        if agent:
            return agent
    return get_agent("naked")
|
||||
|
||||
|
||||
def run_agent(
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tool-calling loop (non-streaming)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
async def run_agent_with_tools(
    client: OpenAI,
    message: str,
    agent_id: str | None = None,
    model: str | None = None,
    max_turns: int = 5,
) -> str:
    """Send the user message to the LLM with tool definitions.

    Loop: if the LLM responds with tool_calls, execute them and feed
    results back until the LLM produces a final text answer.

    Args:
        client: OpenAI-compatible client used for the completion calls.
        message: The user's message.
        agent_id: Explicit agent id, passed to ``_resolve_agent``.
        model: Model field from the request, passed to ``_resolve_agent``.
        max_turns: Maximum number of LLM round-trips before giving up.

    Returns:
        The final text answer, or a canned fallback string when the model
        returns no text or ``max_turns`` is exhausted.
    """
    agent = _resolve_agent(agent_id, model)
    tools = get_all_tools(agent.skills)

    messages: list[dict] = [
        {"role": "system", "content": agent.build_system_prompt()},
        {"role": "user", "content": message},
    ]

    # Only include the tool fields when there is something to offer, so the
    # request omits them instead of carrying explicit nulls.
    request_kwargs: dict = {"model": "deepseek-chat", "messages": messages}
    if tools:
        request_kwargs["tools"] = tools
        request_kwargs["tool_choice"] = "auto"

    loop = asyncio.get_running_loop()

    for _ in range(max_turns):
        # The client call is blocking; run it in the default executor so the
        # event loop stays responsive. `messages` is mutated in place, so
        # each turn sees the accumulated history.
        resp = await loop.run_in_executor(
            None,
            lambda: client.chat.completions.create(**request_kwargs),
        )
        choice = resp.choices[0]

        # If the model sends a final text answer, return it
        if choice.finish_reason == "stop" and choice.message.content:
            return choice.message.content

        # If the model wants to call tools
        if choice.message.tool_calls:
            # Append the assistant message with tool_calls
            messages.append(choice.message.model_dump(exclude_none=True))

            for tc in choice.message.tool_calls:
                fn_name = tc.function.name
                try:
                    # An empty argument string means "no arguments".
                    fn_args = json.loads(tc.function.arguments or "{}")
                except json.JSONDecodeError as exc:
                    # Report the problem back to the model instead of
                    # failing the whole request; every tool_call still gets
                    # a matching tool message.
                    result = f"Invalid JSON arguments for tool '{fn_name}': {exc}"
                else:
                    tr = await execute_tool(agent.skills, fn_name, fn_args)
                    result = tr.content if tr else f"Tool '{fn_name}' is not available right now."
                messages.append({
                    "role": "tool",
                    "tool_call_id": tc.id,
                    "content": result,
                })
            continue

        # Fallback — should not normally happen
        return choice.message.content or "I'm not sure how to help with that."

    return "I've taken several actions but still need more information. Could you clarify?"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Non-streaming helper (no tools — used by sync endpoint if tools are absent)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def run_agent_simple(
|
||||
client: OpenAI,
|
||||
message: str,
|
||||
session_id: str | None = None,
|
||||
agent_id: str | None = None,
|
||||
model: str | None = None,
|
||||
) -> str:
|
||||
"""Non-streaming: uses the chosen agent's system prompt."""
|
||||
"""Plain LLM call — no tools. Used when the agent has no tool-enabled skills."""
|
||||
agent = _resolve_agent(agent_id, model)
|
||||
response = client.chat.completions.create(
|
||||
model="deepseek-chat",
|
||||
@@ -63,15 +129,105 @@ def run_agent(
|
||||
return response.choices[0].message.content
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Streaming generators
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
async def _stream_with_tools(
    client: OpenAI,
    message: str,
    agent_id: str | None = None,
    model: str | None = None,
    max_turns: int = 5,
):
    """Streaming version with tool-calling loop.

    Yields tokens from the final text response (tools run silently in the
    background).

    Args:
        client: OpenAI-compatible client used for the completion calls.
        message: The user's message.
        agent_id: Explicit agent id, passed to ``_resolve_agent``.
        model: Model field from the request, passed to ``_resolve_agent``.
        max_turns: Maximum number of LLM round-trips before giving up.

    Yields:
        Pieces of the answer text; a single "…" if ``max_turns`` is
        exhausted while the model keeps requesting tools.
    """
    agent = _resolve_agent(agent_id, model)
    tools = get_all_tools(agent.skills)

    messages: list[dict] = [
        {"role": "system", "content": agent.build_system_prompt()},
        {"role": "user", "content": message},
    ]

    # Only include the tool fields when there is something to offer, so the
    # request omits them instead of carrying explicit nulls.
    request_kwargs: dict = {"model": "deepseek-chat", "messages": messages}
    if tools:
        request_kwargs["tools"] = tools
        request_kwargs["tool_choice"] = "auto"

    loop = asyncio.get_running_loop()

    for _ in range(max_turns):
        # Non-streaming call to check for tool_calls
        resp = await loop.run_in_executor(
            None,
            lambda: client.chat.completions.create(**request_kwargs),
        )
        choice = resp.choices[0]

        # Tool calls? Execute them and loop
        if choice.message.tool_calls:
            messages.append(choice.message.model_dump(exclude_none=True))
            for tc in choice.message.tool_calls:
                fn_name = tc.function.name
                try:
                    # An empty argument string means "no arguments".
                    fn_args = json.loads(tc.function.arguments or "{}")
                except json.JSONDecodeError as exc:
                    # Report the problem back to the model instead of
                    # aborting the stream; every tool_call still gets a
                    # matching tool message.
                    result = f"Invalid JSON arguments for tool '{fn_name}': {exc}"
                else:
                    tr = await execute_tool(agent.skills, fn_name, fn_args)
                    result = tr.content if tr else f"Tool '{fn_name}' is not available right now."
                messages.append({
                    "role": "tool",
                    "tool_call_id": tc.id,
                    "content": result,
                })
            continue

        # Text already in hand — yield it piece by piece. This applies for
        # any finish_reason, so existing content is not thrown away in
        # favour of a second LLM call.
        if choice.message.content:
            for token in choice.message.content:
                yield token
                # Give other tasks a chance to run between pieces.
                await asyncio.sleep(0)
            return

        # Last resort: no text and no tool calls — stream a fresh response
        def _sync_stream():
            stream = client.chat.completions.create(
                model="deepseek-chat",
                messages=messages,
                stream=True,
            )
            for chunk in stream:
                if not chunk.choices:
                    # Skip chunks that carry no choices.
                    continue
                delta = chunk.choices[0].delta
                if delta and delta.content:
                    yield delta.content

        gen = _sync_stream()
        exhausted = object()  # private sentinel marking the end of the stream
        while True:
            # Pull each chunk in the executor so the blocking network read
            # does not stall the event loop.
            token = await loop.run_in_executor(None, next, gen, exhausted)
            if token is exhausted:
                return
            yield token

    yield "…"
|
||||
|
||||
|
||||
async def run_agent_stream(
|
||||
client: OpenAI,
|
||||
message: str,
|
||||
session_id: str | None = None,
|
||||
agent_id: str | None = None,
|
||||
model: str | None = None,
|
||||
):
|
||||
"""Async generator — yields tokens using the chosen agent's system prompt."""
|
||||
"""Async generator — yields tokens. Uses tool-loop when skills have tools."""
|
||||
agent = _resolve_agent(agent_id, model)
|
||||
tools = get_all_tools(agent.skills)
|
||||
|
||||
if tools:
|
||||
async for token in _stream_with_tools(client, message, agent_id, model):
|
||||
yield token
|
||||
return
|
||||
|
||||
# No tools — simple streaming
|
||||
system_prompt = agent.build_system_prompt()
|
||||
loop = asyncio.get_running_loop()
|
||||
|
||||
@@ -111,7 +267,7 @@ async def chat(req: ChatRequest, client: OpenAI = Depends(get_llm_client)):
|
||||
"""Streaming chat endpoint — returns Server-Sent Events."""
|
||||
async def event_stream():
|
||||
async for token in run_agent_stream(
|
||||
client, req.message, req.session_id, req.agent_id,
|
||||
client, req.message, req.agent_id,
|
||||
):
|
||||
payload = json.dumps({"token": token, "session_id": req.session_id})
|
||||
yield f"data: {payload}\n\n"
|
||||
@@ -130,9 +286,18 @@ async def chat(req: ChatRequest, client: OpenAI = Depends(get_llm_client)):
|
||||
|
||||
|
||||
@router.post("/chat/sync")
async def chat_sync(req: ChatRequest, client: OpenAI = Depends(get_llm_client)):
    """Non-streaming endpoint — uses tool-calling when the agent has tools.

    Resolves the agent from ``req.agent_id``. If that agent exposes tools,
    the tool-calling loop is used; otherwise a plain LLM call is made.

    Returns:
        A dict with the full ``response`` text and the request's
        ``session_id``.
    """
    agent = _resolve_agent(req.agent_id)

    if get_all_tools(agent.skills):
        response = await run_agent_with_tools(
            client, req.message, agent_id=req.agent_id,
        )
    else:
        # run_agent_simple's third positional parameter is session_id, so the
        # ids are passed by keyword to avoid binding agent_id to the wrong
        # slot. The call is blocking, so it runs in a worker thread rather
        # than on the event loop.
        response = await asyncio.to_thread(
            run_agent_simple,
            client,
            req.message,
            session_id=req.session_id,
            agent_id=req.agent_id,
        )

    return {"response": response, "session_id": req.session_id}
|
||||
|
||||
|
||||
@@ -174,11 +339,9 @@ async def chat_completions(
|
||||
client: OpenAI = Depends(get_llm_client),
|
||||
):
|
||||
"""OpenAI-compatible /chat/completions — supports stream=True.
|
||||
The last message's content is used as the user prompt; defaults to 'naked' agent.
|
||||
Resolves the agent from the model field (OpenWebUI sends this).
|
||||
"""
|
||||
user_message = req.messages[-1]["content"]
|
||||
|
||||
# Resolve agent from the model field (OpenWebUI sends this)
|
||||
agent = _resolve_agent(model=req.model)
|
||||
|
||||
if req.stream:
|
||||
@@ -219,9 +382,13 @@ async def chat_completions(
|
||||
},
|
||||
)
|
||||
|
||||
# Non-streaming path — resolve agent from model field
|
||||
agent = _resolve_agent(model=req.model)
|
||||
response = run_agent(client, user_message, agent_id=agent.agent_id)
|
||||
# Non-streaming path
|
||||
tools = get_all_tools(agent.skills)
|
||||
if tools:
|
||||
response = await run_agent_with_tools(client, user_message, agent_id=agent.agent_id)
|
||||
else:
|
||||
response = run_agent_simple(client, user_message, agent_id=agent.agent_id)
|
||||
|
||||
return {
|
||||
"id": "chatcmpl-local",
|
||||
"object": "chat.completion",
|
||||
|
||||
Reference in New Issue
Block a user