Add agent and skill system: implement Agent and Skill classes, register media and naked agents, and create media_info demo skill

2026-05-10 19:24:44 +02:00
parent 54ac77ab51
commit cb4ebfa43e
7 changed files with 276 additions and 22 deletions
@@ -6,6 +6,7 @@ import json
 import asyncio

 from api.dependencies import get_llm_client
+from agents import get as get_agent, list_all as list_all_agents

 router = APIRouter()

@@ -13,6 +14,7 @@ router = APIRouter()
 class ChatRequest(BaseModel):
+    # Request body taken by the chat and chat_sync handlers.
+    # session_id is echoed back in their responses; agent_id may be omitted,
+    # in which case agent resolution falls back to the "naked" agent.
    message: str
    session_id: str | None = None
+    agent_id: str | None = None   # which agent to use ("naked", "media-agent", …)


 class ChatCompletionRequest(BaseModel):
@@ -25,28 +27,59 @@ class ChatCompletionRequest(BaseModel):
 # Core helpers
 # ---------------------------------------------------------------------------

-def run_agent(client: OpenAI, message: str, session_id: str | None = None) -> str:
-    """Non-streaming: returns the full response as a single string."""
+def _resolve_agent(agent_id: str | None = None, model: str | None = None):
+    """
+    Pick the agent that serves a request.
+
+    A non-empty agent_id takes precedence over the model name (OpenWebUI
+    sends the model field, which is tried as an agent id). When neither is
+    given, or the lookup returns None, the "naked" agent is fetched instead.
+    """
+    key = agent_id or model
+    if key is not None:
+        found = get_agent(key)
+        if found is not None:
+            return found
+    # Nothing requested, or get_agent did not return an agent for the key.
+    return get_agent("naked")
+
+
+def run_agent(
+    client: OpenAI,
+    message: str,
+    session_id: str | None = None,
+    agent_id: str | None = None,
+    model: str | None = None,
+) -> str:
+    """Non-streaming: return the full reply using the chosen agent's system prompt.
+
+    agent_id and model only select the agent (via _resolve_agent); the model
+    name sent to the LLM client is always "deepseek-chat". session_id is
+    accepted but not used inside this function.
+    """
+    agent = _resolve_agent(agent_id, model)
    response = client.chat.completions.create(
        model="deepseek-chat",
        messages=[
-            {"role": "system", "content": "You are a helpful agent."},
+            {"role": "system", "content": agent.build_system_prompt()},
            {"role": "user", "content": message},
        ],
    )
    return response.choices[0].message.content


-async def run_agent_stream(client: OpenAI, message: str, session_id: str | None = None):
-    """Async generator that yields text tokens as they arrive from the LLM."""
+async def run_agent_stream(
+    client: OpenAI,
+    message: str,
+    session_id: str | None = None,
+    agent_id: str | None = None,
+    model: str | None = None,
+):
+    """Async generator — yields tokens using the chosen agent's system prompt."""
+    agent = _resolve_agent(agent_id, model)
+    system_prompt = agent.build_system_prompt()
    loop = asyncio.get_running_loop()

-    # OpenAI's sync streaming iterator must run in a thread so it doesn't block the event loop
    def _sync_stream():
        stream = client.chat.completions.create(
            model="deepseek-chat",
            messages=[
-                {"role": "system", "content": "You are a helpful agent."},
+                {"role": "system", "content": system_prompt},
                {"role": "user", "content": message},
            ],
            stream=True,
@@ -56,7 +89,6 @@ async def run_agent_stream(client: OpenAI, message: str, session_id: str | None
            if delta and delta.content:
                yield delta.content

-    # Run the sync generator in a thread, yield results back to the async world
    gen = _sync_stream()
    while True:
        token = await loop.run_in_executor(None, next, gen, None)
@@ -78,11 +110,12 @@ def root():
 async def chat(req: ChatRequest, client: OpenAI = Depends(get_llm_client)):
    """Streaming chat endpoint — returns Server-Sent Events."""
    async def event_stream():
-        async for token in run_agent_stream(client, req.message, req.session_id):
+        async for token in run_agent_stream(
+            client, req.message, req.session_id, req.agent_id,
+        ):
            payload = json.dumps({"token": token, "session_id": req.session_id})
            yield f"data: {payload}\n\n"

-        # Signal completion
        yield f"data: {json.dumps({'done': True, 'session_id': req.session_id})}\n\n"

    return StreamingResponse(
@@ -91,7 +124,7 @@ async def chat(req: ChatRequest, client: OpenAI = Depends(get_llm_client)):
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
-            "X-Accel-Buffering": "no",   # Disable nginx buffering if behind a proxy
+            "X-Accel-Buffering": "no",
        },
    )

@@ -99,21 +132,38 @@ async def chat(req: ChatRequest, client: OpenAI = Depends(get_llm_client)):
@router.post("/chat/sync")
 def chat_sync(req: ChatRequest, client: OpenAI = Depends(get_llm_client)):
    """Non-streaming fallback — returns the full response at once."""
-    response = run_agent(client, req.message, req.session_id)
+    # req.agent_id may be None; run_agent then resolves the fallback agent.
+    response = run_agent(client, req.message, req.session_id, req.agent_id)
    return {"response": response, "session_id": req.session_id}


+@router.get("/agents")
+def list_agents():
+    """Describe every registered agent: its id, description, and skills."""
+    registry = list_all_agents()
+    summaries = [
+        {
+            "agent_id": agent.agent_id,
+            "description": agent.description,
+            "skills": agent.skills,
+        }
+        for agent in registry.values()
+    ]
+    return {"agents": summaries}
+
+
@router.get("/models")
 def list_models():
+    """Return all registered agents as selectable models for OpenWebUI.
+
+    Each entry's "id" is the agent's agent_id, so the model name a client
+    picks from this list can be handed to _resolve_agent unchanged.
+    """
    return {
        "object": "list",
        "data": [
            {
-                "id": "agent-model",
+                "id": a.agent_id,
                "object": "model",
                "created": 0,
                "owned_by": "local-agent",
-            },
+            }
+            for a in list_all_agents().values()
        ],
    }

@@ -123,12 +173,17 @@ async def chat_completions(
    req: ChatCompletionRequest,
    client: OpenAI = Depends(get_llm_client),
 ):
-    """OpenAI-compatible /chat/completions — supports stream=True."""
+    """OpenAI-compatible /chat/completions — supports stream=True.
+    The last message's content is used as the user prompt; defaults to 'naked' agent.
+    """
    user_message = req.messages[-1]["content"]

+    # Resolve agent from the model field (OpenWebUI sends this)
+    agent = _resolve_agent(model=req.model)
+
    if req.stream:
        async def sse_stream():
-            async for token in run_agent_stream(client, user_message):
+            async for token in run_agent_stream(client, user_message, agent_id=agent.agent_id):
                chunk = {
                    "id": "chatcmpl-local",
                    "object": "chat.completion.chunk",
@@ -141,7 +196,6 @@ async def chat_completions(
                    ],
                }
                yield f"data: {json.dumps(chunk)}\n\n"
-            # Final chunk with finish_reason
            final_chunk = {
                "id": "chatcmpl-local",
                "object": "chat.completion.chunk",
@@ -165,8 +219,9 @@ async def chat_completions(
            },
        )

-    # Non-streaming path
-    response = run_agent(client, user_message)
+    # Non-streaming path — resolve agent from model field
+    agent = _resolve_agent(model=req.model)
+    response = run_agent(client, user_message, agent_id=agent.agent_id)
    return {
        "id": "chatcmpl-local",
        "object": "chat.completion",