diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..b072ac9 --- /dev/null +++ b/.env.example @@ -0,0 +1,14 @@ +# --------------------------------------------------------------------------- +# Agent Backend — Environment Variables +# Copy this to .env and fill in your values. +# --------------------------------------------------------------------------- + +# LLM — DeepSeek (OpenAI-compatible) +DEEPSEEK_API_KEY=sk-your-deepseek-api-key + +# --------------------------------------------------------------------------- +# Seerr (Overseerr / Jellyseerr) +# --------------------------------------------------------------------------- +SEERR_URL=https://seerr.example.com +SEERR_API_KEY=your-seerr-api-key +# SEERR_TIMEOUT=30 # optional, defaults to 30 seconds diff --git a/agents/__init__.py b/agents/__init__.py index 57f4a0f..8064a30 100644 --- a/agents/__init__.py +++ b/agents/__init__.py @@ -62,3 +62,5 @@ def load_all_agents() -> None: # Also import skill modules so they self-register import skills.media_info # noqa: F401 + import skills.seerr # noqa: F401 + import skills.triage # noqa: F401 diff --git a/agents/media_agent.py b/agents/media_agent.py index 07aa731..657f61b 100644 --- a/agents/media_agent.py +++ b/agents/media_agent.py @@ -2,18 +2,25 @@ media-agent — an agent that knows how to handle media queries (Jellyfin / Sonarr / Seerr / subtitle requests). -For now it only loads the *media_info* demo skill which teaches it -a structured response format. Later you'll add real API-calling skills. +Skills: +- media_info : base persona (prompt-only) +- seerr : trending, discover, request media, submit issues (tools + API) +- triage : fallback for unsupported actions (prompt-only, uses seerr tools) """ from agents import Agent, register media_agent = Agent( agent_id="media-agent", - description="Media assistant — handles movie/TV/subtitle/ticket requests. " - "Will eventually connect to Seerr, Sonarr, Jellyfin, etc.", - skills=["media_info"], - base_prompt="You are a media assistant. Help users with their media library.", + description="Media assistant — handles movie/TV/subtitle/ticket requests " + "via Seerr, Jellyfin, Sonarr, etc.", + skills=["media_info", "seerr", "triage"], + base_prompt=( + "You are a media assistant connected to Seerr and other media services. " + "Help users discover, request, and troubleshoot their media library. " + "Use the tools provided to perform real actions." + ), ) register(media_agent) + diff --git a/api/v1/chat.py b/api/v1/chat.py index ed76e71..d9ad5cf 100644 --- a/api/v1/chat.py +++ b/api/v1/chat.py @@ -1,4 +1,4 @@ -from fastapi import APIRouter, Body, Depends +from fastapi import APIRouter, Depends from fastapi.responses import StreamingResponse from openai import OpenAI from pydantic import BaseModel @@ -7,6 +7,7 @@ import asyncio from api.dependencies import get_llm_client from agents import get as get_agent, list_all as list_all_agents +from skills import get_all_tools, execute_tool, ToolResult router = APIRouter() @@ -24,34 +25,99 @@ class ChatCompletionRequest(BaseModel): # --------------------------------------------------------------------------- -# Core helpers +# Agent resolution # --------------------------------------------------------------------------- def _resolve_agent(agent_id: str | None = None, model: str | None = None): """ - Look up the agent. Resolution order: + Resolution order: 1. explicit agent_id - 2. model name (OpenWebUI sends this — maps to agent_id if registered) + 2. model field (OpenWebUI sends this — maps to agent_id if registered) 3. fallback to "naked" """ lookup = agent_id or model if lookup is None: - agent = get_agent("naked") - else: - agent = get_agent(lookup) - if agent is None: - agent = get_agent("naked") - return agent + return get_agent("naked") + agent = get_agent(lookup) + return agent if agent else get_agent("naked") -def run_agent( +# --------------------------------------------------------------------------- +# Tool-calling loop (non-streaming) +# --------------------------------------------------------------------------- + +async def run_agent_with_tools( + client: OpenAI, + message: str, + agent_id: str | None = None, + model: str | None = None, + max_turns: int = 5, +) -> str: + """Send the user message to the LLM with tool definitions. + Loop: if the LLM responds with tool_calls, execute them and feed + results back until the LLM produces a final text answer. + """ + agent = _resolve_agent(agent_id, model) + tools = get_all_tools(agent.skills) + system_prompt = agent.build_system_prompt() + + messages: list[dict] = [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": message}, + ] + + loop = asyncio.get_running_loop() + + for _ in range(max_turns): + resp = await loop.run_in_executor( + None, + lambda: client.chat.completions.create( + model="deepseek-chat", + messages=messages, + tools=tools if tools else None, + tool_choice="auto" if tools else None, + ), + ) + choice = resp.choices[0] + + # If the model sends a final text answer, return it + if choice.finish_reason == "stop" and choice.message.content: + return choice.message.content + + # If the model wants to call tools + if choice.message.tool_calls: + # Append the assistant message with tool_calls + messages.append(choice.message.model_dump(exclude_none=True)) + + for tc in choice.message.tool_calls: + fn_name = tc.function.name + fn_args = json.loads(tc.function.arguments) + tr = await execute_tool(agent.skills, fn_name, fn_args) + result = tr.content if tr else f"Tool '{fn_name}' is not available right now." + messages.append({ + "role": "tool", + "tool_call_id": tc.id, + "content": result, + }) + continue + + # Fallback — should not normally happen + return choice.message.content or "I'm not sure how to help with that." + + return "I've taken several actions but still need more information. Could you clarify?" + + +# --------------------------------------------------------------------------- +# Non-streaming helper (no tools — used by sync endpoint if tools are absent) +# --------------------------------------------------------------------------- + +def run_agent_simple( client: OpenAI, message: str, - session_id: str | None = None, agent_id: str | None = None, model: str | None = None, ) -> str: - """Non-streaming: uses the chosen agent's system prompt.""" + """Plain LLM call — no tools. Used when the agent has no tool-enabled skills.""" agent = _resolve_agent(agent_id, model) response = client.chat.completions.create( model="deepseek-chat", @@ -63,15 +129,105 @@ def run_agent( return response.choices[0].message.content +# --------------------------------------------------------------------------- +# Streaming generators +# --------------------------------------------------------------------------- + +async def _stream_with_tools( + client: OpenAI, + message: str, + agent_id: str | None = None, + model: str | None = None, + max_turns: int = 5, +): + """Streaming version with tool-calling loop. + Yields tokens from the final text response (tools run silently in the background). + """ + agent = _resolve_agent(agent_id, model) + tools = get_all_tools(agent.skills) + system_prompt = agent.build_system_prompt() + + messages: list[dict] = [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": message}, + ] + + loop = asyncio.get_running_loop() + + for turn in range(max_turns): + # Non-streaming call to check for tool_calls + resp = await loop.run_in_executor( + None, + lambda: client.chat.completions.create( + model="deepseek-chat", + messages=messages, + tools=tools if tools else None, + tool_choice="auto" if tools else None, + ), + ) + choice = resp.choices[0] + + # Tool calls? Execute them and loop + if choice.message.tool_calls: + messages.append(choice.message.model_dump(exclude_none=True)) + for tc in choice.message.tool_calls: + fn_name = tc.function.name + fn_args = json.loads(tc.function.arguments) + tr = await execute_tool(agent.skills, fn_name, fn_args) + result = tr.content if tr else f"Tool '{fn_name}' is not available right now." + messages.append({ + "role": "tool", + "tool_call_id": tc.id, + "content": result, + }) + continue + + # Final text answer — stream it + if choice.finish_reason == "stop" and choice.message.content: + # Already have a non-streaming answer — yield it token-by-token + for token in choice.message.content: + yield token + await asyncio.sleep(0) + return + + # Last resort: stream the final response + def _sync_stream(): + stream = client.chat.completions.create( + model="deepseek-chat", + messages=messages, + stream=True, + ) + for chunk in stream: + delta = chunk.choices[0].delta + if delta and delta.content: + yield delta.content + + gen = _sync_stream() + while True: + token = await loop.run_in_executor(None, next, gen, None) + if token is None: + return + yield token + + yield "…" + + async def run_agent_stream( client: OpenAI, message: str, - session_id: str | None = None, agent_id: str | None = None, model: str | None = None, ): - """Async generator — yields tokens using the chosen agent's system prompt.""" + """Async generator — yields tokens. Uses tool-loop when skills have tools.""" agent = _resolve_agent(agent_id, model) + tools = get_all_tools(agent.skills) + + if tools: + async for token in _stream_with_tools(client, message, agent_id, model): + yield token + return + + # No tools — simple streaming system_prompt = agent.build_system_prompt() loop = asyncio.get_running_loop() @@ -111,7 +267,7 @@ async def chat(req: ChatRequest, client: OpenAI = Depends(get_llm_client)): """Streaming chat endpoint — returns Server-Sent Events.""" async def event_stream(): async for token in run_agent_stream( - client, req.message, req.session_id, req.agent_id, + client, req.message, req.agent_id, ): payload = json.dumps({"token": token, "session_id": req.session_id}) yield f"data: {payload}\n\n" @@ -130,9 +286,18 @@ async def chat(req: ChatRequest, client: OpenAI = Depends(get_llm_client)): @router.post("/chat/sync") -def chat_sync(req: ChatRequest, client: OpenAI = Depends(get_llm_client)): - """Non-streaming fallback — returns the full response at once.""" - response = run_agent(client, req.message, req.session_id, req.agent_id) +async def chat_sync(req: ChatRequest, client: OpenAI = Depends(get_llm_client)): + """Non-streaming endpoint — uses tool-calling when the agent has tools.""" + agent = _resolve_agent(req.agent_id) + tools = get_all_tools(agent.skills) + + if tools: + response = await run_agent_with_tools( + client, req.message, req.agent_id, + ) + else: + response = run_agent_simple(client, req.message, req.agent_id) + return {"response": response, "session_id": req.session_id} @@ -174,11 +339,9 @@ async def chat_completions( client: OpenAI = Depends(get_llm_client), ): """OpenAI-compatible /chat/completions — supports stream=True. - The last message's content is used as the user prompt; defaults to 'naked' agent. + Resolves the agent from the model field (OpenWebUI sends this). """ user_message = req.messages[-1]["content"] - - # Resolve agent from the model field (OpenWebUI sends this) agent = _resolve_agent(model=req.model) if req.stream: @@ -219,9 +382,13 @@ async def chat_completions( }, ) - # Non-streaming path — resolve agent from model field - agent = _resolve_agent(model=req.model) - response = run_agent(client, user_message, agent_id=agent.agent_id) + # Non-streaming path + tools = get_all_tools(agent.skills) + if tools: + response = await run_agent_with_tools(client, user_message, agent_id=agent.agent_id) + else: + response = run_agent_simple(client, user_message, agent_id=agent.agent_id) + return { "id": "chatcmpl-local", "object": "chat.completion", diff --git a/core/config.py b/core/config.py index 7783ffb..b586060 100644 --- a/core/config.py +++ b/core/config.py @@ -2,6 +2,30 @@ from dotenv import load_dotenv from pathlib import Path import os -load_dotenv(Path(__file__).resolve().parent.parent / ".env") +# --------------------------------------------------------------------------- +# Load .env from the project root (one level above core/) +# --------------------------------------------------------------------------- +_env_path = Path(__file__).resolve().parent.parent / ".env" +load_dotenv(_env_path) -DEEPSEEK_API_KEY = os.getenv("DEEPSEEK_API_KEY") \ No newline at end of file + +# --------------------------------------------------------------------------- +# General-purpose config accessor — every skill uses this +# --------------------------------------------------------------------------- +def get_config(key: str, default: str | None = None) -> str | None: + """Read a value from the environment (loaded from .env).""" + return os.getenv(key, default) + + +# --------------------------------------------------------------------------- +# LLM +# --------------------------------------------------------------------------- +DEEPSEEK_API_KEY = os.getenv("DEEPSEEK_API_KEY") + + +# --------------------------------------------------------------------------- +# Seerr (Overseerr / Jellyseerr) +# --------------------------------------------------------------------------- +SEERR_URL = os.getenv("SEERR_URL", "") +SEERR_API_KEY = os.getenv("SEERR_API_KEY", "") +SEERR_TIMEOUT = int(os.getenv("SEERR_TIMEOUT", "30")) \ No newline at end of file diff --git a/main.py b/main.py index 599f1ce..943bc5e 100644 --- a/main.py +++ b/main.py @@ -1,3 +1,5 @@ +import logging + from fastapi import FastAPI from fastapi.middleware.cors import CORSMiddleware @@ -5,6 +7,15 @@ from api.v1.chat import router as v1_router from core.config import DEEPSEEK_API_KEY from core.llm import create_client +# --------------------------------------------------------------------------- +# Logging — tool calls will appear in the uvicorn console +# --------------------------------------------------------------------------- +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s [%(name)s] %(levelname)s: %(message)s", + datefmt="%H:%M:%S", +) + # --------------------------------------------------------------------------- # Load all agents & skills so they self-register at startup # --------------------------------------------------------------------------- diff --git a/requirements.txt b/requirements.txt index c9db552..db69327 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ fastapi openai uvicorn -python-dotenv \ No newline at end of file +python-dotenv +httpx \ No newline at end of file diff --git a/skills/__init__.py b/skills/__init__.py index f6b4eed..1a3b32f 100644 --- a/skills/__init__.py +++ b/skills/__init__.py @@ -3,13 +3,41 @@ Skill system — each skill is a piece of domain knowledge or a capability that can be attached to an agent to shape its behavior and system prompt. A Skill is a lightweight object with: -- name : short identifier (e.g. "media_info") -- description : human-readable summary +- name : short identifier (e.g. "media_info") +- description : human-readable summary - prompt_fragment : extra text injected into the agent's system prompt +- tools : OpenAI function-calling tool definitions (list of dicts) +- execute : async callable to run a tool → ToolResult """ from dataclasses import dataclass, field -from typing import Dict +from typing import Any, Awaitable, Callable, Dict, List, Optional +from core.config import get_config # re-export so every skill can use it + + +# --------------------------------------------------------------------------- +# ToolResult — every skill executor must return this +# --------------------------------------------------------------------------- +@dataclass +class ToolResult: + """Result of executing a tool. + - success: True if the API returned 2xx and the action completed. + - content: The message to feed back to the LLM (will be shown to the user). + """ + content: str + success: bool = True + + @classmethod + def ok(cls, content: str) -> "ToolResult": + return cls(content=content, success=True) + + @classmethod + def fail(cls, content: str) -> "ToolResult": + return cls(content=content, success=False) + + +# Type alias for a tool executor +ToolExecutor = Callable[[str, dict], Awaitable[ToolResult]] @dataclass @@ -17,6 +45,8 @@ class Skill: name: str description: str prompt_fragment: str = "" + tools: List[Dict[str, Any]] = field(default_factory=list) + execute: Optional[ToolExecutor] = None # --------------------------------------------------------------------------- @@ -48,3 +78,52 @@ def get_combined_prompt(skill_names: list[str], base_prompt: str = "") -> str: if s and s.prompt_fragment: parts.append(s.prompt_fragment) return "\n\n".join(parts) + + +def get_all_tools(skill_names: list[str]) -> List[Dict[str, Any]]: + """Collect all OpenAI tool definitions across the requested skills.""" + tools: List[Dict[str, Any]] = [] + seen: set[str] = set() + for name in skill_names: + s = get(name) + if s: + for t in s.tools: + fn_name = t.get("function", {}).get("name", "") + if fn_name and fn_name not in seen: + seen.add(fn_name) + tools.append(t) + return tools + + +async def execute_tool( + skill_names: list[str], tool_name: str, args: dict +) -> ToolResult | None: + """Find the skill that owns *tool_name* and run its executor. + Only logs failures to the console — successful calls are silent. + """ + import logging + logger = logging.getLogger("skills") + + for name in skill_names: + s = get(name) + if s and s.execute: + for t in s.tools: + if t.get("function", {}).get("name") == tool_name: + try: + result = await s.execute(tool_name, args) + if not result.success: + logger.warning( + "⚠️ TOOL FAILED: %s | args=%s → %s", + tool_name, args, result.content[:300], + ) + return result + except Exception as exc: + logger.exception( + "💥 TOOL CRASH: %s | args=%s", tool_name, args + ) + return ToolResult.fail( + f"Tool '{tool_name}' crashed unexpectedly: {exc}" + ) + + logger.warning("⚠️ TOOL NOT FOUND: %s (skills=%s)", tool_name, skill_names) + return None diff --git a/skills/media_info.py b/skills/media_info.py index 0b60eb5..8578cdf 100644 --- a/skills/media_info.py +++ b/skills/media_info.py @@ -1,45 +1,31 @@ """ Demo skill: media_info -Gives the agent knowledge about how to respond to media-related queries -(movie / TV / subtitle requests). This is intentionally simple — in the future -you would add real API-calling skills here (Sonarr / Jellyfin / Seerr / etc.). +A lightweight base skill that teaches the agent it is a media assistant. +Real API capabilities come from other skills (seerr, triage, etc.). """ from skills import Skill, register media_info_skill = Skill( name="media_info", - description="Respond to media queries with a structured format " - "(movie / TV show requests, subtitles, tickets).", - prompt_fragment="""## Media Agent Instructions + description="Base media assistant persona — movie, TV, subtitle, and media requests.", + prompt_fragment="""## Media Assistant Persona -You are a media assistant. When users ask about movies, TV shows, subtitles, -or media library requests, follow these rules: +You are a friendly media assistant connected to a media back-end (Seerr, +Jellyfin, Sonarr, etc.). Your job is to help users discover, request, and +troubleshoot their media library. -- If a user wants to **request** a movie or show, respond with a clear - confirmation using this format: +When responding: +- Be concise and helpful. +- Use the tools available to you for real actions. +- If a user asks about **subtitles**, explain that Bazarr handles those and + suggest submitting a ticket if there's a problem. +- Always confirm successful actions and warn about failures. - ``` - [MEDIA REQUEST] - Title: