From ee49b748b4e37a3d117a85cca389541619043c25 Mon Sep 17 00:00:00 2001
From: Thibault Jaigu
Date: Fri, 26 Jun 2026 08:12:59 +0100
Subject: [PATCH] feat: add Requesty as an OpenAI-compatible LLM provider

Signed-off-by: Thibault Jaigu
---
 .env.example                                | 10 ++-
 README.md                                   |  5 +-
 src/caal/chat/api.py                        | 10 +++
 src/caal/llm/providers/__init__.py          | 33 +++++++--
 src/caal/llm/providers/requesty_provider.py | 75 +++++++++++++++++++++
 src/caal/settings.py                        |  3 +
 voice_agent.py                              | 13 ++++
 7 files changed, 142 insertions(+), 7 deletions(-)
 create mode 100644 src/caal/llm/providers/requesty_provider.py

diff --git a/.env.example b/.env.example
index 8e23c5a..f912c17 100644
--- a/.env.example
+++ b/.env.example
@@ -94,7 +94,7 @@ TTS_VOICE=am_puck
 # =============================================================================
 # LLM Configuration
 # =============================================================================
-# Choose your LLM provider: "ollama" (local), "groq" (cloud), "openrouter" (cloud), or "openai_compatible"
+# Choose your LLM provider: "ollama" (local), "groq" (cloud), "openrouter" (cloud), "requesty" (cloud), or "openai_compatible"
 LLM_PROVIDER=ollama
 
 # =============================================================================
@@ -140,6 +140,14 @@ GROQ_MODEL=llama-3.3-70b-versatile
 OPENROUTER_API_KEY=
 OPENROUTER_MODEL=google/gemini-2.0-flash-001
 
+# =============================================================================
+# LLM - Requesty (Cloud)
+# =============================================================================
+# OpenAI-compatible LLM gateway. Get your API key from https://app.requesty.ai/api-keys
+# Required when LLM_PROVIDER=requesty
+REQUESTY_API_KEY=
+REQUESTY_MODEL=openai/gpt-4o-mini
+
 # =============================================================================
 # LLM - OpenAI-Compatible (Any Provider)
 # =============================================================================
diff --git a/README.md b/README.md
index 22b18ab..7eeed7c 100644
--- a/README.md
+++ b/README.md
@@ -23,7 +23,7 @@ ollama pull coreworxlab/caal-qwen3.5-9b
 
 **Infinitely extensible.** Any [n8n](https://n8n.io/) workflow becomes a voice-activated tool. Control [Home Assistant](https://www.home-assistant.io/) devices, query APIs, automate your life — then share your tools with the community via the [CAAL Tool Registry](https://github.com/CoreWorxLab/caal-tools). Tools follow a suite convention — fewer tools, better accuracy, more reliable routing.
 
-**Local by default.** Runs fully on your network with [Ollama](https://ollama.ai/). No accounts, no telemetry, no cloud dependency. Want to use [Groq](https://groq.com/), [OpenRouter](https://openrouter.ai/), or any OpenAI-compatible API? Your choice. Your credentials and tool executions never leave your network regardless.
+**Local by default.** Runs fully on your network with [Ollama](https://ollama.ai/). No accounts, no telemetry, no cloud dependency. Want to use [Groq](https://groq.com/), [OpenRouter](https://openrouter.ai/), [Requesty](https://requesty.ai/), or any OpenAI-compatible API? Your choice. Your credentials and tool executions never leave your network regardless.
 
 ---
 
@@ -33,7 +33,7 @@ ollama pull coreworxlab/caal-qwen3.5-9b
 - **Tool Chaining** — Sequential multi-tool calls in one prompt. The model uses real data from each step to inform the next
 - **Home Assistant** — Voice control across lights, covers, locks, climate, media, and more via `hass`
 - **n8n Workflows** — Any workflow becomes a tool. Visual, inspectable, shareable, auditable through n8n's execution history
-- **Flexible Providers** — Ollama, Groq, OpenRouter, or any OpenAI-compatible API. Speaches or Groq for STT. Kokoro or Piper for TTS
+- **Flexible Providers** — Ollama, Groq, OpenRouter, Requesty, or any OpenAI-compatible API. Speaches or Groq for STT. Kokoro or Piper for TTS
 - **Short-Term Memory** — Store and recall information across sessions
 - **Internationalization** — English, French, Italian, with more coming
 - **Wake Word** — "Hey Cal" via OpenWakeWord
@@ -165,6 +165,7 @@ CAAL is built in the open. If you build a tool, we see the PR. If you find a bug
 - [Ollama](https://ollama.ai/) - Local LLM server
 - [Groq](https://groq.com/) - Fast cloud LLM inference
 - [OpenRouter](https://openrouter.ai/) - Unified API for 200+ models
+- [Requesty](https://requesty.ai/) - OpenAI-compatible LLM gateway
 - [n8n](https://n8n.io/) - Workflow automation
 - [Home Assistant](https://www.home-assistant.io/) - Smart home platform
 
diff --git a/src/caal/chat/api.py b/src/caal/chat/api.py
index b4ea2fd..77a71f0 100644
--- a/src/caal/chat/api.py
+++ b/src/caal/chat/api.py
@@ -179,6 +179,15 @@ def _get_runtime_settings() -> dict:
             user_settings.get("openrouter_model")
             or os.getenv("OPENROUTER_MODEL", "openai/gpt-4")
         ),
+        # Requesty
+        "requesty_api_key": (
+            settings.get("requesty_api_key")
+            or os.getenv("REQUESTY_API_KEY", "")
+        ),
+        "requesty_model": (
+            user_settings.get("requesty_model")
+            or os.getenv("REQUESTY_MODEL", "openai/gpt-4o-mini")
+        ),
         # Shared
         "max_turns": settings.get(
             "max_turns", int(os.getenv("OLLAMA_MAX_TURNS", "20"))
@@ -536,6 +545,7 @@ async def reload_chat() -> ReloadResponse:
             or runtime.get("groq_model")
             or runtime.get("openai_model")
             or runtime.get("openrouter_model")
+            or runtime.get("requesty_model")
             or ""
         ),
         tools_loaded=tools_loaded,
diff --git a/src/caal/llm/providers/__init__.py b/src/caal/llm/providers/__init__.py
index 782288e..9bc1faf 100644
--- a/src/caal/llm/providers/__init__.py
+++ b/src/caal/llm/providers/__init__.py
@@ -9,6 +9,7 @@
     - GroqProvider: Groq cloud API
     - OpenAICompatibleProvider: Any OpenAI-compatible server
     - OpenRouterProvider: OpenRouter cloud API (400+ models)
+    - RequestyProvider: Requesty cloud API (OpenAI-compatible gateway)
 
 Example:
     >>> from caal.llm.providers import create_provider
@@ -26,6 +27,10 @@
     >>> # Create OpenRouter provider
     >>> provider = create_provider("openrouter", model="openai/gpt-4",
     ...                            api_key="sk-...")
+    >>>
+    >>> # Create Requesty provider
+    >>> provider = create_provider("requesty", model="openai/gpt-4o-mini",
+    ...                            api_key="sk-...")
 """
 
 from __future__ import annotations
@@ -39,6 +44,7 @@
 from .ollama_provider import OllamaProvider
 from .openai_compatible_provider import OpenAICompatibleProvider
 from .openrouter_provider import OpenRouterProvider
+from .requesty_provider import RequestyProvider
 
 __all__ = [
     "LLMProvider",
@@ -48,6 +54,7 @@
     "GroqProvider",
     "OpenAICompatibleProvider",
     "OpenRouterProvider",
+    "RequestyProvider",
     "create_provider",
 ]
 
@@ -62,7 +69,7 @@ def create_provider(
 
     Args:
         provider_name: Provider identifier ("ollama", "groq", "openai_compatible",
-            or "openrouter")
+            "openrouter", or "requesty")
         **kwargs: Provider-specific configuration options
 
     Returns:
@@ -94,10 +101,12 @@ def create_provider(
         return OpenAICompatibleProvider(**kwargs)
     elif provider_name == "openrouter":
         return OpenRouterProvider(**kwargs)
+    elif provider_name == "requesty":
+        return RequestyProvider(**kwargs)
     else:
         raise ValueError(
             f"Unknown LLM provider: {provider_name}. "
-            f"Supported providers: ollama, groq, openai_compatible, openrouter"
+            f"Supported providers: ollama, groq, openai_compatible, openrouter, requesty"
         )
 
 
@@ -109,7 +118,8 @@ def create_provider_from_settings(settings: dict[str, Any]) -> LLMProvider:
 
     Args:
         settings: Runtime settings dict with keys like:
-            - llm_provider: "ollama", "groq", "openai_compatible", or "openrouter"
+            - llm_provider: "ollama", "groq", "openai_compatible", "openrouter",
+              or "requesty"
             - ollama_model: Ollama model name
             - groq_model: Groq model name
             - openai_model: OpenAI-compatible model name
@@ -117,6 +127,8 @@ def create_provider_from_settings(settings: dict[str, Any]) -> LLMProvider:
             - openai_api_key: OpenAI-compatible API key (optional)
             - openrouter_model: OpenRouter model name
             - openrouter_api_key: OpenRouter API key (required)
+            - requesty_model: Requesty model name
+            - requesty_api_key: Requesty API key (required)
             - temperature: Sampling temperature
             - num_ctx: Context window size (Ollama only)
 
@@ -168,8 +180,21 @@ def create_provider_from_settings(settings: dict[str, Any]) -> LLMProvider:
             api_key=api_key,
             temperature=settings.get("temperature", 0.7),
         )
+    elif provider_name == "requesty":
+        # API key from settings, fallback to environment variable
+        api_key = settings.get("requesty_api_key") or os.environ.get("REQUESTY_API_KEY")
+        if not api_key:
+            raise ValueError(
+                "Requesty API key required. Set requesty_api_key in settings "
+                "or REQUESTY_API_KEY environment variable."
+            )
+        return RequestyProvider(
+            model=settings.get("requesty_model") or "openai/gpt-4o-mini",  # "" => default
+            api_key=api_key,
+            temperature=settings.get("temperature", 0.7),
+        )
     else:
         raise ValueError(
             f"Unknown LLM provider: {provider_name}. "
-            f"Supported providers: ollama, groq, openai_compatible, openrouter"
+            f"Supported providers: ollama, groq, openai_compatible, openrouter, requesty"
         )
diff --git a/src/caal/llm/providers/requesty_provider.py b/src/caal/llm/providers/requesty_provider.py
new file mode 100644
index 0000000..f2553b9
--- /dev/null
+++ b/src/caal/llm/providers/requesty_provider.py
@@ -0,0 +1,75 @@
+"""Requesty LLM provider implementation.
+
+Provides access to many cloud models through Requesty's unified,
+OpenAI-compatible LLM gateway. Inherits from OpenAICompatibleProvider
+since Requesty exposes an OpenAI-compatible API.
+
+See: https://docs.requesty.ai
+"""
+
+from __future__ import annotations
+
+import logging
+
+from .openai_compatible_provider import OpenAICompatibleProvider
+
+__all__ = ["RequestyProvider"]
+
+logger = logging.getLogger(__name__)
+
+REQUESTY_BASE_URL = "https://router.requesty.ai/v1"
+
+
+class RequestyProvider(OpenAICompatibleProvider):
+    """Requesty cloud LLM provider.
+
+    Provides access to many models including GPT-4, Claude, Llama, Mistral,
+    and many others through a single API. Inherits all chat, streaming, and
+    tool handling from OpenAICompatibleProvider.
+
+    Differences from base OpenAICompatibleProvider:
+    - Fixed base URL (router.requesty.ai)
+    - API key is required
+    - Attribution headers for model providers
+
+    Args:
+        model: Requesty model ID (e.g., "openai/gpt-4o-mini", "anthropic/claude-sonnet-4-5")
+        api_key: Requesty API key (required)
+        temperature: Sampling temperature (0.0-2.0)
+        max_tokens: Maximum tokens to generate
+
+    See: https://app.requesty.ai/router/list
+    """
+
+    def __init__(
+        self,
+        model: str,
+        api_key: str,
+        temperature: float = 0.7,
+        max_tokens: int = 4096,
+    ) -> None:
+        if not api_key:
+            raise ValueError(
+                "Requesty API key required. Pass api_key parameter "
+                "or configure in settings."
+            )
+
+        super().__init__(
+            model=model,
+            base_url=REQUESTY_BASE_URL,
+            api_key=api_key,
+            temperature=temperature,
+            max_tokens=max_tokens,
+        )
+
+        # Attribution headers. NOTE(review): confirm default_headers is mutable, not a copy
+        self._client.default_headers.update({
+            "HTTP-Referer": "https://github.com/caal-project/caal",
+            "X-Title": "CAAL Voice Assistant",
+        })
+
+        logger.debug(f"RequestyProvider initialized: {model}")
+
+    @property
+    def provider_name(self) -> str:
+        return "requesty"
diff --git a/src/caal/settings.py b/src/caal/settings.py
index e490612..20b1fa2 100644
--- a/src/caal/settings.py
+++ b/src/caal/settings.py
@@ -111,6 +111,9 @@ def validate_url(url: str) -> tuple[bool, str]:
     # OpenRouter settings (cloud API)
     "openrouter_api_key": "",  # OpenRouter API key (required for openrouter provider)
     "openrouter_model": "",  # Model name (empty = use default)
+    # Requesty settings (cloud API)
+    "requesty_api_key": "",  # Requesty API key (required for requesty provider)
+    "requesty_model": "",  # Model name (empty = use default)
 }
 
 # Per-language Piper TTS voice mapping
diff --git a/voice_agent.py b/voice_agent.py
index 1a7593c..1a281ae 100644
--- a/voice_agent.py
+++ b/voice_agent.py
@@ -176,6 +176,15 @@ def get_runtime_settings() -> dict:
             user_settings.get("openrouter_model")
             or os.getenv("OPENROUTER_MODEL", "openai/gpt-4")
         ),
+        # Requesty settings
+        "requesty_api_key": (
+            settings.get("requesty_api_key")
+            or os.getenv("REQUESTY_API_KEY", "")
+        ),
+        "requesty_model": (
+            user_settings.get("requesty_model")
+            or os.getenv("REQUESTY_MODEL", "openai/gpt-4o-mini")
+        ),
         # Shared settings
         "max_turns": settings.get("max_turns", int(os.getenv("OLLAMA_MAX_TURNS", "20"))),
         "tool_cache_size": settings.get("tool_cache_size", int(os.getenv("TOOL_CACHE_SIZE", "3"))),
@@ -384,6 +393,10 @@ async def entrypoint(ctx: agents.JobContext) -> None:
         logger.info(
             f" LLM: OpenRouter ({runtime.get('openrouter_model', '?')})"
         )
+    elif llm_provider == "requesty":
+        logger.info(
+            f" LLM: Requesty ({runtime.get('requesty_model', '?')})"
+        )
     logger.info(f" MCP: {list(mcp_servers.keys()) or 'None'}")
     logger.info(
         f" Turn detection: interruptions={runtime['allow_interruptions']}, "