Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ TTS_VOICE=am_puck
# =============================================================================
# LLM Configuration
# =============================================================================
# Choose your LLM provider: "ollama" (local), "groq" (cloud), "openrouter" (cloud), or "openai_compatible"
# Choose your LLM provider: "ollama" (local), "groq" (cloud), "openrouter" (cloud), "requesty" (cloud), or "openai_compatible"
LLM_PROVIDER=ollama

# =============================================================================
Expand Down Expand Up @@ -140,6 +140,14 @@ GROQ_MODEL=llama-3.3-70b-versatile
OPENROUTER_API_KEY=
OPENROUTER_MODEL=google/gemini-2.0-flash-001

# =============================================================================
# LLM - Requesty (Cloud)
# =============================================================================
# OpenAI-compatible LLM gateway. Get your API key from https://app.requesty.ai/api-keys
# Required when LLM_PROVIDER=requesty
REQUESTY_API_KEY=
REQUESTY_MODEL=openai/gpt-4o-mini

# =============================================================================
# LLM - OpenAI-Compatible (Any Provider)
# =============================================================================
Expand Down
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ ollama pull coreworxlab/caal-qwen3.5-9b

**Infinitely extensible.** Any [n8n](https://n8n.io/) workflow becomes a voice-activated tool. Control [Home Assistant](https://www.home-assistant.io/) devices, query APIs, automate your life — then share your tools with the community via the [CAAL Tool Registry](https://github.com/CoreWorxLab/caal-tools). Tools follow a suite convention — fewer tools, better accuracy, more reliable routing.

**Local by default.** Runs fully on your network with [Ollama](https://ollama.ai/). No accounts, no telemetry, no cloud dependency. Want to use [Groq](https://groq.com/), [OpenRouter](https://openrouter.ai/), or any OpenAI-compatible API? Your choice. Your credentials and tool executions never leave your network regardless.
**Local by default.** Runs fully on your network with [Ollama](https://ollama.ai/). No accounts, no telemetry, no cloud dependency. Want to use [Groq](https://groq.com/), [OpenRouter](https://openrouter.ai/), [Requesty](https://requesty.ai/), or any OpenAI-compatible API? Your choice. Your credentials and tool executions never leave your network regardless.

---

Expand All @@ -33,7 +33,7 @@ ollama pull coreworxlab/caal-qwen3.5-9b
- **Tool Chaining** — Sequential multi-tool calls in one prompt. The model uses real data from each step to inform the next
- **Home Assistant** — Voice control across lights, covers, locks, climate, media, and more via `hass`
- **n8n Workflows** — Any workflow becomes a tool. Visual, inspectable, shareable, auditable through n8n's execution history
- **Flexible Providers** — Ollama, Groq, OpenRouter, or any OpenAI-compatible API. Speaches or Groq for STT. Kokoro or Piper for TTS
- **Flexible Providers** — Ollama, Groq, OpenRouter, Requesty, or any OpenAI-compatible API. Speaches or Groq for STT. Kokoro or Piper for TTS
- **Short-Term Memory** — Store and recall information across sessions
- **Internationalization** — English, French, Italian, with more coming
- **Wake Word** — "Hey Cal" via OpenWakeWord
Expand Down Expand Up @@ -165,6 +165,7 @@ CAAL is built in the open. If you build a tool, we see the PR. If you find a bug
- [Ollama](https://ollama.ai/) - Local LLM server
- [Groq](https://groq.com/) - Fast cloud LLM inference
- [OpenRouter](https://openrouter.ai/) - Unified API for 200+ models
- [Requesty](https://requesty.ai/) - OpenAI-compatible LLM gateway
- [n8n](https://n8n.io/) - Workflow automation
- [Home Assistant](https://www.home-assistant.io/) - Smart home platform

Expand Down
10 changes: 10 additions & 0 deletions src/caal/chat/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,15 @@ def _get_runtime_settings() -> dict:
user_settings.get("openrouter_model")
or os.getenv("OPENROUTER_MODEL", "openai/gpt-4")
),
# Requesty
"requesty_api_key": (
settings.get("requesty_api_key")
or os.getenv("REQUESTY_API_KEY", "")
),
"requesty_model": (
user_settings.get("requesty_model")
or os.getenv("REQUESTY_MODEL", "openai/gpt-4o-mini")
),
# Shared
"max_turns": settings.get(
"max_turns", int(os.getenv("OLLAMA_MAX_TURNS", "20"))
Expand Down Expand Up @@ -536,6 +545,7 @@ async def reload_chat() -> ReloadResponse:
or runtime.get("groq_model")
or runtime.get("openai_model")
or runtime.get("openrouter_model")
or runtime.get("requesty_model")
or ""
),
tools_loaded=tools_loaded,
Expand Down
33 changes: 29 additions & 4 deletions src/caal/llm/providers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
- GroqProvider: Groq cloud API
- OpenAICompatibleProvider: Any OpenAI-compatible server
- OpenRouterProvider: OpenRouter cloud API (400+ models)
- RequestyProvider: Requesty cloud API (OpenAI-compatible gateway)

Example:
>>> from caal.llm.providers import create_provider
Expand All @@ -26,6 +27,10 @@
>>> # Create OpenRouter provider
>>> provider = create_provider("openrouter", model="openai/gpt-4",
... api_key="sk-...")
>>>
>>> # Create Requesty provider
>>> provider = create_provider("requesty", model="openai/gpt-4o-mini",
... api_key="sk-...")
"""

from __future__ import annotations
Expand All @@ -39,6 +44,7 @@
from .ollama_provider import OllamaProvider
from .openai_compatible_provider import OpenAICompatibleProvider
from .openrouter_provider import OpenRouterProvider
from .requesty_provider import RequestyProvider

__all__ = [
"LLMProvider",
Expand All @@ -48,6 +54,7 @@
"GroqProvider",
"OpenAICompatibleProvider",
"OpenRouterProvider",
"RequestyProvider",
"create_provider",
]

Expand All @@ -62,7 +69,7 @@ def create_provider(

Args:
provider_name: Provider identifier ("ollama", "groq", "openai_compatible",
or "openrouter")
"openrouter", or "requesty")
**kwargs: Provider-specific configuration options

Returns:
Expand Down Expand Up @@ -94,10 +101,12 @@ def create_provider(
return OpenAICompatibleProvider(**kwargs)
elif provider_name == "openrouter":
return OpenRouterProvider(**kwargs)
elif provider_name == "requesty":
return RequestyProvider(**kwargs)
else:
raise ValueError(
f"Unknown LLM provider: {provider_name}. "
f"Supported providers: ollama, groq, openai_compatible, openrouter"
f"Supported providers: ollama, groq, openai_compatible, openrouter, requesty"
)


Expand All @@ -109,14 +118,17 @@ def create_provider_from_settings(settings: dict[str, Any]) -> LLMProvider:

Args:
settings: Runtime settings dict with keys like:
- llm_provider: "ollama", "groq", "openai_compatible", or "openrouter"
- llm_provider: "ollama", "groq", "openai_compatible", "openrouter",
or "requesty"
- ollama_model: Ollama model name
- groq_model: Groq model name
- openai_model: OpenAI-compatible model name
- openai_base_url: OpenAI-compatible server URL
- openai_api_key: OpenAI-compatible API key (optional)
- openrouter_model: OpenRouter model name
- openrouter_api_key: OpenRouter API key (required)
- requesty_model: Requesty model name
- requesty_api_key: Requesty API key (required)
- temperature: Sampling temperature
- num_ctx: Context window size (Ollama only)

Expand Down Expand Up @@ -168,8 +180,21 @@ def create_provider_from_settings(settings: dict[str, Any]) -> LLMProvider:
api_key=api_key,
temperature=settings.get("temperature", 0.7),
)
elif provider_name == "requesty":
# API key from settings, fallback to environment variable
api_key = settings.get("requesty_api_key") or os.environ.get("REQUESTY_API_KEY")
if not api_key:
raise ValueError(
"Requesty API key required. Set requesty_api_key in settings "
"or REQUESTY_API_KEY environment variable."
)
return RequestyProvider(
model=settings.get("requesty_model", "openai/gpt-4o-mini"),
api_key=api_key,
temperature=settings.get("temperature", 0.7),
)
else:
raise ValueError(
f"Unknown LLM provider: {provider_name}. "
f"Supported providers: ollama, groq, openai_compatible, openrouter"
f"Supported providers: ollama, groq, openai_compatible, openrouter, requesty"
)
75 changes: 75 additions & 0 deletions src/caal/llm/providers/requesty_provider.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
"""Requesty LLM provider implementation.

Provides access to many cloud models through Requesty's unified,
OpenAI-compatible LLM gateway. RequestyProvider subclasses
OpenAICompatibleProvider because Requesty exposes an OpenAI-compatible API.

See: https://docs.requesty.ai
"""

from __future__ import annotations

import logging

from .openai_compatible_provider import OpenAICompatibleProvider

__all__ = ["RequestyProvider"]

logger = logging.getLogger(__name__)

REQUESTY_BASE_URL = "https://router.requesty.ai/v1"


class RequestyProvider(OpenAICompatibleProvider):
    """Requesty cloud LLM provider.

    Routes requests through Requesty's OpenAI-compatible gateway at
    ``REQUESTY_BASE_URL``. This subclass overrides only ``__init__`` and
    ``provider_name``; chat, streaming, and tool handling come from
    OpenAICompatibleProvider.

    Differences from base OpenAICompatibleProvider:
        - Fixed base URL (router.requesty.ai)
        - API key is required
        - Attribution headers (``HTTP-Referer``, ``X-Title``)

    Args:
        model: Requesty model ID (e.g., "openai/gpt-4o-mini",
            "anthropic/claude-sonnet-4-5")
        api_key: Requesty API key (required)
        temperature: Sampling temperature, forwarded to the base provider
        max_tokens: Maximum tokens to generate, forwarded to the base provider

    Raises:
        ValueError: If ``api_key`` is empty or otherwise falsy.

    See: https://app.requesty.ai/router/list
    """

    def __init__(
        self,
        model: str,
        api_key: str,
        temperature: float = 0.7,
        max_tokens: int = 4096,
    ) -> None:
        # Reject a missing key before delegating to the base class.
        if not api_key:
            raise ValueError(
                "Requesty API key required. Pass api_key parameter "
                "or configure in settings."
            )

        super().__init__(
            model=model,
            base_url=REQUESTY_BASE_URL,
            api_key=api_key,
            temperature=temperature,
            max_tokens=max_tokens,
        )

        # Add Requesty-specific attribution headers.
        # NOTE(review): this mutates whatever mapping `default_headers` hands
        # back. If `_client` exposes `default_headers` as a property that
        # builds a fresh dict on each access, this update would be silently
        # discarded -- confirm against OpenAICompatibleProvider and the
        # client library version in use.
        self._client.default_headers.update({
            "HTTP-Referer": "https://github.com/caal-project/caal",
            "X-Title": "CAAL Voice Assistant",
        })

        # Lazy %-style args: the message is only formatted if DEBUG is enabled.
        logger.debug("RequestyProvider initialized: %s", model)

    @property
    def provider_name(self) -> str:
        """Return the provider identifier, ``"requesty"``."""
        return "requesty"
3 changes: 3 additions & 0 deletions src/caal/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,9 @@ def validate_url(url: str) -> tuple[bool, str]:
# OpenRouter settings (cloud API)
"openrouter_api_key": "", # OpenRouter API key (required for openrouter provider)
"openrouter_model": "", # Model name (empty = use default)
# Requesty settings (cloud API)
"requesty_api_key": "", # Requesty API key (required for requesty provider)
"requesty_model": "", # Model name (empty = use default)
}

# Per-language Piper TTS voice mapping
Expand Down
13 changes: 13 additions & 0 deletions voice_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,15 @@ def get_runtime_settings() -> dict:
user_settings.get("openrouter_model")
or os.getenv("OPENROUTER_MODEL", "openai/gpt-4")
),
# Requesty settings
"requesty_api_key": (
settings.get("requesty_api_key")
or os.getenv("REQUESTY_API_KEY", "")
),
"requesty_model": (
user_settings.get("requesty_model")
or os.getenv("REQUESTY_MODEL", "openai/gpt-4o-mini")
),
# Shared settings
"max_turns": settings.get("max_turns", int(os.getenv("OLLAMA_MAX_TURNS", "20"))),
"tool_cache_size": settings.get("tool_cache_size", int(os.getenv("TOOL_CACHE_SIZE", "3"))),
Expand Down Expand Up @@ -384,6 +393,10 @@ async def entrypoint(ctx: agents.JobContext) -> None:
logger.info(
f" LLM: OpenRouter ({runtime.get('openrouter_model', '?')})"
)
elif llm_provider == "requesty":
logger.info(
f" LLM: Requesty ({runtime.get('requesty_model', '?')})"
)
logger.info(f" MCP: {list(mcp_servers.keys()) or 'None'}")
logger.info(
f" Turn detection: interruptions={runtime['allow_interruptions']}, "
Expand Down