# ares-server 0.3.0
# A.R.E.S - Agentic Retrieval Enhanced Server: A production-grade agentic
# chatbot server with multi-provider LLM support, tool calling, RAG, and
# MCP integration.
# A.R.E.S Configuration Example
# ==============================
# Copy this file to 'ares.toml' and customize for your deployment.
#
# REQUIRED: Set these environment variables before starting:
#   - JWT_SECRET: A secret key for JWT signing (min 32 characters)
#   - API_KEY: API key for service-to-service authentication
#
# This file demonstrates all available configuration options.
# The server will NOT start without ares.toml present.
#
# Hot Reloading: Changes to ares.toml are automatically detected and applied
# without restarting the server.



# =============================================================================

# Server Configuration

# =============================================================================

[server]
host = "127.0.0.1"   # Bind address
port = 3000          # HTTP listen port
log_level = "info"   # One of: debug, info, warn, error



# =============================================================================

# Authentication Configuration

# =============================================================================

# For security, secrets are stored in environment variables.

# The config references the ENV VAR NAME, not the actual secret.

[auth]
# Secrets are referenced by ENV VAR NAME; the actual secret values live only
# in the environment, never in this file.
jwt_secret_env = "JWT_SECRET"   # Env var holding the JWT signing secret
jwt_access_expiry = 900         # Access token lifetime, seconds (15 minutes)
jwt_refresh_expiry = 604800     # Refresh token lifetime, seconds (7 days)
api_key_env = "API_KEY"         # Env var holding the service-to-service API key



# =============================================================================

# Database Configuration

# =============================================================================

[database]
# Local SQLite database file (default backend).
url = "./data/ares.db"

# Turso cloud database (optional — uncomment to enable). When both env-var
# names are set and the referenced variables exist, the server uses Turso
# instead of local SQLite.
# turso_url_env = "TURSO_URL"
# turso_token_env = "TURSO_AUTH_TOKEN"

# Qdrant vector database for semantic search (optional).
# [database.qdrant]
# url = "http://localhost:6334"
# api_key_env = "QDRANT_API_KEY"  # Optional if Qdrant runs without auth



# =============================================================================

# LLM Providers

# =============================================================================

# Define named provider configurations. These are referenced by [models].

# Each provider type requires its Cargo feature to be enabled:

#   - ollama: default

#   - openai: cargo build --features openai

#   - llamacpp: cargo build --features llamacpp



# Ollama — local inference, no API key required.
[providers.ollama-local]
type = "ollama"
base_url = "http://localhost:11434"
default_model = "ministral-3:3b"

# Additional Ollama configs: same daemon, different default models, so that
# [models] entries can pick a provider by role.
[providers.ollama-fast]
type = "ollama"
base_url = "http://localhost:11434"
default_model = "ministral-3:3b"

[providers.ollama-vision]
type = "ollama"
base_url = "http://localhost:11434"
default_model = "qwen3-vl:2b"

[providers.ollama-code]
type = "ollama"
base_url = "http://localhost:11434"
default_model = "ministral-3:3b"

# Granite model (IBM).
[providers.ollama-granite]
type = "ollama"
base_url = "http://localhost:11434"
default_model = "ministral-3:3b"



# OpenAI API (requires 'openai' feature and OPENAI_API_KEY env var)

# [providers.openai]

# type = "openai"

# api_key_env = "OPENAI_API_KEY"

# api_base = "https://api.openai.com/v1"

# default_model = "gpt-4"



# OpenAI-compatible endpoint (e.g., Azure, local vLLM, Anyscale)

# [providers.azure-openai]

# type = "openai"

# api_key_env = "AZURE_OPENAI_KEY"

# api_base = "https://your-resource.openai.azure.com"

# default_model = "gpt-4-deployment"



# LlamaCpp - Direct GGUF model loading (requires 'llamacpp' feature)

# [providers.llamacpp]

# type = "llamacpp"

# model_path = "./models/granite4-tiny-h.gguf"

# n_ctx = 4096                       # Context window size

# n_threads = 4                       # CPU threads to use

# max_tokens = 512                    # Default max tokens



# =============================================================================

# Model Configurations

# =============================================================================

# Named model configs that reference providers. Agents reference these by name.

# This allows easy model swapping without changing agent configurations.



# Fast model for quick routing decisions.
[models.fast]
provider = "ollama-fast"
model = "ministral-3:3b"
temperature = 0.7
max_tokens = 256

# Balanced model for most tasks.
[models.balanced]
provider = "ollama-local"
model = "ministral-3:3b"
temperature = 0.7
max_tokens = 512

# Powerful model for complex reasoning.
[models.powerful]
provider = "ollama-local"
model = "qwen3-vl:2b"
temperature = 0.5
max_tokens = 1024
# Optional sampling knobs:
# top_p = 0.9
# frequency_penalty = 0.0
# presence_penalty = 0.0

# Granite model (alternative).
[models.granite]
provider = "ollama-granite"
model = "ministral-3:3b"
temperature = 0.7
max_tokens = 1024

# Vision model for multimodal requests.
[models.vision]
provider = "ollama-vision"
model = "qwen3-vl:2b"
temperature = 0.7
max_tokens = 512

# Coding model — lower temperature for more deterministic output.
[models.coding]
provider = "ollama-code"
model = "ministral-3:3b"
temperature = 0.3
max_tokens = 2048



# =============================================================================

# Tools Configuration

# =============================================================================

# Define available tools and their settings.

# Agents reference tools by name in their 'tools' array.



# Built-in arithmetic tool.
[tools.calculator]
enabled = true
description = "Performs basic arithmetic operations (+, -, *, /)"
timeout_secs = 10

# Web search tool; longer timeout because it performs network I/O.
[tools.web_search]
enabled = true
description = "Search the web using DuckDuckGo (no API key required)"
timeout_secs = 30



# Example: Database query tool (not implemented by default)

# [tools.database_query]

# enabled = false

# description = "Execute read-only database queries"

# timeout_secs = 60



# =============================================================================

# Agent Configurations

# =============================================================================

# Each agent has a model, optional tools, and a system prompt.

# The system_prompt can be customized to change agent behavior.



# Router: single-shot classifier that picks the downstream agent.
[agents.router]
model = "fast"
tools = []
max_tool_iterations = 1
parallel_tools = false
system_prompt = """
You are a routing agent that classifies user queries.

Available agents:
- product: Product information, catalog, inventory
- invoice: Billing, payments, invoices
- sales: Sales metrics, revenue, customers
- finance: Financial analysis, budgets, expenses
- hr: HR policies, employees, benefits
- orchestrator: Complex multi-domain queries

Respond with ONLY the agent name (one word, lowercase).
"""

# Orchestrator: handles multi-domain queries with full tool access.
[agents.orchestrator]
model = "powerful"
tools = ["calculator", "web_search"]
max_tool_iterations = 10
parallel_tools = false
system_prompt = """
You are an orchestrator agent for complex queries.

Capabilities:
- Break down complex requests
- Delegate to specialized agents
- Perform web searches
- Execute calculations
- Synthesize multiple results

Provide comprehensive, well-structured answers.
"""

# Domain specialists below share the balanced model.
[agents.product]
model = "balanced"
tools = []
max_tool_iterations = 5
system_prompt = """
You are a Product Agent for product-related queries.

Capabilities:
- Product catalog and search
- Specifications and details
- Inventory and availability
- Comparisons and recommendations
- Pricing information
"""

[agents.invoice]
model = "balanced"
tools = ["calculator"]
max_tool_iterations = 5
system_prompt = """
You are an Invoice Agent for billing queries.

Capabilities:
- Invoice lookup and status
- Payment processing
- Billing history
- Payment terms
- Discrepancy resolution
"""

[agents.sales]
model = "balanced"
tools = ["calculator"]
max_tool_iterations = 5
system_prompt = """
You are a Sales Agent for sales analytics.

Capabilities:
- Sales performance metrics
- Revenue analysis
- Customer acquisition
- Pipeline insights
- Commission calculations
"""

[agents.finance]
model = "balanced"
tools = ["calculator"]
max_tool_iterations = 5
system_prompt = """
You are a Finance Agent for financial analysis.

Capabilities:
- Financial statements
- Budget tracking
- Expense management
- Cash flow analysis
- Cost accounting
"""

[agents.hr]
model = "balanced"
tools = []
max_tool_iterations = 5
system_prompt = """
You are an HR Agent for human resources queries.

Capabilities:
- Employee information
- Company policies
- Benefits and compensation
- Leave management
- Recruitment
"""



# =============================================================================

# Workflow Configurations

# =============================================================================

# Define how requests flow through agents.



[workflows.default]
entry_agent = "router"            # First agent to handle requests
fallback_agent = "orchestrator"   # Used when routing fails
max_depth = 3                     # Max recursive depth
max_iterations = 5                # Max iterations per workflow

[workflows.research]
entry_agent = "orchestrator"
max_depth = 3
max_iterations = 10
parallel_subagents = true         # Execute subagents in parallel



# =============================================================================

# RAG (Retrieval Augmented Generation) Configuration

# =============================================================================

# Full configuration for the RAG pipeline including vector store, embeddings,

# chunking, search strategies, and reranking.



[rag]
# Vector Store Configuration
# --------------------------
# Provider: "ares-vector" (default, pure Rust), "qdrant", "lancedb", "pgvector"
vector_store = "ares-vector"
vector_path = "./data/vectors"      # Path for persistent storage

# Embedding Configuration
# -----------------------
# Model for generating vector embeddings. Available models:
#   Dense models (fastembed):
#   - bge-small-en-v1.5 (384 dims, fast, English)
#   - bge-base-en-v1.5 (768 dims, balanced)
#   - bge-large-en-v1.5 (1024 dims, best quality)
#   - all-minilm-l6-v2 (384 dims, very fast)
#   - nomic-embed-text-v1.5 (768 dims, 8K context)
#   - multilingual-e5-small (384 dims, 100+ languages)
embedding_model = "bge-small-en-v1.5"

# Sparse embeddings for hybrid search (optional)
sparse_embeddings = false           # Enable sparse embeddings
sparse_model = "splade-pp-en-v1"    # Sparse model to use

# Chunking Configuration
# ----------------------
# Strategy: "word" (default), "semantic", "character"
# - word: Simple word-based chunking with overlap
# - semantic: Sentence/paragraph aware chunking (best for retrieval)
# - character: Fixed character count chunking
chunking_strategy = "word"
chunk_size = 200                    # Words (or chars for character strategy)
chunk_overlap = 50                  # Overlap for context continuity
min_chunk_size = 20                 # Minimum chunk size to keep

# Search Configuration
# --------------------
# Strategy: "semantic" (default), "bm25", "fuzzy", "hybrid"
# - semantic: Pure vector similarity search
# - bm25: Traditional keyword search with TF-IDF
# - fuzzy: Typo-tolerant keyword matching
# - hybrid: Combines semantic + bm25 + fuzzy with RRF fusion
search_strategy = "semantic"
search_limit = 10                   # Default results to return
search_threshold = 0.0              # Minimum similarity score (0.0-1.0)

# Reranking Configuration
# -----------------------
# Cross-encoder models for improved relevance ranking.
# Models: bge-reranker-base, bge-reranker-v2-m3,
#         jina-reranker-v1-turbo-en, jina-reranker-v2-base-multilingual
# NOTE: these keys must stay under the single [rag] table above the
# [rag.hybrid_weights] sub-table. The original file reopened [rag] with a
# second header here, which is invalid TOML 1.0 (a table may only be
# defined once) and prevents the file from parsing.
rerank_enabled = false              # Enable reranking (slower but better)
reranker_model = "bge-reranker-base"
rerank_weight = 0.6                 # Weight for rerank score vs retrieval

# Hybrid search weights (used when search_strategy = "hybrid").
# Declared last: a sub-table header closes [rag] for bare keys.
[rag.hybrid_weights]
semantic = 0.5                      # Weight for vector similarity
bm25 = 0.3                          # Weight for BM25 keyword matching
fuzzy = 0.2                         # Weight for fuzzy matching



# =============================================================================
# Dynamic Configuration Paths (TOON Files)
# =============================================================================
# ARES uses a hybrid TOML + TOON configuration approach:
# - TOML (this file): Static infrastructure config (server, auth, database, providers)
# - TOON (config/*.toon): Dynamic behavioral config (agents, workflows, models, tools, MCPs)
#
# Benefits of TOON format:
# - 30-60% token savings when sending to LLMs
# - Hot-reloadable without server restart
# - One file per entity for easy management
# - LLM-friendly format optimized for AI consumption
#
# See docs/TOON.md for more information on the TOON format.



[config]
agents_dir = "config/agents"        # Agent definitions (*.toon)
workflows_dir = "config/workflows"  # Workflow definitions (*.toon)
models_dir = "config/models"        # Model configurations (*.toon)
tools_dir = "config/tools"          # Tool configurations (*.toon)
mcps_dir = "config/mcps"            # MCP server configurations (*.toon)
hot_reload = true                   # Watch for changes and reload
watch_interval_ms = 1000            # How often to check for changes