// car_inference/service.rs

//! Inference service — exposes inference as built-in CAR tools.
//!
//! Provides `ToolSchema` definitions for `infer`, `embed`, and `classify`
//! so they can be registered as built-in tools with real implementations.

use car_ir::ToolSchema;
use serde_json::{json, Value};

use crate::{ClassifyRequest, EmbedRequest, GenerateRequest, InferenceEngine, InferenceError};
11/// Execute an inference tool call. Returns the result as JSON.
12///
13/// This is the bridge between CAR's tool dispatch and the inference engine.
14pub async fn execute_tool(
15    engine: &InferenceEngine,
16    tool_name: &str,
17    params: &Value,
18) -> Result<Value, InferenceError> {
19    match tool_name {
20        "infer" => {
21            let req: GenerateRequest = serde_json::from_value(params.clone())
22                .map_err(|e| InferenceError::InferenceFailed(format!("bad params: {e}")))?;
23            let result = engine.generate(req).await?;
24            Ok(json!({ "text": result }))
25        }
26        "embed" => {
27            let req: EmbedRequest = serde_json::from_value(params.clone())
28                .map_err(|e| InferenceError::InferenceFailed(format!("bad params: {e}")))?;
29            let result = engine.embed(req).await?;
30            Ok(json!({ "embeddings": result }))
31        }
32        "classify" => {
33            let req: ClassifyRequest = serde_json::from_value(params.clone())
34                .map_err(|e| InferenceError::InferenceFailed(format!("bad params: {e}")))?;
35            let result = engine.classify(req).await?;
36            Ok(json!({ "classifications": result }))
37        }
38        _ => Err(InferenceError::InferenceFailed(format!(
39            "unknown inference tool: {tool_name}"
40        ))),
41    }
42}
43
44/// ToolSchema for text generation.
45pub fn infer_schema() -> ToolSchema {
46    ToolSchema {
47        name: "infer".to_string(),
48        description: "Generate text using a local Qwen3 model.".to_string(),
49        parameters: json!({
50            "type": "object",
51            "properties": {
52                "prompt": {
53                    "type": "string",
54                    "description": "The prompt to complete"
55                },
56                "model": {
57                    "type": "string",
58                    "description": "Model name (default: Qwen3-1.7B)"
59                },
60                "context": {
61                    "type": "string",
62                    "description": "Optional memory context to ground the model's response"
63                },
64                "params": {
65                    "type": "object",
66                    "properties": {
67                        "temperature": { "type": "number", "default": 0.7 },
68                        "top_p": { "type": "number", "default": 0.9 },
69                        "top_k": { "type": "integer", "default": 0 },
70                        "max_tokens": { "type": "integer", "default": 512 },
71                        "stop": {
72                            "type": "array",
73                            "items": { "type": "string" }
74                        }
75                    }
76                }
77            },
78            "required": ["prompt"]
79        }),
80        returns: Some(json!({
81            "type": "object",
82            "properties": {
83                "text": { "type": "string" }
84            }
85        })),
86        idempotent: false,
87        cache_ttl_secs: None,
88        rate_limit: None,
89    }
90}
91
92/// ToolSchema for embedding generation.
93pub fn embed_schema() -> ToolSchema {
94    ToolSchema {
95        name: "embed".to_string(),
96        description: "Generate vector embeddings for text using a local Qwen3 model.".to_string(),
97        parameters: json!({
98            "type": "object",
99            "properties": {
100                "texts": {
101                    "type": "array",
102                    "items": { "type": "string" },
103                    "description": "Texts to embed"
104                },
105                "model": {
106                    "type": "string",
107                    "description": "Model name (default: Qwen3-0.6B)"
108                }
109            },
110            "required": ["texts"]
111        }),
112        returns: Some(json!({
113            "type": "object",
114            "properties": {
115                "embeddings": {
116                    "type": "array",
117                    "items": {
118                        "type": "array",
119                        "items": { "type": "number" }
120                    }
121                }
122            }
123        })),
124        idempotent: true,
125        cache_ttl_secs: Some(3600),
126        rate_limit: None,
127    }
128}
129
130/// ToolSchema for classification.
131pub fn classify_schema() -> ToolSchema {
132    ToolSchema {
133        name: "classify".to_string(),
134        description: "Classify text against candidate labels using a local Qwen3 model."
135            .to_string(),
136        parameters: json!({
137            "type": "object",
138            "properties": {
139                "text": {
140                    "type": "string",
141                    "description": "Text to classify"
142                },
143                "labels": {
144                    "type": "array",
145                    "items": { "type": "string" },
146                    "description": "Candidate labels"
147                },
148                "model": {
149                    "type": "string",
150                    "description": "Model name (default: Qwen3-0.6B)"
151                }
152            },
153            "required": ["text", "labels"]
154        }),
155        returns: Some(json!({
156            "type": "object",
157            "properties": {
158                "classifications": {
159                    "type": "array",
160                    "items": {
161                        "type": "object",
162                        "properties": {
163                            "label": { "type": "string" },
164                            "score": { "type": "number" }
165                        }
166                    }
167                }
168            }
169        })),
170        idempotent: true,
171        cache_ttl_secs: Some(300),
172        rate_limit: None,
173    }
174}
175
176/// ToolSchema for memory-grounded text generation.
177pub fn infer_grounded_schema() -> ToolSchema {
178    ToolSchema {
179        name: "infer.grounded".to_string(),
180        description: "Generate text grounded with memory context. Automatically queries the memgine for relevant context before generating.".to_string(),
181        parameters: json!({
182            "type": "object",
183            "properties": {
184                "prompt": {
185                    "type": "string",
186                    "description": "The prompt to complete"
187                },
188                "model": {
189                    "type": "string",
190                    "description": "Model name (default: Qwen3-1.7B)"
191                },
192                "params": {
193                    "type": "object",
194                    "properties": {
195                        "temperature": { "type": "number", "default": 0.7 },
196                        "top_p": { "type": "number", "default": 0.9 },
197                        "top_k": { "type": "integer", "default": 0 },
198                        "max_tokens": { "type": "integer", "default": 512 },
199                        "stop": {
200                            "type": "array",
201                            "items": { "type": "string" }
202                        }
203                    }
204                }
205            },
206            "required": ["prompt"]
207        }),
208        returns: Some(json!({
209            "type": "object",
210            "properties": {
211                "text": { "type": "string" }
212            }
213        })),
214        idempotent: false,
215        cache_ttl_secs: None,
216        rate_limit: None,
217    }
218}
219
220/// ToolSchema for model management — list models in the unified registry.
221pub fn list_models_schema() -> ToolSchema {
222    ToolSchema {
223        name: "models.list".to_string(),
224        description: "List all registered models (local and remote) with their capabilities, availability, and performance profiles.".to_string(),
225        parameters: json!({
226            "type": "object",
227            "properties": {
228                "capability": {
229                    "type": "string",
230                    "description": "Filter by capability (generate, embed, classify, code, reasoning, summarize, tool_use, vision)"
231                },
232                "local_only": {
233                    "type": "boolean",
234                    "description": "Only show local models"
235                },
236                "available_only": {
237                    "type": "boolean",
238                    "description": "Only show available models"
239                }
240            }
241        }),
242        returns: Some(json!({
243            "type": "object",
244            "properties": {
245                "models": {
246                    "type": "array",
247                    "items": {
248                        "type": "object",
249                        "properties": {
250                            "id": { "type": "string" },
251                            "name": { "type": "string" },
252                            "provider": { "type": "string" },
253                            "capabilities": { "type": "array", "items": { "type": "string" } },
254                            "available": { "type": "boolean" },
255                            "is_local": { "type": "boolean" }
256                        }
257                    }
258                }
259            }
260        })),
261        idempotent: true,
262        cache_ttl_secs: Some(60),
263        rate_limit: None,
264    }
265}
266
267/// ToolSchema for model routing — show which model would be selected for a prompt.
268pub fn route_model_schema() -> ToolSchema {
269    ToolSchema {
270        name: "models.route".to_string(),
271        description: "Route a prompt to the best model without executing. Shows the routing decision, strategy, and fallback chain.".to_string(),
272        parameters: json!({
273            "type": "object",
274            "properties": {
275                "prompt": {
276                    "type": "string",
277                    "description": "The prompt to route"
278                }
279            },
280            "required": ["prompt"]
281        }),
282        returns: Some(json!({
283            "type": "object",
284            "properties": {
285                "model_id": { "type": "string" },
286                "model_name": { "type": "string" },
287                "strategy": { "type": "string" },
288                "complexity": { "type": "string" },
289                "predicted_quality": { "type": "number" },
290                "reason": { "type": "string" },
291                "fallbacks": { "type": "array", "items": { "type": "string" } }
292            }
293        })),
294        idempotent: true,
295        cache_ttl_secs: None,
296        rate_limit: None,
297    }
298}
299
300/// ToolSchema for model performance stats.
301pub fn model_stats_schema() -> ToolSchema {
302    ToolSchema {
303        name: "models.stats".to_string(),
304        description: "Get performance statistics for models based on observed outcomes.".to_string(),
305        parameters: json!({
306            "type": "object",
307            "properties": {
308                "model_id": {
309                    "type": "string",
310                    "description": "Model ID to get stats for (omit for all models)"
311                }
312            }
313        }),
314        returns: Some(json!({
315            "type": "object",
316            "properties": {
317                "profiles": {
318                    "type": "array",
319                    "items": {
320                        "type": "object",
321                        "properties": {
322                            "model_id": { "type": "string" },
323                            "total_calls": { "type": "integer" },
324                            "success_rate": { "type": "number" },
325                            "avg_latency_ms": { "type": "number" },
326                            "ema_quality": { "type": "number" }
327                        }
328                    }
329                }
330            }
331        })),
332        idempotent: true,
333        cache_ttl_secs: Some(30),
334        rate_limit: None,
335    }
336}
337
338/// All inference tool schemas.
339pub fn all_schemas() -> Vec<ToolSchema> {
340    vec![
341        infer_schema(),
342        infer_grounded_schema(),
343        embed_schema(),
344        classify_schema(),
345        list_models_schema(),
346        route_model_schema(),
347        model_stats_schema(),
348    ]
349}