use car_ir::ToolSchema;
use serde_json::{json, Value};
use crate::{
ClassifyRequest, EmbedRequest, GenerateImageRequest, GenerateRequest, GenerateVideoRequest,
InferenceEngine, InferenceError, SynthesizeRequest, TranscribeRequest,
};
pub async fn execute_tool(
engine: &InferenceEngine,
tool_name: &str,
params: &Value,
) -> Result<Value, InferenceError> {
match tool_name {
"infer" => {
let req: GenerateRequest = serde_json::from_value(params.clone())
.map_err(|e| InferenceError::InferenceFailed(format!("bad params: {e}")))?;
let result = engine.generate_tracked(req).await?;
let mut resp = json!({ "text": result.text });
if !result.tool_calls.is_empty() {
resp["tool_calls"] = serde_json::to_value(&result.tool_calls).unwrap_or_default();
}
if let Some(usage) = &result.usage {
resp["usage"] = json!({
"prompt_tokens": usage.prompt_tokens,
"completion_tokens": usage.completion_tokens,
});
}
resp["model_used"] = json!(result.model_used);
resp["latency_ms"] = json!(result.latency_ms);
Ok(resp)
}
"embed" => {
let req: EmbedRequest = serde_json::from_value(params.clone())
.map_err(|e| InferenceError::InferenceFailed(format!("bad params: {e}")))?;
let result = engine.embed(req).await?;
Ok(json!({ "embeddings": result }))
}
"classify" => {
let req: ClassifyRequest = serde_json::from_value(params.clone())
.map_err(|e| InferenceError::InferenceFailed(format!("bad params: {e}")))?;
let result = engine.classify(req).await?;
Ok(json!({ "classifications": result }))
}
"transcribe" => {
let req: TranscribeRequest = serde_json::from_value(params.clone())
.map_err(|e| InferenceError::InferenceFailed(format!("bad params: {e}")))?;
let result = engine.transcribe(req).await?;
Ok(serde_json::to_value(result)
.map_err(|e| InferenceError::InferenceFailed(format!("serialize result: {e}")))?)
}
"synthesize" => {
let req: SynthesizeRequest = serde_json::from_value(params.clone())
.map_err(|e| InferenceError::InferenceFailed(format!("bad params: {e}")))?;
let result = engine.synthesize(req).await?;
Ok(serde_json::to_value(result)
.map_err(|e| InferenceError::InferenceFailed(format!("serialize result: {e}")))?)
}
"generate_image" => {
let req: GenerateImageRequest = serde_json::from_value(params.clone())
.map_err(|e| InferenceError::InferenceFailed(format!("bad params: {e}")))?;
let result = engine.generate_image(req).await?;
Ok(serde_json::to_value(result)
.map_err(|e| InferenceError::InferenceFailed(format!("serialize result: {e}")))?)
}
"generate_video" => {
let req: GenerateVideoRequest = serde_json::from_value(params.clone())
.map_err(|e| InferenceError::InferenceFailed(format!("bad params: {e}")))?;
let result = engine.generate_video(req).await?;
Ok(serde_json::to_value(result)
.map_err(|e| InferenceError::InferenceFailed(format!("serialize result: {e}")))?)
}
_ => Err(InferenceError::InferenceFailed(format!(
"unknown inference tool: {tool_name}"
))),
}
}
/// Tool schema for `infer`: text generation with optional sampling
/// parameters, a model override, and an optional grounding context.
pub fn infer_schema() -> ToolSchema {
    let parameters = json!({
        "type": "object",
        "properties": {
            "prompt": {
                "type": "string",
                "description": "The prompt to complete"
            },
            "model": {
                "type": "string",
                "description": "Model name (default: Qwen3-1.7B)"
            },
            "context": {
                "type": "string",
                "description": "Optional memory context to ground the model's response"
            },
            "params": {
                "type": "object",
                "properties": {
                    "temperature": { "type": "number", "default": 0.7 },
                    "top_p": { "type": "number", "default": 0.9 },
                    "top_k": { "type": "integer", "default": 0 },
                    "max_tokens": { "type": "integer", "default": 4096 },
                    "workload": {
                        "type": "string",
                        "enum": ["interactive", "batch", "background"],
                        "default": "interactive",
                        "description": "Routing workload class. Interactive favors latency; batch/background tolerate slower high-quality local models."
                    },
                    "stop": {
                        "type": "array",
                        "items": { "type": "string" }
                    }
                }
            }
        },
        "required": ["prompt"]
    });
    let returns = json!({
        "type": "object",
        "properties": {
            "text": { "type": "string" }
        }
    });
    ToolSchema {
        name: "infer".to_string(),
        description: "Generate text using a local Qwen3 model.".to_string(),
        parameters,
        returns: Some(returns),
        // Generation is stochastic, so results are never cached.
        idempotent: false,
        cache_ttl_secs: None,
        rate_limit: None,
    }
}
/// Tool schema for `embed`: batch text-to-vector embedding.
pub fn embed_schema() -> ToolSchema {
    let parameters = json!({
        "type": "object",
        "properties": {
            "texts": {
                "type": "array",
                "items": { "type": "string" },
                "description": "Texts to embed"
            },
            "model": {
                "type": "string",
                "description": "Model name (default: Qwen3-0.6B)"
            }
        },
        "required": ["texts"]
    });
    let returns = json!({
        "type": "object",
        "properties": {
            "embeddings": {
                "type": "array",
                "items": {
                    "type": "array",
                    "items": { "type": "number" }
                }
            }
        }
    });
    ToolSchema {
        name: "embed".to_string(),
        description: "Generate vector embeddings for text using a local Qwen3 model.".to_string(),
        parameters,
        returns: Some(returns),
        // Embeddings are deterministic for a given input, so caching is safe.
        idempotent: true,
        cache_ttl_secs: Some(3600),
        rate_limit: None,
    }
}
/// Tool schema for `classify`: score text against caller-provided labels.
pub fn classify_schema() -> ToolSchema {
    let parameters = json!({
        "type": "object",
        "properties": {
            "text": {
                "type": "string",
                "description": "Text to classify"
            },
            "labels": {
                "type": "array",
                "items": { "type": "string" },
                "description": "Candidate labels"
            },
            "model": {
                "type": "string",
                "description": "Model name (default: Qwen3-0.6B)"
            }
        },
        "required": ["text", "labels"]
    });
    let returns = json!({
        "type": "object",
        "properties": {
            "classifications": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "label": { "type": "string" },
                        "score": { "type": "number" }
                    }
                }
            }
        }
    });
    ToolSchema {
        name: "classify".to_string(),
        description: "Classify text against candidate labels using a local Qwen3 model."
            .to_string(),
        parameters,
        returns: Some(returns),
        idempotent: true,
        cache_ttl_secs: Some(300),
        rate_limit: None,
    }
}
/// Tool schema for `infer.grounded`: like `infer`, but context retrieval
/// happens automatically rather than via a caller-supplied `context` field.
pub fn infer_grounded_schema() -> ToolSchema {
    let parameters = json!({
        "type": "object",
        "properties": {
            "prompt": {
                "type": "string",
                "description": "The prompt to complete"
            },
            "model": {
                "type": "string",
                "description": "Model name (default: Qwen3-1.7B)"
            },
            "params": {
                "type": "object",
                "properties": {
                    "temperature": { "type": "number", "default": 0.7 },
                    "top_p": { "type": "number", "default": 0.9 },
                    "top_k": { "type": "integer", "default": 0 },
                    "max_tokens": { "type": "integer", "default": 4096 },
                    "stop": {
                        "type": "array",
                        "items": { "type": "string" }
                    }
                }
            }
        },
        "required": ["prompt"]
    });
    let returns = json!({
        "type": "object",
        "properties": {
            "text": { "type": "string" }
        }
    });
    ToolSchema {
        name: "infer.grounded".to_string(),
        description: "Generate text grounded with memory context. Automatically queries the memgine for relevant context before generating.".to_string(),
        parameters,
        returns: Some(returns),
        idempotent: false,
        cache_ttl_secs: None,
        rate_limit: None,
    }
}
/// Tool schema for `generate_image`: local text-to-image generation.
pub fn generate_image_schema() -> ToolSchema {
    let parameters = json!({
        "type": "object",
        "properties": {
            "prompt": { "type": "string" },
            "model": { "type": "string" },
            "width": { "type": "integer" },
            "height": { "type": "integer" },
            "steps": { "type": "integer" },
            "guidance": { "type": "number" },
            "seed": { "type": "integer" },
            "output_path": { "type": "string" },
            "format": { "type": "string", "default": "png" }
        },
        "required": ["prompt"]
    });
    let returns = json!({
        "type": "object",
        "properties": {
            "image_path": { "type": "string" },
            "media_type": { "type": "string" },
            "model_used": { "type": "string" }
        }
    });
    ToolSchema {
        name: "generate_image".to_string(),
        description: "Generate an image using a local MLX image model.".to_string(),
        parameters,
        returns: Some(returns),
        idempotent: false,
        cache_ttl_secs: None,
        rate_limit: None,
    }
}
/// Tool schema for `generate_video`: local video generation across several
/// modes (t2v, i2v, audio-conditioned). The `extend`/`retake` surface is
/// declared but documented in the field descriptions as not yet implemented.
pub fn generate_video_schema() -> ToolSchema {
    let parameters = json!({
        "type": "object",
        "properties": {
            "prompt": { "type": "string" },
            "model": { "type": "string" },
            "width": { "type": "integer" },
            "height": { "type": "integer" },
            "num_frames": { "type": "integer" },
            "steps": { "type": "integer" },
            "guidance": { "type": "number" },
            "seed": { "type": "integer" },
            "fps": { "type": "integer" },
            "output_path": { "type": "string" },
            "format": { "type": "string", "default": "mp4" },
            "image_path": { "type": "string", "description": "Reference image for image-to-video." },
            "audio_path": {
                "type": "string",
                "description": "Existing audio reference for audio_ref_video. The file is input conditioning for visual timing, rhythm, vocal cadence, intensity, and transitions; it is not generated output."
            },
            "video_path": {
                "type": "string",
                "description": "NOT YET IMPLEMENTED on any backend. Reference video for extension or retake modes; the request surface accepts this field but invoking it returns UnsupportedMode."
            },
            "extend_after_frame": {
                "type": "integer",
                "description": "NOT YET IMPLEMENTED on any backend. Extend mode: frame index in video_path to resume from."
            },
            "extend_context_frames": {
                "type": "integer",
                "description": "NOT YET IMPLEMENTED on any backend. Extend mode: how many trailing frames of video_path the model attends to when continuing the clip."
            },
            "retake_start_frame": {
                "type": "integer",
                "description": "NOT YET IMPLEMENTED on any backend. Retake mode: inclusive start frame of the span to regenerate."
            },
            "retake_end_frame": {
                "type": "integer",
                "description": "NOT YET IMPLEMENTED on any backend. Retake mode: exclusive end frame of the span to regenerate (half-open range)."
            },
            "mode": {
                "type": "string",
                "enum": ["t2v", "i2v", "audio_video", "audio_ref_video", "extend", "retake"],
                "description": "Explicit mode. `t2v`, `i2v`, `audio_video`, and `audio_ref_video` are wired. `audio_ref_video` uses an existing audio file as the visual timing/intensity conditioning signal. `extend` and `retake` are NOT YET IMPLEMENTED — the request surface accepts them but calling a backend returns UnsupportedMode. Do not select `extend` or `retake` in production until this note is removed."
            }
        },
        "required": ["prompt"]
    });
    let returns = json!({
        "type": "object",
        "properties": {
            "video_path": { "type": "string" },
            "media_type": { "type": "string" },
            "model_used": { "type": "string" }
        }
    });
    ToolSchema {
        name: "generate_video".to_string(),
        description: "Generate a video using a local MLX video model.".to_string(),
        parameters,
        returns: Some(returns),
        idempotent: false,
        cache_ttl_secs: None,
        rate_limit: None,
    }
}
/// Tool schema for `models.list`: enumerate registered models with
/// optional capability/locality/availability filters.
pub fn list_models_schema() -> ToolSchema {
    let parameters = json!({
        "type": "object",
        "properties": {
            "capability": {
                "type": "string",
                "description": "Filter by capability (generate, embed, classify, code, reasoning, summarize, tool_use, vision, speech_to_text, text_to_speech, image_generation, video_generation)"
            },
            "local_only": {
                "type": "boolean",
                "description": "Only show local models"
            },
            "available_only": {
                "type": "boolean",
                "description": "Only show available models"
            }
        }
    });
    let returns = json!({
        "type": "object",
        "properties": {
            "models": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "id": { "type": "string" },
                        "name": { "type": "string" },
                        "provider": { "type": "string" },
                        "capabilities": { "type": "array", "items": { "type": "string" } },
                        "available": { "type": "boolean" },
                        "is_local": { "type": "boolean" }
                    }
                }
            }
        }
    });
    ToolSchema {
        name: "models.list".to_string(),
        description: "List all registered models (local and remote) with their capabilities, availability, and performance profiles.".to_string(),
        parameters,
        returns: Some(returns),
        idempotent: true,
        cache_ttl_secs: Some(60),
        rate_limit: None,
    }
}
/// Tool schema for `models.route`: dry-run routing of a prompt — shows
/// the decision without executing any model.
pub fn route_model_schema() -> ToolSchema {
    let parameters = json!({
        "type": "object",
        "properties": {
            "prompt": {
                "type": "string",
                "description": "The prompt to route"
            }
        },
        "required": ["prompt"]
    });
    let returns = json!({
        "type": "object",
        "properties": {
            "model_id": { "type": "string" },
            "model_name": { "type": "string" },
            "strategy": { "type": "string" },
            "complexity": { "type": "string" },
            "predicted_quality": { "type": "number" },
            "reason": { "type": "string" },
            "fallbacks": { "type": "array", "items": { "type": "string" } }
        }
    });
    ToolSchema {
        name: "models.route".to_string(),
        description: "Route a prompt to the best model without executing. Shows the routing decision, strategy, and fallback chain.".to_string(),
        parameters,
        returns: Some(returns),
        idempotent: true,
        cache_ttl_secs: None,
        rate_limit: None,
    }
}
/// Tool schema for `models.stats`: observed performance profiles,
/// optionally narrowed to one model.
pub fn model_stats_schema() -> ToolSchema {
    let parameters = json!({
        "type": "object",
        "properties": {
            "model_id": {
                "type": "string",
                "description": "Model ID to get stats for (omit for all models)"
            }
        }
    });
    let returns = json!({
        "type": "object",
        "properties": {
            "profiles": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "model_id": { "type": "string" },
                        "total_calls": { "type": "integer" },
                        "success_rate": { "type": "number" },
                        "avg_latency_ms": { "type": "number" },
                        "ema_quality": { "type": "number" }
                    }
                }
            }
        }
    });
    ToolSchema {
        name: "models.stats".to_string(),
        description: "Get performance statistics for models based on observed outcomes."
            .to_string(),
        parameters,
        returns: Some(returns),
        idempotent: true,
        cache_ttl_secs: Some(30),
        rate_limit: None,
    }
}
/// Tool schema for `transcribe`: speech-to-text on a local audio file.
pub fn transcribe_schema() -> ToolSchema {
    let parameters = json!({
        "type": "object",
        "properties": {
            "audio_path": {
                "type": "string",
                "description": "Path to the audio file to transcribe"
            },
            "model": {
                "type": "string",
                "description": "Optional STT model override"
            },
            "language": {
                "type": "string",
                "description": "Optional language hint"
            },
            "prompt": {
                "type": "string",
                "description": "Optional context or hotword hint"
            },
            "timestamps": {
                "type": "boolean",
                "description": "Request verbose timestamp-oriented output when supported"
            }
        },
        "required": ["audio_path"]
    });
    let returns = json!({
        "type": "object",
        "properties": {
            "text": { "type": "string" },
            "model_used": { "type": "string" },
            "language": { "type": "string" }
        }
    });
    ToolSchema {
        name: "transcribe".to_string(),
        description: "Transcribe an audio file using the best available local or remote speech-to-text model.".to_string(),
        parameters,
        returns: Some(returns),
        idempotent: true,
        cache_ttl_secs: None,
        rate_limit: None,
    }
}
/// Tool schema for `synthesize`: text-to-speech producing an audio file.
pub fn synthesize_schema() -> ToolSchema {
    let parameters = json!({
        "type": "object",
        "properties": {
            "text": {
                "type": "string",
                "description": "Text to synthesize"
            },
            "model": {
                "type": "string",
                "description": "Optional TTS model override"
            },
            "voice": {
                "type": "string",
                "description": "Optional voice preset or provider-specific voice ID"
            },
            "language": {
                "type": "string",
                "description": "Optional language or language code"
            },
            "speed": {
                "type": "number",
                "description": "Optional playback speed multiplier"
            },
            "output_path": {
                "type": "string",
                "description": "Optional destination path for the generated audio"
            },
            "format": {
                "type": "string",
                "description": "Audio format (wav, mp3, flac, pcm)",
                "default": "wav"
            }
        },
        "required": ["text"]
    });
    let returns = json!({
        "type": "object",
        "properties": {
            "audio_path": { "type": "string" },
            "media_type": { "type": "string" },
            "model_used": { "type": "string" },
            "voice_used": { "type": "string" }
        }
    });
    ToolSchema {
        name: "synthesize".to_string(),
        description: "Synthesize speech to an audio file using the best available local or remote text-to-speech model.".to_string(),
        parameters,
        returns: Some(returns),
        idempotent: false,
        cache_ttl_secs: None,
        rate_limit: None,
    }
}
/// Every tool schema exposed by this module, in registration order.
pub fn all_schemas() -> Vec<ToolSchema> {
    Vec::from([
        infer_schema(),
        infer_grounded_schema(),
        embed_schema(),
        classify_schema(),
        generate_image_schema(),
        generate_video_schema(),
        transcribe_schema(),
        synthesize_schema(),
        list_models_schema(),
        route_model_schema(),
        model_stats_schema(),
    ])
}