use car_ir::ToolSchema;
use serde_json::{json, Value};
use crate::{
ClassifyRequest, EmbedRequest, GenerateImageRequest, GenerateRequest, GenerateVideoRequest,
InferenceEngine, InferenceError, SynthesizeRequest, TranscribeRequest,
};
pub async fn execute_tool(
engine: &InferenceEngine,
tool_name: &str,
params: &Value,
) -> Result<Value, InferenceError> {
match tool_name {
"infer" => {
let req: GenerateRequest = serde_json::from_value(params.clone())
.map_err(|e| InferenceError::InferenceFailed(format!("bad params: {e}")))?;
let result = engine.generate_tracked(req).await?;
let mut resp = json!({ "text": result.text });
if !result.tool_calls.is_empty() {
resp["tool_calls"] = serde_json::to_value(&result.tool_calls).unwrap_or_default();
}
if let Some(usage) = &result.usage {
resp["usage"] = json!({
"prompt_tokens": usage.prompt_tokens,
"completion_tokens": usage.completion_tokens,
});
}
resp["model_used"] = json!(result.model_used);
resp["latency_ms"] = json!(result.latency_ms);
Ok(resp)
}
"embed" => {
let req: EmbedRequest = serde_json::from_value(params.clone())
.map_err(|e| InferenceError::InferenceFailed(format!("bad params: {e}")))?;
let result = engine.embed(req).await?;
Ok(json!({ "embeddings": result }))
}
"classify" => {
let req: ClassifyRequest = serde_json::from_value(params.clone())
.map_err(|e| InferenceError::InferenceFailed(format!("bad params: {e}")))?;
let result = engine.classify(req).await?;
Ok(json!({ "classifications": result }))
}
"transcribe" => {
let req: TranscribeRequest = serde_json::from_value(params.clone())
.map_err(|e| InferenceError::InferenceFailed(format!("bad params: {e}")))?;
let result = engine.transcribe(req).await?;
Ok(serde_json::to_value(result)
.map_err(|e| InferenceError::InferenceFailed(format!("serialize result: {e}")))?)
}
"synthesize" => {
let req: SynthesizeRequest = serde_json::from_value(params.clone())
.map_err(|e| InferenceError::InferenceFailed(format!("bad params: {e}")))?;
let result = engine.synthesize(req).await?;
Ok(serde_json::to_value(result)
.map_err(|e| InferenceError::InferenceFailed(format!("serialize result: {e}")))?)
}
"generate_image" => {
let req: GenerateImageRequest = serde_json::from_value(params.clone())
.map_err(|e| InferenceError::InferenceFailed(format!("bad params: {e}")))?;
let result = engine.generate_image(req).await?;
Ok(serde_json::to_value(result)
.map_err(|e| InferenceError::InferenceFailed(format!("serialize result: {e}")))?)
}
"generate_video" => {
let req: GenerateVideoRequest = serde_json::from_value(params.clone())
.map_err(|e| InferenceError::InferenceFailed(format!("bad params: {e}")))?;
let result = engine.generate_video(req).await?;
Ok(serde_json::to_value(result)
.map_err(|e| InferenceError::InferenceFailed(format!("serialize result: {e}")))?)
}
_ => Err(InferenceError::InferenceFailed(format!(
"unknown inference tool: {tool_name}"
))),
}
}
/// Tool schema for `infer`: text generation with optional sampling
/// parameters, a model override, and an optional grounding context.
pub fn infer_schema() -> ToolSchema {
    let parameters = json!({
        "type": "object",
        "properties": {
            "prompt": {
                "type": "string",
                "description": "The prompt to complete"
            },
            "model": {
                "type": "string",
                "description": "Model name (default: Qwen3-1.7B)"
            },
            "context": {
                "type": "string",
                "description": "Optional memory context to ground the model's response"
            },
            "params": {
                "type": "object",
                "properties": {
                    "temperature": { "type": "number", "default": 0.7 },
                    "top_p": { "type": "number", "default": 0.9 },
                    "top_k": { "type": "integer", "default": 0 },
                    "max_tokens": { "type": "integer", "default": 4096 },
                    "workload": {
                        "type": "string",
                        "enum": ["interactive", "batch", "background"],
                        "default": "interactive",
                        "description": "Routing workload class. Interactive favors latency; batch/background tolerate slower high-quality local models."
                    },
                    "stop": {
                        "type": "array",
                        "items": { "type": "string" }
                    }
                }
            }
        },
        "required": ["prompt"]
    });
    let returns = json!({
        "type": "object",
        "properties": {
            "text": { "type": "string" }
        }
    });
    ToolSchema {
        name: "infer".to_string(),
        description: "Generate text using a local Qwen3 model.".to_string(),
        parameters,
        returns: Some(returns),
        // Generation is stochastic, so results are never cached.
        idempotent: false,
        cache_ttl_secs: None,
        rate_limit: None,
    }
}
/// Tool schema for `embed`: batch text-to-vector embedding.
pub fn embed_schema() -> ToolSchema {
    let parameters = json!({
        "type": "object",
        "properties": {
            "texts": {
                "type": "array",
                "items": { "type": "string" },
                "description": "Texts to embed"
            },
            "model": {
                "type": "string",
                "description": "Model name (default: Qwen3-0.6B)"
            }
        },
        "required": ["texts"]
    });
    let returns = json!({
        "type": "object",
        "properties": {
            "embeddings": {
                "type": "array",
                "items": {
                    "type": "array",
                    "items": { "type": "number" }
                }
            }
        }
    });
    ToolSchema {
        name: "embed".to_string(),
        description: "Generate vector embeddings for text using a local Qwen3 model.".to_string(),
        parameters,
        returns: Some(returns),
        // Embeddings are deterministic for a given input, so caching is safe.
        idempotent: true,
        cache_ttl_secs: Some(3600),
        rate_limit: None,
    }
}
/// Tool schema for `classify`: score text against caller-provided labels.
pub fn classify_schema() -> ToolSchema {
    let parameters = json!({
        "type": "object",
        "properties": {
            "text": {
                "type": "string",
                "description": "Text to classify"
            },
            "labels": {
                "type": "array",
                "items": { "type": "string" },
                "description": "Candidate labels"
            },
            "model": {
                "type": "string",
                "description": "Model name (default: Qwen3-0.6B)"
            }
        },
        "required": ["text", "labels"]
    });
    let returns = json!({
        "type": "object",
        "properties": {
            "classifications": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "label": { "type": "string" },
                        "score": { "type": "number" }
                    }
                }
            }
        }
    });
    ToolSchema {
        name: "classify".to_string(),
        description: "Classify text against candidate labels using a local Qwen3 model."
            .to_string(),
        parameters,
        returns: Some(returns),
        idempotent: true,
        cache_ttl_secs: Some(300),
        rate_limit: None,
    }
}
/// Tool schema for `infer.grounded`: like `infer`, but context retrieval
/// happens automatically rather than via a caller-supplied `context` field.
pub fn infer_grounded_schema() -> ToolSchema {
    let parameters = json!({
        "type": "object",
        "properties": {
            "prompt": {
                "type": "string",
                "description": "The prompt to complete"
            },
            "model": {
                "type": "string",
                "description": "Model name (default: Qwen3-1.7B)"
            },
            "params": {
                "type": "object",
                "properties": {
                    "temperature": { "type": "number", "default": 0.7 },
                    "top_p": { "type": "number", "default": 0.9 },
                    "top_k": { "type": "integer", "default": 0 },
                    "max_tokens": { "type": "integer", "default": 4096 },
                    "stop": {
                        "type": "array",
                        "items": { "type": "string" }
                    }
                }
            }
        },
        "required": ["prompt"]
    });
    let returns = json!({
        "type": "object",
        "properties": {
            "text": { "type": "string" }
        }
    });
    ToolSchema {
        name: "infer.grounded".to_string(),
        description: "Generate text grounded with memory context. Automatically queries the memgine for relevant context before generating.".to_string(),
        parameters,
        returns: Some(returns),
        idempotent: false,
        cache_ttl_secs: None,
        rate_limit: None,
    }
}
/// Tool schema for `generate_image`: local text-to-image generation.
pub fn generate_image_schema() -> ToolSchema {
    let parameters = json!({
        "type": "object",
        "properties": {
            "prompt": { "type": "string" },
            "model": { "type": "string" },
            "width": { "type": "integer" },
            "height": { "type": "integer" },
            "steps": { "type": "integer" },
            "guidance": { "type": "number" },
            "seed": { "type": "integer" },
            "output_path": { "type": "string" },
            "format": { "type": "string", "default": "png" }
        },
        "required": ["prompt"]
    });
    let returns = json!({
        "type": "object",
        "properties": {
            "image_path": { "type": "string" },
            "media_type": { "type": "string" },
            "model_used": { "type": "string" }
        }
    });
    ToolSchema {
        name: "generate_image".to_string(),
        description: "Generate an image using a local MLX image model.".to_string(),
        parameters,
        returns: Some(returns),
        idempotent: false,
        cache_ttl_secs: None,
        rate_limit: None,
    }
}
/// Tool schema for `generate_video`: local video generation across several
/// modes (t2v, i2v, audio-conditioned). The `extend`/`retake` surface is
/// declared but documented in the field descriptions as not yet implemented.
pub fn generate_video_schema() -> ToolSchema {
    let parameters = json!({
        "type": "object",
        "properties": {
            "prompt": { "type": "string" },
            "model": { "type": "string" },
            "width": { "type": "integer" },
            "height": { "type": "integer" },
            "num_frames": { "type": "integer" },
            "steps": { "type": "integer" },
            "guidance": { "type": "number" },
            "seed": { "type": "integer" },
            "fps": { "type": "integer" },
            "output_path": { "type": "string" },
            "format": { "type": "string", "default": "mp4" },
            "image_path": { "type": "string", "description": "Reference image for image-to-video." },
            "audio_path": {
                "type": "string",
                "description": "Existing audio reference for audio_ref_video. The file is input conditioning for visual timing, rhythm, vocal cadence, intensity, and transitions; it is not generated output."
            },
            "video_path": {
                "type": "string",
                "description": "NOT YET IMPLEMENTED on any backend. Reference video for extension or retake modes; the request surface accepts this field but invoking it returns UnsupportedMode."
            },
            "extend_after_frame": {
                "type": "integer",
                "description": "NOT YET IMPLEMENTED on any backend. Extend mode: frame index in video_path to resume from."
            },
            "extend_context_frames": {
                "type": "integer",
                "description": "NOT YET IMPLEMENTED on any backend. Extend mode: how many trailing frames of video_path the model attends to when continuing the clip."
            },
            "retake_start_frame": {
                "type": "integer",
                "description": "NOT YET IMPLEMENTED on any backend. Retake mode: inclusive start frame of the span to regenerate."
            },
            "retake_end_frame": {
                "type": "integer",
                "description": "NOT YET IMPLEMENTED on any backend. Retake mode: exclusive end frame of the span to regenerate (half-open range)."
            },
            "mode": {
                "type": "string",
                "enum": ["t2v", "i2v", "audio_video", "audio_ref_video", "extend", "retake"],
                "description": "Explicit mode. `t2v`, `i2v`, `audio_video`, and `audio_ref_video` are wired. `audio_ref_video` uses an existing audio file as the visual timing/intensity conditioning signal. `extend` and `retake` are NOT YET IMPLEMENTED — the request surface accepts them but calling a backend returns UnsupportedMode. Do not select `extend` or `retake` in production until this note is removed."
            }
        },
        "required": ["prompt"]
    });
    let returns = json!({
        "type": "object",
        "properties": {
            "video_path": { "type": "string" },
            "media_type": { "type": "string" },
            "model_used": { "type": "string" }
        }
    });
    ToolSchema {
        name: "generate_video".to_string(),
        description: "Generate a video using a local MLX video model.".to_string(),
        parameters,
        returns: Some(returns),
        idempotent: false,
        cache_ttl_secs: None,
        rate_limit: None,
    }
}
/// Tool schema for `models.list`: enumerate registered models with
/// optional capability/locality/availability filters.
pub fn list_models_schema() -> ToolSchema {
    let parameters = json!({
        "type": "object",
        "properties": {
            "capability": {
                "type": "string",
                "description": "Filter by capability (generate, embed, classify, code, reasoning, summarize, tool_use, vision, speech_to_text, text_to_speech, image_generation, video_generation)"
            },
            "local_only": {
                "type": "boolean",
                "description": "Only show local models"
            },
            "available_only": {
                "type": "boolean",
                "description": "Only show available models"
            }
        }
    });
    let returns = json!({
        "type": "object",
        "properties": {
            "models": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "id": { "type": "string" },
                        "name": { "type": "string" },
                        "provider": { "type": "string" },
                        "capabilities": { "type": "array", "items": { "type": "string" } },
                        "available": { "type": "boolean" },
                        "is_local": { "type": "boolean" }
                    }
                }
            }
        }
    });
    ToolSchema {
        name: "models.list".to_string(),
        description: "List all registered models (local and remote) with their capabilities, availability, and performance profiles.".to_string(),
        parameters,
        returns: Some(returns),
        idempotent: true,
        cache_ttl_secs: Some(60),
        rate_limit: None,
    }
}
/// Tool schema for `models.route`: dry-run routing of a prompt — shows
/// the decision without executing any model.
pub fn route_model_schema() -> ToolSchema {
    let parameters = json!({
        "type": "object",
        "properties": {
            "prompt": {
                "type": "string",
                "description": "The prompt to route"
            }
        },
        "required": ["prompt"]
    });
    let returns = json!({
        "type": "object",
        "properties": {
            "model_id": { "type": "string" },
            "model_name": { "type": "string" },
            "strategy": { "type": "string" },
            "complexity": { "type": "string" },
            "predicted_quality": { "type": "number" },
            "reason": { "type": "string" },
            "fallbacks": { "type": "array", "items": { "type": "string" } }
        }
    });
    ToolSchema {
        name: "models.route".to_string(),
        description: "Route a prompt to the best model without executing. Shows the routing decision, strategy, and fallback chain.".to_string(),
        parameters,
        returns: Some(returns),
        idempotent: true,
        cache_ttl_secs: None,
        rate_limit: None,
    }
}
/// Tool schema for `models.stats`: observed performance profiles,
/// optionally narrowed to one model.
pub fn model_stats_schema() -> ToolSchema {
    let parameters = json!({
        "type": "object",
        "properties": {
            "model_id": {
                "type": "string",
                "description": "Model ID to get stats for (omit for all models)"
            }
        }
    });
    let returns = json!({
        "type": "object",
        "properties": {
            "profiles": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "model_id": { "type": "string" },
                        "total_calls": { "type": "integer" },
                        "success_rate": { "type": "number" },
                        "avg_latency_ms": { "type": "number" },
                        "ema_quality": { "type": "number" }
                    }
                }
            }
        }
    });
    ToolSchema {
        name: "models.stats".to_string(),
        description: "Get performance statistics for models based on observed outcomes."
            .to_string(),
        parameters,
        returns: Some(returns),
        idempotent: true,
        cache_ttl_secs: Some(30),
        rate_limit: None,
    }
}
/// Tool schema for `transcribe`: speech-to-text on a local audio file.
pub fn transcribe_schema() -> ToolSchema {
    let parameters = json!({
        "type": "object",
        "properties": {
            "audio_path": {
                "type": "string",
                "description": "Path to the audio file to transcribe"
            },
            "model": {
                "type": "string",
                "description": "Optional STT model override"
            },
            "language": {
                "type": "string",
                "description": "Optional language hint"
            },
            "prompt": {
                "type": "string",
                "description": "Optional context or hotword hint"
            },
            "timestamps": {
                "type": "boolean",
                "description": "Request verbose timestamp-oriented output when supported"
            }
        },
        "required": ["audio_path"]
    });
    let returns = json!({
        "type": "object",
        "properties": {
            "text": { "type": "string" },
            "model_used": { "type": "string" },
            "language": { "type": "string" }
        }
    });
    ToolSchema {
        name: "transcribe".to_string(),
        description: "Transcribe an audio file using the best available local or remote speech-to-text model.".to_string(),
        parameters,
        returns: Some(returns),
        idempotent: true,
        cache_ttl_secs: None,
        rate_limit: None,
    }
}
/// Tool schema for `synthesize`: text-to-speech producing an audio file.
pub fn synthesize_schema() -> ToolSchema {
    let parameters = json!({
        "type": "object",
        "properties": {
            "text": {
                "type": "string",
                "description": "Text to synthesize"
            },
            "model": {
                "type": "string",
                "description": "Optional TTS model override"
            },
            "voice": {
                "type": "string",
                "description": "Optional voice preset or provider-specific voice ID"
            },
            "language": {
                "type": "string",
                "description": "Optional language or language code"
            },
            "speed": {
                "type": "number",
                "description": "Optional playback speed multiplier"
            },
            "output_path": {
                "type": "string",
                "description": "Optional destination path for the generated audio"
            },
            "format": {
                "type": "string",
                "description": "Audio format (wav, mp3, flac, pcm)",
                "default": "wav"
            }
        },
        "required": ["text"]
    });
    let returns = json!({
        "type": "object",
        "properties": {
            "audio_path": { "type": "string" },
            "media_type": { "type": "string" },
            "model_used": { "type": "string" },
            "voice_used": { "type": "string" }
        }
    });
    ToolSchema {
        name: "synthesize".to_string(),
        description: "Synthesize speech to an audio file using the best available local or remote text-to-speech model.".to_string(),
        parameters,
        returns: Some(returns),
        idempotent: false,
        cache_ttl_secs: None,
        rate_limit: None,
    }
}
/// Every tool schema exposed by this module, in registration order.
pub fn all_schemas() -> Vec<ToolSchema> {
    Vec::from([
        infer_schema(),
        infer_grounded_schema(),
        embed_schema(),
        classify_schema(),
        generate_image_schema(),
        generate_video_schema(),
        transcribe_schema(),
        synthesize_schema(),
        list_models_schema(),
        route_model_schema(),
        model_stats_schema(),
    ])
}