// edgequake_llm/model_config.rs
1//! Model Configuration Module
2//!
3//! This module provides TOML-based configuration for LLM and embedding models,
4//! including model cards with capabilities (vision, context length, costs, etc.).
5//!
6//! @implements SPEC-032: Ollama/LM Studio provider support - Model cards configuration
7//! @iteration OODA Loop #51-55 - TOML Config Schema Design
8//!
9//! # Overview
10//!
11//! The configuration file (`models.toml`) defines:
12//! - Available LLM providers and models
13//! - Embedding providers and models  
14//! - Model capabilities (vision, max tokens, context length)
15//! - Cost information (per 1K tokens)
16//! - Default selections for LLM and embedding
17//!
18//! # Configuration File Location
19//!
20//! The config file is loaded from (in order of priority):
21//! 1. `EDGEQUAKE_MODELS_CONFIG` environment variable
22//! 2. `./models.toml` (current working directory)
23//! 3. `~/.edgequake/models.toml` (user config)
24//! 4. Built-in default configuration
25//!
26//! # Example Configuration
27//!
//! ```toml
//! [defaults]
//! llm_provider = "openai"
//! llm_model = "gpt-4o-mini"
//! embedding_provider = "openai"
//! embedding_model = "text-embedding-3-small"
//!
//! [[providers]]
//! name = "openai"
//! display_name = "OpenAI"
//! type = "openai"
//! api_key_env = "OPENAI_API_KEY"
//!
//! [[providers.models]]
//! name = "gpt-4o"
//! display_name = "GPT-4 Omni"
//! model_type = "llm"
//!
//! [providers.models.capabilities]
//! context_length = 128000
//! max_output_tokens = 16384
//! supports_vision = true
//! supports_function_calling = true
//!
//! [providers.models.cost]
//! input_per_1k = 0.0025
//! output_per_1k = 0.01
//! ```
52
53use serde::{Deserialize, Serialize};
54use std::collections::HashMap;
55use std::path::Path;
56use thiserror::Error;
57
58// ============================================================================
59// Error Types
60// ============================================================================
61
/// Errors that can occur during model configuration loading.
///
/// Returned by the `ModelsConfig` load/parse/save helpers. `IoError`
/// converts automatically from `std::io::Error` via `#[from]`, so `?`
/// works directly on file operations; the other variants carry a
/// human-readable message rendered by the `#[error]` format strings.
#[derive(Error, Debug)]
pub enum ModelConfigError {
    /// Failed to read configuration file.
    #[error("Failed to read config file: {0}")]
    IoError(#[from] std::io::Error),

    /// Failed to parse TOML configuration.
    /// Stores the stringified `toml` error; also used by `to_toml` to
    /// report serialization failures.
    #[error("Failed to parse TOML config: {0}")]
    ParseError(String),

    /// Invalid configuration (missing required fields, invalid values).
    #[error("Invalid configuration: {0}")]
    ValidationError(String),

    /// Provider not found in configuration.
    #[error("Provider not found: {0}")]
    ProviderNotFound(String),

    /// Model not found in configuration.
    #[error("Model not found: {0}")]
    ModelNotFound(String),
}
85
86// ============================================================================
87// Model Types
88// ============================================================================
89
/// Type of model (LLM for chat/completion, Embedding for vectors).
///
/// Serialized in lowercase (`"llm"`, `"embedding"`, `"multimodal"`),
/// matching the strings produced by its `Display` impl. Defaults to
/// [`ModelType::Llm`] when omitted from the config.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "lowercase")]
pub enum ModelType {
    /// Language model for chat/completion (the default variant).
    #[default]
    Llm,
    /// Embedding model for vector generation.
    Embedding,
    /// Multi-modal model supporting both chat and embedding use.
    Multimodal,
}
102
103impl std::fmt::Display for ModelType {
104    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
105        match self {
106            ModelType::Llm => write!(f, "llm"),
107            ModelType::Embedding => write!(f, "embedding"),
108            ModelType::Multimodal => write!(f, "multimodal"),
109        }
110    }
111}
112
113/// Provider type for API compatibility.
114#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
115#[serde(rename_all = "lowercase")]
116pub enum ProviderType {
117    /// OpenAI API.
118    #[default]
119    OpenAI,
120    /// Ollama local server.
121    Ollama,
122    /// LM Studio local server.
123    LMStudio,
124    /// Azure OpenAI.
125    Azure,
126    /// Anthropic Claude.
127    Anthropic,
128    /// OpenRouter (200+ models).
129    OpenRouter,
130    /// Generic OpenAI-compatible API.
131    OpenAICompatible,
132    /// Mock provider for testing.
133    Mock,
134    /// Mistral AI (La Plateforme).
135    Mistral,
136}
137
138impl std::fmt::Display for ProviderType {
139    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
140        match self {
141            ProviderType::OpenAI => write!(f, "openai"),
142            ProviderType::Ollama => write!(f, "ollama"),
143            ProviderType::LMStudio => write!(f, "lmstudio"),
144            ProviderType::Azure => write!(f, "azure"),
145            ProviderType::Anthropic => write!(f, "anthropic"),
146            ProviderType::OpenRouter => write!(f, "openrouter"),
147            ProviderType::OpenAICompatible => write!(f, "openai_compatible"),
148            ProviderType::Mock => write!(f, "mock"),
149            ProviderType::Mistral => write!(f, "mistral"),
150        }
151    }
152}
153
154// ============================================================================
155// Model Capabilities
156// ============================================================================
157
/// Capabilities of a specific model.
///
/// Every field is optional in TOML. Numeric fields default to `0`
/// (meaning "unknown / not applicable"); booleans default to `false`
/// except `supports_streaming` and `supports_system_message`, which
/// default to `true` via `default_true`.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct ModelCapabilities {
    /// Maximum context length (input + output tokens).
    #[serde(default)]
    pub context_length: usize,

    /// Maximum output tokens the model can generate.
    #[serde(default)]
    pub max_output_tokens: usize,

    /// Whether the model supports vision/image input.
    #[serde(default)]
    pub supports_vision: bool,

    /// Whether the model supports function/tool calling.
    #[serde(default)]
    pub supports_function_calling: bool,

    /// Whether the model supports structured JSON output.
    #[serde(default)]
    pub supports_json_mode: bool,

    /// Whether the model supports streaming responses (defaults to `true`).
    #[serde(default = "default_true")]
    pub supports_streaming: bool,

    /// Whether the model supports system messages (defaults to `true`).
    #[serde(default = "default_true")]
    pub supports_system_message: bool,

    /// Embedding dimension (only for embedding models; `0` for pure LLMs).
    #[serde(default)]
    pub embedding_dimension: usize,

    /// Maximum tokens for embedding input.
    #[serde(default)]
    pub max_embedding_tokens: usize,

    /// OODA-200: Whether the model supports thinking/chain-of-thought mode.
    #[serde(default)]
    pub supports_thinking: bool,

    /// OODA-200: Whether the model supports web search tool.
    #[serde(default)]
    pub supports_web_search: bool,

    /// OODA-200: Recommended temperature for this model (0.0-1.0).
    // NOTE(review): `default_temperature()` returns 1.0 — the top of the
    // documented range; confirm both the range comment and the default
    // are intended.
    #[serde(default = "default_temperature")]
    pub default_temperature: f32,
}
209
/// Serde default for `ModelCapabilities::default_temperature`.
fn default_temperature() -> f32 {
    1.0f32
}

/// Serde default helper for boolean fields that start out `true`.
fn default_true() -> bool {
    true
}
217
218// ============================================================================
219// Cost Information
220// ============================================================================
221
/// Cost information for a model (per 1000 tokens).
///
/// `ModelCost::default()` is all-zero, which the built-in configuration
/// uses for free local providers (Ollama, LM Studio).
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct ModelCost {
    /// Cost per 1000 input tokens (USD).
    #[serde(default)]
    pub input_per_1k: f64,

    /// Cost per 1000 output tokens (USD).
    #[serde(default)]
    pub output_per_1k: f64,

    /// Cost per 1000 embedding tokens (USD, for embedding models).
    #[serde(default)]
    pub embedding_per_1k: f64,

    /// Cost per image processed (USD, for vision models).
    #[serde(default)]
    pub image_per_unit: f64,

    /// Currency code (default: "USD", via `default_currency`).
    #[serde(default = "default_currency")]
    pub currency: String,
}
245
/// Serde default for `ModelCost::currency`.
fn default_currency() -> String {
    String::from("USD")
}
249
250// ============================================================================
251// Model Card
252// ============================================================================
253
/// Complete model card with all metadata.
///
/// Deserialized from a `[[providers.models]]` entry; only `name` and
/// `display_name` are required, and `capabilities`/`cost` are nested
/// tables. Note the TOML key for the model type is `model_type` (no
/// serde rename), unlike `ProviderConfig`, whose type key is `type`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelCard {
    /// Unique model identifier (e.g., "gpt-4o", "nomic-embed-text").
    pub name: String,

    /// Human-readable display name.
    pub display_name: String,

    /// Model type (LLM, Embedding, Multimodal). Defaults to LLM.
    #[serde(default)]
    pub model_type: ModelType,

    /// Model capabilities (nested `capabilities` table).
    #[serde(default)]
    pub capabilities: ModelCapabilities,

    /// Cost information (nested `cost` table; all-zero when omitted).
    #[serde(default)]
    pub cost: ModelCost,

    /// Optional description of the model.
    #[serde(default)]
    pub description: String,

    /// Release date or version.
    #[serde(default)]
    pub version: String,

    /// Whether the model is deprecated.
    #[serde(default)]
    pub deprecated: bool,

    /// Recommended replacement model name if `deprecated` is set.
    #[serde(default)]
    pub replacement: Option<String>,

    /// Tags for categorization (e.g., "recommended", "fast", "vision").
    #[serde(default)]
    pub tags: Vec<String>,

    /// Additional metadata as key-value pairs.
    #[serde(default)]
    pub metadata: HashMap<String, String>,
}
299
300impl Default for ModelCard {
301    fn default() -> Self {
302        Self {
303            name: "unknown".to_string(),
304            display_name: "Unknown Model".to_string(),
305            model_type: ModelType::Llm,
306            capabilities: ModelCapabilities::default(),
307            cost: ModelCost::default(),
308            description: String::new(),
309            version: String::new(),
310            deprecated: false,
311            replacement: None,
312            tags: Vec::new(),
313            metadata: HashMap::new(),
314        }
315    }
316}
317
318// ============================================================================
319// Provider Configuration
320// ============================================================================
321
/// Configuration for a provider (OpenAI, Ollama, LM Studio, etc.).
///
/// Deserialized from a `[[providers]]` array-of-tables entry in
/// `models.toml`. Only `name`, `display_name`, and `type` are required;
/// every other field has a serde default.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProviderConfig {
    /// Unique provider identifier (e.g., "openai", "ollama").
    pub name: String,

    /// Human-readable display name.
    pub display_name: String,

    /// Provider type for API compatibility (TOML key: `type`).
    #[serde(rename = "type")]
    pub provider_type: ProviderType,

    /// Environment variable name for API key (if required).
    #[serde(default)]
    pub api_key_env: Option<String>,

    /// Base URL for the provider API.
    #[serde(default)]
    pub base_url: Option<String>,

    /// Environment variable for base URL override.
    #[serde(default)]
    pub base_url_env: Option<String>,

    /// Default model for LLM operations.
    #[serde(default)]
    pub default_llm_model: Option<String>,

    /// Default model for embedding operations.
    /// `None` for providers without embedding support.
    #[serde(default)]
    pub default_embedding_model: Option<String>,

    /// List of available models for this provider.
    #[serde(default)]
    pub models: Vec<ModelCard>,

    /// Whether this provider is enabled (defaults to `true`).
    #[serde(default = "default_true")]
    pub enabled: bool,

    /// Priority for auto-selection (lower = higher priority; default 100).
    // NOTE(review): the built-in Anthropic entry (priority 15) says it is
    // preferred over OpenAI (priority 10), which contradicts
    // "lower = higher priority" — confirm the intended ordering.
    #[serde(default = "default_priority")]
    pub priority: u32,

    /// Description of the provider.
    #[serde(default)]
    pub description: String,

    /// Additional provider-specific settings.
    #[serde(default)]
    pub settings: HashMap<String, String>,

    /// OODA-200: Custom HTTP headers for API requests.
    /// Useful for providers that require additional headers like Accept-Language.
    #[serde(default)]
    pub headers: HashMap<String, String>,

    /// OODA-200: Request timeout in seconds (default: 120).
    #[serde(default = "default_timeout")]
    pub timeout_seconds: u64,

    /// OODA-200: Whether this provider supports thinking/reasoning mode (e.g., Z.ai GLM-4.5).
    #[serde(default)]
    pub supports_thinking: bool,
}
388
/// Serde default for `ProviderConfig::timeout_seconds` (two minutes).
fn default_timeout() -> u64 {
    2 * 60
}

/// Serde default for `ProviderConfig::priority`.
fn default_priority() -> u32 {
    100
}
396
397impl Default for ProviderConfig {
398    fn default() -> Self {
399        Self {
400            name: "unknown".to_string(),
401            display_name: "Unknown Provider".to_string(),
402            provider_type: ProviderType::OpenAI,
403            api_key_env: None,
404            base_url: None,
405            base_url_env: None,
406            default_llm_model: None,
407            default_embedding_model: None,
408            models: Vec::new(),
409            enabled: true,
410            priority: 100,
411            description: String::new(),
412            settings: HashMap::new(),
413            headers: HashMap::new(),
414            timeout_seconds: default_timeout(),
415            supports_thinking: false,
416        }
417    }
418}
419
420// ============================================================================
421// Default Configuration
422// ============================================================================
423
/// Default provider and model selections.
///
/// Maps to the `[defaults]` table. Every field has a serde default, so
/// the whole section may be omitted; the defaults select OpenAI with
/// `gpt-4o-mini` for chat and `text-embedding-3-small` for embeddings.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DefaultsConfig {
    /// Default LLM provider name (default: `"openai"`).
    #[serde(default = "default_llm_provider")]
    pub llm_provider: String,

    /// Default LLM model name (default: `"gpt-4o-mini"`).
    #[serde(default = "default_llm_model")]
    pub llm_model: String,

    /// Default embedding provider name (default: `"openai"`).
    #[serde(default = "default_embedding_provider")]
    pub embedding_provider: String,

    /// Default embedding model name (default: `"text-embedding-3-small"`).
    #[serde(default = "default_embedding_model")]
    pub embedding_model: String,
}
443
// Serde default helpers for the `DefaultsConfig` fields.

/// Default chat provider: OpenAI.
fn default_llm_provider() -> String {
    String::from("openai")
}

/// Default chat model.
fn default_llm_model() -> String {
    String::from("gpt-4o-mini")
}

/// Default embedding provider: OpenAI.
fn default_embedding_provider() -> String {
    String::from("openai")
}

/// Default embedding model.
fn default_embedding_model() -> String {
    String::from("text-embedding-3-small")
}
459
460impl Default for DefaultsConfig {
461    fn default() -> Self {
462        Self {
463            llm_provider: default_llm_provider(),
464            llm_model: default_llm_model(),
465            embedding_provider: default_embedding_provider(),
466            embedding_model: default_embedding_model(),
467        }
468    }
469}
470
471// ============================================================================
472// Root Configuration
473// ============================================================================
474
/// Root configuration structure for models.toml.
///
/// Both sections are optional: an empty TOML document deserializes into
/// the built-in defaults with no providers configured.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct ModelsConfig {
    /// Default provider/model selections (the `[defaults]` table).
    #[serde(default)]
    pub defaults: DefaultsConfig,

    /// List of configured providers (`[[providers]]` array-of-tables).
    #[serde(default)]
    pub providers: Vec<ProviderConfig>,
}
486
487impl ModelsConfig {
488    /// Load configuration from the default location.
489    ///
490    /// Searches in order:
491    /// 1. `EDGEQUAKE_MODELS_CONFIG` environment variable
492    /// 2. `./models.toml`
493    /// 3. `~/.edgequake/models.toml`
494    /// 4. Built-in defaults
495    pub fn load() -> Result<Self, ModelConfigError> {
496        // Check environment variable first
497        if let Ok(path) = std::env::var("EDGEQUAKE_MODELS_CONFIG") {
498            if Path::new(&path).exists() {
499                return Self::from_file(&path);
500            }
501        }
502
503        // Check current directory
504        let local_path = Path::new("models.toml");
505        if local_path.exists() {
506            return Self::from_file(local_path);
507        }
508
509        // Check user config directory
510        if let Some(home) = dirs::home_dir() {
511            let user_path = home.join(".edgequake").join("models.toml");
512            if user_path.exists() {
513                return Self::from_file(&user_path);
514            }
515        }
516
517        // Fall back to built-in defaults
518        Ok(Self::builtin_defaults())
519    }
520
521    /// Load configuration from a specific file path.
522    pub fn from_file(path: impl AsRef<Path>) -> Result<Self, ModelConfigError> {
523        let content = std::fs::read_to_string(path.as_ref())?;
524        Self::from_toml(&content)
525    }
526
527    /// Parse configuration from TOML string.
528    pub fn from_toml(toml_str: &str) -> Result<Self, ModelConfigError> {
529        toml::from_str(toml_str).map_err(|e| ModelConfigError::ParseError(e.to_string()))
530    }
531
532    /// Serialize configuration to TOML string.
533    pub fn to_toml(&self) -> Result<String, ModelConfigError> {
534        toml::to_string_pretty(self).map_err(|e| ModelConfigError::ParseError(e.to_string()))
535    }
536
537    /// Save configuration to a file.
538    pub fn save(&self, path: impl AsRef<Path>) -> Result<(), ModelConfigError> {
539        let toml_str = self.to_toml()?;
540        std::fs::write(path.as_ref(), toml_str)?;
541        Ok(())
542    }
543
544    /// Get built-in default configuration with common providers.
545    pub fn builtin_defaults() -> Self {
546        Self {
547            defaults: DefaultsConfig::default(),
548            providers: vec![
549                // OpenAI provider
550                ProviderConfig {
551                    name: "openai".to_string(),
552                    display_name: "OpenAI".to_string(),
553                    provider_type: ProviderType::OpenAI,
554                    api_key_env: Some("OPENAI_API_KEY".to_string()),
555                    base_url: Some("https://api.openai.com/v1".to_string()),
556                    base_url_env: Some("OPENAI_API_BASE".to_string()),
557                    default_llm_model: Some("gpt-4o-mini".to_string()),
558                    default_embedding_model: Some("text-embedding-3-small".to_string()),
559                    priority: 10,
560                    models: vec![
561                        ModelCard {
562                            name: "gpt-4o".to_string(),
563                            display_name: "GPT-4 Omni".to_string(),
564                            model_type: ModelType::Llm,
565                            capabilities: ModelCapabilities {
566                                context_length: 128000,
567                                max_output_tokens: 16384,
568                                supports_vision: true,
569                                supports_function_calling: true,
570                                supports_json_mode: true,
571                                supports_streaming: true,
572                                ..Default::default()
573                            },
574                            cost: ModelCost {
575                                input_per_1k: 0.0025,
576                                output_per_1k: 0.01,
577                                ..Default::default()
578                            },
579                            description: "Most capable GPT-4 model with vision support".to_string(),
580                            ..Default::default()
581                        },
582                        ModelCard {
583                            name: "gpt-4o-mini".to_string(),
584                            display_name: "GPT-4 Omni Mini".to_string(),
585                            model_type: ModelType::Llm,
586                            capabilities: ModelCapabilities {
587                                context_length: 128000,
588                                max_output_tokens: 16384,
589                                supports_vision: true,
590                                supports_function_calling: true,
591                                supports_json_mode: true,
592                                supports_streaming: true,
593                                ..Default::default()
594                            },
595                            cost: ModelCost {
596                                input_per_1k: 0.00015,
597                                output_per_1k: 0.0006,
598                                ..Default::default()
599                            },
600                            description: "Cost-effective GPT-4 variant".to_string(),
601                            ..Default::default()
602                        },
603                        ModelCard {
604                            name: "text-embedding-3-small".to_string(),
605                            display_name: "Embedding 3 Small".to_string(),
606                            model_type: ModelType::Embedding,
607                            capabilities: ModelCapabilities {
608                                embedding_dimension: 1536,
609                                max_embedding_tokens: 8191,
610                                ..Default::default()
611                            },
612                            cost: ModelCost {
613                                embedding_per_1k: 0.00002,
614                                ..Default::default()
615                            },
616                            description: "Efficient embedding model".to_string(),
617                            ..Default::default()
618                        },
619                        ModelCard {
620                            name: "text-embedding-3-large".to_string(),
621                            display_name: "Embedding 3 Large".to_string(),
622                            model_type: ModelType::Embedding,
623                            capabilities: ModelCapabilities {
624                                embedding_dimension: 3072,
625                                max_embedding_tokens: 8191,
626                                ..Default::default()
627                            },
628                            cost: ModelCost {
629                                embedding_per_1k: 0.00013,
630                                ..Default::default()
631                            },
632                            description: "High-quality embedding model".to_string(),
633                            ..Default::default()
634                        },
635                    ],
636                    ..Default::default()
637                },
638                // OODA-32: Anthropic provider (Claude models)
639                // WHY: Direct Anthropic API access for Claude models
640                // Supports: claude-sonnet-4.5, claude-3.5-sonnet, claude-3.5-haiku
641                ProviderConfig {
642                    name: "anthropic".to_string(),
643                    display_name: "Anthropic (Claude)".to_string(),
644                    provider_type: ProviderType::Anthropic,
645                    api_key_env: Some("ANTHROPIC_API_KEY".to_string()),
646                    base_url: Some("https://api.anthropic.com".to_string()),
647                    base_url_env: Some("ANTHROPIC_API_BASE".to_string()),
648                    default_llm_model: Some("claude-sonnet-4-5-20250929".to_string()),
649                    default_embedding_model: None, // Anthropic doesn't support embeddings
650                    priority: 15, // Higher than OpenAI (10), prefer Claude when available
651                    models: vec![
652                        ModelCard {
653                            name: "claude-sonnet-4-5-20250929".to_string(),
654                            display_name: "Claude Sonnet 4.5".to_string(),
655                            model_type: ModelType::Llm,
656                            capabilities: ModelCapabilities {
657                                context_length: 200000,
658                                max_output_tokens: 8192,
659                                supports_vision: true,
660                                supports_function_calling: true,
661                                supports_streaming: true,
662                                ..Default::default()
663                            },
664                            cost: ModelCost {
665                                input_per_1k: 0.003,
666                                output_per_1k: 0.015,
667                                ..Default::default()
668                            },
669                            description: "Anthropic's most capable model with excellent coding".to_string(),
670                            ..Default::default()
671                        },
672                        ModelCard {
673                            name: "claude-3-5-sonnet-20241022".to_string(),
674                            display_name: "Claude 3.5 Sonnet".to_string(),
675                            model_type: ModelType::Llm,
676                            capabilities: ModelCapabilities {
677                                context_length: 200000,
678                                max_output_tokens: 8192,
679                                supports_vision: true,
680                                supports_function_calling: true,
681                                supports_streaming: true,
682                                ..Default::default()
683                            },
684                            cost: ModelCost {
685                                input_per_1k: 0.003,
686                                output_per_1k: 0.015,
687                                ..Default::default()
688                            },
689                            description: "Previous generation Sonnet, stable and reliable".to_string(),
690                            ..Default::default()
691                        },
692                        ModelCard {
693                            name: "claude-3-5-haiku-20241022".to_string(),
694                            display_name: "Claude 3.5 Haiku".to_string(),
695                            model_type: ModelType::Llm,
696                            capabilities: ModelCapabilities {
697                                context_length: 200000,
698                                max_output_tokens: 8192,
699                                supports_vision: true,
700                                supports_function_calling: true,
701                                supports_streaming: true,
702                                ..Default::default()
703                            },
704                            cost: ModelCost {
705                                input_per_1k: 0.0008,
706                                output_per_1k: 0.004,
707                                ..Default::default()
708                            },
709                            description: "Fast and cost-effective Claude model".to_string(),
710                            ..Default::default()
711                        },
712                    ],
713                    ..Default::default()
714                },
715                // Ollama provider
716                ProviderConfig {
717                    name: "ollama".to_string(),
718                    display_name: "Ollama (Local)".to_string(),
719                    provider_type: ProviderType::Ollama,
720                    base_url: Some("http://localhost:11434".to_string()),
721                    base_url_env: Some("OLLAMA_HOST".to_string()),
722                    default_llm_model: Some("gemma3:12b".to_string()),
723                    default_embedding_model: Some("nomic-embed-text".to_string()),
724                    priority: 20,
725                    models: vec![
726                        ModelCard {
727                            name: "gemma3:12b".to_string(),
728                            display_name: "Gemma 3 12B".to_string(),
729                            model_type: ModelType::Llm,
730                            capabilities: ModelCapabilities {
731                                context_length: 8192,
732                                max_output_tokens: 4096,
733                                supports_streaming: true,
734                                ..Default::default()
735                            },
736                            cost: ModelCost::default(), // Free for local
737                            description: "Google's Gemma 3 12B parameter model".to_string(),
738                            ..Default::default()
739                        },
740                        ModelCard {
741                            name: "llama3.3:70b".to_string(),
742                            display_name: "Llama 3.3 70B".to_string(),
743                            model_type: ModelType::Llm,
744                            capabilities: ModelCapabilities {
745                                context_length: 131072,
746                                max_output_tokens: 8192,
747                                supports_function_calling: true,
748                                supports_streaming: true,
749                                ..Default::default()
750                            },
751                            cost: ModelCost::default(),
752                            description: "Meta's Llama 3.3 70B with extended context".to_string(),
753                            ..Default::default()
754                        },
755                        // OODA-32: Add qwen3-coder and gpt-oss:20b for coding tasks
756                        ModelCard {
757                            name: "qwen3-coder".to_string(),
758                            display_name: "Qwen3 Coder".to_string(),
759                            model_type: ModelType::Llm,
760                            capabilities: ModelCapabilities {
761                                context_length: 32768,
762                                max_output_tokens: 8192,
763                                supports_function_calling: true,
764                                supports_streaming: true,
765                                ..Default::default()
766                            },
767                            cost: ModelCost::default(),
768                            description: "Qwen3 optimized for coding tasks".to_string(),
769                            ..Default::default()
770                        },
771                        ModelCard {
772                            name: "gpt-oss:20b".to_string(),
773                            display_name: "GPT-OSS 20B".to_string(),
774                            model_type: ModelType::Llm,
775                            capabilities: ModelCapabilities {
776                                context_length: 32768,
777                                max_output_tokens: 8192,
778                                supports_streaming: true,
779                                ..Default::default()
780                            },
781                            cost: ModelCost::default(),
782                            description: "Open-source GPT model, 20B parameters".to_string(),
783                            ..Default::default()
784                        },
785                        ModelCard {
786                            name: "nomic-embed-text".to_string(),
787                            display_name: "Nomic Embed Text".to_string(),
788                            model_type: ModelType::Embedding,
789                            capabilities: ModelCapabilities {
790                                embedding_dimension: 768,
791                                max_embedding_tokens: 8192,
792                                ..Default::default()
793                            },
794                            cost: ModelCost::default(),
795                            description: "High-quality local embedding model".to_string(),
796                            ..Default::default()
797                        },
798                        ModelCard {
799                            name: "mxbai-embed-large".to_string(),
800                            display_name: "MxBai Embed Large".to_string(),
801                            model_type: ModelType::Embedding,
802                            capabilities: ModelCapabilities {
803                                embedding_dimension: 1024,
804                                max_embedding_tokens: 512,
805                                ..Default::default()
806                            },
807                            cost: ModelCost::default(),
808                            description: "Large embedding model with 1024 dimensions".to_string(),
809                            ..Default::default()
810                        },
811                    ],
812                    ..Default::default()
813                },
814                // LM Studio provider
815                ProviderConfig {
816                    name: "lmstudio".to_string(),
817                    display_name: "LM Studio (Local)".to_string(),
818                    provider_type: ProviderType::LMStudio,
819                    base_url: Some("http://localhost:1234/v1".to_string()),
820                    base_url_env: Some("LMSTUDIO_HOST".to_string()),
821                    default_llm_model: Some("local-model".to_string()),
822                    default_embedding_model: Some("nomic-embed-text-v1.5".to_string()),
823                    priority: 30,
824                    models: vec![
825                        ModelCard {
826                            name: "local-model".to_string(),
827                            display_name: "Local LM Studio Model".to_string(),
828                            model_type: ModelType::Llm,
829                            capabilities: ModelCapabilities {
830                                context_length: 4096,
831                                max_output_tokens: 2048,
832                                supports_streaming: true,
833                                ..Default::default()
834                            },
835                            cost: ModelCost::default(),
836                            description: "Currently loaded model in LM Studio".to_string(),
837                            ..Default::default()
838                        },
839                        ModelCard {
840                            name: "nomic-embed-text-v1.5".to_string(),
841                            display_name: "Nomic Embed Text v1.5".to_string(),
842                            model_type: ModelType::Embedding,
843                            capabilities: ModelCapabilities {
844                                embedding_dimension: 768,
845                                max_embedding_tokens: 8192,
846                                ..Default::default()
847                            },
848                            cost: ModelCost::default(),
849                            description: "Nomic embedding model for LM Studio".to_string(),
850                            ..Default::default()
851                        },
852                    ],
853                    ..Default::default()
854                },
855                // Z.ai provider (OpenAI-compatible)
856                // OODA-200: Configurable OpenAI-compatible providers
857                ProviderConfig {
858                    name: "zai".to_string(),
859                    display_name: "Z.AI Platform".to_string(),
860                    provider_type: ProviderType::OpenAICompatible,
861                    api_key_env: Some("ZAI_API_KEY".to_string()),
862                    base_url: Some("https://api.z.ai/api/paas/v4".to_string()),
863                    default_llm_model: Some("glm-4.7-flash".to_string()),
864                    priority: 15,
865                    headers: {
866                        let mut h = std::collections::HashMap::new();
867                        h.insert("Accept-Language".to_string(), "en-US,en".to_string());
868                        h
869                    },
870                    supports_thinking: true,
871                    models: vec![
872                        ModelCard {
873                            name: "glm-4.7".to_string(),
874                            display_name: "GLM-4.7 (Premium)".to_string(),
875                            model_type: ModelType::Llm,
876                            capabilities: ModelCapabilities {
877                                context_length: 128000,
878                                max_output_tokens: 16384,
879                                supports_vision: true,
880                                supports_function_calling: true,
881                                supports_json_mode: true,
882                                supports_streaming: true,
883                                supports_thinking: true,
884                                ..Default::default()
885                            },
886                            cost: ModelCost {
887                                input_per_1k: 0.2,
888                                output_per_1k: 1.1,
889                                ..Default::default()
890                            },
891                            description: "Z.ai's flagship model with thinking mode".to_string(),
892                            tags: vec!["reasoning".to_string(), "coding".to_string(), "agent".to_string()],
893                            ..Default::default()
894                        },
895                        ModelCard {
896                            name: "glm-4.7-flash".to_string(),
897                            display_name: "GLM-4.7 Flash (Fast)".to_string(),
898                            model_type: ModelType::Llm,
899                            capabilities: ModelCapabilities {
900                                context_length: 128000,
901                                max_output_tokens: 8192,
902                                supports_function_calling: true,
903                                supports_json_mode: true,
904                                supports_streaming: true,
905                                ..Default::default()
906                            },
907                            cost: ModelCost {
908                                input_per_1k: 0.0,
909                                output_per_1k: 0.0,
910                                ..Default::default()
911                            },
912                            description: "Free, fast Z.ai model".to_string(),
913                            tags: vec!["fast".to_string(), "free".to_string()],
914                            ..Default::default()
915                        },
916                        ModelCard {
917                            name: "glm-4.5".to_string(),
918                            display_name: "GLM-4.5 (Reasoning)".to_string(),
919                            model_type: ModelType::Llm,
920                            capabilities: ModelCapabilities {
921                                context_length: 128000,
922                                max_output_tokens: 96000,
923                                supports_vision: true,
924                                supports_function_calling: true,
925                                supports_streaming: true,
926                                supports_thinking: true,
927                                ..Default::default()
928                            },
929                            cost: ModelCost {
930                                input_per_1k: 0.2,
931                                output_per_1k: 1.1,
932                                ..Default::default()
933                            },
934                            description: "Z.ai reasoning model for complex tasks".to_string(),
935                            tags: vec!["reasoning".to_string(), "coding".to_string()],
936                            ..Default::default()
937                        },
938                    ],
939                    ..Default::default()
940                },
941                // POE provider (OpenAI-compatible)
942                // OODA-200: Configurable OpenAI-compatible providers
943                // Updated 2026-01-24: Use correct POE API model names (PascalCase)
944                // Reference: https://creator.poe.com/api-reference/listModels
945                ProviderConfig {
946                    name: "poe".to_string(),
947                    display_name: "POE Platform".to_string(),
948                    provider_type: ProviderType::OpenAICompatible,
949                    api_key_env: Some("POE_API_KEY".to_string()),
950                    base_url: Some("https://api.poe.com/v1".to_string()),
951                    default_llm_model: Some("Claude-Haiku-4.5".to_string()),
952                    priority: 16,
953                    models: vec![
954                        // Claude models via POE (Anthropic's latest)
955                        ModelCard {
956                            name: "Claude-Sonnet-4.5".to_string(),
957                            display_name: "Claude Sonnet 4.5 (POE)".to_string(),
958                            model_type: ModelType::Llm,
959                            capabilities: ModelCapabilities {
960                                context_length: 200000,
961                                max_output_tokens: 16384,
962                                supports_vision: true,
963                                supports_function_calling: true,
964                                supports_streaming: true,
965                                supports_thinking: true,
966                                ..Default::default()
967                            },
968                            cost: ModelCost::default(),
969                            description: "Claude Sonnet 4.5 - Anthropic's most advanced model via POE".to_string(),
970                            tags: vec!["reasoning".to_string(), "coding".to_string()],
971                            ..Default::default()
972                        },
973                        ModelCard {
974                            name: "Claude-Haiku-4.5".to_string(),
975                            display_name: "Claude Haiku 4.5 (POE)".to_string(),
976                            model_type: ModelType::Llm,
977                            capabilities: ModelCapabilities {
978                                context_length: 200000,
979                                max_output_tokens: 8192,
980                                supports_vision: true,
981                                supports_function_calling: true,
982                                supports_streaming: true,
983                                ..Default::default()
984                            },
985                            cost: ModelCost::default(),
986                            description: "Claude Haiku 4.5 - Fast and efficient with frontier intelligence via POE".to_string(),
987                            tags: vec!["fast".to_string()],
988                            ..Default::default()
989                        },
990                        ModelCard {
991                            name: "Claude-Opus-4.1".to_string(),
992                            display_name: "Claude Opus 4.1 (POE)".to_string(),
993                            model_type: ModelType::Llm,
994                            capabilities: ModelCapabilities {
995                                context_length: 200000,
996                                max_output_tokens: 16384,
997                                supports_vision: true,
998                                supports_function_calling: true,
999                                supports_streaming: true,
1000                                supports_thinking: true,
1001                                ..Default::default()
1002                            },
1003                            cost: ModelCost::default(),
1004                            description: "Claude Opus 4.1 - Anthropic's premium model for complex tasks via POE".to_string(),
1005                            tags: vec!["reasoning".to_string(), "pro".to_string()],
1006                            ..Default::default()
1007                        },
1008                        // GPT models via POE (OpenAI's latest)
1009                        ModelCard {
1010                            name: "GPT-5-Pro".to_string(),
1011                            display_name: "GPT-5 Pro (POE)".to_string(),
1012                            model_type: ModelType::Llm,
1013                            capabilities: ModelCapabilities {
1014                                context_length: 128000,
1015                                max_output_tokens: 32768,
1016                                supports_vision: true,
1017                                supports_function_calling: true,
1018                                supports_streaming: true,
1019                                supports_thinking: true,
1020                                ..Default::default()
1021                            },
1022                            cost: ModelCost::default(),
1023                            description: "GPT-5 Pro - OpenAI's flagship model with extended reasoning via POE".to_string(),
1024                            tags: vec!["reasoning".to_string(), "pro".to_string()],
1025                            ..Default::default()
1026                        },
1027                        ModelCard {
1028                            name: "GPT-5".to_string(),
1029                            display_name: "GPT-5 (POE)".to_string(),
1030                            model_type: ModelType::Llm,
1031                            capabilities: ModelCapabilities {
1032                                context_length: 128000,
1033                                max_output_tokens: 16384,
1034                                supports_vision: true,
1035                                supports_function_calling: true,
1036                                supports_streaming: true,
1037                                ..Default::default()
1038                            },
1039                            cost: ModelCost::default(),
1040                            description: "GPT-5 - OpenAI's next-generation model via POE".to_string(),
1041                            ..Default::default()
1042                        },
1043                        ModelCard {
1044                            name: "GPT-5-Codex".to_string(),
1045                            display_name: "GPT-5 Codex (POE)".to_string(),
1046                            model_type: ModelType::Llm,
1047                            capabilities: ModelCapabilities {
1048                                context_length: 128000,
1049                                max_output_tokens: 16384,
1050                                supports_function_calling: true,
1051                                supports_streaming: true,
1052                                ..Default::default()
1053                            },
1054                            cost: ModelCost::default(),
1055                            description: "GPT-5 Codex - Specialized for software engineering tasks via POE".to_string(),
1056                            tags: vec!["coding".to_string()],
1057                            ..Default::default()
1058                        },
1059                        // Grok models via POE (xAI)
1060                        ModelCard {
1061                            name: "Grok-4".to_string(),
1062                            display_name: "Grok-4 (POE)".to_string(),
1063                            model_type: ModelType::Llm,
1064                            capabilities: ModelCapabilities {
1065                                context_length: 131072,
1066                                max_output_tokens: 32768,
1067                                supports_function_calling: true,
1068                                supports_streaming: true,
1069                                supports_thinking: true,
1070                                ..Default::default()
1071                            },
1072                            cost: ModelCost::default(),
1073                            description: "Grok-4 - xAI's most intelligent language model via POE".to_string(),
1074                            tags: vec!["reasoning".to_string(), "coding".to_string()],
1075                            ..Default::default()
1076                        },
1077                        // DeepSeek models via POE
1078                        ModelCard {
1079                            name: "DeepSeek-R1".to_string(),
1080                            display_name: "DeepSeek R1 (POE)".to_string(),
1081                            model_type: ModelType::Llm,
1082                            capabilities: ModelCapabilities {
1083                                context_length: 128000,
1084                                max_output_tokens: 16384,
1085                                supports_function_calling: true,
1086                                supports_streaming: true,
1087                                supports_thinking: true,
1088                                ..Default::default()
1089                            },
1090                            cost: ModelCost::default(),
1091                            description: "DeepSeek R1 - Top open-source reasoning model via POE".to_string(),
1092                            tags: vec!["reasoning".to_string(), "open-source".to_string()],
1093                            ..Default::default()
1094                        },
1095                        ModelCard {
1096                            name: "DeepSeek-V3".to_string(),
1097                            display_name: "DeepSeek V3 (POE)".to_string(),
1098                            model_type: ModelType::Llm,
1099                            capabilities: ModelCapabilities {
1100                                context_length: 128000,
1101                                max_output_tokens: 16384,
1102                                supports_function_calling: true,
1103                                supports_streaming: true,
1104                                ..Default::default()
1105                            },
1106                            cost: ModelCost::default(),
1107                            description: "DeepSeek V3 - Advanced open-source model via POE".to_string(),
1108                            tags: vec!["open-source".to_string()],
1109                            ..Default::default()
1110                        },
1111                        // Gemini models via POE (Google)
1112                        ModelCard {
1113                            name: "Gemini-2.5-Pro".to_string(),
1114                            display_name: "Gemini 2.5 Pro (POE)".to_string(),
1115                            model_type: ModelType::Llm,
1116                            capabilities: ModelCapabilities {
1117                                context_length: 1000000,
1118                                max_output_tokens: 65536,
1119                                supports_vision: true,
1120                                supports_function_calling: true,
1121                                supports_streaming: true,
1122                                ..Default::default()
1123                            },
1124                            cost: ModelCost::default(),
1125                            description: "Gemini 2.5 Pro - Google's advanced model with web search via POE".to_string(),
1126                            tags: vec!["reasoning".to_string(), "web-search".to_string()],
1127                            ..Default::default()
1128                        },
1129                        ModelCard {
1130                            name: "Gemini-2.5-Flash".to_string(),
1131                            display_name: "Gemini 2.5 Flash (POE)".to_string(),
1132                            model_type: ModelType::Llm,
1133                            capabilities: ModelCapabilities {
1134                                context_length: 1000000,
1135                                max_output_tokens: 65536,
1136                                supports_vision: true,
1137                                supports_function_calling: true,
1138                                supports_streaming: true,
1139                                ..Default::default()
1140                            },
1141                            cost: ModelCost::default(),
1142                            description: "Gemini 2.5 Flash - Fast variant with large context via POE".to_string(),
1143                            tags: vec!["fast".to_string()],
1144                            ..Default::default()
1145                        },
1146                    ],
1147                    ..Default::default()
1148                },
1149                // Mistral AI provider
1150                ProviderConfig {
1151                    name: "mistral".to_string(),
1152                    display_name: "Mistral AI".to_string(),
1153                    provider_type: ProviderType::Mistral,
1154                    api_key_env: Some("MISTRAL_API_KEY".to_string()),
1155                    base_url: Some("https://api.mistral.ai/v1".to_string()),
1156                    default_llm_model: Some("mistral-small-latest".to_string()),
1157                    default_embedding_model: Some("mistral-embed".to_string()),
1158                    priority: 50,
1159                    models: vec![
1160                        ModelCard {
1161                            name: "mistral-small-latest".to_string(),
1162                            display_name: "Mistral Small (Latest)".to_string(),
1163                            model_type: ModelType::Llm,
1164                            capabilities: ModelCapabilities {
1165                                context_length: 32768,
1166                                max_output_tokens: 4096,
1167                                supports_vision: false,
1168                                supports_function_calling: true,
1169                                supports_streaming: true,
1170                                ..Default::default()
1171                            },
1172                            cost: ModelCost::default(),
1173                            description: "Mistral Small — efficient and cost-effective model".to_string(),
1174                            tags: vec!["fast".to_string(), "affordable".to_string()],
1175                            ..Default::default()
1176                        },
1177                        ModelCard {
1178                            name: "mistral-large-latest".to_string(),
1179                            display_name: "Mistral Large (Latest)".to_string(),
1180                            model_type: ModelType::Llm,
1181                            capabilities: ModelCapabilities {
1182                                context_length: 131072,
1183                                max_output_tokens: 4096,
1184                                supports_vision: false,
1185                                supports_function_calling: true,
1186                                supports_streaming: true,
1187                                ..Default::default()
1188                            },
1189                            cost: ModelCost::default(),
1190                            description: "Mistral Large — flagship reasoning model".to_string(),
1191                            tags: vec!["powerful".to_string()],
1192                            ..Default::default()
1193                        },
1194                        ModelCard {
1195                            name: "mistral-medium-latest".to_string(),
1196                            display_name: "Mistral Medium (Latest)".to_string(),
1197                            model_type: ModelType::Llm,
1198                            capabilities: ModelCapabilities {
1199                                context_length: 131072,
1200                                max_output_tokens: 4096,
1201                                supports_vision: false,
1202                                supports_function_calling: true,
1203                                supports_streaming: true,
1204                                ..Default::default()
1205                            },
1206                            cost: ModelCost::default(),
1207                            description: "Mistral Medium — balanced performance model".to_string(),
1208                            tags: vec!["balanced".to_string()],
1209                            ..Default::default()
1210                        },
1211                        ModelCard {
1212                            name: "codestral-latest".to_string(),
1213                            display_name: "Codestral (Latest)".to_string(),
1214                            model_type: ModelType::Llm,
1215                            capabilities: ModelCapabilities {
1216                                context_length: 32768,
1217                                max_output_tokens: 4096,
1218                                supports_vision: false,
1219                                supports_function_calling: true,
1220                                supports_streaming: true,
1221                                ..Default::default()
1222                            },
1223                            cost: ModelCost::default(),
1224                            description: "Codestral — specialized code generation model".to_string(),
1225                            tags: vec!["code".to_string()],
1226                            ..Default::default()
1227                        },
1228                        ModelCard {
1229                            name: "mistral-embed".to_string(),
1230                            display_name: "Mistral Embed".to_string(),
1231                            model_type: ModelType::Embedding,
1232                            capabilities: ModelCapabilities {
1233                                embedding_dimension: 1024,
1234                                max_embedding_tokens: 8192,
1235                                ..Default::default()
1236                            },
1237                            cost: ModelCost::default(),
1238                            description: "Mistral embedding model — 1024-dimensional dense embeddings".to_string(),
1239                            tags: vec!["embedding".to_string()],
1240                            ..Default::default()
1241                        },
1242                    ],
1243                    ..Default::default()
1244                },
1245                // Mock provider for testing
1246                ProviderConfig {
1247                    name: "mock".to_string(),
1248                    display_name: "Mock (Testing)".to_string(),
1249                    provider_type: ProviderType::Mock,
1250                    default_llm_model: Some("mock-model".to_string()),
1251                    default_embedding_model: Some("mock-embedding".to_string()),
1252                    priority: 1000,
1253                    models: vec![
1254                        ModelCard {
1255                            name: "mock-model".to_string(),
1256                            display_name: "Mock LLM".to_string(),
1257                            model_type: ModelType::Llm,
1258                            capabilities: ModelCapabilities {
1259                                context_length: 4096,
1260                                max_output_tokens: 2048,
1261                                supports_streaming: true,
1262                                ..Default::default()
1263                            },
1264                            cost: ModelCost::default(),
1265                            description: "Mock model for testing".to_string(),
1266                            ..Default::default()
1267                        },
1268                        ModelCard {
1269                            name: "mock-embedding".to_string(),
1270                            display_name: "Mock Embedding".to_string(),
1271                            model_type: ModelType::Embedding,
1272                            capabilities: ModelCapabilities {
1273                                embedding_dimension: 1536,
1274                                max_embedding_tokens: 512,
1275                                ..Default::default()
1276                            },
1277                            cost: ModelCost::default(),
1278                            description: "Mock embedding for testing".to_string(),
1279                            ..Default::default()
1280                        },
1281                    ],
1282                    ..Default::default()
1283                },
1284            ],
1285        }
1286    }
1287
1288    /// Get a provider by name.
1289    pub fn get_provider(&self, name: &str) -> Option<&ProviderConfig> {
1290        self.providers.iter().find(|p| p.name == name)
1291    }
1292
1293    /// Get a model by provider and model name.
1294    pub fn get_model(&self, provider: &str, model: &str) -> Option<&ModelCard> {
1295        self.get_provider(provider)
1296            .and_then(|p| p.models.iter().find(|m| m.name == model))
1297    }
1298
1299    /// OODA-200: Find a provider by model name.
1300    ///
1301    /// Searches all enabled providers for a model with the given name.
1302    /// Returns the provider config if found, None otherwise.
1303    ///
1304    /// # Arguments
1305    ///
1306    /// * `model_name` - The model identifier to search for (e.g., "glm-4.7")
1307    ///
1308    /// # Returns
1309    ///
1310    /// The provider configuration containing this model, or None.
1311    pub fn find_provider_for_model(&self, model_name: &str) -> Option<&ProviderConfig> {
1312        self.providers
1313            .iter()
1314            .find(|p| p.enabled && p.models.iter().any(|m| m.name == model_name))
1315    }
1316
1317    /// OODA-200: Find a provider and model by model name.
1318    ///
1319    /// Searches all enabled providers for a model with the given name.
1320    /// Returns both the provider config and model card if found.
1321    ///
1322    /// # Arguments
1323    ///
1324    /// * `model_name` - The model identifier to search for (e.g., "glm-4.7")
1325    ///
1326    /// # Returns
1327    ///
1328    /// A tuple of (ProviderConfig, ModelCard) if found, None otherwise.
1329    pub fn find_provider_and_model(
1330        &self,
1331        model_name: &str,
1332    ) -> Option<(&ProviderConfig, &ModelCard)> {
1333        for provider in &self.providers {
1334            if !provider.enabled {
1335                continue;
1336            }
1337            for model in &provider.models {
1338                if model.name == model_name {
1339                    return Some((provider, model));
1340                }
1341            }
1342        }
1343        None
1344    }
1345
1346    /// Get all LLM models across all providers.
1347    pub fn all_llm_models(&self) -> Vec<(&ProviderConfig, &ModelCard)> {
1348        self.providers
1349            .iter()
1350            .filter(|p| p.enabled)
1351            .flat_map(|p| {
1352                p.models
1353                    .iter()
1354                    .filter(|m| matches!(m.model_type, ModelType::Llm | ModelType::Multimodal))
1355                    .map(move |m| (p, m))
1356            })
1357            .collect()
1358    }
1359
1360    /// Get all embedding models across all providers.
1361    pub fn all_embedding_models(&self) -> Vec<(&ProviderConfig, &ModelCard)> {
1362        self.providers
1363            .iter()
1364            .filter(|p| p.enabled)
1365            .flat_map(|p| {
1366                p.models
1367                    .iter()
1368                    .filter(|m| {
1369                        matches!(m.model_type, ModelType::Embedding | ModelType::Multimodal)
1370                    })
1371                    .map(move |m| (p, m))
1372            })
1373            .collect()
1374    }
1375
1376    /// Get the default LLM provider and model.
1377    pub fn default_llm(&self) -> Option<(&ProviderConfig, &ModelCard)> {
1378        self.get_model(&self.defaults.llm_provider, &self.defaults.llm_model)
1379            .and_then(|m| {
1380                self.get_provider(&self.defaults.llm_provider)
1381                    .map(|p| (p, m))
1382            })
1383    }
1384
1385    /// Get the default embedding provider and model.
1386    pub fn default_embedding(&self) -> Option<(&ProviderConfig, &ModelCard)> {
1387        self.get_model(
1388            &self.defaults.embedding_provider,
1389            &self.defaults.embedding_model,
1390        )
1391        .and_then(|m| {
1392            self.get_provider(&self.defaults.embedding_provider)
1393                .map(|p| (p, m))
1394        })
1395    }
1396
1397    /// Validate the configuration.
1398    pub fn validate(&self) -> Result<(), ModelConfigError> {
1399        // Check that default providers exist
1400        if self.get_provider(&self.defaults.llm_provider).is_none() {
1401            return Err(ModelConfigError::ValidationError(format!(
1402                "Default LLM provider '{}' not found in providers list",
1403                self.defaults.llm_provider
1404            )));
1405        }
1406
1407        if self
1408            .get_provider(&self.defaults.embedding_provider)
1409            .is_none()
1410        {
1411            return Err(ModelConfigError::ValidationError(format!(
1412                "Default embedding provider '{}' not found in providers list",
1413                self.defaults.embedding_provider
1414            )));
1415        }
1416
1417        // Check that default models exist
1418        if self
1419            .get_model(&self.defaults.llm_provider, &self.defaults.llm_model)
1420            .is_none()
1421        {
1422            return Err(ModelConfigError::ValidationError(format!(
1423                "Default LLM model '{}' not found in provider '{}'",
1424                self.defaults.llm_model, self.defaults.llm_provider
1425            )));
1426        }
1427
1428        if self
1429            .get_model(
1430                &self.defaults.embedding_provider,
1431                &self.defaults.embedding_model,
1432            )
1433            .is_none()
1434        {
1435            return Err(ModelConfigError::ValidationError(format!(
1436                "Default embedding model '{}' not found in provider '{}'",
1437                self.defaults.embedding_model, self.defaults.embedding_provider
1438            )));
1439        }
1440
1441        // Check for duplicate provider names
1442        let mut seen_providers = std::collections::HashSet::new();
1443        for provider in &self.providers {
1444            if !seen_providers.insert(&provider.name) {
1445                return Err(ModelConfigError::ValidationError(format!(
1446                    "Duplicate provider name: '{}'",
1447                    provider.name
1448                )));
1449            }
1450
1451            // Check for duplicate model names within a provider
1452            let mut seen_models = std::collections::HashSet::new();
1453            for model in &provider.models {
1454                if !seen_models.insert(&model.name) {
1455                    return Err(ModelConfigError::ValidationError(format!(
1456                        "Duplicate model name '{}' in provider '{}'",
1457                        model.name, provider.name
1458                    )));
1459                }
1460            }
1461        }
1462
1463        Ok(())
1464    }
1465}
1466
#[cfg(test)]
mod tests {
    use super::*;

    // All tests below exercise the built-in default configuration
    // (`ModelsConfig::builtin_defaults`) unless noted otherwise, so they
    // run without any external config file.

    #[test]
    fn test_builtin_defaults() {
        let config = ModelsConfig::builtin_defaults();
        assert!(config.validate().is_ok());
        assert!(!config.providers.is_empty());
    }

    #[test]
    fn test_get_provider() {
        let config = ModelsConfig::builtin_defaults();
        assert!(config.get_provider("openai").is_some());
        assert!(config.get_provider("ollama").is_some());
        assert!(config.get_provider("nonexistent").is_none());
    }

    #[test]
    fn test_get_model() {
        let config = ModelsConfig::builtin_defaults();
        assert!(config.get_model("openai", "gpt-4o").is_some());
        assert!(config.get_model("ollama", "nomic-embed-text").is_some());
        assert!(config.get_model("openai", "nonexistent").is_none());
    }

    #[test]
    fn test_all_llm_models() {
        let config = ModelsConfig::builtin_defaults();
        let llm_models = config.all_llm_models();
        assert!(!llm_models.is_empty());
        assert!(llm_models.iter().any(|(_, m)| m.name == "gpt-4o"));
    }

    #[test]
    fn test_all_embedding_models() {
        let config = ModelsConfig::builtin_defaults();
        let embedding_models = config.all_embedding_models();
        assert!(!embedding_models.is_empty());
        assert!(embedding_models
            .iter()
            .any(|(_, m)| m.name == "text-embedding-3-small"));
    }

    // Serialize to TOML and parse back; provider count must survive.
    #[test]
    fn test_toml_roundtrip() {
        let config = ModelsConfig::builtin_defaults();
        let toml_str = config.to_toml().expect("Failed to serialize");
        let parsed: ModelsConfig = ModelsConfig::from_toml(&toml_str).expect("Failed to parse");
        assert_eq!(config.providers.len(), parsed.providers.len());
    }

    #[test]
    fn test_model_capabilities() {
        let config = ModelsConfig::builtin_defaults();
        let gpt4o = config
            .get_model("openai", "gpt-4o")
            .expect("gpt-4o should exist");
        assert!(gpt4o.capabilities.supports_vision);
        assert!(gpt4o.capabilities.supports_function_calling);
        assert_eq!(gpt4o.capabilities.context_length, 128000);
    }

    #[test]
    fn test_embedding_dimensions() {
        let config = ModelsConfig::builtin_defaults();

        let openai_embed = config
            .get_model("openai", "text-embedding-3-small")
            .unwrap();
        assert_eq!(openai_embed.capabilities.embedding_dimension, 1536);

        let ollama_embed = config.get_model("ollama", "nomic-embed-text").unwrap();
        assert_eq!(ollama_embed.capabilities.embedding_dimension, 768);
    }

    // Duplicating an existing provider entry must trip validation.
    #[test]
    fn test_validation_duplicate_provider() {
        let mut config = ModelsConfig::builtin_defaults();
        config.providers.push(config.providers[0].clone());
        assert!(config.validate().is_err());
    }

    // Parses the real models.toml from the repository root, if present.
    // NOTE: silently passes when the file is absent (e.g. packaged builds),
    // so a missing file never fails CI.
    #[test]
    fn test_parse_models_toml_file() {
        // Read the actual models.toml file from the project root
        let manifest_dir = std::env::var("CARGO_MANIFEST_DIR").unwrap();
        let toml_path = std::path::Path::new(&manifest_dir)
            .parent() // crates/
            .unwrap()
            .parent() // edgequake/
            .unwrap()
            .join("models.toml");

        if toml_path.exists() {
            let content = std::fs::read_to_string(&toml_path).expect("Failed to read models.toml");
            let config = ModelsConfig::from_toml(&content).expect("Failed to parse models.toml");

            // Validate the parsed config
            assert!(config.validate().is_ok(), "models.toml failed validation");

            // Check we have expected providers
            assert!(
                config.get_provider("openai").is_some(),
                "OpenAI provider should exist"
            );
            assert!(
                config.get_provider("ollama").is_some(),
                "Ollama provider should exist"
            );
            assert!(
                config.get_provider("lmstudio").is_some(),
                "LM Studio provider should exist"
            );
            assert!(
                config.get_provider("mock").is_some(),
                "Mock provider should exist"
            );

            // Check default selections are set
            assert!(!config.defaults.llm_provider.is_empty());
            assert!(!config.defaults.llm_model.is_empty());
            assert!(!config.defaults.embedding_provider.is_empty());
            assert!(!config.defaults.embedding_model.is_empty());

            // Check we have LLM and embedding models
            let llm_models = config.all_llm_models();
            let embedding_models = config.all_embedding_models();
            assert!(!llm_models.is_empty(), "Should have LLM models");
            assert!(!embedding_models.is_empty(), "Should have embedding models");
        }
    }

    // FIX: removed a `priorities` Vec that was built and sorted but never
    // read afterward (dead code); the assertion only needs the two values.
    #[test]
    fn test_provider_priorities() {
        let config = ModelsConfig::builtin_defaults();

        // Lower priority means higher preference
        // OpenAI should have lower priority number than Mock
        let openai_prio = config.get_provider("openai").unwrap().priority;
        let mock_prio = config.get_provider("mock").unwrap().priority;
        assert!(
            openai_prio < mock_prio,
            "OpenAI should have higher priority than mock"
        );
    }

    // ====================================================================
    // Display impl tests
    // ====================================================================

    #[test]
    fn test_model_type_display() {
        assert_eq!(ModelType::Llm.to_string(), "llm");
        assert_eq!(ModelType::Embedding.to_string(), "embedding");
        assert_eq!(ModelType::Multimodal.to_string(), "multimodal");
    }

    #[test]
    fn test_provider_type_display() {
        assert_eq!(ProviderType::OpenAI.to_string(), "openai");
        assert_eq!(ProviderType::Ollama.to_string(), "ollama");
        assert_eq!(ProviderType::LMStudio.to_string(), "lmstudio");
        assert_eq!(ProviderType::Azure.to_string(), "azure");
        assert_eq!(ProviderType::Anthropic.to_string(), "anthropic");
        assert_eq!(ProviderType::OpenRouter.to_string(), "openrouter");
        assert_eq!(
            ProviderType::OpenAICompatible.to_string(),
            "openai_compatible"
        );
        assert_eq!(ProviderType::Mock.to_string(), "mock");
    }

    // ====================================================================
    // Default impl tests
    // ====================================================================

    #[test]
    fn test_model_type_default() {
        assert_eq!(ModelType::default(), ModelType::Llm);
    }

    #[test]
    fn test_provider_type_default() {
        assert_eq!(ProviderType::default(), ProviderType::OpenAI);
    }

    #[test]
    fn test_model_card_default() {
        let card = ModelCard::default();
        assert_eq!(card.name, "unknown");
        assert_eq!(card.display_name, "Unknown Model");
        assert_eq!(card.model_type, ModelType::Llm);
        assert!(!card.deprecated);
        assert!(card.replacement.is_none());
        assert!(card.tags.is_empty());
    }

    #[test]
    fn test_provider_config_default() {
        let config = ProviderConfig::default();
        assert_eq!(config.name, "unknown");
        assert!(config.enabled);
        assert_eq!(config.priority, 100);
        assert_eq!(config.timeout_seconds, 120);
        assert!(config.api_key_env.is_none());
    }

    #[test]
    fn test_defaults_config_default() {
        let defaults = DefaultsConfig::default();
        assert_eq!(defaults.llm_provider, "openai");
        assert_eq!(defaults.llm_model, "gpt-4o-mini");
        assert_eq!(defaults.embedding_provider, "openai");
        assert_eq!(defaults.embedding_model, "text-embedding-3-small");
    }

    #[test]
    fn test_model_capabilities_default() {
        let caps = ModelCapabilities::default();
        assert_eq!(caps.context_length, 0);
        assert!(!caps.supports_vision);
        assert!(!caps.supports_function_calling);
    }

    #[test]
    fn test_model_cost_default() {
        let cost = ModelCost::default();
        assert_eq!(cost.input_per_1k, 0.0);
        assert_eq!(cost.output_per_1k, 0.0);
    }

    // ====================================================================
    // Find methods
    // ====================================================================

    #[test]
    fn test_find_provider_for_model() {
        let config = ModelsConfig::builtin_defaults();
        let provider = config.find_provider_for_model("gpt-4o");
        assert!(provider.is_some());
        assert_eq!(provider.unwrap().name, "openai");
    }

    #[test]
    fn test_find_provider_for_model_not_found() {
        let config = ModelsConfig::builtin_defaults();
        assert!(config
            .find_provider_for_model("nonexistent-model-xyz")
            .is_none());
    }

    #[test]
    fn test_find_provider_and_model() {
        let config = ModelsConfig::builtin_defaults();
        let result = config.find_provider_and_model("gpt-4o");
        assert!(result.is_some());
        let (provider, model) = result.unwrap();
        assert_eq!(provider.name, "openai");
        assert_eq!(model.name, "gpt-4o");
    }

    #[test]
    fn test_find_provider_and_model_not_found() {
        let config = ModelsConfig::builtin_defaults();
        assert!(config.find_provider_and_model("nonexistent-xyz").is_none());
    }

    // ====================================================================
    // Default model selection
    // ====================================================================

    #[test]
    fn test_default_llm() {
        let config = ModelsConfig::builtin_defaults();
        let result = config.default_llm();
        assert!(result.is_some());
        let (provider, model) = result.unwrap();
        assert_eq!(provider.name, "openai");
        assert_eq!(model.name, "gpt-4o-mini");
    }

    #[test]
    fn test_default_embedding() {
        let config = ModelsConfig::builtin_defaults();
        let result = config.default_embedding();
        assert!(result.is_some());
        let (provider, model) = result.unwrap();
        assert_eq!(provider.name, "openai");
        assert_eq!(model.name, "text-embedding-3-small");
    }

    // ====================================================================
    // Error type tests
    // ====================================================================

    // Each error variant's Display output must carry its payload.
    #[test]
    fn test_model_config_error_display() {
        let err = ModelConfigError::ProviderNotFound("test".to_string());
        assert!(err.to_string().contains("test"));

        let err = ModelConfigError::ModelNotFound("gpt-5".to_string());
        assert!(err.to_string().contains("gpt-5"));

        let err = ModelConfigError::ValidationError("missing field".to_string());
        assert!(err.to_string().contains("missing field"));

        let err = ModelConfigError::ParseError("bad toml".to_string());
        assert!(err.to_string().contains("bad toml"));
    }

    // ====================================================================
    // TOML parsing edge cases
    // ====================================================================

    #[test]
    fn test_from_toml_invalid() {
        let result = ModelsConfig::from_toml("this is not valid toml {{{");
        assert!(result.is_err());
    }

    #[test]
    fn test_from_toml_empty() {
        let config = ModelsConfig::from_toml("").unwrap();
        assert!(config.providers.is_empty());
    }

    #[test]
    fn test_models_config_default() {
        let config = ModelsConfig::default();
        assert!(config.providers.is_empty());
    }

    #[test]
    fn test_validation_empty_config() {
        let config = ModelsConfig::default();
        // Empty config should fail validation because default providers are missing
        assert!(config.validate().is_err());
    }
}