lc/
model_metadata.rs

1use crate::provider::Provider;
2use anyhow::{Context, Result};
3use serde::{Deserialize, Serialize};
4use serde_json::Value;
5use std::collections::HashMap;
6use std::fs;
7use std::path::PathBuf;
8
/// Normalized description of a single model, assembled from one entry of a
/// provider's model listing.
///
/// The [`Default`] value has empty strings for `id`/`provider`, `None` for every
/// optional field, `false` for every flag, [`ModelType::Chat`] as the model type
/// and a null `raw_data`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelMetadata {
    /// Model identifier.
    pub id: String,
    /// Name of the provider the model was listed by.
    pub provider: String,
    /// Human-readable name, when one of the configured name fields is present.
    pub display_name: Option<String>,
    /// Value of the entry's `description` field, when it is a string.
    pub description: Option<String>,
    /// Value of the entry's `owned_by` field, when it is a string.
    pub owned_by: Option<String>,
    /// Value of the entry's `created` field, when it is an integer.
    pub created: Option<i64>,

    // Context and token limits
    pub context_length: Option<u32>,
    pub max_input_tokens: Option<u32>,
    pub max_output_tokens: Option<u32>,

    // Pricing (per million tokens)
    pub input_price_per_m: Option<f64>,
    pub output_price_per_m: Option<f64>,

    // Capabilities - These flags must only be set to `true` when the provider JSON explicitly contains that feature
    /// Only set to `true` when provider JSON explicitly indicates tool/function calling support
    pub supports_tools: bool,
    /// Only set to `true` when provider JSON explicitly indicates vision/image processing support
    pub supports_vision: bool,
    /// Only set to `true` when provider JSON explicitly indicates audio processing support
    pub supports_audio: bool,
    /// Only set to `true` when provider JSON explicitly indicates advanced reasoning capabilities
    pub supports_reasoning: bool,
    /// Only set to `true` when provider JSON explicitly indicates code generation support
    pub supports_code: bool,
    /// Only set to `true` when provider JSON explicitly indicates function calling support
    pub supports_function_calling: bool,
    /// Only set to `true` when provider JSON explicitly indicates JSON mode support
    pub supports_json_mode: bool,
    /// Only set to `true` when provider JSON explicitly indicates streaming support
    pub supports_streaming: bool,

    // Model type and characteristics
    /// Broad category of the model; defaults to [`ModelType::Chat`].
    pub model_type: ModelType,
    /// Only set to `true` when provider JSON explicitly indicates the model is deprecated
    pub is_deprecated: bool,
    /// Only set to `true` when provider JSON explicitly indicates the model supports fine-tuning
    pub is_fine_tunable: bool,

    // Raw provider-specific data
    /// The provider's JSON for this model, kept verbatim.
    pub raw_data: serde_json::Value,
}
55
/// Broad category a model belongs to.
///
/// `ModelMetadata::default()` uses `Chat`. `Other` carries an arbitrary string
/// label.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ModelType {
    Chat,
    Completion,
    Embedding,
    ImageGeneration,
    AudioGeneration,
    Moderation,
    /// Any category not covered by the named variants, identified by a label.
    Other(String),
}
66
67impl Default for ModelMetadata {
68    fn default() -> Self {
69        Self {
70            id: String::new(),
71            provider: String::new(),
72            display_name: None,
73            description: None,
74            owned_by: None,
75            created: None,
76            context_length: None,
77            max_input_tokens: None,
78            max_output_tokens: None,
79            input_price_per_m: None,
80            output_price_per_m: None,
81            supports_tools: false,
82            supports_vision: false,
83            supports_audio: false,
84            supports_reasoning: false,
85            supports_code: false,
86            supports_function_calling: false,
87            supports_json_mode: false,
88            supports_streaming: false,
89            model_type: ModelType::Chat,
90            is_deprecated: false,
91            is_fine_tunable: false,
92            raw_data: serde_json::Value::Null,
93        }
94    }
95}
96
97// Configuration structures
/// Configuration describing where model entries live inside a provider response.
///
/// Serialized to and from `model_paths.toml` in the config directory.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelPaths {
    /// jq-style paths (for example `.data[]`) tried in order against the response.
    pub paths: Vec<String>,
    /// Field names used to find a model's id and display name. Falls back to
    /// `FieldMappings::default()` when absent from the file.
    #[serde(default)]
    pub field_mappings: FieldMappings,
}
104
/// Names of the JSON fields that may hold a model's id and display name.
///
/// Both lists are searched front to back and the first field holding a string
/// value is used.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FieldMappings {
    /// Fields to check for model ID (in priority order)
    pub id_fields: Vec<String>,
    /// Fields to check for model name/display name (in priority order)
    pub name_fields: Vec<String>,
}
112
113impl Default for FieldMappings {
114    fn default() -> Self {
115        Self {
116            id_fields: vec![
117                "id".to_string(),
118                "modelId".to_string(),
119                "name".to_string(),
120                "modelName".to_string(),
121            ],
122            name_fields: vec![
123                "display_name".to_string(),
124                "name".to_string(),
125                "modelName".to_string(),
126            ],
127        }
128    }
129}
130
131impl Default for ModelPaths {
132    fn default() -> Self {
133        Self {
134            paths: vec![
135                ".data[]".to_string(),
136                ".models[]".to_string(),
137                ".".to_string(),
138            ],
139            field_mappings: FieldMappings::default(),
140        }
141    }
142}
143
/// Set of extraction rules, keyed by tag name.
///
/// Serialized to and from `tags.toml` in the config directory.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TagConfig {
    /// Tag name (for example `context_length` or `supports_vision`) mapped to
    /// the rule used to extract its value from a model's JSON.
    pub tags: HashMap<String, TagRule>,
}
148
/// How to extract one tag's value from a model's JSON.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TagRule {
    /// Lookup expressions, tried in order. Entries are either jq-style paths
    /// (optionally with a single ` | select(. == ...)` filter) or the name-based
    /// forms `@name_contains("...")` / `@name_matches("...")`.
    pub paths: Vec<String>,
    /// Declared type of the value. The built-in rules use `u32`, `f64` and
    /// `bool`; `bool` makes the lookup keep searching until some path yields `true`.
    pub value_type: String,
    /// Optional post-processing step. `multiply_million` scales a numeric value
    /// by 1,000,000; any other name leaves the value untouched.
    pub transform: Option<String>,
}
155
156impl Default for TagConfig {
157    fn default() -> Self {
158        let mut tags = HashMap::new();
159
160        // Context length
161        tags.insert(
162            "context_length".to_string(),
163            TagRule {
164                paths: vec![
165                    ".context_length".to_string(),
166                    ".context_window".to_string(),
167                    ".context_size".to_string(),
168                    ".max_context_length".to_string(),
169                    ".input_token_limit".to_string(),
170                    ".inputTokenLimit".to_string(),
171                    ".limits.max_input_tokens".to_string(),
172                    ".top_provider.context_length".to_string(),
173                ],
174                value_type: "u32".to_string(),
175                transform: None,
176            },
177        );
178
179        // Output tokens
180        tags.insert(
181            "output".to_string(),
182            TagRule {
183                paths: vec![
184                    ".max_completion_tokens".to_string(),
185                    ".outputTokenLimit".to_string(),
186                    ".max_output_tokens".to_string(),
187                    ".limits.max_output_tokens".to_string(),
188                    ".top_provider.max_completion_tokens".to_string(),
189                    ".max_tokens".to_string(),
190                ],
191                value_type: "u32".to_string(),
192                transform: None,
193            },
194        );
195
196        // Input pricing
197        tags.insert(
198            "input_price_per_m".to_string(),
199            TagRule {
200                paths: vec![
201                    ".pricing.prompt".to_string(),
202                    ".pricing.input.usd".to_string(),
203                    ".input_price".to_string(),
204                ],
205                value_type: "f64".to_string(),
206                transform: Some("multiply_million".to_string()),
207            },
208        );
209
210        // Input pricing direct (no transform)
211        tags.insert(
212            "input_price_per_m_direct".to_string(),
213            TagRule {
214                paths: vec![".input_token_price_per_m".to_string()],
215                value_type: "f64".to_string(),
216                transform: None,
217            },
218        );
219
220        // Output pricing
221        tags.insert(
222            "output_price_per_m".to_string(),
223            TagRule {
224                paths: vec![
225                    ".pricing.completion".to_string(),
226                    ".pricing.output.usd".to_string(),
227                    ".output_price".to_string(),
228                ],
229                value_type: "f64".to_string(),
230                transform: Some("multiply_million".to_string()),
231            },
232        );
233
234        // Output pricing direct (no transform)
235        tags.insert(
236            "output_price_per_m_direct".to_string(),
237            TagRule {
238                paths: vec![".output_token_price_per_m".to_string()],
239                value_type: "f64".to_string(),
240                transform: None,
241            },
242        );
243
244        // Vision support with comprehensive name-based detection
245        tags.insert(
246            "supports_vision".to_string(),
247            TagRule {
248                paths: vec![
249                    ".supports_vision".to_string(),
250                    ".supports_image_input".to_string(),
251                    ".capabilities.vision".to_string(),
252                    ".architecture.input_modalities[] | select(. == \"image\")".to_string(),
253                    ".architecture.output_modalities[] | select(. == \"image\")".to_string(),
254                    "@name_contains(\"image\")".to_string(),
255                    "@name_contains(\"flux\")".to_string(),
256                    "@name_contains(\"dall-e\")".to_string(),
257                    "@name_contains(\"midjourney\")".to_string(),
258                    "@name_contains(\"stable\")".to_string(),
259                    "@name_contains(\"diffusion\")".to_string(),
260                    "@name_contains(\"vision\")".to_string(),
261                    "@name_contains(\"visual\")".to_string(),
262                    "@name_contains(\"photo\")".to_string(),
263                    "@name_contains(\"picture\")".to_string(),
264                    "@name_contains(\"draw\")".to_string(),
265                    "@name_contains(\"paint\")".to_string(),
266                    "@name_contains(\"art\")".to_string(),
267                    "@name_contains(\"generate\")".to_string(),
268                ],
269                value_type: "bool".to_string(),
270                transform: None,
271            },
272        );
273
274        // Tools/Function calling support
275        tags.insert(
276            "supports_tools".to_string(),
277            TagRule {
278                paths: vec![
279                    ".supports_tools".to_string(),
280                    ".capabilities.function_calling".to_string(),
281                    ".features[] | select(. == \"tools\")".to_string(),
282                    ".features[] | select(. == \"function-calling\")".to_string(),
283                    ".capabilities[] | select(. == \"tool-calling\")".to_string(),
284                    ".supported_parameters[] | select(. == \"tools\")".to_string(),
285                ],
286                value_type: "bool".to_string(),
287                transform: None,
288            },
289        );
290
291        // Audio support
292        tags.insert(
293            "supports_audio".to_string(),
294            TagRule {
295                paths: vec![
296                    ".supports_audio".to_string(),
297                    "@name_contains(\"audio\")".to_string(),
298                    ".features[] | select(. == \"audio\")".to_string(),
299                    ".capabilities[] | select(. == \"audio\")".to_string(),
300                    ".supported_input_modalities[] | select(. == \"audio\")".to_string(),
301                    ".supported_output_modalities[] | select(. == \"audio\")".to_string(),
302                    ".architecture.input_modalities[] | select(. == \"audio\")".to_string(),
303                    ".architecture.output_modalities[] | select(. == \"audio\")".to_string(),
304                ],
305                value_type: "bool".to_string(),
306                transform: None,
307            },
308        );
309
310        // Reasoning support
311        tags.insert(
312            "supports_reasoning".to_string(),
313            TagRule {
314                paths: vec![
315                    ".supports_reasoning".to_string(),
316                    ".features[] | select(. == \"think\")".to_string(),
317                    ".features[] | select(. == \"reasoning\")".to_string(),
318                    ".capabilities[] | select(. == \"reasoning\")".to_string(),
319                    ".supported_input_modalities[] | select(. == \"reasoning\")".to_string(),
320                    ".supported_output_modalities[] | select(. == \"reasoning\")".to_string(),
321                    ".architecture.input_modalities[] | select(. == \"reasoning\")".to_string(),
322                    ".architecture.output_modalities[] | select(. == \"reasoning\")".to_string(),
323                ],
324                value_type: "bool".to_string(),
325                transform: None,
326            },
327        );
328
329        Self { tags }
330    }
331}
332
333// Main extractor
/// Turns raw provider model listings into [`ModelMetadata`] records, driven by
/// the path and tag configuration loaded from the config directory.
pub struct ModelMetadataExtractor {
    /// Where to find model entries in a response and which fields name them.
    model_paths: ModelPaths,
    /// Rules used to fill limits, pricing and capability flags.
    tag_config: TagConfig,
}
338
339impl ModelMetadataExtractor {
340    pub fn new() -> Result<Self> {
341        // Ensure configuration files exist on first run
342        if let Err(e) = Self::ensure_config_files_exist() {
343            eprintln!(
344                "Warning: Failed to ensure model metadata config files exist: {}",
345                e
346            );
347        }
348
349        let model_paths = Self::load_model_paths()?;
350        let tag_config = Self::load_tag_config()?;
351
352        Ok(Self {
353            model_paths,
354            tag_config,
355        })
356    }
357
358    /// Ensures that tags.toml and model_paths.toml exist with default values
359    fn ensure_config_files_exist() -> Result<()> {
360        let config_dir = Self::get_config_dir()?;
361
362        // Ensure directory exists
363        fs::create_dir_all(&config_dir)?;
364
365        // Check and create model_paths.toml if it doesn't exist
366        let model_paths_file = config_dir.join("model_paths.toml");
367        if !model_paths_file.exists() {
368            let default_paths = ModelPaths::default();
369            let content = toml::to_string_pretty(&default_paths)?;
370            fs::write(&model_paths_file, content)?;
371        }
372
373        // Check and create tags.toml if it doesn't exist
374        let tags_file = config_dir.join("tags.toml");
375        if !tags_file.exists() {
376            let default_tags = TagConfig::default();
377            let content = toml::to_string_pretty(&default_tags)?;
378            fs::write(&tags_file, content)?;
379        }
380
381        Ok(())
382    }
383
384    fn get_config_dir() -> Result<PathBuf> {
385        // Check for test environment variables first
386        if let Ok(xdg_config) = std::env::var("XDG_CONFIG_HOME") {
387            return Ok(std::path::PathBuf::from(xdg_config).join("lc"));
388        }
389
390        if let Ok(home) = std::env::var("HOME") {
391            // Check if this looks like a test environment (temp directory)
392            if home.contains("tmp") || home.contains("temp") {
393                return Ok(std::path::PathBuf::from(home).join(".config").join("lc"));
394            }
395        }
396
397        // Default behavior for production
398        let config_dir = dirs::config_dir()
399            .context("Failed to get config directory")?
400            .join("lc");
401        Ok(config_dir)
402    }
403
404    fn load_model_paths() -> Result<ModelPaths> {
405        let config_dir = Self::get_config_dir()?;
406        let path = config_dir.join("model_paths.toml");
407
408        // Ensure directory exists
409        fs::create_dir_all(&config_dir)?;
410
411        if path.exists() {
412            let content = fs::read_to_string(&path)?;
413            toml::from_str(&content).context("Failed to parse model_paths.toml")
414        } else {
415            // Create default file
416            let default = ModelPaths::default();
417            let content = toml::to_string_pretty(&default)?;
418            fs::write(&path, content)?;
419            Ok(default)
420        }
421    }
422
423    fn load_tag_config() -> Result<TagConfig> {
424        let config_dir = Self::get_config_dir()?;
425        let path = config_dir.join("tags.toml");
426
427        // Ensure directory exists
428        fs::create_dir_all(&config_dir)?;
429
430        if path.exists() {
431            let content = fs::read_to_string(&path)?;
432            toml::from_str(&content).context("Failed to parse tags.toml")
433        } else {
434            // Create default file
435            let default = TagConfig::default();
436            let content = toml::to_string_pretty(&default)?;
437            fs::write(&path, content)?;
438            Ok(default)
439        }
440    }
441
442    pub fn extract_models(&self, provider: &Provider, response: &Value) -> Result<Vec<Value>> {
443        let mut models = Vec::new();
444
445        for path in &self.model_paths.paths {
446            if let Ok(extracted) = self.extract_with_jq_path(response, path) {
447                match &extracted {
448                    Value::Array(arr) => models.extend(arr.clone()),
449                    Value::Object(obj) => {
450                        // Check if object looks like a model using configured field mappings
451                        let has_model_field = self.model_paths.field_mappings.id_fields.iter()
452                            .any(|field| obj.contains_key(field))
453                            || obj.contains_key("model"); // Keep "model" as a generic field
454                        
455                        if has_model_field {
456                            models.push(extracted);
457                        }
458                    }
459                    _ => {}
460                }
461            }
462        }
463
464        // Special handling for HuggingFace
465        if provider.provider == "hf" || provider.provider == "huggingface" {
466            models = self.expand_huggingface_models(models)?;
467        }
468
469        Ok(models)
470    }
471
472    pub fn extract_with_jq_path(&self, data: &Value, path: &str) -> Result<Value> {
473        // Simple JQ path implementation
474        if path == "." {
475            return Ok(data.clone());
476        }
477
478        // Handle complex JQ expressions with pipes
479        if path.contains(" | ") {
480            return self.extract_with_jq_filter(data, path);
481        }
482
483        let parts: Vec<&str> = path.split('.').filter(|s| !s.is_empty()).collect();
484        let mut current = data;
485
486        for part in parts {
487            if part.ends_with("[]") {
488                let field = &part[..part.len() - 2];
489                current = current
490                    .get(field)
491                    .context(format!("Field {} not found", field))?;
492                if !current.is_array() {
493                    anyhow::bail!("Expected array at {}", field);
494                }
495            } else {
496                current = current
497                    .get(part)
498                    .context(format!("Field {} not found", part))?;
499            }
500        }
501
502        Ok(current.clone())
503    }
504
505    fn extract_with_jq_filter(&self, data: &Value, path: &str) -> Result<Value> {
506        let parts: Vec<&str> = path.split(" | ").collect();
507        if parts.len() != 2 {
508            anyhow::bail!("Complex JQ filters not supported: {}", path);
509        }
510
511        let array_path = parts[0].trim();
512        let filter = parts[1].trim();
513
514        // Extract the array first
515        let array_value = self.extract_with_jq_path(data, array_path)?;
516
517        // Handle select filters
518        if filter.starts_with("select(") && filter.ends_with(")") {
519            let condition = &filter[7..filter.len() - 1]; // Remove "select(" and ")"
520
521            if let Value::Array(arr) = array_value {
522                // Check if any element in the array matches the condition
523                for item in arr {
524                    if self.evaluate_select_condition(&item, condition)? {
525                        return Ok(Value::Bool(true));
526                    }
527                }
528                return Ok(Value::Bool(false));
529            } else {
530                // For non-arrays, evaluate the condition directly
531                if self.evaluate_select_condition(&array_value, condition)? {
532                    return Ok(array_value);
533                } else {
534                    return Ok(Value::Null);
535                }
536            }
537        }
538
539        anyhow::bail!("Unsupported JQ filter: {}", filter)
540    }
541
542    fn evaluate_select_condition(&self, value: &Value, condition: &str) -> Result<bool> {
543        // Handle equality conditions like '. == "tool-calling"'
544        if condition.starts_with(". == ") {
545            let expected = condition[5..].trim();
546
547            // Remove quotes if present
548            let expected = if expected.starts_with('"') && expected.ends_with('"') {
549                &expected[1..expected.len() - 1]
550            } else {
551                expected
552            };
553
554            match value {
555                Value::String(s) => Ok(s == expected),
556                Value::Number(n) => {
557                    if let Ok(num) = expected.parse::<f64>() {
558                        Ok(n.as_f64() == Some(num))
559                    } else {
560                        Ok(false)
561                    }
562                }
563                Value::Bool(b) => {
564                    if let Ok(bool_val) = expected.parse::<bool>() {
565                        Ok(*b == bool_val)
566                    } else {
567                        Ok(false)
568                    }
569                }
570                _ => Ok(false),
571            }
572        } else {
573            anyhow::bail!("Unsupported select condition: {}", condition)
574        }
575    }
576
577    fn expand_huggingface_models(&self, models: Vec<Value>) -> Result<Vec<Value>> {
578        let mut expanded = Vec::new();
579
580        for model in models {
581            if let Some(providers) = model.get("providers").and_then(|p| p.as_array()) {
582                for provider in providers {
583                    let mut new_model = model.clone();
584                    if let Some(obj) = new_model.as_object_mut() {
585                        obj.insert("provider".to_string(), provider.clone());
586                        obj.remove("providers");
587                    }
588                    expanded.push(new_model);
589                }
590            } else {
591                expanded.push(model);
592            }
593        }
594
595        Ok(expanded)
596    }
597
598    pub fn extract_metadata(&self, provider: &Provider, model: &Value) -> Result<ModelMetadata> {
599        let mut metadata = ModelMetadata::default();
600
601        // Extract ID using configured field mappings (in priority order)
602        let base_id = self.model_paths.field_mappings.id_fields.iter()
603            .find_map(|field| model.get(field).and_then(|v| v.as_str()))
604            .map(|s| s.to_string())
605            .ok_or_else(|| {
606                let fields = self.model_paths.field_mappings.id_fields.join(", ");
607                anyhow::anyhow!("Model missing required ID field. Checked fields: {}", fields)
608            })?;
609
610        // For HuggingFace models, append the provider suffix from the expanded provider object
611        if (provider.provider == "hf" || provider.provider == "huggingface")
612            && model.get("provider").is_some()
613        {
614            if let Some(provider_obj) = model.get("provider") {
615                if let Some(provider_name) = provider_obj.get("provider").and_then(|v| v.as_str()) {
616                    metadata.id = format!("{}:{}", base_id, provider_name);
617                } else {
618                    metadata.id = base_id;
619                }
620            } else {
621                metadata.id = base_id;
622            }
623        } else {
624            metadata.id = base_id;
625        }
626
627        metadata.provider = provider.provider.clone();
628        metadata.raw_data = model.clone();
629
630        // Extract basic fields using configured field mappings
631        if let Some(name) = self.model_paths.field_mappings.name_fields.iter()
632            .find_map(|field| model.get(field).and_then(|v| v.as_str()))
633        {
634            metadata.display_name = Some(name.to_string());
635        }
636
637        if let Some(desc) = model.get("description").and_then(|v| v.as_str()) {
638            metadata.description = Some(desc.to_string());
639        }
640
641        if let Some(owner) = model.get("owned_by").and_then(|v| v.as_str()) {
642            metadata.owned_by = Some(owner.to_string());
643        }
644
645        if let Some(created) = model.get("created").and_then(|v| v.as_i64()) {
646            metadata.created = Some(created);
647        }
648
649        // Extract tags using configured rules
650        for (tag_name, rule) in &self.tag_config.tags {
651            if let Some(value) = self.extract_tag_value(model, rule) {
652                self.apply_tag_value(&mut metadata, tag_name, value, &rule.value_type)?;
653            }
654        }
655
656        Ok(metadata)
657    }
658
659    fn extract_tag_value(&self, model: &Value, rule: &TagRule) -> Option<Value> {
660        // For boolean fields, we want to find the first "true" value, not just the first non-null value
661        let is_bool_field = rule.value_type == "bool";
662        let mut found_false = false;
663
664        for path in &rule.paths {
665            // Handle special name-based patterns
666            if path.starts_with("@name_contains(") && path.ends_with(")") {
667                let pattern = &path[15..path.len() - 1]; // Remove "@name_contains(" and ")"
668                let pattern = pattern.trim_matches('"'); // Remove quotes if present
669
670                if let Some(result) = self.check_name_contains(model, pattern) {
671                    if is_bool_field && result {
672                        return Some(Value::Bool(true));
673                    } else if !is_bool_field {
674                        return Some(Value::Bool(result));
675                    } else if result == false {
676                        found_false = true;
677                    }
678                }
679                continue;
680            }
681
682            if path.starts_with("@name_matches(") && path.ends_with(")") {
683                let pattern = &path[14..path.len() - 1]; // Remove "@name_matches(" and ")"
684                let pattern = pattern.trim_matches('"'); // Remove quotes if present
685
686                if let Some(result) = self.check_name_matches(model, pattern) {
687                    if is_bool_field && result {
688                        return Some(Value::Bool(true));
689                    } else if !is_bool_field {
690                        return Some(Value::Bool(result));
691                    } else if result == false {
692                        found_false = true;
693                    }
694                }
695                continue;
696            }
697
698            // Regular JQ path extraction
699            if let Ok(value) = self.extract_with_jq_path(model, path) {
700                if !value.is_null() {
701                    // For boolean fields, continue searching if we found false, but return immediately if we found true
702                    if is_bool_field {
703                        if let Some(bool_val) = value.as_bool() {
704                            if bool_val {
705                                // Found true, return immediately
706                                if let Some(transform) = &rule.transform {
707                                    return self.apply_transform(value, transform);
708                                }
709                                return Some(value);
710                            } else {
711                                // Found false, remember it but continue searching
712                                found_false = true;
713                            }
714                        }
715                    } else {
716                        // For non-boolean fields, return the first non-null value
717                        if let Some(transform) = &rule.transform {
718                            return self.apply_transform(value, transform);
719                        }
720                        return Some(value);
721                    }
722                }
723            }
724        }
725
726        // If we're dealing with a boolean field and found at least one false, return false
727        // Otherwise return None
728        if is_bool_field && found_false {
729            Some(Value::Bool(false))
730        } else {
731            None
732        }
733    }
734
735    fn apply_transform(&self, value: Value, transform: &str) -> Option<Value> {
736        match transform {
737            "multiply_million" => {
738                if let Some(num) = value.as_f64() {
739                    Some(Value::from(num * 1_000_000.0))
740                } else {
741                    None
742                }
743            }
744            _ => Some(value),
745        }
746    }
747
748    fn apply_tag_value(
749        &self,
750        metadata: &mut ModelMetadata,
751        tag_name: &str,
752        value: Value,
753        value_type: &str,
754    ) -> Result<()> {
755        match tag_name {
756            "context_length" => {
757                if let Some(v) = self.parse_value_as_u32(&value, value_type)? {
758                    metadata.context_length = Some(v);
759                }
760            }
761            "max_input_tokens" => {
762                if let Some(v) = self.parse_value_as_u32(&value, value_type)? {
763                    metadata.max_input_tokens = Some(v);
764                }
765            }
766            "max_output_tokens" | "output" => {
767                if let Some(v) = self.parse_value_as_u32(&value, value_type)? {
768                    metadata.max_output_tokens = Some(v);
769                }
770            }
771            "input_price_per_m" | "input_price_per_m_direct" => {
772                if let Some(v) = self.parse_value_as_f64(&value, value_type)? {
773                    metadata.input_price_per_m = Some(v);
774                }
775            }
776            "output_price_per_m" | "output_price_per_m_direct" => {
777                if let Some(v) = self.parse_value_as_f64(&value, value_type)? {
778                    metadata.output_price_per_m = Some(v);
779                }
780            }
781            "supports_tools" => {
782                if let Some(v) = self.parse_value_as_bool(&value, value_type)? {
783                    metadata.supports_tools = v;
784                }
785            }
786            "supports_vision" => {
787                if let Some(v) = self.parse_value_as_bool(&value, value_type)? {
788                    metadata.supports_vision = v;
789                }
790            }
791            "supports_audio" => {
792                if let Some(v) = self.parse_value_as_bool(&value, value_type)? {
793                    metadata.supports_audio = v;
794                }
795            }
796            "supports_reasoning" => {
797                if let Some(v) = self.parse_value_as_bool(&value, value_type)? {
798                    metadata.supports_reasoning = v;
799                }
800            }
801            "supports_code" => {
802                if let Some(v) = self.parse_value_as_bool(&value, value_type)? {
803                    metadata.supports_code = v;
804                }
805            }
806            "supports_function_calling" => {
807                if let Some(v) = self.parse_value_as_bool(&value, value_type)? {
808                    metadata.supports_function_calling = v;
809                }
810            }
811            "supports_json_mode" => {
812                if let Some(v) = self.parse_value_as_bool(&value, value_type)? {
813                    metadata.supports_json_mode = v;
814                }
815            }
816            "supports_streaming" => {
817                if let Some(v) = self.parse_value_as_bool(&value, value_type)? {
818                    metadata.supports_streaming = v;
819                }
820            }
821            "is_deprecated" => {
822                if let Some(v) = self.parse_value_as_bool(&value, value_type)? {
823                    metadata.is_deprecated = v;
824                }
825            }
826            "is_fine_tunable" => {
827                if let Some(v) = self.parse_value_as_bool(&value, value_type)? {
828                    metadata.is_fine_tunable = v;
829                }
830            }
831            _ => {
832                // Unknown tag, ignore
833            }
834        }
835        Ok(())
836    }
837
838    fn parse_value_as_bool(&self, value: &Value, _value_type: &str) -> Result<Option<bool>> {
839        match value {
840            Value::Bool(b) => Ok(Some(*b)),
841            Value::String(s) => Ok(Some(s == "true" || s == "yes" || s == "1")),
842            Value::Number(n) => Ok(Some(n.as_i64().unwrap_or(0) != 0)),
843            _ => Ok(None),
844        }
845    }
846
847    fn parse_value_as_u32(&self, value: &Value, _value_type: &str) -> Result<Option<u32>> {
848        match value {
849            Value::Number(n) => {
850                if let Some(v) = n.as_u64() {
851                    Ok(Some(v as u32))
852                } else if let Some(v) = n.as_i64() {
853                    Ok(Some(v as u32))
854                } else {
855                    Ok(None)
856                }
857            }
858            Value::String(s) => Ok(s.parse::<u32>().ok()),
859            _ => Ok(None),
860        }
861    }
862
863    fn parse_value_as_f64(&self, value: &Value, _value_type: &str) -> Result<Option<f64>> {
864        match value {
865            Value::Number(n) => Ok(n.as_f64()),
866            Value::String(s) => Ok(s.parse::<f64>().ok()),
867            _ => Ok(None),
868        }
869    }
870
871    /// Check if model name contains a specific pattern (case-insensitive)
872    fn check_name_contains(&self, model: &Value, pattern: &str) -> Option<bool> {
873        let pattern_lower = pattern.to_lowercase();
874
875        // Check all configured ID fields
876        for field in &self.model_paths.field_mappings.id_fields {
877            if let Some(value) = model.get(field).and_then(|v| v.as_str()) {
878                if value.to_lowercase().contains(&pattern_lower) {
879                    return Some(true);
880                }
881            }
882        }
883
884        // Check all configured name fields
885        for field in &self.model_paths.field_mappings.name_fields {
886            if let Some(value) = model.get(field).and_then(|v| v.as_str()) {
887                if value.to_lowercase().contains(&pattern_lower) {
888                    return Some(true);
889                }
890            }
891        }
892
893        Some(false)
894    }
895
896    /// Check if model name matches a specific pattern using regex (case-insensitive)
897    fn check_name_matches(&self, model: &Value, pattern: &str) -> Option<bool> {
898        use regex::RegexBuilder;
899
900        // Create case-insensitive regex
901        let regex = match RegexBuilder::new(pattern).case_insensitive(true).build() {
902            Ok(r) => r,
903            Err(_) => return Some(false), // Invalid regex pattern
904        };
905
906        // Check all configured ID fields
907        for field in &self.model_paths.field_mappings.id_fields {
908            if let Some(value) = model.get(field).and_then(|v| v.as_str()) {
909                if regex.is_match(value) {
910                    return Some(true);
911                }
912            }
913        }
914
915        // Check all configured name fields
916        for field in &self.model_paths.field_mappings.name_fields {
917            if let Some(value) = model.get(field).and_then(|v| v.as_str()) {
918                if regex.is_match(value) {
919                    return Some(true);
920                }
921            }
922        }
923
924        Some(false)
925    }
926}
927
928// Public API function
929pub fn extract_models_from_provider(
930    provider: &Provider,
931    raw_json: &str,
932) -> Result<Vec<ModelMetadata>> {
933    let response: Value = serde_json::from_str(raw_json)?;
934    let extractor = ModelMetadataExtractor::new()?;
935
936    let models = extractor.extract_models(provider, &response)?;
937    let mut metadata_list = Vec::new();
938
939    for model in models {
940        match extractor.extract_metadata(provider, &model) {
941            Ok(metadata) => metadata_list.push(metadata),
942            Err(e) => {
943                eprintln!("Warning: Failed to extract metadata for model: {}", e);
944            }
945        }
946    }
947
948    Ok(metadata_list)
949}
950
951// CLI command handlers
952pub fn add_model_path(path: String) -> Result<()> {
953    let config_dir = ModelMetadataExtractor::get_config_dir()?;
954    let file_path = config_dir.join("model_paths.toml");
955
956    let mut paths = if file_path.exists() {
957        let content = fs::read_to_string(&file_path)?;
958        toml::from_str(&content)?
959    } else {
960        ModelPaths::default()
961    };
962
963    if !paths.paths.contains(&path) {
964        paths.paths.push(path);
965        let content = toml::to_string_pretty(&paths)?;
966        fs::write(&file_path, content)?;
967        println!("Added model path");
968    } else {
969        println!("Path already exists");
970    }
971
972    Ok(())
973}
974
975pub fn remove_model_path(path: String) -> Result<()> {
976    let config_dir = ModelMetadataExtractor::get_config_dir()?;
977    let file_path = config_dir.join("model_paths.toml");
978
979    if !file_path.exists() {
980        anyhow::bail!("No model paths configured");
981    }
982
983    let mut paths: ModelPaths = {
984        let content = fs::read_to_string(&file_path)?;
985        toml::from_str(&content)?
986    };
987
988    if let Some(pos) = paths.paths.iter().position(|p| p == &path) {
989        paths.paths.remove(pos);
990        let content = toml::to_string_pretty(&paths)?;
991        fs::write(&file_path, content)?;
992        println!("Removed model path");
993    } else {
994        println!("Path not found");
995    }
996
997    Ok(())
998}
999
1000pub fn list_model_paths() -> Result<()> {
1001    let config_dir = ModelMetadataExtractor::get_config_dir()?;
1002    let file_path = config_dir.join("model_paths.toml");
1003
1004    let paths = if file_path.exists() {
1005        let content = fs::read_to_string(&file_path)?;
1006        toml::from_str(&content)?
1007    } else {
1008        ModelPaths::default()
1009    };
1010
1011    println!("Model paths:");
1012    for path in &paths.paths {
1013        println!("  - {}", path);
1014    }
1015
1016    Ok(())
1017}
1018
1019pub fn add_tag(
1020    name: String,
1021    paths: Vec<String>,
1022    value_type: String,
1023    transform: Option<String>,
1024) -> Result<()> {
1025    let config_dir = ModelMetadataExtractor::get_config_dir()?;
1026    let file_path = config_dir.join("tags.toml");
1027
1028    let mut config = if file_path.exists() {
1029        let content = fs::read_to_string(&file_path)?;
1030        toml::from_str(&content)?
1031    } else {
1032        TagConfig::default()
1033    };
1034
1035    config.tags.insert(
1036        name.clone(),
1037        TagRule {
1038            paths,
1039            value_type,
1040            transform,
1041        },
1042    );
1043
1044    let content = toml::to_string_pretty(&config)?;
1045    fs::write(&file_path, content)?;
1046    println!("Added tag: {}", name);
1047
1048    Ok(())
1049}
1050
1051/// Initialize model metadata configuration files
1052/// This should be called once during application startup to ensure
1053/// tags.toml and model_paths.toml exist with default values
1054pub fn initialize_model_metadata_config() -> Result<()> {
1055    ModelMetadataExtractor::ensure_config_files_exist()
1056}
1057
1058pub fn list_tags() -> Result<()> {
1059    let config_dir = ModelMetadataExtractor::get_config_dir()?;
1060    let file_path = config_dir.join("tags.toml");
1061
1062    let config = if file_path.exists() {
1063        let content = fs::read_to_string(&file_path)?;
1064        toml::from_str(&content)?
1065    } else {
1066        TagConfig::default()
1067    };
1068
1069    println!("Tags:");
1070    for (name, rule) in &config.tags {
1071        println!("  {}:", name);
1072        println!("    Type: {}", rule.value_type);
1073        println!("    Paths:");
1074        for path in &rule.paths {
1075            println!("      - {}", path);
1076        }
1077        if let Some(transform) = &rule.transform {
1078            println!("    Transform: {}", transform);
1079        }
1080    }
1081
1082    Ok(())
1083}
1084
1085// Compatibility layer for existing code
1086pub struct MetadataExtractor;
1087
1088impl MetadataExtractor {
1089    pub fn extract_from_provider(
1090        provider: &str,
1091        raw_json: &str,
1092    ) -> Result<Vec<ModelMetadata>, Box<dyn std::error::Error>> {
1093        let provider_obj = Provider {
1094            provider: provider.to_string(),
1095            status: "active".to_string(),
1096            supports_tools: false,
1097            supports_structured_output: false,
1098        };
1099
1100        extract_models_from_provider(&provider_obj, raw_json).map_err(|e| e.into())
1101    }
1102}