lc/models/
dump_metadata.rs

1use anyhow::Result;
2use colored::Colorize;
3use serde_json::Value;
4use tokio::fs;
5
/// Namespace type for the raw model-metadata dump commands.
///
/// The struct carries no state; all functionality lives in associated
/// functions on its `impl` block (e.g. `MetadataDumper::dump_all_metadata()`).
#[derive(Debug, Clone, Copy, Default)]
pub struct MetadataDumper;
7
8impl MetadataDumper {
9    /// Dump fresh raw metadata for all configured providers
10    pub async fn dump_all_metadata() -> Result<()> {
11        use crate::config::Config;
12
13        println!(
14            "{} Dumping fresh raw metadata for all configured providers...",
15            "🔍".blue()
16        );
17
18        let config = Config::load()?;
19        let models_raw_dir = Self::get_models_raw_dir()?;
20
21        // Create models_raw directory if it doesn't exist
22        if !models_raw_dir.exists() {
23            fs::create_dir_all(&models_raw_dir).await?;
24            println!(
25                "{} Created directory: {}",
26                "📁".blue(),
27                models_raw_dir.display()
28            );
29        }
30
31        println!(
32            "{} Raw models directory: {}",
33            "📁".blue(),
34            models_raw_dir.display()
35        );
36
37        let mut total_providers = 0;
38        let mut successful_dumps = 0;
39
40        // Sort providers by name for consistent output
41        let mut sorted_providers: Vec<_> = config.providers.iter().collect();
42        sorted_providers.sort_by(|a, b| a.0.cmp(b.0));
43
44        for (provider_name, provider_config) in sorted_providers {
45            total_providers += 1;
46
47            // Skip providers without API keys
48            if provider_config.api_key.is_none() {
49                println!("{} Skipping {} (no API key)", "⚠️".yellow(), provider_name);
50                continue;
51            }
52
53            println!(
54                "{} Fetching fresh models from {}...",
55                "📡".blue(),
56                provider_name
57            );
58
59            match Self::fetch_and_save_raw_metadata(&config, provider_name, &models_raw_dir).await {
60                Ok(_) => {
61                    println!("{} Saved {} raw models data", "✅".green(), provider_name);
62                    successful_dumps += 1;
63                }
64                Err(e) => {
65                    println!(
66                        "{} Failed to fetch models from {}: {}",
67                        "❌".red(),
68                        provider_name,
69                        e
70                    );
71                }
72            }
73        }
74
75        println!("\n{} Summary:", "📊".blue());
76        println!("   Total providers: {}", total_providers);
77        println!("   Successful dumps: {}", successful_dumps);
78        println!("   Raw data saved to: {}", models_raw_dir.display());
79
80        if successful_dumps > 0 {
81            println!("\n{} Raw metadata dump complete!", "🎉".green());
82            println!("   Next step: Analyze the JSON files to debug metadata patterns");
83        }
84
85        Ok(())
86    }
87
88    /// Dump fresh raw metadata for a specific provider by name
89    pub async fn dump_provider_by_name(provider_name: &str) -> Result<()> {
90        use crate::config::Config;
91
92        println!(
93            "{} Dumping fresh raw metadata for provider: {}",
94            "🔍".blue(),
95            provider_name
96        );
97
98        let config = Config::load()?;
99
100        // Check if provider exists
101        if !config.has_provider(provider_name) {
102            anyhow::bail!("Provider '{}' not found in configuration. Use 'lc providers list' to see available providers.", provider_name);
103        }
104
105        let models_raw_dir = Self::get_models_raw_dir()?;
106
107        // Create models_raw directory if it doesn't exist
108        if !models_raw_dir.exists() {
109            fs::create_dir_all(&models_raw_dir).await?;
110            println!(
111                "{} Created directory: {}",
112                "📁".blue(),
113                models_raw_dir.display()
114            );
115        }
116
117        match Self::fetch_and_save_raw_metadata(&config, provider_name, &models_raw_dir).await {
118            Ok(_) => {
119                println!(
120                    "\n{} Successfully dumped fresh raw metadata for {}",
121                    "✅".green(),
122                    provider_name
123                );
124            }
125            Err(e) => {
126                anyhow::bail!("Failed to dump raw metadata for {}: {}", provider_name, e);
127            }
128        }
129
130        Ok(())
131    }
132
133    /// List available raw metadata files
134    pub async fn list_cached_metadata() -> Result<()> {
135        let models_raw_dir = Self::get_models_raw_dir()?;
136
137        println!("{} Available raw metadata files:", "📋".blue());
138        println!();
139
140        if !models_raw_dir.exists() {
141            println!(
142                "No models_raw directory found at: {}",
143                models_raw_dir.display()
144            );
145            println!("Run 'lc dump' to fetch fresh raw metadata from providers.");
146            return Ok(());
147        }
148
149        let mut entries = fs::read_dir(&models_raw_dir).await?;
150        let mut files = Vec::new();
151
152        while let Some(entry) = entries.next_entry().await? {
153            let path = entry.path();
154            if let Some(extension) = path.extension() {
155                if extension == "json" {
156                    if let Some(provider_name) = path.file_stem().and_then(|s| s.to_str()) {
157                        let metadata = entry.metadata().await?;
158                        let size = Self::format_file_size(metadata.len());
159                        files.push((provider_name.to_string(), path.clone(), size));
160                    }
161                }
162            }
163        }
164
165        if files.is_empty() {
166            println!("No raw metadata files found.");
167            println!("Run 'lc dump' to fetch fresh raw metadata from providers.");
168            return Ok(());
169        }
170
171        // Sort files by provider name
172        files.sort_by(|a, b| a.0.cmp(&b.0));
173
174        for (provider_name, path, size) in files {
175            println!(
176                "  {} {} - {} ({})",
177                "•".blue(),
178                provider_name,
179                path.display(),
180                size
181            );
182        }
183
184        println!(
185            "\n{} Use 'lc dump <provider>' to fetch fresh raw data for a specific provider",
186            "💡".yellow()
187        );
188
189        Ok(())
190    }
191
192    /// Fetch fresh raw metadata from a provider and save it
193    async fn fetch_and_save_raw_metadata(
194        config: &crate::config::Config,
195        provider_name: &str,
196        models_raw_dir: &std::path::Path,
197    ) -> Result<()> {
198        use crate::chat;
199
200        // Create authenticated client
201        let mut config_mut = config.clone();
202        let client = chat::create_authenticated_client(&mut config_mut, provider_name).await?;
203
204        // Get provider config for raw API call
205        let provider_config = config.get_provider(provider_name)?;
206
207        // Make raw request to get full JSON response
208        let raw_response = Self::fetch_raw_models_response(&client, provider_config).await?;
209
210        // Save raw response to file
211        let filename = format!("{}.json", provider_name);
212        let filepath = models_raw_dir.join(&filename);
213
214        fs::write(&filepath, &raw_response).await?;
215
216        println!("{} Saved raw data to: {}", "💾".green(), filepath.display());
217
218        Ok(())
219    }
220
221    /// Fetch raw models response from provider API
222    async fn fetch_raw_models_response(
223        _client: &crate::chat::LLMClient,
224        provider_config: &crate::config::ProviderConfig,
225    ) -> Result<String> {
226        // No need to import debug_log, it's a macro exported from lib.rs
227
228        // Create optimized HTTP client with connection pooling and keep-alive settings
229        let http_client = reqwest::Client::builder()
230            .pool_max_idle_per_host(10)
231            .pool_idle_timeout(std::time::Duration::from_secs(90))
232            .tcp_keepalive(std::time::Duration::from_secs(60))
233            .timeout(std::time::Duration::from_secs(60))
234            .connect_timeout(std::time::Duration::from_secs(10))
235            .build()?;
236
237        let url = format!(
238            "{}{}",
239            provider_config.endpoint.trim_end_matches('/'),
240            provider_config.models_path
241        );
242
243        crate::debug_log!("Making API request to: {}", url);
244        crate::debug_log!("Request timeout: 60 seconds");
245
246        let mut req = http_client
247            .get(&url)
248            .header("Content-Type", "application/json");
249
250        crate::debug_log!("Added Content-Type: application/json header");
251
252        // Add custom headers first
253        let mut has_custom_headers = false;
254        for (name, value) in &provider_config.headers {
255            crate::debug_log!("Adding custom header: {}: {}", name, value);
256            req = req.header(name, value);
257            has_custom_headers = true;
258        }
259
260        // Only add Authorization header if no custom headers are present
261        if !has_custom_headers {
262            let api_key = provider_config
263                .api_key
264                .as_ref()
265                .ok_or_else(|| anyhow::anyhow!("API key is required but not found for provider"))?;
266            req = req.header("Authorization", format!("Bearer {}", api_key));
267            crate::debug_log!("Added Authorization header with API key");
268        } else {
269            crate::debug_log!("Skipping Authorization header due to custom headers present");
270        }
271
272        crate::debug_log!("Sending HTTP GET request...");
273        let response = req.send().await?;
274
275        let status = response.status();
276        crate::debug_log!("Received response with status: {}", status);
277
278        if !status.is_success() {
279            let text = response.text().await.unwrap_or_default();
280            crate::debug_log!("API request failed with error response: {}", text);
281            anyhow::bail!("API request failed with status {}: {}", status, text);
282        }
283
284        let response_text = response.text().await?;
285        crate::debug_log!("Received response body ({} bytes)", response_text.len());
286
287        // Pretty print the JSON for better readability
288        match serde_json::from_str::<Value>(&response_text) {
289            Ok(json_value) => {
290                crate::debug_log!("Response is valid JSON, pretty-printing");
291                Ok(serde_json::to_string_pretty(&json_value)?)
292            }
293            Err(_) => {
294                crate::debug_log!("Response is not valid JSON, returning as-is");
295                // If it's not valid JSON, return as-is
296                Ok(response_text)
297            }
298        }
299    }
300
301    /// Get the models_raw directory path
302    fn get_models_raw_dir() -> Result<std::path::PathBuf> {
303        use crate::config::Config;
304        let config_dir = Config::config_dir()?;
305        Ok(config_dir.join("models_raw"))
306    }
307
308    /// Format file size in human-readable format
309    fn format_file_size(bytes: u64) -> String {
310        if bytes < 1024 {
311            format!("{} B", bytes)
312        } else if bytes < 1024 * 1024 {
313            format!("{:.1} KB", bytes as f64 / 1024.0)
314        } else {
315            format!("{:.1} MB", bytes as f64 / (1024.0 * 1024.0))
316        }
317    }
318}