// llm_manager/tui/app/metadata.rs

1use super::types::App;
2
3impl App {
4    /// Compute VRAM estimate from model file size and current settings.
5    pub fn update_vram_estimate(&mut self) {
6        if let Some(model) = self.selected_model() {
7            let model_mib = model.file_size / (1024 * 1024);
8            let hidden = if self.loading.model_hidden_size > 0 {
9                Some(self.loading.model_hidden_size)
10            } else {
11                None
12            };
13            let n_head = if self.loading.model_n_head > 0 {
14                Some(self.loading.model_n_head)
15            } else {
16                None
17            };
18            let n_kv_head = if self.loading.model_n_kv_head > 0 {
19                Some(self.loading.model_n_kv_head)
20            } else {
21                None
22            };
23            let gpu_mem_total_mib = self.metrics.gpu_mem_total / (1024 * 1024);
24            self.loading.vram_estimate = crate::models::estimate_vram_mib(
25                model_mib,
26                &self.settings,
27                self.loading.model_total_layers,
28                hidden,
29                n_head,
30                n_kv_head,
31                gpu_mem_total_mib,
32            );
33        }
34    }
35
36    /// Read metadata (layers, hidden size) from the model's GGUF file.
37    ///
38    /// Uses a single cache keyed by the model's full path, so each unique
39    /// model is parsed only once regardless of how many times it's selected.
40    pub fn update_model_metadata(&mut self) {
41        let model = match self.selected_model() {
42            Some(m) => m.clone(),
43            None => return,
44        };
45        let key = model.path.to_string_lossy().to_string();
46
47        // 1. Debounce: skip re-parse if file hasn't changed.
48        // This must run before the cache lookup so file changes are detected
49        // even when a stale cache entry exists.
50        if let Ok(meta) = std::fs::metadata(&model.path) {
51            let mtime = meta.modified().unwrap_or(std::time::SystemTime::now());
52            let (last_path, last_mtime) = &self.loading.last_metadata_parse;
53            if last_path == &model.path && mtime == *last_mtime {
54                // File unchanged — use cached values if available.
55                if let Some(cached) = self.search.gguf_metadata_cache.get(&key) {
56                    self.loading.model_total_layers = cached.layers;
57                    self.loading.model_hidden_size = cached.hidden_size;
58                    self.loading.model_n_ctx_train = cached.n_ctx_train;
59                    self.loading.model_n_head = cached.n_head;
60                    self.loading.model_n_kv_head = cached.n_kv_head;
61                }
62                if self.loading.model_hidden_size > 0 {
63                    self.update_vram_estimate();
64                }
65                return;
66            }
67            self.loading.last_metadata_parse = (model.path.clone(), mtime);
68        }
69
70        // 2. Evict cache entries if it exceeds the maximum size.
71        // BTreeMap keys are sorted, so `next()` returns the smallest (oldest) key.
72        const MAX_CACHE_SIZE: usize = 50;
73        if self.search.gguf_metadata_cache.len() > MAX_CACHE_SIZE
74            && let Some(first_key) = self.search.gguf_metadata_cache.keys().next().cloned() {
75                self.search.gguf_metadata_cache.remove(&first_key);
76            }
77
78        // 3. Perform the actual parse
79        if let Ok(meta) = crate::models::GgufMetadata::from_path(&model.path) {
80            self.loading.model_total_layers = meta.layers;
81            self.loading.model_hidden_size = meta.hidden_size;
82            self.loading.model_n_ctx_train = meta.n_ctx_train;
83            self.loading.model_n_head = meta.n_head;
84            self.loading.model_n_kv_head = meta.n_kv_head;
85
86            if meta.arch == "mtp" {
87                self.settings.spec_type = "draft-mtp".to_string();
88                self.settings.draft_tokens = meta.draft_tokens;
89            }
90
91            // Cache the parsed metadata
92            self.search.gguf_metadata_cache.insert(key, meta);
93        } else {
94            self.add_log(
95                format!("Failed to parse GGUF metadata for {}", model.path.display()),
96                crate::config::LogLevel::Error,
97            );
98        }
99
100        // Compute VRAM estimate once, after metadata fields are populated.
101        if self.loading.model_hidden_size > 0 {
102            self.update_vram_estimate();
103        }
104    }
105}