Skip to main content

llm_manager/tui/app/
state.rs

1use super::types::LoadingPhase::*;
2use super::types::{App, LoadingPhase};
3use crate::config::LogLevel;
4use crate::models::ModelState;
5use chrono::Local;
6
7impl App {
8    pub fn add_log(&mut self, message: impl Into<String>, level: LogLevel) {
9        let msg = message.into();
10        self.log_message(&msg, level);
11        self.update_spinner();
12        self.detect_loading_phases(&msg);
13        self.parse_loading_details(&msg);
14        self.detect_load_state(&msg);
15        let previous_progress = self.loading.loading_progress;
16        self.compute_progress();
17        self.loading.progress_target = self.loading.loading_progress;
18        self.loading.loading_progress =
19            previous_progress * 0.85 + self.loading.progress_target * 0.15;
20        self.trim_log();
21        self.log
22            .log_entries
23            .push_back(crate::config::LogEntry::new(msg, level));
24    }
25
26    fn log_message(&mut self, msg: &str, level: LogLevel) {
27        match level {
28            LogLevel::Info => tracing::info!("{}", msg),
29            LogLevel::Warning => tracing::warn!("{}", msg),
30            LogLevel::Error => tracing::error!("{}", msg),
31        }
32    }
33
34    fn update_spinner(&mut self) {
35        self.loading.last_spinner_time = Some(tokio::time::Instant::now());
36        self.loading.loading_spinner = 0;
37    }
38
39    fn detect_loading_phases(&mut self, msg: &str) {
40        let upper = msg.to_uppercase();
41        if self.loading.loading_phases.is_empty() {
42            // Detect server starting (first log line after spawn)
43            if upper.contains("LLAMA") || upper.contains("SERVER") || upper.contains("GGML") {
44                self.loading.loading_phases.insert(ServerStarting);
45                self.loading.last_active_phase = Some(ServerStarting);
46            }
47        }
48        if upper.contains("LLAMA_MODEL_LOADER") || upper.contains("LOADING MODEL") {
49            self.ui.last_error_message = None;
50            self.loading.loading_phases.insert(LoadingModel);
51            self.loading.last_active_phase = Some(LoadingModel);
52        }
53        if upper.contains("LOADED META") || upper.contains("META DATA") {
54            self.ui.last_error_message = None;
55            self.loading.loading_phases.insert(LoadingMeta);
56            self.loading.last_active_phase = Some(LoadingMeta);
57        }
58        if upper.contains("LOAD_TENSORS:") {
59            self.ui.last_error_message = None;
60            self.loading.loading_phases.insert(LoadingTensors);
61            self.loading.last_active_phase = Some(LoadingTensors);
62        }
63        if upper.contains("SERVER LISTENING")
64            || upper.contains("HTTP SERVER LISTENING")
65            || upper.contains("LOAD_MODEL: INITIALIZING SLOTS")
66            || (upper.contains("SRV")
67                && upper.contains("LOAD_MODEL")
68                && upper.contains("INITIALIZING"))
69        {
70            self.loading.loading_phases.insert(ServerListening);
71            self.loading.last_active_phase = Some(ServerListening);
72        }
73    }
74
75    fn parse_loading_details(&mut self, msg: &str) {
76        let upper = msg.to_uppercase();
77        if self.loading.loading_phases.contains(&LoadingTensors) {
78            // Parse "loading tensor X of Y" or "loading tensor X out of Y" pattern
79            if upper.contains("LOADING TENSOR")
80                && let Some(pos) = msg.to_lowercase().find("loading tensor") {
81                    let rest = &msg[pos + "loading tensor".len()..];
82                    let parts: Vec<&str> = rest.split_whitespace().collect();
83                    if parts.len() >= 3 {
84                        if let Ok(n) = parts[0].parse::<u32>() {
85                            self.loading.load_progress.tensors_loaded = n;
86                        }
87                        // "of Y" or "out of Y" — Y is at index 2 or 4
88                        let total_idx = if parts.len() >= 5 && parts[2].to_lowercase() == "out" {
89                            4
90                        } else if parts.len() >= 3 && parts[1].to_lowercase() == "of" {
91                            2
92                        } else {
93                            usize::MAX
94                        };
95                        if total_idx != usize::MAX
96                            && let Ok(total) = parts[total_idx]
97                                .trim_end_matches(|c: char| !c.is_ascii_digit())
98                                .parse::<u32>()
99                            {
100                                self.loading.load_progress.tensors_total = Some(total);
101                            }
102                    }
103                }
104            // Count dots from progress lines like "................................"
105            // Only use dot-counting as fallback when we haven't seen an explicit tensor count yet
106            if self.loading.load_progress.tensors_total.is_none() {
107                let dot_count = msg.chars().filter(|&c| c == '.').count();
108                if dot_count > 0 && dot_count <= 200 {
109                    self.loading.load_progress.tensors_loaded += dot_count as u32;
110                }
111            }
112
113            // Offloading N repeating layers to GPU
114            if upper.contains("OFFLOADING")
115                && upper.contains("REPEATING LAYERS")
116                && let Some(pos) = msg.find("offloading")
117            {
118                let rest = &msg[pos + "offloading".len()..];
119                if let Some(colon_pos) = rest.find(':') {
120                    let rest = rest[colon_pos + 1..].trim_start();
121                    let end = rest.find(' ').unwrap_or(rest.len());
122                    if let Ok(count) = rest[..end].trim().parse::<u32>() {
123                        self.loading.load_progress.layers_total = Some(count);
124                    }
125                }
126            }
127
128            // Offloaded X/Y layers to GPU
129            if upper.contains("OFFLOADED")
130                && upper.contains("LAYERS")
131                && let Some(pos) = msg.find("offloaded")
132            {
133                let rest = &msg[pos + "offloaded".len()..];
134                if let Some(slash) = rest.find('/') {
135                    let before = rest[..slash].trim();
136                    let after = rest[slash + 1..].trim();
137                    if let Ok(loaded) = before.parse::<u32>() {
138                        self.loading.load_progress.layers_loaded = Some(loaded);
139                    }
140                    if let Ok(total) = after.split_whitespace().next().unwrap_or("").parse::<u32>()
141                    {
142                        self.loading.load_progress.layers_total = Some(total);
143                    }
144                }
145                // Also handle "offloaded N layers" without Y
146                if self.loading.load_progress.layers_loaded.is_none() {
147                    let rest = rest.trim_start();
148                    let end = rest.find(' ').unwrap_or(rest.len());
149                    if let Ok(count) = rest[..end].trim().parse::<u32>() {
150                        self.loading.load_progress.layers_loaded = Some(count);
151                    }
152                }
153            }
154
155            // CPU_Mapped model buffer size = X MiB
156            // Vulkan0 model buffer size = X MiB
157            for keyword in &["model buffer size", "kv buffer size"] {
158                if let Some(pos) = msg.to_lowercase().find(keyword) {
159                    let before = &msg[..pos];
160                    let device = before.split_whitespace().last().unwrap_or("").to_string();
161                    if !device.is_empty() {
162                        let rest = &msg[pos + keyword.len()..];
163                        if let Some(eq_pos) = rest.find('=') {
164                            let after = rest[eq_pos + 1..].trim();
165                            let end = after
166                                .find(|c: char| !c.is_ascii_digit() && c != '.')
167                                .unwrap_or(after.len());
168                            if let Ok(mib) = after[..end].parse::<f64>() {
169                                let exists = self
170                                    .loading
171                                    .load_progress
172                                    .buffers
173                                    .iter_mut()
174                                    .find(|b| b.device == device);
175                                if let Some(buf) = exists {
176                                    buf.buffer_size_mib = mib;
177                                } else {
178                                    self.loading.load_progress.buffers.push(
179                                        crate::models::GPUBuffer {
180                                            device,
181                                            buffer_size_mib: mib,
182                                        },
183                                    );
184                                }
185                            }
186                        }
187                    }
188                }
189            }
190        }
191    }
192
193    fn detect_load_state(&mut self, _msg: &str) {
194        // Log-based load state detection removed.
195        // Loading completion is now detected via /health API polling.
196        // Server exit is now detected via channel-based signaling.
197        // Error detection still uses log parsing for OOM/crash detection.
198        let upper = _msg.to_uppercase();
199
200        let is_error = upper.contains("ERROR")
201            || upper.contains("FAILED TO LOAD")
202            || upper.contains("EXCEPTION")
203            || upper.contains("VK::SYSTEMERROR")
204            || upper.contains("OUTOFDEVICEMEMORY")
205            || upper.contains("OUT OF MEMORY");
206
207        if is_error {
208            let is_loading = self
209                .model_states
210                .values()
211                .any(|s| matches!(s, ModelState::Loading));
212            if is_loading {
213                let timestamp = Local::now().format("%Y-%m-%d %H:%M:%S");
214                let error_msg =
215                    if upper.contains("OUTOFDEVICEMEMORY") || upper.contains("OUT OF MEMORY") {
216                        format!("Last Failed to load a model (OOM - {})", timestamp)
217                    } else {
218                        format!("Last Failed to load a model ({})", timestamp)
219                    };
220
221                self.ui.last_error_message = Some(error_msg);
222                self.reset_loading_state(false);
223            }
224        }
225    }
226
227    pub(crate) fn compute_progress(&mut self) {
228        const PHASE_WEIGHTS: [(LoadingPhase, f32); 5] = [
229            (ServerStarting, 0.08),
230            (LoadingModel, 0.07),
231            (LoadingMeta, 0.07),
232            (LoadingTensors, 0.70),
233            (ServerListening, 0.08),
234        ];
235
236        let mut phase_progress: f32 = 0.0;
237        for (phase, weight) in &PHASE_WEIGHTS {
238            if self.loading.loading_phases.contains(phase) {
239                phase_progress += weight;
240            }
241        }
242
243        // Handle Complete phase separately — it means 100%
244        if self.loading.loading_phases.contains(&Complete) {
245            self.loading.loading_progress = 1.0;
246            return;
247        }
248
249        // Spinner interpolation for ServerStarting (works even as the only active phase)
250        if self.loading.loading_phases.contains(&ServerStarting)
251            && self.loading.loading_phases.len() == 1
252            && self.loading.last_active_phase == Some(ServerStarting)
253        {
254            if let Some(last_spinner) = self.loading.last_spinner_time {
255                let elapsed = last_spinner.elapsed();
256                phase_progress =
257                    (elapsed.as_millis() as f32 / 2000.0).min(1.0) * PHASE_WEIGHTS[0].1;
258            }
259        } else if self.loading.loading_phases.len() > 1 {
260            // Apply interpolation within the current active phase for smooth transitions
261            if let Some(phase) = self.loading.last_active_phase {
262                let cumulative_before: f32 = PHASE_WEIGHTS
263                    .iter()
264                    .filter(|(p, _)| *p != phase && self.loading.loading_phases.contains(p))
265                    .map(|(_, w)| w)
266                    .sum();
267
268                let phase_fraction = match phase {
269                    LoadingModel => 0.5,
270                    LoadingMeta => 0.5,
271                    LoadingTensors => {
272                        let mut tensor_fraction: f32 = 0.0;
273                        if let (Some(loaded), Some(total)) = (
274                            self.loading.load_progress.layers_loaded,
275                            self.loading.load_progress.layers_total,
276                        ) {
277                            let layer_fraction = loaded as f32 / total as f32;
278                            tensor_fraction = layer_fraction.min(1.0);
279                        }
280                        if self.loading.load_progress.tensors_loaded > 0 {
281                            let estimated_total: f32 =
282                                match self.loading.load_progress.tensors_total {
283                                    Some(total) => total as f32,
284                                    None => match self.loading.load_progress.layers_total {
285                                        Some(layers) => (layers as f32 * 12.0 + 10.0).max(100.0),
286                                        None => 500.0,
287                                    },
288                                };
289                            tensor_fraction = (self.loading.load_progress.tensors_loaded as f32
290                                / estimated_total)
291                                .min(0.95);
292                        }
293                        tensor_fraction
294                    }
295                    ServerListening => 0.8,
296                    Complete => 1.0,
297                    ServerStarting => 0.0,
298                };
299
300                phase_progress = cumulative_before
301                    + phase_fraction
302                        * PHASE_WEIGHTS
303                            .iter()
304                            .find(|(p, _)| *p == phase)
305                            .map(|(_, w)| *w)
306                            .unwrap_or(0.0);
307            }
308        }
309
310        if phase_progress > 0.0 {
311            self.loading.loading_progress = phase_progress;
312        }
313    }
314
315    pub fn handle_server_exit(&mut self) {
316        if let Some(rx) = &mut self.server.server_exit_rx
317            && let Ok(()) = rx.try_recv() {
318                self.server.server_handle = None;
319                self.loading.loading_phases.clear();
320                self.loading.last_active_phase = None;
321                self.loading.loading_progress = 0.0;
322                self.loading.load_progress = Default::default();
323
324                if !self.bench_tune.bench_tune_running {
325                    for state in self.model_states.values_mut() {
326                        *state = crate::models::ModelState::Available;
327                    }
328                    self.ui.needs_redraw = true;
329                }
330            }
331    }
332
333    fn trim_log(&mut self) {
334        if self.log.log_entries.len() >= 500 {
335            self.log.log_entries.pop_front();
336        }
337    }
338
339    pub fn is_model_loaded(&self, display_name: &str) -> bool {
340        matches!(
341            self.model_states.get(display_name),
342            Some(ModelState::Loaded { .. })
343        )
344    }
345
346    /// Reset loading state (progress bar and model status) on failure.
347    pub fn reset_loading_state(&mut self, is_crash: bool) {
348        self.loading.loading_phases.clear();
349        self.loading.last_active_phase = None;
350        self.loading.loading_progress = 0.0;
351        self.loading.load_progress = Default::default();
352        self.loading.last_spinner_time = None;
353        self.loading.loading_spinner = 0;
354        if let Some(h) = self.loading.health_poll_handle.take() {
355            h.abort();
356        }
357
358        // Models to fail: always any that were Loading.
359        // If it's a crash, also fail all that were Loaded.
360        let to_fail: Vec<String> = self
361            .model_states
362            .iter()
363            .filter(|(_, state)| {
364                matches!(state, ModelState::Loading)
365                    || (is_crash && matches!(state, ModelState::Loaded { .. }))
366            })
367            .map(|(name, _)| name.clone())
368            .collect();
369
370        // Remove from loaded list and set to Failed
371        for name in &to_fail {
372            self.server
373                .loaded_model_names
374                .lock()
375                .unwrap_or_else(|e| e.into_inner())
376                .retain(|n| n != name);
377            let error = self.ui.last_error_message.clone().unwrap_or_else(|| {
378                let timestamp = Local::now().format("%Y-%m-%d %H:%M:%S");
379                format!("Last Failed to load a model ({})", timestamp)
380            });
381            self.model_states
382                .insert(name.clone(), ModelState::Failed { error });
383        }
384    }
385
386    pub fn tick_spinner(&mut self) {
387        if self.is_loading() {
388            let spinner_interval = std::time::Duration::from_millis(150);
389            if self.loading.last_spinner_time.is_none()
390                || self.loading.last_spinner_time.unwrap().elapsed() > spinner_interval
391            {
392                self.loading.loading_spinner = (self.loading.loading_spinner + 1) % 4;
393                self.loading.last_spinner_time = Some(tokio::time::Instant::now());
394            }
395        }
396    }
397
398    pub fn is_loading(&self) -> bool {
399        self.model_states
400            .values()
401            .any(|s| matches!(s, ModelState::Loading))
402    }
403}