llm_manager/tui/app/
state.rs1use super::types::LoadingPhase::*;
2use super::types::{App, LoadingPhase};
3use crate::config::LogLevel;
4use crate::models::ModelState;
5use chrono::Local;
6
7impl App {
8 pub fn add_log(&mut self, message: impl Into<String>, level: LogLevel) {
9 let msg = message.into();
10 self.log_message(&msg, level);
11 self.update_spinner();
12 self.detect_loading_phases(&msg);
13 self.parse_loading_details(&msg);
14 self.detect_load_state(&msg);
15 let previous_progress = self.loading.loading_progress;
16 self.compute_progress();
17 self.loading.progress_target = self.loading.loading_progress;
18 self.loading.loading_progress =
19 previous_progress * 0.85 + self.loading.progress_target * 0.15;
20 self.trim_log();
21 self.log
22 .log_entries
23 .push_back(crate::config::LogEntry::new(msg, level));
24 }
25
26 fn log_message(&mut self, msg: &str, level: LogLevel) {
27 match level {
28 LogLevel::Info => tracing::info!("{}", msg),
29 LogLevel::Warning => tracing::warn!("{}", msg),
30 LogLevel::Error => tracing::error!("{}", msg),
31 }
32 }
33
34 fn update_spinner(&mut self) {
35 self.loading.last_spinner_time = Some(tokio::time::Instant::now());
36 self.loading.loading_spinner = 0;
37 }
38
39 fn detect_loading_phases(&mut self, msg: &str) {
40 let upper = msg.to_uppercase();
41 if self.loading.loading_phases.is_empty() {
42 if upper.contains("LLAMA") || upper.contains("SERVER") || upper.contains("GGML") {
44 self.loading.loading_phases.insert(ServerStarting);
45 self.loading.last_active_phase = Some(ServerStarting);
46 }
47 }
48 if upper.contains("LLAMA_MODEL_LOADER") || upper.contains("LOADING MODEL") {
49 self.ui.last_error_message = None;
50 self.loading.loading_phases.insert(LoadingModel);
51 self.loading.last_active_phase = Some(LoadingModel);
52 }
53 if upper.contains("LOADED META") || upper.contains("META DATA") {
54 self.ui.last_error_message = None;
55 self.loading.loading_phases.insert(LoadingMeta);
56 self.loading.last_active_phase = Some(LoadingMeta);
57 }
58 if upper.contains("LOAD_TENSORS:") {
59 self.ui.last_error_message = None;
60 self.loading.loading_phases.insert(LoadingTensors);
61 self.loading.last_active_phase = Some(LoadingTensors);
62 }
63 if upper.contains("SERVER LISTENING")
64 || upper.contains("HTTP SERVER LISTENING")
65 || upper.contains("LOAD_MODEL: INITIALIZING SLOTS")
66 || (upper.contains("SRV")
67 && upper.contains("LOAD_MODEL")
68 && upper.contains("INITIALIZING"))
69 {
70 self.loading.loading_phases.insert(ServerListening);
71 self.loading.last_active_phase = Some(ServerListening);
72 }
73 }
74
75 fn parse_loading_details(&mut self, msg: &str) {
76 let upper = msg.to_uppercase();
77 if self.loading.loading_phases.contains(&LoadingTensors) {
78 if upper.contains("LOADING TENSOR")
80 && let Some(pos) = msg.to_lowercase().find("loading tensor") {
81 let rest = &msg[pos + "loading tensor".len()..];
82 let parts: Vec<&str> = rest.split_whitespace().collect();
83 if parts.len() >= 3 {
84 if let Ok(n) = parts[0].parse::<u32>() {
85 self.loading.load_progress.tensors_loaded = n;
86 }
87 let total_idx = if parts.len() >= 5 && parts[2].to_lowercase() == "out" {
89 4
90 } else if parts.len() >= 3 && parts[1].to_lowercase() == "of" {
91 2
92 } else {
93 usize::MAX
94 };
95 if total_idx != usize::MAX
96 && let Ok(total) = parts[total_idx]
97 .trim_end_matches(|c: char| !c.is_ascii_digit())
98 .parse::<u32>()
99 {
100 self.loading.load_progress.tensors_total = Some(total);
101 }
102 }
103 }
104 if self.loading.load_progress.tensors_total.is_none() {
107 let dot_count = msg.chars().filter(|&c| c == '.').count();
108 if dot_count > 0 && dot_count <= 200 {
109 self.loading.load_progress.tensors_loaded += dot_count as u32;
110 }
111 }
112
113 if upper.contains("OFFLOADING")
115 && upper.contains("REPEATING LAYERS")
116 && let Some(pos) = msg.find("offloading")
117 {
118 let rest = &msg[pos + "offloading".len()..];
119 if let Some(colon_pos) = rest.find(':') {
120 let rest = rest[colon_pos + 1..].trim_start();
121 let end = rest.find(' ').unwrap_or(rest.len());
122 if let Ok(count) = rest[..end].trim().parse::<u32>() {
123 self.loading.load_progress.layers_total = Some(count);
124 }
125 }
126 }
127
128 if upper.contains("OFFLOADED")
130 && upper.contains("LAYERS")
131 && let Some(pos) = msg.find("offloaded")
132 {
133 let rest = &msg[pos + "offloaded".len()..];
134 if let Some(slash) = rest.find('/') {
135 let before = rest[..slash].trim();
136 let after = rest[slash + 1..].trim();
137 if let Ok(loaded) = before.parse::<u32>() {
138 self.loading.load_progress.layers_loaded = Some(loaded);
139 }
140 if let Ok(total) = after.split_whitespace().next().unwrap_or("").parse::<u32>()
141 {
142 self.loading.load_progress.layers_total = Some(total);
143 }
144 }
145 if self.loading.load_progress.layers_loaded.is_none() {
147 let rest = rest.trim_start();
148 let end = rest.find(' ').unwrap_or(rest.len());
149 if let Ok(count) = rest[..end].trim().parse::<u32>() {
150 self.loading.load_progress.layers_loaded = Some(count);
151 }
152 }
153 }
154
155 for keyword in &["model buffer size", "kv buffer size"] {
158 if let Some(pos) = msg.to_lowercase().find(keyword) {
159 let before = &msg[..pos];
160 let device = before.split_whitespace().last().unwrap_or("").to_string();
161 if !device.is_empty() {
162 let rest = &msg[pos + keyword.len()..];
163 if let Some(eq_pos) = rest.find('=') {
164 let after = rest[eq_pos + 1..].trim();
165 let end = after
166 .find(|c: char| !c.is_ascii_digit() && c != '.')
167 .unwrap_or(after.len());
168 if let Ok(mib) = after[..end].parse::<f64>() {
169 let exists = self
170 .loading
171 .load_progress
172 .buffers
173 .iter_mut()
174 .find(|b| b.device == device);
175 if let Some(buf) = exists {
176 buf.buffer_size_mib = mib;
177 } else {
178 self.loading.load_progress.buffers.push(
179 crate::models::GPUBuffer {
180 device,
181 buffer_size_mib: mib,
182 },
183 );
184 }
185 }
186 }
187 }
188 }
189 }
190 }
191 }
192
193 fn detect_load_state(&mut self, _msg: &str) {
194 let upper = _msg.to_uppercase();
199
200 let is_error = upper.contains("ERROR")
201 || upper.contains("FAILED TO LOAD")
202 || upper.contains("EXCEPTION")
203 || upper.contains("VK::SYSTEMERROR")
204 || upper.contains("OUTOFDEVICEMEMORY")
205 || upper.contains("OUT OF MEMORY");
206
207 if is_error {
208 let is_loading = self
209 .model_states
210 .values()
211 .any(|s| matches!(s, ModelState::Loading));
212 if is_loading {
213 let timestamp = Local::now().format("%Y-%m-%d %H:%M:%S");
214 let error_msg =
215 if upper.contains("OUTOFDEVICEMEMORY") || upper.contains("OUT OF MEMORY") {
216 format!("Last Failed to load a model (OOM - {})", timestamp)
217 } else {
218 format!("Last Failed to load a model ({})", timestamp)
219 };
220
221 self.ui.last_error_message = Some(error_msg);
222 self.reset_loading_state(false);
223 }
224 }
225 }
226
227 pub(crate) fn compute_progress(&mut self) {
228 const PHASE_WEIGHTS: [(LoadingPhase, f32); 5] = [
229 (ServerStarting, 0.08),
230 (LoadingModel, 0.07),
231 (LoadingMeta, 0.07),
232 (LoadingTensors, 0.70),
233 (ServerListening, 0.08),
234 ];
235
236 let mut phase_progress: f32 = 0.0;
237 for (phase, weight) in &PHASE_WEIGHTS {
238 if self.loading.loading_phases.contains(phase) {
239 phase_progress += weight;
240 }
241 }
242
243 if self.loading.loading_phases.contains(&Complete) {
245 self.loading.loading_progress = 1.0;
246 return;
247 }
248
249 if self.loading.loading_phases.contains(&ServerStarting)
251 && self.loading.loading_phases.len() == 1
252 && self.loading.last_active_phase == Some(ServerStarting)
253 {
254 if let Some(last_spinner) = self.loading.last_spinner_time {
255 let elapsed = last_spinner.elapsed();
256 phase_progress =
257 (elapsed.as_millis() as f32 / 2000.0).min(1.0) * PHASE_WEIGHTS[0].1;
258 }
259 } else if self.loading.loading_phases.len() > 1 {
260 if let Some(phase) = self.loading.last_active_phase {
262 let cumulative_before: f32 = PHASE_WEIGHTS
263 .iter()
264 .filter(|(p, _)| *p != phase && self.loading.loading_phases.contains(p))
265 .map(|(_, w)| w)
266 .sum();
267
268 let phase_fraction = match phase {
269 LoadingModel => 0.5,
270 LoadingMeta => 0.5,
271 LoadingTensors => {
272 let mut tensor_fraction: f32 = 0.0;
273 if let (Some(loaded), Some(total)) = (
274 self.loading.load_progress.layers_loaded,
275 self.loading.load_progress.layers_total,
276 ) {
277 let layer_fraction = loaded as f32 / total as f32;
278 tensor_fraction = layer_fraction.min(1.0);
279 }
280 if self.loading.load_progress.tensors_loaded > 0 {
281 let estimated_total: f32 =
282 match self.loading.load_progress.tensors_total {
283 Some(total) => total as f32,
284 None => match self.loading.load_progress.layers_total {
285 Some(layers) => (layers as f32 * 12.0 + 10.0).max(100.0),
286 None => 500.0,
287 },
288 };
289 tensor_fraction = (self.loading.load_progress.tensors_loaded as f32
290 / estimated_total)
291 .min(0.95);
292 }
293 tensor_fraction
294 }
295 ServerListening => 0.8,
296 Complete => 1.0,
297 ServerStarting => 0.0,
298 };
299
300 phase_progress = cumulative_before
301 + phase_fraction
302 * PHASE_WEIGHTS
303 .iter()
304 .find(|(p, _)| *p == phase)
305 .map(|(_, w)| *w)
306 .unwrap_or(0.0);
307 }
308 }
309
310 if phase_progress > 0.0 {
311 self.loading.loading_progress = phase_progress;
312 }
313 }
314
315 pub fn handle_server_exit(&mut self) {
316 if let Some(rx) = &mut self.server.server_exit_rx
317 && let Ok(()) = rx.try_recv() {
318 self.server.server_handle = None;
319 self.loading.loading_phases.clear();
320 self.loading.last_active_phase = None;
321 self.loading.loading_progress = 0.0;
322 self.loading.load_progress = Default::default();
323
324 if !self.bench_tune.bench_tune_running {
325 for state in self.model_states.values_mut() {
326 *state = crate::models::ModelState::Available;
327 }
328 self.ui.needs_redraw = true;
329 }
330 }
331 }
332
333 fn trim_log(&mut self) {
334 if self.log.log_entries.len() >= 500 {
335 self.log.log_entries.pop_front();
336 }
337 }
338
339 pub fn is_model_loaded(&self, display_name: &str) -> bool {
340 matches!(
341 self.model_states.get(display_name),
342 Some(ModelState::Loaded { .. })
343 )
344 }
345
346 pub fn reset_loading_state(&mut self, is_crash: bool) {
348 self.loading.loading_phases.clear();
349 self.loading.last_active_phase = None;
350 self.loading.loading_progress = 0.0;
351 self.loading.load_progress = Default::default();
352 self.loading.last_spinner_time = None;
353 self.loading.loading_spinner = 0;
354 if let Some(h) = self.loading.health_poll_handle.take() {
355 h.abort();
356 }
357
358 let to_fail: Vec<String> = self
361 .model_states
362 .iter()
363 .filter(|(_, state)| {
364 matches!(state, ModelState::Loading)
365 || (is_crash && matches!(state, ModelState::Loaded { .. }))
366 })
367 .map(|(name, _)| name.clone())
368 .collect();
369
370 for name in &to_fail {
372 self.server
373 .loaded_model_names
374 .lock()
375 .unwrap_or_else(|e| e.into_inner())
376 .retain(|n| n != name);
377 let error = self.ui.last_error_message.clone().unwrap_or_else(|| {
378 let timestamp = Local::now().format("%Y-%m-%d %H:%M:%S");
379 format!("Last Failed to load a model ({})", timestamp)
380 });
381 self.model_states
382 .insert(name.clone(), ModelState::Failed { error });
383 }
384 }
385
386 pub fn tick_spinner(&mut self) {
387 if self.is_loading() {
388 let spinner_interval = std::time::Duration::from_millis(150);
389 if self.loading.last_spinner_time.is_none()
390 || self.loading.last_spinner_time.unwrap().elapsed() > spinner_interval
391 {
392 self.loading.loading_spinner = (self.loading.loading_spinner + 1) % 4;
393 self.loading.last_spinner_time = Some(tokio::time::Instant::now());
394 }
395 }
396 }
397
398 pub fn is_loading(&self) -> bool {
399 self.model_states
400 .values()
401 .any(|s| matches!(s, ModelState::Loading))
402 }
403}