1use crate::agent;
2use crate::agent::conversation::{ConversationManager, UserTurn};
3use crate::agent::git_monitor::GitState;
4use crate::agent::inference::{InferenceEngine, InferenceEvent};
5use crate::ui;
6use crate::ui::gpu_monitor::GpuState;
7use crate::ui::voice::VoiceManager;
8use crate::CliCockpit;
9use notify::RecommendedWatcher;
10use std::sync::Arc;
11use tokio::sync::mpsc;
12
/// Context-window size (in tokens) at or above which
/// `coding_runtime_budget_warning` stays silent.
const MIN_RECOMMENDED_CODING_CONTEXT: usize = 8 * 1024;
14
15fn provider_help_hint(base_url: &str, provider_name: &str) -> String {
16 if provider_name == "LM Studio" {
17 format!(
18 "Check if LM Studio is running on {}. If you prefer Ollama, set `api_url` to `{}` in `.hematite/settings.json`.",
19 base_url,
20 crate::agent::config::DEFAULT_OLLAMA_API_URL
21 )
22 } else if provider_name == "Ollama" {
23 format!(
24 "Check if Ollama is running on {} and that a chat model is available. If you prefer LM Studio, set `api_url` to `{}`.",
25 base_url,
26 crate::agent::config::DEFAULT_LM_STUDIO_API_URL
27 )
28 } else {
29 format!(
30 "Check if the configured provider is running on {} and that `.hematite/settings.json` points at the right endpoint.",
31 base_url
32 )
33 }
34}
35
/// Returns `base_url` with every trailing `/` removed and `/v1` appended.
pub fn session_endpoint_url(base_url: &str) -> String {
    let mut endpoint = String::from(base_url.trim_end_matches('/'));
    endpoint.push_str("/v1");
    endpoint
}
39
40fn preferred_coding_model_target(
41 config: &crate::agent::config::HematiteConfig,
42 cockpit: &CliCockpit,
43) -> Option<String> {
44 crate::agent::config::preferred_coding_model(config)
45 .or(cockpit.think_model.clone())
46 .or(cockpit.fast_model.clone())
47}
48
/// Reports whether two model identifiers are equal once surrounding
/// whitespace is trimmed and ASCII case is ignored.
fn model_name_matches(current: &str, target: &str) -> bool {
    let (left, right) = (current.trim(), target.trim());
    left.eq_ignore_ascii_case(right)
}
52
53fn coding_runtime_budget_warning(
54 provider_name: &str,
55 model_name: &str,
56 context_length: usize,
57 preferred_model: Option<&str>,
58) -> Option<String> {
59 if model_name.trim().is_empty()
60 || model_name.eq_ignore_ascii_case("no model loaded")
61 || context_length >= MIN_RECOMMENDED_CODING_CONTEXT
62 {
63 return None;
64 }
65
66 let provider_label = if provider_name.is_empty() {
67 "the active provider"
68 } else {
69 provider_name
70 };
71 let mut message = format!(
72 "Warning: {} loaded `{}` with only {} tokens of live context. That is too small for normal coding, scaffold, or teleport-resume work.",
73 provider_label, model_name, context_length
74 );
75 if let Some(target) = preferred_model.filter(|target| !model_name_matches(model_name, target)) {
76 message.push_str(&format!(
77 " Load your preferred coding model `{}` and rerun `/runtime refresh` before heavy implementation.",
78 target
79 ));
80 } else {
81 message.push_str(
82 " Load a larger-context coding model before heavy implementation and rerun `/runtime refresh`.",
83 );
84 }
85 Some(message)
86}
87
88fn provider_model_setup_hint(provider_name: &str) -> String {
89 if provider_name == "Ollama" {
90 format!(
91 "Pull or run a chat model in Ollama, then keep `api_url` pointed at `{}`. If you want semantic search too, save an embedding model in `/embed prefer <id>` and Hematite can load it here as well.",
92 crate::agent::config::DEFAULT_OLLAMA_API_URL
93 )
94 } else {
95 format!(
96 "Load a coding model in LM Studio and keep the local server on `{}`. Optionally also load an embedding model for semantic search.",
97 crate::agent::config::DEFAULT_LM_STUDIO_API_URL
98 )
99 }
100}
101
102async fn provider_startup_guidance(provider_name: &str, endpoint: &str, has_model: bool) -> String {
103 let mut lines = vec![format!("Provider setup: {} ({})", provider_name, endpoint)];
104 if has_model {
105 lines.push("Status: local runtime is reachable and a coding model is loaded.".to_string());
106 } else {
107 lines.push("Status: provider is reachable but no coding model is loaded yet.".to_string());
108 lines.push(provider_model_setup_hint(provider_name));
109 }
110 if let Some((alt_name, alt_url)) = detect_alternative_provider(provider_name).await {
111 lines.push(format!("Reachable alternative: {} ({})", alt_name, alt_url));
112 }
113 lines.push(
114 "Use `/provider` after startup if you want to save a different runtime for future sessions."
115 .to_string(),
116 );
117 lines.join("\n")
118}
119
/// Formats the context length for the startup banner.
///
/// Yields `"none"` when the model name is blank, contains
/// `"no model loaded"` (ASCII case-insensitive), or `context_length` is zero;
/// otherwise the decimal token count.
fn runtime_context_display(model: &str, context_length: usize) -> String {
    let normalized = model.to_ascii_lowercase();
    let model_missing = normalized.trim().is_empty() || normalized.contains("no model loaded");

    match (model_missing, context_length) {
        (true, _) | (_, 0) => String::from("none"),
        (false, tokens) => tokens.to_string(),
    }
}
128
129async fn print_provider_bootstrap_help(provider_name: &str, base_url: &str) {
130 let endpoint = session_endpoint_url(base_url);
131 println!("Quick setup path:");
132 if provider_name == "Ollama" {
133 println!(" 1. Install Ollama: https://ollama.com/");
134 println!(" 2. Start Ollama and ensure `{}` is reachable.", endpoint);
135 println!(" 3. Pull a chat model, for example: `ollama pull qwen3.5:latest`");
136 println!(
137 " 4. Restart Hematite, or switch back to LM Studio with `api_url = \"{}\"`.",
138 crate::agent::config::DEFAULT_LM_STUDIO_API_URL
139 );
140 } else {
141 println!(" 1. Install LM Studio: https://lmstudio.ai/");
142 println!(
143 " 2. Start the local server and ensure `{}` is reachable.",
144 endpoint
145 );
146 println!(" 3. Load a coding model such as `Qwen/Qwen3.5-9B Q4_K_M`.");
147 println!(" 4. Restart Hematite after the model is loaded.");
148 }
149 if let Some((alt_name, alt_url)) = detect_alternative_provider(provider_name).await {
150 println!(
151 "Reachable alternative detected: {} ({}). You can point Hematite there instead.",
152 alt_name, alt_url
153 );
154 }
155}
156
157pub async fn detect_alternative_provider(active_provider: &str) -> Option<(String, String)> {
158 match active_provider {
159 "LM Studio" => {
160 let ollama = crate::agent::ollama::OllamaHarness::new("http://localhost:11434");
161 if ollama.is_reachable().await {
162 Some((
163 "Ollama".to_string(),
164 crate::agent::config::DEFAULT_OLLAMA_API_URL.to_string(),
165 ))
166 } else {
167 None
168 }
169 }
170 "Ollama" => {
171 let lms = crate::agent::lms::LmsHarness::new();
172 if lms.is_server_responding("http://localhost:1234").await {
173 Some((
174 "LM Studio".to_string(),
175 crate::agent::config::DEFAULT_LM_STUDIO_API_URL.to_string(),
176 ))
177 } else {
178 None
179 }
180 }
181 _ => {
182 let lms = crate::agent::lms::LmsHarness::new();
183 if lms.is_server_responding("http://localhost:1234").await {
184 return Some((
185 "LM Studio".to_string(),
186 crate::agent::config::DEFAULT_LM_STUDIO_API_URL.to_string(),
187 ));
188 }
189 let ollama = crate::agent::ollama::OllamaHarness::new("http://localhost:11434");
190 if ollama.is_reachable().await {
191 return Some((
192 "Ollama".to_string(),
193 crate::agent::config::DEFAULT_OLLAMA_API_URL.to_string(),
194 ));
195 }
196 None
197 }
198 }
199}
200
201pub struct RuntimeServices {
202 pub engine: Arc<InferenceEngine>,
203 pub gpu_state: Arc<GpuState>,
204 pub git_state: Arc<GitState>,
205 pub voice_manager: Arc<VoiceManager>,
206 pub swarm_coordinator: Arc<agent::swarm::SwarmCoordinator>,
207 pub cancel_token: Arc<std::sync::atomic::AtomicBool>,
208 pub searx_session: agent::searx_lifecycle::SearxRuntimeSession,
209}
210
211pub struct RuntimeChannels {
212 pub specular_rx: mpsc::Receiver<agent::specular::SpecularEvent>,
213 pub agent_tx: mpsc::Sender<InferenceEvent>,
214 pub agent_rx: mpsc::Receiver<InferenceEvent>,
215 pub swarm_tx: mpsc::Sender<agent::swarm::SwarmMessage>,
216 pub swarm_rx: mpsc::Receiver<agent::swarm::SwarmMessage>,
217 pub user_input_tx: mpsc::Sender<UserTurn>,
218 pub user_input_rx: mpsc::Receiver<UserTurn>,
219}
220
221pub struct RuntimeBundle {
222 pub services: RuntimeServices,
223 pub channels: RuntimeChannels,
224 pub watcher_guard: RecommendedWatcher,
225}
226
227pub struct AgentLoopRuntime {
228 pub user_input_rx: mpsc::Receiver<UserTurn>,
229 pub agent_tx: mpsc::Sender<InferenceEvent>,
230 pub services: RuntimeServices,
231}
232
/// Session options consumed by [`run_agent_loop`].
///
/// Every field except `yolo` is forwarded to `ConversationManager::new`;
/// `yolo` is passed to each `run_turn` call.
pub struct AgentLoopConfig {
    /// Passed to `ConversationManager::run_turn` on every turn.
    pub yolo: bool,
    /// Forwarded to the conversation manager.
    pub professional: bool,
    /// Forwarded to the conversation manager.
    pub brief: bool,
    /// Forwarded to the conversation manager.
    pub snark: u8,
    /// Forwarded to the conversation manager.
    pub chaos: u8,
    /// Forwarded to the conversation manager.
    pub soul_personality: String,
    /// Optional fast-model identifier forwarded to the conversation manager.
    pub fast_model: Option<String>,
    /// Optional think-model identifier forwarded to the conversation manager.
    pub think_model: Option<String>,
}
243
244pub async fn build_runtime_bundle(
245 cockpit: &CliCockpit,
246 species: &str,
247 snark: u8,
248 professional: bool,
249) -> Result<RuntimeBundle, Box<dyn std::error::Error>> {
250 println!("Booting Hematite systems...");
251 let config = crate::agent::config::load_config();
252
253 let searx_session = crate::agent::searx_lifecycle::boot_searx_if_needed(&config).await;
255
256 let api_url = crate::agent::config::effective_api_url(&config, &cockpit.url);
258 let mut engine_raw = InferenceEngine::new(api_url, species.to_string(), snark)?;
259 let provider_name = engine_raw.provider_name().await;
260 let preferred_model = preferred_coding_model_target(&config, cockpit);
261 let gpu_state = ui::gpu_monitor::spawn_gpu_monitor();
262 let git_state = agent::git_monitor::spawn_git_monitor();
263
264 if !engine_raw.health_check().await {
265 println!(
266 "ERROR: {} not detected at {}",
267 provider_name, engine_raw.base_url
268 );
269 println!(
270 "{}",
271 provider_help_hint(&engine_raw.base_url, &provider_name)
272 );
273 print_provider_bootstrap_help(&provider_name, &engine_raw.base_url).await;
274 std::process::exit(1);
275 }
276
277 let mut detected_model = String::new();
278 let mut detected_context = 0;
279 let mut empty_observations = 0u8;
280
281 for _ in 0..20 {
284 detected_model = engine_raw.get_loaded_model().await.unwrap_or_default();
285 detected_context = engine_raw.detect_context_length().await;
286
287 if !detected_model.trim().is_empty() && detected_context > 0 {
288 break;
289 }
290 if detected_model.trim().is_empty() && detected_context == 0 {
291 empty_observations = empty_observations.saturating_add(1);
292 if empty_observations >= 2 {
293 break;
294 }
295 } else {
296 empty_observations = 0;
297 }
298 tokio::time::sleep(tokio::time::Duration::from_millis(250)).await;
299 }
300
301 let mut auto_loaded_coding_model = false;
302
303 if detected_model.trim().is_empty() {
304 let target = preferred_model
305 .as_deref()
306 .or(if provider_name == "LM Studio" {
307 Some("gemma-4-9b-it")
308 } else {
309 None
310 });
311 if let Some(target) = target {
312 println!(
313 "Notice: No model loaded in {}. Attempting to auto-load `{}`...",
314 provider_name, target
315 );
316 if let Err(e) = engine_raw.load_model(target).await {
317 println!(
318 "Warning: Auto-load failed: {}. Please load a model manually in {}.",
319 e, provider_name
320 );
321 } else {
322 auto_loaded_coding_model = true;
323 detected_model = engine_raw.get_loaded_model().await.unwrap_or_default();
324 detected_context = engine_raw.detect_context_length().await;
325 }
326 }
327 }
328
329 let effective_model = if detected_model.trim().is_empty() {
330 "no model loaded".to_string()
331 } else {
332 detected_model.clone()
333 };
334 let effective_context = if effective_model == "no model loaded" {
335 0
336 } else {
337 detected_context
338 };
339 engine_raw
340 .set_runtime_profile(&effective_model, effective_context)
341 .await;
342 if let Some(warning) = coding_runtime_budget_warning(
343 &provider_name,
344 &effective_model,
345 effective_context,
346 preferred_model.as_deref(),
347 ) {
348 println!("{}", warning);
349 }
350
351 if auto_loaded_coding_model {
352 if let Some(embed_target) = config.embed_model.as_deref() {
353 let current_embed = engine_raw.get_embedding_model().await;
354 let needs_embed = current_embed
355 .as_deref()
356 .map(|loaded| !model_name_matches(loaded, embed_target))
357 .unwrap_or(true);
358 if needs_embed {
359 println!(
360 "Notice: preferred embed model `{}` is not loaded. Attempting to load it for semantic search...",
361 embed_target
362 );
363 if let Err(e) = engine_raw.load_embedding_model(embed_target).await {
364 println!(
365 "Warning: Preferred embed model auto-load failed: {}. Load `{}` manually or save a different `/embed prefer` target if you want semantic search.",
366 e, embed_target
367 );
368 }
369 }
370 }
371 }
372
373 let (specular_tx, specular_rx) = mpsc::channel(32);
374 let watcher_guard = agent::specular::spawn_watcher(specular_tx)?;
375
376 let (agent_tx, agent_rx) = mpsc::channel::<InferenceEvent>(100);
377 let (swarm_tx, swarm_rx) = mpsc::channel(32);
378 let voice_manager = Arc::new(VoiceManager::new(agent_tx.clone()));
379
380 if let Some(worker) = config
381 .fast_model
382 .clone()
383 .or_else(|| cockpit.fast_model.clone())
384 {
385 engine_raw.worker_model = Some(worker);
386 }
387
388 let engine = Arc::new(engine_raw);
389 let swarm_coordinator = Arc::new(agent::swarm::SwarmCoordinator::new(
390 engine.clone(),
391 gpu_state.clone(),
392 cockpit.fast_model.clone(),
393 professional,
394 ));
395
396 let (user_input_tx, user_input_rx) = mpsc::channel::<UserTurn>(32);
397 let cancel_token = Arc::new(std::sync::atomic::AtomicBool::new(false));
398
399 Ok(RuntimeBundle {
400 services: RuntimeServices {
401 engine,
402 gpu_state,
403 git_state,
404 voice_manager,
405 swarm_coordinator,
406 cancel_token,
407 searx_session,
408 },
409 channels: RuntimeChannels {
410 specular_rx,
411 agent_tx,
412 agent_rx,
413 swarm_tx,
414 swarm_rx,
415 user_input_tx,
416 user_input_rx,
417 },
418 watcher_guard,
419 })
420}
421
422pub fn spawn_runtime_profile_sync(
423 engine: Arc<InferenceEngine>,
424 agent_tx: mpsc::Sender<InferenceEvent>,
425) -> tokio::task::JoinHandle<()> {
426 tokio::spawn(async move {
427 tokio::time::sleep(tokio::time::Duration::from_secs(4)).await;
429
430 let mut last_embed: Option<String> = None;
431
432 loop {
433 let result = engine.refresh_runtime_profile().await;
434
435 let Some((model_id, context_length, _changed)) = result else {
436 if agent_tx.is_closed() {
437 break;
438 }
439 tokio::time::sleep(tokio::time::Duration::from_secs(15)).await;
441 continue;
442 };
443 let provider_name = engine.provider_name().await;
444
445 let poll_interval = if model_id == "no model loaded" {
447 tokio::time::Duration::from_secs(12)
448 } else {
449 tokio::time::Duration::from_secs(4)
450 };
451
452 if agent_tx
453 .send(InferenceEvent::RuntimeProfile {
454 provider_name,
455 endpoint: session_endpoint_url(&engine.base_url),
456 model_id,
457 context_length,
458 })
459 .await
460 .is_err()
461 {
462 break;
463 }
464
465 let current_embed = engine.get_embedding_model().await;
467 if current_embed != last_embed {
468 if agent_tx
469 .send(InferenceEvent::EmbedProfile {
470 model_id: current_embed.clone(),
471 })
472 .await
473 .is_err()
474 {
475 break;
476 }
477 last_embed = current_embed;
478 }
479
480 tokio::time::sleep(poll_interval).await;
481 }
482 })
483}
484
485pub async fn run_agent_loop(runtime: AgentLoopRuntime, config: AgentLoopConfig) {
486 let AgentLoopRuntime {
487 mut user_input_rx,
488 agent_tx,
489 services,
490 } = runtime;
491 let RuntimeServices {
492 engine,
493 gpu_state,
494 git_state,
495 voice_manager,
496 swarm_coordinator,
497 cancel_token,
498 searx_session,
499 } = services;
500
501 let mut manager = ConversationManager::new(
502 engine,
503 config.professional,
504 config.brief,
505 config.snark,
506 config.chaos,
507 config.soul_personality,
508 config.fast_model,
509 config.think_model,
510 gpu_state.clone(),
511 git_state,
512 swarm_coordinator,
513 voice_manager,
514 );
515 manager.cancel_token = cancel_token;
516
517 let _ = agent_tx
518 .send(InferenceEvent::RuntimeProfile {
519 provider_name: manager.engine.provider_name().await,
520 endpoint: session_endpoint_url(&manager.engine.base_url),
521 model_id: manager.engine.current_model(),
522 context_length: manager.engine.current_context_length(),
523 })
524 .await;
525
526 let workspace_root = crate::tools::file_ops::workspace_root();
527 let _ = crate::agent::workspace_profile::ensure_workspace_profile(&workspace_root);
528
529 let gpu_name = gpu_state.gpu_name();
532 let vram = gpu_state.label();
533 let voice_cfg = crate::agent::config::load_config();
534 let voice_status = format!(
535 "Voice: {} | Speed: {}x | Volume: {}x",
536 crate::agent::config::effective_voice(&voice_cfg),
537 crate::agent::config::effective_voice_speed(&voice_cfg),
538 crate::agent::config::effective_voice_volume(&voice_cfg),
539 );
540 let embed_status = match manager.engine.get_embedding_model().await {
541 Some(id) => format!("Embed: {} (semantic search ready)", id),
542 None => {
543 "Embed: none loaded (load a preferred embedding model for semantic search)".to_string()
544 }
545 };
546 let workspace_root = crate::tools::file_ops::workspace_root();
547 let docs_only_mode = !crate::tools::file_ops::is_project_workspace();
548 let workspace_mode = if docs_only_mode {
549 "docs-only"
550 } else {
551 "project"
552 };
553 let launched_from_home = home::home_dir()
554 .and_then(|home| std::env::current_dir().ok().map(|cwd| cwd == home))
555 .unwrap_or(false);
556 let project_hint = if !docs_only_mode {
557 String::new()
558 } else if launched_from_home {
559 "\nTip: you launched Hematite from your home directory. That is fine for workstation questions and docs-only memory, but for project-specific build, test, script, or repo work you should relaunch in the target project directory. `.hematite/docs/`, `.hematite/imports/`, and recent local session reports remain searchable in docs-only vein mode.".to_string()
560 } else {
561 "\nTip: source indexing is disabled outside a project folder. Launch Hematite in the target project directory for project-specific build, test, script, or repo work. `.hematite/docs/`, `.hematite/imports/`, and recent local session reports remain searchable in docs-only vein mode.".to_string()
562 };
563 let display_model = {
564 let m = manager.engine.current_model();
565 if m.is_empty() || m == "no model loaded" {
566 "no model loaded".to_string()
567 } else {
568 m
569 }
570 };
571 let provider_name = manager.engine.provider_name().await;
572 let startup_endpoint = session_endpoint_url(&manager.engine.base_url);
573 let terminal_name = crate::ui::terminal::detect_terminal().label();
574 let greeting = format!(
575 "Hematite {} Online [{}] | Provider: {}\nModel: {} | CTX: {} | GPU: {} | VRAM: {}\nEndpoint: {}\nWorkspace: {} ({})\n{}\n{}\n/ask · read-only analysis /code · implement /architect · plan-first /chat · conversation\nRecovery: /undo · /new · /forget · /clear | /version · /about{}",
576 crate::hematite_version_display(),
577 terminal_name,
578 provider_name,
579 display_model,
580 runtime_context_display(&display_model, manager.engine.current_context_length()),
581 gpu_name,
582 vram,
583 startup_endpoint,
584 workspace_root.display(),
585 workspace_mode,
586 embed_status,
587 voice_status,
588 project_hint
589 );
590 let _ = agent_tx
591 .send(InferenceEvent::MutedToken(format!("\n{}", greeting)))
592 .await;
593 if let Some(summary) = searx_session.startup_summary.as_deref() {
594 let _ = agent_tx
595 .send(InferenceEvent::Thought(summary.to_string()))
596 .await;
597 }
598 if display_model == "no model loaded" {
599 let guidance = provider_startup_guidance(&provider_name, &startup_endpoint, false).await;
600 let _ = agent_tx.send(InferenceEvent::Thought(guidance)).await;
601 }
602
603 if let Err(e) = manager.initialize_mcp(&agent_tx).await {
604 let _ = agent_tx
605 .send(InferenceEvent::Error(format!("MCP Init Failed: {}", e)))
606 .await;
607 }
608 let indexed = manager.initialize_vein();
609 manager.initialize_repo_map();
610 let _ = agent_tx
611 .send(InferenceEvent::VeinStatus {
612 file_count: manager.vein.file_count(),
613 embedded_count: manager.vein.embedded_chunk_count(),
614 docs_only: docs_only_mode,
615 })
616 .await;
617 let _ = agent_tx
618 .send(InferenceEvent::Thought(format!(
619 "The Vein: indexed {} files",
620 indexed
621 )))
622 .await;
623
624 if let Some(cp) = crate::agent::conversation::load_checkpoint() {
626 let verify_tag = match cp.last_verify_ok {
627 Some(true) => " | last verify: PASS",
628 Some(false) => " | last verify: FAIL",
629 None => "",
630 };
631 let files_tag = if cp.working_files.is_empty() {
632 String::new()
633 } else {
634 format!(" | files: {}", cp.working_files.join(", "))
635 };
636 let goal_preview: String = cp.last_goal.chars().take(120).collect();
637 let trail = if cp.last_goal.len() > 120 { "…" } else { "" };
638 let resume_msg = format!(
639 "Resumed: {} turn{}{}{} — last goal: \"{}{}\"",
640 cp.turn_count,
641 if cp.turn_count == 1 { "" } else { "s" },
642 verify_tag,
643 files_tag,
644 goal_preview,
645 trail,
646 );
647 let _ = agent_tx.send(InferenceEvent::Thought(resume_msg)).await;
648 } else {
649 let session_path = crate::tools::file_ops::hematite_dir().join("session.json");
650 if !session_path.exists() {
651 let first_run_msg = "\nWelcome to Hematite! I'm your local AI workstation assistant.\n\n\
652 Since this is your first time here, what would you like to do?\n\
653 - System Check: Wondering if your tools are working? Run `/health`\n\
654 - Code: Ready to build something? Run `/architect Let's build a new feature`\n\
655 - Setup: Need help configuring Git or the workspace? Run `/ask What should I set up first?`\n\
656 - Help: Have a weird error? Type `/explain ` and paste it.\n\n\
657 Just type \"hello\" to start a normal conversation!".to_string();
658 let _ = agent_tx.send(InferenceEvent::Thought(first_run_msg)).await;
659 let provider_setup = provider_startup_guidance(
660 &provider_name,
661 &startup_endpoint,
662 display_model != "no model loaded",
663 )
664 .await;
665 let _ = agent_tx.send(InferenceEvent::Thought(provider_setup)).await;
666
667 let _ = std::fs::write(&session_path, "{\"turn_count\": 0}");
669 }
670 }
671
672 let _ = agent_tx.send(InferenceEvent::Done).await;
673 let startup_config = crate::agent::config::load_config();
674 manager.engine.set_gemma_native_formatting(
675 crate::agent::config::effective_gemma_native_formatting(
676 &startup_config,
677 &manager.engine.current_model(),
678 ),
679 );
680 let startup_model = manager.engine.current_model();
681 if crate::agent::inference::is_hematite_native_model(&startup_model) {
682 let mode = crate::agent::config::gemma_native_mode_label(&startup_config, &startup_model);
683 let status = match mode {
684 "on" => "Sovereign Engine detected | Native Turn-Formatting: ON (forced)",
685 "auto" => "Sovereign Engine detected | Native Turn-Formatting: ON (auto)",
686 _ => "Sovereign Engine detected | Native Turn-Formatting: OFF (use /gemma-native auto|on)",
687 };
688 let _ = agent_tx
689 .send(InferenceEvent::MutedToken(status.to_string()))
690 .await;
691 }
692
693 while let Some(input) = user_input_rx.recv().await {
694 if let Err(e) = manager
695 .run_turn(&input, agent_tx.clone(), config.yolo)
696 .await
697 {
698 let _ = agent_tx.send(InferenceEvent::Error(e.to_string())).await;
699 let _ = agent_tx.send(InferenceEvent::Done).await;
700 }
701 }
702}
703
#[cfg(test)]
mod tests {
    use super::{
        coding_runtime_budget_warning, model_name_matches, preferred_coding_model_target,
        runtime_context_display,
    };
    use crate::agent::config::HematiteConfig;

    /// CLI fixture whose `fast_model`/`think_model` differ from anything a
    /// config would set, so precedence is observable.
    fn cockpit_with_cli_models() -> crate::CliCockpit {
        crate::CliCockpit {
            yolo: false,
            swarm_size: 3,
            brief: false,
            reroll: None,
            rusty: false,
            stats: false,
            no_splash: false,
            fast_model: Some("fast-cli".into()),
            think_model: Some("think-cli".into()),
            url: "http://localhost:1234/v1".into(),
            mcp_server: false,
            edge_redact: false,
            semantic_redact: false,
            semantic_url: None,
            semantic_model: None,
            pdf_extract_helper: None,
            teleported_from: None,
        }
    }

    #[test]
    fn preferred_coding_model_uses_config_before_cli() {
        let mut config = HematiteConfig::default();
        config.think_model = Some("qwen-config".into());
        config.fast_model = Some("fast-config".into());
        let cockpit = cockpit_with_cli_models();

        let chosen = preferred_coding_model_target(&config, &cockpit);

        assert_eq!(chosen, Some("qwen-config".to_string()));
    }

    #[test]
    fn model_name_matches_is_case_insensitive() {
        assert!(model_name_matches("Qwen/Qwen3.5-9B", "qwen/qwen3.5-9b"));
        assert!(!model_name_matches("bonsai-8b", "qwen/qwen3.5-9b"));
    }

    #[test]
    fn coding_runtime_budget_warning_flags_small_context() {
        let warning =
            coding_runtime_budget_warning("LM Studio", "bonsai-8b", 4096, Some("qwen/qwen3.5-9b"))
                .expect("warning expected");

        for needle in ["bonsai-8b", "4096", "qwen/qwen3.5-9b"] {
            assert!(warning.contains(needle));
        }
    }

    #[test]
    fn runtime_context_display_reports_none_without_loaded_model() {
        assert_eq!(runtime_context_display("no model loaded", 0), "none");
        assert_eq!(runtime_context_display("", 32768), "none");
        assert_eq!(runtime_context_display("qwen/qwen3.5-9b", 32000), "32000");
    }
}