1use std::fmt::Write as _;
2
3use crate::agent;
4use crate::agent::conversation::{ConversationManager, UserTurn};
5use crate::agent::git_monitor::GitState;
6use crate::agent::inference::{InferenceEngine, InferenceEvent};
7use crate::ui;
8use crate::ui::gpu_monitor::GpuState;
9use crate::ui::voice::VoiceManager;
10use crate::CliCockpit;
11use notify::RecommendedWatcher;
12use std::sync::Arc;
13use tokio::sync::mpsc;
14
/// Context length (in tokens) below which `coding_runtime_budget_warning`
/// emits a "too small for normal coding" warning about the loaded model.
const MIN_RECOMMENDED_CODING_CONTEXT: usize = 8_192;
16
17fn provider_help_hint(base_url: &str, provider_name: &str) -> String {
18 if provider_name == "LM Studio" {
19 format!(
20 "Check if LM Studio is running on {}. If you prefer Ollama, set `api_url` to `{}` in `.hematite/settings.json`.",
21 base_url,
22 crate::agent::config::DEFAULT_OLLAMA_API_URL
23 )
24 } else if provider_name == "Ollama" {
25 format!(
26 "Check if Ollama is running on {} and that a chat model is available. If you prefer LM Studio, set `api_url` to `{}`.",
27 base_url,
28 crate::agent::config::DEFAULT_LM_STUDIO_API_URL
29 )
30 } else {
31 format!(
32 "Check if the configured provider is running on {} and that `.hematite/settings.json` points at the right endpoint.",
33 base_url
34 )
35 }
36}
37
/// Returns the endpoint shown to the user for a session: `base_url` with any
/// trailing `/` characters removed, followed by `/v1`.
pub fn session_endpoint_url(base_url: &str) -> String {
    let mut endpoint = String::from(base_url.trim_end_matches('/'));
    endpoint.push_str("/v1");
    endpoint
}
41
42fn preferred_coding_model_target(
43 config: &crate::agent::config::HematiteConfig,
44 cockpit: &CliCockpit,
45) -> Option<String> {
46 crate::agent::config::preferred_coding_model(config)
47 .or(cockpit.think_model.clone())
48 .or(cockpit.fast_model.clone())
49}
50
/// Compares two model identifiers, ignoring surrounding whitespace and ASCII
/// case differences.
fn model_name_matches(current: &str, target: &str) -> bool {
    let (lhs, rhs) = (current.trim(), target.trim());
    lhs.eq_ignore_ascii_case(rhs)
}
54
55fn coding_runtime_budget_warning(
56 provider_name: &str,
57 model_name: &str,
58 context_length: usize,
59 preferred_model: Option<&str>,
60) -> Option<String> {
61 if model_name.trim().is_empty()
62 || model_name.eq_ignore_ascii_case("no model loaded")
63 || context_length >= MIN_RECOMMENDED_CODING_CONTEXT
64 {
65 return None;
66 }
67
68 let provider_label = if provider_name.is_empty() {
69 "the active provider"
70 } else {
71 provider_name
72 };
73 let mut message = format!(
74 "Warning: {} loaded `{}` with only {} tokens of live context. That is too small for normal coding, scaffold, or teleport-resume work.",
75 provider_label, model_name, context_length
76 );
77 if let Some(target) = preferred_model.filter(|target| !model_name_matches(model_name, target)) {
78 let _ = write!(message,
79 " Load your preferred coding model `{}` and rerun `/runtime refresh` before heavy implementation.",
80 target
81 );
82 } else {
83 message.push_str(
84 " Load a larger-context coding model before heavy implementation and rerun `/runtime refresh`.",
85 );
86 }
87 Some(message)
88}
89
90fn provider_model_setup_hint(provider_name: &str) -> String {
91 if provider_name == "Ollama" {
92 format!(
93 "Pull or run a chat model in Ollama, then keep `api_url` pointed at `{}`. If you want semantic search too, save an embedding model in `/embed prefer <id>` and Hematite can load it here as well.",
94 crate::agent::config::DEFAULT_OLLAMA_API_URL
95 )
96 } else {
97 format!(
98 "Load a coding model in LM Studio and keep the local server on `{}`. Optionally also load an embedding model for semantic search.",
99 crate::agent::config::DEFAULT_LM_STUDIO_API_URL
100 )
101 }
102}
103
104async fn provider_startup_guidance(provider_name: &str, endpoint: &str, has_model: bool) -> String {
105 let mut lines = vec![format!("Provider setup: {} ({})", provider_name, endpoint)];
106 if has_model {
107 lines.push("Status: local runtime is reachable and a coding model is loaded.".to_string());
108 } else {
109 lines.push("Status: provider is reachable but no coding model is loaded yet.".to_string());
110 lines.push(provider_model_setup_hint(provider_name));
111 }
112 if let Some((alt_name, alt_url)) = detect_alternative_provider(provider_name).await {
113 lines.push(format!("Reachable alternative: {} ({})", alt_name, alt_url));
114 }
115 lines.push(
116 "Use `/provider` after startup if you want to save a different runtime for future sessions."
117 .to_string(),
118 );
119 lines.join("\n")
120}
121
/// Formats the context length for the startup banner.
///
/// Yields `"none"` when the model name is blank, contains
/// "no model loaded" (case-insensitively), or the context length is zero;
/// otherwise yields the context length as decimal text.
fn runtime_context_display(model: &str, context_length: usize) -> String {
    let normalized = model.to_ascii_lowercase();
    let model_missing = normalized.trim().is_empty() || normalized.contains("no model loaded");
    if model_missing || context_length == 0 {
        return "none".to_string();
    }
    context_length.to_string()
}
130
131async fn print_provider_bootstrap_help(provider_name: &str, base_url: &str) {
132 let endpoint = session_endpoint_url(base_url);
133 println!("Quick setup path:");
134 if provider_name == "Ollama" {
135 println!(" 1. Install Ollama: https://ollama.com/");
136 println!(" 2. Start Ollama and ensure `{}` is reachable.", endpoint);
137 println!(" 3. Pull a chat model, for example: `ollama pull qwen3.5:latest`");
138 println!(
139 " 4. Restart Hematite, or switch back to LM Studio with `api_url = \"{}\"`.",
140 crate::agent::config::DEFAULT_LM_STUDIO_API_URL
141 );
142 } else {
143 println!(" 1. Install LM Studio: https://lmstudio.ai/");
144 println!(
145 " 2. Start the local server and ensure `{}` is reachable.",
146 endpoint
147 );
148 println!(" 3. Load a coding model such as `Qwen/Qwen3.5-9B Q4_K_M`.");
149 println!(" 4. Restart Hematite after the model is loaded.");
150 }
151 if let Some((alt_name, alt_url)) = detect_alternative_provider(provider_name).await {
152 println!(
153 "Reachable alternative detected: {} ({}). You can point Hematite there instead.",
154 alt_name, alt_url
155 );
156 }
157}
158
159pub async fn detect_alternative_provider(active_provider: &str) -> Option<(String, String)> {
160 match active_provider {
161 "LM Studio" => {
162 let ollama = crate::agent::ollama::OllamaHarness::new("http://localhost:11434");
163 if ollama.is_reachable().await {
164 Some((
165 "Ollama".to_string(),
166 crate::agent::config::DEFAULT_OLLAMA_API_URL.to_string(),
167 ))
168 } else {
169 None
170 }
171 }
172 "Ollama" => {
173 let lms = crate::agent::lms::LmsHarness::new();
174 if lms.is_server_responding("http://localhost:1234").await {
175 Some((
176 "LM Studio".to_string(),
177 crate::agent::config::DEFAULT_LM_STUDIO_API_URL.to_string(),
178 ))
179 } else {
180 None
181 }
182 }
183 _ => {
184 let lms = crate::agent::lms::LmsHarness::new();
185 if lms.is_server_responding("http://localhost:1234").await {
186 return Some((
187 "LM Studio".to_string(),
188 crate::agent::config::DEFAULT_LM_STUDIO_API_URL.to_string(),
189 ));
190 }
191 let ollama = crate::agent::ollama::OllamaHarness::new("http://localhost:11434");
192 if ollama.is_reachable().await {
193 return Some((
194 "Ollama".to_string(),
195 crate::agent::config::DEFAULT_OLLAMA_API_URL.to_string(),
196 ));
197 }
198 None
199 }
200 }
201}
202
/// Shared services assembled by `build_runtime_bundle` and later consumed by
/// `run_agent_loop`.
pub struct RuntimeServices {
    /// Inference engine, profiled against the detected model at startup.
    pub engine: Arc<InferenceEngine>,
    /// Handle returned by `ui::gpu_monitor::spawn_gpu_monitor`.
    pub gpu_state: Arc<GpuState>,
    /// Handle returned by `agent::git_monitor::spawn_git_monitor`.
    pub git_state: Arc<GitState>,
    /// Voice manager constructed with a clone of the agent event sender.
    pub voice_manager: Arc<VoiceManager>,
    /// Swarm coordinator sharing the engine and GPU state.
    pub swarm_coordinator: Arc<agent::swarm::SwarmCoordinator>,
    /// Flag initialised to `false`; handed to the conversation manager as its
    /// `cancel_token`.
    pub cancel_token: Arc<std::sync::atomic::AtomicBool>,
    /// Result of `searx_lifecycle::boot_searx_if_needed`.
    pub searx_session: agent::searx_lifecycle::SearxRuntimeSession,
}
212
/// Channel endpoints created by `build_runtime_bundle`.
pub struct RuntimeChannels {
    /// Receiving side of the channel (capacity 32) whose sender is given to
    /// `agent::specular::spawn_watcher`.
    pub specular_rx: mpsc::Receiver<agent::specular::SpecularEvent>,
    /// Sender for agent events (channel capacity 100).
    pub agent_tx: mpsc::Sender<InferenceEvent>,
    /// Receiver paired with `agent_tx`.
    pub agent_rx: mpsc::Receiver<InferenceEvent>,
    /// Sender for swarm messages (channel capacity 32).
    pub swarm_tx: mpsc::Sender<agent::swarm::SwarmMessage>,
    /// Receiver paired with `swarm_tx`.
    pub swarm_rx: mpsc::Receiver<agent::swarm::SwarmMessage>,
    /// Sender for user turns (channel capacity 32).
    pub user_input_tx: mpsc::Sender<UserTurn>,
    /// Receiver paired with `user_input_tx`; `run_agent_loop` drains a
    /// receiver of this type.
    pub user_input_rx: mpsc::Receiver<UserTurn>,
}
222
/// Everything `build_runtime_bundle` hands back to its caller.
pub struct RuntimeBundle {
    /// Long-lived shared services.
    pub services: RuntimeServices,
    /// Channel endpoints wiring the services together.
    pub channels: RuntimeChannels,
    /// Watcher returned by `agent::specular::spawn_watcher`.
    /// NOTE(review): presumably must be kept alive for file watching to
    /// continue — confirm against `spawn_watcher`.
    pub watcher_guard: RecommendedWatcher,
}
228
/// Runtime inputs consumed by `run_agent_loop`.
pub struct AgentLoopRuntime {
    /// Source of user turns; the loop ends when this channel yields `None`.
    pub user_input_rx: mpsc::Receiver<UserTurn>,
    /// Sender used for every event the loop emits.
    pub agent_tx: mpsc::Sender<InferenceEvent>,
    /// Shared services, destructured at the top of the loop.
    pub services: RuntimeServices,
}
234
/// Settings consumed by `run_agent_loop`.
///
/// All fields except `yolo` are forwarded to `ConversationManager::new`;
/// their exact semantics are defined there.
pub struct AgentLoopConfig {
    /// Passed to `ConversationManager::run_turn` on every turn.
    pub yolo: bool,
    pub professional: bool,
    pub brief: bool,
    pub snark: u8,
    pub chaos: u8,
    pub soul_personality: String,
    pub fast_model: Option<String>,
    pub think_model: Option<String>,
}
245
/// Boots every runtime component and returns them as a `RuntimeBundle`.
///
/// Sequence: load config, boot SearXNG if needed, create the inference
/// engine, start the GPU and git monitors, health-check the provider, detect
/// (or try to auto-load) a model, apply the runtime profile, optionally load
/// the preferred embedding model, then create the channels, file watcher,
/// voice manager and swarm coordinator.
///
/// # Errors
/// Propagates errors from `InferenceEngine::new` and
/// `agent::specular::spawn_watcher`.
///
/// # Process exit
/// If the provider health check fails, setup help is printed and the process
/// terminates via `std::process::exit(1)` instead of returning.
pub async fn build_runtime_bundle(
    cockpit: &CliCockpit,
    species: &str,
    snark: u8,
    professional: bool,
) -> Result<RuntimeBundle, Box<dyn std::error::Error>> {
    println!("Booting Hematite systems...");
    let config = crate::agent::config::load_config();

    let searx_session = crate::agent::searx_lifecycle::boot_searx_if_needed(&config).await;

    let api_url = crate::agent::config::effective_api_url(&config, &cockpit.url);
    let mut engine_raw = InferenceEngine::new(api_url, species.to_string(), snark)?;
    let provider_name = engine_raw.provider_name().await;
    let preferred_model = preferred_coding_model_target(&config, cockpit);
    let gpu_state = ui::gpu_monitor::spawn_gpu_monitor();
    let git_state = agent::git_monitor::spawn_git_monitor();

    // Without a reachable provider nothing else can work: print help and exit.
    if !engine_raw.health_check().await {
        println!(
            "ERROR: {} not detected at {}",
            provider_name, engine_raw.base_url
        );
        println!(
            "{}",
            provider_help_hint(&engine_raw.base_url, &provider_name)
        );
        print_provider_bootstrap_help(&provider_name, &engine_raw.base_url).await;
        std::process::exit(1);
    }

    let mut detected_model = String::new();
    let mut detected_context = 0;
    let mut empty_observations = 0u8;

    // Poll up to 20 times, 250 ms apart, for a model name and context length.
    // Stop early once both are known, or after two consecutive polls where
    // both are empty (treated as "nothing is loaded").
    for _ in 0..20 {
        detected_model = engine_raw.get_loaded_model().await.unwrap_or_default();
        detected_context = engine_raw.detect_context_length().await;

        if !detected_model.trim().is_empty() && detected_context > 0 {
            break;
        }
        if detected_model.trim().is_empty() && detected_context == 0 {
            empty_observations = empty_observations.saturating_add(1);
            if empty_observations >= 2 {
                break;
            }
        } else {
            // Partial information: reset the streak and keep polling.
            empty_observations = 0;
        }
        tokio::time::sleep(tokio::time::Duration::from_millis(250)).await;
    }

    let mut auto_loaded_coding_model = false;

    if detected_model.trim().is_empty() {
        // Auto-load target: the preferred model, else a hard-coded default
        // for LM Studio only.
        let target = preferred_model
            .as_deref()
            .or(if provider_name == "LM Studio" {
                Some("gemma-4-9b-it")
            } else {
                None
            });
        if let Some(target) = target {
            println!(
                "Notice: No model loaded in {}. Attempting to auto-load `{}`...",
                provider_name, target
            );
            if let Err(e) = engine_raw.load_model(target).await {
                println!(
                    "Warning: Auto-load failed: {}. Please load a model manually in {}.",
                    e, provider_name
                );
            } else {
                auto_loaded_coding_model = true;
                detected_model = engine_raw.get_loaded_model().await.unwrap_or_default();
                detected_context = engine_raw.detect_context_length().await;
            }
        }
    }

    // Normalise "nothing detected" to the placeholder name and zero context.
    let effective_model = if detected_model.trim().is_empty() {
        "no model loaded".to_string()
    } else {
        detected_model.clone()
    };
    let effective_context = if effective_model == "no model loaded" {
        0
    } else {
        detected_context
    };
    engine_raw
        .set_runtime_profile(&effective_model, effective_context)
        .await;
    if let Some(warning) = coding_runtime_budget_warning(
        &provider_name,
        &effective_model,
        effective_context,
        preferred_model.as_deref(),
    ) {
        println!("{}", warning);
    }

    // The embedding model is only auto-loaded when the coding model was
    // auto-loaded in this run.
    if auto_loaded_coding_model {
        if let Some(embed_target) = config.embed_model.as_deref() {
            let current_embed = engine_raw.get_embedding_model().await;
            let needs_embed = current_embed
                .as_deref()
                .map(|loaded| !model_name_matches(loaded, embed_target))
                .unwrap_or(true);
            if needs_embed {
                println!(
                    "Notice: preferred embed model `{}` is not loaded. Attempting to load it for semantic search...",
                    embed_target
                );
                if let Err(e) = engine_raw.load_embedding_model(embed_target).await {
                    println!(
                        "Warning: Preferred embed model auto-load failed: {}. Load `{}` manually or save a different `/embed prefer` target if you want semantic search.",
                        e, embed_target
                    );
                }
            }
        }
    }

    let (specular_tx, specular_rx) = mpsc::channel(32);
    let watcher_guard = agent::specular::spawn_watcher(specular_tx)?;

    let (agent_tx, agent_rx) = mpsc::channel::<InferenceEvent>(100);
    let (swarm_tx, swarm_rx) = mpsc::channel(32);
    let voice_manager = Arc::new(VoiceManager::new(agent_tx.clone()));

    // Worker model: config value first, CLI flag as the fallback.
    if let Some(worker) = config
        .fast_model
        .clone()
        .or_else(|| cockpit.fast_model.clone())
    {
        engine_raw.worker_model = Some(worker);
    }

    // The engine is shared from here on; all mutation happens above.
    let engine = Arc::new(engine_raw);
    let swarm_coordinator = Arc::new(agent::swarm::SwarmCoordinator::new(
        engine.clone(),
        gpu_state.clone(),
        cockpit.fast_model.clone(),
        professional,
    ));

    let (user_input_tx, user_input_rx) = mpsc::channel::<UserTurn>(32);
    let cancel_token = Arc::new(std::sync::atomic::AtomicBool::new(false));

    Ok(RuntimeBundle {
        services: RuntimeServices {
            engine,
            gpu_state,
            git_state,
            voice_manager,
            swarm_coordinator,
            cancel_token,
            searx_session,
        },
        channels: RuntimeChannels {
            specular_rx,
            agent_tx,
            agent_rx,
            swarm_tx,
            swarm_rx,
            user_input_tx,
            user_input_rx,
        },
        watcher_guard,
    })
}
423
424pub fn spawn_runtime_profile_sync(
425 engine: Arc<InferenceEngine>,
426 agent_tx: mpsc::Sender<InferenceEvent>,
427) -> tokio::task::JoinHandle<()> {
428 tokio::spawn(async move {
429 tokio::time::sleep(tokio::time::Duration::from_secs(4)).await;
431
432 let mut last_embed: Option<String> = None;
433
434 loop {
435 let result = engine.refresh_runtime_profile().await;
436
437 let Some((model_id, context_length, _changed)) = result else {
438 if agent_tx.is_closed() {
439 break;
440 }
441 tokio::time::sleep(tokio::time::Duration::from_secs(15)).await;
443 continue;
444 };
445 let provider_name = engine.provider_name().await;
446
447 let poll_interval = if model_id == "no model loaded" {
449 tokio::time::Duration::from_secs(12)
450 } else {
451 tokio::time::Duration::from_secs(4)
452 };
453
454 if agent_tx
455 .send(InferenceEvent::RuntimeProfile {
456 provider_name,
457 endpoint: session_endpoint_url(&engine.base_url),
458 model_id,
459 context_length,
460 })
461 .await
462 .is_err()
463 {
464 break;
465 }
466
467 let current_embed = engine.get_embedding_model().await;
469 if current_embed != last_embed {
470 if agent_tx
471 .send(InferenceEvent::EmbedProfile {
472 model_id: current_embed.clone(),
473 })
474 .await
475 .is_err()
476 {
477 break;
478 }
479 last_embed = current_embed;
480 }
481
482 tokio::time::sleep(poll_interval).await;
483 }
484 })
485}
486
487pub async fn run_agent_loop(runtime: AgentLoopRuntime, config: AgentLoopConfig) {
488 let AgentLoopRuntime {
489 mut user_input_rx,
490 agent_tx,
491 services,
492 } = runtime;
493 let RuntimeServices {
494 engine,
495 gpu_state,
496 git_state,
497 voice_manager,
498 swarm_coordinator,
499 cancel_token,
500 searx_session,
501 } = services;
502
503 let mut manager = ConversationManager::new(
504 engine,
505 config.professional,
506 config.brief,
507 config.snark,
508 config.chaos,
509 config.soul_personality,
510 config.fast_model,
511 config.think_model,
512 gpu_state.clone(),
513 git_state,
514 swarm_coordinator,
515 voice_manager,
516 );
517 manager.cancel_token = cancel_token;
518
519 let _ = agent_tx
520 .send(InferenceEvent::RuntimeProfile {
521 provider_name: manager.engine.provider_name().await,
522 endpoint: session_endpoint_url(&manager.engine.base_url),
523 model_id: manager.engine.current_model(),
524 context_length: manager.engine.current_context_length(),
525 })
526 .await;
527
528 let workspace_root = crate::tools::file_ops::workspace_root();
529 let _ = crate::agent::workspace_profile::ensure_workspace_profile(&workspace_root);
530
531 let gpu_name = gpu_state.gpu_name();
534 let vram = gpu_state.label();
535 let voice_cfg = crate::agent::config::load_config();
536 let voice_status = format!(
537 "Voice: {} | Speed: {}x | Volume: {}x",
538 crate::agent::config::effective_voice(&voice_cfg),
539 crate::agent::config::effective_voice_speed(&voice_cfg),
540 crate::agent::config::effective_voice_volume(&voice_cfg),
541 );
542 let embed_status = match manager.engine.get_embedding_model().await {
543 Some(id) => format!("Embed: {} (semantic search ready)", id),
544 None => {
545 "Embed: none loaded (load a preferred embedding model for semantic search)".to_string()
546 }
547 };
548 let workspace_root = crate::tools::file_ops::workspace_root();
549 let docs_only_mode = !crate::tools::file_ops::is_project_workspace();
550 let workspace_mode = if docs_only_mode {
551 "docs-only"
552 } else {
553 "project"
554 };
555 let launched_from_home = home::home_dir()
556 .and_then(|home| std::env::current_dir().ok().map(|cwd| cwd == home))
557 .unwrap_or(false);
558 let project_hint = if !docs_only_mode {
559 String::new()
560 } else if launched_from_home {
561 "\nTip: you launched Hematite from your home directory. That is fine for workstation questions and docs-only memory, but for project-specific build, test, script, or repo work you should relaunch in the target project directory. `.hematite/docs/`, `.hematite/imports/`, and recent local session reports remain searchable in docs-only vein mode.".to_string()
562 } else {
563 "\nTip: source indexing is disabled outside a project folder. Launch Hematite in the target project directory for project-specific build, test, script, or repo work. `.hematite/docs/`, `.hematite/imports/`, and recent local session reports remain searchable in docs-only vein mode.".to_string()
564 };
565 let display_model = {
566 let m = manager.engine.current_model();
567 if m.is_empty() || m == "no model loaded" {
568 "no model loaded".to_string()
569 } else {
570 m
571 }
572 };
573 let provider_name = manager.engine.provider_name().await;
574 let startup_endpoint = session_endpoint_url(&manager.engine.base_url);
575 let terminal_name = crate::ui::terminal::detect_terminal().label();
576 let greeting = format!(
577 "Hematite {} Online [{}] | Provider: {}\nModel: {} | CTX: {} | GPU: {} | VRAM: {}\nEndpoint: {}\nWorkspace: {} ({})\n{}\n{}\n/ask · read-only analysis /code · implement /architect · plan-first /chat · conversation\nRecovery: /undo · /new · /forget · /clear | /version · /about{}",
578 crate::hematite_version_display(),
579 terminal_name,
580 provider_name,
581 display_model,
582 runtime_context_display(&display_model, manager.engine.current_context_length()),
583 gpu_name,
584 vram,
585 startup_endpoint,
586 workspace_root.display(),
587 workspace_mode,
588 embed_status,
589 voice_status,
590 project_hint
591 );
592 let _ = agent_tx
593 .send(InferenceEvent::MutedToken(format!("\n{}", greeting)))
594 .await;
595 if let Some(summary) = searx_session.startup_summary.as_deref() {
596 let _ = agent_tx
597 .send(InferenceEvent::Thought(summary.to_string()))
598 .await;
599 }
600
601 if searx_session.docker_wake_pending {
604 let wake_tx = agent_tx.clone();
605 let wake_root = searx_session.root.clone();
606 let wake_url = crate::agent::config::load_config()
607 .searx_url
608 .unwrap_or_else(|| "http://localhost:8080".to_string());
609 tokio::spawn(async move {
610 let mut docker_ready = false;
612 for _ in 0..30 {
613 tokio::time::sleep(tokio::time::Duration::from_secs(3)).await;
614 if matches!(
615 crate::agent::searx_lifecycle::docker_state(),
616 crate::agent::searx_lifecycle::DockerState::Ready
617 ) {
618 docker_ready = true;
619 break;
620 }
621 }
622 if !docker_ready {
623 let _ = wake_tx
624 .send(InferenceEvent::Thought(
625 "Local search: Docker daemon did not come online within 90s. \
626 Start SearXNG manually with `docker compose up -d` in ~/.hematite/searxng-local."
627 .to_string(),
628 ))
629 .await;
630 return;
631 }
632 match crate::agent::searx_lifecycle::docker_compose_up(&wake_root) {
633 Err(e) => {
634 let _ = wake_tx
635 .send(InferenceEvent::Thought(format!(
636 "Local search: Docker is ready but SearXNG failed to start — {}",
637 e
638 )))
639 .await;
640 }
641 Ok(()) => {
642 if crate::agent::searx_lifecycle::wait_for_searx(&wake_url).await {
643 let _ = wake_tx
644 .send(InferenceEvent::Thought(format!(
645 "Local search online: SearXNG is now live at {} — switching from Jina.",
646 wake_url
647 )))
648 .await;
649 } else {
650 let _ = wake_tx
651 .send(InferenceEvent::Thought(format!(
652 "Local search: SearXNG container started but {} is not responding. \
653 Check `docker compose logs` in {}.",
654 wake_url,
655 wake_root.display()
656 )))
657 .await;
658 }
659 }
660 }
661 });
662 }
663
664 if display_model == "no model loaded" {
665 let guidance = provider_startup_guidance(&provider_name, &startup_endpoint, false).await;
666 let _ = agent_tx.send(InferenceEvent::Thought(guidance)).await;
667 }
668
669 if let Err(e) = manager.initialize_mcp(&agent_tx).await {
670 let _ = agent_tx
671 .send(InferenceEvent::Error(format!("MCP Init Failed: {}", e)))
672 .await;
673 }
674 let indexed = manager.initialize_vein();
675 manager.initialize_repo_map();
676 let _ = agent_tx
677 .send(InferenceEvent::VeinStatus {
678 file_count: manager.vein.file_count(),
679 embedded_count: manager.vein.embedded_chunk_count(),
680 docs_only: docs_only_mode,
681 })
682 .await;
683 let _ = agent_tx
684 .send(InferenceEvent::Thought(format!(
685 "The Vein: indexed {} files",
686 indexed
687 )))
688 .await;
689
690 if let Some(cp) = crate::agent::conversation::load_checkpoint() {
692 let verify_tag = match cp.last_verify_ok {
693 Some(true) => " | last verify: PASS",
694 Some(false) => " | last verify: FAIL",
695 None => "",
696 };
697 let files_tag = if cp.working_files.is_empty() {
698 String::new()
699 } else {
700 format!(" | files: {}", cp.working_files.join(", "))
701 };
702 let goal_preview: String = cp.last_goal.chars().take(120).collect();
703 let trail = if cp.last_goal.len() > 120 { "…" } else { "" };
704 let resume_msg = format!(
705 "Resumed: {} turn{}{}{} — last goal: \"{}{}\"",
706 cp.turn_count,
707 if cp.turn_count == 1 { "" } else { "s" },
708 verify_tag,
709 files_tag,
710 goal_preview,
711 trail,
712 );
713 let _ = agent_tx.send(InferenceEvent::Thought(resume_msg)).await;
714 } else {
715 let session_path = crate::tools::file_ops::hematite_dir().join("session.json");
716 if !session_path.exists() {
717 let first_run_msg = "\nWelcome to Hematite! I'm your local AI workstation assistant.\n\n\
718 Since this is your first time here, what would you like to do?\n\
719 - System Check: Wondering if your tools are working? Run `/health`\n\
720 - Code: Ready to build something? Run `/architect Let's build a new feature`\n\
721 - Setup: Need help configuring Git or the workspace? Run `/ask What should I set up first?`\n\
722 - Help: Have a weird error? Type `/explain ` and paste it.\n\n\
723 Just type \"hello\" to start a normal conversation!".to_string();
724 let _ = agent_tx.send(InferenceEvent::Thought(first_run_msg)).await;
725 let provider_setup = provider_startup_guidance(
726 &provider_name,
727 &startup_endpoint,
728 display_model != "no model loaded",
729 )
730 .await;
731 let _ = agent_tx.send(InferenceEvent::Thought(provider_setup)).await;
732
733 let _ = std::fs::write(&session_path, "{\"turn_count\": 0}");
735 }
736 }
737
738 let _ = agent_tx.send(InferenceEvent::Done).await;
739 let startup_config = crate::agent::config::load_config();
740 manager.engine.set_gemma_native_formatting(
741 crate::agent::config::effective_gemma_native_formatting(
742 &startup_config,
743 &manager.engine.current_model(),
744 ),
745 );
746 let startup_model = manager.engine.current_model();
747 if crate::agent::inference::is_hematite_native_model(&startup_model) {
748 let mode = crate::agent::config::gemma_native_mode_label(&startup_config, &startup_model);
749 let status = match mode {
750 "on" => "Sovereign Engine detected | Native Turn-Formatting: ON (forced)",
751 "auto" => "Sovereign Engine detected | Native Turn-Formatting: ON (auto)",
752 _ => "Sovereign Engine detected | Native Turn-Formatting: OFF (use /gemma-native auto|on)",
753 };
754 let _ = agent_tx
755 .send(InferenceEvent::MutedToken(status.to_string()))
756 .await;
757 }
758
759 while let Some(input) = user_input_rx.recv().await {
760 if let Err(e) = manager
761 .run_turn(&input, agent_tx.clone(), config.yolo)
762 .await
763 {
764 let _ = agent_tx.send(InferenceEvent::Error(e.to_string())).await;
765 let _ = agent_tx.send(InferenceEvent::Done).await;
766 }
767 }
768}
769
// Unit tests for the pure helpers in this file.
#[cfg(test)]
mod tests {
    use super::{
        coding_runtime_budget_warning, model_name_matches, preferred_coding_model_target,
        runtime_context_display,
    };
    use crate::agent::config::HematiteConfig;

    /// The config-provided model must win over both CLI model flags.
    #[test]
    #[allow(clippy::field_reassign_with_default)]
    fn preferred_coding_model_uses_config_before_cli() {
        let mut config = HematiteConfig::default();
        config.think_model = Some("qwen-config".into());
        config.fast_model = Some("fast-config".into());
        // The literal has no `..` base, so every `CliCockpit` field is listed;
        // only `fast_model` and `think_model` are read by the function under
        // test.
        let cockpit = crate::CliCockpit {
            yolo: false,
            swarm_size: 3,
            brief: false,
            reroll: None,
            rusty: false,
            stats: false,
            no_splash: false,
            fast_model: Some("fast-cli".into()),
            think_model: Some("think-cli".into()),
            url: "http://localhost:1234/v1".into(),
            mcp_server: false,
            edge_redact: false,
            semantic_redact: false,
            semantic_url: None,
            semantic_model: None,
            report: false,
            report_format: "md".into(),
            diagnose: false,
            triage: None,
            fix: None,
            open: false,
            dry_run: false,
            execute: false,
            yes: false,
            quiet: false,
            fix_all: false,
            only: None,
            clipboard: false,
            notify: false,
            output: None,
            schedule: None,
            inventory: false,
            inspect: None,
            query: None,
            watch: None,
            watch_interval: 5,
            count: None,
            diff: None,
            diff_after: 30,
            alert: None,
            field: None,
            snapshot: None,
            from: None,
            snapshots: false,
            compare: None,
            audit_start: None,
            audit_end: None,
            audit_topics: None,
            alert_rule_add: None,
            alert_rule_label: None,
            alert_rule_negate: false,
            alert_rules: false,
            alert_rule_remove: None,
            alert_rule_run: false,
            timeline_capture: false,
            timeline: false,
            timeline_diff: None,
            timeline_trend: false,
            diagnose_why: None,
            analyze: None,
            compute: None,
            convert: None,
            query_data: None,
            sql: None,
            plot: None,
            plot_type: None,
            plot_x: None,
            plot_y: None,
            periodic: None,
            hash: None,
            hash_algo: None,
            encode: None,
            decode: None,
            codec: None,
            formula: None,
            random: None,
            length: None,
            random_args: None,
            diff_data: None,
            diff_key: None,
            describe: None,
            column: None,
            matrix: None,
            matrix_a: None,
            matrix_b: None,
            solve: None,
            solve_var: None,
            solve_range: None,
            curve_fit: None,
            fit_x: None,
            fit_y: None,
            fit_model: None,
            integrate: None,
            int_from: None,
            int_to: None,
            int_var: None,
            int_n: None,
            differentiate: None,
            at: None,
            order: None,
            profile: None,
            prime: None,
            sequence: None,
            seq_count: None,
            seq_start: None,
            seq_step: None,
            choose: None,
            truth_table: None,
            gcd: None,
            roman: None,
            base_convert: None,
            base_from: None,
            base_to: None,
            date: None,
            subnet: None,
            color: None,
            mw: None,
            r#const: None,
            normal: None,
            vectors: None,
            number_theory: None,
            simulate: None,
            fourier: None,
            fourier_col: None,
            fourier_top: None,
            fourier_rate: None,
            percentile: None,
            percentile_col: None,
            pivot: None,
            pivot_row: None,
            pivot_col: None,
            pivot_val: None,
            pivot_agg: None,
            regression: None,
            regression_target: None,
            regression_predictors: None,
            outliers: None,
            outlier_col: None,
            outlier_output: None,
            plot_title: None,
            plot_output: None,
            sample: None,
            sample_n: None,
            sample_frac: None,
            sample_seed: None,
            split: None,
            sample_output: None,
            correlation: None,
            corr_method: None,
            timeseries: None,
            ts_date: None,
            ts_value: None,
            ts_window: None,
            cluster: None,
            cluster_k: None,
            cluster_cols: None,
            cluster_output: None,
            normalize: None,
            normalize_method: None,
            normalize_cols: None,
            normalize_output: None,
            pca: None,
            pca_components: None,
            pca_cols: None,
            pca_output: None,
            graph: None,
            symbolic: None,
            finance: None,
            logic: None,
            signal: None,
            interpolate: None,
            units: None,
            ode: None,
            optimize: None,
            hypothesis: None,
            hypothesis_test: None,
            hypothesis_group2: None,
            hypothesis_alpha: None,
            hypothesis_mu: None,
            classify: None,
            classify_label: None,
            classify_cols: None,
            classify_predict: None,
            classify_k: None,
            classify_method: None,
            polyfit: None,
            polyfit_x: None,
            polyfit_y: None,
            polyfit_degree: None,
            polyfit_predict: None,
            probability: None,
            bitwise: None,
            set: None,
            cipher: None,
            text_stats: None,
            levenshtein: None,
            number_format: None,
            sort_viz: None,
            checksum: None,
            validate: None,
            pdf_extract_helper: None,
            teleported_from: None,
        };

        assert_eq!(
            preferred_coding_model_target(&config, &cockpit),
            Some("qwen-config".to_string())
        );
    }

    /// Model names compare equal regardless of ASCII case.
    #[test]
    fn model_name_matches_is_case_insensitive() {
        assert!(model_name_matches("Qwen/Qwen3.5-9B", "qwen/qwen3.5-9b"));
        assert!(!model_name_matches("bonsai-8b", "qwen/qwen3.5-9b"));
    }

    /// A 4096-token context is below the threshold, so a warning naming the
    /// loaded model, its context size and the preferred model is expected.
    #[test]
    fn coding_runtime_budget_warning_flags_small_context() {
        let warning =
            coding_runtime_budget_warning("LM Studio", "bonsai-8b", 4096, Some("qwen/qwen3.5-9b"))
                .expect("warning expected");
        assert!(warning.contains("bonsai-8b"));
        assert!(warning.contains("4096"));
        assert!(warning.contains("qwen/qwen3.5-9b"));
    }

    /// The context is shown as "none" when no model is loaded, and as the
    /// plain number otherwise.
    #[test]
    fn runtime_context_display_reports_none_without_loaded_model() {
        assert_eq!(runtime_context_display("no model loaded", 0), "none");
        assert_eq!(runtime_context_display("", 32768), "none");
        assert_eq!(runtime_context_display("qwen/qwen3.5-9b", 32000), "32000");
    }
}