Skip to main content

hematite/
runtime.rs

1use std::fmt::Write as _;
2
3use crate::agent;
4use crate::agent::conversation::{ConversationManager, UserTurn};
5use crate::agent::git_monitor::GitState;
6use crate::agent::inference::{InferenceEngine, InferenceEvent};
7use crate::ui;
8use crate::ui::gpu_monitor::GpuState;
9use crate::ui::voice::VoiceManager;
10use crate::CliCockpit;
11use notify::RecommendedWatcher;
12use std::sync::Arc;
13use tokio::sync::mpsc;
14
/// Smallest live context window, in tokens, that does not trigger the startup
/// warning produced by `coding_runtime_budget_warning`.
const MIN_RECOMMENDED_CODING_CONTEXT: usize = 8_192;
16
17fn provider_help_hint(base_url: &str, provider_name: &str) -> String {
18    if provider_name == "LM Studio" {
19        format!(
20            "Check if LM Studio is running on {}. If you prefer Ollama, set `api_url` to `{}` in `.hematite/settings.json`.",
21            base_url,
22            crate::agent::config::DEFAULT_OLLAMA_API_URL
23        )
24    } else if provider_name == "Ollama" {
25        format!(
26            "Check if Ollama is running on {} and that a chat model is available. If you prefer LM Studio, set `api_url` to `{}`.",
27            base_url,
28            crate::agent::config::DEFAULT_LM_STUDIO_API_URL
29        )
30    } else {
31        format!(
32            "Check if the configured provider is running on {} and that `.hematite/settings.json` points at the right endpoint.",
33            base_url
34        )
35    }
36}
37
/// Returns `base_url` with every trailing `/` removed and `/v1` appended.
pub fn session_endpoint_url(base_url: &str) -> String {
    let root = base_url.trim_end_matches('/');
    let mut endpoint = String::with_capacity(root.len() + "/v1".len());
    endpoint.push_str(root);
    endpoint.push_str("/v1");
    endpoint
}
41
42fn preferred_coding_model_target(
43    config: &crate::agent::config::HematiteConfig,
44    cockpit: &CliCockpit,
45) -> Option<String> {
46    crate::agent::config::preferred_coding_model(config)
47        .or(cockpit.think_model.clone())
48        .or(cockpit.fast_model.clone())
49}
50
/// Compares two model identifiers, ignoring surrounding whitespace and ASCII case.
fn model_name_matches(current: &str, target: &str) -> bool {
    let (lhs, rhs) = (current.trim(), target.trim());
    lhs.eq_ignore_ascii_case(rhs)
}
54
55fn coding_runtime_budget_warning(
56    provider_name: &str,
57    model_name: &str,
58    context_length: usize,
59    preferred_model: Option<&str>,
60) -> Option<String> {
61    if model_name.trim().is_empty()
62        || model_name.eq_ignore_ascii_case("no model loaded")
63        || context_length >= MIN_RECOMMENDED_CODING_CONTEXT
64    {
65        return None;
66    }
67
68    let provider_label = if provider_name.is_empty() {
69        "the active provider"
70    } else {
71        provider_name
72    };
73    let mut message = format!(
74        "Warning: {} loaded `{}` with only {} tokens of live context. That is too small for normal coding, scaffold, or teleport-resume work.",
75        provider_label, model_name, context_length
76    );
77    if let Some(target) = preferred_model.filter(|target| !model_name_matches(model_name, target)) {
78        let _ = write!(message,
79            " Load your preferred coding model `{}` and rerun `/runtime refresh` before heavy implementation.",
80            target
81        );
82    } else {
83        message.push_str(
84            " Load a larger-context coding model before heavy implementation and rerun `/runtime refresh`.",
85        );
86    }
87    Some(message)
88}
89
90fn provider_model_setup_hint(provider_name: &str) -> String {
91    if provider_name == "Ollama" {
92        format!(
93            "Pull or run a chat model in Ollama, then keep `api_url` pointed at `{}`. If you want semantic search too, save an embedding model in `/embed prefer <id>` and Hematite can load it here as well.",
94            crate::agent::config::DEFAULT_OLLAMA_API_URL
95        )
96    } else {
97        format!(
98            "Load a coding model in LM Studio and keep the local server on `{}`. Optionally also load an embedding model for semantic search.",
99            crate::agent::config::DEFAULT_LM_STUDIO_API_URL
100        )
101    }
102}
103
104async fn provider_startup_guidance(provider_name: &str, endpoint: &str, has_model: bool) -> String {
105    let mut lines = vec![format!("Provider setup: {} ({})", provider_name, endpoint)];
106    if has_model {
107        lines.push("Status: local runtime is reachable and a coding model is loaded.".to_string());
108    } else {
109        lines.push("Status: provider is reachable but no coding model is loaded yet.".to_string());
110        lines.push(provider_model_setup_hint(provider_name));
111    }
112    if let Some((alt_name, alt_url)) = detect_alternative_provider(provider_name).await {
113        lines.push(format!("Reachable alternative: {} ({})", alt_name, alt_url));
114    }
115    lines.push(
116        "Use `/provider` after startup if you want to save a different runtime for future sessions."
117            .to_string(),
118    );
119    lines.join("\n")
120}
121
/// Formats the context length for the status line.
///
/// Yields `"none"` when `context_length` is zero, when `model` is blank, or
/// when `model` contains the `"no model loaded"` placeholder (compared in
/// ASCII lowercase); otherwise yields the decimal context length.
fn runtime_context_display(model: &str, context_length: usize) -> String {
    if context_length == 0 {
        return "none".to_string();
    }

    let normalized = model.to_ascii_lowercase();
    let has_model = !normalized.trim().is_empty() && !normalized.contains("no model loaded");
    if has_model {
        context_length.to_string()
    } else {
        "none".to_string()
    }
}
130
131async fn print_provider_bootstrap_help(provider_name: &str, base_url: &str) {
132    let endpoint = session_endpoint_url(base_url);
133    println!("Quick setup path:");
134    if provider_name == "Ollama" {
135        println!("  1. Install Ollama: https://ollama.com/");
136        println!("  2. Start Ollama and ensure `{}` is reachable.", endpoint);
137        println!("  3. Pull a chat model, for example: `ollama pull qwen3.5:latest`");
138        println!(
139            "  4. Restart Hematite, or switch back to LM Studio with `api_url = \"{}\"`.",
140            crate::agent::config::DEFAULT_LM_STUDIO_API_URL
141        );
142    } else {
143        println!("  1. Install LM Studio: https://lmstudio.ai/");
144        println!(
145            "  2. Start the local server and ensure `{}` is reachable.",
146            endpoint
147        );
148        println!("  3. Load a coding model such as `Qwen/Qwen3.5-9B Q4_K_M`.");
149        println!("  4. Restart Hematite after the model is loaded.");
150    }
151    if let Some((alt_name, alt_url)) = detect_alternative_provider(provider_name).await {
152        println!(
153            "Reachable alternative detected: {} ({}). You can point Hematite there instead.",
154            alt_name, alt_url
155        );
156    }
157}
158
159pub async fn detect_alternative_provider(active_provider: &str) -> Option<(String, String)> {
160    match active_provider {
161        "LM Studio" => {
162            let ollama = crate::agent::ollama::OllamaHarness::new("http://localhost:11434");
163            if ollama.is_reachable().await {
164                Some((
165                    "Ollama".to_string(),
166                    crate::agent::config::DEFAULT_OLLAMA_API_URL.to_string(),
167                ))
168            } else {
169                None
170            }
171        }
172        "Ollama" => {
173            let lms = crate::agent::lms::LmsHarness::new();
174            if lms.is_server_responding("http://localhost:1234").await {
175                Some((
176                    "LM Studio".to_string(),
177                    crate::agent::config::DEFAULT_LM_STUDIO_API_URL.to_string(),
178                ))
179            } else {
180                None
181            }
182        }
183        _ => {
184            let lms = crate::agent::lms::LmsHarness::new();
185            if lms.is_server_responding("http://localhost:1234").await {
186                return Some((
187                    "LM Studio".to_string(),
188                    crate::agent::config::DEFAULT_LM_STUDIO_API_URL.to_string(),
189                ));
190            }
191            let ollama = crate::agent::ollama::OllamaHarness::new("http://localhost:11434");
192            if ollama.is_reachable().await {
193                return Some((
194                    "Ollama".to_string(),
195                    crate::agent::config::DEFAULT_OLLAMA_API_URL.to_string(),
196                ));
197            }
198            None
199        }
200    }
201}
202
/// Long-lived shared services created once by `build_runtime_bundle` and later
/// consumed by `run_agent_loop`.
pub struct RuntimeServices {
    /// Shared inference engine, already configured with the detected model/context.
    pub engine: Arc<InferenceEngine>,
    /// Handle returned by `ui::gpu_monitor::spawn_gpu_monitor()`.
    pub gpu_state: Arc<GpuState>,
    /// Handle returned by `agent::git_monitor::spawn_git_monitor()`.
    pub git_state: Arc<GitState>,
    /// Voice manager constructed with a clone of the agent event sender.
    pub voice_manager: Arc<VoiceManager>,
    /// Swarm coordinator built from the engine, GPU state, CLI fast model and
    /// the `professional` flag.
    pub swarm_coordinator: Arc<agent::swarm::SwarmCoordinator>,
    /// Shared flag, initialised to `false`; `run_agent_loop` installs it as the
    /// conversation manager's `cancel_token`.
    pub cancel_token: Arc<std::sync::atomic::AtomicBool>,
    /// Result of `searx_lifecycle::boot_searx_if_needed` for this session.
    pub searx_session: agent::searx_lifecycle::SearxRuntimeSession,
}
212
/// Channel endpoints created by `build_runtime_bundle`.
///
/// All channels are bounded `tokio::sync::mpsc` channels.
pub struct RuntimeChannels {
    /// Receiving half of the channel whose sender is handed to
    /// `agent::specular::spawn_watcher` (capacity 32).
    pub specular_rx: mpsc::Receiver<agent::specular::SpecularEvent>,
    /// Sender for `InferenceEvent`s (capacity 100).
    pub agent_tx: mpsc::Sender<InferenceEvent>,
    /// Receiver paired with `agent_tx`.
    pub agent_rx: mpsc::Receiver<InferenceEvent>,
    /// Sender for swarm messages (capacity 32).
    pub swarm_tx: mpsc::Sender<agent::swarm::SwarmMessage>,
    /// Receiver paired with `swarm_tx`.
    pub swarm_rx: mpsc::Receiver<agent::swarm::SwarmMessage>,
    /// Sender for user turns (capacity 32).
    pub user_input_tx: mpsc::Sender<UserTurn>,
    /// Receiver paired with `user_input_tx`.
    pub user_input_rx: mpsc::Receiver<UserTurn>,
}
222
/// Everything `build_runtime_bundle` hands back to the caller.
pub struct RuntimeBundle {
    /// Shared services (engine, monitors, coordinator, ...).
    pub services: RuntimeServices,
    /// All channel endpoints wired up during startup.
    pub channels: RuntimeChannels,
    /// Watcher returned by `agent::specular::spawn_watcher`.
    /// NOTE(review): presumably must be kept alive for file watching to
    /// continue — confirm against `spawn_watcher`.
    pub watcher_guard: RecommendedWatcher,
}
228
/// Runtime handles consumed by `run_agent_loop`.
pub struct AgentLoopRuntime {
    /// Source of user turns; the agent loop runs until this channel yields `None`.
    pub user_input_rx: mpsc::Receiver<UserTurn>,
    /// Sink for every `InferenceEvent` the loop emits.
    pub agent_tx: mpsc::Sender<InferenceEvent>,
    /// Shared services, destructured at the start of the loop.
    pub services: RuntimeServices,
}
234
/// Per-session settings for `run_agent_loop`.
///
/// Every field except `yolo` is forwarded unchanged to `ConversationManager::new`.
pub struct AgentLoopConfig {
    /// Passed to `ConversationManager::run_turn` on every turn.
    pub yolo: bool,
    /// Forwarded to the conversation manager.
    pub professional: bool,
    /// Forwarded to the conversation manager.
    pub brief: bool,
    /// Forwarded to the conversation manager.
    pub snark: u8,
    /// Forwarded to the conversation manager.
    pub chaos: u8,
    /// Forwarded to the conversation manager.
    pub soul_personality: String,
    /// Optional fast model name, forwarded to the conversation manager.
    pub fast_model: Option<String>,
    /// Optional think model name, forwarded to the conversation manager.
    pub think_model: Option<String>,
}
245
/// Boots every runtime service and channel and returns them as a `RuntimeBundle`.
///
/// Startup sequence, in order:
/// 1. Load the config and let `boot_searx_if_needed` handle SearXNG.
/// 2. Create the `InferenceEngine` against the effective API URL and start the
///    GPU and git monitors.
/// 3. Health-check the provider. On failure, print troubleshooting help and
///    **terminate the process with exit status 1**.
/// 4. Poll the provider (at most 20 rounds, 250 ms apart) for the loaded model
///    and its context length.
/// 5. If no model is loaded, try to auto-load the preferred coding model
///    (LM Studio falls back to `gemma-4-9b-it` when no preference exists).
/// 6. Record the runtime profile on the engine and print a warning when the
///    context window is too small.
/// 7. If a coding model was auto-loaded, also try to load the configured
///    embedding model when it is not the one currently loaded.
/// 8. Create the channels, file watcher, voice manager and swarm coordinator.
///
/// Progress, notices and warnings are written to stdout with `println!`.
///
/// # Errors
///
/// Returns an error when `InferenceEngine::new` or
/// `agent::specular::spawn_watcher` fails. Model auto-load failures are only
/// reported as printed warnings.
pub async fn build_runtime_bundle(
    cockpit: &CliCockpit,
    species: &str,
    snark: u8,
    professional: bool,
) -> Result<RuntimeBundle, Box<dyn std::error::Error>> {
    println!("Booting Hematite systems...");
    let config = crate::agent::config::load_config();

    // Auto-boot SearXNG if enabled and offline.
    let searx_session = crate::agent::searx_lifecycle::boot_searx_if_needed(&config).await;

    // settings.json api_url overrides the --url CLI flag so users don't need to retype it.
    let api_url = crate::agent::config::effective_api_url(&config, &cockpit.url);
    let mut engine_raw = InferenceEngine::new(api_url, species.to_string(), snark)?;
    let provider_name = engine_raw.provider_name().await;
    let preferred_model = preferred_coding_model_target(&config, cockpit);
    let gpu_state = ui::gpu_monitor::spawn_gpu_monitor();
    let git_state = agent::git_monitor::spawn_git_monitor();

    // Without a reachable provider there is nothing to run: print help and exit.
    if !engine_raw.health_check().await {
        println!(
            "ERROR: {} not detected at {}",
            provider_name, engine_raw.base_url
        );
        println!(
            "{}",
            provider_help_hint(&engine_raw.base_url, &provider_name)
        );
        print_provider_bootstrap_help(&provider_name, &engine_raw.base_url).await;
        std::process::exit(1);
    }

    let mut detected_model = String::new();
    let mut detected_context = 0;
    let mut empty_observations = 0u8;

    // Handshake loop: wait briefly for the provider to settle, but treat repeated
    // "no model loaded" observations as a valid steady state rather than blocking startup.
    for _ in 0..20 {
        detected_model = engine_raw.get_loaded_model().await.unwrap_or_default();
        detected_context = engine_raw.detect_context_length().await;

        // Both a model name and a non-zero context: the provider has settled.
        if !detected_model.trim().is_empty() && detected_context > 0 {
            break;
        }
        if detected_model.trim().is_empty() && detected_context == 0 {
            // Two consecutive fully-empty observations end the wait.
            empty_observations = empty_observations.saturating_add(1);
            if empty_observations >= 2 {
                break;
            }
        } else {
            // Partial information (only one of the two values) resets the streak.
            empty_observations = 0;
        }
        tokio::time::sleep(tokio::time::Duration::from_millis(250)).await;
    }

    let mut auto_loaded_coding_model = false;

    if detected_model.trim().is_empty() {
        // Prefer the user's configured model; only LM Studio has a built-in fallback.
        let target = preferred_model
            .as_deref()
            .or(if provider_name == "LM Studio" {
                Some("gemma-4-9b-it")
            } else {
                None
            });
        if let Some(target) = target {
            println!(
                "Notice: No model loaded in {}. Attempting to auto-load `{}`...",
                provider_name, target
            );
            if let Err(e) = engine_raw.load_model(target).await {
                println!(
                    "Warning: Auto-load failed: {}. Please load a model manually in {}.",
                    e, provider_name
                );
            } else {
                auto_loaded_coding_model = true;
                // Re-read what the provider reports now that a load succeeded.
                detected_model = engine_raw.get_loaded_model().await.unwrap_or_default();
                detected_context = engine_raw.detect_context_length().await;
            }
        }
    }

    // Normalise "nothing loaded" to the placeholder name and a zero context.
    let effective_model = if detected_model.trim().is_empty() {
        "no model loaded".to_string()
    } else {
        detected_model.clone()
    };
    let effective_context = if effective_model == "no model loaded" {
        0
    } else {
        detected_context
    };
    engine_raw
        .set_runtime_profile(&effective_model, effective_context)
        .await;
    if let Some(warning) = coding_runtime_budget_warning(
        &provider_name,
        &effective_model,
        effective_context,
        preferred_model.as_deref(),
    ) {
        println!("{}", warning);
    }

    // The embedding model is only auto-loaded when the coding model was too.
    if auto_loaded_coding_model {
        if let Some(embed_target) = config.embed_model.as_deref() {
            let current_embed = engine_raw.get_embedding_model().await;
            // Load when nothing is loaded or a different embed model is loaded.
            let needs_embed = current_embed
                .as_deref()
                .map(|loaded| !model_name_matches(loaded, embed_target))
                .unwrap_or(true);
            if needs_embed {
                println!(
                    "Notice: preferred embed model `{}` is not loaded. Attempting to load it for semantic search...",
                    embed_target
                );
                if let Err(e) = engine_raw.load_embedding_model(embed_target).await {
                    println!(
                        "Warning: Preferred embed model auto-load failed: {}. Load `{}` manually or save a different `/embed prefer` target if you want semantic search.",
                        e, embed_target
                    );
                }
            }
        }
    }

    let (specular_tx, specular_rx) = mpsc::channel(32);
    let watcher_guard = agent::specular::spawn_watcher(specular_tx)?;

    let (agent_tx, agent_rx) = mpsc::channel::<InferenceEvent>(100);
    let (swarm_tx, swarm_rx) = mpsc::channel(32);
    let voice_manager = Arc::new(VoiceManager::new(agent_tx.clone()));

    // Worker model: the config value wins over the CLI flag.
    if let Some(worker) = config
        .fast_model
        .clone()
        .or_else(|| cockpit.fast_model.clone())
    {
        engine_raw.worker_model = Some(worker);
    }

    // From here on the engine is shared and no longer mutated directly.
    let engine = Arc::new(engine_raw);
    let swarm_coordinator = Arc::new(agent::swarm::SwarmCoordinator::new(
        engine.clone(),
        gpu_state.clone(),
        cockpit.fast_model.clone(),
        professional,
    ));

    let (user_input_tx, user_input_rx) = mpsc::channel::<UserTurn>(32);
    let cancel_token = Arc::new(std::sync::atomic::AtomicBool::new(false));

    Ok(RuntimeBundle {
        services: RuntimeServices {
            engine,
            gpu_state,
            git_state,
            voice_manager,
            swarm_coordinator,
            cancel_token,
            searx_session,
        },
        channels: RuntimeChannels {
            specular_rx,
            agent_tx,
            agent_rx,
            swarm_tx,
            swarm_rx,
            user_input_tx,
            user_input_rx,
        },
        watcher_guard,
    })
}
423
424pub fn spawn_runtime_profile_sync(
425    engine: Arc<InferenceEngine>,
426    agent_tx: mpsc::Sender<InferenceEvent>,
427) -> tokio::task::JoinHandle<()> {
428    tokio::spawn(async move {
429        // Initial delay before the first background poll.
430        tokio::time::sleep(tokio::time::Duration::from_secs(4)).await;
431
432        let mut last_embed: Option<String> = None;
433
434        loop {
435            let result = engine.refresh_runtime_profile().await;
436
437            let Some((model_id, context_length, _changed)) = result else {
438                if agent_tx.is_closed() {
439                    break;
440                }
441                // LM Studio unreachable — back off; no need to hammer a closed server.
442                tokio::time::sleep(tokio::time::Duration::from_secs(15)).await;
443                continue;
444            };
445            let provider_name = engine.provider_name().await;
446
447            // When no coding model is loaded, back off to reduce log noise in LM Studio.
448            let poll_interval = if model_id == "no model loaded" {
449                tokio::time::Duration::from_secs(12)
450            } else {
451                tokio::time::Duration::from_secs(4)
452            };
453
454            if agent_tx
455                .send(InferenceEvent::RuntimeProfile {
456                    provider_name,
457                    endpoint: session_endpoint_url(&engine.base_url),
458                    model_id,
459                    context_length,
460                })
461                .await
462                .is_err()
463            {
464                break;
465            }
466
467            // Poll embed model separately and notify on change.
468            let current_embed = engine.get_embedding_model().await;
469            if current_embed != last_embed {
470                if agent_tx
471                    .send(InferenceEvent::EmbedProfile {
472                        model_id: current_embed.clone(),
473                    })
474                    .await
475                    .is_err()
476                {
477                    break;
478                }
479                last_embed = current_embed;
480            }
481
482            tokio::time::sleep(poll_interval).await;
483        }
484    })
485}
486
487pub async fn run_agent_loop(runtime: AgentLoopRuntime, config: AgentLoopConfig) {
488    let AgentLoopRuntime {
489        mut user_input_rx,
490        agent_tx,
491        services,
492    } = runtime;
493    let RuntimeServices {
494        engine,
495        gpu_state,
496        git_state,
497        voice_manager,
498        swarm_coordinator,
499        cancel_token,
500        searx_session,
501    } = services;
502
503    let mut manager = ConversationManager::new(
504        engine,
505        config.professional,
506        config.brief,
507        config.snark,
508        config.chaos,
509        config.soul_personality,
510        config.fast_model,
511        config.think_model,
512        gpu_state.clone(),
513        git_state,
514        swarm_coordinator,
515        voice_manager,
516    );
517    manager.cancel_token = cancel_token;
518
519    let _ = agent_tx
520        .send(InferenceEvent::RuntimeProfile {
521            provider_name: manager.engine.provider_name().await,
522            endpoint: session_endpoint_url(&manager.engine.base_url),
523            model_id: manager.engine.current_model(),
524            context_length: manager.engine.current_context_length(),
525        })
526        .await;
527
528    let workspace_root = crate::tools::file_ops::workspace_root();
529    let _ = crate::agent::workspace_profile::ensure_workspace_profile(&workspace_root);
530
531    // Send the startup greeting immediately — before MCP and Vein so it always
532    // appears right away, even if vein indexing takes a while on first run.
533    let gpu_name = gpu_state.gpu_name();
534    let vram = gpu_state.label();
535    let voice_cfg = crate::agent::config::load_config();
536    let voice_status = format!(
537        "Voice: {} | Speed: {}x | Volume: {}x",
538        crate::agent::config::effective_voice(&voice_cfg),
539        crate::agent::config::effective_voice_speed(&voice_cfg),
540        crate::agent::config::effective_voice_volume(&voice_cfg),
541    );
542    let embed_status = match manager.engine.get_embedding_model().await {
543        Some(id) => format!("Embed: {} (semantic search ready)", id),
544        None => {
545            "Embed: none loaded (load a preferred embedding model for semantic search)".to_string()
546        }
547    };
548    let workspace_root = crate::tools::file_ops::workspace_root();
549    let docs_only_mode = !crate::tools::file_ops::is_project_workspace();
550    let workspace_mode = if docs_only_mode {
551        "docs-only"
552    } else {
553        "project"
554    };
555    let launched_from_home = home::home_dir()
556        .and_then(|home| std::env::current_dir().ok().map(|cwd| cwd == home))
557        .unwrap_or(false);
558    let project_hint = if !docs_only_mode {
559        String::new()
560    } else if launched_from_home {
561        "\nTip: you launched Hematite from your home directory. That is fine for workstation questions and docs-only memory, but for project-specific build, test, script, or repo work you should relaunch in the target project directory. `.hematite/docs/`, `.hematite/imports/`, and recent local session reports remain searchable in docs-only vein mode.".to_string()
562    } else {
563        "\nTip: source indexing is disabled outside a project folder. Launch Hematite in the target project directory for project-specific build, test, script, or repo work. `.hematite/docs/`, `.hematite/imports/`, and recent local session reports remain searchable in docs-only vein mode.".to_string()
564    };
565    let display_model = {
566        let m = manager.engine.current_model();
567        if m.is_empty() || m == "no model loaded" {
568            "no model loaded".to_string()
569        } else {
570            m
571        }
572    };
573    let provider_name = manager.engine.provider_name().await;
574    let startup_endpoint = session_endpoint_url(&manager.engine.base_url);
575    let terminal_name = crate::ui::terminal::detect_terminal().label();
576    let greeting = format!(
577        "Hematite {} Online [{}] | Provider: {}\nModel: {} | CTX: {} | GPU: {} | VRAM: {}\nEndpoint: {}\nWorkspace: {} ({})\n{}\n{}\n/ask · read-only analysis   /code · implement   /architect · plan-first   /chat · conversation\nRecovery: /undo · /new · /forget · /clear   |   /version · /about{}",
578        crate::hematite_version_display(),
579        terminal_name,
580        provider_name,
581        display_model,
582        runtime_context_display(&display_model, manager.engine.current_context_length()),
583        gpu_name,
584        vram,
585        startup_endpoint,
586        workspace_root.display(),
587        workspace_mode,
588        embed_status,
589        voice_status,
590        project_hint
591    );
592    let _ = agent_tx
593        .send(InferenceEvent::MutedToken(format!("\n{}", greeting)))
594        .await;
595    if let Some(summary) = searx_session.startup_summary.as_deref() {
596        let _ = agent_tx
597            .send(InferenceEvent::Thought(summary.to_string()))
598            .await;
599    }
600
601    // If Docker Desktop was just launched, poll in background until the daemon
602    // is ready, then start SearXNG automatically and notify the TUI.
603    if searx_session.docker_wake_pending {
604        let wake_tx = agent_tx.clone();
605        let wake_root = searx_session.root.clone();
606        let wake_url = crate::agent::config::load_config()
607            .searx_url
608            .unwrap_or_else(|| "http://localhost:8080".to_string());
609        tokio::spawn(async move {
610            // Poll for Docker daemon — up to 90 seconds, checking every 3s.
611            let mut docker_ready = false;
612            for _ in 0..30 {
613                tokio::time::sleep(tokio::time::Duration::from_secs(3)).await;
614                if matches!(
615                    crate::agent::searx_lifecycle::docker_state(),
616                    crate::agent::searx_lifecycle::DockerState::Ready
617                ) {
618                    docker_ready = true;
619                    break;
620                }
621            }
622            if !docker_ready {
623                let _ = wake_tx
624                    .send(InferenceEvent::Thought(
625                        "Local search: Docker daemon did not come online within 90s. \
626                        Start SearXNG manually with `docker compose up -d` in ~/.hematite/searxng-local."
627                        .to_string(),
628                    ))
629                    .await;
630                return;
631            }
632            match crate::agent::searx_lifecycle::docker_compose_up(&wake_root) {
633                Err(e) => {
634                    let _ = wake_tx
635                        .send(InferenceEvent::Thought(format!(
636                            "Local search: Docker is ready but SearXNG failed to start — {}",
637                            e
638                        )))
639                        .await;
640                }
641                Ok(()) => {
642                    if crate::agent::searx_lifecycle::wait_for_searx(&wake_url).await {
643                        let _ = wake_tx
644                            .send(InferenceEvent::Thought(format!(
645                                "Local search online: SearXNG is now live at {} — switching from Jina.",
646                                wake_url
647                            )))
648                            .await;
649                    } else {
650                        let _ = wake_tx
651                            .send(InferenceEvent::Thought(format!(
652                                "Local search: SearXNG container started but {} is not responding. \
653                                Check `docker compose logs` in {}.",
654                                wake_url,
655                                wake_root.display()
656                            )))
657                            .await;
658                    }
659                }
660            }
661        });
662    }
663
664    if display_model == "no model loaded" {
665        let guidance = provider_startup_guidance(&provider_name, &startup_endpoint, false).await;
666        let _ = agent_tx.send(InferenceEvent::Thought(guidance)).await;
667    }
668
669    if let Err(e) = manager.initialize_mcp(&agent_tx).await {
670        let _ = agent_tx
671            .send(InferenceEvent::Error(format!("MCP Init Failed: {}", e)))
672            .await;
673    }
674    let indexed = manager.initialize_vein();
675    manager.initialize_repo_map();
676    let _ = agent_tx
677        .send(InferenceEvent::VeinStatus {
678            file_count: manager.vein.file_count(),
679            embedded_count: manager.vein.embedded_chunk_count(),
680            docs_only: docs_only_mode,
681        })
682        .await;
683    let _ = agent_tx
684        .send(InferenceEvent::Thought(format!(
685            "The Vein: indexed {} files",
686            indexed
687        )))
688        .await;
689
690    // Show a compact resume line if a prior session left a checkpoint.
691    if let Some(cp) = crate::agent::conversation::load_checkpoint() {
692        let verify_tag = match cp.last_verify_ok {
693            Some(true) => " | last verify: PASS",
694            Some(false) => " | last verify: FAIL",
695            None => "",
696        };
697        let files_tag = if cp.working_files.is_empty() {
698            String::new()
699        } else {
700            format!(" | files: {}", cp.working_files.join(", "))
701        };
702        let goal_preview: String = cp.last_goal.chars().take(120).collect();
703        let trail = if cp.last_goal.len() > 120 { "…" } else { "" };
704        let resume_msg = format!(
705            "Resumed: {} turn{}{}{} — last goal: \"{}{}\"",
706            cp.turn_count,
707            if cp.turn_count == 1 { "" } else { "s" },
708            verify_tag,
709            files_tag,
710            goal_preview,
711            trail,
712        );
713        let _ = agent_tx.send(InferenceEvent::Thought(resume_msg)).await;
714    } else {
715        let session_path = crate::tools::file_ops::hematite_dir().join("session.json");
716        if !session_path.exists() {
717            let first_run_msg = "\nWelcome to Hematite! I'm your local AI workstation assistant.\n\n\
718                                 Since this is your first time here, what would you like to do?\n\
719                                 - System Check: Wondering if your tools are working? Run `/health`\n\
720                                 - Code: Ready to build something? Run `/architect Let's build a new feature`\n\
721                                 - Setup: Need help configuring Git or the workspace? Run `/ask What should I set up first?`\n\
722                                 - Help: Have a weird error? Type `/explain ` and paste it.\n\n\
723                                 Just type \"hello\" to start a normal conversation!".to_string();
724            let _ = agent_tx.send(InferenceEvent::Thought(first_run_msg)).await;
725            let provider_setup = provider_startup_guidance(
726                &provider_name,
727                &startup_endpoint,
728                display_model != "no model loaded",
729            )
730            .await;
731            let _ = agent_tx.send(InferenceEvent::Thought(provider_setup)).await;
732
733            // Create a minimal empty session struct so we don't show this again until they intentionally /forget
734            let _ = std::fs::write(&session_path, "{\"turn_count\": 0}");
735        }
736    }
737
738    let _ = agent_tx.send(InferenceEvent::Done).await;
739    let startup_config = crate::agent::config::load_config();
740    manager.engine.set_gemma_native_formatting(
741        crate::agent::config::effective_gemma_native_formatting(
742            &startup_config,
743            &manager.engine.current_model(),
744        ),
745    );
746    let startup_model = manager.engine.current_model();
747    if crate::agent::inference::is_hematite_native_model(&startup_model) {
748        let mode = crate::agent::config::gemma_native_mode_label(&startup_config, &startup_model);
749        let status = match mode {
750            "on" => "Sovereign Engine detected | Native Turn-Formatting: ON (forced)",
751            "auto" => "Sovereign Engine detected | Native Turn-Formatting: ON (auto)",
752            _ => "Sovereign Engine detected | Native Turn-Formatting: OFF (use /gemma-native auto|on)",
753        };
754        let _ = agent_tx
755            .send(InferenceEvent::MutedToken(status.to_string()))
756            .await;
757    }
758
759    while let Some(input) = user_input_rx.recv().await {
760        if let Err(e) = manager
761            .run_turn(&input, agent_tx.clone(), config.yolo)
762            .await
763        {
764            let _ = agent_tx.send(InferenceEvent::Error(e.to_string())).await;
765            let _ = agent_tx.send(InferenceEvent::Done).await;
766        }
767    }
768}
769
#[cfg(test)]
mod tests {
    use super::{
        coding_runtime_budget_warning, model_name_matches, preferred_coding_model_target,
        runtime_context_display,
    };
    use crate::agent::config::HematiteConfig;

    /// Builds the `CliCockpit` fixture used by the precedence test.
    ///
    /// Only `fast_model` and `think_model` matter to the assertion; they carry
    /// CLI-specific names so a CLI-sourced result is distinguishable from a
    /// config-sourced one. Every other field is spelled out with a fixed value
    /// because a plain struct literal has to name all fields.
    fn cockpit_with_cli_models() -> crate::CliCockpit {
        crate::CliCockpit {
            // Model overrides supplied "on the command line".
            fast_model: Some(String::from("fast-cli")),
            think_model: Some(String::from("think-cli")),
            // Remaining fields: fixed filler values, same as the fixture has always used.
            yolo: false,
            swarm_size: 3,
            brief: false,
            reroll: None,
            rusty: false,
            stats: false,
            no_splash: false,
            url: "http://localhost:1234/v1".into(),
            mcp_server: false,
            edge_redact: false,
            semantic_redact: false,
            semantic_url: None,
            semantic_model: None,
            report: false,
            report_format: "md".into(),
            diagnose: false,
            triage: None,
            fix: None,
            open: false,
            dry_run: false,
            execute: false,
            yes: false,
            quiet: false,
            fix_all: false,
            only: None,
            clipboard: false,
            notify: false,
            output: None,
            schedule: None,
            inventory: false,
            inspect: None,
            query: None,
            watch: None,
            watch_interval: 5,
            count: None,
            diff: None,
            diff_after: 30,
            alert: None,
            field: None,
            snapshot: None,
            from: None,
            snapshots: false,
            compare: None,
            audit_start: None,
            audit_end: None,
            audit_topics: None,
            alert_rule_add: None,
            alert_rule_label: None,
            alert_rule_negate: false,
            alert_rules: false,
            alert_rule_remove: None,
            alert_rule_run: false,
            timeline_capture: false,
            timeline: false,
            timeline_diff: None,
            timeline_trend: false,
            diagnose_why: None,
            analyze: None,
            compute: None,
            convert: None,
            query_data: None,
            sql: None,
            plot: None,
            plot_type: None,
            plot_x: None,
            plot_y: None,
            periodic: None,
            hash: None,
            hash_algo: None,
            encode: None,
            decode: None,
            codec: None,
            formula: None,
            random: None,
            length: None,
            random_args: None,
            diff_data: None,
            diff_key: None,
            describe: None,
            column: None,
            matrix: None,
            matrix_a: None,
            matrix_b: None,
            solve: None,
            solve_var: None,
            solve_range: None,
            curve_fit: None,
            fit_x: None,
            fit_y: None,
            fit_model: None,
            integrate: None,
            int_from: None,
            int_to: None,
            int_var: None,
            int_n: None,
            differentiate: None,
            at: None,
            order: None,
            profile: None,
            prime: None,
            sequence: None,
            seq_count: None,
            seq_start: None,
            seq_step: None,
            choose: None,
            truth_table: None,
            gcd: None,
            roman: None,
            base_convert: None,
            base_from: None,
            base_to: None,
            date: None,
            subnet: None,
            color: None,
            mw: None,
            r#const: None,
            normal: None,
            vectors: None,
            number_theory: None,
            simulate: None,
            fourier: None,
            fourier_col: None,
            fourier_top: None,
            fourier_rate: None,
            percentile: None,
            percentile_col: None,
            pivot: None,
            pivot_row: None,
            pivot_col: None,
            pivot_val: None,
            pivot_agg: None,
            regression: None,
            regression_target: None,
            regression_predictors: None,
            outliers: None,
            outlier_col: None,
            outlier_output: None,
            plot_title: None,
            plot_output: None,
            sample: None,
            sample_n: None,
            sample_frac: None,
            sample_seed: None,
            split: None,
            sample_output: None,
            correlation: None,
            corr_method: None,
            timeseries: None,
            ts_date: None,
            ts_value: None,
            ts_window: None,
            cluster: None,
            cluster_k: None,
            cluster_cols: None,
            cluster_output: None,
            normalize: None,
            normalize_method: None,
            normalize_cols: None,
            normalize_output: None,
            pca: None,
            pca_components: None,
            pca_cols: None,
            pca_output: None,
            graph: None,
            symbolic: None,
            finance: None,
            logic: None,
            signal: None,
            interpolate: None,
            units: None,
            ode: None,
            optimize: None,
            hypothesis: None,
            hypothesis_test: None,
            hypothesis_group2: None,
            hypothesis_alpha: None,
            hypothesis_mu: None,
            classify: None,
            classify_label: None,
            classify_cols: None,
            classify_predict: None,
            classify_k: None,
            classify_method: None,
            polyfit: None,
            polyfit_x: None,
            polyfit_y: None,
            polyfit_degree: None,
            polyfit_predict: None,
            probability: None,
            bitwise: None,
            set: None,
            cipher: None,
            text_stats: None,
            levenshtein: None,
            number_format: None,
            sort_viz: None,
            checksum: None,
            validate: None,
            pdf_extract_helper: None,
            teleported_from: None,
        }
    }

    /// When both the config file and the CLI name models, the config's
    /// `think_model` is the one `preferred_coding_model_target` reports.
    #[test]
    #[allow(clippy::field_reassign_with_default)]
    fn preferred_coding_model_uses_config_before_cli() {
        let mut config = HematiteConfig::default();
        config.think_model = Some("qwen-config".into());
        config.fast_model = Some("fast-config".into());

        let cockpit = cockpit_with_cli_models();
        let selected = preferred_coding_model_target(&config, &cockpit);

        assert_eq!(selected.as_deref(), Some("qwen-config"));
    }

    /// Model identifiers compare equal regardless of ASCII case, while
    /// genuinely different names do not match.
    #[test]
    fn model_name_matches_is_case_insensitive() {
        let cases = [
            ("Qwen/Qwen3.5-9B", "qwen/qwen3.5-9b", true),
            ("bonsai-8b", "qwen/qwen3.5-9b", false),
        ];

        for (current, target, expected) in cases {
            assert_eq!(
                model_name_matches(current, target),
                expected,
                "comparing {:?} against {:?}",
                current,
                target
            );
        }
    }

    /// A 4096-token context (smaller than this file's
    /// `MIN_RECOMMENDED_CODING_CONTEXT` of 8_192) must yield a warning that
    /// names the loaded model, the context size, and the preferred model.
    #[test]
    fn coding_runtime_budget_warning_flags_small_context() {
        let loaded_model = "bonsai-8b";
        let preferred_model = "qwen/qwen3.5-9b";

        let warning =
            coding_runtime_budget_warning("LM Studio", loaded_model, 4096, Some(preferred_model))
                .expect("warning expected");

        for needle in [loaded_model, "4096", preferred_model] {
            assert!(
                warning.contains(needle),
                "warning should mention {:?}",
                needle
            );
        }
    }

    /// The context display reads "none" for the placeholder or empty model
    /// names exercised here, and the numeric context length for a real model.
    #[test]
    fn runtime_context_display_reports_none_without_loaded_model() {
        let cases = [
            ("no model loaded", 0, "none"),
            ("", 32768, "none"),
            ("qwen/qwen3.5-9b", 32000, "32000"),
        ];

        for (model, context_length, expected) in cases {
            assert_eq!(runtime_context_display(model, context_length), expected);
        }
    }
}