hematite/
runtime.rs

1use crate::agent;
2use crate::agent::conversation::{ConversationManager, UserTurn};
3use crate::agent::git_monitor::GitState;
4use crate::agent::inference::{InferenceEngine, InferenceEvent};
5use crate::ui;
6use crate::ui::gpu_monitor::GpuState;
7use crate::ui::voice::VoiceManager;
8use crate::CliCockpit;
9use notify::RecommendedWatcher;
10use std::sync::Arc;
11use tokio::sync::mpsc;
12
/// Context-window threshold, in tokens, used by `coding_runtime_budget_warning`:
/// a loaded model whose live context is below this value triggers the warning.
const MIN_RECOMMENDED_CODING_CONTEXT: usize = 8_192;
14
15fn provider_help_hint(base_url: &str, provider_name: &str) -> String {
16    if provider_name == "LM Studio" {
17        format!(
18            "Check if LM Studio is running on {}. If you prefer Ollama, set `api_url` to `{}` in `.hematite/settings.json`.",
19            base_url,
20            crate::agent::config::DEFAULT_OLLAMA_API_URL
21        )
22    } else if provider_name == "Ollama" {
23        format!(
24            "Check if Ollama is running on {} and that a chat model is available. If you prefer LM Studio, set `api_url` to `{}`.",
25            base_url,
26            crate::agent::config::DEFAULT_LM_STUDIO_API_URL
27        )
28    } else {
29        format!(
30            "Check if the configured provider is running on {} and that `.hematite/settings.json` points at the right endpoint.",
31            base_url
32        )
33    }
34}
35
/// Returns the `/v1` endpoint URL for a provider base URL.
///
/// Every trailing `/` is stripped from `base_url` before `/v1` is appended, so
/// `http://host:1234` and `http://host:1234/` produce the same endpoint.
pub fn session_endpoint_url(base_url: &str) -> String {
    const SUFFIX: &str = "/v1";

    let root = base_url.trim_end_matches('/');
    let mut endpoint = String::with_capacity(root.len() + SUFFIX.len());
    endpoint.push_str(root);
    endpoint.push_str(SUFFIX);
    endpoint
}
39
40fn preferred_coding_model_target(
41    config: &crate::agent::config::HematiteConfig,
42    cockpit: &CliCockpit,
43) -> Option<String> {
44    crate::agent::config::preferred_coding_model(config)
45        .or(cockpit.think_model.clone())
46        .or(cockpit.fast_model.clone())
47}
48
/// Reports whether two model names refer to the same model.
///
/// Both names are trimmed of surrounding whitespace and then compared
/// ASCII-case-insensitively.
fn model_name_matches(current: &str, target: &str) -> bool {
    let (loaded, wanted) = (current.trim(), target.trim());
    loaded.eq_ignore_ascii_case(wanted)
}
52
53fn coding_runtime_budget_warning(
54    provider_name: &str,
55    model_name: &str,
56    context_length: usize,
57    preferred_model: Option<&str>,
58) -> Option<String> {
59    if model_name.trim().is_empty()
60        || model_name.eq_ignore_ascii_case("no model loaded")
61        || context_length >= MIN_RECOMMENDED_CODING_CONTEXT
62    {
63        return None;
64    }
65
66    let provider_label = if provider_name.is_empty() {
67        "the active provider"
68    } else {
69        provider_name
70    };
71    let mut message = format!(
72        "Warning: {} loaded `{}` with only {} tokens of live context. That is too small for normal coding, scaffold, or teleport-resume work.",
73        provider_label, model_name, context_length
74    );
75    if let Some(target) = preferred_model.filter(|target| !model_name_matches(model_name, target)) {
76        message.push_str(&format!(
77            " Load your preferred coding model `{}` and rerun `/runtime refresh` before heavy implementation.",
78            target
79        ));
80    } else {
81        message.push_str(
82            " Load a larger-context coding model before heavy implementation and rerun `/runtime refresh`.",
83        );
84    }
85    Some(message)
86}
87
88fn provider_model_setup_hint(provider_name: &str) -> String {
89    if provider_name == "Ollama" {
90        format!(
91            "Pull or run a chat model in Ollama, then keep `api_url` pointed at `{}`. If you want semantic search too, save an embedding model in `/embed prefer <id>` and Hematite can load it here as well.",
92            crate::agent::config::DEFAULT_OLLAMA_API_URL
93        )
94    } else {
95        format!(
96            "Load a coding model in LM Studio and keep the local server on `{}`. Optionally also load an embedding model for semantic search.",
97            crate::agent::config::DEFAULT_LM_STUDIO_API_URL
98        )
99    }
100}
101
102async fn provider_startup_guidance(provider_name: &str, endpoint: &str, has_model: bool) -> String {
103    let mut lines = vec![format!("Provider setup: {} ({})", provider_name, endpoint)];
104    if has_model {
105        lines.push("Status: local runtime is reachable and a coding model is loaded.".to_string());
106    } else {
107        lines.push("Status: provider is reachable but no coding model is loaded yet.".to_string());
108        lines.push(provider_model_setup_hint(provider_name));
109    }
110    if let Some((alt_name, alt_url)) = detect_alternative_provider(provider_name).await {
111        lines.push(format!("Reachable alternative: {} ({})", alt_name, alt_url));
112    }
113    lines.push(
114        "Use `/provider` after startup if you want to save a different runtime for future sessions."
115            .to_string(),
116    );
117    lines.join("\n")
118}
119
/// Formats the context length for the startup banner.
///
/// Returns "none" when `model` is blank, contains "no model loaded" (ignoring
/// ASCII case), or when `context_length` is zero; otherwise returns the
/// context length as a decimal string.
fn runtime_context_display(model: &str, context_length: usize) -> String {
    let normalized = model.to_ascii_lowercase();
    let model_loaded = !normalized.trim().is_empty() && !normalized.contains("no model loaded");

    match (model_loaded, context_length) {
        (true, tokens) if tokens != 0 => tokens.to_string(),
        _ => "none".to_string(),
    }
}
128
129async fn print_provider_bootstrap_help(provider_name: &str, base_url: &str) {
130    let endpoint = session_endpoint_url(base_url);
131    println!("Quick setup path:");
132    if provider_name == "Ollama" {
133        println!("  1. Install Ollama: https://ollama.com/");
134        println!("  2. Start Ollama and ensure `{}` is reachable.", endpoint);
135        println!("  3. Pull a chat model, for example: `ollama pull qwen3.5:latest`");
136        println!(
137            "  4. Restart Hematite, or switch back to LM Studio with `api_url = \"{}\"`.",
138            crate::agent::config::DEFAULT_LM_STUDIO_API_URL
139        );
140    } else {
141        println!("  1. Install LM Studio: https://lmstudio.ai/");
142        println!(
143            "  2. Start the local server and ensure `{}` is reachable.",
144            endpoint
145        );
146        println!("  3. Load a coding model such as `Qwen/Qwen3.5-9B Q4_K_M`.");
147        println!("  4. Restart Hematite after the model is loaded.");
148    }
149    if let Some((alt_name, alt_url)) = detect_alternative_provider(provider_name).await {
150        println!(
151            "Reachable alternative detected: {} ({}). You can point Hematite there instead.",
152            alt_name, alt_url
153        );
154    }
155}
156
157pub async fn detect_alternative_provider(active_provider: &str) -> Option<(String, String)> {
158    match active_provider {
159        "LM Studio" => {
160            let ollama = crate::agent::ollama::OllamaHarness::new("http://localhost:11434");
161            if ollama.is_reachable().await {
162                Some((
163                    "Ollama".to_string(),
164                    crate::agent::config::DEFAULT_OLLAMA_API_URL.to_string(),
165                ))
166            } else {
167                None
168            }
169        }
170        "Ollama" => {
171            let lms = crate::agent::lms::LmsHarness::new();
172            if lms.is_server_responding("http://localhost:1234").await {
173                Some((
174                    "LM Studio".to_string(),
175                    crate::agent::config::DEFAULT_LM_STUDIO_API_URL.to_string(),
176                ))
177            } else {
178                None
179            }
180        }
181        _ => {
182            let lms = crate::agent::lms::LmsHarness::new();
183            if lms.is_server_responding("http://localhost:1234").await {
184                return Some((
185                    "LM Studio".to_string(),
186                    crate::agent::config::DEFAULT_LM_STUDIO_API_URL.to_string(),
187                ));
188            }
189            let ollama = crate::agent::ollama::OllamaHarness::new("http://localhost:11434");
190            if ollama.is_reachable().await {
191                return Some((
192                    "Ollama".to_string(),
193                    crate::agent::config::DEFAULT_OLLAMA_API_URL.to_string(),
194                ));
195            }
196            None
197        }
198    }
199}
200
/// Shared long-lived services created by `build_runtime_bundle` and consumed
/// by `run_agent_loop`.
pub struct RuntimeServices {
    /// Inference engine; handed to the `ConversationManager` in `run_agent_loop`.
    pub engine: Arc<InferenceEngine>,
    /// State handle returned by `ui::gpu_monitor::spawn_gpu_monitor`.
    pub gpu_state: Arc<GpuState>,
    /// State handle returned by `agent::git_monitor::spawn_git_monitor`.
    pub git_state: Arc<GitState>,
    /// Voice manager, constructed with a clone of the agent event sender.
    pub voice_manager: Arc<VoiceManager>,
    /// Swarm coordinator built on top of `engine` and `gpu_state`.
    pub swarm_coordinator: Arc<agent::swarm::SwarmCoordinator>,
    /// Flag installed as the conversation manager's `cancel_token`; starts as `false`.
    pub cancel_token: Arc<std::sync::atomic::AtomicBool>,
    /// Result of `boot_searx_if_needed`; its `startup_summary` is shown at startup.
    pub searx_session: agent::searx_lifecycle::SearxRuntimeSession,
}
210
/// Channel endpoints created by `build_runtime_bundle`.
///
/// For the agent, swarm and user-input channels both halves are handed out so
/// the caller can distribute them; for the specular channel only the receiver
/// is kept because the sender is given to the file watcher.
pub struct RuntimeChannels {
    /// Receives events from the watcher started by `agent::specular::spawn_watcher`.
    pub specular_rx: mpsc::Receiver<agent::specular::SpecularEvent>,
    /// Sender half of the inference event channel (capacity 100).
    pub agent_tx: mpsc::Sender<InferenceEvent>,
    /// Receiver half of the inference event channel.
    pub agent_rx: mpsc::Receiver<InferenceEvent>,
    /// Sender half of the swarm message channel (capacity 32).
    pub swarm_tx: mpsc::Sender<agent::swarm::SwarmMessage>,
    /// Receiver half of the swarm message channel.
    pub swarm_rx: mpsc::Receiver<agent::swarm::SwarmMessage>,
    /// Sender half of the user turn channel (capacity 32).
    pub user_input_tx: mpsc::Sender<UserTurn>,
    /// Receiver half of the user turn channel; drained by `run_agent_loop`.
    pub user_input_rx: mpsc::Receiver<UserTurn>,
}
220
/// Everything `build_runtime_bundle` hands back to the caller.
pub struct RuntimeBundle {
    /// Shared services (engine, monitors, voice, swarm, cancellation, SearXNG session).
    pub services: RuntimeServices,
    /// Channel endpoints connecting the UI, the agent loop and the background tasks.
    pub channels: RuntimeChannels,
    /// Watcher returned by `agent::specular::spawn_watcher`.
    /// NOTE(review): presumably must stay alive for file events to keep flowing — confirm.
    pub watcher_guard: RecommendedWatcher,
}
226
/// Runtime handles consumed by `run_agent_loop`.
pub struct AgentLoopRuntime {
    /// Source of user turns; the loop ends when this channel yields `None`.
    pub user_input_rx: mpsc::Receiver<UserTurn>,
    /// Sink for every `InferenceEvent` the loop emits.
    pub agent_tx: mpsc::Sender<InferenceEvent>,
    /// Shared services the loop hands to the `ConversationManager`.
    pub services: RuntimeServices,
}
232
/// Settings consumed by `run_agent_loop`.
///
/// Every field except `yolo` is forwarded to `ConversationManager::new`;
/// `yolo` is passed to `run_turn` on each user turn.
pub struct AgentLoopConfig {
    /// Passed as the last argument of `ConversationManager::run_turn`.
    pub yolo: bool,
    /// Forwarded to the conversation manager.
    pub professional: bool,
    /// Forwarded to the conversation manager.
    pub brief: bool,
    /// Forwarded to the conversation manager.
    pub snark: u8,
    /// Forwarded to the conversation manager.
    pub chaos: u8,
    /// Forwarded to the conversation manager.
    pub soul_personality: String,
    /// Optional fast-model name forwarded to the conversation manager.
    pub fast_model: Option<String>,
    /// Optional think-model name forwarded to the conversation manager.
    pub think_model: Option<String>,
}
243
/// Boots every runtime service and channel needed by the UI and the agent loop.
///
/// Steps, in order: load config, boot SearXNG if needed, create the inference
/// engine, spawn the GPU and git monitors, health-check the provider, detect
/// (or auto-load) the active model, publish the runtime profile on the engine,
/// optionally load the preferred embedding model, start the file watcher, and
/// create the channels.
///
/// # Parameters
/// * `cockpit` - CLI options; `url`, `think_model` and `fast_model` are read here.
/// * `species`, `snark` - forwarded to `InferenceEngine::new`.
/// * `professional` - forwarded to `SwarmCoordinator::new`.
///
/// # Errors
/// Returns an error when `InferenceEngine::new` or `agent::specular::spawn_watcher` fails.
///
/// # Process exit
/// If the provider health check fails, setup help is printed and the process
/// terminates via `std::process::exit(1)` instead of returning an error.
pub async fn build_runtime_bundle(
    cockpit: &CliCockpit,
    species: &str,
    snark: u8,
    professional: bool,
) -> Result<RuntimeBundle, Box<dyn std::error::Error>> {
    println!("Booting Hematite systems...");
    let config = crate::agent::config::load_config();

    // Auto-boot SearXNG if enabled and offline.
    let searx_session = crate::agent::searx_lifecycle::boot_searx_if_needed(&config).await;

    // settings.json api_url overrides the --url CLI flag so users don't need to retype it.
    let api_url = crate::agent::config::effective_api_url(&config, &cockpit.url);
    let mut engine_raw = InferenceEngine::new(api_url, species.to_string(), snark)?;
    let provider_name = engine_raw.provider_name().await;
    let preferred_model = preferred_coding_model_target(&config, cockpit);
    let gpu_state = ui::gpu_monitor::spawn_gpu_monitor();
    let git_state = agent::git_monitor::spawn_git_monitor();

    // An unreachable provider is fatal: print guidance and exit the process.
    if !engine_raw.health_check().await {
        println!(
            "ERROR: {} not detected at {}",
            provider_name, engine_raw.base_url
        );
        println!(
            "{}",
            provider_help_hint(&engine_raw.base_url, &provider_name)
        );
        print_provider_bootstrap_help(&provider_name, &engine_raw.base_url).await;
        std::process::exit(1);
    }

    let mut detected_model = String::new();
    let mut detected_context = 0;
    // Consecutive polls that reported neither a model nor a context length.
    let mut empty_observations = 0u8;

    // Handshake loop: wait briefly for the provider to settle, but treat repeated
    // "no model loaded" observations as a valid steady state rather than blocking startup.
    for _ in 0..20 {
        detected_model = engine_raw.get_loaded_model().await.unwrap_or_default();
        detected_context = engine_raw.detect_context_length().await;

        // Both values present: the provider has settled.
        if !detected_model.trim().is_empty() && detected_context > 0 {
            break;
        }
        if detected_model.trim().is_empty() && detected_context == 0 {
            empty_observations = empty_observations.saturating_add(1);
            if empty_observations >= 2 {
                break;
            }
        } else {
            // Partial data (only one of the two values) resets the empty streak.
            empty_observations = 0;
        }
        tokio::time::sleep(tokio::time::Duration::from_millis(250)).await;
    }

    // Set only when this function itself loaded the coding model.
    let mut auto_loaded_coding_model = false;

    if detected_model.trim().is_empty() {
        // Prefer the user's configured model; LM Studio alone has a built-in fallback name.
        let target = preferred_model
            .as_deref()
            .or(if provider_name == "LM Studio" {
                Some("gemma-4-9b-it")
            } else {
                None
            });
        if let Some(target) = target {
            println!(
                "Notice: No model loaded in {}. Attempting to auto-load `{}`...",
                provider_name, target
            );
            if let Err(e) = engine_raw.load_model(target).await {
                println!(
                    "Warning: Auto-load failed: {}. Please load a model manually in {}.",
                    e, provider_name
                );
            } else {
                auto_loaded_coding_model = true;
                // Re-read the provider state now that a model should be loaded.
                detected_model = engine_raw.get_loaded_model().await.unwrap_or_default();
                detected_context = engine_raw.detect_context_length().await;
            }
        }
    }

    // Normalize "nothing loaded" to the sentinel string and a zero context length.
    let effective_model = if detected_model.trim().is_empty() {
        "no model loaded".to_string()
    } else {
        detected_model.clone()
    };
    let effective_context = if effective_model == "no model loaded" {
        0
    } else {
        detected_context
    };
    engine_raw
        .set_runtime_profile(&effective_model, effective_context)
        .await;
    if let Some(warning) = coding_runtime_budget_warning(
        &provider_name,
        &effective_model,
        effective_context,
        preferred_model.as_deref(),
    ) {
        println!("{}", warning);
    }

    // The embedding model is only auto-loaded alongside an auto-loaded coding model.
    if auto_loaded_coding_model {
        if let Some(embed_target) = config.embed_model.as_deref() {
            let current_embed = engine_raw.get_embedding_model().await;
            // Load when nothing is loaded or a different embedding model is active.
            let needs_embed = current_embed
                .as_deref()
                .map(|loaded| !model_name_matches(loaded, embed_target))
                .unwrap_or(true);
            if needs_embed {
                println!(
                    "Notice: preferred embed model `{}` is not loaded. Attempting to load it for semantic search...",
                    embed_target
                );
                if let Err(e) = engine_raw.load_embedding_model(embed_target).await {
                    println!(
                        "Warning: Preferred embed model auto-load failed: {}. Load `{}` manually or save a different `/embed prefer` target if you want semantic search.",
                        e, embed_target
                    );
                }
            }
        }
    }

    let (specular_tx, specular_rx) = mpsc::channel(32);
    let watcher_guard = agent::specular::spawn_watcher(specular_tx)?;

    let (agent_tx, agent_rx) = mpsc::channel::<InferenceEvent>(100);
    let (swarm_tx, swarm_rx) = mpsc::channel(32);
    let voice_manager = Arc::new(VoiceManager::new(agent_tx.clone()));

    // Worker model: the config value wins, the CLI flag is the fallback.
    if let Some(worker) = config
        .fast_model
        .clone()
        .or_else(|| cockpit.fast_model.clone())
    {
        engine_raw.worker_model = Some(worker);
    }

    // The engine is shared from here on; all mutation above must happen before this point.
    let engine = Arc::new(engine_raw);
    let swarm_coordinator = Arc::new(agent::swarm::SwarmCoordinator::new(
        engine.clone(),
        gpu_state.clone(),
        cockpit.fast_model.clone(),
        professional,
    ));

    let (user_input_tx, user_input_rx) = mpsc::channel::<UserTurn>(32);
    let cancel_token = Arc::new(std::sync::atomic::AtomicBool::new(false));

    Ok(RuntimeBundle {
        services: RuntimeServices {
            engine,
            gpu_state,
            git_state,
            voice_manager,
            swarm_coordinator,
            cancel_token,
            searx_session,
        },
        channels: RuntimeChannels {
            specular_rx,
            agent_tx,
            agent_rx,
            swarm_tx,
            swarm_rx,
            user_input_tx,
            user_input_rx,
        },
        watcher_guard,
    })
}
421
422pub fn spawn_runtime_profile_sync(
423    engine: Arc<InferenceEngine>,
424    agent_tx: mpsc::Sender<InferenceEvent>,
425) -> tokio::task::JoinHandle<()> {
426    tokio::spawn(async move {
427        // Initial delay before the first background poll.
428        tokio::time::sleep(tokio::time::Duration::from_secs(4)).await;
429
430        let mut last_embed: Option<String> = None;
431
432        loop {
433            let result = engine.refresh_runtime_profile().await;
434
435            let Some((model_id, context_length, _changed)) = result else {
436                if agent_tx.is_closed() {
437                    break;
438                }
439                // LM Studio unreachable — back off; no need to hammer a closed server.
440                tokio::time::sleep(tokio::time::Duration::from_secs(15)).await;
441                continue;
442            };
443            let provider_name = engine.provider_name().await;
444
445            // When no coding model is loaded, back off to reduce log noise in LM Studio.
446            let poll_interval = if model_id == "no model loaded" {
447                tokio::time::Duration::from_secs(12)
448            } else {
449                tokio::time::Duration::from_secs(4)
450            };
451
452            if agent_tx
453                .send(InferenceEvent::RuntimeProfile {
454                    provider_name,
455                    endpoint: session_endpoint_url(&engine.base_url),
456                    model_id,
457                    context_length,
458                })
459                .await
460                .is_err()
461            {
462                break;
463            }
464
465            // Poll embed model separately and notify on change.
466            let current_embed = engine.get_embedding_model().await;
467            if current_embed != last_embed {
468                if agent_tx
469                    .send(InferenceEvent::EmbedProfile {
470                        model_id: current_embed.clone(),
471                    })
472                    .await
473                    .is_err()
474                {
475                    break;
476                }
477                last_embed = current_embed;
478            }
479
480            tokio::time::sleep(poll_interval).await;
481        }
482    })
483}
484
485pub async fn run_agent_loop(runtime: AgentLoopRuntime, config: AgentLoopConfig) {
486    let AgentLoopRuntime {
487        mut user_input_rx,
488        agent_tx,
489        services,
490    } = runtime;
491    let RuntimeServices {
492        engine,
493        gpu_state,
494        git_state,
495        voice_manager,
496        swarm_coordinator,
497        cancel_token,
498        searx_session,
499    } = services;
500
501    let mut manager = ConversationManager::new(
502        engine,
503        config.professional,
504        config.brief,
505        config.snark,
506        config.chaos,
507        config.soul_personality,
508        config.fast_model,
509        config.think_model,
510        gpu_state.clone(),
511        git_state,
512        swarm_coordinator,
513        voice_manager,
514    );
515    manager.cancel_token = cancel_token;
516
517    let _ = agent_tx
518        .send(InferenceEvent::RuntimeProfile {
519            provider_name: manager.engine.provider_name().await,
520            endpoint: session_endpoint_url(&manager.engine.base_url),
521            model_id: manager.engine.current_model(),
522            context_length: manager.engine.current_context_length(),
523        })
524        .await;
525
526    let workspace_root = crate::tools::file_ops::workspace_root();
527    let _ = crate::agent::workspace_profile::ensure_workspace_profile(&workspace_root);
528
529    // Send the startup greeting immediately — before MCP and Vein so it always
530    // appears right away, even if vein indexing takes a while on first run.
531    let gpu_name = gpu_state.gpu_name();
532    let vram = gpu_state.label();
533    let voice_cfg = crate::agent::config::load_config();
534    let voice_status = format!(
535        "Voice: {} | Speed: {}x | Volume: {}x",
536        crate::agent::config::effective_voice(&voice_cfg),
537        crate::agent::config::effective_voice_speed(&voice_cfg),
538        crate::agent::config::effective_voice_volume(&voice_cfg),
539    );
540    let embed_status = match manager.engine.get_embedding_model().await {
541        Some(id) => format!("Embed: {} (semantic search ready)", id),
542        None => {
543            "Embed: none loaded (load a preferred embedding model for semantic search)".to_string()
544        }
545    };
546    let workspace_root = crate::tools::file_ops::workspace_root();
547    let docs_only_mode = !crate::tools::file_ops::is_project_workspace();
548    let workspace_mode = if docs_only_mode {
549        "docs-only"
550    } else {
551        "project"
552    };
553    let launched_from_home = home::home_dir()
554        .and_then(|home| std::env::current_dir().ok().map(|cwd| cwd == home))
555        .unwrap_or(false);
556    let project_hint = if !docs_only_mode {
557        String::new()
558    } else if launched_from_home {
559        "\nTip: you launched Hematite from your home directory. That is fine for workstation questions and docs-only memory, but for project-specific build, test, script, or repo work you should relaunch in the target project directory. `.hematite/docs/`, `.hematite/imports/`, and recent local session reports remain searchable in docs-only vein mode.".to_string()
560    } else {
561        "\nTip: source indexing is disabled outside a project folder. Launch Hematite in the target project directory for project-specific build, test, script, or repo work. `.hematite/docs/`, `.hematite/imports/`, and recent local session reports remain searchable in docs-only vein mode.".to_string()
562    };
563    let display_model = {
564        let m = manager.engine.current_model();
565        if m.is_empty() || m == "no model loaded" {
566            "no model loaded".to_string()
567        } else {
568            m
569        }
570    };
571    let provider_name = manager.engine.provider_name().await;
572    let startup_endpoint = session_endpoint_url(&manager.engine.base_url);
573    let terminal_name = crate::ui::terminal::detect_terminal().label();
574    let greeting = format!(
575        "Hematite {} Online [{}] | Provider: {}\nModel: {} | CTX: {} | GPU: {} | VRAM: {}\nEndpoint: {}\nWorkspace: {} ({})\n{}\n{}\n/ask · read-only analysis   /code · implement   /architect · plan-first   /chat · conversation\nRecovery: /undo · /new · /forget · /clear   |   /version · /about{}",
576        crate::hematite_version_display(),
577        terminal_name,
578        provider_name,
579        display_model,
580        runtime_context_display(&display_model, manager.engine.current_context_length()),
581        gpu_name,
582        vram,
583        startup_endpoint,
584        workspace_root.display(),
585        workspace_mode,
586        embed_status,
587        voice_status,
588        project_hint
589    );
590    let _ = agent_tx
591        .send(InferenceEvent::MutedToken(format!("\n{}", greeting)))
592        .await;
593    if let Some(summary) = searx_session.startup_summary.as_deref() {
594        let _ = agent_tx
595            .send(InferenceEvent::Thought(summary.to_string()))
596            .await;
597    }
598    if display_model == "no model loaded" {
599        let guidance = provider_startup_guidance(&provider_name, &startup_endpoint, false).await;
600        let _ = agent_tx.send(InferenceEvent::Thought(guidance)).await;
601    }
602
603    if let Err(e) = manager.initialize_mcp(&agent_tx).await {
604        let _ = agent_tx
605            .send(InferenceEvent::Error(format!("MCP Init Failed: {}", e)))
606            .await;
607    }
608    let indexed = manager.initialize_vein();
609    manager.initialize_repo_map();
610    let _ = agent_tx
611        .send(InferenceEvent::VeinStatus {
612            file_count: manager.vein.file_count(),
613            embedded_count: manager.vein.embedded_chunk_count(),
614            docs_only: docs_only_mode,
615        })
616        .await;
617    let _ = agent_tx
618        .send(InferenceEvent::Thought(format!(
619            "The Vein: indexed {} files",
620            indexed
621        )))
622        .await;
623
624    // Show a compact resume line if a prior session left a checkpoint.
625    if let Some(cp) = crate::agent::conversation::load_checkpoint() {
626        let verify_tag = match cp.last_verify_ok {
627            Some(true) => " | last verify: PASS",
628            Some(false) => " | last verify: FAIL",
629            None => "",
630        };
631        let files_tag = if cp.working_files.is_empty() {
632            String::new()
633        } else {
634            format!(" | files: {}", cp.working_files.join(", "))
635        };
636        let goal_preview: String = cp.last_goal.chars().take(120).collect();
637        let trail = if cp.last_goal.len() > 120 { "…" } else { "" };
638        let resume_msg = format!(
639            "Resumed: {} turn{}{}{} — last goal: \"{}{}\"",
640            cp.turn_count,
641            if cp.turn_count == 1 { "" } else { "s" },
642            verify_tag,
643            files_tag,
644            goal_preview,
645            trail,
646        );
647        let _ = agent_tx.send(InferenceEvent::Thought(resume_msg)).await;
648    } else {
649        let session_path = crate::tools::file_ops::hematite_dir().join("session.json");
650        if !session_path.exists() {
651            let first_run_msg = "\nWelcome to Hematite! I'm your local AI workstation assistant.\n\n\
652                                 Since this is your first time here, what would you like to do?\n\
653                                 - System Check: Wondering if your tools are working? Run `/health`\n\
654                                 - Code: Ready to build something? Run `/architect Let's build a new feature`\n\
655                                 - Setup: Need help configuring Git or the workspace? Run `/ask What should I set up first?`\n\
656                                 - Help: Have a weird error? Type `/explain ` and paste it.\n\n\
657                                 Just type \"hello\" to start a normal conversation!".to_string();
658            let _ = agent_tx.send(InferenceEvent::Thought(first_run_msg)).await;
659            let provider_setup = provider_startup_guidance(
660                &provider_name,
661                &startup_endpoint,
662                display_model != "no model loaded",
663            )
664            .await;
665            let _ = agent_tx.send(InferenceEvent::Thought(provider_setup)).await;
666
667            // Create a minimal empty session struct so we don't show this again until they intentionally /forget
668            let _ = std::fs::write(&session_path, "{\"turn_count\": 0}");
669        }
670    }
671
672    let _ = agent_tx.send(InferenceEvent::Done).await;
673    let startup_config = crate::agent::config::load_config();
674    manager.engine.set_gemma_native_formatting(
675        crate::agent::config::effective_gemma_native_formatting(
676            &startup_config,
677            &manager.engine.current_model(),
678        ),
679    );
680    let startup_model = manager.engine.current_model();
681    if crate::agent::inference::is_hematite_native_model(&startup_model) {
682        let mode = crate::agent::config::gemma_native_mode_label(&startup_config, &startup_model);
683        let status = match mode {
684            "on" => "Sovereign Engine detected | Native Turn-Formatting: ON (forced)",
685            "auto" => "Sovereign Engine detected | Native Turn-Formatting: ON (auto)",
686            _ => "Sovereign Engine detected | Native Turn-Formatting: OFF (use /gemma-native auto|on)",
687        };
688        let _ = agent_tx
689            .send(InferenceEvent::MutedToken(status.to_string()))
690            .await;
691    }
692
693    while let Some(input) = user_input_rx.recv().await {
694        if let Err(e) = manager
695            .run_turn(&input, agent_tx.clone(), config.yolo)
696            .await
697        {
698            let _ = agent_tx.send(InferenceEvent::Error(e.to_string())).await;
699            let _ = agent_tx.send(InferenceEvent::Done).await;
700        }
701    }
702}
703
#[cfg(test)]
mod tests {
    use super::{
        coding_runtime_budget_warning, model_name_matches, preferred_coding_model_target,
        runtime_context_display,
    };
    use crate::agent::config::HematiteConfig;

    /// A model configured in settings must be chosen over the CLI model flags.
    #[test]
    fn preferred_coding_model_uses_config_before_cli() {
        let cli = crate::CliCockpit {
            url: "http://localhost:1234/v1".into(),
            fast_model: Some("fast-cli".into()),
            think_model: Some("think-cli".into()),
            swarm_size: 3,
            yolo: false,
            brief: false,
            rusty: false,
            stats: false,
            no_splash: false,
            mcp_server: false,
            edge_redact: false,
            semantic_redact: false,
            reroll: None,
            semantic_url: None,
            semantic_model: None,
            pdf_extract_helper: None,
            teleported_from: None,
        };
        let mut settings = HematiteConfig::default();
        settings.think_model = Some("qwen-config".into());
        settings.fast_model = Some("fast-config".into());

        let chosen = preferred_coding_model_target(&settings, &cli);

        assert_eq!(chosen, Some("qwen-config".to_string()));
    }

    /// Names that differ only in ASCII case match; different names do not.
    #[test]
    fn model_name_matches_is_case_insensitive() {
        let same_model = model_name_matches("Qwen/Qwen3.5-9B", "qwen/qwen3.5-9b");
        let other_model = model_name_matches("bonsai-8b", "qwen/qwen3.5-9b");

        assert!(same_model);
        assert!(!other_model);
    }

    /// A 4096-token context yields a warning naming the model, size and preferred model.
    #[test]
    fn coding_runtime_budget_warning_flags_small_context() {
        let preferred = Some("qwen/qwen3.5-9b");
        let warning = coding_runtime_budget_warning("LM Studio", "bonsai-8b", 4096, preferred)
            .expect("warning expected");

        for fragment in ["bonsai-8b", "4096", "qwen/qwen3.5-9b"] {
            assert!(warning.contains(fragment));
        }
    }

    /// The context display is "none" unless a real model is loaded.
    #[test]
    fn runtime_context_display_reports_none_without_loaded_model() {
        let cases = [
            ("no model loaded", 0, "none"),
            ("", 32768, "none"),
            ("qwen/qwen3.5-9b", 32000, "32000"),
        ];

        for (model, context_length, expected) in cases {
            assert_eq!(runtime_context_display(model, context_length), expected);
        }
    }
}
hematite/runtime.rs

hematite/
runtime.rs