hematite/
runtime.rs

1use crate::agent;
2use crate::agent::conversation::{ConversationManager, UserTurn};
3use crate::agent::git_monitor::GitState;
4use crate::agent::inference::{InferenceEngine, InferenceEvent};
5use crate::ui;
6use crate::ui::gpu_monitor::GpuState;
7use crate::ui::voice::VoiceManager;
8use crate::CliCockpit;
9use notify::RecommendedWatcher;
10use std::sync::Arc;
11use tokio::sync::mpsc;
12
13pub struct RuntimeServices {
14    pub engine: Arc<InferenceEngine>,
15    pub gpu_state: Arc<GpuState>,
16    pub git_state: Arc<GitState>,
17    pub voice_manager: Arc<VoiceManager>,
18    pub swarm_coordinator: Arc<agent::swarm::SwarmCoordinator>,
19    pub cancel_token: Arc<std::sync::atomic::AtomicBool>,
20}
21
22pub struct RuntimeChannels {
23    pub specular_rx: mpsc::Receiver<agent::specular::SpecularEvent>,
24    pub agent_tx: mpsc::Sender<InferenceEvent>,
25    pub agent_rx: mpsc::Receiver<InferenceEvent>,
26    pub swarm_tx: mpsc::Sender<agent::swarm::SwarmMessage>,
27    pub swarm_rx: mpsc::Receiver<agent::swarm::SwarmMessage>,
28    pub user_input_tx: mpsc::Sender<UserTurn>,
29    pub user_input_rx: mpsc::Receiver<UserTurn>,
30}
31
32pub struct RuntimeBundle {
33    pub services: RuntimeServices,
34    pub channels: RuntimeChannels,
35    pub watcher_guard: RecommendedWatcher,
36}
37
38pub struct AgentLoopRuntime {
39    pub user_input_rx: mpsc::Receiver<UserTurn>,
40    pub agent_tx: mpsc::Sender<InferenceEvent>,
41    pub services: RuntimeServices,
42}
43
/// Behavioural settings for `run_agent_loop`.
///
/// Every field except `yolo` is forwarded unchanged to
/// `ConversationManager::new`; `yolo` is passed to each `run_turn` call.
pub struct AgentLoopConfig {
    /// Passed to `ConversationManager::run_turn` on every turn.
    pub yolo: bool,
    /// Forwarded to the conversation manager.
    pub professional: bool,
    /// Forwarded to the conversation manager.
    pub brief: bool,
    /// Forwarded to the conversation manager.
    pub snark: u8,
    /// Forwarded to the conversation manager.
    pub chaos: u8,
    /// Forwarded to the conversation manager.
    pub soul_personality: String,
    /// Optional fast-model name, forwarded to the conversation manager.
    pub fast_model: Option<String>,
    /// Optional think-model name, forwarded to the conversation manager.
    pub think_model: Option<String>,
}
54
55pub async fn build_runtime_bundle(
56    cockpit: &CliCockpit,
57    species: &str,
58    snark: u8,
59    professional: bool,
60) -> Result<RuntimeBundle, Box<dyn std::error::Error>> {
61    println!("Booting Hematite systems...");
62    let config = crate::agent::config::load_config();
63
64    // Auto-boot SearXNG if enabled and offline.
65    crate::agent::searx_lifecycle::boot_searx_if_needed(&config).await;
66
67    // settings.json api_url overrides the --url CLI flag so users don't need to retype it.
68    let api_url = config
69        .api_url
70        .clone()
71        .unwrap_or_else(|| cockpit.url.clone());
72    let mut engine_raw = InferenceEngine::new(api_url, species.to_string(), snark)?;
73    let gpu_state = ui::gpu_monitor::spawn_gpu_monitor();
74    let git_state = agent::git_monitor::spawn_git_monitor();
75
76    if !engine_raw.health_check().await {
77        println!(
78            "ERROR: LLM Provider not detected at {}",
79            engine_raw.base_url
80        );
81        println!("Check if LM Studio (or your local server) is running and port mapped correctly.");
82        std::process::exit(1);
83    }
84
85    let model_name = engine_raw.get_loaded_model().await;
86    if let Some(name) = model_name {
87        engine_raw.set_runtime_profile(&name, engine_raw.current_context_length());
88    }
89    let detected_context = engine_raw.detect_context_length().await;
90    let detected_model = engine_raw.current_model();
91    engine_raw.set_runtime_profile(&detected_model, detected_context);
92
93    let (specular_tx, specular_rx) = mpsc::channel(32);
94    let watcher_guard = agent::specular::spawn_watcher(specular_tx)?;
95
96    let (agent_tx, agent_rx) = mpsc::channel::<InferenceEvent>(100);
97    let (swarm_tx, swarm_rx) = mpsc::channel(32);
98    let voice_manager = Arc::new(VoiceManager::new(agent_tx.clone()));
99
100    if let Some(ref worker) = cockpit.fast_model {
101        engine_raw.worker_model = Some(worker.clone());
102    }
103
104    let engine = Arc::new(engine_raw);
105    let swarm_coordinator = Arc::new(agent::swarm::SwarmCoordinator::new(
106        engine.clone(),
107        gpu_state.clone(),
108        cockpit.fast_model.clone(),
109        professional,
110    ));
111
112    let (user_input_tx, user_input_rx) = mpsc::channel::<UserTurn>(32);
113    let cancel_token = Arc::new(std::sync::atomic::AtomicBool::new(false));
114
115    Ok(RuntimeBundle {
116        services: RuntimeServices {
117            engine,
118            gpu_state,
119            git_state,
120            voice_manager,
121            swarm_coordinator,
122            cancel_token,
123        },
124        channels: RuntimeChannels {
125            specular_rx,
126            agent_tx,
127            agent_rx,
128            swarm_tx,
129            swarm_rx,
130            user_input_tx,
131            user_input_rx,
132        },
133        watcher_guard,
134    })
135}
136
137pub fn spawn_runtime_profile_sync(
138    engine: Arc<InferenceEngine>,
139    agent_tx: mpsc::Sender<InferenceEvent>,
140) -> tokio::task::JoinHandle<()> {
141    tokio::spawn(async move {
142        // Initial delay before the first background poll.
143        tokio::time::sleep(tokio::time::Duration::from_secs(4)).await;
144
145        let mut last_embed: Option<String> = None;
146
147        loop {
148            let result = engine.refresh_runtime_profile().await;
149
150            let Some((model_id, context_length, _changed)) = result else {
151                if agent_tx.is_closed() {
152                    break;
153                }
154                // LM Studio unreachable — back off; no need to hammer a closed server.
155                tokio::time::sleep(tokio::time::Duration::from_secs(15)).await;
156                continue;
157            };
158
159            // When no coding model is loaded, back off to reduce log noise in LM Studio.
160            let poll_interval = if model_id == "no model loaded" {
161                tokio::time::Duration::from_secs(12)
162            } else {
163                tokio::time::Duration::from_secs(4)
164            };
165
166            if agent_tx
167                .send(InferenceEvent::RuntimeProfile {
168                    model_id,
169                    context_length,
170                })
171                .await
172                .is_err()
173            {
174                break;
175            }
176
177            // Poll embed model separately and notify on change.
178            let current_embed = engine.get_embedding_model().await;
179            if current_embed != last_embed {
180                if agent_tx
181                    .send(InferenceEvent::EmbedProfile {
182                        model_id: current_embed.clone(),
183                    })
184                    .await
185                    .is_err()
186                {
187                    break;
188                }
189                last_embed = current_embed;
190            }
191
192            tokio::time::sleep(poll_interval).await;
193        }
194    })
195}
196
197pub async fn run_agent_loop(runtime: AgentLoopRuntime, config: AgentLoopConfig) {
198    let AgentLoopRuntime {
199        mut user_input_rx,
200        agent_tx,
201        services,
202    } = runtime;
203    let RuntimeServices {
204        engine,
205        gpu_state,
206        git_state,
207        voice_manager,
208        swarm_coordinator,
209        cancel_token,
210    } = services;
211
212    let mut manager = ConversationManager::new(
213        engine,
214        config.professional,
215        config.brief,
216        config.snark,
217        config.chaos,
218        config.soul_personality,
219        config.fast_model,
220        config.think_model,
221        gpu_state.clone(),
222        git_state,
223        swarm_coordinator,
224        voice_manager,
225    );
226    manager.cancel_token = cancel_token;
227
228    let _ = agent_tx
229        .send(InferenceEvent::RuntimeProfile {
230            model_id: manager.engine.current_model(),
231            context_length: manager.engine.current_context_length(),
232        })
233        .await;
234
235    let workspace_root = crate::tools::file_ops::workspace_root();
236    let _ = crate::agent::workspace_profile::ensure_workspace_profile(&workspace_root);
237
238    // Send the startup greeting immediately — before MCP and Vein so it always
239    // appears right away, even if vein indexing takes a while on first run.
240    let gpu_name = gpu_state.gpu_name();
241    let vram = gpu_state.label();
242    let voice_cfg = crate::agent::config::load_config();
243    let voice_status = format!(
244        "Voice: {} | Speed: {}x | Volume: {}x",
245        crate::agent::config::effective_voice(&voice_cfg),
246        crate::agent::config::effective_voice_speed(&voice_cfg),
247        crate::agent::config::effective_voice_volume(&voice_cfg),
248    );
249    let embed_status = match manager.engine.get_embedding_model().await {
250        Some(id) => format!("Embed: {} (semantic search ready)", id),
251        None => "Embed: none loaded (load nomic-embed-text-v2 for semantic search)".to_string(),
252    };
253    let workspace_root = crate::tools::file_ops::workspace_root();
254    let docs_only_mode = !crate::tools::file_ops::is_project_workspace();
255    let workspace_mode = if docs_only_mode {
256        "docs-only"
257    } else {
258        "project"
259    };
260    let launched_from_home = home::home_dir()
261        .and_then(|home| std::env::current_dir().ok().map(|cwd| cwd == home))
262        .unwrap_or(false);
263    let project_hint = if !docs_only_mode {
264        String::new()
265    } else if launched_from_home {
266        "\nTip: you launched Hematite from your home directory. That is fine for workstation questions and docs-only memory, but for project-specific build, test, script, or repo work you should relaunch in the target project directory. `.hematite/docs/`, `.hematite/imports/`, and recent local session reports remain searchable in docs-only vein mode.".to_string()
267    } else {
268        "\nTip: source indexing is disabled outside a project folder. Launch Hematite in the target project directory for project-specific build, test, script, or repo work. `.hematite/docs/`, `.hematite/imports/`, and recent local session reports remain searchable in docs-only vein mode.".to_string()
269    };
270    let display_model = {
271        let m = manager.engine.current_model();
272        if m.is_empty() {
273            "no chat model loaded".to_string()
274        } else {
275            m
276        }
277    };
278    let greeting = format!(
279        "Hematite {} Online | Model: {} | CTX: {} | GPU: {} | VRAM: {}\nEndpoint: {}\nWorkspace: {} ({})\n{}\n{}\n/ask · read-only analysis   /code · implement   /architect · plan-first   /chat · conversation\nRecovery: /undo · /new · /forget · /clear   |   /version · /about{}",
280        crate::hematite_version_display(),
281        display_model,
282        manager.engine.current_context_length(),
283        gpu_name,
284        vram,
285        format!("{}/v1", manager.engine.base_url),
286        workspace_root.display(),
287        workspace_mode,
288        embed_status,
289        voice_status,
290        project_hint
291    );
292    let _ = agent_tx
293        .send(InferenceEvent::MutedToken(format!("\n{}", greeting)))
294        .await;
295
296    if let Err(e) = manager.initialize_mcp(&agent_tx).await {
297        let _ = agent_tx
298            .send(InferenceEvent::Error(format!("MCP Init Failed: {}", e)))
299            .await;
300    }
301    let indexed = manager.initialize_vein();
302    manager.initialize_repo_map();
303    let _ = agent_tx
304        .send(InferenceEvent::VeinStatus {
305            file_count: manager.vein.file_count(),
306            embedded_count: manager.vein.embedded_chunk_count(),
307            docs_only: docs_only_mode,
308        })
309        .await;
310    let _ = agent_tx
311        .send(InferenceEvent::Thought(format!(
312            "The Vein: indexed {} files",
313            indexed
314        )))
315        .await;
316
317    // Show a compact resume line if a prior session left a checkpoint.
318    if let Some(cp) = crate::agent::conversation::load_checkpoint() {
319        let verify_tag = match cp.last_verify_ok {
320            Some(true) => " | last verify: PASS",
321            Some(false) => " | last verify: FAIL",
322            None => "",
323        };
324        let files_tag = if cp.working_files.is_empty() {
325            String::new()
326        } else {
327            format!(" | files: {}", cp.working_files.join(", "))
328        };
329        let goal_preview: String = cp.last_goal.chars().take(120).collect();
330        let trail = if cp.last_goal.len() > 120 { "…" } else { "" };
331        let resume_msg = format!(
332            "Resumed: {} turn{}{}{} — last goal: \"{}{}\"",
333            cp.turn_count,
334            if cp.turn_count == 1 { "" } else { "s" },
335            verify_tag,
336            files_tag,
337            goal_preview,
338            trail,
339        );
340        let _ = agent_tx.send(InferenceEvent::Thought(resume_msg)).await;
341    } else {
342        let session_path = crate::tools::file_ops::hematite_dir().join("session.json");
343        if !session_path.exists() {
344            let first_run_msg = "\nWelcome to Hematite! I'm your local AI workstation assistant.\n\n\
345                                 Since this is your first time here, what would you like to do?\n\
346                                 - System Check: Wondering if your tools are working? Run `/health`\n\
347                                 - Code: Ready to build something? Run `/architect Let's build a new feature`\n\
348                                 - Setup: Need help configuring Git or the workspace? Run `/ask What should I set up first?`\n\
349                                 - Help: Have a weird error? Type `/explain ` and paste it.\n\n\
350                                 Just type \"hello\" to start a normal conversation!".to_string();
351            let _ = agent_tx.send(InferenceEvent::Thought(first_run_msg)).await;
352
353            // Create a minimal empty session struct so we don't show this again until they intentionally /forget
354            let _ = std::fs::write(&session_path, "{\"turn_count\": 0}");
355        }
356    }
357
358    let _ = agent_tx.send(InferenceEvent::Done).await;
359    let startup_config = crate::agent::config::load_config();
360    manager.engine.set_gemma_native_formatting(
361        crate::agent::config::effective_gemma_native_formatting(
362            &startup_config,
363            &manager.engine.current_model(),
364        ),
365    );
366    let startup_model = manager.engine.current_model();
367    if crate::agent::inference::is_hematite_native_model(&startup_model) {
368        let mode = crate::agent::config::gemma_native_mode_label(&startup_config, &startup_model);
369        let status = match mode {
370            "on" => "Sovereign Engine detected | Native Turn-Formatting: ON (forced)",
371            "auto" => "Sovereign Engine detected | Native Turn-Formatting: ON (auto)",
372            _ => "Sovereign Engine detected | Native Turn-Formatting: OFF (use /gemma-native auto|on)",
373        };
374        let _ = agent_tx
375            .send(InferenceEvent::MutedToken(status.to_string()))
376            .await;
377    }
378
379    while let Some(input) = user_input_rx.recv().await {
380        if let Err(e) = manager
381            .run_turn(&input, agent_tx.clone(), config.yolo)
382            .await
383        {
384            let _ = agent_tx.send(InferenceEvent::Error(e.to_string())).await;
385            let _ = agent_tx.send(InferenceEvent::Done).await;
386        }
387    }
388}
hematite/runtime.rs

hematite/
runtime.rs