hematite/
runtime.rs

1use crate::agent;
2use crate::agent::conversation::{ConversationManager, UserTurn};
3use crate::agent::git_monitor::GitState;
4use crate::agent::inference::{InferenceEngine, InferenceEvent};
5use crate::ui;
6use crate::ui::gpu_monitor::GpuState;
7use crate::ui::voice::VoiceManager;
8use crate::CliCockpit;
9use notify::RecommendedWatcher;
10use std::sync::Arc;
11use tokio::sync::mpsc;
12
13pub struct RuntimeServices {
14    pub engine: Arc<InferenceEngine>,
15    pub gpu_state: Arc<GpuState>,
16    pub git_state: Arc<GitState>,
17    pub voice_manager: Arc<VoiceManager>,
18    pub swarm_coordinator: Arc<agent::swarm::SwarmCoordinator>,
19    pub cancel_token: Arc<std::sync::atomic::AtomicBool>,
20}
21
22pub struct RuntimeChannels {
23    pub specular_rx: mpsc::Receiver<agent::specular::SpecularEvent>,
24    pub agent_tx: mpsc::Sender<InferenceEvent>,
25    pub agent_rx: mpsc::Receiver<InferenceEvent>,
26    pub swarm_tx: mpsc::Sender<agent::swarm::SwarmMessage>,
27    pub swarm_rx: mpsc::Receiver<agent::swarm::SwarmMessage>,
28    pub user_input_tx: mpsc::Sender<UserTurn>,
29    pub user_input_rx: mpsc::Receiver<UserTurn>,
30}
31
32pub struct RuntimeBundle {
33    pub services: RuntimeServices,
34    pub channels: RuntimeChannels,
35    pub watcher_guard: RecommendedWatcher,
36}
37
38pub struct AgentLoopRuntime {
39    pub user_input_rx: mpsc::Receiver<UserTurn>,
40    pub agent_tx: mpsc::Sender<InferenceEvent>,
41    pub services: RuntimeServices,
42}
43
/// Plain-data settings consumed by [`run_agent_loop`].
///
/// Every field except `yolo` is forwarded unchanged to
/// `ConversationManager::new`; `yolo` is passed to `run_turn` on each turn.
/// The type derives `Debug` and `Clone` so it can be logged and duplicated
/// like any other value type.
#[derive(Debug, Clone)]
pub struct AgentLoopConfig {
    /// Passed as the last argument of `ConversationManager::run_turn` for every turn.
    pub yolo: bool,
    /// Forwarded to `ConversationManager::new`.
    pub professional: bool,
    /// Forwarded to `ConversationManager::new`.
    pub brief: bool,
    /// Forwarded to `ConversationManager::new`.
    pub snark: u8,
    /// Forwarded to `ConversationManager::new`.
    pub chaos: u8,
    /// Forwarded to `ConversationManager::new`.
    pub soul_personality: String,
    /// Optional model name forwarded to `ConversationManager::new`.
    pub fast_model: Option<String>,
    /// Optional model name forwarded to `ConversationManager::new`.
    pub think_model: Option<String>,
}
54
55pub async fn build_runtime_bundle(
56    cockpit: &CliCockpit,
57    species: &str,
58    snark: u8,
59    professional: bool,
60) -> Result<RuntimeBundle, Box<dyn std::error::Error>> {
61    println!("Booting Hematite systems...");
62    // settings.json api_url overrides the --url CLI flag so users don't need to retype it.
63    let api_url = crate::agent::config::load_config()
64        .api_url
65        .unwrap_or_else(|| cockpit.url.clone());
66    let mut engine_raw = InferenceEngine::new(api_url, species.to_string(), snark)?;
67    let gpu_state = ui::gpu_monitor::spawn_gpu_monitor();
68    let git_state = agent::git_monitor::spawn_git_monitor();
69
70    if !engine_raw.health_check().await {
71        println!(
72            "ERROR: LLM Provider not detected at {}",
73            engine_raw.base_url
74        );
75        println!("Check if LM Studio (or your local server) is running and port mapped correctly.");
76        std::process::exit(1);
77    }
78
79    let model_name = engine_raw.get_loaded_model().await;
80    if let Some(name) = model_name {
81        engine_raw.set_runtime_profile(&name, engine_raw.current_context_length());
82    }
83    let detected_context = engine_raw.detect_context_length().await;
84    let detected_model = engine_raw.current_model();
85    engine_raw.set_runtime_profile(&detected_model, detected_context);
86
87    let (specular_tx, specular_rx) = mpsc::channel(32);
88    let watcher_guard = agent::specular::spawn_watcher(specular_tx)?;
89
90    let (agent_tx, agent_rx) = mpsc::channel::<InferenceEvent>(100);
91    let (swarm_tx, swarm_rx) = mpsc::channel(32);
92    let voice_manager = Arc::new(VoiceManager::new(agent_tx.clone()));
93
94    if let Some(ref worker) = cockpit.fast_model {
95        engine_raw.worker_model = Some(worker.clone());
96    }
97
98    let engine = Arc::new(engine_raw);
99    let swarm_coordinator = Arc::new(agent::swarm::SwarmCoordinator::new(
100        engine.clone(),
101        gpu_state.clone(),
102        cockpit.fast_model.clone(),
103        professional,
104    ));
105
106    let (user_input_tx, user_input_rx) = mpsc::channel::<UserTurn>(32);
107    let cancel_token = Arc::new(std::sync::atomic::AtomicBool::new(false));
108
109    Ok(RuntimeBundle {
110        services: RuntimeServices {
111            engine,
112            gpu_state,
113            git_state,
114            voice_manager,
115            swarm_coordinator,
116            cancel_token,
117        },
118        channels: RuntimeChannels {
119            specular_rx,
120            agent_tx,
121            agent_rx,
122            swarm_tx,
123            swarm_rx,
124            user_input_tx,
125            user_input_rx,
126        },
127        watcher_guard,
128    })
129}
130
131pub fn spawn_runtime_profile_sync(
132    engine: Arc<InferenceEngine>,
133    agent_tx: mpsc::Sender<InferenceEvent>,
134) -> tokio::task::JoinHandle<()> {
135    tokio::spawn(async move {
136        // Initial delay before the first background poll.
137        tokio::time::sleep(tokio::time::Duration::from_secs(4)).await;
138
139        let mut last_embed: Option<String> = None;
140
141        loop {
142            let result = engine.refresh_runtime_profile().await;
143
144            let Some((model_id, context_length, _changed)) = result else {
145                if agent_tx.is_closed() {
146                    break;
147                }
148                // LM Studio unreachable — back off; no need to hammer a closed server.
149                tokio::time::sleep(tokio::time::Duration::from_secs(15)).await;
150                continue;
151            };
152
153            // When no coding model is loaded, back off to reduce log noise in LM Studio.
154            let poll_interval = if model_id == "no model loaded" {
155                tokio::time::Duration::from_secs(12)
156            } else {
157                tokio::time::Duration::from_secs(4)
158            };
159
160            if agent_tx
161                .send(InferenceEvent::RuntimeProfile {
162                    model_id,
163                    context_length,
164                })
165                .await
166                .is_err()
167            {
168                break;
169            }
170
171            // Poll embed model separately and notify on change.
172            let current_embed = engine.get_embedding_model().await;
173            if current_embed != last_embed {
174                if agent_tx
175                    .send(InferenceEvent::EmbedProfile {
176                        model_id: current_embed.clone(),
177                    })
178                    .await
179                    .is_err()
180                {
181                    break;
182                }
183                last_embed = current_embed;
184            }
185
186            tokio::time::sleep(poll_interval).await;
187        }
188    })
189}
190
191pub async fn run_agent_loop(runtime: AgentLoopRuntime, config: AgentLoopConfig) {
192    let AgentLoopRuntime {
193        mut user_input_rx,
194        agent_tx,
195        services,
196    } = runtime;
197    let RuntimeServices {
198        engine,
199        gpu_state,
200        git_state,
201        voice_manager,
202        swarm_coordinator,
203        cancel_token,
204    } = services;
205
206    let mut manager = ConversationManager::new(
207        engine,
208        config.professional,
209        config.brief,
210        config.snark,
211        config.chaos,
212        config.soul_personality,
213        config.fast_model,
214        config.think_model,
215        gpu_state.clone(),
216        git_state,
217        swarm_coordinator,
218        voice_manager,
219    );
220    manager.cancel_token = cancel_token;
221
222    let _ = agent_tx
223        .send(InferenceEvent::RuntimeProfile {
224            model_id: manager.engine.current_model(),
225            context_length: manager.engine.current_context_length(),
226        })
227        .await;
228
229    let workspace_root = crate::tools::file_ops::workspace_root();
230    let _ = crate::agent::workspace_profile::ensure_workspace_profile(&workspace_root);
231
232    // Send the startup greeting immediately — before MCP and Vein so it always
233    // appears right away, even if vein indexing takes a while on first run.
234    let gpu_name = gpu_state.gpu_name();
235    let vram = gpu_state.label();
236    let voice_cfg = crate::agent::config::load_config();
237    let voice_status = format!(
238        "Voice: {} | Speed: {}x | Volume: {}x",
239        crate::agent::config::effective_voice(&voice_cfg),
240        crate::agent::config::effective_voice_speed(&voice_cfg),
241        crate::agent::config::effective_voice_volume(&voice_cfg),
242    );
243    let embed_status = match manager.engine.get_embedding_model().await {
244        Some(id) => format!("Embed: {} (semantic search ready)", id),
245        None => "Embed: none loaded (load nomic-embed-text-v2 for semantic search)".to_string(),
246    };
247    let docs_only_mode = !crate::tools::file_ops::is_project_workspace();
248    let project_hint = if !docs_only_mode {
249        String::new()
250    } else {
251        "\nTip: source indexing is disabled outside a project folder. `.hematite/docs/`, `.hematite/imports/`, and recent local session reports remain searchable in docs-only vein mode.".to_string()
252    };
253    let display_model = {
254        let m = manager.engine.current_model();
255        if m.is_empty() {
256            "no chat model loaded".to_string()
257        } else {
258            m
259        }
260    };
261    let greeting = format!(
262        "Hematite {} Online | Model: {} | CTX: {} | GPU: {} | VRAM: {}\nEndpoint: {}\n{}\n{}\n/chat - conversation mode | /agent - full coding harness + workstation mode | /version - current build | /about - app info{}",
263        crate::hematite_version_display(),
264        display_model,
265        manager.engine.current_context_length(),
266        gpu_name,
267        vram,
268        format!("{}/v1", manager.engine.base_url),
269        embed_status,
270        voice_status,
271        project_hint
272    );
273    let _ = agent_tx
274        .send(InferenceEvent::MutedToken(format!("\n{}", greeting)))
275        .await;
276
277    if let Err(e) = manager.initialize_mcp(&agent_tx).await {
278        let _ = agent_tx
279            .send(InferenceEvent::Error(format!("MCP Init Failed: {}", e)))
280            .await;
281    }
282    let indexed = manager.initialize_vein();
283    let _ = agent_tx
284        .send(InferenceEvent::VeinStatus {
285            file_count: manager.vein.file_count(),
286            embedded_count: manager.vein.embedded_chunk_count(),
287            docs_only: docs_only_mode,
288        })
289        .await;
290    let _ = agent_tx
291        .send(InferenceEvent::Thought(format!(
292            "The Vein: indexed {} files",
293            indexed
294        )))
295        .await;
296    let _ = agent_tx.send(InferenceEvent::Done).await;
297    let startup_config = crate::agent::config::load_config();
298    manager.engine.set_gemma_native_formatting(
299        crate::agent::config::effective_gemma_native_formatting(
300            &startup_config,
301            &manager.engine.current_model(),
302        ),
303    );
304    let startup_model = manager.engine.current_model();
305    if crate::agent::inference::is_gemma4_model_name(&startup_model) {
306        let mode = crate::agent::config::gemma_native_mode_label(&startup_config, &startup_model);
307        let status = match mode {
308            "on" => "Gemma 4 detected | Gemma Native Formatting: ON (forced)",
309            "auto" => "Gemma 4 detected | Gemma Native Formatting: ON (auto)",
310            _ => "Gemma 4 detected | Gemma Native Formatting: OFF (use /gemma-native auto|on)",
311        };
312        let _ = agent_tx
313            .send(InferenceEvent::MutedToken(status.to_string()))
314            .await;
315    }
316
317    while let Some(input) = user_input_rx.recv().await {
318        if let Err(e) = manager
319            .run_turn(&input, agent_tx.clone(), config.yolo)
320            .await
321        {
322            let _ = agent_tx.send(InferenceEvent::Error(e.to_string())).await;
323            let _ = agent_tx.send(InferenceEvent::Done).await;
324        }
325    }
326}
hematite/runtime.rs

hematite/
runtime.rs