1use crate::agent;
2use crate::agent::conversation::{ConversationManager, UserTurn};
3use crate::agent::git_monitor::GitState;
4use crate::agent::inference::{InferenceEngine, InferenceEvent};
5use crate::ui;
6use crate::ui::gpu_monitor::GpuState;
7use crate::ui::voice::VoiceManager;
8use crate::CliCockpit;
9use notify::RecommendedWatcher;
10use std::sync::Arc;
11use tokio::sync::mpsc;
12
13pub struct RuntimeServices {
14 pub engine: Arc<InferenceEngine>,
15 pub gpu_state: Arc<GpuState>,
16 pub git_state: Arc<GitState>,
17 pub voice_manager: Arc<VoiceManager>,
18 pub swarm_coordinator: Arc<agent::swarm::SwarmCoordinator>,
19 pub cancel_token: Arc<std::sync::atomic::AtomicBool>,
20}
21
22pub struct RuntimeChannels {
23 pub specular_rx: mpsc::Receiver<agent::specular::SpecularEvent>,
24 pub agent_tx: mpsc::Sender<InferenceEvent>,
25 pub agent_rx: mpsc::Receiver<InferenceEvent>,
26 pub swarm_tx: mpsc::Sender<agent::swarm::SwarmMessage>,
27 pub swarm_rx: mpsc::Receiver<agent::swarm::SwarmMessage>,
28 pub user_input_tx: mpsc::Sender<UserTurn>,
29 pub user_input_rx: mpsc::Receiver<UserTurn>,
30}
31
32pub struct RuntimeBundle {
33 pub services: RuntimeServices,
34 pub channels: RuntimeChannels,
35 pub watcher_guard: RecommendedWatcher,
36}
37
38pub struct AgentLoopRuntime {
39 pub user_input_rx: mpsc::Receiver<UserTurn>,
40 pub agent_tx: mpsc::Sender<InferenceEvent>,
41 pub services: RuntimeServices,
42}
43
/// Behavioural settings consumed by `run_agent_loop`.
///
/// Every field except `yolo` is forwarded unchanged to
/// `ConversationManager::new`; what each one means is defined there.
pub struct AgentLoopConfig {
    /// Passed to `ConversationManager::run_turn` on every user turn.
    pub yolo: bool,
    /// Forwarded to the conversation manager.
    pub professional: bool,
    /// Forwarded to the conversation manager.
    pub brief: bool,
    /// Forwarded to the conversation manager.
    pub snark: u8,
    /// Forwarded to the conversation manager.
    pub chaos: u8,
    /// Forwarded to the conversation manager.
    pub soul_personality: String,
    /// Optional fast model name, forwarded to the conversation manager.
    pub fast_model: Option<String>,
    /// Optional think model name, forwarded to the conversation manager.
    pub think_model: Option<String>,
}
54
55pub async fn build_runtime_bundle(
56 cockpit: &CliCockpit,
57 species: &str,
58 snark: u8,
59 professional: bool,
60) -> Result<RuntimeBundle, Box<dyn std::error::Error>> {
61 println!("Booting Hematite systems...");
62 let api_url = crate::agent::config::load_config()
64 .api_url
65 .unwrap_or_else(|| cockpit.url.clone());
66 let mut engine_raw = InferenceEngine::new(api_url, species.to_string(), snark)?;
67 let gpu_state = ui::gpu_monitor::spawn_gpu_monitor();
68 let git_state = agent::git_monitor::spawn_git_monitor();
69
70 if !engine_raw.health_check().await {
71 println!(
72 "ERROR: LLM Provider not detected at {}",
73 engine_raw.base_url
74 );
75 println!("Check if LM Studio (or your local server) is running and port mapped correctly.");
76 std::process::exit(1);
77 }
78
79 let model_name = engine_raw.get_loaded_model().await;
80 if let Some(name) = model_name {
81 engine_raw.set_runtime_profile(&name, engine_raw.current_context_length());
82 }
83 let detected_context = engine_raw.detect_context_length().await;
84 let detected_model = engine_raw.current_model();
85 engine_raw.set_runtime_profile(&detected_model, detected_context);
86
87 let (specular_tx, specular_rx) = mpsc::channel(32);
88 let watcher_guard = agent::specular::spawn_watcher(specular_tx)?;
89
90 let (agent_tx, agent_rx) = mpsc::channel::<InferenceEvent>(100);
91 let (swarm_tx, swarm_rx) = mpsc::channel(32);
92 let voice_manager = Arc::new(VoiceManager::new(agent_tx.clone()));
93
94 if let Some(ref worker) = cockpit.fast_model {
95 engine_raw.worker_model = Some(worker.clone());
96 }
97
98 let engine = Arc::new(engine_raw);
99 let swarm_coordinator = Arc::new(agent::swarm::SwarmCoordinator::new(
100 engine.clone(),
101 gpu_state.clone(),
102 cockpit.fast_model.clone(),
103 professional,
104 ));
105
106 let (user_input_tx, user_input_rx) = mpsc::channel::<UserTurn>(32);
107 let cancel_token = Arc::new(std::sync::atomic::AtomicBool::new(false));
108
109 Ok(RuntimeBundle {
110 services: RuntimeServices {
111 engine,
112 gpu_state,
113 git_state,
114 voice_manager,
115 swarm_coordinator,
116 cancel_token,
117 },
118 channels: RuntimeChannels {
119 specular_rx,
120 agent_tx,
121 agent_rx,
122 swarm_tx,
123 swarm_rx,
124 user_input_tx,
125 user_input_rx,
126 },
127 watcher_guard,
128 })
129}
130
131pub fn spawn_runtime_profile_sync(
132 engine: Arc<InferenceEngine>,
133 agent_tx: mpsc::Sender<InferenceEvent>,
134) -> tokio::task::JoinHandle<()> {
135 tokio::spawn(async move {
136 tokio::time::sleep(tokio::time::Duration::from_secs(4)).await;
138
139 let mut last_embed: Option<String> = None;
140
141 loop {
142 let result = engine.refresh_runtime_profile().await;
143
144 let Some((model_id, context_length, _changed)) = result else {
145 if agent_tx.is_closed() {
146 break;
147 }
148 tokio::time::sleep(tokio::time::Duration::from_secs(15)).await;
150 continue;
151 };
152
153 let poll_interval = if model_id == "no model loaded" {
155 tokio::time::Duration::from_secs(12)
156 } else {
157 tokio::time::Duration::from_secs(4)
158 };
159
160 if agent_tx
161 .send(InferenceEvent::RuntimeProfile {
162 model_id,
163 context_length,
164 })
165 .await
166 .is_err()
167 {
168 break;
169 }
170
171 let current_embed = engine.get_embedding_model().await;
173 if current_embed != last_embed {
174 if agent_tx
175 .send(InferenceEvent::EmbedProfile {
176 model_id: current_embed.clone(),
177 })
178 .await
179 .is_err()
180 {
181 break;
182 }
183 last_embed = current_embed;
184 }
185
186 tokio::time::sleep(poll_interval).await;
187 }
188 })
189}
190
191pub async fn run_agent_loop(runtime: AgentLoopRuntime, config: AgentLoopConfig) {
192 let AgentLoopRuntime {
193 mut user_input_rx,
194 agent_tx,
195 services,
196 } = runtime;
197 let RuntimeServices {
198 engine,
199 gpu_state,
200 git_state,
201 voice_manager,
202 swarm_coordinator,
203 cancel_token,
204 } = services;
205
206 let mut manager = ConversationManager::new(
207 engine,
208 config.professional,
209 config.brief,
210 config.snark,
211 config.chaos,
212 config.soul_personality,
213 config.fast_model,
214 config.think_model,
215 gpu_state.clone(),
216 git_state,
217 swarm_coordinator,
218 voice_manager,
219 );
220 manager.cancel_token = cancel_token;
221
222 let _ = agent_tx
223 .send(InferenceEvent::RuntimeProfile {
224 model_id: manager.engine.current_model(),
225 context_length: manager.engine.current_context_length(),
226 })
227 .await;
228
229 let workspace_root = crate::tools::file_ops::workspace_root();
230 let _ = crate::agent::workspace_profile::ensure_workspace_profile(&workspace_root);
231
232 let gpu_name = gpu_state.gpu_name();
235 let vram = gpu_state.label();
236 let voice_cfg = crate::agent::config::load_config();
237 let voice_status = format!(
238 "Voice: {} | Speed: {}x | Volume: {}x",
239 crate::agent::config::effective_voice(&voice_cfg),
240 crate::agent::config::effective_voice_speed(&voice_cfg),
241 crate::agent::config::effective_voice_volume(&voice_cfg),
242 );
243 let embed_status = match manager.engine.get_embedding_model().await {
244 Some(id) => format!("Embed: {} (semantic search ready)", id),
245 None => "Embed: none loaded (load nomic-embed-text-v2 for semantic search)".to_string(),
246 };
247 let docs_only_mode = !crate::tools::file_ops::is_project_workspace();
248 let project_hint = if !docs_only_mode {
249 String::new()
250 } else {
251 "\nTip: source indexing is disabled outside a project folder. `.hematite/docs/`, `.hematite/imports/`, and recent local session reports remain searchable in docs-only vein mode.".to_string()
252 };
253 let display_model = {
254 let m = manager.engine.current_model();
255 if m.is_empty() {
256 "no chat model loaded".to_string()
257 } else {
258 m
259 }
260 };
261 let greeting = format!(
262 "Hematite {} Online | Model: {} | CTX: {} | GPU: {} | VRAM: {}\nEndpoint: {}\n{}\n{}\n/chat - conversation mode | /agent - full coding harness + workstation mode | /version - current build | /about - app info{}",
263 crate::hematite_version_display(),
264 display_model,
265 manager.engine.current_context_length(),
266 gpu_name,
267 vram,
268 format!("{}/v1", manager.engine.base_url),
269 embed_status,
270 voice_status,
271 project_hint
272 );
273 let _ = agent_tx
274 .send(InferenceEvent::MutedToken(format!("\n{}", greeting)))
275 .await;
276
277 if let Err(e) = manager.initialize_mcp(&agent_tx).await {
278 let _ = agent_tx
279 .send(InferenceEvent::Error(format!("MCP Init Failed: {}", e)))
280 .await;
281 }
282 let indexed = manager.initialize_vein();
283 let _ = agent_tx
284 .send(InferenceEvent::VeinStatus {
285 file_count: manager.vein.file_count(),
286 embedded_count: manager.vein.embedded_chunk_count(),
287 docs_only: docs_only_mode,
288 })
289 .await;
290 let _ = agent_tx
291 .send(InferenceEvent::Thought(format!(
292 "The Vein: indexed {} files",
293 indexed
294 )))
295 .await;
296 let _ = agent_tx.send(InferenceEvent::Done).await;
297 let startup_config = crate::agent::config::load_config();
298 manager.engine.set_gemma_native_formatting(
299 crate::agent::config::effective_gemma_native_formatting(
300 &startup_config,
301 &manager.engine.current_model(),
302 ),
303 );
304 let startup_model = manager.engine.current_model();
305 if crate::agent::inference::is_gemma4_model_name(&startup_model) {
306 let mode = crate::agent::config::gemma_native_mode_label(&startup_config, &startup_model);
307 let status = match mode {
308 "on" => "Gemma 4 detected | Gemma Native Formatting: ON (forced)",
309 "auto" => "Gemma 4 detected | Gemma Native Formatting: ON (auto)",
310 _ => "Gemma 4 detected | Gemma Native Formatting: OFF (use /gemma-native auto|on)",
311 };
312 let _ = agent_tx
313 .send(InferenceEvent::MutedToken(status.to_string()))
314 .await;
315 }
316
317 while let Some(input) = user_input_rx.recv().await {
318 if let Err(e) = manager
319 .run_turn(&input, agent_tx.clone(), config.yolo)
320 .await
321 {
322 let _ = agent_tx.send(InferenceEvent::Error(e.to_string())).await;
323 let _ = agent_tx.send(InferenceEvent::Done).await;
324 }
325 }
326}