1use serde::Serialize;
2use serde_json::Value;
3use tokio::sync::{mpsc, Semaphore};
4
5pub use crate::agent::economics::{SessionEconomics, ToolRecord};
6pub use crate::agent::types::*;
7
8pub struct InferenceEngine {
11 pub provider:
12 std::sync::Arc<tokio::sync::RwLock<Box<dyn crate::agent::provider::ModelProvider>>>,
13 pub cached_model: std::sync::Arc<std::sync::RwLock<String>>,
14 pub cached_context: std::sync::Arc<std::sync::atomic::AtomicUsize>,
15 pub base_url: String,
16 pub species: String,
17 pub snark: u8,
18 pub kv_semaphore: Semaphore,
19 pub economics: std::sync::Arc<std::sync::Mutex<SessionEconomics>>,
20 pub worker_model: Option<String>,
22 pub gemma_native_formatting: std::sync::Arc<std::sync::atomic::AtomicBool>,
24 pub cancel_token: std::sync::Arc<std::sync::atomic::AtomicBool>,
26}
27
/// Reports whether `model` names a Gemma 4 family model.
///
/// The match is case-insensitive (ASCII) and accepts either the `gemma-4`
/// or the `gemma4` spelling anywhere in the name.
pub fn is_hematite_native_model(model: &str) -> bool {
    let normalized = model.to_ascii_lowercase();
    ["gemma-4", "gemma4"]
        .iter()
        .any(|marker| normalized.contains(*marker))
}
32
33fn should_use_native_formatting(engine: &InferenceEngine, model: &str) -> bool {
34 is_hematite_native_model(model) && engine.gemma_native_formatting_enabled()
35}
36
37pub fn tool_metadata_for_name(name: &str) -> ToolMetadata {
40 if name.starts_with("mcp__") {
41 let lower = name.to_ascii_lowercase();
42 let mutates_workspace = [
43 "__edit",
44 "__write",
45 "__create",
46 "__move",
47 "__delete",
48 "__remove",
49 "__rename",
50 "__replace",
51 "__patch",
52 ]
53 .iter()
54 .any(|needle| lower.contains(needle));
55 return ToolMetadata {
56 category: ToolCategory::External,
57 mutates_workspace,
58 external_surface: true,
59 trust_sensitive: true,
60 read_only_friendly: !mutates_workspace,
61 plan_scope: false,
62 };
63 }
64
65 match name {
66 "read_file" | "inspect_lines" | "grep_files" | "list_files" => ToolMetadata {
67 category: ToolCategory::RepoRead,
68 mutates_workspace: false,
69 external_surface: false,
70 trust_sensitive: false,
71 read_only_friendly: true,
72 plan_scope: true,
73 },
74 "create_directory" | "write_file" | "edit_file" | "patch_hunk" | "multi_search_replace" => {
75 ToolMetadata {
76 category: ToolCategory::RepoWrite,
77 mutates_workspace: true,
78 external_surface: false,
79 trust_sensitive: true,
80 read_only_friendly: false,
81 plan_scope: true,
82 }
83 }
84 "trace_runtime_flow" => ToolMetadata {
85 category: ToolCategory::Architecture,
86 mutates_workspace: false,
87 external_surface: false,
88 trust_sensitive: false,
89 read_only_friendly: true,
90 plan_scope: false,
91 },
92 "describe_toolchain" => ToolMetadata {
93 category: ToolCategory::Toolchain,
94 mutates_workspace: false,
95 external_surface: false,
96 trust_sensitive: false,
97 read_only_friendly: true,
98 plan_scope: false,
99 },
100 "shell" => ToolMetadata {
101 category: ToolCategory::Runtime,
102 mutates_workspace: true,
103 external_surface: false,
104 trust_sensitive: true,
105 read_only_friendly: false,
106 plan_scope: false,
107 },
108 "inspect_host" => ToolMetadata {
109 category: ToolCategory::Runtime,
110 mutates_workspace: false,
111 external_surface: false,
112 trust_sensitive: false,
113 read_only_friendly: true,
114 plan_scope: false,
115 },
116 "resolve_host_issue" => ToolMetadata {
117 category: ToolCategory::Runtime,
118 mutates_workspace: true,
119 external_surface: true,
120 trust_sensitive: true,
121 read_only_friendly: false,
122 plan_scope: false,
123 },
124 "run_hematite_maintainer_workflow" => ToolMetadata {
125 category: ToolCategory::Workflow,
126 mutates_workspace: true,
127 external_surface: false,
128 trust_sensitive: true,
129 read_only_friendly: false,
130 plan_scope: false,
131 },
132 "run_workspace_workflow" => ToolMetadata {
133 category: ToolCategory::Workflow,
134 mutates_workspace: true,
135 external_surface: false,
136 trust_sensitive: true,
137 read_only_friendly: false,
138 plan_scope: false,
139 },
140 "verify_build" => ToolMetadata {
141 category: ToolCategory::Verification,
142 mutates_workspace: false,
143 external_surface: false,
144 trust_sensitive: false,
145 read_only_friendly: true,
146 plan_scope: true,
147 },
148 "git_commit" | "git_push" | "git_remote" | "git_onboarding" | "git_worktree" => {
149 ToolMetadata {
150 category: ToolCategory::Git,
151 mutates_workspace: true,
152 external_surface: false,
153 trust_sensitive: true,
154 read_only_friendly: false,
155 plan_scope: false,
156 }
157 }
158 "research_web" | "fetch_docs" => ToolMetadata {
159 category: ToolCategory::Research,
160 mutates_workspace: false,
161 external_surface: false,
162 trust_sensitive: false,
163 read_only_friendly: true,
164 plan_scope: false,
165 },
166 "vision_analyze" => ToolMetadata {
167 category: ToolCategory::Vision,
168 mutates_workspace: false,
169 external_surface: false,
170 trust_sensitive: false,
171 read_only_friendly: true,
172 plan_scope: false,
173 },
174 "lsp_definitions"
175 | "lsp_references"
176 | "lsp_hover"
177 | "lsp_rename_symbol"
178 | "lsp_get_diagnostics"
179 | "lsp_search_symbol" => ToolMetadata {
180 category: ToolCategory::Lsp,
181 mutates_workspace: false,
182 external_surface: false,
183 trust_sensitive: false,
184 read_only_friendly: true,
185 plan_scope: false,
186 },
187 "auto_pin_context" | "list_pinned" | "clarify" => ToolMetadata {
188 category: ToolCategory::Workflow,
189 mutates_workspace: false,
190 external_surface: false,
191 trust_sensitive: false,
192 read_only_friendly: true,
193 plan_scope: true,
194 },
195 "manage_tasks" => ToolMetadata {
196 category: ToolCategory::Workflow,
197 mutates_workspace: false,
198 external_surface: false,
199 trust_sensitive: false,
200 read_only_friendly: true,
201 plan_scope: false,
202 },
203 _ => ToolMetadata {
204 category: ToolCategory::Other,
205 mutates_workspace: false,
206 external_surface: false,
207 trust_sensitive: false,
208 read_only_friendly: true,
209 plan_scope: false,
210 },
211 }
212}
/// Lower bound on the number of tokens reserved for model output.
const MIN_RESERVED_OUTPUT_TOKENS: usize = 1024;
/// Upper bound on the number of tokens reserved for model output.
const MAX_RESERVED_OUTPUT_TOKENS: usize = 4096;
219
/// A context window of 8192 tokens or fewer is treated as "tiny".
fn is_tiny_context_window(context_length: usize) -> bool {
    matches!(context_length, 0..=8_192)
}
223
/// A context window above 8192 and up to 49152 tokens is treated as "compact".
fn is_compact_context_window(context_length: usize) -> bool {
    (8_193..=49_152).contains(&context_length)
}
227
228pub fn is_compact_context_window_pub(context_length: usize) -> bool {
229 is_compact_context_window(context_length)
230}
231
/// Detects provider error text that indicates the context limit was hit.
///
/// `lower` is matched as-is against lowercase markers, so callers are
/// expected to pass already-lowercased text.
fn is_provider_context_limit_detail(lower: &str) -> bool {
    // Phrases that are sufficient on their own.
    const STANDALONE_MARKERS: [&str; 4] = [
        "context length",
        "keep from the initial prompt",
        "prompt is greater than the context length",
        "exceeds the context window",
    ];

    // `n_keep` only counts when `n_ctx` is mentioned alongside it.
    if lower.contains("n_keep") && lower.contains("n_ctx") {
        return true;
    }
    STANDALONE_MARKERS
        .iter()
        .any(|marker| lower.contains(*marker))
}
239
240fn classify_runtime_failure_tag(detail: &str) -> &'static str {
241 let lower = detail.to_ascii_lowercase();
242 if lower.contains("context_window_blocked")
243 || lower.contains("context ceiling reached")
244 || lower.contains("exceeds the")
245 || is_provider_context_limit_detail(&lower)
246 {
247 "context_window"
248 } else if lower.contains("empty response from model")
249 || lower.contains("model returned an empty response")
250 {
251 "empty_model_response"
252 } else if lower.contains("action blocked:")
253 || lower.contains("access denied")
254 || lower.contains("declined by user")
255 {
256 "tool_policy_blocked"
257 } else {
258 "provider_degraded"
259 }
260}
261
/// Returns the user-facing recovery hint for a failure tag.
///
/// Unrecognized tags receive the generic retry guidance.
fn runtime_failure_guidance(tag: &str) -> &'static str {
    if tag == "context_window" {
        return "Narrow the request, compact the session, or preserve grounded tool output instead of restyling it. If LM Studio reports a smaller live n_ctx than Hematite expected, reload or re-detect the model budget before retrying.";
    }
    if tag == "empty_model_response" {
        return "Retry once automatically, then narrow the turn or restart LM Studio if the model keeps returning nothing.";
    }
    if tag == "tool_policy_blocked" {
        return "Stay inside the allowed workflow or switch modes before retrying.";
    }
    "Retry once automatically, then narrow the turn or restart LM Studio if it persists."
}
276
277fn format_runtime_failure_message(detail: &str) -> String {
278 let tag = classify_runtime_failure_tag(detail);
279 format!(
280 "[failure:{}] {} Detail: {}",
281 tag,
282 runtime_failure_guidance(tag),
283 detail.trim()
284 )
285}
286
287impl InferenceEngine {
292 pub fn new(
293 api_url: String,
294 species: String,
295 snark: u8,
296 ) -> Result<Self, Box<dyn std::error::Error>> {
297 let client = reqwest::Client::builder()
298 .timeout(std::time::Duration::from_secs(180))
299 .build()?;
300
301 let base_url = {
302 let trimmed = api_url.trim_end_matches('/');
303 if let Some(scheme_end) = trimmed.find("://") {
304 let after_scheme = &trimmed[scheme_end + 3..];
305 if let Some(path_start) = after_scheme.find('/') {
306 format!(
307 "{}://{}",
308 &trimmed[..scheme_end],
309 &after_scheme[..path_start]
310 )
311 } else {
312 trimmed.to_string()
313 }
314 } else {
315 trimmed.to_string()
316 }
317 };
318
319 let api_url_full = if api_url.ends_with("/chat/completions") {
320 api_url
321 } else if api_url.ends_with("/") {
322 format!("{}chat/completions", api_url)
323 } else {
324 format!("{}/chat/completions", api_url)
325 };
326
327 let lms = crate::agent::lms::LmsHarness::new();
328 let ollama_harness = crate::agent::ollama::OllamaHarness::new(&base_url);
329
330 let provider = if base_url.contains("11434") {
331 Box::new(crate::agent::provider::OllamaProvider {
332 client: client.clone(),
333 base_url: base_url.clone(),
334 model: String::new(),
335 context_length: 8192,
336 embed_model: std::sync::Arc::new(std::sync::RwLock::new(None)),
337 ollama: ollama_harness,
338 }) as Box<dyn crate::agent::provider::ModelProvider>
339 } else {
340 Box::new(crate::agent::provider::LmsProvider {
341 client: client.clone(),
342 api_url: api_url_full,
343 base_url: base_url.clone(),
344 model: String::new(),
345 context_length: 0,
346 lms,
347 }) as Box<dyn crate::agent::provider::ModelProvider>
348 };
349
350 Ok(Self {
351 provider: std::sync::Arc::new(tokio::sync::RwLock::new(provider)),
352 cached_model: std::sync::Arc::new(std::sync::RwLock::new(String::new())),
353 cached_context: std::sync::Arc::new(std::sync::atomic::AtomicUsize::new(0)),
354 base_url: base_url.clone(),
355 species: species.clone(),
356 snark,
357 kv_semaphore: Semaphore::new(3),
358 economics: std::sync::Arc::new(std::sync::Mutex::new(SessionEconomics::new())),
359 worker_model: None,
360 gemma_native_formatting: std::sync::Arc::new(std::sync::atomic::AtomicBool::new(false)),
361 cancel_token: std::sync::Arc::new(std::sync::atomic::AtomicBool::new(false)),
362 })
363 }
364
365 pub fn set_gemma_native_formatting(&self, enabled: bool) {
366 self.gemma_native_formatting
367 .store(enabled, std::sync::atomic::Ordering::SeqCst);
368 }
369
370 pub async fn health_check(&self) -> bool {
371 let p = self.provider.read().await;
372 p.health_check().await
373 }
374
375 pub async fn provider_name(&self) -> String {
376 let p = self.provider.read().await;
377 p.name().to_string()
378 }
379
380 pub async fn get_loaded_model(&self) -> Option<String> {
381 let p = self.provider.read().await;
382 match p.detect_model().await {
383 Ok(m) if m.is_empty() => Some("".to_string()),
384 Ok(m) => Some(m),
385 Err(_) => None,
386 }
387 }
388
389 pub async fn get_embedding_model(&self) -> Option<String> {
390 let p = self.provider.read().await;
391 p.get_embedding_model().await
392 }
393
394 pub async fn load_model(&self, model_id: &str) -> Result<(), String> {
395 let p = self.provider.read().await;
396 p.load_model(model_id).await
397 }
398
399 pub async fn load_model_with_context(
400 &self,
401 model_id: &str,
402 context_length: Option<usize>,
403 ) -> Result<(), String> {
404 let p = self.provider.read().await;
405 p.load_model_with_context(model_id, context_length).await
406 }
407
408 pub async fn load_embedding_model(&self, model_id: &str) -> Result<(), String> {
409 let p = self.provider.read().await;
410 p.load_embedding_model(model_id).await
411 }
412
413 pub async fn list_provider_models(
414 &self,
415 kind: crate::agent::provider::ProviderModelKind,
416 loaded_only: bool,
417 ) -> Result<Vec<String>, String> {
418 let p = self.provider.read().await;
419 p.list_models(kind, loaded_only).await
420 }
421
422 pub async fn unload_model(&self, model_id: Option<&str>, all: bool) -> Result<String, String> {
423 let p = self.provider.read().await;
424 p.unload_model(model_id, all).await
425 }
426
427 pub async fn unload_embedding_model(&self, model_id: Option<&str>) -> Result<String, String> {
428 let p = self.provider.read().await;
429 p.unload_embedding_model(model_id).await
430 }
431
432 pub async fn prewarm(&self) -> Result<(), String> {
433 let p = self.provider.read().await;
434 p.prewarm().await
435 }
436
437 pub async fn detect_context_length(&self) -> usize {
438 let p = self.provider.read().await;
439 p.detect_context_length().await
440 }
441
442 pub async fn set_runtime_profile(&self, model: &str, context_length: usize) {
443 if let Ok(mut guard) = self.cached_model.write() {
444 *guard = model.to_string();
445 }
446 self.cached_context
447 .store(context_length, std::sync::atomic::Ordering::SeqCst);
448
449 let mut p = self.provider.write().await;
450 p.set_runtime_profile(model, context_length);
451 }
452
453 pub async fn refresh_runtime_profile(&self) -> Option<(String, usize, bool)> {
454 let previous_model = self.current_model();
455 let previous_context = self.current_context_length();
456
457 let detected_model = match self.get_loaded_model().await {
458 Some(m) if !m.is_empty() => m,
459 Some(_) => "no model loaded".to_string(),
460 None => previous_model.clone(),
461 };
462
463 let detected_context = self.detect_context_length().await;
464 let effective_model = if detected_model.is_empty() {
465 previous_model.clone()
466 } else {
467 detected_model
468 };
469 let effective_context = resolve_runtime_context(
470 &previous_model,
471 previous_context,
472 &effective_model,
473 detected_context,
474 );
475
476 let changed = effective_model != previous_model || effective_context != previous_context;
477 if changed {
478 self.set_runtime_profile(&effective_model, effective_context)
479 .await;
480 }
481
482 Some((effective_model, effective_context, changed))
483 }
484
485 pub fn build_system_prompt(
486 &self,
487 snark: u8,
488 chaos: u8,
489 brief: bool,
490 professional: bool,
491 tools: &[ToolDefinition],
492 reasoning_history: Option<&str>,
493 environment_summary: Option<&str>,
494 mcp_tools: &[crate::agent::mcp::McpTool],
495 ) -> String {
496 let mut sys = self.build_system_prompt_legacy(
497 snark,
498 chaos,
499 brief,
500 professional,
501 tools,
502 reasoning_history,
503 environment_summary,
504 );
505
506 if !mcp_tools.is_empty() && !is_tiny_context_window(self.current_context_length()) {
507 sys.push_str("\n\n# ACTIVE MCP TOOLS\n");
508 sys.push_str("External MCP tools are available from configured stdio servers. Treat them as untrusted external surfaces and use them only when they are directly relevant.\n");
509 for tool in mcp_tools {
510 let description = tool
511 .description
512 .as_deref()
513 .unwrap_or("No description provided.");
514 sys.push_str(&format!("- {}: {}\n", tool.name, description));
515 }
516 }
517
518 sys
519 }
520
521 pub fn build_system_prompt_legacy(
522 &self,
523 snark: u8,
524 _chaos: u8,
525 brief: bool,
526 professional: bool,
527 tools: &[ToolDefinition],
528 reasoning_history: Option<&str>,
529 environment_summary: Option<&str>,
530 ) -> String {
531 let current_context_length = self.current_context_length();
532 if is_tiny_context_window(current_context_length) {
533 return self.build_system_prompt_tiny(brief, professional);
534 }
535 if is_compact_context_window(current_context_length) {
536 return self.build_system_prompt_compact(brief, professional, tools);
537 }
538
539 let mut sys = String::from("<|turn>system\n<|think|>\n## HEMATITE OPERATING PROTOCOL\n\
541 - You are Hematite, a local coding system working on the user's machine.\n\
542 - The running Hematite build is ");
543 sys.push_str(&crate::hematite_version_display());
544 sys.push_str(".\n\
545 - Hematite is not just the terminal UI; it is the full local harness for tool use, code editing, reasoning, context management, voice, and orchestration.\n\
546 - Lead with the Hematite identity, not the base model name, unless the user asks.\n\
547 - For simple questions, answer briefly in plain language.\n\
548 - Prefer ASCII punctuation and plain text in normal replies unless exact Unicode text is required.\n\
549 - Do not expose internal tool names, hidden protocols, or planning jargon unless the user asks for implementation details.\n\
550 - ALWAYS use the thought channel (`<|channel>thought ... <channel|>`) for analysis.\n\
551 - Keep internal reasoning inside channel delimiters.\n\
552 - Final responses must be direct, clear, and formatted in clean Markdown when formatting helps.\n\
553 <turn|>\n\n");
554
555 if let Some(history) = reasoning_history {
556 if !history.is_empty() {
557 sys.push_str("# INTERNAL STATE (ACTIVE TURN)\n");
558 sys.push_str(history);
559 sys.push_str("\n\n");
560 }
561 }
562
563 if brief {
565 sys.push_str("# ADAPTIVE THOUGHT EFFICIENCY: LOW\n\
566 - Core directive: Think efficiently. Avoid redundant internal derivation.\n\
567 - Depth: Surface-level verification only.\n\n");
568 } else {
569 sys.push_str("# ADAPTIVE THOUGHT EFFICIENCY: HIGH\n\
570 - Core directive: Think in depth when the task needs it. Explore edge cases and architectural implications.\n\
571 - Depth: Full multi-step derivation required.\n\n");
572 }
573
574 let os = std::env::consts::OS;
576 if let Some(summary) = environment_summary {
577 sys.push_str("## HOST ENVIRONMENT\n");
578 sys.push_str(summary);
579 sys.push_str("\n\n");
580 }
581
582 if professional {
583 sys.push_str(&format!(
584 "You are Hematite, a local coding system running on {}. \
585 The TUI is one interface layer, not your whole identity. \
586 Be direct, practical, technically precise, and ASCII-first in ordinary prose. \
587 Skip filler and keep the focus on the work.\n",
588 os
589 ));
590 } else {
591 sys.push_str(&format!(
592 "You are Hematite, a [{}] local AI coding system (Snark: {}/100) running on the user's hardware on {}. \
593 The terminal UI is only one surface of the system. \
594 Be direct, efficient, technical, and ASCII-first in ordinary prose. \
595 When the user asks who you are, describe Hematite as the local coding harness and agent, not merely the TUI.\n",
596 self.species, snark, os
597 ));
598 }
599
600 let current_model = self.current_model();
602 if !current_model.is_empty() {
603 sys.push_str(&format!(
604 "Loaded model: {} | Context window: {} tokens. \
605 Calibrate response length and tool-call depth to fit within this budget.\n\n",
606 current_model, current_context_length
607 ));
608 if is_hematite_native_model(¤t_model) {
609 sys.push_str(
610 "Sovereign native note: prefer exact tool JSON with no extra prose when calling tools. \
611 Do not wrap `path`, `extension`, or other string arguments in extra quote layers. \
612 For `grep_files`, provide the raw regex pattern without surrounding slash delimiters.\n\n",
613 );
614 }
615 } else {
616 sys.push_str(&format!(
617 "Context window: {} tokens. Calibrate response length to fit within this budget.\n\n",
618 current_context_length
619 ));
620 }
621
622 let shell_desc = if cfg!(target_os = "windows") {
624 "[EXTERNAL SHELL]: `powershell` (Windows).\n\
625 - Use ONLY for builds, tests, or file migrations. \n\
626 - You MUST use the `powershell` tool directly. \n\
627 - NEVER attempt to use `bash`, `sh`, or `/dev/null` on this system. \n\n"
628 } else {
629 "[EXTERNAL SHELL]: `bash` (Unix).\n\
630 - Use ONLY for builds, tests, or file migrations. \n\
631 - NEVER wrap bash in other shells. \n\n"
632 };
633
634 sys.push_str("You distinguish strictly between [INTERNAL TOOLS] and [EXTERNAL SHELL].\n\n\
635 [INTERNAL TOOLS]: `list_files`, `grep_files`, `read_file`, `edit_file`, `write_file`.\n\
636 - These are the ONLY way to explore and modify code. \n\
637 - NEVER attempt to run these as shell commands (e.g. `bash $ grep_files` is FORBIDDEN).\n\n");
638 sys.push_str(shell_desc);
639
640 sys.push_str("ANTI-LOOPING: If a tool returns (no output) or 'not recognized' in a shell, pivot to a different internal tool. \n\
642 SELF-AUDIT: If you see your own command echoed back as the result, the shell failed; pivot to an internal tool immediately.\n\n");
643
644 sys.push_str("## TURN ADVISORY\n");
646 if brief {
647 sys.push_str("- BRIEF MODE: Respond with ONE concise sentence/block unless more code is required.\n");
648 }
649 sys.push_str("- INTERNAL REASONING: Plan your move in the thought channel first.\n");
650
651 sys.push_str("\n## SCAFFOLDING PROTOCOL\n\
653 2. ALWAYS call verify_build immediately after to confirm the project compiles/runs.\n\
654 3. If verify_build fails, use `lsp_get_diagnostics` to find the exact line and error.\n\
655 4. Fix all errors before declaring success.\n\n\
656 ## PRE-FLIGHT SCOPING PROTOCOL\n\
657 Before attempting any multi-file task or complex refactor:\n\
658 1. Identify 1-3 core files (entry-points, central models, or types) that drive the logic.\n\
659 2. Use `auto_pin_context` to keep those files in active context.\n\
660 3. Only then proceed to deeper edits or research.\n\n\
661 ## REFACTORING PROTOCOL\n\
662 When modifying existing code or renaming symbols:\n\
663 1. Use `lsp_rename_symbol` for all variable/function renames to ensure project-wide safety.\n\
664 2. After any significant edit, call `lsp_get_diagnostics` on the affected files.\n\
665 3. If errors are found, you MUST fix them. Do not wait for the user to point them out.\n\n");
666
667 sys.push_str(&load_instruction_files());
669 sys.push_str(&load_agent_skill_catalog());
670
671 sys.push_str(&crate::memory::deep_reflect::load_recent_memories());
673
674 if !tools.is_empty() {
676 sys.push_str("\n\n# NATIVE TOOL DECLARATIONS\n");
677 for tool in tools {
678 let schema = serde_json::to_string(&tool.function.parameters)
679 .unwrap_or_else(|_| "{}".to_string());
680 sys.push_str(&format!(
681 "<|tool>declaration:{}{}{}<tool|>\n",
682 tool.function.name, "{", schema
683 ));
684 sys.push_str(&format!("// {})\n", tool.function.description));
685 }
686 }
687
688 sys
689 }
690
691 fn build_system_prompt_compact(
692 &self,
693 brief: bool,
694 professional: bool,
695 tools: &[ToolDefinition],
696 ) -> String {
697 let current_model = self.current_model();
700 let current_context_length = self.current_context_length();
701 let os = std::env::consts::OS;
702
703 let mut sys = String::from("<|turn>system\n<|think|>\n");
704 sys.push_str(&format!(
705 "You are Hematite {}, a local coding harness working on the user's machine.\n",
706 crate::hematite_version_display()
707 ));
708 if professional {
709 sys.push_str("Be direct, technical, concise, and ASCII-first.\n");
710 } else {
711 sys.push_str(&format!(
712 "You are a [{}] local AI coding system. Be direct, concise, and technical.\n",
713 self.species
714 ));
715 }
716 sys.push_str(&format!(
717 "Model: {} | Context: {} tokens. Keep turns focused.\n",
718 current_model, current_context_length
719 ));
720 if is_hematite_native_model(¤t_model) {
721 sys.push_str(
722 "Sovereign native: use exact tool JSON. No extra prose in tool calls. \
723 Raw regex patterns in grep_files, no slash delimiters.\n",
724 );
725 }
726 if cfg!(target_os = "windows") {
727 sys.push_str(&format!(
728 "OS: {}. Use PowerShell for shell. Never bash or /dev/null.\n",
729 os
730 ));
731 } else {
732 sys.push_str(&format!("OS: {}. Use native Unix shell.\n", os));
733 }
734 if brief {
735 sys.push_str("BRIEF MODE: one concise sentence unless code is required.\n");
736 }
737
738 sys.push_str(
739 "\nCORE RULES:\n\
740 - Read before editing: use `read_file` or `inspect_lines` on a file before mutating it.\n\
741 - Verify after edits: run `verify_build` after code changes, before committing.\n\
742 - One tool at a time. Do not batch unrelated tool calls.\n\
743 - Do not invent tool names, file paths, or symbols not confirmed by tool output.\n\
744 - Built-in tools first: prefer `read_file`, `edit_file`, `grep_files` over MCP filesystem tools.\n\
745 - STARTUP/UI CHANGES: read the owner file first, make one focused edit, then run `verify_build`.\n",
746 );
747
748 if !tools.is_empty() {
749 sys.push_str("\n# AVAILABLE TOOLS\n");
750 for tool in tools {
751 let desc: String = tool.function.description.chars().take(120).collect();
752 sys.push_str(&format!("- {}: {}\n", tool.function.name, desc));
753 }
754 }
755
756 sys.push_str("<turn|>\n");
757 sys
758 }
759
760 fn build_system_prompt_tiny(&self, brief: bool, professional: bool) -> String {
761 let current_model = self.current_model();
762 let current_context_length = self.current_context_length();
763 let os = std::env::consts::OS;
764 let mut sys = format!(
765 "<|turn>system\nYou are Hematite {}, a local coding harness working on the user's machine.\n",
766 crate::hematite_version_display()
767 );
768 if professional {
769 sys.push_str("Be direct, technical, concise, and ASCII-first.\n");
770 } else {
771 sys.push_str(&format!(
772 "You are a [{}] local AI coding system. Be direct, concise, and technical.\n",
773 self.species
774 ));
775 }
776 if !current_model.is_empty() {
777 sys.push_str(&format!(
778 "Loaded model: {} | Context window: {} tokens.\n",
779 current_model, current_context_length
780 ));
781 } else {
782 sys.push_str(&format!(
783 "Context window: {} tokens.\n",
784 current_context_length
785 ));
786 }
787 sys.push_str("Tiny-context mode is active. Keep turns short. Prefer final answers over long analysis. Only use tools when necessary.\n");
788 sys.push_str("Use built-in workspace tools for local inspection and edits. Do not invent tools, files, channels, or symbols.\n");
789 sys.push_str("Before editing an existing file, gather recent file evidence first. After code edits, verify before commit.\n");
790 if cfg!(target_os = "windows") {
791 sys.push_str(&format!(
792 "You are running on {}. Use PowerShell for shell work. Do not assume bash or /dev/null.\n",
793 os
794 ));
795 } else {
796 sys.push_str(&format!(
797 "You are running on {}. Use the native Unix shell conventions.\n",
798 os
799 ));
800 }
801 if brief {
802 sys.push_str("BRIEF MODE: answer in one concise sentence unless code is required.\n");
803 }
804 sys.push_str("<turn|>\n");
805 sys
806 }
807
808 pub fn current_model(&self) -> String {
809 self.cached_model
810 .read()
811 .map(|g| g.clone())
812 .unwrap_or_default()
813 }
814
815 pub fn current_context_length(&self) -> usize {
816 self.cached_context
817 .load(std::sync::atomic::Ordering::Relaxed)
818 }
819
820 pub fn is_compact_context_window(&self) -> bool {
821 let len = self.current_context_length();
822 len <= 16384
823 }
824
825 pub fn gemma_native_formatting_enabled(&self) -> bool {
826 self.gemma_native_formatting
827 .load(std::sync::atomic::Ordering::Relaxed)
828 }
829
830 pub async fn call_with_tools(
831 &self,
832 messages: &[ChatMessage],
833 tools: &[ToolDefinition],
834 model_override: Option<&str>,
836 ) -> Result<
837 (
838 Option<String>,
839 Option<Vec<ToolCallResponse>>,
840 Option<TokenUsage>,
841 Option<String>,
842 ),
843 String,
844 > {
845 let _permit = self
846 .kv_semaphore
847 .acquire()
848 .await
849 .map_err(|e| e.to_string())?;
850
851 let (res, model_name, prepared_messages) = {
852 let p = self.provider.read().await;
853 let model_name = model_override.unwrap_or(&p.current_model()).to_string();
854 let prepared_messages = if should_use_native_formatting(self, &model_name) {
855 prepare_gemma_native_messages(messages)
856 } else {
857 messages.to_vec()
858 };
859 if let Err(detail) = preflight_chat_request(
860 &model_name,
861 &prepared_messages,
862 tools,
863 self.current_context_length(),
864 ) {
865 return Err(format_runtime_failure_message(&detail));
866 }
867 let res = p
868 .call_with_tools(&prepared_messages, tools, model_override)
869 .await
870 .map_err(|e| format_runtime_failure_message(&e))?;
871 (res, model_name, prepared_messages)
872 };
873
874 if let Ok(mut econ) = self.economics.lock() {
875 econ.input_tokens += res.usage.prompt_tokens;
876 econ.output_tokens += res.usage.completion_tokens;
877 }
878
879 let mut content = res.content;
880 let mut tool_calls = res.tool_calls;
881
882 if let Some(text) = &content {
884 if should_use_native_formatting(self, &model_name) {
885 let native_calls = extract_native_tool_calls(text);
886 if !native_calls.is_empty() {
887 let mut existing = tool_calls.unwrap_or_default();
888 existing.extend(native_calls);
889 tool_calls = Some(existing);
890
891 let stripped = strip_native_tool_call_text(text);
892 content = if stripped.trim().is_empty() {
893 None
894 } else {
895 Some(stripped)
896 };
897 }
898 }
899 }
900
901 if should_use_native_formatting(self, &model_name) {
903 if let Some(calls) = tool_calls.as_mut() {
904 for call in calls.iter_mut() {
905 normalize_tool_argument_value(
906 &call.function.name,
907 &mut call.function.arguments,
908 );
909 }
910 }
911 }
912
913 if should_use_native_formatting(self, &model_name)
914 && content.is_none()
915 && tool_calls.is_none()
916 && !prepared_messages.is_empty()
917 {
918 return Err(format_runtime_failure_message(
919 "model returned an empty response after native-format message preparation",
920 ));
921 }
922
923 Ok((content, tool_calls, Some(res.usage), res.finish_reason))
924 }
925
926 pub async fn stream_messages(
930 &self,
931 messages: &[ChatMessage],
932 tx: mpsc::Sender<InferenceEvent>,
933 ) -> Result<(), Box<dyn std::error::Error>> {
934 let provider = self.provider.read().await;
935 provider.stream(messages, tx).await
936 }
937
938 pub async fn stream_generation(
940 &self,
941 prompt: &str,
942 snark: u8,
943 chaos: u8,
944 brief: bool,
945 professional: bool,
946 tx: mpsc::Sender<InferenceEvent>,
947 ) -> Result<(), Box<dyn std::error::Error>> {
948 let system =
949 self.build_system_prompt(snark, chaos, brief, professional, &[], None, None, &[]);
950 let messages = vec![ChatMessage::system(&system), ChatMessage::user(prompt)];
951 self.stream_messages(&messages, tx).await
952 }
953
954 pub async fn generate_task_worker(
958 &self,
959 prompt: &str,
960 professional: bool,
961 ) -> Result<String, String> {
962 let current_model = self.current_model();
963 let model = self
964 .worker_model
965 .as_deref()
966 .unwrap_or(current_model.as_str());
967 self.generate_task_with_model(prompt, 0.1, professional, model)
968 .await
969 }
970
971 pub async fn generate_task(&self, prompt: &str, professional: bool) -> Result<String, String> {
972 self.generate_task_with_temp(prompt, 0.1, professional)
973 .await
974 }
975
976 pub async fn generate_task_with_temp(
977 &self,
978 prompt: &str,
979 temp: f32,
980 professional: bool,
981 ) -> Result<String, String> {
982 let current_model = self.current_model();
983 self.generate_task_with_model(prompt, temp, professional, ¤t_model)
984 .await
985 }
986
987 pub async fn generate_task_with_model(
988 &self,
989 prompt: &str,
990 _temp: f32,
991 professional: bool,
992 model: &str,
993 ) -> Result<String, String> {
994 let _permit = self
995 .kv_semaphore
996 .acquire()
997 .await
998 .map_err(|e| e.to_string())?;
999
1000 let system =
1001 self.build_system_prompt(self.snark, 50, false, professional, &[], None, None, &[]);
1002 let messages = vec![ChatMessage::system(&system), ChatMessage::user(prompt)];
1003 if let Err(detail) =
1004 preflight_chat_request(model, &messages, &[], self.current_context_length())
1005 {
1006 return Err(format_runtime_failure_message(&detail));
1007 }
1008
1009 let p = self.provider.read().await;
1010 let res = p
1011 .call_with_tools(&messages, &[], Some(model))
1012 .await
1013 .map_err(|e| format_runtime_failure_message(&e))?;
1014
1015 res.content
1016 .ok_or_else(|| "Empty response from model".to_string())
1017 }
1018
1019 #[allow(dead_code)]
1023 pub fn snip_history(
1024 &self,
1025 turns: &[ChatMessage],
1026 max_tokens_estimate: usize,
1027 keep_recent: usize,
1028 ) -> Vec<ChatMessage> {
1029 let total_chars: usize = turns.iter().map(|m| m.content.as_str().len()).sum();
1030 if total_chars / 4 <= max_tokens_estimate {
1031 return turns.to_vec();
1032 }
1033 let keep = keep_recent.min(turns.len());
1034 let mut snipped = vec![turns[0].clone()];
1035 if turns.len() > keep + 1 {
1036 snipped.push(ChatMessage::system(&format!(
1037 "[CONTEXT SNIPPED: {} earlier turns pruned to preserve VRAM]",
1038 turns.len() - keep - 1
1039 )));
1040 snipped.extend_from_slice(&turns[turns.len() - keep..]);
1041 } else {
1042 snipped = turns.to_vec();
1043 }
1044 snipped
1045 }
1046}
1047
1048fn estimate_serialized_tokens<T: Serialize + ?Sized>(value: &T) -> usize {
1049 serde_json::to_vec(value)
1050 .ok()
1051 .map_or(0, |bytes| bytes.len() / 4 + 1)
1052}
1053
1054const IMAGE_PART_TOKEN_ESTIMATE: usize = 1024;
1055
1056pub fn estimate_message_tokens(message: &ChatMessage) -> usize {
1057 let content_tokens = match &message.content {
1058 MessageContent::Text(s) => s.len() / 4 + 1,
1059 MessageContent::Parts(parts) => parts
1060 .iter()
1061 .map(|part| match part {
1062 ContentPart::Text { text } => text.len() / 4 + 1,
1063 ContentPart::ImageUrl { .. } => IMAGE_PART_TOKEN_ESTIMATE,
1066 })
1067 .sum(),
1068 };
1069 let tool_tokens: usize = message
1070 .tool_calls
1071 .iter()
1072 .flatten()
1073 .map(|call| (call.function.name.len() + call.function.arguments.to_string().len()) / 4 + 4)
1074 .sum();
1075 content_tokens + tool_tokens + 6
1076}
1077
1078pub fn estimate_message_batch_tokens(messages: &[ChatMessage]) -> usize {
1079 messages.iter().map(estimate_message_tokens).sum()
1080}
1081
1082fn reserved_output_tokens(context_length: usize) -> usize {
1083 let proportional = (context_length / 8).max(MIN_RESERVED_OUTPUT_TOKENS);
1084 proportional.min(MAX_RESERVED_OUTPUT_TOKENS)
1085}
1086
1087pub fn estimate_prompt_pressure(
1088 messages: &[ChatMessage],
1089 tools: &[ToolDefinition],
1090 context_length: usize,
1091) -> (usize, usize, usize, u8) {
1092 let estimated_input_tokens =
1093 estimate_message_batch_tokens(messages) + estimate_serialized_tokens(tools) + 32;
1094 let reserved_output = reserved_output_tokens(context_length);
1095 let estimated_total = estimated_input_tokens.saturating_add(reserved_output);
1096 let percent = if context_length == 0 {
1097 0
1098 } else {
1099 ((estimated_total.saturating_mul(100)) / context_length).min(100) as u8
1100 };
1101 (
1102 estimated_input_tokens,
1103 reserved_output,
1104 estimated_total,
1105 percent,
1106 )
1107}
1108
1109fn preflight_chat_request(
1110 model: &str,
1111 messages: &[ChatMessage],
1112 tools: &[ToolDefinition],
1113 context_length: usize,
1114) -> Result<(), String> {
1115 let (estimated_input_tokens, reserved_output, estimated_total, _) =
1116 estimate_prompt_pressure(messages, tools, context_length);
1117
1118 if estimated_total > context_length {
1119 return Err(format!(
1120 "context_window_blocked for {}: estimated input {} + reserved output {} = {} tokens exceeds the {}-token context window; narrow the request, compact the session, or preserve grounded tool output instead of restyling it.",
1121 model, estimated_input_tokens, reserved_output, estimated_total, context_length
1122 ));
1123 }
1124
1125 Ok(())
1126}
1127
1128fn load_instruction_files() -> String {
1133 use std::collections::hash_map::DefaultHasher;
1134 use std::collections::HashSet;
1135 use std::hash::{Hash, Hasher};
1136
1137 let Ok(cwd) = std::env::current_dir() else {
1138 return String::new();
1139 };
1140 let mut result = String::new();
1141 let mut seen: HashSet<u64> = HashSet::new();
1142 let mut total_chars: usize = 0;
1143 const MAX_TOTAL: usize = 12_000;
1144 const MAX_PER_FILE: usize = 4_000;
1145
1146 let mut dir = cwd.clone();
1147 for _ in 0..4 {
1148 for name in crate::agent::instructions::PROJECT_GUIDANCE_FILES {
1149 let path = crate::agent::instructions::resolve_guidance_path(&dir, name);
1150 if !path.exists() {
1151 continue;
1152 }
1153 let Ok(content) = std::fs::read_to_string(&path) else {
1154 continue;
1155 };
1156 if content.trim().is_empty() {
1157 continue;
1158 }
1159
1160 let mut hasher = DefaultHasher::new();
1161 content.hash(&mut hasher);
1162 let h = hasher.finish();
1163 if !seen.insert(h) {
1164 continue;
1165 }
1166
1167 let truncated = if content.len() > MAX_PER_FILE {
1168 format!("{}...[truncated]", &content[..MAX_PER_FILE])
1169 } else {
1170 content
1171 };
1172
1173 if total_chars + truncated.len() > MAX_TOTAL {
1174 break;
1175 }
1176 total_chars += truncated.len();
1177 result.push_str(&format!("\n--- {} ---\n{}\n", path.display(), truncated));
1178 }
1179 match dir.parent().map(|p| p.to_owned()) {
1180 Some(p) => dir = p,
1181 None => break,
1182 }
1183 }
1184
1185 if result.is_empty() {
1186 return String::new();
1187 }
1188 format!("\n\n# Project Instructions And Skills\n{}", result)
1189}
1190
1191fn load_agent_skill_catalog() -> String {
1192 let workspace_root = crate::tools::file_ops::workspace_root();
1193 let config = crate::agent::config::load_config();
1194 let discovery =
1195 crate::agent::instructions::discover_agent_skills(&workspace_root, &config.trust);
1196 crate::agent::instructions::render_skill_catalog(&discovery, 6_000)
1197 .map(|rendered| format!("\n\n{}", rendered))
1198 .unwrap_or_default()
1199}
1200
/// Extracts the reasoning text enclosed by `<|channel>thought` … `<channel|>`.
///
/// Tag matching is ASCII case-insensitive. If the closing tag is missing, everything after
/// the opening tag is treated as reasoning. Returns `None` when there is no opening tag or
/// the enclosed text is empty after trimming.
pub fn extract_think_block(text: &str) -> Option<String> {
    const OPEN_TAG: &str = "<|channel>thought";
    const CLOSE_TAG: &str = "<channel|>";

    // ASCII folding keeps byte offsets aligned with `text`. Full Unicode lowercasing can
    // change byte lengths, which would make offsets found here invalid for slicing `text`.
    let lower = text.to_ascii_lowercase();

    let content_start = lower.find(OPEN_TAG)? + OPEN_TAG.len();
    let content_end = lower[content_start..]
        .find(CLOSE_TAG)
        .map_or(text.len(), |offset| content_start + offset);

    let content = text[content_start..content_end].trim();
    if content.is_empty() {
        None
    } else {
        Some(content.to_string())
    }
}
1223
/// Removes model reasoning from `text`, leaving only the user-visible answer.
///
/// Steps, in order (all tag matching is ASCII case-insensitive):
/// 1. A leading `</think>` (after leading whitespace) is dropped.
/// 2. If a `<channel|>` close tag exists, only the text after its first occurrence is kept,
///    with any remaining channel tags removed.
/// 3. Otherwise, if any opening reasoning tag exists, only the text before the earliest one
///    is kept; everything from that tag onward is discarded.
/// 4. Otherwise, if the text contains a known untagged-reasoning phrase, the leading run of
///    reasoning or blank lines is dropped.
/// 5. Otherwise, stray XML tool-call and think tags are removed.
///
/// The result is trimmed and triple newlines are collapsed once to double newlines.
pub fn strip_think_blocks(text: &str) -> String {
    const ORPHAN_CLOSE: &str = "</think>";
    const CHANNEL_CLOSE: &str = "<channel|>";
    const OPEN_TAGS: [&str; 5] = [
        "<|channel>thought",
        "<think>",
        "<thinking>",
        "<thought>",
        "<|think|>",
    ];
    const NAKED_REASONING_PHRASES: &[&str] = &[
        "the user asked",
        "the user is asking",
        "the user wants",
        "i will structure",
        "i should provide",
        "i should give",
        "i should avoid",
        "i should note",
        "i should focus",
        "i should keep",
        "i should respond",
        "i should present",
        "i should display",
        "i should show",
        "i need to",
        "i can see from",
        "without being overly",
        "let me ",
        "necessary information in my identity",
        "was computed successfully",
        "computed successfully",
    ];

    let text = {
        let t = text.trim_start();
        // `get` yields `None` for short input or a non-boundary cut, so this cannot panic.
        let has_orphan_close = t
            .get(..ORPHAN_CLOSE.len())
            .map_or(false, |head| head.eq_ignore_ascii_case(ORPHAN_CLOSE));
        if has_orphan_close {
            &t[ORPHAN_CLOSE.len()..]
        } else {
            text
        }
    };

    // ASCII folding preserves byte length, so offsets found in `lower` are valid in `text`.
    // Full Unicode lowercasing can change lengths and would misalign them.
    let lower = text.to_ascii_lowercase();

    if let Some(close_at) = lower.find(CHANNEL_CLOSE) {
        let answer = text[close_at + CHANNEL_CLOSE.len()..]
            .replace("<|channel>thought", "")
            .replace("<channel|>", "");
        return answer.trim().replace("\n\n\n", "\n\n");
    }

    let first_open = OPEN_TAGS.iter().filter_map(|tag| lower.find(*tag)).min();
    if let Some(start) = first_open {
        // A tag at offset 0 leaves an empty prefix, which yields an empty string.
        return text[..start].trim().replace("\n\n\n", "\n\n");
    }

    let is_naked_reasoning = NAKED_REASONING_PHRASES
        .iter()
        .any(|phrase| lower.contains(phrase));
    if is_naked_reasoning {
        let lines: Vec<&str> = text.lines().collect();
        // Count the leading lines that are blank or contain a reasoning phrase.
        let leading = lines
            .iter()
            .take_while(|line| {
                let lowered = line.to_ascii_lowercase();
                lowered.trim().is_empty()
                    || NAKED_REASONING_PHRASES
                        .iter()
                        .any(|phrase| lowered.contains(phrase))
            })
            .count();
        if leading < lines.len() {
            return lines[leading..]
                .join("\n")
                .trim()
                .replace("\n\n\n", "\n\n");
        }
        return String::new();
    }

    strip_xml_tool_call_artifacts(text)
        .trim()
        .replace("\n\n\n", "\n\n")
}

/// Deletes stray XML tool-call and reasoning tags from `text`, ASCII case-insensitively.
///
/// Only the tags themselves are removed; the text between them is kept. Tags are processed
/// in list order and each is removed repeatedly until it no longer occurs.
fn strip_xml_tool_call_artifacts(text: &str) -> String {
    const XML_ARTIFACTS: &[&str] = &[
        "</tool_call>",
        "<tool_call>",
        "</function>",
        "<function>",
        "</parameter>",
        "<parameter>",
        "</arguments>",
        "<arguments>",
        "</tool_use>",
        "<tool_use>",
        "</invoke>",
        "<invoke>",
        "</think>",
        "<thinking>",
        "</thought>",
        "</thinking>",
    ];

    let mut out = text.to_string();
    // Lowercase shadow kept in lockstep with `out`. ASCII folding preserves byte length,
    // so a match offset in `lower` is the same offset (and a char boundary) in `out`.
    let mut lower = text.to_ascii_lowercase();
    for tag in XML_ARTIFACTS {
        while let Some(pos) = lower.find(tag) {
            let end = pos + tag.len();
            out.drain(pos..end);
            lower.drain(pos..end);
        }
    }
    out
}
1360
1361pub fn extract_native_tool_calls(text: &str) -> Vec<ToolCallResponse> {
1364 use regex::Regex;
1365 let mut results = Vec::new();
1366
1367 let re_call = Regex::new(
1369 r#"(?s)<\|?tool_call\|?>\s*call:([A-Za-z_][A-Za-z0-9_]*)\{(.*?)\}(?:<\|?tool_call\|?>|\[END_TOOL_REQUEST\])"#
1370 ).unwrap();
1371 let re_arg = Regex::new(r#"(\w+):(?:<\|"\|>(.*?)<\|"\|>|([^,}]*))"#).unwrap();
1372
1373 for cap in re_call.captures_iter(text) {
1374 let name = cap[1].to_string();
1375 let args_str = &cap[2];
1376 let mut arguments = serde_json::Map::new();
1377
1378 for arg_cap in re_arg.captures_iter(args_str) {
1379 let key = arg_cap[1].to_string();
1380 let val_raw = arg_cap
1381 .get(2)
1382 .map(|m| m.as_str())
1383 .or_else(|| arg_cap.get(3).map(|m| m.as_str()))
1384 .unwrap_or("")
1385 .trim();
1386 let normalized_raw = normalize_string_arg(&val_raw.replace("\\\"", "\""));
1387
1388 let val = if normalized_raw == "true" {
1389 Value::Bool(true)
1390 } else if normalized_raw == "false" {
1391 Value::Bool(false)
1392 } else if let Ok(n) = normalized_raw.parse::<i64>() {
1393 Value::Number(n.into())
1394 } else if let Ok(n) = normalized_raw.parse::<u64>() {
1395 Value::Number(n.into())
1396 } else if let Ok(n) = normalized_raw.parse::<f64>() {
1397 serde_json::Number::from_f64(n)
1398 .map(Value::Number)
1399 .unwrap_or(Value::String(normalized_raw.clone()))
1400 } else {
1401 Value::String(normalized_raw)
1402 };
1403
1404 arguments.insert(key, val);
1405 }
1406
1407 results.push(ToolCallResponse {
1408 id: format!("call_{}", rand::random::<u32>()),
1409 call_type: "function".to_string(),
1410 function: ToolCallFn {
1411 name,
1412 arguments: Value::Object(arguments),
1413 },
1414 index: None,
1415 });
1416 }
1417
1418 let re_xml_call = Regex::new(
1420 r#"(?s)<tool_call>\s*<function=([A-Za-z_][A-Za-z0-9_]*)>(.*?)(?:</function>)?\s*</tool_call>"#
1421 ).unwrap();
1422 let re_xml_param =
1423 Regex::new(r#"(?s)<parameter=([A-Za-z_][A-Za-z0-9_]*)>(.*?)</parameter>"#).unwrap();
1424
1425 for cap in re_xml_call.captures_iter(text) {
1426 let name = cap[1].to_string();
1427 let body = &cap[2];
1428 let mut arguments = serde_json::Map::new();
1429
1430 for p_cap in re_xml_param.captures_iter(body) {
1431 let key = p_cap[1].to_string();
1432 let val_raw = p_cap[2].trim();
1433 let val = if val_raw == "true" {
1434 Value::Bool(true)
1435 } else if val_raw == "false" {
1436 Value::Bool(false)
1437 } else if let Ok(n) = val_raw.parse::<i64>() {
1438 Value::Number(n.into())
1439 } else if let Ok(n) = val_raw.parse::<u64>() {
1440 Value::Number(n.into())
1441 } else {
1442 Value::String(val_raw.to_string())
1443 };
1444 arguments.insert(key, val);
1445 }
1446
1447 results.push(ToolCallResponse {
1448 id: format!("call_{}", rand::random::<u32>()),
1449 call_type: "function".to_string(),
1450 function: ToolCallFn {
1451 name,
1452 arguments: Value::Object(arguments),
1453 },
1454 index: None,
1455 });
1456 }
1457
1458 let re_short_call =
1460 Regex::new(r#"(?s)<tool_call>\s*([A-Za-z_][A-Za-z0-9_]*)\((.*?)\)\s*</tool_call>"#)
1461 .unwrap();
1462 let re_short_arg = Regex::new(
1463 r#"([A-Za-z_][A-Za-z0-9_]*)\s*=\s*(?:"((?:\\.|[^"])*)"|'((?:\\.|[^'])*)'|([^,\)]+))"#,
1464 )
1465 .unwrap();
1466
1467 for cap in re_short_call.captures_iter(text) {
1468 let name = cap[1].to_string();
1469 let args_str = cap[2].trim();
1470 let mut arguments = serde_json::Map::new();
1471
1472 for arg_cap in re_short_arg.captures_iter(args_str) {
1473 let key = arg_cap[1].to_string();
1474 let val_raw = arg_cap
1475 .get(2)
1476 .or_else(|| arg_cap.get(3))
1477 .or_else(|| arg_cap.get(4))
1478 .map(|m| m.as_str())
1479 .unwrap_or("")
1480 .trim();
1481 let normalized_raw = normalize_string_arg(&val_raw.replace("\\\"", "\""));
1482
1483 let val = if normalized_raw == "true" {
1484 Value::Bool(true)
1485 } else if normalized_raw == "false" {
1486 Value::Bool(false)
1487 } else if let Ok(n) = normalized_raw.parse::<i64>() {
1488 Value::Number(n.into())
1489 } else if let Ok(n) = normalized_raw.parse::<u64>() {
1490 Value::Number(n.into())
1491 } else if let Ok(n) = normalized_raw.parse::<f64>() {
1492 serde_json::Number::from_f64(n)
1493 .map(Value::Number)
1494 .unwrap_or(Value::String(normalized_raw.clone()))
1495 } else {
1496 Value::String(normalized_raw)
1497 };
1498
1499 arguments.insert(key, val);
1500 }
1501
1502 results.push(ToolCallResponse {
1503 id: format!("call_{}", rand::random::<u32>()),
1504 call_type: "function".to_string(),
1505 function: ToolCallFn {
1506 name,
1507 arguments: Value::Object(arguments),
1508 },
1509 index: None,
1510 });
1511 }
1512
1513 results
1514}
1515
1516pub fn normalize_tool_argument_string(tool_name: &str, raw: &str) -> String {
1517 let trimmed = raw.trim();
1518 let candidate = unwrap_json_string_once(trimmed).unwrap_or_else(|| trimmed.to_string());
1519
1520 let mut value = match serde_json::from_str::<Value>(&candidate) {
1521 Ok(v) => v,
1522 Err(_) => return candidate,
1523 };
1524 normalize_tool_argument_value(tool_name, &mut value);
1525 value.to_string()
1526}
1527
1528pub fn normalize_tool_argument_value(tool_name: &str, value: &mut Value) {
1529 match value {
1530 Value::String(s) => *s = normalize_string_arg(s),
1531 Value::Array(items) => {
1532 for item in items {
1533 normalize_tool_argument_value(tool_name, item);
1534 }
1535 }
1536 Value::Object(map) => {
1537 for val in map.values_mut() {
1538 normalize_tool_argument_value(tool_name, val);
1539 }
1540 if tool_name == "grep_files" {
1541 if let Some(Value::String(pattern)) = map.get_mut("pattern") {
1542 *pattern = normalize_regex_pattern(pattern);
1543 }
1544 }
1545 for key in ["path", "extension", "query", "command", "reason"] {
1546 if let Some(Value::String(s)) = map.get_mut(key) {
1547 *s = normalize_string_arg(s);
1548 }
1549 }
1550 }
1551 _ => {}
1552 }
1553}
1554
/// Removes one layer of matching quotes (`"`, `'` or `` ` ``) from `input`.
///
/// Returns `None` when the input is shorter than two bytes or is not wrapped in a matching
/// pair. Otherwise the inner text has `\"` replaced by `"`, then `\\` replaced by `\`, and
/// is trimmed.
fn unwrap_json_string_once(input: &str) -> Option<String> {
    if input.len() < 2 {
        return None;
    }

    let mut chars = input.chars();
    let opening = chars.next()?;
    let closing = chars.next_back()?;
    let wrapped = opening == closing && matches!(opening, '"' | '\'' | '`');
    if !wrapped {
        return None;
    }

    // After consuming one character from each end, the iterator holds exactly the interior.
    let inner = chars.as_str();
    let unescaped = inner.replace("\\\"", "\"").replace("\\\\", "\\");
    Some(unescaped.trim().to_string())
}
1568
/// Trims `input` and peels off every layer of matching wrapping quotes.
///
/// While the text is at least two bytes long and begins and ends with the same quote
/// character (`"`, `'` or `` ` ``), that pair is removed and the remainder is trimmed again.
fn normalize_string_arg(input: &str) -> String {
    const QUOTES: [char; 3] = ['"', '\'', '`'];

    let mut current = input.trim();
    loop {
        if current.len() < 2 {
            break;
        }
        let wrapped = QUOTES
            .iter()
            .any(|&quote| current.starts_with(quote) && current.ends_with(quote));
        if !wrapped {
            break;
        }
        // Each quote character is a single byte, so a one-byte cut at both ends is safe.
        current = current[1..current.len() - 1].trim();
    }
    current.to_string()
}
1586
1587fn normalize_regex_pattern(input: &str) -> String {
1588 let out = normalize_string_arg(input);
1589 if out.len() >= 2 && out.starts_with('/') && out.ends_with('/') {
1590 out[1..out.len() - 1].to_string()
1591 } else {
1592 out
1593 }
1594}
1595
1596fn prepare_gemma_native_messages(messages: &[ChatMessage]) -> Vec<ChatMessage> {
1597 let mut system_blocks = Vec::new();
1598 let mut prepared = Vec::new();
1599 let mut seeded = false;
1600
1601 for message in messages {
1602 if message.role == "system" {
1603 let cleaned = strip_legacy_turn_wrappers(message.content.as_str())
1604 .trim()
1605 .to_string();
1606 if !cleaned.is_empty() {
1607 system_blocks.push(cleaned);
1608 }
1609 continue;
1610 }
1611
1612 let mut clone = message.clone();
1613 clone.content = MessageContent::Text(strip_legacy_turn_wrappers(message.content.as_str()));
1614
1615 if !seeded && message.role == "user" {
1616 let mut merged = String::new();
1617 if !system_blocks.is_empty() {
1618 merged.push_str("System instructions for this turn:\n");
1619 merged.push_str(&system_blocks.join("\n\n"));
1620 merged.push_str("\n\n");
1621 }
1622 merged.push_str(clone.content.as_str());
1623 clone.content = MessageContent::Text(merged);
1624 seeded = true;
1625 }
1626
1627 prepared.push(clone);
1628 }
1629
1630 if !seeded && !system_blocks.is_empty() {
1631 prepared.insert(
1632 0,
1633 ChatMessage::user(&format!(
1634 "System instructions for this turn:\n{}",
1635 system_blocks.join("\n\n")
1636 )),
1637 );
1638 }
1639
1640 prepared
1641}
1642
/// Removes legacy `<|turn>ROLE\n` openers and `<turn|>` closers from `text`, then trims.
///
/// Markers are removed one kind at a time, in a fixed order, with a single pass per kind.
fn strip_legacy_turn_wrappers(text: &str) -> String {
    const WRAPPERS: [&str; 5] = [
        "<|turn>system\n",
        "<|turn>user\n",
        "<|turn>assistant\n",
        "<|turn>tool\n",
        "<turn|>",
    ];

    let mut cleaned = text.to_string();
    for marker in WRAPPERS {
        cleaned = cleaned.replace(marker, "");
    }
    cleaned.trim().to_string()
}
1652
1653pub fn strip_native_tool_call_text(text: &str) -> String {
1654 use regex::Regex;
1655 let re_call = Regex::new(
1657 r#"(?s)<\|?tool_call\|?>\s*call:[A-Za-z_][A-Za-z0-9_]*\{.*?\}(?:<\|?tool_call\|?>|\[END_TOOL_REQUEST\])"#
1658 ).unwrap();
1659 let re_xml = Regex::new(r#"(?s)<tool_call>\s*<function=.*?>.*?</tool_call>"#).unwrap();
1661 let re_short =
1663 Regex::new(r#"(?s)<tool_call>\s*[A-Za-z_][A-Za-z0-9_]*\(.*?\)\s*</tool_call>"#).unwrap();
1664 let re_response =
1665 Regex::new(r#"(?s)<\|tool_response\|?>.*?(?:<\|tool_response\|?>|<tool_response\|>)"#)
1666 .unwrap();
1667 let without_calls = re_call.replace_all(text, "");
1668 let without_xml = re_xml.replace_all(without_calls.as_ref(), "");
1669 let without_short = re_short.replace_all(without_xml.as_ref(), "");
1670 re_response
1671 .replace_all(without_short.as_ref(), "")
1672 .trim()
1673 .to_string()
1674}
1675
/// Chooses the context length to report for the effective model.
///
/// Returns `0` when no model is loaded (the name is `"no model loaded"` or blank). Otherwise
/// a positive `detected_context` wins; failing that, `previous_context` is reused only when
/// the effective model equals `previous_model`, and `0` is returned for any other model.
fn resolve_runtime_context(
    previous_model: &str,
    previous_context: usize,
    effective_model: &str,
    detected_context: usize,
) -> usize {
    let model_missing =
        effective_model == "no model loaded" || effective_model.trim().is_empty();
    if model_missing {
        return 0;
    }
    if detected_context > 0 {
        return detected_context;
    }
    if effective_model == previous_model {
        previous_context
    } else {
        0
    }
}
1692
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Serializes tests that change the process-wide working directory.
    static CWD_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());

    /// Switches the working directory for the lifetime of the guard.
    ///
    /// The working directory is process-global and the test harness runs tests on several
    /// threads, so the guard holds `CWD_LOCK` while active and restores the previous
    /// directory on drop, including during a panic unwind.
    struct CwdGuard {
        previous: std::path::PathBuf,
        _lock: std::sync::MutexGuard<'static, ()>,
    }

    impl CwdGuard {
        fn enter(dir: &std::path::Path) -> Self {
            // A panic in another guarded test poisons the lock; the guarded state is still
            // usable because the previous directory is restored on drop.
            let lock = CWD_LOCK
                .lock()
                .unwrap_or_else(|poisoned| poisoned.into_inner());
            let previous = std::env::current_dir().unwrap();
            std::env::set_current_dir(dir).unwrap();
            Self {
                previous,
                _lock: lock,
            }
        }
    }

    impl Drop for CwdGuard {
        fn drop(&mut self) {
            // Runs before the lock field is released, so the restore is still serialized.
            let _ = std::env::set_current_dir(&self.previous);
        }
    }

    #[test]
    fn system_prompt_includes_running_hematite_version() {
        let engine = InferenceEngine::new(
            "http://localhost:1234/v1".to_string(),
            "strategist".to_string(),
            0,
        )
        .expect("engine");

        let system = engine.build_system_prompt(0, 50, false, true, &[], None, None, &[]);
        assert!(system.contains(crate::HEMATITE_VERSION));
    }

    #[test]
    fn extracts_gemma_native_tool_call_with_mixed_tool_call_tags() {
        let text = r#"<|channel>thought
Reading the next chunk.<channel|>The startup banner wording is likely defined within the UI drawing logic.
<|tool_call>call:read_file{limit:100,offset:100,path:\"src/ui/tui.rs\"}<tool_call|>"#;

        let calls = extract_native_tool_calls(text);
        assert_eq!(calls.len(), 1);
        assert_eq!(calls[0].function.name, "read_file");

        let args: Value = calls[0].function.arguments.clone();
        assert_eq!(args.get("limit").and_then(|v| v.as_i64()), Some(100));
        assert_eq!(args.get("offset").and_then(|v| v.as_i64()), Some(100));
        assert_eq!(
            args.get("path").and_then(|v| v.as_str()),
            Some("src/ui/tui.rs")
        );

        let stripped = strip_native_tool_call_text(text);
        assert!(!stripped.contains("<|tool_call"));
        assert!(!stripped.contains("<tool_call|>"));
    }

    #[test]
    fn strips_hallucinated_tool_responses_from_native_tool_transcript() {
        let text = r#"<|channel>thought
Planning.
<channel|><|tool_call>call:list_files{extension:<|\"|>rs<|\"|>,path:<|\"|>src/<|\"|>}<tool_call|><|tool_response>thought
Mapped src.
<channel|><|tool_call>call:read_file{limit:100,offset:0,path:<|\"|>src/main.rs<|\"|>}<tool_call|><|tool_response>thought
Read main.
<channel|>"#;

        let calls = extract_native_tool_calls(text);
        assert_eq!(calls.len(), 2);
        assert_eq!(calls[0].function.name, "list_files");
        assert_eq!(calls[1].function.name, "read_file");

        let stripped = strip_native_tool_call_text(text);
        assert!(!stripped.contains("<|tool_call"));
        assert!(!stripped.contains("<|tool_response"));
        assert!(!stripped.contains("<tool_response|>"));
    }

    #[test]
    fn create_directory_is_treated_as_mutating_repo_write() {
        let metadata = tool_metadata_for_name("create_directory");
        assert!(metadata.mutates_workspace);
        assert!(!metadata.read_only_friendly);
    }

    #[test]
    fn extracts_qwen_xml_tool_calls_from_reasoning() {
        let text = r#"Based on the project structure, I need to check the binary.
<tool_call>
<function=shell>
<parameter=command>
ls -la hematite.exe
</parameter>
<parameter=reason>
Check if the binary exists
</parameter>
</function>
</tool_call>"#;

        let calls = extract_native_tool_calls(text);
        assert_eq!(calls.len(), 1);
        assert_eq!(calls[0].function.name, "shell");

        let args: Value = calls[0].function.arguments.clone();
        assert_eq!(
            args.get("command").and_then(|v| v.as_str()),
            Some("ls -la hematite.exe")
        );
        assert_eq!(
            args.get("reason").and_then(|v| v.as_str()),
            Some("Check if the binary exists")
        );

        let stripped = strip_native_tool_call_text(text);
        assert!(!stripped.contains("<tool_call>"));
        assert!(!stripped.contains("<function=shell>"));
    }

    #[test]
    fn extracts_shorthand_tool_calls_from_reasoning() {
        let text = r#"<thinking>
The user wants a search first.
</thinking>

I'll search before continuing.

<tool_call>research_web(query="uefn toolbelt python automation unreal engine fortnite")</tool_call>"#;

        let calls = extract_native_tool_calls(text);
        assert_eq!(calls.len(), 1);
        assert_eq!(calls[0].function.name, "research_web");

        let args: Value = calls[0].function.arguments.clone();
        assert_eq!(
            args.get("query").and_then(|v| v.as_str()),
            Some("uefn toolbelt python automation unreal engine fortnite")
        );

        let stripped = strip_native_tool_call_text(text);
        assert!(!stripped.contains("<tool_call>"));
        assert!(!stripped.contains("research_web(query="));
    }

    #[test]
    fn strips_thinking_tag_as_reasoning_prefix() {
        let cleaned =
            strip_think_blocks("<thinking>\nThe user wants a search.\n</thinking>\nVisible answer");
        assert_eq!(cleaned, "");
    }

    #[test]
    fn resolve_runtime_context_returns_zero_when_no_model_loaded() {
        assert_eq!(
            resolve_runtime_context("qwen/qwen3.5-9b", 32000, "no model loaded", 0),
            0
        );
    }

    #[test]
    fn resolve_runtime_context_preserves_previous_only_for_same_model() {
        assert_eq!(
            resolve_runtime_context("qwen/qwen3.5-9b", 32000, "qwen/qwen3.5-9b", 0),
            32000
        );
        assert_eq!(
            resolve_runtime_context("qwen/qwen3.5-9b", 32000, "bonsai-8b", 0),
            0
        );
    }

    #[test]
    fn load_instruction_files_includes_workspace_guidance_files() {
        let temp = tempfile::tempdir().unwrap();

        fs::write(
            temp.path().join("SKILLS.md"),
            "# Workspace Skills\n- Prefer API-first changes before UI polish.",
        )
        .unwrap();

        let loaded = {
            let _cwd = CwdGuard::enter(temp.path());
            load_instruction_files()
        };

        assert!(loaded.contains("SKILLS.md"));
        assert!(loaded.contains("Prefer API-first changes before UI polish."));
    }

    #[test]
    fn load_agent_skill_catalog_includes_skill_directory_entries() {
        let temp = tempfile::tempdir().unwrap();

        std::fs::create_dir_all(temp.path().join(".agents/skills/code-review")).unwrap();
        fs::write(
            temp.path().join(".agents/skills/code-review/SKILL.md"),
            "---\nname: code-review\ndescription: Review diffs and flag regressions.\ncompatibility: Requires git\n---\n",
        )
        .unwrap();

        let loaded = {
            let _cwd = CwdGuard::enter(temp.path());
            load_agent_skill_catalog()
        };

        assert!(loaded.contains("Agent Skills Catalog"));
        assert!(loaded.contains("code-review"));
        assert!(loaded.contains("Review diffs and flag regressions."));
    }
}