1use std::fmt::Write as _;
2
3use serde::Serialize;
4use serde_json::Value;
5use tokio::sync::{mpsc, Semaphore};
6
7pub use crate::agent::economics::{SessionEconomics, ToolRecord};
8pub use crate::agent::types::*;
9
10pub struct InferenceEngine {
13 pub provider:
14 std::sync::Arc<tokio::sync::RwLock<Box<dyn crate::agent::provider::ModelProvider>>>,
15 pub cached_model: std::sync::Arc<std::sync::RwLock<String>>,
16 pub cached_context: std::sync::Arc<std::sync::atomic::AtomicUsize>,
17 pub base_url: String,
18 pub species: String,
19 pub snark: u8,
20 pub kv_semaphore: Semaphore,
21 pub economics: std::sync::Arc<std::sync::Mutex<SessionEconomics>>,
22 pub worker_model: Option<String>,
24 pub gemma_native_formatting: std::sync::Arc<std::sync::atomic::AtomicBool>,
26 pub cancel_token: std::sync::Arc<std::sync::atomic::AtomicBool>,
28}
29
/// Reports whether `model` names a Gemma 4 build, matching `gemma-4` or
/// `gemma4` case-insensitively anywhere in the string.
pub fn is_hematite_native_model(model: &str) -> bool {
    const NATIVE_MARKERS: [&str; 2] = ["gemma-4", "gemma4"];

    let normalized = model.to_ascii_lowercase();
    NATIVE_MARKERS
        .iter()
        .any(|marker| normalized.contains(*marker))
}
34
35fn should_use_native_formatting(engine: &InferenceEngine, model: &str) -> bool {
36 is_hematite_native_model(model) && engine.gemma_native_formatting_enabled()
37}
38
39pub fn tool_metadata_for_name(name: &str) -> ToolMetadata {
42 if name.starts_with("mcp__") {
43 let lower = name.to_ascii_lowercase();
44 let mutates_workspace = [
45 "__edit",
46 "__write",
47 "__create",
48 "__move",
49 "__delete",
50 "__remove",
51 "__rename",
52 "__replace",
53 "__patch",
54 ]
55 .iter()
56 .any(|needle| lower.contains(needle));
57 return ToolMetadata {
58 category: ToolCategory::External,
59 mutates_workspace,
60 external_surface: true,
61 trust_sensitive: true,
62 read_only_friendly: !mutates_workspace,
63 plan_scope: false,
64 };
65 }
66
67 match name {
68 "read_file" | "inspect_lines" | "grep_files" | "list_files" => ToolMetadata {
69 category: ToolCategory::RepoRead,
70 mutates_workspace: false,
71 external_surface: false,
72 trust_sensitive: false,
73 read_only_friendly: true,
74 plan_scope: true,
75 },
76 "create_directory" | "write_file" | "edit_file" | "patch_hunk" | "multi_search_replace" => {
77 ToolMetadata {
78 category: ToolCategory::RepoWrite,
79 mutates_workspace: true,
80 external_surface: false,
81 trust_sensitive: true,
82 read_only_friendly: false,
83 plan_scope: true,
84 }
85 }
86 "trace_runtime_flow" => ToolMetadata {
87 category: ToolCategory::Architecture,
88 mutates_workspace: false,
89 external_surface: false,
90 trust_sensitive: false,
91 read_only_friendly: true,
92 plan_scope: false,
93 },
94 "describe_toolchain" => ToolMetadata {
95 category: ToolCategory::Toolchain,
96 mutates_workspace: false,
97 external_surface: false,
98 trust_sensitive: false,
99 read_only_friendly: true,
100 plan_scope: false,
101 },
102 "shell" => ToolMetadata {
103 category: ToolCategory::Runtime,
104 mutates_workspace: true,
105 external_surface: false,
106 trust_sensitive: true,
107 read_only_friendly: false,
108 plan_scope: false,
109 },
110 "inspect_host" => ToolMetadata {
111 category: ToolCategory::Runtime,
112 mutates_workspace: false,
113 external_surface: false,
114 trust_sensitive: false,
115 read_only_friendly: true,
116 plan_scope: false,
117 },
118 "resolve_host_issue" => ToolMetadata {
119 category: ToolCategory::Runtime,
120 mutates_workspace: true,
121 external_surface: true,
122 trust_sensitive: true,
123 read_only_friendly: false,
124 plan_scope: false,
125 },
126 "run_hematite_maintainer_workflow" => ToolMetadata {
127 category: ToolCategory::Workflow,
128 mutates_workspace: true,
129 external_surface: false,
130 trust_sensitive: true,
131 read_only_friendly: false,
132 plan_scope: false,
133 },
134 "run_workspace_workflow" => ToolMetadata {
135 category: ToolCategory::Workflow,
136 mutates_workspace: true,
137 external_surface: false,
138 trust_sensitive: true,
139 read_only_friendly: false,
140 plan_scope: false,
141 },
142 "verify_build" => ToolMetadata {
143 category: ToolCategory::Verification,
144 mutates_workspace: false,
145 external_surface: false,
146 trust_sensitive: false,
147 read_only_friendly: true,
148 plan_scope: true,
149 },
150 "git_commit" | "git_push" | "git_remote" | "git_onboarding" | "git_worktree" => {
151 ToolMetadata {
152 category: ToolCategory::Git,
153 mutates_workspace: true,
154 external_surface: false,
155 trust_sensitive: true,
156 read_only_friendly: false,
157 plan_scope: false,
158 }
159 }
160 "research_web" | "fetch_docs" => ToolMetadata {
161 category: ToolCategory::Research,
162 mutates_workspace: false,
163 external_surface: false,
164 trust_sensitive: false,
165 read_only_friendly: true,
166 plan_scope: false,
167 },
168 "vision_analyze" => ToolMetadata {
169 category: ToolCategory::Vision,
170 mutates_workspace: false,
171 external_surface: false,
172 trust_sensitive: false,
173 read_only_friendly: true,
174 plan_scope: false,
175 },
176 "lsp_definitions"
177 | "lsp_references"
178 | "lsp_hover"
179 | "lsp_rename_symbol"
180 | "lsp_get_diagnostics"
181 | "lsp_search_symbol" => ToolMetadata {
182 category: ToolCategory::Lsp,
183 mutates_workspace: false,
184 external_surface: false,
185 trust_sensitive: false,
186 read_only_friendly: true,
187 plan_scope: false,
188 },
189 "auto_pin_context" | "list_pinned" | "clarify" => ToolMetadata {
190 category: ToolCategory::Workflow,
191 mutates_workspace: false,
192 external_surface: false,
193 trust_sensitive: false,
194 read_only_friendly: true,
195 plan_scope: true,
196 },
197 "manage_tasks" => ToolMetadata {
198 category: ToolCategory::Workflow,
199 mutates_workspace: false,
200 external_surface: false,
201 trust_sensitive: false,
202 read_only_friendly: true,
203 plan_scope: false,
204 },
205 _ => ToolMetadata {
206 category: ToolCategory::Other,
207 mutates_workspace: false,
208 external_surface: false,
209 trust_sensitive: false,
210 read_only_friendly: true,
211 plan_scope: false,
212 },
213 }
214}
/// Lower bound on the reserved output-token budget.
/// NOTE(review): the consumer is outside this part of the file; confirm usage.
const MIN_RESERVED_OUTPUT_TOKENS: usize = 1_024;
/// Upper bound on the reserved output-token budget.
/// NOTE(review): the consumer is outside this part of the file; confirm usage.
const MAX_RESERVED_OUTPUT_TOKENS: usize = 4_096;
221
/// A context window of at most 8,192 tokens (including an unset length of 0)
/// is treated as "tiny".
fn is_tiny_context_window(context_length: usize) -> bool {
    const TINY_CEILING: usize = 8_192;
    !(context_length > TINY_CEILING)
}
225
/// A context window is "compact" when it is larger than 8,192 tokens and no
/// larger than 49,152 tokens.
fn is_compact_context_window(context_length: usize) -> bool {
    (8_193..=49_152).contains(&context_length)
}
229
230pub fn is_compact_context_window_pub(context_length: usize) -> bool {
231 is_compact_context_window(context_length)
232}
233
/// Recognises provider error text that reports a context-length overflow.
///
/// `lower` must already be lowercased; matching is plain substring search.
fn is_provider_context_limit_detail(lower: &str) -> bool {
    const LIMIT_PHRASES: [&str; 4] = [
        "context length",
        "keep from the initial prompt",
        "prompt is greater than the context length",
        "exceeds the context window",
    ];

    // `n_keep` only counts when the same message also mentions `n_ctx`.
    let names_keep_and_ctx = lower.contains("n_keep") && lower.contains("n_ctx");
    names_keep_and_ctx || LIMIT_PHRASES.iter().any(|phrase| lower.contains(*phrase))
}
241
242fn classify_runtime_failure_tag(detail: &str) -> &'static str {
243 let lower = detail.to_ascii_lowercase();
244 if lower.contains("context_window_blocked")
245 || lower.contains("context ceiling reached")
246 || lower.contains("exceeds the")
247 || is_provider_context_limit_detail(&lower)
248 {
249 "context_window"
250 } else if lower.contains("empty response from model")
251 || lower.contains("model returned an empty response")
252 {
253 "empty_model_response"
254 } else if lower.contains("action blocked:")
255 || lower.contains("access denied")
256 || lower.contains("declined by user")
257 {
258 "tool_policy_blocked"
259 } else {
260 "provider_degraded"
261 }
262}
263
/// Returns the operator-facing recovery advice for a failure `tag`.
/// Tags without a dedicated entry receive the generic retry advice.
fn runtime_failure_guidance(tag: &str) -> &'static str {
    const GENERIC_ADVICE: &str =
        "Retry once automatically, then narrow the turn or restart LM Studio if it persists.";
    const ADVICE_BY_TAG: [(&str, &str); 3] = [
        (
            "context_window",
            "Narrow the request, compact the session, or preserve grounded tool output instead of restyling it. If LM Studio reports a smaller live n_ctx than Hematite expected, reload or re-detect the model budget before retrying.",
        ),
        (
            "empty_model_response",
            "Retry once automatically, then narrow the turn or restart LM Studio if the model keeps returning nothing.",
        ),
        (
            "tool_policy_blocked",
            "Stay inside the allowed workflow or switch modes before retrying.",
        ),
    ];

    for (known_tag, advice) in ADVICE_BY_TAG {
        if known_tag == tag {
            return advice;
        }
    }
    GENERIC_ADVICE
}
278
279fn format_runtime_failure_message(detail: &str) -> String {
280 let tag = classify_runtime_failure_tag(detail);
281 format!(
282 "[failure:{}] {} Detail: {}",
283 tag,
284 runtime_failure_guidance(tag),
285 detail.trim()
286 )
287}
288
289impl InferenceEngine {
294 pub fn new(
295 api_url: String,
296 species: String,
297 snark: u8,
298 ) -> Result<Self, Box<dyn std::error::Error>> {
299 let client = reqwest::Client::builder()
300 .timeout(std::time::Duration::from_secs(180))
301 .build()?;
302
303 let base_url = {
304 let trimmed = api_url.trim_end_matches('/');
305 if let Some(scheme_end) = trimmed.find("://") {
306 let after_scheme = &trimmed[scheme_end + 3..];
307 if let Some(path_start) = after_scheme.find('/') {
308 format!(
309 "{}://{}",
310 &trimmed[..scheme_end],
311 &after_scheme[..path_start]
312 )
313 } else {
314 trimmed.to_string()
315 }
316 } else {
317 trimmed.to_string()
318 }
319 };
320
321 let api_url_full = if api_url.ends_with("/chat/completions") {
322 api_url
323 } else if api_url.ends_with("/") {
324 format!("{}chat/completions", api_url)
325 } else {
326 format!("{}/chat/completions", api_url)
327 };
328
329 let lms = crate::agent::lms::LmsHarness::new();
330 let ollama_harness = crate::agent::ollama::OllamaHarness::new(&base_url);
331
332 let provider = if base_url.contains("11434") {
333 Box::new(crate::agent::provider::OllamaProvider {
334 client: client.clone(),
335 base_url: base_url.clone(),
336 model: String::new(),
337 context_length: 8192,
338 embed_model: std::sync::Arc::new(std::sync::RwLock::new(None)),
339 ollama: ollama_harness,
340 }) as Box<dyn crate::agent::provider::ModelProvider>
341 } else {
342 Box::new(crate::agent::provider::LmsProvider {
343 client: client.clone(),
344 api_url: api_url_full,
345 base_url: base_url.clone(),
346 model: String::new(),
347 context_length: 0,
348 lms,
349 }) as Box<dyn crate::agent::provider::ModelProvider>
350 };
351
352 Ok(Self {
353 provider: std::sync::Arc::new(tokio::sync::RwLock::new(provider)),
354 cached_model: std::sync::Arc::new(std::sync::RwLock::new(String::new())),
355 cached_context: std::sync::Arc::new(std::sync::atomic::AtomicUsize::new(0)),
356 base_url: base_url.clone(),
357 species: species.clone(),
358 snark,
359 kv_semaphore: Semaphore::new(3),
360 economics: std::sync::Arc::new(std::sync::Mutex::new(SessionEconomics::new())),
361 worker_model: None,
362 gemma_native_formatting: std::sync::Arc::new(std::sync::atomic::AtomicBool::new(false)),
363 cancel_token: std::sync::Arc::new(std::sync::atomic::AtomicBool::new(false)),
364 })
365 }
366
367 pub fn set_gemma_native_formatting(&self, enabled: bool) {
368 self.gemma_native_formatting
369 .store(enabled, std::sync::atomic::Ordering::SeqCst);
370 }
371
372 pub async fn health_check(&self) -> bool {
373 let p = self.provider.read().await;
374 p.health_check().await
375 }
376
377 pub async fn provider_name(&self) -> String {
378 let p = self.provider.read().await;
379 p.name().to_string()
380 }
381
382 pub async fn get_loaded_model(&self) -> Option<String> {
383 let p = self.provider.read().await;
384 match p.detect_model().await {
385 Ok(m) if m.is_empty() => Some("".to_string()),
386 Ok(m) => Some(m),
387 Err(_) => None,
388 }
389 }
390
391 pub async fn get_embedding_model(&self) -> Option<String> {
392 let p = self.provider.read().await;
393 p.get_embedding_model().await
394 }
395
396 pub async fn load_model(&self, model_id: &str) -> Result<(), String> {
397 let p = self.provider.read().await;
398 p.load_model(model_id).await
399 }
400
401 pub async fn load_model_with_context(
402 &self,
403 model_id: &str,
404 context_length: Option<usize>,
405 ) -> Result<(), String> {
406 let p = self.provider.read().await;
407 p.load_model_with_context(model_id, context_length).await
408 }
409
410 pub async fn load_embedding_model(&self, model_id: &str) -> Result<(), String> {
411 let p = self.provider.read().await;
412 p.load_embedding_model(model_id).await
413 }
414
415 pub async fn list_provider_models(
416 &self,
417 kind: crate::agent::provider::ProviderModelKind,
418 loaded_only: bool,
419 ) -> Result<Vec<String>, String> {
420 let p = self.provider.read().await;
421 p.list_models(kind, loaded_only).await
422 }
423
424 pub async fn unload_model(&self, model_id: Option<&str>, all: bool) -> Result<String, String> {
425 let p = self.provider.read().await;
426 p.unload_model(model_id, all).await
427 }
428
429 pub async fn unload_embedding_model(&self, model_id: Option<&str>) -> Result<String, String> {
430 let p = self.provider.read().await;
431 p.unload_embedding_model(model_id).await
432 }
433
434 pub async fn prewarm(&self) -> Result<(), String> {
435 let p = self.provider.read().await;
436 p.prewarm().await
437 }
438
439 pub async fn detect_context_length(&self) -> usize {
440 let p = self.provider.read().await;
441 p.detect_context_length().await
442 }
443
444 pub async fn set_runtime_profile(&self, model: &str, context_length: usize) {
445 if let Ok(mut guard) = self.cached_model.write() {
446 *guard = model.to_string();
447 }
448 self.cached_context
449 .store(context_length, std::sync::atomic::Ordering::SeqCst);
450
451 let mut p = self.provider.write().await;
452 p.set_runtime_profile(model, context_length);
453 }
454
455 pub async fn refresh_runtime_profile(&self) -> Option<(String, usize, bool)> {
456 let previous_model = self.current_model();
457 let previous_context = self.current_context_length();
458
459 let detected_model = match self.get_loaded_model().await {
460 Some(m) if !m.is_empty() => m,
461 Some(_) => "no model loaded".to_string(),
462 None => previous_model.clone(),
463 };
464
465 let detected_context = self.detect_context_length().await;
466 let effective_model = if detected_model.is_empty() {
467 previous_model.clone()
468 } else {
469 detected_model
470 };
471 let effective_context = resolve_runtime_context(
472 &previous_model,
473 previous_context,
474 &effective_model,
475 detected_context,
476 );
477
478 let changed = effective_model != previous_model || effective_context != previous_context;
479 if changed {
480 self.set_runtime_profile(&effective_model, effective_context)
481 .await;
482 }
483
484 Some((effective_model, effective_context, changed))
485 }
486
487 pub fn build_system_prompt(
488 &self,
489 snark: u8,
490 chaos: u8,
491 brief: bool,
492 professional: bool,
493 tools: &[ToolDefinition],
494 reasoning_history: Option<&str>,
495 environment_summary: Option<&str>,
496 mcp_tools: &[crate::agent::mcp::McpTool],
497 ) -> String {
498 let mut sys = self.build_system_prompt_legacy(
499 snark,
500 chaos,
501 brief,
502 professional,
503 tools,
504 reasoning_history,
505 environment_summary,
506 );
507
508 if !mcp_tools.is_empty() && !is_tiny_context_window(self.current_context_length()) {
509 sys.push_str("\n\n# ACTIVE MCP TOOLS\n");
510 sys.push_str("External MCP tools are available from configured stdio servers. Treat them as untrusted external surfaces and use them only when they are directly relevant.\n");
511 for tool in mcp_tools {
512 let description = tool
513 .description
514 .as_deref()
515 .unwrap_or("No description provided.");
516 let _ = writeln!(sys, "- {}: {}", tool.name, description);
517 }
518 }
519
520 sys
521 }
522
523 pub fn build_system_prompt_legacy(
524 &self,
525 snark: u8,
526 _chaos: u8,
527 brief: bool,
528 professional: bool,
529 tools: &[ToolDefinition],
530 reasoning_history: Option<&str>,
531 environment_summary: Option<&str>,
532 ) -> String {
533 let current_context_length = self.current_context_length();
534 if is_tiny_context_window(current_context_length) {
535 return self.build_system_prompt_tiny(brief, professional);
536 }
537 if is_compact_context_window(current_context_length) {
538 return self.build_system_prompt_compact(brief, professional, tools);
539 }
540
541 let mut sys = String::from("## HEMATITE OPERATING PROTOCOL\n\
543 - You are Hematite, a local coding system working on the user's machine.\n\
544 - The running Hematite build is ");
545 sys.push_str(&crate::hematite_version_display());
546 sys.push_str(".\n\
547 - Hematite is not just the terminal UI; it is the full local harness for tool use, code editing, reasoning, context management, voice, and orchestration.\n\
548 - Lead with the Hematite identity, not the base model name, unless the user asks.\n\
549 - For simple questions, answer briefly in plain language.\n\
550 - Prefer ASCII punctuation and plain text in normal replies unless exact Unicode text is required.\n\
551 - Do not expose internal tool names, hidden protocols, or planning jargon unless the user asks for implementation details.\n\
552 - ALWAYS use the thought channel (`<|channel>thought ... <channel|>`) for analysis.\n\
553 - Keep internal reasoning inside channel delimiters.\n\
554 - Final responses must be direct, clear, and formatted in clean Markdown when formatting helps.\n\n");
555
556 if let Some(history) = reasoning_history {
557 if !history.is_empty() {
558 sys.push_str("# INTERNAL STATE (ACTIVE TURN)\n");
559 sys.push_str(history);
560 sys.push_str("\n\n");
561 }
562 }
563
564 if brief {
566 sys.push_str("# ADAPTIVE THOUGHT EFFICIENCY: LOW\n\
567 - Core directive: Think efficiently. Avoid redundant internal derivation.\n\
568 - Depth: Surface-level verification only.\n\n");
569 } else {
570 sys.push_str("# ADAPTIVE THOUGHT EFFICIENCY: HIGH\n\
571 - Core directive: Think in depth when the task needs it. Explore edge cases and architectural implications.\n\
572 - Depth: Full multi-step derivation required.\n\n");
573 }
574
575 let os = std::env::consts::OS;
577 if let Some(summary) = environment_summary {
578 sys.push_str("## HOST ENVIRONMENT\n");
579 sys.push_str(summary);
580 sys.push_str("\n\n");
581 }
582
583 if professional {
584 let _ = writeln!(
585 sys,
586 "You are Hematite, a local coding system running on {}. \
587 The TUI is one interface layer, not your whole identity. \
588 Be direct, practical, technically precise, and ASCII-first in ordinary prose. \
589 Skip filler and keep the focus on the work.",
590 os
591 );
592 } else {
593 let _ = writeln!(sys,
594 "You are Hematite, a [{}] local AI coding system (Snark: {}/100) running on the user's hardware on {}. \
595 The terminal UI is only one surface of the system. \
596 Be direct, efficient, technical, and ASCII-first in ordinary prose. \
597 When the user asks who you are, describe Hematite as the local coding harness and agent, not merely the TUI.",
598 self.species, snark, os
599 );
600 }
601
602 let current_model = self.current_model();
604 if !current_model.is_empty() {
605 let _ = write!(
606 sys,
607 "Loaded model: {} | Context window: {} tokens. \
608 Calibrate response length and tool-call depth to fit within this budget.\n\n",
609 current_model, current_context_length
610 );
611 if is_hematite_native_model(¤t_model) {
612 sys.push_str(
613 "Sovereign native note: prefer exact tool JSON with no extra prose when calling tools. \
614 Do not wrap `path`, `extension`, or other string arguments in extra quote layers. \
615 For `grep_files`, provide the raw regex pattern without surrounding slash delimiters.\n\n",
616 );
617 }
618 } else {
619 let _ = write!(sys,
620 "Context window: {} tokens. Calibrate response length to fit within this budget.\n\n",
621 current_context_length
622 );
623 }
624
625 let shell_desc = if cfg!(target_os = "windows") {
627 "[EXTERNAL SHELL]: `powershell` (Windows).\n\
628 - Use ONLY for builds, tests, or file migrations. \n\
629 - You MUST use the `powershell` tool directly. \n\
630 - NEVER attempt to use `bash`, `sh`, or `/dev/null` on this system. \n\n"
631 } else {
632 "[EXTERNAL SHELL]: `bash` (Unix).\n\
633 - Use ONLY for builds, tests, or file migrations. \n\
634 - NEVER wrap bash in other shells. \n\n"
635 };
636
637 sys.push_str("You distinguish strictly between [INTERNAL TOOLS] and [EXTERNAL SHELL].\n\n\
638 [INTERNAL TOOLS]: `list_files`, `grep_files`, `read_file`, `edit_file`, `write_file`.\n\
639 - These are the ONLY way to explore and modify code. \n\
640 - NEVER attempt to run these as shell commands (e.g. `bash $ grep_files` is FORBIDDEN).\n\n");
641 sys.push_str(shell_desc);
642
643 sys.push_str("ANTI-LOOPING: If a tool returns (no output) or 'not recognized' in a shell, pivot to a different internal tool. \n\
645 SELF-AUDIT: If you see your own command echoed back as the result, the shell failed; pivot to an internal tool immediately.\n\n");
646
647 sys.push_str("## THE COMPUTATIONAL RESEARCH MANDATE\n\
648 - You are a Lead Computational Researcher and Senior Scientist.\n\
649 - ZERO-TRUST MATH: You never guess results for math, physics, or algorithmic complexity.\n\
650 - UNIT-SAFETY: All physical calculations must use `scientific_compute(mode='units')` to ensure dimensional consistency.\n\
651 - SYMBOLIC PROOF: Use `scientific_compute(mode='symbolic')` for formal algebraic derivations and multi-variable proofs. Set `latex: true` for formal presentation.\n\
652 - EMPIRICAL AUDITING: All algorithmic performance claims must be verified with `scientific_compute(mode='complexity')` before being finalized.\n\
653 - SCIENTIFIC MEMORY (LEDGER): Use `scientific_compute(mode='ledger')` to persist long-form derivations, constants, and theorem steps to `.hematite/docs/scientific_ledger.md`. This ledger is RAG-indexed by The Vein, giving you persistent cross-session memory for project math.\n\
654 - DATASET COMPUTATION: Use `scientific_compute(mode='dataset')` to perform high-precision calculations on SQL results (CSV/DB/JSON). This bridges data science and formal research.\n\
655 - LIGHTWEIGHT SANDBOX: Prioritize pure Python implementations for all research tasks. Do NOT attempt to import heavy external libraries like 'numpy', 'scipy', or 'pandas' unless you have verified they are available or the user explicitly asks to work in a specific heavy environment or venv.\n\
656 - Every result must be backed by the executable logic used to prove it.\n\n");
657
658 sys.push_str("## TURN ADVISORY\n");
660 if brief {
661 sys.push_str("- BRIEF MODE: Respond with ONE concise sentence/block unless more code is required.\n");
662 }
663 sys.push_str("- INTERNAL REASONING: Plan your move in the thought channel first.\n");
664
665 sys.push_str("\n## SCAFFOLDING PROTOCOL\n\
667 2. ALWAYS call verify_build immediately after to confirm the project compiles/runs.\n\
668 3. If verify_build fails, use `lsp_get_diagnostics` to find the exact line and error.\n\
669 4. Fix all errors before declaring success.\n\n\
670 ## PRE-FLIGHT SCOPING PROTOCOL\n\
671 Before attempting any multi-file task or complex refactor:\n\
672 1. Identify 1-3 core files (entry-points, central models, or types) that drive the logic.\n\
673 2. Use `auto_pin_context` to keep those files in active context.\n\
674 3. Only then proceed to deeper edits or research.\n\n\
675 ## REFACTORING PROTOCOL\n\
676 When modifying existing code or renaming symbols:\n\
677 1. Use `lsp_rename_symbol` for all variable/function renames to ensure project-wide safety.\n\
678 2. After any significant edit, call `lsp_get_diagnostics` on the affected files.\n\
679 3. If errors are found, you MUST fix them. Do not wait for the user to point them out.\n\n");
680
681 sys.push_str(&load_instruction_files());
683 sys.push_str(&load_agent_skill_catalog());
684
685 sys.push_str(&crate::memory::deep_reflect::load_recent_memories());
687
688 if !tools.is_empty() {
690 sys.push_str("\n\n# NATIVE TOOL DECLARATIONS\n");
691 for tool in tools {
692 let schema = serde_json::to_string(&tool.function.parameters)
693 .unwrap_or_else(|_| "{}".to_string());
694 let _ = writeln!(
695 sys,
696 "<|tool>declaration:{}{{{}<tool|>",
697 tool.function.name, schema
698 );
699 let _ = writeln!(sys, "// {})", tool.function.description);
700 }
701 }
702
703 sys
704 }
705
706 fn build_system_prompt_compact(
707 &self,
708 brief: bool,
709 professional: bool,
710 tools: &[ToolDefinition],
711 ) -> String {
712 let current_model = self.current_model();
715 let current_context_length = self.current_context_length();
716 let os = std::env::consts::OS;
717
718 let mut sys = format!(
719 "You are Hematite {}, a local coding harness working on the user's machine.\n",
720 crate::hematite_version_display()
721 );
722 if professional {
723 sys.push_str("Be direct, technical, concise, and ASCII-first.\n");
724 } else {
725 let _ = writeln!(
726 sys,
727 "You are a [{}] local AI coding system. Be direct, concise, and technical.",
728 self.species
729 );
730 }
731 let _ = writeln!(
732 sys,
733 "Model: {} | Context: {} tokens. Keep turns focused.",
734 current_model, current_context_length
735 );
736 if is_hematite_native_model(¤t_model) {
737 sys.push_str(
738 "Sovereign native: use exact tool JSON. No extra prose in tool calls. \
739 Raw regex patterns in grep_files, no slash delimiters.\n",
740 );
741 }
742 if cfg!(target_os = "windows") {
743 let _ = writeln!(
744 sys,
745 "OS: {}. Use PowerShell for shell. Never bash or /dev/null.",
746 os
747 );
748 } else {
749 let _ = writeln!(sys, "OS: {}. Use native Unix shell.", os);
750 }
751 if brief {
752 sys.push_str("BRIEF MODE: one concise sentence unless code is required.\n");
753 }
754
755 sys.push_str(
756 "\nCORE RULES:\n\
757 - Read before editing: use `read_file` or `inspect_lines` on a file before mutating it.\n\
758 - Verify after edits: run `verify_build` after code changes, before committing.\n\
759 - One tool at a time. Do not batch unrelated tool calls.\n\
760 - Do not invent tool names, file paths, or symbols not confirmed by tool output.\n\
761 - Built-in tools first: prefer `read_file`, `edit_file`, `grep_files` over MCP filesystem tools.\n\
762 - STARTUP/UI CHANGES: read the owner file first, make one focused edit, then run `verify_build`.\n",
763 );
764
765 if !tools.is_empty() {
766 sys.push_str("\n# AVAILABLE TOOLS\n");
767 for tool in tools {
768 let desc: String = tool.function.description.chars().take(120).collect();
769 let _ = writeln!(sys, "- {}: {}", tool.function.name, desc);
770 }
771 }
772
773 sys
774 }
775
776 fn build_system_prompt_tiny(&self, brief: bool, professional: bool) -> String {
777 let current_model = self.current_model();
778 let current_context_length = self.current_context_length();
779 let os = std::env::consts::OS;
780 let mut sys = format!(
781 "You are Hematite {}, a local coding harness working on the user's machine.\n",
782 crate::hematite_version_display()
783 );
784 if professional {
785 sys.push_str("Be direct, technical, concise, and ASCII-first.\n");
786 } else {
787 let _ = writeln!(
788 sys,
789 "You are a [{}] local AI coding system. Be direct, concise, and technical.",
790 self.species
791 );
792 }
793 if !current_model.is_empty() {
794 let _ = writeln!(
795 sys,
796 "Loaded model: {} | Context window: {} tokens.",
797 current_model, current_context_length
798 );
799 } else {
800 let _ = writeln!(sys, "Context window: {} tokens.", current_context_length);
801 }
802 sys.push_str("Tiny-context mode is active. Keep turns short. Prefer final answers over long analysis. Only use tools when necessary.\n");
803 sys.push_str("Use built-in workspace tools for local inspection and edits. Do not invent tools, files, channels, or symbols.\n");
804 sys.push_str("Before editing an existing file, gather recent file evidence first. After code edits, verify before commit.\n");
805 if cfg!(target_os = "windows") {
806 let _ = writeln!(sys,
807 "You are running on {}. Use PowerShell for shell work. Do not assume bash or /dev/null.",
808 os
809 );
810 } else {
811 let _ = writeln!(
812 sys,
813 "You are running on {}. Use the native Unix shell conventions.",
814 os
815 );
816 }
817 if brief {
818 sys.push_str("BRIEF MODE: answer in one concise sentence unless code is required.\n");
819 }
820 sys
821 }
822
823 pub fn current_model(&self) -> String {
824 self.cached_model
825 .read()
826 .map(|g| g.clone())
827 .unwrap_or_default()
828 }
829
830 pub fn current_context_length(&self) -> usize {
831 self.cached_context
832 .load(std::sync::atomic::Ordering::Relaxed)
833 }
834
835 pub fn is_compact_context_window(&self) -> bool {
836 let len = self.current_context_length();
837 len <= 16384
838 }
839
840 pub fn gemma_native_formatting_enabled(&self) -> bool {
841 self.gemma_native_formatting
842 .load(std::sync::atomic::Ordering::Relaxed)
843 }
844
845 pub async fn call_with_tools(
846 &self,
847 messages: &[ChatMessage],
848 tools: &[ToolDefinition],
849 model_override: Option<&str>,
851 ) -> Result<
852 (
853 Option<String>,
854 Option<Vec<ToolCallResponse>>,
855 Option<TokenUsage>,
856 Option<String>,
857 ),
858 String,
859 > {
860 let _permit = self
861 .kv_semaphore
862 .acquire()
863 .await
864 .map_err(|e| e.to_string())?;
865
866 let (res, model_name, prepared_messages) = {
867 let p = self.provider.read().await;
868 let model_name = model_override.unwrap_or(&p.current_model()).to_string();
869 let prepared_messages = if should_use_native_formatting(self, &model_name) {
870 prepare_gemma_native_messages(messages)
871 } else {
872 messages.to_vec()
873 };
874 if let Err(detail) = preflight_chat_request(
875 &model_name,
876 &prepared_messages,
877 tools,
878 self.current_context_length(),
879 ) {
880 return Err(format_runtime_failure_message(&detail));
881 }
882 let res = p
883 .call_with_tools(&prepared_messages, tools, model_override)
884 .await
885 .map_err(|e| format_runtime_failure_message(&e))?;
886 (res, model_name, prepared_messages)
887 };
888
889 if let Ok(mut econ) = self.economics.lock() {
890 econ.input_tokens += res.usage.prompt_tokens;
891 econ.output_tokens += res.usage.completion_tokens;
892 }
893
894 let mut content = res.content;
895 let mut tool_calls = res.tool_calls;
896
897 if let Some(text) = &content {
899 if should_use_native_formatting(self, &model_name) {
900 let native_calls = extract_native_tool_calls(text);
901 if !native_calls.is_empty() {
902 let mut existing = tool_calls.unwrap_or_default();
903 existing.extend(native_calls);
904 tool_calls = Some(existing);
905
906 let stripped = strip_native_tool_call_text(text);
907 content = if stripped.trim().is_empty() {
908 None
909 } else {
910 Some(stripped)
911 };
912 }
913 }
914 }
915
916 if should_use_native_formatting(self, &model_name) {
918 if let Some(calls) = tool_calls.as_mut() {
919 for call in calls.iter_mut() {
920 normalize_tool_argument_value(
921 &call.function.name,
922 &mut call.function.arguments,
923 );
924 }
925 }
926 }
927
928 if should_use_native_formatting(self, &model_name)
929 && content.is_none()
930 && tool_calls.is_none()
931 && !prepared_messages.is_empty()
932 {
933 return Err(format_runtime_failure_message(
934 "model returned an empty response after native-format message preparation",
935 ));
936 }
937
938 Ok((content, tool_calls, Some(res.usage), res.finish_reason))
939 }
940
941 pub async fn stream_messages(
945 &self,
946 messages: &[ChatMessage],
947 tx: mpsc::Sender<InferenceEvent>,
948 ) -> Result<(), Box<dyn std::error::Error>> {
949 let provider = self.provider.read().await;
950 provider.stream(messages, tx).await
951 }
952
953 pub async fn stream_generation(
955 &self,
956 prompt: &str,
957 snark: u8,
958 chaos: u8,
959 brief: bool,
960 professional: bool,
961 tx: mpsc::Sender<InferenceEvent>,
962 ) -> Result<(), Box<dyn std::error::Error>> {
963 let system =
964 self.build_system_prompt(snark, chaos, brief, professional, &[], None, None, &[]);
965 let messages = vec![ChatMessage::system(&system), ChatMessage::user(prompt)];
966 self.stream_messages(&messages, tx).await
967 }
968
969 pub async fn generate_task_worker(
973 &self,
974 prompt: &str,
975 professional: bool,
976 ) -> Result<String, String> {
977 let current_model = self.current_model();
978 let model = self
979 .worker_model
980 .as_deref()
981 .unwrap_or(current_model.as_str());
982 self.generate_task_with_model(prompt, 0.1, professional, model)
983 .await
984 }
985
986 pub async fn generate_task(&self, prompt: &str, professional: bool) -> Result<String, String> {
987 self.generate_task_with_temp(prompt, 0.1, professional)
988 .await
989 }
990
991 pub async fn generate_task_with_temp(
992 &self,
993 prompt: &str,
994 temp: f32,
995 professional: bool,
996 ) -> Result<String, String> {
997 let current_model = self.current_model();
998 self.generate_task_with_model(prompt, temp, professional, ¤t_model)
999 .await
1000 }
1001
1002 pub async fn generate_task_with_model(
1003 &self,
1004 prompt: &str,
1005 _temp: f32,
1006 professional: bool,
1007 model: &str,
1008 ) -> Result<String, String> {
1009 let _permit = self
1010 .kv_semaphore
1011 .acquire()
1012 .await
1013 .map_err(|e| e.to_string())?;
1014
1015 let system =
1016 self.build_system_prompt(self.snark, 50, false, professional, &[], None, None, &[]);
1017 let messages = vec![ChatMessage::system(&system), ChatMessage::user(prompt)];
1018 if let Err(detail) =
1019 preflight_chat_request(model, &messages, &[], self.current_context_length())
1020 {
1021 return Err(format_runtime_failure_message(&detail));
1022 }
1023
1024 let p = self.provider.read().await;
1025 let res = p
1026 .call_with_tools(&messages, &[], Some(model))
1027 .await
1028 .map_err(|e| format_runtime_failure_message(&e))?;
1029
1030 res.content
1031 .ok_or_else(|| "Empty response from model".to_string())
1032 }
1033
1034 #[allow(dead_code)]
1038 pub fn snip_history(
1039 &self,
1040 turns: &[ChatMessage],
1041 max_tokens_estimate: usize,
1042 keep_recent: usize,
1043 ) -> Vec<ChatMessage> {
1044 let total_chars: usize = turns.iter().map(|m| m.content.as_str().len()).sum();
1045 if total_chars / 4 <= max_tokens_estimate {
1046 return turns.to_vec();
1047 }
1048 let keep = keep_recent.min(turns.len());
1049 let mut snipped = vec![turns[0].clone()];
1050 if turns.len() > keep + 1 {
1051 snipped.push(ChatMessage::system(&format!(
1052 "[CONTEXT SNIPPED: {} earlier turns pruned to preserve VRAM]",
1053 turns.len() - keep - 1
1054 )));
1055 snipped.extend_from_slice(&turns[turns.len() - keep..]);
1056 } else {
1057 snipped = turns.to_vec();
1058 }
1059 snipped
1060 }
1061}
1062
1063fn estimate_serialized_tokens<T: Serialize + ?Sized>(value: &T) -> usize {
1064 serde_json::to_vec(value)
1065 .ok()
1066 .map_or(0, |bytes| bytes.len() / 4 + 1)
1067}
1068
1069const IMAGE_PART_TOKEN_ESTIMATE: usize = 1024;
1070
1071pub fn estimate_message_tokens(message: &ChatMessage) -> usize {
1072 let content_tokens = match &message.content {
1073 MessageContent::Text(s) => s.len() / 4 + 1,
1074 MessageContent::Parts(parts) => parts
1075 .iter()
1076 .map(|part| match part {
1077 ContentPart::Text { text } => text.len() / 4 + 1,
1078 ContentPart::ImageUrl { .. } => IMAGE_PART_TOKEN_ESTIMATE,
1081 })
1082 .sum(),
1083 };
1084 let tool_tokens: usize = message
1085 .tool_calls
1086 .iter()
1087 .flatten()
1088 .map(|call| (call.function.name.len() + call.function.arguments.to_string().len()) / 4 + 4)
1089 .sum();
1090 content_tokens + tool_tokens + 6
1091}
1092
1093pub fn estimate_message_batch_tokens(messages: &[ChatMessage]) -> usize {
1094 messages.iter().map(estimate_message_tokens).sum()
1095}
1096
1097fn reserved_output_tokens(context_length: usize) -> usize {
1098 let proportional = (context_length / 8).max(MIN_RESERVED_OUTPUT_TOKENS);
1099 proportional.min(MAX_RESERVED_OUTPUT_TOKENS)
1100}
1101
1102pub fn estimate_prompt_pressure(
1103 messages: &[ChatMessage],
1104 tools: &[ToolDefinition],
1105 context_length: usize,
1106) -> (usize, usize, usize, u8) {
1107 let estimated_input_tokens =
1108 estimate_message_batch_tokens(messages) + estimate_serialized_tokens(tools) + 32;
1109 let reserved_output = reserved_output_tokens(context_length);
1110 let estimated_total = estimated_input_tokens.saturating_add(reserved_output);
1111 let percent = (estimated_total.saturating_mul(100))
1112 .checked_div(context_length)
1113 .unwrap_or(0)
1114 .min(100) as u8;
1115 (
1116 estimated_input_tokens,
1117 reserved_output,
1118 estimated_total,
1119 percent,
1120 )
1121}
1122
1123fn preflight_chat_request(
1124 model: &str,
1125 messages: &[ChatMessage],
1126 tools: &[ToolDefinition],
1127 context_length: usize,
1128) -> Result<(), String> {
1129 let (estimated_input_tokens, reserved_output, estimated_total, _) =
1130 estimate_prompt_pressure(messages, tools, context_length);
1131
1132 if estimated_total > context_length {
1133 return Err(format!(
1134 "context_window_blocked for {}: estimated input {} + reserved output {} = {} tokens exceeds the {}-token context window; narrow the request, compact the session, or preserve grounded tool output instead of restyling it.",
1135 model, estimated_input_tokens, reserved_output, estimated_total, context_length
1136 ));
1137 }
1138
1139 Ok(())
1140}
1141
1142fn load_instruction_files() -> String {
1148 use std::collections::hash_map::DefaultHasher;
1149 use std::collections::HashSet;
1150 use std::hash::{Hash, Hasher};
1151
1152 let Ok(cwd) = std::env::current_dir() else {
1153 return String::new();
1154 };
1155
1156 static CACHE: std::sync::Mutex<Option<(String, String)>> = std::sync::Mutex::new(None);
1158 let cwd_key = cwd.to_string_lossy().into_owned();
1159 if let Ok(g) = CACHE.lock() {
1160 if let Some((ref k, ref v)) = *g {
1161 if *k == cwd_key {
1162 return v.clone();
1163 }
1164 }
1165 }
1166 let mut result = String::with_capacity(4096);
1167 let mut seen: HashSet<u64> = HashSet::new();
1168 let mut total_chars: usize = 0;
1169 const MAX_TOTAL: usize = 12_000;
1170 const MAX_PER_FILE: usize = 4_000;
1171
1172 let mut dir = cwd.clone();
1173 for _ in 0..4 {
1174 for name in crate::agent::instructions::PROJECT_GUIDANCE_FILES {
1175 let path = crate::agent::instructions::resolve_guidance_path(&dir, name);
1176 if !path.exists() {
1177 continue;
1178 }
1179 let Ok(content) = std::fs::read_to_string(&path) else {
1180 continue;
1181 };
1182 if content.trim().is_empty() {
1183 continue;
1184 }
1185
1186 let mut hasher = DefaultHasher::new();
1187 content.hash(&mut hasher);
1188 let h = hasher.finish();
1189 if !seen.insert(h) {
1190 continue;
1191 }
1192
1193 let truncated = if content.len() > MAX_PER_FILE {
1194 format!("{}...[truncated]", &content[..MAX_PER_FILE])
1195 } else {
1196 content
1197 };
1198
1199 if total_chars + truncated.len() > MAX_TOTAL {
1200 break;
1201 }
1202 total_chars += truncated.len();
1203 let _ = write!(result, "\n--- {} ---\n{}\n", path.display(), truncated);
1204 }
1205 match dir.parent().map(|p| p.to_owned()) {
1206 Some(p) => dir = p,
1207 None => break,
1208 }
1209 }
1210
1211 let output = if result.is_empty() {
1212 String::new()
1213 } else {
1214 format!("\n\n# Project Instructions And Skills\n{}", result)
1215 };
1216 if let Ok(mut g) = CACHE.lock() {
1217 *g = Some((cwd_key, output.clone()));
1218 }
1219 output
1220}
1221
1222fn load_agent_skill_catalog() -> String {
1223 static CACHE: std::sync::Mutex<Option<(String, String)>> = std::sync::Mutex::new(None);
1224 let workspace_root = crate::tools::file_ops::workspace_root();
1225 let cwd_key = workspace_root.to_string_lossy().into_owned();
1226 if let Ok(g) = CACHE.lock() {
1227 if let Some((ref k, ref v)) = *g {
1228 if *k == cwd_key {
1229 return v.clone();
1230 }
1231 }
1232 }
1233
1234 let config = crate::agent::config::load_config();
1235 let discovery =
1236 crate::agent::instructions::discover_agent_skills(&workspace_root, &config.trust);
1237 let output = crate::agent::instructions::render_skill_catalog(&discovery, 6_000)
1238 .map(|rendered| format!("\n\n{}", rendered))
1239 .unwrap_or_default();
1240 if let Ok(mut g) = CACHE.lock() {
1241 *g = Some((cwd_key, output.clone()));
1242 }
1243 output
1244}
1245
/// Returns the reasoning text enclosed by the first `<|channel>thought` marker.
///
/// Markers are matched ASCII case-insensitively. The body runs from just
/// after the opening marker to the next `<channel|>`, or to the end of the
/// text when no closing marker follows. Returns `None` when there is no
/// opening marker or the trimmed body is empty.
pub fn extract_think_block(text: &str) -> Option<String> {
    const OPEN: &str = "<|channel>thought";
    const CLOSE: &str = "<channel|>";

    // ASCII lowercasing keeps byte offsets aligned with the original `text`.
    let folded = text.to_ascii_lowercase();

    let body_start = folded.find(OPEN)? + OPEN.len();
    let body_end = match folded[body_start..].find(CLOSE) {
        Some(rel) => body_start + rel,
        None => text.len(),
    };

    let body = text[body_start..body_end].trim();
    (!body.is_empty()).then(|| body.to_string())
}
1270
/// Removes model reasoning output from `text`, returning only the visible answer.
///
/// The checks run in a fixed order and the first one that applies decides the
/// result:
/// 1. A leading `</think>` (ASCII case-insensitive, after leading whitespace)
///    is dropped before anything else is examined.
/// 2. If a `<channel|>` marker is present, only the text after its first
///    occurrence is kept, with remaining channel markers removed.
/// 3. Otherwise, if any opening reasoning tag is present, only the text before
///    the earliest one is kept; a tag at offset 0 yields an empty string.
/// 4. Otherwise, if the text contains one of the untagged reasoning phrases,
///    leading lines that are blank or contain such a phrase are dropped.
/// 5. Otherwise, leftover markup is removed by `strip_xml_tool_call_artifacts`.
///
/// Non-empty results are trimmed and passed through one
/// `replace("\n\n\n", "\n\n")`.
pub fn strip_think_blocks(text: &str) -> String {
    // Step 1: drop a stray closing tag at the very start of the reply.
    let text = {
        let t = text.trim_start();
        // `get(..8)` returns `None` for short input or a non-boundary offset,
        // so the `&t[8..]` slice below cannot panic.
        if t.get(..8)
            .map(|s| s.eq_ignore_ascii_case("</think>"))
            .unwrap_or(false)
        {
            &t[8..]
        } else {
            text
        }
    };

    // ASCII lowercasing preserves byte offsets, so indices found in `lower`
    // are valid in `text`.
    let lower = text.to_ascii_lowercase();

    // Step 2: everything after the first channel-close marker is the answer.
    if let Some(end) = lower.find("<channel|>").map(|i| i + "<channel|>".len()) {
        let answer = text[end..]
            .replace("<|channel>thought", "")
            .replace("<channel|>", "");
        return answer.trim().replace("\n\n\n", "\n\n").to_string();
    }

    // Step 3: find the earliest opening reasoning tag of any supported spelling.
    let first_open = [
        lower.find("<|channel>thought"), lower.find("<think>"),
        lower.find("<thinking>"),
        lower.find("<thought>"),
        lower.find("<|think|>"),
    ]
    .iter()
    .filter_map(|&x| x)
    .min();

    if let Some(start) = first_open {
        if start > 0 {
            // Keep only what preceded the reasoning block.
            return text[..start].trim().replace("\n\n\n", "\n\n").to_string();
        }
        // The reply opens with a reasoning tag: nothing is kept.
        return String::new();
    }

    // Step 4: phrases treated as signs of untagged reasoning. The matcher is
    // built once and reused across calls.
    static NAKED_AC: std::sync::OnceLock<aho_corasick::AhoCorasick> = std::sync::OnceLock::new();
    let naked_ac = NAKED_AC.get_or_init(|| {
        aho_corasick::AhoCorasick::builder()
            .ascii_case_insensitive(true)
            .build([
                "the user asked",
                "the user is asking",
                "the user wants",
                "i will structure",
                "i should provide",
                "i should give",
                "i should avoid",
                "i should note",
                "i should focus",
                "i should keep",
                "i should respond",
                "i should present",
                "i should display",
                "i should show",
                "i need to",
                "i can see from",
                "without being overly",
                "let me ",
                "necessary information in my identity",
                "was computed successfully",
                "computed successfully",
            ])
            .expect("valid patterns")
    });
    let is_naked_reasoning = naked_ac.find(text).is_some();
    if is_naked_reasoning {
        let lines: Vec<&str> = text.lines().collect();
        if !lines.is_empty() {
            let mut start_idx = 0;
            // Skip the leading run of reasoning or blank lines; stop at the
            // first line that is neither.
            for (i, line) in lines.iter().enumerate() {
                let is_reasoning_line = naked_ac.find(line).is_some() || line.trim().is_empty();
                if is_reasoning_line {
                    start_idx = i + 1;
                } else {
                    break;
                }
            }
            if start_idx < lines.len() {
                return lines[start_idx..]
                    .join("\n")
                    .trim()
                    .replace("\n\n\n", "\n\n")
                    .to_string();
            }
            // Every line looked like reasoning: nothing is kept.
            return String::new();
        }
    }

    // Step 5: no reasoning detected; only scrub leftover markup tokens.
    let cleaned = strip_xml_tool_call_artifacts(text);
    cleaned.trim().replace("\n\n\n", "\n\n").to_string()
}
1380
1381fn strip_xml_tool_call_artifacts(text: &str) -> String {
1384 use aho_corasick::AhoCorasick;
1385 use std::sync::OnceLock;
1386
1387 const XML_ARTIFACTS: &[&str] = &[
1389 "</tool_call>",
1390 "<tool_call>",
1391 "</function>",
1392 "<function>",
1393 "</parameter>",
1394 "<parameter>",
1395 "</arguments>",
1396 "<arguments>",
1397 "</tool_use>",
1398 "<tool_use>",
1399 "</invoke>",
1400 "<invoke>",
1401 "</think>",
1403 "<thinking>",
1404 "</thought>",
1405 "</thinking>",
1406 "<|turn>system",
1408 "<|turn>user",
1409 "<|turn>assistant",
1410 "<|turn>tool",
1411 "<turn|>",
1412 "<|think|>",
1413 "<|im_start|>",
1415 "<|im_end|>",
1416 "<|endoftext|>",
1417 ];
1418
1419 static ARTIFACT_AC: OnceLock<AhoCorasick> = OnceLock::new();
1423 let ac = ARTIFACT_AC.get_or_init(|| {
1424 let lowered: Vec<String> = XML_ARTIFACTS.iter().map(|s| s.to_lowercase()).collect();
1425 AhoCorasick::new(&lowered).expect("valid XML artifact patterns")
1426 });
1427
1428 let lower = text.to_ascii_lowercase();
1429
1430 if ac.find(&lower).is_none() {
1432 return text.to_string();
1433 }
1434
1435 let spans: Vec<(usize, usize)> = ac.find_iter(&lower).map(|m| (m.start(), m.end())).collect();
1438 let mut out = text.to_string();
1439 for (start, end) in spans.into_iter().rev() {
1440 out.drain(start..end);
1441 }
1442 out
1443}
1444
1445fn re_gemma_call() -> &'static regex::Regex {
1449 use std::sync::OnceLock;
1450 static RE: OnceLock<regex::Regex> = OnceLock::new();
1451 RE.get_or_init(|| {
1452 regex::Regex::new(r#"(?s)<\|?tool_call\|?>\s*call:([A-Za-z_][A-Za-z0-9_]*)\{(.*?)\}(?:<\|?tool_call\|?>|\[END_TOOL_REQUEST\])"#)
1453 .expect("valid gemma call regex")
1454 })
1455}
1456fn re_gemma_arg() -> &'static regex::Regex {
1457 use std::sync::OnceLock;
1458 static RE: OnceLock<regex::Regex> = OnceLock::new();
1459 RE.get_or_init(|| {
1460 regex::Regex::new(r#"(\w+):(?:<\|"\|>(.*?)<\|"\|>|([^,}]*))"#)
1461 .expect("valid gemma arg regex")
1462 })
1463}
1464fn re_xml_call() -> &'static regex::Regex {
1465 use std::sync::OnceLock;
1466 static RE: OnceLock<regex::Regex> = OnceLock::new();
1467 RE.get_or_init(|| {
1468 regex::Regex::new(r#"(?s)<tool_call>\s*<function=([A-Za-z_][A-Za-z0-9_]*)>(.*?)(?:</function>)?\s*</tool_call>"#)
1469 .expect("valid xml call regex")
1470 })
1471}
1472fn re_xml_param() -> &'static regex::Regex {
1473 use std::sync::OnceLock;
1474 static RE: OnceLock<regex::Regex> = OnceLock::new();
1475 RE.get_or_init(|| {
1476 regex::Regex::new(r#"(?s)<parameter=([A-Za-z_][A-Za-z0-9_]*)>(.*?)</parameter>"#)
1477 .expect("valid xml param regex")
1478 })
1479}
1480fn re_short_call() -> &'static regex::Regex {
1481 use std::sync::OnceLock;
1482 static RE: OnceLock<regex::Regex> = OnceLock::new();
1483 RE.get_or_init(|| {
1484 regex::Regex::new(r#"(?s)<tool_call>\s*([A-Za-z_][A-Za-z0-9_]*)\((.*?)\)\s*</tool_call>"#)
1485 .expect("valid short call regex")
1486 })
1487}
1488fn re_short_arg() -> &'static regex::Regex {
1489 use std::sync::OnceLock;
1490 static RE: OnceLock<regex::Regex> = OnceLock::new();
1491 RE.get_or_init(|| {
1492 regex::Regex::new(
1493 r#"([A-Za-z_][A-Za-z0-9_]*)\s*=\s*(?:"((?:\\.|[^"])*)"|'((?:\\.|[^'])*)'|([^,\)]+))"#,
1494 )
1495 .expect("valid short arg regex")
1496 })
1497}
1498fn re_strip_gemma_call() -> &'static regex::Regex {
1499 use std::sync::OnceLock;
1500 static RE: OnceLock<regex::Regex> = OnceLock::new();
1501 RE.get_or_init(|| {
1502 regex::Regex::new(r#"(?s)<\|?tool_call\|?>\s*call:[A-Za-z_][A-Za-z0-9_]*\{.*?\}(?:<\|?tool_call\|?>|\[END_TOOL_REQUEST\])"#)
1503 .expect("valid strip gemma call regex")
1504 })
1505}
1506fn re_strip_xml() -> &'static regex::Regex {
1507 use std::sync::OnceLock;
1508 static RE: OnceLock<regex::Regex> = OnceLock::new();
1509 RE.get_or_init(|| {
1510 regex::Regex::new(r#"(?s)<tool_call>\s*<function=.*?>.*?</tool_call>"#)
1511 .expect("valid strip xml regex")
1512 })
1513}
1514fn re_strip_short() -> &'static regex::Regex {
1515 use std::sync::OnceLock;
1516 static RE: OnceLock<regex::Regex> = OnceLock::new();
1517 RE.get_or_init(|| {
1518 regex::Regex::new(r#"(?s)<tool_call>\s*[A-Za-z_][A-Za-z0-9_]*\(.*?\)\s*</tool_call>"#)
1519 .expect("valid strip short regex")
1520 })
1521}
1522fn re_strip_response() -> &'static regex::Regex {
1523 use std::sync::OnceLock;
1524 static RE: OnceLock<regex::Regex> = OnceLock::new();
1525 RE.get_or_init(|| {
1526 regex::Regex::new(
1527 r#"(?s)<\|tool_response\|?>.*?(?:<\|tool_response\|?>|<tool_response\|>)"#,
1528 )
1529 .expect("valid strip response regex")
1530 })
1531}
1532
1533pub fn extract_native_tool_calls(text: &str) -> Vec<ToolCallResponse> {
1536 let mut results = Vec::new();
1537
1538 let re_call = re_gemma_call();
1540 let re_arg = re_gemma_arg();
1541
1542 for cap in re_call.captures_iter(text) {
1543 let name = cap[1].to_string();
1544 let args_str = &cap[2];
1545 let mut arguments = serde_json::Map::new();
1546
1547 for arg_cap in re_arg.captures_iter(args_str) {
1548 let key = arg_cap[1].to_string();
1549 let val_raw = arg_cap
1550 .get(2)
1551 .map(|m| m.as_str())
1552 .or_else(|| arg_cap.get(3).map(|m| m.as_str()))
1553 .unwrap_or("")
1554 .trim();
1555 let normalized_raw = normalize_string_arg(&val_raw.replace("\\\"", "\""));
1556
1557 let val = if normalized_raw == "true" {
1558 Value::Bool(true)
1559 } else if normalized_raw == "false" {
1560 Value::Bool(false)
1561 } else if let Ok(n) = normalized_raw.parse::<i64>() {
1562 Value::Number(n.into())
1563 } else if let Ok(n) = normalized_raw.parse::<u64>() {
1564 Value::Number(n.into())
1565 } else if let Ok(n) = normalized_raw.parse::<f64>() {
1566 serde_json::Number::from_f64(n)
1567 .map(Value::Number)
1568 .unwrap_or(Value::String(normalized_raw.clone()))
1569 } else {
1570 Value::String(normalized_raw)
1571 };
1572
1573 arguments.insert(key, val);
1574 }
1575
1576 results.push(ToolCallResponse {
1577 id: format!("call_{}", rand::random::<u32>()),
1578 call_type: "function".to_string(),
1579 function: ToolCallFn {
1580 name,
1581 arguments: Value::Object(arguments),
1582 },
1583 index: None,
1584 });
1585 }
1586
1587 for cap in re_xml_call().captures_iter(text) {
1589 let name = cap[1].to_string();
1590 let body = &cap[2];
1591 let mut arguments = serde_json::Map::new();
1592
1593 for p_cap in re_xml_param().captures_iter(body) {
1594 let key = p_cap[1].to_string();
1595 let val_raw = p_cap[2].trim();
1596 let val = if val_raw == "true" {
1597 Value::Bool(true)
1598 } else if val_raw == "false" {
1599 Value::Bool(false)
1600 } else if let Ok(n) = val_raw.parse::<i64>() {
1601 Value::Number(n.into())
1602 } else if let Ok(n) = val_raw.parse::<u64>() {
1603 Value::Number(n.into())
1604 } else {
1605 Value::String(val_raw.to_string())
1606 };
1607 arguments.insert(key, val);
1608 }
1609
1610 results.push(ToolCallResponse {
1611 id: format!("call_{}", rand::random::<u32>()),
1612 call_type: "function".to_string(),
1613 function: ToolCallFn {
1614 name,
1615 arguments: Value::Object(arguments),
1616 },
1617 index: None,
1618 });
1619 }
1620
1621 for cap in re_short_call().captures_iter(text) {
1623 let name = cap[1].to_string();
1624 let args_str = cap[2].trim();
1625 let mut arguments = serde_json::Map::new();
1626
1627 for arg_cap in re_short_arg().captures_iter(args_str) {
1628 let key = arg_cap[1].to_string();
1629 let val_raw = arg_cap
1630 .get(2)
1631 .or_else(|| arg_cap.get(3))
1632 .or_else(|| arg_cap.get(4))
1633 .map(|m| m.as_str())
1634 .unwrap_or("")
1635 .trim();
1636 let normalized_raw = normalize_string_arg(&val_raw.replace("\\\"", "\""));
1637
1638 let val = if normalized_raw == "true" {
1639 Value::Bool(true)
1640 } else if normalized_raw == "false" {
1641 Value::Bool(false)
1642 } else if let Ok(n) = normalized_raw.parse::<i64>() {
1643 Value::Number(n.into())
1644 } else if let Ok(n) = normalized_raw.parse::<u64>() {
1645 Value::Number(n.into())
1646 } else if let Ok(n) = normalized_raw.parse::<f64>() {
1647 serde_json::Number::from_f64(n)
1648 .map(Value::Number)
1649 .unwrap_or(Value::String(normalized_raw.clone()))
1650 } else {
1651 Value::String(normalized_raw)
1652 };
1653
1654 arguments.insert(key, val);
1655 }
1656
1657 results.push(ToolCallResponse {
1658 id: format!("call_{}", rand::random::<u32>()),
1659 call_type: "function".to_string(),
1660 function: ToolCallFn {
1661 name,
1662 arguments: Value::Object(arguments),
1663 },
1664 index: None,
1665 });
1666 }
1667
1668 results
1669}
1670
1671pub fn normalize_tool_argument_string(tool_name: &str, raw: &str) -> String {
1672 let trimmed = raw.trim();
1673 let candidate = unwrap_json_string_once(trimmed).unwrap_or_else(|| trimmed.to_string());
1674
1675 let mut value = match serde_json::from_str::<Value>(&candidate) {
1676 Ok(v) => v,
1677 Err(_) => return candidate,
1678 };
1679 normalize_tool_argument_value(tool_name, &mut value);
1680 value.to_string()
1681}
1682
1683pub fn normalize_tool_argument_value(tool_name: &str, value: &mut Value) {
1684 match value {
1685 Value::String(s) => *s = normalize_string_arg(s),
1686 Value::Array(items) => {
1687 for item in items {
1688 normalize_tool_argument_value(tool_name, item);
1689 }
1690 }
1691 Value::Object(map) => {
1692 for val in map.values_mut() {
1693 normalize_tool_argument_value(tool_name, val);
1694 }
1695 if tool_name == "grep_files" {
1696 if let Some(Value::String(pattern)) = map.get_mut("pattern") {
1697 *pattern = normalize_regex_pattern(pattern);
1698 }
1699 }
1700 for key in ["path", "extension", "query", "command", "reason"] {
1701 if let Some(Value::String(s)) = map.get_mut(key) {
1702 *s = normalize_string_arg(s);
1703 }
1704 }
1705 }
1706 _ => {}
1707 }
1708}
1709
/// Removes exactly one layer of matching quotes (`"`, `'` or a backtick) from `input`.
///
/// Returns `None` when the input is not wrapped in a matching pair. Otherwise
/// the inner text has `\"` replaced by `"`, then `\\` replaced by `\`, and is
/// returned trimmed.
fn unwrap_json_string_once(input: &str) -> Option<String> {
    // A lone quote character fails here: after the prefix is removed there is
    // nothing left to serve as the closing quote.
    let inner = ['"', '\'', '`']
        .iter()
        .find_map(|&quote| input.strip_prefix(quote)?.strip_suffix(quote))?;

    let unescaped = inner.replace("\\\"", "\"").replace("\\\\", "\\");
    Some(unescaped.trim().to_string())
}
1723
/// Trims `input` and peels off every layer of matching surrounding quotes.
///
/// A layer is a pair of identical `"`, `'` or backtick characters at both ends
/// of the text; whitespace is trimmed again after each layer is removed.
/// Unbalanced or mismatched quotes are left in place.
fn normalize_string_arg(input: &str) -> String {
    const QUOTES: [char; 3] = ['"', '\'', '`'];

    let mut current = input.trim();
    while let Some(inner) = QUOTES
        .iter()
        .find_map(move |&quote| current.strip_prefix(quote)?.strip_suffix(quote))
    {
        current = inner.trim();
    }
    current.to_string()
}
1744
1745fn normalize_regex_pattern(input: &str) -> String {
1746 let out = normalize_string_arg(input);
1747 if out.len() >= 2 && out.starts_with('/') && out.ends_with('/') {
1748 out[1..out.len() - 1].to_string()
1749 } else {
1750 out
1751 }
1752}
1753
1754fn prepare_gemma_native_messages(messages: &[ChatMessage]) -> Vec<ChatMessage> {
1755 let mut system_blocks = Vec::with_capacity(2);
1756 let mut prepared = Vec::with_capacity(messages.len());
1757 let mut seeded = false;
1758
1759 for message in messages {
1760 if message.role == "system" {
1761 let cleaned = strip_legacy_turn_wrappers(message.content.as_str())
1762 .trim()
1763 .to_string();
1764 if !cleaned.is_empty() {
1765 system_blocks.push(cleaned);
1766 }
1767 continue;
1768 }
1769
1770 let mut clone = message.clone();
1771 clone.content = MessageContent::Text(strip_legacy_turn_wrappers(message.content.as_str()));
1772
1773 if !seeded && message.role == "user" {
1774 let content_str = clone.content.as_str();
1775 let mut merged = String::with_capacity(
1776 system_blocks.iter().map(|s| s.len()).sum::<usize>()
1777 + system_blocks.len().saturating_sub(1) * 2
1778 + content_str.len()
1779 + 40,
1780 );
1781 if !system_blocks.is_empty() {
1782 merged.push_str("System instructions for this turn:\n");
1783 merged.push_str(&system_blocks.join("\n\n"));
1784 merged.push_str("\n\n");
1785 }
1786 merged.push_str(content_str);
1787 clone.content = MessageContent::Text(merged);
1788 seeded = true;
1789 }
1790
1791 prepared.push(clone);
1792 }
1793
1794 if !seeded && !system_blocks.is_empty() {
1795 prepared.insert(
1796 0,
1797 ChatMessage::user(&format!(
1798 "System instructions for this turn:\n{}",
1799 system_blocks.join("\n\n")
1800 )),
1801 );
1802 }
1803
1804 prepared
1805}
1806
1807fn strip_legacy_turn_wrappers(text: &str) -> String {
1808 static AC: std::sync::OnceLock<aho_corasick::AhoCorasick> = std::sync::OnceLock::new();
1809 let ac = AC.get_or_init(|| {
1810 aho_corasick::AhoCorasick::new([
1811 "<|turn>system\n",
1812 "<|turn>user\n",
1813 "<|turn>assistant\n",
1814 "<|turn>tool\n",
1815 "<turn|>",
1816 ])
1817 .expect("valid turn wrapper patterns")
1818 });
1819 ac.replace_all(text, &["", "", "", "", ""])
1820 .trim()
1821 .to_string()
1822}
1823
1824pub fn strip_native_tool_call_text(text: &str) -> String {
1825 let without_calls = re_strip_gemma_call().replace_all(text, "");
1826 let without_xml = re_strip_xml().replace_all(without_calls.as_ref(), "");
1827 let without_short = re_strip_short().replace_all(without_xml.as_ref(), "");
1828 re_strip_response()
1829 .replace_all(without_short.as_ref(), "")
1830 .trim()
1831 .to_string()
1832}
1833
/// Decides which context length to report for the effective model.
///
/// * No model (`"no model loaded"` or a blank name) → `0`.
/// * A positive `detected_context` → that value.
/// * Nothing detected but the model is unchanged → `previous_context`.
/// * Nothing detected and the model changed → `0`.
fn resolve_runtime_context(
    previous_model: &str,
    previous_context: usize,
    effective_model: &str,
    detected_context: usize,
) -> usize {
    let model_missing =
        effective_model.trim().is_empty() || effective_model == "no model loaded";
    if model_missing {
        return 0;
    }
    if detected_context > 0 {
        return detected_context;
    }
    if effective_model == previous_model {
        previous_context
    } else {
        0
    }
}
1850
// Unit tests for prompt construction, inline tool-call extraction, reasoning
// stripping, context resolution, and guidance/skill loading.
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    // The generated system prompt must mention the running crate version.
    #[test]
    fn system_prompt_includes_running_hematite_version() {
        let engine = InferenceEngine::new(
            "http://localhost:1234/v1".to_string(),
            "strategist".to_string(),
            0,
        )
        .expect("engine");

        let system = engine.build_system_prompt(0, 50, false, true, &[], None, None, &[]);
        assert!(system.contains(crate::HEMATITE_VERSION));
    }

    // A `call:NAME{...}` call opened with `<|tool_call>` and closed with
    // `<tool_call|>` is parsed, numeric arguments become numbers, and the
    // escaped quotes around the path are removed.
    #[test]
    fn extracts_gemma_native_tool_call_with_mixed_tool_call_tags() {
        let text = r#"<|channel>thought
Reading the next chunk.<channel|>The startup banner wording is likely defined within the UI drawing logic.
<|tool_call>call:read_file{limit:100,offset:100,path:\"src/ui/tui.rs\"}<tool_call|>"#;

        let calls = extract_native_tool_calls(text);
        assert_eq!(calls.len(), 1);
        assert_eq!(calls[0].function.name, "read_file");

        let args: Value = calls[0].function.arguments.clone();
        assert_eq!(args.get("limit").and_then(|v| v.as_i64()), Some(100));
        assert_eq!(args.get("offset").and_then(|v| v.as_i64()), Some(100));
        assert_eq!(
            args.get("path").and_then(|v| v.as_str()),
            Some("src/ui/tui.rs")
        );

        let stripped = strip_native_tool_call_text(text);
        assert!(!stripped.contains("<|tool_call"));
        assert!(!stripped.contains("<tool_call|>"));
    }

    // Tool-response markers the model invented between calls must not survive
    // stripping, and both real calls must still be extracted in order.
    #[test]
    fn strips_hallucinated_tool_responses_from_native_tool_transcript() {
        let text = r#"<|channel>thought
Planning.
<channel|><|tool_call>call:list_files{extension:<|\"|>rs<|\"|>,path:<|\"|>src/<|\"|>}<tool_call|><|tool_response>thought
Mapped src.
<channel|><|tool_call>call:read_file{limit:100,offset:0,path:<|\"|>src/main.rs<|\"|>}<tool_call|><|tool_response>thought
Read main.
<channel|>"#;

        let calls = extract_native_tool_calls(text);
        assert_eq!(calls.len(), 2);
        assert_eq!(calls[0].function.name, "list_files");
        assert_eq!(calls[1].function.name, "read_file");

        let stripped = strip_native_tool_call_text(text);
        assert!(!stripped.contains("<|tool_call"));
        assert!(!stripped.contains("<|tool_response"));
        assert!(!stripped.contains("<tool_response|>"));
    }

    // `create_directory` must be classified as workspace-mutating.
    #[test]
    fn create_directory_is_treated_as_mutating_repo_write() {
        let metadata = tool_metadata_for_name("create_directory");
        assert!(metadata.mutates_workspace);
        assert!(!metadata.read_only_friendly);
    }

    // The `<function=NAME>` / `<parameter=KEY>` format is parsed and the
    // multi-line parameter values are trimmed.
    #[test]
    fn extracts_qwen_xml_tool_calls_from_reasoning() {
        let text = r#"Based on the project structure, I need to check the binary.
<tool_call>
<function=shell>
<parameter=command>
ls -la hematite.exe
</parameter>
<parameter=reason>
Check if the binary exists
</parameter>
</function>
</tool_call>"#;

        let calls = extract_native_tool_calls(text);
        assert_eq!(calls.len(), 1);
        assert_eq!(calls[0].function.name, "shell");

        let args: Value = calls[0].function.arguments.clone();
        assert_eq!(
            args.get("command").and_then(|v| v.as_str()),
            Some("ls -la hematite.exe")
        );
        assert_eq!(
            args.get("reason").and_then(|v| v.as_str()),
            Some("Check if the binary exists")
        );

        let stripped = strip_native_tool_call_text(text);
        assert!(!stripped.contains("<tool_call>"));
        assert!(!stripped.contains("<function=shell>"));
    }

    // The shorthand `NAME(key="value")` format is parsed and then stripped
    // from the visible text.
    #[test]
    fn extracts_shorthand_tool_calls_from_reasoning() {
        let text = r#"<thinking>
The user wants a search first.
</thinking>

I'll search before continuing.

<tool_call>research_web(query="uefn toolbelt python automation unreal engine fortnite")</tool_call>"#;

        let calls = extract_native_tool_calls(text);
        assert_eq!(calls.len(), 1);
        assert_eq!(calls[0].function.name, "research_web");

        let args: Value = calls[0].function.arguments.clone();
        assert_eq!(
            args.get("query").and_then(|v| v.as_str()),
            Some("uefn toolbelt python automation unreal engine fortnite")
        );

        let stripped = strip_native_tool_call_text(text);
        assert!(!stripped.contains("<tool_call>"));
        assert!(!stripped.contains("research_web(query="));
    }

    // A reply that opens with `<thinking>` yields an empty string, even when
    // text follows the closing tag.
    #[test]
    fn strips_thinking_tag_as_reasoning_prefix() {
        let cleaned =
            strip_think_blocks("<thinking>\nThe user wants a search.\n</thinking>\nVisible answer");
        assert_eq!(cleaned, "");
    }

    // With no model loaded the context length is reported as zero.
    #[test]
    fn resolve_runtime_context_returns_zero_when_no_model_loaded() {
        assert_eq!(
            resolve_runtime_context("qwen/qwen3.5-9b", 32000, "no model loaded", 0),
            0
        );
    }

    // The previous context length is reused only when the model is unchanged.
    #[test]
    fn resolve_runtime_context_preserves_previous_only_for_same_model() {
        assert_eq!(
            resolve_runtime_context("qwen/qwen3.5-9b", 32000, "qwen/qwen3.5-9b", 0),
            32000
        );
        assert_eq!(
            resolve_runtime_context("qwen/qwen3.5-9b", 32000, "bonsai-8b", 0),
            0
        );
    }

    // A guidance file in the working directory is picked up. The test changes
    // the process working directory, so it holds the shared cwd lock.
    #[test]
    fn load_instruction_files_includes_workspace_guidance_files() {
        let _cwd_lock = crate::TEST_CWD_LOCK
            .lock()
            .unwrap_or_else(|e| e.into_inner());
        let temp = tempfile::tempdir().unwrap();
        // The directory restored afterwards is the crate manifest directory,
        // not whatever the working directory was before the test.
        let previous = env!("CARGO_MANIFEST_DIR");

        fs::write(
            temp.path().join("SKILLS.md"),
            "# Workspace Skills\n- Prefer API-first changes before UI polish.",
        )
        .unwrap();

        std::env::set_current_dir(temp.path()).unwrap();
        let loaded = load_instruction_files();
        std::env::set_current_dir(previous).unwrap();

        assert!(loaded.contains("SKILLS.md"));
        assert!(loaded.contains("Prefer API-first changes before UI polish."));
    }

    // A skill under `.agents/skills/` appears in the rendered catalog. Holds
    // the shared cwd lock for the same reason as the test above.
    #[test]
    fn load_agent_skill_catalog_includes_skill_directory_entries() {
        let _cwd_lock = crate::TEST_CWD_LOCK
            .lock()
            .unwrap_or_else(|e| e.into_inner());
        let temp = tempfile::tempdir().unwrap();
        let previous = env!("CARGO_MANIFEST_DIR");

        std::fs::create_dir_all(temp.path().join(".agents/skills/code-review")).unwrap();
        fs::write(
            temp.path().join(".agents/skills/code-review/SKILL.md"),
            "---\nname: code-review\ndescription: Review diffs and flag regressions.\ncompatibility: Requires git\n---\n",
        )
        .unwrap();

        std::env::set_current_dir(temp.path()).unwrap();
        let loaded = load_agent_skill_catalog();
        std::env::set_current_dir(previous).unwrap();

        assert!(loaded.contains("Agent Skills Catalog"));
        assert!(loaded.contains("code-review"));
        assert!(loaded.contains("Review diffs and flag regressions."));
    }
}