1use serde::Serialize;
2use serde_json::Value;
3use tokio::sync::{mpsc, Semaphore};
4
5pub use crate::agent::economics::{SessionEconomics, ToolRecord};
6pub use crate::agent::types::*;
7
8pub struct InferenceEngine {
11 pub provider:
12 std::sync::Arc<tokio::sync::RwLock<Box<dyn crate::agent::provider::ModelProvider>>>,
13 pub cached_model: std::sync::Arc<std::sync::RwLock<String>>,
14 pub cached_context: std::sync::Arc<std::sync::atomic::AtomicUsize>,
15 pub base_url: String,
16 pub species: String,
17 pub snark: u8,
18 pub kv_semaphore: Semaphore,
19 pub economics: std::sync::Arc<std::sync::Mutex<SessionEconomics>>,
20 pub worker_model: Option<String>,
22 pub gemma_native_formatting: std::sync::Arc<std::sync::atomic::AtomicBool>,
24 pub cancel_token: std::sync::Arc<std::sync::atomic::AtomicBool>,
26}
27
/// Returns `true` when `model` names a Gemma 4 build.
///
/// The check is a case-insensitive (ASCII) substring match on `gemma-4` or
/// `gemma4`, so vendor prefixes and quantisation suffixes do not matter.
pub fn is_hematite_native_model(model: &str) -> bool {
    let normalized = model.to_ascii_lowercase();
    ["gemma-4", "gemma4"]
        .iter()
        .any(|marker| normalized.contains(*marker))
}
32
33fn should_use_native_formatting(engine: &InferenceEngine, model: &str) -> bool {
34 is_hematite_native_model(model) && engine.gemma_native_formatting_enabled()
35}
36
37pub fn tool_metadata_for_name(name: &str) -> ToolMetadata {
40 if name.starts_with("mcp__") {
41 let lower = name.to_ascii_lowercase();
42 let mutates_workspace = [
43 "__edit",
44 "__write",
45 "__create",
46 "__move",
47 "__delete",
48 "__remove",
49 "__rename",
50 "__replace",
51 "__patch",
52 ]
53 .iter()
54 .any(|needle| lower.contains(needle));
55 return ToolMetadata {
56 category: ToolCategory::External,
57 mutates_workspace,
58 external_surface: true,
59 trust_sensitive: true,
60 read_only_friendly: !mutates_workspace,
61 plan_scope: false,
62 };
63 }
64
65 match name {
66 "read_file" | "inspect_lines" | "grep_files" | "list_files" => ToolMetadata {
67 category: ToolCategory::RepoRead,
68 mutates_workspace: false,
69 external_surface: false,
70 trust_sensitive: false,
71 read_only_friendly: true,
72 plan_scope: true,
73 },
74 "create_directory" | "write_file" | "edit_file" | "patch_hunk" | "multi_search_replace" => {
75 ToolMetadata {
76 category: ToolCategory::RepoWrite,
77 mutates_workspace: true,
78 external_surface: false,
79 trust_sensitive: true,
80 read_only_friendly: false,
81 plan_scope: true,
82 }
83 }
84 "trace_runtime_flow" => ToolMetadata {
85 category: ToolCategory::Architecture,
86 mutates_workspace: false,
87 external_surface: false,
88 trust_sensitive: false,
89 read_only_friendly: true,
90 plan_scope: false,
91 },
92 "describe_toolchain" => ToolMetadata {
93 category: ToolCategory::Toolchain,
94 mutates_workspace: false,
95 external_surface: false,
96 trust_sensitive: false,
97 read_only_friendly: true,
98 plan_scope: false,
99 },
100 "shell" => ToolMetadata {
101 category: ToolCategory::Runtime,
102 mutates_workspace: true,
103 external_surface: false,
104 trust_sensitive: true,
105 read_only_friendly: false,
106 plan_scope: false,
107 },
108 "inspect_host" => ToolMetadata {
109 category: ToolCategory::Runtime,
110 mutates_workspace: false,
111 external_surface: false,
112 trust_sensitive: false,
113 read_only_friendly: true,
114 plan_scope: false,
115 },
116 "resolve_host_issue" => ToolMetadata {
117 category: ToolCategory::Runtime,
118 mutates_workspace: true,
119 external_surface: true,
120 trust_sensitive: true,
121 read_only_friendly: false,
122 plan_scope: false,
123 },
124 "run_hematite_maintainer_workflow" => ToolMetadata {
125 category: ToolCategory::Workflow,
126 mutates_workspace: true,
127 external_surface: false,
128 trust_sensitive: true,
129 read_only_friendly: false,
130 plan_scope: false,
131 },
132 "run_workspace_workflow" => ToolMetadata {
133 category: ToolCategory::Workflow,
134 mutates_workspace: true,
135 external_surface: false,
136 trust_sensitive: true,
137 read_only_friendly: false,
138 plan_scope: false,
139 },
140 "verify_build" => ToolMetadata {
141 category: ToolCategory::Verification,
142 mutates_workspace: false,
143 external_surface: false,
144 trust_sensitive: false,
145 read_only_friendly: true,
146 plan_scope: true,
147 },
148 "git_commit" | "git_push" | "git_remote" | "git_onboarding" | "git_worktree" => {
149 ToolMetadata {
150 category: ToolCategory::Git,
151 mutates_workspace: true,
152 external_surface: false,
153 trust_sensitive: true,
154 read_only_friendly: false,
155 plan_scope: false,
156 }
157 }
158 "research_web" | "fetch_docs" => ToolMetadata {
159 category: ToolCategory::Research,
160 mutates_workspace: false,
161 external_surface: false,
162 trust_sensitive: false,
163 read_only_friendly: true,
164 plan_scope: false,
165 },
166 "vision_analyze" => ToolMetadata {
167 category: ToolCategory::Vision,
168 mutates_workspace: false,
169 external_surface: false,
170 trust_sensitive: false,
171 read_only_friendly: true,
172 plan_scope: false,
173 },
174 "lsp_definitions"
175 | "lsp_references"
176 | "lsp_hover"
177 | "lsp_rename_symbol"
178 | "lsp_get_diagnostics"
179 | "lsp_search_symbol" => ToolMetadata {
180 category: ToolCategory::Lsp,
181 mutates_workspace: false,
182 external_surface: false,
183 trust_sensitive: false,
184 read_only_friendly: true,
185 plan_scope: false,
186 },
187 "auto_pin_context" | "list_pinned" | "clarify" => ToolMetadata {
188 category: ToolCategory::Workflow,
189 mutates_workspace: false,
190 external_surface: false,
191 trust_sensitive: false,
192 read_only_friendly: true,
193 plan_scope: true,
194 },
195 "manage_tasks" => ToolMetadata {
196 category: ToolCategory::Workflow,
197 mutates_workspace: false,
198 external_surface: false,
199 trust_sensitive: false,
200 read_only_friendly: true,
201 plan_scope: false,
202 },
203 _ => ToolMetadata {
204 category: ToolCategory::Other,
205 mutates_workspace: false,
206 external_surface: false,
207 trust_sensitive: false,
208 read_only_friendly: true,
209 plan_scope: false,
210 },
211 }
212}
/// Smallest output-token reservation.
// NOTE(review): the consumer is outside the visible part of this file;
// presumably the floor used when budgeting room for the model's reply.
const MIN_RESERVED_OUTPUT_TOKENS: usize = 1_024;
/// Largest output-token reservation.
// NOTE(review): presumably the matching ceiling - confirm against the caller.
const MAX_RESERVED_OUTPUT_TOKENS: usize = 4_096;
219
/// Returns `true` for context windows of at most 8,192 tokens.
///
/// A length of `0` (no profile detected yet) also counts as tiny.
fn is_tiny_context_window(context_length: usize) -> bool {
    const TINY_CONTEXT_CEILING: usize = 8_192;
    context_length <= TINY_CONTEXT_CEILING
}
223
/// Returns `true` for context windows above 8,192 and up to 49,152 tokens,
/// i.e. the band directly above the tiny tier.
fn is_compact_context_window(context_length: usize) -> bool {
    (8_193..=49_152).contains(&context_length)
}
227
228pub fn is_compact_context_window_pub(context_length: usize) -> bool {
229 is_compact_context_window(context_length)
230}
231
/// Recognises provider error text that reports a context-size overflow.
///
/// `lower` is matched verbatim, so the caller must lowercase it first. A hit
/// is either `n_keep` together with `n_ctx`, or any one of the standalone
/// phrases below.
fn is_provider_context_limit_detail(lower: &str) -> bool {
    const STANDALONE_MARKERS: [&str; 4] = [
        "context length",
        "keep from the initial prompt",
        "prompt is greater than the context length",
        "exceeds the context window",
    ];

    let names_keep_and_ctx = lower.contains("n_keep") && lower.contains("n_ctx");
    names_keep_and_ctx
        || STANDALONE_MARKERS
            .iter()
            .any(|marker| lower.contains(*marker))
}
239
240fn classify_runtime_failure_tag(detail: &str) -> &'static str {
241 let lower = detail.to_ascii_lowercase();
242 if lower.contains("context_window_blocked")
243 || lower.contains("context ceiling reached")
244 || lower.contains("exceeds the")
245 || is_provider_context_limit_detail(&lower)
246 {
247 "context_window"
248 } else if lower.contains("empty response from model")
249 || lower.contains("model returned an empty response")
250 {
251 "empty_model_response"
252 } else if lower.contains("action blocked:")
253 || lower.contains("access denied")
254 || lower.contains("declined by user")
255 {
256 "tool_policy_blocked"
257 } else {
258 "provider_degraded"
259 }
260}
261
/// Returns the recovery hint shown for a failure tag.
///
/// Any tag other than `context_window`, `empty_model_response` or
/// `tool_policy_blocked` gets the generic retry hint.
fn runtime_failure_guidance(tag: &str) -> &'static str {
    const CONTEXT_WINDOW: &str = "Narrow the request, compact the session, or preserve grounded tool output instead of restyling it. If LM Studio reports a smaller live n_ctx than Hematite expected, reload or re-detect the model budget before retrying.";
    const EMPTY_RESPONSE: &str = "Retry once automatically, then narrow the turn or restart LM Studio if the model keeps returning nothing.";
    const POLICY_BLOCKED: &str =
        "Stay inside the allowed workflow or switch modes before retrying.";
    const GENERIC: &str =
        "Retry once automatically, then narrow the turn or restart LM Studio if it persists.";

    match tag {
        "context_window" => CONTEXT_WINDOW,
        "empty_model_response" => EMPTY_RESPONSE,
        "tool_policy_blocked" => POLICY_BLOCKED,
        _ => GENERIC,
    }
}
276
277fn format_runtime_failure_message(detail: &str) -> String {
278 let tag = classify_runtime_failure_tag(detail);
279 format!(
280 "[failure:{}] {} Detail: {}",
281 tag,
282 runtime_failure_guidance(tag),
283 detail.trim()
284 )
285}
286
287impl InferenceEngine {
292 pub fn new(
293 api_url: String,
294 species: String,
295 snark: u8,
296 ) -> Result<Self, Box<dyn std::error::Error>> {
297 let client = reqwest::Client::builder()
298 .timeout(std::time::Duration::from_secs(180))
299 .build()?;
300
301 let base_url = {
302 let trimmed = api_url.trim_end_matches('/');
303 if let Some(scheme_end) = trimmed.find("://") {
304 let after_scheme = &trimmed[scheme_end + 3..];
305 if let Some(path_start) = after_scheme.find('/') {
306 format!(
307 "{}://{}",
308 &trimmed[..scheme_end],
309 &after_scheme[..path_start]
310 )
311 } else {
312 trimmed.to_string()
313 }
314 } else {
315 trimmed.to_string()
316 }
317 };
318
319 let api_url_full = if api_url.ends_with("/chat/completions") {
320 api_url
321 } else if api_url.ends_with("/") {
322 format!("{}chat/completions", api_url)
323 } else {
324 format!("{}/chat/completions", api_url)
325 };
326
327 let lms = crate::agent::lms::LmsHarness::new();
328 let ollama_harness = crate::agent::ollama::OllamaHarness::new(&base_url);
329
330 let provider = if base_url.contains("11434") {
331 Box::new(crate::agent::provider::OllamaProvider {
332 client: client.clone(),
333 base_url: base_url.clone(),
334 model: String::new(),
335 context_length: 8192,
336 embed_model: std::sync::Arc::new(std::sync::RwLock::new(None)),
337 ollama: ollama_harness,
338 }) as Box<dyn crate::agent::provider::ModelProvider>
339 } else {
340 Box::new(crate::agent::provider::LmsProvider {
341 client: client.clone(),
342 api_url: api_url_full,
343 base_url: base_url.clone(),
344 model: String::new(),
345 context_length: 0,
346 lms,
347 }) as Box<dyn crate::agent::provider::ModelProvider>
348 };
349
350 Ok(Self {
351 provider: std::sync::Arc::new(tokio::sync::RwLock::new(provider)),
352 cached_model: std::sync::Arc::new(std::sync::RwLock::new(String::new())),
353 cached_context: std::sync::Arc::new(std::sync::atomic::AtomicUsize::new(0)),
354 base_url: base_url.clone(),
355 species: species.clone(),
356 snark,
357 kv_semaphore: Semaphore::new(3),
358 economics: std::sync::Arc::new(std::sync::Mutex::new(SessionEconomics::new())),
359 worker_model: None,
360 gemma_native_formatting: std::sync::Arc::new(std::sync::atomic::AtomicBool::new(false)),
361 cancel_token: std::sync::Arc::new(std::sync::atomic::AtomicBool::new(false)),
362 })
363 }
364
365 pub fn set_gemma_native_formatting(&self, enabled: bool) {
366 self.gemma_native_formatting
367 .store(enabled, std::sync::atomic::Ordering::SeqCst);
368 }
369
370 pub async fn health_check(&self) -> bool {
371 let p = self.provider.read().await;
372 p.health_check().await
373 }
374
375 pub async fn provider_name(&self) -> String {
376 let p = self.provider.read().await;
377 p.name().to_string()
378 }
379
380 pub async fn get_loaded_model(&self) -> Option<String> {
381 let p = self.provider.read().await;
382 match p.detect_model().await {
383 Ok(m) if m.is_empty() => Some("".to_string()),
384 Ok(m) => Some(m),
385 Err(_) => None,
386 }
387 }
388
389 pub async fn get_embedding_model(&self) -> Option<String> {
390 let p = self.provider.read().await;
391 p.get_embedding_model().await
392 }
393
394 pub async fn load_model(&self, model_id: &str) -> Result<(), String> {
395 let p = self.provider.read().await;
396 p.load_model(model_id).await
397 }
398
399 pub async fn load_model_with_context(
400 &self,
401 model_id: &str,
402 context_length: Option<usize>,
403 ) -> Result<(), String> {
404 let p = self.provider.read().await;
405 p.load_model_with_context(model_id, context_length).await
406 }
407
408 pub async fn load_embedding_model(&self, model_id: &str) -> Result<(), String> {
409 let p = self.provider.read().await;
410 p.load_embedding_model(model_id).await
411 }
412
413 pub async fn list_provider_models(
414 &self,
415 kind: crate::agent::provider::ProviderModelKind,
416 loaded_only: bool,
417 ) -> Result<Vec<String>, String> {
418 let p = self.provider.read().await;
419 p.list_models(kind, loaded_only).await
420 }
421
422 pub async fn unload_model(&self, model_id: Option<&str>, all: bool) -> Result<String, String> {
423 let p = self.provider.read().await;
424 p.unload_model(model_id, all).await
425 }
426
427 pub async fn unload_embedding_model(&self, model_id: Option<&str>) -> Result<String, String> {
428 let p = self.provider.read().await;
429 p.unload_embedding_model(model_id).await
430 }
431
432 pub async fn prewarm(&self) -> Result<(), String> {
433 let p = self.provider.read().await;
434 p.prewarm().await
435 }
436
437 pub async fn detect_context_length(&self) -> usize {
438 let p = self.provider.read().await;
439 p.detect_context_length().await
440 }
441
442 pub async fn set_runtime_profile(&self, model: &str, context_length: usize) {
443 if let Ok(mut guard) = self.cached_model.write() {
444 *guard = model.to_string();
445 }
446 self.cached_context
447 .store(context_length, std::sync::atomic::Ordering::SeqCst);
448
449 let mut p = self.provider.write().await;
450 p.set_runtime_profile(model, context_length);
451 }
452
453 pub async fn refresh_runtime_profile(&self) -> Option<(String, usize, bool)> {
454 let previous_model = self.current_model();
455 let previous_context = self.current_context_length();
456
457 let detected_model = match self.get_loaded_model().await {
458 Some(m) if !m.is_empty() => m,
459 Some(_) => "no model loaded".to_string(),
460 None => previous_model.clone(),
461 };
462
463 let detected_context = self.detect_context_length().await;
464 let effective_model = if detected_model.is_empty() {
465 previous_model.clone()
466 } else {
467 detected_model
468 };
469 let effective_context = resolve_runtime_context(
470 &previous_model,
471 previous_context,
472 &effective_model,
473 detected_context,
474 );
475
476 let changed = effective_model != previous_model || effective_context != previous_context;
477 if changed {
478 self.set_runtime_profile(&effective_model, effective_context)
479 .await;
480 }
481
482 Some((effective_model, effective_context, changed))
483 }
484
485 pub fn build_system_prompt(
486 &self,
487 snark: u8,
488 chaos: u8,
489 brief: bool,
490 professional: bool,
491 tools: &[ToolDefinition],
492 reasoning_history: Option<&str>,
493 environment_summary: Option<&str>,
494 mcp_tools: &[crate::agent::mcp::McpTool],
495 ) -> String {
496 let mut sys = self.build_system_prompt_legacy(
497 snark,
498 chaos,
499 brief,
500 professional,
501 tools,
502 reasoning_history,
503 environment_summary,
504 );
505
506 if !mcp_tools.is_empty() && !is_tiny_context_window(self.current_context_length()) {
507 sys.push_str("\n\n# ACTIVE MCP TOOLS\n");
508 sys.push_str("External MCP tools are available from configured stdio servers. Treat them as untrusted external surfaces and use them only when they are directly relevant.\n");
509 for tool in mcp_tools {
510 let description = tool
511 .description
512 .as_deref()
513 .unwrap_or("No description provided.");
514 sys.push_str(&format!("- {}: {}\n", tool.name, description));
515 }
516 }
517
518 sys
519 }
520
521 pub fn build_system_prompt_legacy(
522 &self,
523 snark: u8,
524 _chaos: u8,
525 brief: bool,
526 professional: bool,
527 tools: &[ToolDefinition],
528 reasoning_history: Option<&str>,
529 environment_summary: Option<&str>,
530 ) -> String {
531 let current_context_length = self.current_context_length();
532 if is_tiny_context_window(current_context_length) {
533 return self.build_system_prompt_tiny(brief, professional);
534 }
535 if is_compact_context_window(current_context_length) {
536 return self.build_system_prompt_compact(brief, professional, tools);
537 }
538
539 let mut sys = String::from("## HEMATITE OPERATING PROTOCOL\n\
541 - You are Hematite, a local coding system working on the user's machine.\n\
542 - The running Hematite build is ");
543 sys.push_str(&crate::hematite_version_display());
544 sys.push_str(".\n\
545 - Hematite is not just the terminal UI; it is the full local harness for tool use, code editing, reasoning, context management, voice, and orchestration.\n\
546 - Lead with the Hematite identity, not the base model name, unless the user asks.\n\
547 - For simple questions, answer briefly in plain language.\n\
548 - Prefer ASCII punctuation and plain text in normal replies unless exact Unicode text is required.\n\
549 - Do not expose internal tool names, hidden protocols, or planning jargon unless the user asks for implementation details.\n\
550 - ALWAYS use the thought channel (`<|channel>thought ... <channel|>`) for analysis.\n\
551 - Keep internal reasoning inside channel delimiters.\n\
552 - Final responses must be direct, clear, and formatted in clean Markdown when formatting helps.\n\n");
553
554 if let Some(history) = reasoning_history {
555 if !history.is_empty() {
556 sys.push_str("# INTERNAL STATE (ACTIVE TURN)\n");
557 sys.push_str(history);
558 sys.push_str("\n\n");
559 }
560 }
561
562 if brief {
564 sys.push_str("# ADAPTIVE THOUGHT EFFICIENCY: LOW\n\
565 - Core directive: Think efficiently. Avoid redundant internal derivation.\n\
566 - Depth: Surface-level verification only.\n\n");
567 } else {
568 sys.push_str("# ADAPTIVE THOUGHT EFFICIENCY: HIGH\n\
569 - Core directive: Think in depth when the task needs it. Explore edge cases and architectural implications.\n\
570 - Depth: Full multi-step derivation required.\n\n");
571 }
572
573 let os = std::env::consts::OS;
575 if let Some(summary) = environment_summary {
576 sys.push_str("## HOST ENVIRONMENT\n");
577 sys.push_str(summary);
578 sys.push_str("\n\n");
579 }
580
581 if professional {
582 sys.push_str(&format!(
583 "You are Hematite, a local coding system running on {}. \
584 The TUI is one interface layer, not your whole identity. \
585 Be direct, practical, technically precise, and ASCII-first in ordinary prose. \
586 Skip filler and keep the focus on the work.\n",
587 os
588 ));
589 } else {
590 sys.push_str(&format!(
591 "You are Hematite, a [{}] local AI coding system (Snark: {}/100) running on the user's hardware on {}. \
592 The terminal UI is only one surface of the system. \
593 Be direct, efficient, technical, and ASCII-first in ordinary prose. \
594 When the user asks who you are, describe Hematite as the local coding harness and agent, not merely the TUI.\n",
595 self.species, snark, os
596 ));
597 }
598
599 let current_model = self.current_model();
601 if !current_model.is_empty() {
602 sys.push_str(&format!(
603 "Loaded model: {} | Context window: {} tokens. \
604 Calibrate response length and tool-call depth to fit within this budget.\n\n",
605 current_model, current_context_length
606 ));
607 if is_hematite_native_model(¤t_model) {
608 sys.push_str(
609 "Sovereign native note: prefer exact tool JSON with no extra prose when calling tools. \
610 Do not wrap `path`, `extension`, or other string arguments in extra quote layers. \
611 For `grep_files`, provide the raw regex pattern without surrounding slash delimiters.\n\n",
612 );
613 }
614 } else {
615 sys.push_str(&format!(
616 "Context window: {} tokens. Calibrate response length to fit within this budget.\n\n",
617 current_context_length
618 ));
619 }
620
621 let shell_desc = if cfg!(target_os = "windows") {
623 "[EXTERNAL SHELL]: `powershell` (Windows).\n\
624 - Use ONLY for builds, tests, or file migrations. \n\
625 - You MUST use the `powershell` tool directly. \n\
626 - NEVER attempt to use `bash`, `sh`, or `/dev/null` on this system. \n\n"
627 } else {
628 "[EXTERNAL SHELL]: `bash` (Unix).\n\
629 - Use ONLY for builds, tests, or file migrations. \n\
630 - NEVER wrap bash in other shells. \n\n"
631 };
632
633 sys.push_str("You distinguish strictly between [INTERNAL TOOLS] and [EXTERNAL SHELL].\n\n\
634 [INTERNAL TOOLS]: `list_files`, `grep_files`, `read_file`, `edit_file`, `write_file`.\n\
635 - These are the ONLY way to explore and modify code. \n\
636 - NEVER attempt to run these as shell commands (e.g. `bash $ grep_files` is FORBIDDEN).\n\n");
637 sys.push_str(shell_desc);
638
639 sys.push_str("ANTI-LOOPING: If a tool returns (no output) or 'not recognized' in a shell, pivot to a different internal tool. \n\
641 SELF-AUDIT: If you see your own command echoed back as the result, the shell failed; pivot to an internal tool immediately.\n\n");
642
643 sys.push_str("## THE COMPUTATIONAL RESEARCH MANDATE\n\
644 - You are a Lead Computational Researcher and Senior Scientist.\n\
645 - ZERO-TRUST MATH: You never guess results for math, physics, or algorithmic complexity.\n\
646 - UNIT-SAFETY: All physical calculations must use `scientific_compute(mode='units')` to ensure dimensional consistency.\n\
647 - SYMBOLIC PROOF: Use `scientific_compute(mode='symbolic')` for formal algebraic derivations and multi-variable proofs. Set `latex: true` for formal presentation.\n\
648 - EMPIRICAL AUDITING: All algorithmic performance claims must be verified with `scientific_compute(mode='complexity')` before being finalized.\n\
649 - SCIENTIFIC MEMORY (LEDGER): Use `scientific_compute(mode='ledger')` to persist long-form derivations, constants, and theorem steps to `.hematite/docs/scientific_ledger.md`. This ledger is RAG-indexed by The Vein, giving you persistent cross-session memory for project math.\n\
650 - DATASET COMPUTATION: Use `scientific_compute(mode='dataset')` to perform high-precision calculations on SQL results (CSV/DB/JSON). This bridges data science and formal research.\n\
651 - LIGHTWEIGHT SANDBOX: Prioritize pure Python implementations for all research tasks. Do NOT attempt to import heavy external libraries like 'numpy', 'scipy', or 'pandas' unless you have verified they are available or the user explicitly asks to work in a specific heavy environment or venv.\n\
652 - Every result must be backed by the executable logic used to prove it.\n\n");
653
654 sys.push_str("## TURN ADVISORY\n");
656 if brief {
657 sys.push_str("- BRIEF MODE: Respond with ONE concise sentence/block unless more code is required.\n");
658 }
659 sys.push_str("- INTERNAL REASONING: Plan your move in the thought channel first.\n");
660
661 sys.push_str("\n## SCAFFOLDING PROTOCOL\n\
663 2. ALWAYS call verify_build immediately after to confirm the project compiles/runs.\n\
664 3. If verify_build fails, use `lsp_get_diagnostics` to find the exact line and error.\n\
665 4. Fix all errors before declaring success.\n\n\
666 ## PRE-FLIGHT SCOPING PROTOCOL\n\
667 Before attempting any multi-file task or complex refactor:\n\
668 1. Identify 1-3 core files (entry-points, central models, or types) that drive the logic.\n\
669 2. Use `auto_pin_context` to keep those files in active context.\n\
670 3. Only then proceed to deeper edits or research.\n\n\
671 ## REFACTORING PROTOCOL\n\
672 When modifying existing code or renaming symbols:\n\
673 1. Use `lsp_rename_symbol` for all variable/function renames to ensure project-wide safety.\n\
674 2. After any significant edit, call `lsp_get_diagnostics` on the affected files.\n\
675 3. If errors are found, you MUST fix them. Do not wait for the user to point them out.\n\n");
676
677 sys.push_str(&load_instruction_files());
679 sys.push_str(&load_agent_skill_catalog());
680
681 sys.push_str(&crate::memory::deep_reflect::load_recent_memories());
683
684 if !tools.is_empty() {
686 sys.push_str("\n\n# NATIVE TOOL DECLARATIONS\n");
687 for tool in tools {
688 let schema = serde_json::to_string(&tool.function.parameters)
689 .unwrap_or_else(|_| "{}".to_string());
690 sys.push_str(&format!(
691 "<|tool>declaration:{}{}{}<tool|>\n",
692 tool.function.name, "{", schema
693 ));
694 sys.push_str(&format!("// {})\n", tool.function.description));
695 }
696 }
697
698 sys
699 }
700
701 fn build_system_prompt_compact(
702 &self,
703 brief: bool,
704 professional: bool,
705 tools: &[ToolDefinition],
706 ) -> String {
707 let current_model = self.current_model();
710 let current_context_length = self.current_context_length();
711 let os = std::env::consts::OS;
712
713 let mut sys = format!(
714 "You are Hematite {}, a local coding harness working on the user's machine.\n",
715 crate::hematite_version_display()
716 );
717 if professional {
718 sys.push_str("Be direct, technical, concise, and ASCII-first.\n");
719 } else {
720 sys.push_str(&format!(
721 "You are a [{}] local AI coding system. Be direct, concise, and technical.\n",
722 self.species
723 ));
724 }
725 sys.push_str(&format!(
726 "Model: {} | Context: {} tokens. Keep turns focused.\n",
727 current_model, current_context_length
728 ));
729 if is_hematite_native_model(¤t_model) {
730 sys.push_str(
731 "Sovereign native: use exact tool JSON. No extra prose in tool calls. \
732 Raw regex patterns in grep_files, no slash delimiters.\n",
733 );
734 }
735 if cfg!(target_os = "windows") {
736 sys.push_str(&format!(
737 "OS: {}. Use PowerShell for shell. Never bash or /dev/null.\n",
738 os
739 ));
740 } else {
741 sys.push_str(&format!("OS: {}. Use native Unix shell.\n", os));
742 }
743 if brief {
744 sys.push_str("BRIEF MODE: one concise sentence unless code is required.\n");
745 }
746
747 sys.push_str(
748 "\nCORE RULES:\n\
749 - Read before editing: use `read_file` or `inspect_lines` on a file before mutating it.\n\
750 - Verify after edits: run `verify_build` after code changes, before committing.\n\
751 - One tool at a time. Do not batch unrelated tool calls.\n\
752 - Do not invent tool names, file paths, or symbols not confirmed by tool output.\n\
753 - Built-in tools first: prefer `read_file`, `edit_file`, `grep_files` over MCP filesystem tools.\n\
754 - STARTUP/UI CHANGES: read the owner file first, make one focused edit, then run `verify_build`.\n",
755 );
756
757 if !tools.is_empty() {
758 sys.push_str("\n# AVAILABLE TOOLS\n");
759 for tool in tools {
760 let desc: String = tool.function.description.chars().take(120).collect();
761 sys.push_str(&format!("- {}: {}\n", tool.function.name, desc));
762 }
763 }
764
765 sys
766 }
767
768 fn build_system_prompt_tiny(&self, brief: bool, professional: bool) -> String {
769 let current_model = self.current_model();
770 let current_context_length = self.current_context_length();
771 let os = std::env::consts::OS;
772 let mut sys = format!(
773 "You are Hematite {}, a local coding harness working on the user's machine.\n",
774 crate::hematite_version_display()
775 );
776 if professional {
777 sys.push_str("Be direct, technical, concise, and ASCII-first.\n");
778 } else {
779 sys.push_str(&format!(
780 "You are a [{}] local AI coding system. Be direct, concise, and technical.\n",
781 self.species
782 ));
783 }
784 if !current_model.is_empty() {
785 sys.push_str(&format!(
786 "Loaded model: {} | Context window: {} tokens.\n",
787 current_model, current_context_length
788 ));
789 } else {
790 sys.push_str(&format!(
791 "Context window: {} tokens.\n",
792 current_context_length
793 ));
794 }
795 sys.push_str("Tiny-context mode is active. Keep turns short. Prefer final answers over long analysis. Only use tools when necessary.\n");
796 sys.push_str("Use built-in workspace tools for local inspection and edits. Do not invent tools, files, channels, or symbols.\n");
797 sys.push_str("Before editing an existing file, gather recent file evidence first. After code edits, verify before commit.\n");
798 if cfg!(target_os = "windows") {
799 sys.push_str(&format!(
800 "You are running on {}. Use PowerShell for shell work. Do not assume bash or /dev/null.\n",
801 os
802 ));
803 } else {
804 sys.push_str(&format!(
805 "You are running on {}. Use the native Unix shell conventions.\n",
806 os
807 ));
808 }
809 if brief {
810 sys.push_str("BRIEF MODE: answer in one concise sentence unless code is required.\n");
811 }
812 sys
813 }
814
815 pub fn current_model(&self) -> String {
816 self.cached_model
817 .read()
818 .map(|g| g.clone())
819 .unwrap_or_default()
820 }
821
822 pub fn current_context_length(&self) -> usize {
823 self.cached_context
824 .load(std::sync::atomic::Ordering::Relaxed)
825 }
826
827 pub fn is_compact_context_window(&self) -> bool {
828 let len = self.current_context_length();
829 len <= 16384
830 }
831
832 pub fn gemma_native_formatting_enabled(&self) -> bool {
833 self.gemma_native_formatting
834 .load(std::sync::atomic::Ordering::Relaxed)
835 }
836
837 pub async fn call_with_tools(
838 &self,
839 messages: &[ChatMessage],
840 tools: &[ToolDefinition],
841 model_override: Option<&str>,
843 ) -> Result<
844 (
845 Option<String>,
846 Option<Vec<ToolCallResponse>>,
847 Option<TokenUsage>,
848 Option<String>,
849 ),
850 String,
851 > {
852 let _permit = self
853 .kv_semaphore
854 .acquire()
855 .await
856 .map_err(|e| e.to_string())?;
857
858 let (res, model_name, prepared_messages) = {
859 let p = self.provider.read().await;
860 let model_name = model_override.unwrap_or(&p.current_model()).to_string();
861 let prepared_messages = if should_use_native_formatting(self, &model_name) {
862 prepare_gemma_native_messages(messages)
863 } else {
864 messages.to_vec()
865 };
866 if let Err(detail) = preflight_chat_request(
867 &model_name,
868 &prepared_messages,
869 tools,
870 self.current_context_length(),
871 ) {
872 return Err(format_runtime_failure_message(&detail));
873 }
874 let res = p
875 .call_with_tools(&prepared_messages, tools, model_override)
876 .await
877 .map_err(|e| format_runtime_failure_message(&e))?;
878 (res, model_name, prepared_messages)
879 };
880
881 if let Ok(mut econ) = self.economics.lock() {
882 econ.input_tokens += res.usage.prompt_tokens;
883 econ.output_tokens += res.usage.completion_tokens;
884 }
885
886 let mut content = res.content;
887 let mut tool_calls = res.tool_calls;
888
889 if let Some(text) = &content {
891 if should_use_native_formatting(self, &model_name) {
892 let native_calls = extract_native_tool_calls(text);
893 if !native_calls.is_empty() {
894 let mut existing = tool_calls.unwrap_or_default();
895 existing.extend(native_calls);
896 tool_calls = Some(existing);
897
898 let stripped = strip_native_tool_call_text(text);
899 content = if stripped.trim().is_empty() {
900 None
901 } else {
902 Some(stripped)
903 };
904 }
905 }
906 }
907
908 if should_use_native_formatting(self, &model_name) {
910 if let Some(calls) = tool_calls.as_mut() {
911 for call in calls.iter_mut() {
912 normalize_tool_argument_value(
913 &call.function.name,
914 &mut call.function.arguments,
915 );
916 }
917 }
918 }
919
920 if should_use_native_formatting(self, &model_name)
921 && content.is_none()
922 && tool_calls.is_none()
923 && !prepared_messages.is_empty()
924 {
925 return Err(format_runtime_failure_message(
926 "model returned an empty response after native-format message preparation",
927 ));
928 }
929
930 Ok((content, tool_calls, Some(res.usage), res.finish_reason))
931 }
932
933 pub async fn stream_messages(
937 &self,
938 messages: &[ChatMessage],
939 tx: mpsc::Sender<InferenceEvent>,
940 ) -> Result<(), Box<dyn std::error::Error>> {
941 let provider = self.provider.read().await;
942 provider.stream(messages, tx).await
943 }
944
945 pub async fn stream_generation(
947 &self,
948 prompt: &str,
949 snark: u8,
950 chaos: u8,
951 brief: bool,
952 professional: bool,
953 tx: mpsc::Sender<InferenceEvent>,
954 ) -> Result<(), Box<dyn std::error::Error>> {
955 let system =
956 self.build_system_prompt(snark, chaos, brief, professional, &[], None, None, &[]);
957 let messages = vec![ChatMessage::system(&system), ChatMessage::user(prompt)];
958 self.stream_messages(&messages, tx).await
959 }
960
961 pub async fn generate_task_worker(
965 &self,
966 prompt: &str,
967 professional: bool,
968 ) -> Result<String, String> {
969 let current_model = self.current_model();
970 let model = self
971 .worker_model
972 .as_deref()
973 .unwrap_or(current_model.as_str());
974 self.generate_task_with_model(prompt, 0.1, professional, model)
975 .await
976 }
977
978 pub async fn generate_task(&self, prompt: &str, professional: bool) -> Result<String, String> {
979 self.generate_task_with_temp(prompt, 0.1, professional)
980 .await
981 }
982
983 pub async fn generate_task_with_temp(
984 &self,
985 prompt: &str,
986 temp: f32,
987 professional: bool,
988 ) -> Result<String, String> {
989 let current_model = self.current_model();
990 self.generate_task_with_model(prompt, temp, professional, ¤t_model)
991 .await
992 }
993
994 pub async fn generate_task_with_model(
995 &self,
996 prompt: &str,
997 _temp: f32,
998 professional: bool,
999 model: &str,
1000 ) -> Result<String, String> {
1001 let _permit = self
1002 .kv_semaphore
1003 .acquire()
1004 .await
1005 .map_err(|e| e.to_string())?;
1006
1007 let system =
1008 self.build_system_prompt(self.snark, 50, false, professional, &[], None, None, &[]);
1009 let messages = vec![ChatMessage::system(&system), ChatMessage::user(prompt)];
1010 if let Err(detail) =
1011 preflight_chat_request(model, &messages, &[], self.current_context_length())
1012 {
1013 return Err(format_runtime_failure_message(&detail));
1014 }
1015
1016 let p = self.provider.read().await;
1017 let res = p
1018 .call_with_tools(&messages, &[], Some(model))
1019 .await
1020 .map_err(|e| format_runtime_failure_message(&e))?;
1021
1022 res.content
1023 .ok_or_else(|| "Empty response from model".to_string())
1024 }
1025
1026 #[allow(dead_code)]
1030 pub fn snip_history(
1031 &self,
1032 turns: &[ChatMessage],
1033 max_tokens_estimate: usize,
1034 keep_recent: usize,
1035 ) -> Vec<ChatMessage> {
1036 let total_chars: usize = turns.iter().map(|m| m.content.as_str().len()).sum();
1037 if total_chars / 4 <= max_tokens_estimate {
1038 return turns.to_vec();
1039 }
1040 let keep = keep_recent.min(turns.len());
1041 let mut snipped = vec![turns[0].clone()];
1042 if turns.len() > keep + 1 {
1043 snipped.push(ChatMessage::system(&format!(
1044 "[CONTEXT SNIPPED: {} earlier turns pruned to preserve VRAM]",
1045 turns.len() - keep - 1
1046 )));
1047 snipped.extend_from_slice(&turns[turns.len() - keep..]);
1048 } else {
1049 snipped = turns.to_vec();
1050 }
1051 snipped
1052 }
1053}
1054
1055fn estimate_serialized_tokens<T: Serialize + ?Sized>(value: &T) -> usize {
1056 serde_json::to_vec(value)
1057 .ok()
1058 .map_or(0, |bytes| bytes.len() / 4 + 1)
1059}
1060
1061const IMAGE_PART_TOKEN_ESTIMATE: usize = 1024;
1062
1063pub fn estimate_message_tokens(message: &ChatMessage) -> usize {
1064 let content_tokens = match &message.content {
1065 MessageContent::Text(s) => s.len() / 4 + 1,
1066 MessageContent::Parts(parts) => parts
1067 .iter()
1068 .map(|part| match part {
1069 ContentPart::Text { text } => text.len() / 4 + 1,
1070 ContentPart::ImageUrl { .. } => IMAGE_PART_TOKEN_ESTIMATE,
1073 })
1074 .sum(),
1075 };
1076 let tool_tokens: usize = message
1077 .tool_calls
1078 .iter()
1079 .flatten()
1080 .map(|call| (call.function.name.len() + call.function.arguments.to_string().len()) / 4 + 4)
1081 .sum();
1082 content_tokens + tool_tokens + 6
1083}
1084
1085pub fn estimate_message_batch_tokens(messages: &[ChatMessage]) -> usize {
1086 messages.iter().map(estimate_message_tokens).sum()
1087}
1088
1089fn reserved_output_tokens(context_length: usize) -> usize {
1090 let proportional = (context_length / 8).max(MIN_RESERVED_OUTPUT_TOKENS);
1091 proportional.min(MAX_RESERVED_OUTPUT_TOKENS)
1092}
1093
1094pub fn estimate_prompt_pressure(
1095 messages: &[ChatMessage],
1096 tools: &[ToolDefinition],
1097 context_length: usize,
1098) -> (usize, usize, usize, u8) {
1099 let estimated_input_tokens =
1100 estimate_message_batch_tokens(messages) + estimate_serialized_tokens(tools) + 32;
1101 let reserved_output = reserved_output_tokens(context_length);
1102 let estimated_total = estimated_input_tokens.saturating_add(reserved_output);
1103 let percent = if context_length == 0 {
1104 0
1105 } else {
1106 ((estimated_total.saturating_mul(100)) / context_length).min(100) as u8
1107 };
1108 (
1109 estimated_input_tokens,
1110 reserved_output,
1111 estimated_total,
1112 percent,
1113 )
1114}
1115
1116fn preflight_chat_request(
1117 model: &str,
1118 messages: &[ChatMessage],
1119 tools: &[ToolDefinition],
1120 context_length: usize,
1121) -> Result<(), String> {
1122 let (estimated_input_tokens, reserved_output, estimated_total, _) =
1123 estimate_prompt_pressure(messages, tools, context_length);
1124
1125 if estimated_total > context_length {
1126 return Err(format!(
1127 "context_window_blocked for {}: estimated input {} + reserved output {} = {} tokens exceeds the {}-token context window; narrow the request, compact the session, or preserve grounded tool output instead of restyling it.",
1128 model, estimated_input_tokens, reserved_output, estimated_total, context_length
1129 ));
1130 }
1131
1132 Ok(())
1133}
1134
1135fn load_instruction_files() -> String {
1140 use std::collections::hash_map::DefaultHasher;
1141 use std::collections::HashSet;
1142 use std::hash::{Hash, Hasher};
1143
1144 let Ok(cwd) = std::env::current_dir() else {
1145 return String::new();
1146 };
1147 let mut result = String::new();
1148 let mut seen: HashSet<u64> = HashSet::new();
1149 let mut total_chars: usize = 0;
1150 const MAX_TOTAL: usize = 12_000;
1151 const MAX_PER_FILE: usize = 4_000;
1152
1153 let mut dir = cwd.clone();
1154 for _ in 0..4 {
1155 for name in crate::agent::instructions::PROJECT_GUIDANCE_FILES {
1156 let path = crate::agent::instructions::resolve_guidance_path(&dir, name);
1157 if !path.exists() {
1158 continue;
1159 }
1160 let Ok(content) = std::fs::read_to_string(&path) else {
1161 continue;
1162 };
1163 if content.trim().is_empty() {
1164 continue;
1165 }
1166
1167 let mut hasher = DefaultHasher::new();
1168 content.hash(&mut hasher);
1169 let h = hasher.finish();
1170 if !seen.insert(h) {
1171 continue;
1172 }
1173
1174 let truncated = if content.len() > MAX_PER_FILE {
1175 format!("{}...[truncated]", &content[..MAX_PER_FILE])
1176 } else {
1177 content
1178 };
1179
1180 if total_chars + truncated.len() > MAX_TOTAL {
1181 break;
1182 }
1183 total_chars += truncated.len();
1184 result.push_str(&format!("\n--- {} ---\n{}\n", path.display(), truncated));
1185 }
1186 match dir.parent().map(|p| p.to_owned()) {
1187 Some(p) => dir = p,
1188 None => break,
1189 }
1190 }
1191
1192 if result.is_empty() {
1193 return String::new();
1194 }
1195 format!("\n\n# Project Instructions And Skills\n{}", result)
1196}
1197
1198fn load_agent_skill_catalog() -> String {
1199 let workspace_root = crate::tools::file_ops::workspace_root();
1200 let config = crate::agent::config::load_config();
1201 let discovery =
1202 crate::agent::instructions::discover_agent_skills(&workspace_root, &config.trust);
1203 crate::agent::instructions::render_skill_catalog(&discovery, 6_000)
1204 .map(|rendered| format!("\n\n{}", rendered))
1205 .unwrap_or_default()
1206}
1207
/// Extracts the reasoning text of the first `<|channel>thought … <channel|>` block.
///
/// Tag matching ignores ASCII case. When the closing tag is missing, everything
/// after the opening tag counts as the block. Returns `None` when there is no
/// opening tag or the block is empty after trimming.
pub fn extract_think_block(text: &str) -> Option<String> {
    const OPEN_TAG: &str = "<|channel>thought";
    const CLOSE_TAG: &str = "<channel|>";

    // ASCII lowercasing keeps every byte offset identical to `text`. Full Unicode
    // lowercasing can change byte lengths (e.g. U+0130), which would make offsets
    // found in the lowered copy invalid for slicing the original.
    let lower = text.to_ascii_lowercase();

    let content_start = lower.find(OPEN_TAG)? + OPEN_TAG.len();
    let close_pos = lower[content_start..]
        .find(CLOSE_TAG)
        .map_or(text.len(), |offset| content_start + offset);

    let content = text[content_start..close_pos].trim();
    if content.is_empty() {
        None
    } else {
        Some(content.to_string())
    }
}
1230
1231pub fn strip_think_blocks(text: &str) -> String {
1232 let text = {
1236 let t = text.trim_start();
1237 if t.to_lowercase().starts_with("</think>") {
1238 &t[8..]
1239 } else {
1240 text
1241 }
1242 };
1243
1244 let lower = text.to_lowercase();
1245
1246 if let Some(end) = lower.find("<channel|>").map(|i| i + "<channel|>".len()) {
1248 let answer = text[end..]
1249 .replace("<|channel>thought", "")
1250 .replace("<channel|>", "");
1251 return answer.trim().replace("\n\n\n", "\n\n").to_string();
1252 }
1253
1254 let first_open = [
1256 lower.find("<|channel>thought"), lower.find("<think>"),
1258 lower.find("<thinking>"),
1259 lower.find("<thought>"),
1260 lower.find("<|think|>"),
1261 ]
1262 .iter()
1263 .filter_map(|&x| x)
1264 .min();
1265
1266 if let Some(start) = first_open {
1267 if start > 0 {
1268 return text[..start].trim().replace("\n\n\n", "\n\n").to_string();
1269 }
1270 return String::new();
1271 }
1272
1273 static NAKED_AC: std::sync::OnceLock<aho_corasick::AhoCorasick> = std::sync::OnceLock::new();
1277 let naked_ac = NAKED_AC.get_or_init(|| {
1278 aho_corasick::AhoCorasick::new([
1279 "the user asked",
1280 "the user is asking",
1281 "the user wants",
1282 "i will structure",
1283 "i should provide",
1284 "i should give",
1285 "i should avoid",
1286 "i should note",
1287 "i should focus",
1288 "i should keep",
1289 "i should respond",
1290 "i should present",
1291 "i should display",
1292 "i should show",
1293 "i need to",
1294 "i can see from",
1295 "without being overly",
1296 "let me ",
1297 "necessary information in my identity",
1298 "was computed successfully",
1299 "computed successfully",
1300 ])
1301 .expect("valid patterns")
1302 });
1303 let is_naked_reasoning = naked_ac.find(&lower).is_some();
1304 if is_naked_reasoning {
1305 let lines: Vec<&str> = text.lines().collect();
1306 if !lines.is_empty() {
1307 let mut start_idx = 0;
1310 for (i, line) in lines.iter().enumerate() {
1311 let l = line.to_lowercase();
1312 let is_reasoning_line = naked_ac.find(&l).is_some() || l.trim().is_empty();
1313 if is_reasoning_line {
1314 start_idx = i + 1;
1315 } else {
1316 break;
1317 }
1318 }
1319 if start_idx < lines.len() {
1320 return lines[start_idx..]
1321 .join("\n")
1322 .trim()
1323 .replace("\n\n\n", "\n\n")
1324 .to_string();
1325 }
1326 return String::new();
1328 }
1329 }
1330
1331 let cleaned = strip_xml_tool_call_artifacts(text);
1334 cleaned.trim().replace("\n\n\n", "\n\n").to_string()
1335}
1336
1337fn strip_xml_tool_call_artifacts(text: &str) -> String {
1340 use aho_corasick::AhoCorasick;
1341 use std::sync::OnceLock;
1342
1343 const XML_ARTIFACTS: &[&str] = &[
1345 "</tool_call>",
1346 "<tool_call>",
1347 "</function>",
1348 "<function>",
1349 "</parameter>",
1350 "<parameter>",
1351 "</arguments>",
1352 "<arguments>",
1353 "</tool_use>",
1354 "<tool_use>",
1355 "</invoke>",
1356 "<invoke>",
1357 "</think>",
1359 "<thinking>",
1360 "</thought>",
1361 "</thinking>",
1362 "<|turn>system",
1364 "<|turn>user",
1365 "<|turn>assistant",
1366 "<|turn>tool",
1367 "<turn|>",
1368 "<|think|>",
1369 "<|im_start|>",
1371 "<|im_end|>",
1372 "<|endoftext|>",
1373 ];
1374
1375 static ARTIFACT_AC: OnceLock<AhoCorasick> = OnceLock::new();
1379 let ac = ARTIFACT_AC.get_or_init(|| {
1380 let lowered: Vec<String> = XML_ARTIFACTS.iter().map(|s| s.to_lowercase()).collect();
1381 AhoCorasick::new(&lowered).expect("valid XML artifact patterns")
1382 });
1383
1384 let lower = text.to_lowercase();
1386
1387 if ac.find(&lower).is_none() {
1389 return text.to_string();
1390 }
1391
1392 let spans: Vec<(usize, usize)> = ac.find_iter(&lower).map(|m| (m.start(), m.end())).collect();
1395 let mut out = text.to_string();
1396 for (start, end) in spans.into_iter().rev() {
1397 out.drain(start..end);
1398 }
1399 out
1400}
1401
1402fn re_gemma_call() -> &'static regex::Regex {
1406 use std::sync::OnceLock;
1407 static RE: OnceLock<regex::Regex> = OnceLock::new();
1408 RE.get_or_init(|| {
1409 regex::Regex::new(r#"(?s)<\|?tool_call\|?>\s*call:([A-Za-z_][A-Za-z0-9_]*)\{(.*?)\}(?:<\|?tool_call\|?>|\[END_TOOL_REQUEST\])"#)
1410 .expect("valid gemma call regex")
1411 })
1412}
1413fn re_gemma_arg() -> &'static regex::Regex {
1414 use std::sync::OnceLock;
1415 static RE: OnceLock<regex::Regex> = OnceLock::new();
1416 RE.get_or_init(|| {
1417 regex::Regex::new(r#"(\w+):(?:<\|"\|>(.*?)<\|"\|>|([^,}]*))"#)
1418 .expect("valid gemma arg regex")
1419 })
1420}
1421fn re_xml_call() -> &'static regex::Regex {
1422 use std::sync::OnceLock;
1423 static RE: OnceLock<regex::Regex> = OnceLock::new();
1424 RE.get_or_init(|| {
1425 regex::Regex::new(r#"(?s)<tool_call>\s*<function=([A-Za-z_][A-Za-z0-9_]*)>(.*?)(?:</function>)?\s*</tool_call>"#)
1426 .expect("valid xml call regex")
1427 })
1428}
1429fn re_xml_param() -> &'static regex::Regex {
1430 use std::sync::OnceLock;
1431 static RE: OnceLock<regex::Regex> = OnceLock::new();
1432 RE.get_or_init(|| {
1433 regex::Regex::new(r#"(?s)<parameter=([A-Za-z_][A-Za-z0-9_]*)>(.*?)</parameter>"#)
1434 .expect("valid xml param regex")
1435 })
1436}
1437fn re_short_call() -> &'static regex::Regex {
1438 use std::sync::OnceLock;
1439 static RE: OnceLock<regex::Regex> = OnceLock::new();
1440 RE.get_or_init(|| {
1441 regex::Regex::new(r#"(?s)<tool_call>\s*([A-Za-z_][A-Za-z0-9_]*)\((.*?)\)\s*</tool_call>"#)
1442 .expect("valid short call regex")
1443 })
1444}
1445fn re_short_arg() -> &'static regex::Regex {
1446 use std::sync::OnceLock;
1447 static RE: OnceLock<regex::Regex> = OnceLock::new();
1448 RE.get_or_init(|| {
1449 regex::Regex::new(
1450 r#"([A-Za-z_][A-Za-z0-9_]*)\s*=\s*(?:"((?:\\.|[^"])*)"|'((?:\\.|[^'])*)'|([^,\)]+))"#,
1451 )
1452 .expect("valid short arg regex")
1453 })
1454}
1455fn re_strip_gemma_call() -> &'static regex::Regex {
1456 use std::sync::OnceLock;
1457 static RE: OnceLock<regex::Regex> = OnceLock::new();
1458 RE.get_or_init(|| {
1459 regex::Regex::new(r#"(?s)<\|?tool_call\|?>\s*call:[A-Za-z_][A-Za-z0-9_]*\{.*?\}(?:<\|?tool_call\|?>|\[END_TOOL_REQUEST\])"#)
1460 .expect("valid strip gemma call regex")
1461 })
1462}
1463fn re_strip_xml() -> &'static regex::Regex {
1464 use std::sync::OnceLock;
1465 static RE: OnceLock<regex::Regex> = OnceLock::new();
1466 RE.get_or_init(|| {
1467 regex::Regex::new(r#"(?s)<tool_call>\s*<function=.*?>.*?</tool_call>"#)
1468 .expect("valid strip xml regex")
1469 })
1470}
1471fn re_strip_short() -> &'static regex::Regex {
1472 use std::sync::OnceLock;
1473 static RE: OnceLock<regex::Regex> = OnceLock::new();
1474 RE.get_or_init(|| {
1475 regex::Regex::new(r#"(?s)<tool_call>\s*[A-Za-z_][A-Za-z0-9_]*\(.*?\)\s*</tool_call>"#)
1476 .expect("valid strip short regex")
1477 })
1478}
1479fn re_strip_response() -> &'static regex::Regex {
1480 use std::sync::OnceLock;
1481 static RE: OnceLock<regex::Regex> = OnceLock::new();
1482 RE.get_or_init(|| {
1483 regex::Regex::new(
1484 r#"(?s)<\|tool_response\|?>.*?(?:<\|tool_response\|?>|<tool_response\|>)"#,
1485 )
1486 .expect("valid strip response regex")
1487 })
1488}
1489
1490pub fn extract_native_tool_calls(text: &str) -> Vec<ToolCallResponse> {
1493 let mut results = Vec::new();
1494
1495 let re_call = re_gemma_call();
1497 let re_arg = re_gemma_arg();
1498
1499 for cap in re_call.captures_iter(text) {
1500 let name = cap[1].to_string();
1501 let args_str = &cap[2];
1502 let mut arguments = serde_json::Map::new();
1503
1504 for arg_cap in re_arg.captures_iter(args_str) {
1505 let key = arg_cap[1].to_string();
1506 let val_raw = arg_cap
1507 .get(2)
1508 .map(|m| m.as_str())
1509 .or_else(|| arg_cap.get(3).map(|m| m.as_str()))
1510 .unwrap_or("")
1511 .trim();
1512 let normalized_raw = normalize_string_arg(&val_raw.replace("\\\"", "\""));
1513
1514 let val = if normalized_raw == "true" {
1515 Value::Bool(true)
1516 } else if normalized_raw == "false" {
1517 Value::Bool(false)
1518 } else if let Ok(n) = normalized_raw.parse::<i64>() {
1519 Value::Number(n.into())
1520 } else if let Ok(n) = normalized_raw.parse::<u64>() {
1521 Value::Number(n.into())
1522 } else if let Ok(n) = normalized_raw.parse::<f64>() {
1523 serde_json::Number::from_f64(n)
1524 .map(Value::Number)
1525 .unwrap_or(Value::String(normalized_raw.clone()))
1526 } else {
1527 Value::String(normalized_raw)
1528 };
1529
1530 arguments.insert(key, val);
1531 }
1532
1533 results.push(ToolCallResponse {
1534 id: format!("call_{}", rand::random::<u32>()),
1535 call_type: "function".to_string(),
1536 function: ToolCallFn {
1537 name,
1538 arguments: Value::Object(arguments),
1539 },
1540 index: None,
1541 });
1542 }
1543
1544 for cap in re_xml_call().captures_iter(text) {
1546 let name = cap[1].to_string();
1547 let body = &cap[2];
1548 let mut arguments = serde_json::Map::new();
1549
1550 for p_cap in re_xml_param().captures_iter(body) {
1551 let key = p_cap[1].to_string();
1552 let val_raw = p_cap[2].trim();
1553 let val = if val_raw == "true" {
1554 Value::Bool(true)
1555 } else if val_raw == "false" {
1556 Value::Bool(false)
1557 } else if let Ok(n) = val_raw.parse::<i64>() {
1558 Value::Number(n.into())
1559 } else if let Ok(n) = val_raw.parse::<u64>() {
1560 Value::Number(n.into())
1561 } else {
1562 Value::String(val_raw.to_string())
1563 };
1564 arguments.insert(key, val);
1565 }
1566
1567 results.push(ToolCallResponse {
1568 id: format!("call_{}", rand::random::<u32>()),
1569 call_type: "function".to_string(),
1570 function: ToolCallFn {
1571 name,
1572 arguments: Value::Object(arguments),
1573 },
1574 index: None,
1575 });
1576 }
1577
1578 for cap in re_short_call().captures_iter(text) {
1580 let name = cap[1].to_string();
1581 let args_str = cap[2].trim();
1582 let mut arguments = serde_json::Map::new();
1583
1584 for arg_cap in re_short_arg().captures_iter(args_str) {
1585 let key = arg_cap[1].to_string();
1586 let val_raw = arg_cap
1587 .get(2)
1588 .or_else(|| arg_cap.get(3))
1589 .or_else(|| arg_cap.get(4))
1590 .map(|m| m.as_str())
1591 .unwrap_or("")
1592 .trim();
1593 let normalized_raw = normalize_string_arg(&val_raw.replace("\\\"", "\""));
1594
1595 let val = if normalized_raw == "true" {
1596 Value::Bool(true)
1597 } else if normalized_raw == "false" {
1598 Value::Bool(false)
1599 } else if let Ok(n) = normalized_raw.parse::<i64>() {
1600 Value::Number(n.into())
1601 } else if let Ok(n) = normalized_raw.parse::<u64>() {
1602 Value::Number(n.into())
1603 } else if let Ok(n) = normalized_raw.parse::<f64>() {
1604 serde_json::Number::from_f64(n)
1605 .map(Value::Number)
1606 .unwrap_or(Value::String(normalized_raw.clone()))
1607 } else {
1608 Value::String(normalized_raw)
1609 };
1610
1611 arguments.insert(key, val);
1612 }
1613
1614 results.push(ToolCallResponse {
1615 id: format!("call_{}", rand::random::<u32>()),
1616 call_type: "function".to_string(),
1617 function: ToolCallFn {
1618 name,
1619 arguments: Value::Object(arguments),
1620 },
1621 index: None,
1622 });
1623 }
1624
1625 results
1626}
1627
1628pub fn normalize_tool_argument_string(tool_name: &str, raw: &str) -> String {
1629 let trimmed = raw.trim();
1630 let candidate = unwrap_json_string_once(trimmed).unwrap_or_else(|| trimmed.to_string());
1631
1632 let mut value = match serde_json::from_str::<Value>(&candidate) {
1633 Ok(v) => v,
1634 Err(_) => return candidate,
1635 };
1636 normalize_tool_argument_value(tool_name, &mut value);
1637 value.to_string()
1638}
1639
1640pub fn normalize_tool_argument_value(tool_name: &str, value: &mut Value) {
1641 match value {
1642 Value::String(s) => *s = normalize_string_arg(s),
1643 Value::Array(items) => {
1644 for item in items {
1645 normalize_tool_argument_value(tool_name, item);
1646 }
1647 }
1648 Value::Object(map) => {
1649 for val in map.values_mut() {
1650 normalize_tool_argument_value(tool_name, val);
1651 }
1652 if tool_name == "grep_files" {
1653 if let Some(Value::String(pattern)) = map.get_mut("pattern") {
1654 *pattern = normalize_regex_pattern(pattern);
1655 }
1656 }
1657 for key in ["path", "extension", "query", "command", "reason"] {
1658 if let Some(Value::String(s)) = map.get_mut(key) {
1659 *s = normalize_string_arg(s);
1660 }
1661 }
1662 }
1663 _ => {}
1664 }
1665}
1666
/// Removes exactly one layer of matching quotes (`"`, `'` or `` ` ``) from `input`.
///
/// Returns `None` when `input` is shorter than two bytes or is not wrapped in a
/// matching pair. Otherwise the inner text has `\"` replaced by `"`, then `\\`
/// replaced by `\`, and is trimmed.
fn unwrap_json_string_once(input: &str) -> Option<String> {
    if input.len() < 2 {
        return None;
    }

    let inner = ['"', '\'', '`']
        .iter()
        .find_map(|&quote| input.strip_prefix(quote)?.strip_suffix(quote))?;

    let unescaped = inner.replace("\\\"", "\"").replace("\\\\", "\\");
    Some(unescaped.trim().to_string())
}
1680
/// Trims `input` and repeatedly peels matching quote wrappers (`"`, `'`, `` ` ``).
///
/// After each peeled layer the remainder is trimmed again. Peeling stops once the
/// text is shorter than two bytes or is no longer wrapped in a matching pair.
fn normalize_string_arg(input: &str) -> String {
    let mut current = input.trim();

    while current.len() >= 2 {
        let peeled = ['"', '\'', '`']
            .iter()
            .find_map(|&quote| current.strip_prefix(quote)?.strip_suffix(quote));
        match peeled {
            Some(inner) => current = inner.trim(),
            None => break,
        }
    }

    current.to_string()
}
1698
1699fn normalize_regex_pattern(input: &str) -> String {
1700 let out = normalize_string_arg(input);
1701 if out.len() >= 2 && out.starts_with('/') && out.ends_with('/') {
1702 out[1..out.len() - 1].to_string()
1703 } else {
1704 out
1705 }
1706}
1707
1708fn prepare_gemma_native_messages(messages: &[ChatMessage]) -> Vec<ChatMessage> {
1709 let mut system_blocks = Vec::new();
1710 let mut prepared = Vec::new();
1711 let mut seeded = false;
1712
1713 for message in messages {
1714 if message.role == "system" {
1715 let cleaned = strip_legacy_turn_wrappers(message.content.as_str())
1716 .trim()
1717 .to_string();
1718 if !cleaned.is_empty() {
1719 system_blocks.push(cleaned);
1720 }
1721 continue;
1722 }
1723
1724 let mut clone = message.clone();
1725 clone.content = MessageContent::Text(strip_legacy_turn_wrappers(message.content.as_str()));
1726
1727 if !seeded && message.role == "user" {
1728 let mut merged = String::new();
1729 if !system_blocks.is_empty() {
1730 merged.push_str("System instructions for this turn:\n");
1731 merged.push_str(&system_blocks.join("\n\n"));
1732 merged.push_str("\n\n");
1733 }
1734 merged.push_str(clone.content.as_str());
1735 clone.content = MessageContent::Text(merged);
1736 seeded = true;
1737 }
1738
1739 prepared.push(clone);
1740 }
1741
1742 if !seeded && !system_blocks.is_empty() {
1743 prepared.insert(
1744 0,
1745 ChatMessage::user(&format!(
1746 "System instructions for this turn:\n{}",
1747 system_blocks.join("\n\n")
1748 )),
1749 );
1750 }
1751
1752 prepared
1753}
1754
/// Removes legacy turn markers from `text` and trims the result.
///
/// The role openers are only removed when directly followed by a newline; the
/// `<turn|>` closer is removed wherever it appears.
fn strip_legacy_turn_wrappers(text: &str) -> String {
    const WRAPPERS: [&str; 5] = [
        "<|turn>system\n",
        "<|turn>user\n",
        "<|turn>assistant\n",
        "<|turn>tool\n",
        "<turn|>",
    ];

    let mut cleaned = text.to_string();
    for marker in WRAPPERS.iter() {
        cleaned = cleaned.replace(*marker, "");
    }
    cleaned.trim().to_string()
}
1764
1765pub fn strip_native_tool_call_text(text: &str) -> String {
1766 let without_calls = re_strip_gemma_call().replace_all(text, "");
1767 let without_xml = re_strip_xml().replace_all(without_calls.as_ref(), "");
1768 let without_short = re_strip_short().replace_all(without_xml.as_ref(), "");
1769 re_strip_response()
1770 .replace_all(without_short.as_ref(), "")
1771 .trim()
1772 .to_string()
1773}
1774
/// Chooses the context length to report for the effective model.
///
/// - `0` when no model is loaded (`"no model loaded"` or a blank name).
/// - `detected_context` when it is non-zero.
/// - `previous_context` when nothing was detected but the model is unchanged.
/// - `0` otherwise.
fn resolve_runtime_context(
    previous_model: &str,
    previous_context: usize,
    effective_model: &str,
    detected_context: usize,
) -> usize {
    let no_model = effective_model == "no model loaded" || effective_model.trim().is_empty();
    if no_model {
        return 0;
    }
    if detected_context > 0 {
        return detected_context;
    }
    if effective_model == previous_model {
        return previous_context;
    }
    0
}
1791
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Serializes tests that change the process-wide current directory; the test
    /// harness runs tests on parallel threads by default.
    static CWD_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());

    /// Switches the current directory for the lifetime of the guard and restores
    /// the previous one on drop, even if the guarded code panics.
    struct CwdGuard {
        previous: std::path::PathBuf,
        // Released after `Drop::drop` has restored the directory.
        _lock: std::sync::MutexGuard<'static, ()>,
    }

    impl CwdGuard {
        fn enter(dir: &std::path::Path) -> Self {
            // A poisoned lock only means another test panicked; the guard is still usable.
            let lock = CWD_LOCK
                .lock()
                .unwrap_or_else(|poisoned| poisoned.into_inner());
            let previous = std::env::current_dir().unwrap();
            std::env::set_current_dir(dir).unwrap();
            Self {
                previous,
                _lock: lock,
            }
        }
    }

    impl Drop for CwdGuard {
        fn drop(&mut self) {
            let _ = std::env::set_current_dir(&self.previous);
        }
    }

    #[test]
    fn system_prompt_includes_running_hematite_version() {
        let engine = InferenceEngine::new(
            "http://localhost:1234/v1".to_string(),
            "strategist".to_string(),
            0,
        )
        .expect("engine");

        let system = engine.build_system_prompt(0, 50, false, true, &[], None, None, &[]);
        assert!(system.contains(crate::HEMATITE_VERSION));
    }

    #[test]
    fn extracts_gemma_native_tool_call_with_mixed_tool_call_tags() {
        let text = r#"<|channel>thought
Reading the next chunk.<channel|>The startup banner wording is likely defined within the UI drawing logic.
<|tool_call>call:read_file{limit:100,offset:100,path:\"src/ui/tui.rs\"}<tool_call|>"#;

        let calls = extract_native_tool_calls(text);
        assert_eq!(calls.len(), 1);
        assert_eq!(calls[0].function.name, "read_file");

        let args: Value = calls[0].function.arguments.clone();
        assert_eq!(args.get("limit").and_then(|v| v.as_i64()), Some(100));
        assert_eq!(args.get("offset").and_then(|v| v.as_i64()), Some(100));
        assert_eq!(
            args.get("path").and_then(|v| v.as_str()),
            Some("src/ui/tui.rs")
        );

        let stripped = strip_native_tool_call_text(text);
        assert!(!stripped.contains("<|tool_call"));
        assert!(!stripped.contains("<tool_call|>"));
    }

    #[test]
    fn strips_hallucinated_tool_responses_from_native_tool_transcript() {
        let text = r#"<|channel>thought
Planning.
<channel|><|tool_call>call:list_files{extension:<|\"|>rs<|\"|>,path:<|\"|>src/<|\"|>}<tool_call|><|tool_response>thought
Mapped src.
<channel|><|tool_call>call:read_file{limit:100,offset:0,path:<|\"|>src/main.rs<|\"|>}<tool_call|><|tool_response>thought
Read main.
<channel|>"#;

        let calls = extract_native_tool_calls(text);
        assert_eq!(calls.len(), 2);
        assert_eq!(calls[0].function.name, "list_files");
        assert_eq!(calls[1].function.name, "read_file");

        let stripped = strip_native_tool_call_text(text);
        assert!(!stripped.contains("<|tool_call"));
        assert!(!stripped.contains("<|tool_response"));
        assert!(!stripped.contains("<tool_response|>"));
    }

    #[test]
    fn create_directory_is_treated_as_mutating_repo_write() {
        let metadata = tool_metadata_for_name("create_directory");
        assert!(metadata.mutates_workspace);
        assert!(!metadata.read_only_friendly);
    }

    #[test]
    fn extracts_qwen_xml_tool_calls_from_reasoning() {
        let text = r#"Based on the project structure, I need to check the binary.
<tool_call>
<function=shell>
<parameter=command>
ls -la hematite.exe
</parameter>
<parameter=reason>
Check if the binary exists
</parameter>
</function>
</tool_call>"#;

        let calls = extract_native_tool_calls(text);
        assert_eq!(calls.len(), 1);
        assert_eq!(calls[0].function.name, "shell");

        let args: Value = calls[0].function.arguments.clone();
        assert_eq!(
            args.get("command").and_then(|v| v.as_str()),
            Some("ls -la hematite.exe")
        );
        assert_eq!(
            args.get("reason").and_then(|v| v.as_str()),
            Some("Check if the binary exists")
        );

        let stripped = strip_native_tool_call_text(text);
        assert!(!stripped.contains("<tool_call>"));
        assert!(!stripped.contains("<function=shell>"));
    }

    #[test]
    fn extracts_shorthand_tool_calls_from_reasoning() {
        let text = r#"<thinking>
The user wants a search first.
</thinking>

I'll search before continuing.

<tool_call>research_web(query="uefn toolbelt python automation unreal engine fortnite")</tool_call>"#;

        let calls = extract_native_tool_calls(text);
        assert_eq!(calls.len(), 1);
        assert_eq!(calls[0].function.name, "research_web");

        let args: Value = calls[0].function.arguments.clone();
        assert_eq!(
            args.get("query").and_then(|v| v.as_str()),
            Some("uefn toolbelt python automation unreal engine fortnite")
        );

        let stripped = strip_native_tool_call_text(text);
        assert!(!stripped.contains("<tool_call>"));
        assert!(!stripped.contains("research_web(query="));
    }

    #[test]
    fn strips_thinking_tag_as_reasoning_prefix() {
        let cleaned =
            strip_think_blocks("<thinking>\nThe user wants a search.\n</thinking>\nVisible answer");
        assert_eq!(cleaned, "");
    }

    #[test]
    fn resolve_runtime_context_returns_zero_when_no_model_loaded() {
        assert_eq!(
            resolve_runtime_context("qwen/qwen3.5-9b", 32000, "no model loaded", 0),
            0
        );
    }

    #[test]
    fn resolve_runtime_context_preserves_previous_only_for_same_model() {
        assert_eq!(
            resolve_runtime_context("qwen/qwen3.5-9b", 32000, "qwen/qwen3.5-9b", 0),
            32000
        );
        assert_eq!(
            resolve_runtime_context("qwen/qwen3.5-9b", 32000, "bonsai-8b", 0),
            0
        );
    }

    #[test]
    fn load_instruction_files_includes_workspace_guidance_files() {
        let temp = tempfile::tempdir().unwrap();

        fs::write(
            temp.path().join("SKILLS.md"),
            "# Workspace Skills\n- Prefer API-first changes before UI polish.",
        )
        .unwrap();

        // The guard is dropped (directory restored) before the assertions run and
        // before `temp` is removed.
        let loaded = {
            let _cwd = CwdGuard::enter(temp.path());
            load_instruction_files()
        };

        assert!(loaded.contains("SKILLS.md"));
        assert!(loaded.contains("Prefer API-first changes before UI polish."));
    }

    #[test]
    fn load_agent_skill_catalog_includes_skill_directory_entries() {
        let temp = tempfile::tempdir().unwrap();

        std::fs::create_dir_all(temp.path().join(".agents/skills/code-review")).unwrap();
        fs::write(
            temp.path().join(".agents/skills/code-review/SKILL.md"),
            "---\nname: code-review\ndescription: Review diffs and flag regressions.\ncompatibility: Requires git\n---\n",
        )
        .unwrap();

        // The guard is dropped (directory restored) before the assertions run and
        // before `temp` is removed.
        let loaded = {
            let _cwd = CwdGuard::enter(temp.path());
            load_agent_skill_catalog()
        };

        assert!(loaded.contains("Agent Skills Catalog"));
        assert!(loaded.contains("code-review"));
        assert!(loaded.contains("Review diffs and flag regressions."));
    }
}