1use serde::Serialize;
2use serde_json::Value;
3use tokio::sync::{mpsc, Semaphore};
4
5pub use crate::agent::economics::{SessionEconomics, ToolRecord};
6pub use crate::agent::types::*;
7
/// Handle used to talk to the active model provider.
///
/// Bundles the provider itself with cached runtime facts (model name and
/// context length), persona settings used when building system prompts, and
/// shared per-session state.
pub struct InferenceEngine {
    /// Active model backend, shared behind an async read/write lock.
    pub provider:
        std::sync::Arc<tokio::sync::RwLock<Box<dyn crate::agent::provider::ModelProvider>>>,
    /// Model name last recorded by `set_runtime_profile`; empty until one is set.
    pub cached_model: std::sync::Arc<std::sync::RwLock<String>>,
    /// Context length last recorded by `set_runtime_profile`; starts at 0.
    pub cached_context: std::sync::Arc<std::sync::atomic::AtomicUsize>,
    /// Scheme and host portion of the API URL, as derived in `new`.
    pub base_url: String,
    /// Persona label interpolated into the non-professional system prompts.
    pub species: String,
    /// Snark level; passed to the prompt builder by `generate_task_with_model`.
    pub snark: u8,
    /// Bounds the number of concurrent model calls (`new` creates it with 3 permits).
    pub kv_semaphore: Semaphore,
    /// Running token totals; `call_with_tools` adds prompt and completion counts.
    pub economics: std::sync::Arc<std::sync::Mutex<SessionEconomics>>,
    /// Optional model used by `generate_task_worker` instead of the cached model.
    pub worker_model: Option<String>,
    /// Toggle read by `gemma_native_formatting_enabled`; off by default.
    pub gemma_native_formatting: std::sync::Arc<std::sync::atomic::AtomicBool>,
    /// NOTE(review): not read anywhere in the visible code — presumably a
    /// cooperative cancellation flag; confirm against its users.
    pub cancel_token: std::sync::Arc<std::sync::atomic::AtomicBool>,
}
27
/// Reports whether `model` names a Gemma 4 build.
///
/// The check is case-insensitive (ASCII) and matches `gemma-4` or `gemma4`
/// anywhere inside the identifier.
pub fn is_hematite_native_model(model: &str) -> bool {
    let normalized = model.to_ascii_lowercase();
    ["gemma-4", "gemma4"]
        .iter()
        .any(|marker| normalized.contains(marker))
}
32
33fn should_use_native_formatting(engine: &InferenceEngine, model: &str) -> bool {
34 is_hematite_native_model(model) && engine.gemma_native_formatting_enabled()
35}
36
37pub fn tool_metadata_for_name(name: &str) -> ToolMetadata {
40 if name.starts_with("mcp__") {
41 let lower = name.to_ascii_lowercase();
42 let mutates_workspace = [
43 "__edit",
44 "__write",
45 "__create",
46 "__move",
47 "__delete",
48 "__remove",
49 "__rename",
50 "__replace",
51 "__patch",
52 ]
53 .iter()
54 .any(|needle| lower.contains(needle));
55 return ToolMetadata {
56 category: ToolCategory::External,
57 mutates_workspace,
58 external_surface: true,
59 trust_sensitive: true,
60 read_only_friendly: !mutates_workspace,
61 plan_scope: false,
62 };
63 }
64
65 match name {
66 "read_file" | "inspect_lines" | "grep_files" | "list_files" => ToolMetadata {
67 category: ToolCategory::RepoRead,
68 mutates_workspace: false,
69 external_surface: false,
70 trust_sensitive: false,
71 read_only_friendly: true,
72 plan_scope: true,
73 },
74 "create_directory" | "write_file" | "edit_file" | "patch_hunk" | "multi_search_replace" => {
75 ToolMetadata {
76 category: ToolCategory::RepoWrite,
77 mutates_workspace: true,
78 external_surface: false,
79 trust_sensitive: true,
80 read_only_friendly: false,
81 plan_scope: true,
82 }
83 }
84 "trace_runtime_flow" => ToolMetadata {
85 category: ToolCategory::Architecture,
86 mutates_workspace: false,
87 external_surface: false,
88 trust_sensitive: false,
89 read_only_friendly: true,
90 plan_scope: false,
91 },
92 "describe_toolchain" => ToolMetadata {
93 category: ToolCategory::Toolchain,
94 mutates_workspace: false,
95 external_surface: false,
96 trust_sensitive: false,
97 read_only_friendly: true,
98 plan_scope: false,
99 },
100 "shell" => ToolMetadata {
101 category: ToolCategory::Runtime,
102 mutates_workspace: true,
103 external_surface: false,
104 trust_sensitive: true,
105 read_only_friendly: false,
106 plan_scope: false,
107 },
108 "inspect_host" => ToolMetadata {
109 category: ToolCategory::Runtime,
110 mutates_workspace: false,
111 external_surface: false,
112 trust_sensitive: false,
113 read_only_friendly: true,
114 plan_scope: false,
115 },
116 "resolve_host_issue" => ToolMetadata {
117 category: ToolCategory::Runtime,
118 mutates_workspace: true,
119 external_surface: true,
120 trust_sensitive: true,
121 read_only_friendly: false,
122 plan_scope: false,
123 },
124 "run_hematite_maintainer_workflow" => ToolMetadata {
125 category: ToolCategory::Workflow,
126 mutates_workspace: true,
127 external_surface: false,
128 trust_sensitive: true,
129 read_only_friendly: false,
130 plan_scope: false,
131 },
132 "run_workspace_workflow" => ToolMetadata {
133 category: ToolCategory::Workflow,
134 mutates_workspace: true,
135 external_surface: false,
136 trust_sensitive: true,
137 read_only_friendly: false,
138 plan_scope: false,
139 },
140 "verify_build" => ToolMetadata {
141 category: ToolCategory::Verification,
142 mutates_workspace: false,
143 external_surface: false,
144 trust_sensitive: false,
145 read_only_friendly: true,
146 plan_scope: true,
147 },
148 "git_commit" | "git_push" | "git_remote" | "git_onboarding" | "git_worktree" => {
149 ToolMetadata {
150 category: ToolCategory::Git,
151 mutates_workspace: true,
152 external_surface: false,
153 trust_sensitive: true,
154 read_only_friendly: false,
155 plan_scope: false,
156 }
157 }
158 "research_web" | "fetch_docs" => ToolMetadata {
159 category: ToolCategory::Research,
160 mutates_workspace: false,
161 external_surface: false,
162 trust_sensitive: false,
163 read_only_friendly: true,
164 plan_scope: false,
165 },
166 "vision_analyze" => ToolMetadata {
167 category: ToolCategory::Vision,
168 mutates_workspace: false,
169 external_surface: false,
170 trust_sensitive: false,
171 read_only_friendly: true,
172 plan_scope: false,
173 },
174 "lsp_definitions"
175 | "lsp_references"
176 | "lsp_hover"
177 | "lsp_rename_symbol"
178 | "lsp_get_diagnostics"
179 | "lsp_search_symbol" => ToolMetadata {
180 category: ToolCategory::Lsp,
181 mutates_workspace: false,
182 external_surface: false,
183 trust_sensitive: false,
184 read_only_friendly: true,
185 plan_scope: false,
186 },
187 "auto_pin_context" | "list_pinned" | "clarify" => ToolMetadata {
188 category: ToolCategory::Workflow,
189 mutates_workspace: false,
190 external_surface: false,
191 trust_sensitive: false,
192 read_only_friendly: true,
193 plan_scope: true,
194 },
195 "manage_tasks" => ToolMetadata {
196 category: ToolCategory::Workflow,
197 mutates_workspace: false,
198 external_surface: false,
199 trust_sensitive: false,
200 read_only_friendly: true,
201 plan_scope: false,
202 },
203 _ => ToolMetadata {
204 category: ToolCategory::Other,
205 mutates_workspace: false,
206 external_surface: false,
207 trust_sensitive: false,
208 read_only_friendly: true,
209 plan_scope: false,
210 },
211 }
212}
// Lower and upper bounds, in tokens, for the output reservation. Their use
// sites are outside this excerpt — presumably they clamp how much of the
// context window is held back for the model's reply; confirm against callers.
const MIN_RESERVED_OUTPUT_TOKENS: usize = 1024;
const MAX_RESERVED_OUTPUT_TOKENS: usize = 4096;
219
/// A context window of 8,192 tokens or fewer counts as "tiny".
fn is_tiny_context_window(context_length: usize) -> bool {
    matches!(context_length, 0..=8_192)
}
223
/// A context window is "compact" when it is larger than 8,192 tokens but no
/// larger than 49,152 tokens.
fn is_compact_context_window(context_length: usize) -> bool {
    (8_193..=49_152).contains(&context_length)
}
227
/// Public entry point for the private `is_compact_context_window` check:
/// returns `true` when `context_length` is above 8,192 and at most 49,152.
pub fn is_compact_context_window_pub(context_length: usize) -> bool {
    is_compact_context_window(context_length)
}
231
/// Detects provider error text that indicates the prompt did not fit the
/// context window.
///
/// `lower` is matched as-is, so the caller is expected to pass already
/// lowercased text. A message matches when it mentions both `n_keep` and
/// `n_ctx`, or when it contains any one of the standalone phrases below.
fn is_provider_context_limit_detail(lower: &str) -> bool {
    const STANDALONE_MARKERS: [&str; 4] = [
        "context length",
        "keep from the initial prompt",
        "prompt is greater than the context length",
        "exceeds the context window",
    ];

    let mentions_keep_and_ctx = lower.contains("n_keep") && lower.contains("n_ctx");
    mentions_keep_and_ctx
        || STANDALONE_MARKERS
            .iter()
            .any(|marker| lower.contains(marker))
}
239
240fn classify_runtime_failure_tag(detail: &str) -> &'static str {
241 let lower = detail.to_ascii_lowercase();
242 if lower.contains("context_window_blocked")
243 || lower.contains("context ceiling reached")
244 || lower.contains("exceeds the")
245 || is_provider_context_limit_detail(&lower)
246 {
247 "context_window"
248 } else if lower.contains("empty response from model")
249 || lower.contains("model returned an empty response")
250 {
251 "empty_model_response"
252 } else if lower.contains("action blocked:")
253 || lower.contains("access denied")
254 || lower.contains("declined by user")
255 {
256 "tool_policy_blocked"
257 } else {
258 "provider_degraded"
259 }
260}
261
/// Returns the user-facing recovery hint for a failure tag.
///
/// Unknown tags receive the generic "retry, then narrow or restart" hint.
fn runtime_failure_guidance(tag: &str) -> &'static str {
    const CONTEXT_WINDOW_HINT: &str = "Narrow the request, compact the session, or preserve grounded tool output instead of restyling it. If LM Studio reports a smaller live n_ctx than Hematite expected, reload or re-detect the model budget before retrying.";
    const EMPTY_RESPONSE_HINT: &str = "Retry once automatically, then narrow the turn or restart LM Studio if the model keeps returning nothing.";
    const POLICY_BLOCKED_HINT: &str =
        "Stay inside the allowed workflow or switch modes before retrying.";
    const FALLBACK_HINT: &str =
        "Retry once automatically, then narrow the turn or restart LM Studio if it persists.";

    match tag {
        "context_window" => CONTEXT_WINDOW_HINT,
        "empty_model_response" => EMPTY_RESPONSE_HINT,
        "tool_policy_blocked" => POLICY_BLOCKED_HINT,
        _ => FALLBACK_HINT,
    }
}
276
277fn format_runtime_failure_message(detail: &str) -> String {
278 let tag = classify_runtime_failure_tag(detail);
279 format!(
280 "[failure:{}] {} Detail: {}",
281 tag,
282 runtime_failure_guidance(tag),
283 detail.trim()
284 )
285}
286
287impl InferenceEngine {
292 pub fn new(
293 api_url: String,
294 species: String,
295 snark: u8,
296 ) -> Result<Self, Box<dyn std::error::Error>> {
297 let client = reqwest::Client::builder()
298 .timeout(std::time::Duration::from_secs(180))
299 .build()?;
300
301 let base_url = {
302 let trimmed = api_url.trim_end_matches('/');
303 if let Some(scheme_end) = trimmed.find("://") {
304 let after_scheme = &trimmed[scheme_end + 3..];
305 if let Some(path_start) = after_scheme.find('/') {
306 format!(
307 "{}://{}",
308 &trimmed[..scheme_end],
309 &after_scheme[..path_start]
310 )
311 } else {
312 trimmed.to_string()
313 }
314 } else {
315 trimmed.to_string()
316 }
317 };
318
319 let api_url_full = if api_url.ends_with("/chat/completions") {
320 api_url
321 } else if api_url.ends_with("/") {
322 format!("{}chat/completions", api_url)
323 } else {
324 format!("{}/chat/completions", api_url)
325 };
326
327 let lms = crate::agent::lms::LmsHarness::new();
328 let ollama_harness = crate::agent::ollama::OllamaHarness::new(&base_url);
329
330 let provider = if base_url.contains("11434") {
331 Box::new(crate::agent::provider::OllamaProvider {
332 client: client.clone(),
333 base_url: base_url.clone(),
334 model: String::new(),
335 context_length: 8192,
336 embed_model: std::sync::Arc::new(std::sync::RwLock::new(None)),
337 ollama: ollama_harness,
338 }) as Box<dyn crate::agent::provider::ModelProvider>
339 } else {
340 Box::new(crate::agent::provider::LmsProvider {
341 client: client.clone(),
342 api_url: api_url_full,
343 base_url: base_url.clone(),
344 model: String::new(),
345 context_length: 0,
346 lms,
347 }) as Box<dyn crate::agent::provider::ModelProvider>
348 };
349
350 Ok(Self {
351 provider: std::sync::Arc::new(tokio::sync::RwLock::new(provider)),
352 cached_model: std::sync::Arc::new(std::sync::RwLock::new(String::new())),
353 cached_context: std::sync::Arc::new(std::sync::atomic::AtomicUsize::new(0)),
354 base_url: base_url.clone(),
355 species: species.clone(),
356 snark,
357 kv_semaphore: Semaphore::new(3),
358 economics: std::sync::Arc::new(std::sync::Mutex::new(SessionEconomics::new())),
359 worker_model: None,
360 gemma_native_formatting: std::sync::Arc::new(std::sync::atomic::AtomicBool::new(false)),
361 cancel_token: std::sync::Arc::new(std::sync::atomic::AtomicBool::new(false)),
362 })
363 }
364
365 pub fn set_gemma_native_formatting(&self, enabled: bool) {
366 self.gemma_native_formatting
367 .store(enabled, std::sync::atomic::Ordering::SeqCst);
368 }
369
370 pub async fn health_check(&self) -> bool {
371 let p = self.provider.read().await;
372 p.health_check().await
373 }
374
375 pub async fn provider_name(&self) -> String {
376 let p = self.provider.read().await;
377 p.name().to_string()
378 }
379
380 pub async fn get_loaded_model(&self) -> Option<String> {
381 let p = self.provider.read().await;
382 match p.detect_model().await {
383 Ok(m) if m.is_empty() => Some("".to_string()),
384 Ok(m) => Some(m),
385 Err(_) => None,
386 }
387 }
388
389 pub async fn get_embedding_model(&self) -> Option<String> {
390 let p = self.provider.read().await;
391 p.get_embedding_model().await
392 }
393
394 pub async fn load_model(&self, model_id: &str) -> Result<(), String> {
395 let p = self.provider.read().await;
396 p.load_model(model_id).await
397 }
398
399 pub async fn load_model_with_context(
400 &self,
401 model_id: &str,
402 context_length: Option<usize>,
403 ) -> Result<(), String> {
404 let p = self.provider.read().await;
405 p.load_model_with_context(model_id, context_length).await
406 }
407
408 pub async fn load_embedding_model(&self, model_id: &str) -> Result<(), String> {
409 let p = self.provider.read().await;
410 p.load_embedding_model(model_id).await
411 }
412
413 pub async fn list_provider_models(
414 &self,
415 kind: crate::agent::provider::ProviderModelKind,
416 loaded_only: bool,
417 ) -> Result<Vec<String>, String> {
418 let p = self.provider.read().await;
419 p.list_models(kind, loaded_only).await
420 }
421
422 pub async fn unload_model(&self, model_id: Option<&str>, all: bool) -> Result<String, String> {
423 let p = self.provider.read().await;
424 p.unload_model(model_id, all).await
425 }
426
427 pub async fn unload_embedding_model(&self, model_id: Option<&str>) -> Result<String, String> {
428 let p = self.provider.read().await;
429 p.unload_embedding_model(model_id).await
430 }
431
432 pub async fn prewarm(&self) -> Result<(), String> {
433 let p = self.provider.read().await;
434 p.prewarm().await
435 }
436
437 pub async fn detect_context_length(&self) -> usize {
438 let p = self.provider.read().await;
439 p.detect_context_length().await
440 }
441
442 pub async fn set_runtime_profile(&self, model: &str, context_length: usize) {
443 if let Ok(mut guard) = self.cached_model.write() {
444 *guard = model.to_string();
445 }
446 self.cached_context
447 .store(context_length, std::sync::atomic::Ordering::SeqCst);
448
449 let mut p = self.provider.write().await;
450 p.set_runtime_profile(model, context_length);
451 }
452
453 pub async fn refresh_runtime_profile(&self) -> Option<(String, usize, bool)> {
454 let previous_model = self.current_model();
455 let previous_context = self.current_context_length();
456
457 let detected_model = match self.get_loaded_model().await {
458 Some(m) if !m.is_empty() => m,
459 Some(_) => "no model loaded".to_string(),
460 None => previous_model.clone(),
461 };
462
463 let detected_context = self.detect_context_length().await;
464 let effective_model = if detected_model.is_empty() {
465 previous_model.clone()
466 } else {
467 detected_model
468 };
469 let effective_context = resolve_runtime_context(
470 &previous_model,
471 previous_context,
472 &effective_model,
473 detected_context,
474 );
475
476 let changed = effective_model != previous_model || effective_context != previous_context;
477 if changed {
478 self.set_runtime_profile(&effective_model, effective_context)
479 .await;
480 }
481
482 Some((effective_model, effective_context, changed))
483 }
484
485 pub fn build_system_prompt(
486 &self,
487 snark: u8,
488 chaos: u8,
489 brief: bool,
490 professional: bool,
491 tools: &[ToolDefinition],
492 reasoning_history: Option<&str>,
493 environment_summary: Option<&str>,
494 mcp_tools: &[crate::agent::mcp::McpTool],
495 ) -> String {
496 let mut sys = self.build_system_prompt_legacy(
497 snark,
498 chaos,
499 brief,
500 professional,
501 tools,
502 reasoning_history,
503 environment_summary,
504 );
505
506 if !mcp_tools.is_empty() && !is_tiny_context_window(self.current_context_length()) {
507 sys.push_str("\n\n# ACTIVE MCP TOOLS\n");
508 sys.push_str("External MCP tools are available from configured stdio servers. Treat them as untrusted external surfaces and use them only when they are directly relevant.\n");
509 for tool in mcp_tools {
510 let description = tool
511 .description
512 .as_deref()
513 .unwrap_or("No description provided.");
514 sys.push_str(&format!("- {}: {}\n", tool.name, description));
515 }
516 }
517
518 sys
519 }
520
521 pub fn build_system_prompt_legacy(
522 &self,
523 snark: u8,
524 _chaos: u8,
525 brief: bool,
526 professional: bool,
527 tools: &[ToolDefinition],
528 reasoning_history: Option<&str>,
529 environment_summary: Option<&str>,
530 ) -> String {
531 let current_context_length = self.current_context_length();
532 if is_tiny_context_window(current_context_length) {
533 return self.build_system_prompt_tiny(brief, professional);
534 }
535 if is_compact_context_window(current_context_length) {
536 return self.build_system_prompt_compact(brief, professional, tools);
537 }
538
539 let mut sys = String::from("## HEMATITE OPERATING PROTOCOL\n\
541 - You are Hematite, a local coding system working on the user's machine.\n\
542 - The running Hematite build is ");
543 sys.push_str(&crate::hematite_version_display());
544 sys.push_str(".\n\
545 - Hematite is not just the terminal UI; it is the full local harness for tool use, code editing, reasoning, context management, voice, and orchestration.\n\
546 - Lead with the Hematite identity, not the base model name, unless the user asks.\n\
547 - For simple questions, answer briefly in plain language.\n\
548 - Prefer ASCII punctuation and plain text in normal replies unless exact Unicode text is required.\n\
549 - Do not expose internal tool names, hidden protocols, or planning jargon unless the user asks for implementation details.\n\
550 - ALWAYS use the thought channel (`<|channel>thought ... <channel|>`) for analysis.\n\
551 - Keep internal reasoning inside channel delimiters.\n\
552 - Final responses must be direct, clear, and formatted in clean Markdown when formatting helps.\n\n");
553
554 if let Some(history) = reasoning_history {
555 if !history.is_empty() {
556 sys.push_str("# INTERNAL STATE (ACTIVE TURN)\n");
557 sys.push_str(history);
558 sys.push_str("\n\n");
559 }
560 }
561
562 if brief {
564 sys.push_str("# ADAPTIVE THOUGHT EFFICIENCY: LOW\n\
565 - Core directive: Think efficiently. Avoid redundant internal derivation.\n\
566 - Depth: Surface-level verification only.\n\n");
567 } else {
568 sys.push_str("# ADAPTIVE THOUGHT EFFICIENCY: HIGH\n\
569 - Core directive: Think in depth when the task needs it. Explore edge cases and architectural implications.\n\
570 - Depth: Full multi-step derivation required.\n\n");
571 }
572
573 let os = std::env::consts::OS;
575 if let Some(summary) = environment_summary {
576 sys.push_str("## HOST ENVIRONMENT\n");
577 sys.push_str(summary);
578 sys.push_str("\n\n");
579 }
580
581 if professional {
582 sys.push_str(&format!(
583 "You are Hematite, a local coding system running on {}. \
584 The TUI is one interface layer, not your whole identity. \
585 Be direct, practical, technically precise, and ASCII-first in ordinary prose. \
586 Skip filler and keep the focus on the work.\n",
587 os
588 ));
589 } else {
590 sys.push_str(&format!(
591 "You are Hematite, a [{}] local AI coding system (Snark: {}/100) running on the user's hardware on {}. \
592 The terminal UI is only one surface of the system. \
593 Be direct, efficient, technical, and ASCII-first in ordinary prose. \
594 When the user asks who you are, describe Hematite as the local coding harness and agent, not merely the TUI.\n",
595 self.species, snark, os
596 ));
597 }
598
599 let current_model = self.current_model();
601 if !current_model.is_empty() {
602 sys.push_str(&format!(
603 "Loaded model: {} | Context window: {} tokens. \
604 Calibrate response length and tool-call depth to fit within this budget.\n\n",
605 current_model, current_context_length
606 ));
607 if is_hematite_native_model(¤t_model) {
608 sys.push_str(
609 "Sovereign native note: prefer exact tool JSON with no extra prose when calling tools. \
610 Do not wrap `path`, `extension`, or other string arguments in extra quote layers. \
611 For `grep_files`, provide the raw regex pattern without surrounding slash delimiters.\n\n",
612 );
613 }
614 } else {
615 sys.push_str(&format!(
616 "Context window: {} tokens. Calibrate response length to fit within this budget.\n\n",
617 current_context_length
618 ));
619 }
620
621 let shell_desc = if cfg!(target_os = "windows") {
623 "[EXTERNAL SHELL]: `powershell` (Windows).\n\
624 - Use ONLY for builds, tests, or file migrations. \n\
625 - You MUST use the `powershell` tool directly. \n\
626 - NEVER attempt to use `bash`, `sh`, or `/dev/null` on this system. \n\n"
627 } else {
628 "[EXTERNAL SHELL]: `bash` (Unix).\n\
629 - Use ONLY for builds, tests, or file migrations. \n\
630 - NEVER wrap bash in other shells. \n\n"
631 };
632
633 sys.push_str("You distinguish strictly between [INTERNAL TOOLS] and [EXTERNAL SHELL].\n\n\
634 [INTERNAL TOOLS]: `list_files`, `grep_files`, `read_file`, `edit_file`, `write_file`.\n\
635 - These are the ONLY way to explore and modify code. \n\
636 - NEVER attempt to run these as shell commands (e.g. `bash $ grep_files` is FORBIDDEN).\n\n");
637 sys.push_str(shell_desc);
638
639 sys.push_str("ANTI-LOOPING: If a tool returns (no output) or 'not recognized' in a shell, pivot to a different internal tool. \n\
641 SELF-AUDIT: If you see your own command echoed back as the result, the shell failed; pivot to an internal tool immediately.\n\n");
642
643 sys.push_str("## THE COMPUTATIONAL RESEARCH MANDATE\n\
644 - You are a Lead Computational Researcher and Senior Scientist.\n\
645 - ZERO-TRUST MATH: You never guess results for math, physics, or algorithmic complexity.\n\
646 - UNIT-SAFETY: All physical calculations must use `scientific_compute(mode='units')` to ensure dimensional consistency.\n\
647 - SYMBOLIC PROOF: Use `scientific_compute(mode='symbolic')` for formal algebraic derivations and multi-variable proofs. Set `latex: true` for formal presentation.\n\
648 - EMPIRICAL AUDITING: All algorithmic performance claims must be verified with `scientific_compute(mode='complexity')` before being finalized.\n\
649 - SCIENTIFIC MEMORY (LEDGER): Use `scientific_compute(mode='ledger')` to persist long-form derivations, constants, and theorem steps to `.hematite/docs/scientific_ledger.md`. This ledger is RAG-indexed by The Vein, giving you persistent cross-session memory for project math.\n\
650 - DATASET COMPUTATION: Use `scientific_compute(mode='dataset')` to perform high-precision calculations on SQL results (CSV/DB/JSON). This bridges data science and formal research.\n\
651 - LIGHTWEIGHT SANDBOX: Prioritize pure Python implementations for all research tasks. Do NOT attempt to import heavy external libraries like 'numpy', 'scipy', or 'pandas' unless you have verified they are available or the user explicitly asks to work in a specific heavy environment or venv.\n\
652 - Every result must be backed by the executable logic used to prove it.\n\n");
653
654 sys.push_str("## TURN ADVISORY\n");
656 if brief {
657 sys.push_str("- BRIEF MODE: Respond with ONE concise sentence/block unless more code is required.\n");
658 }
659 sys.push_str("- INTERNAL REASONING: Plan your move in the thought channel first.\n");
660
661 sys.push_str("\n## SCAFFOLDING PROTOCOL\n\
663 2. ALWAYS call verify_build immediately after to confirm the project compiles/runs.\n\
664 3. If verify_build fails, use `lsp_get_diagnostics` to find the exact line and error.\n\
665 4. Fix all errors before declaring success.\n\n\
666 ## PRE-FLIGHT SCOPING PROTOCOL\n\
667 Before attempting any multi-file task or complex refactor:\n\
668 1. Identify 1-3 core files (entry-points, central models, or types) that drive the logic.\n\
669 2. Use `auto_pin_context` to keep those files in active context.\n\
670 3. Only then proceed to deeper edits or research.\n\n\
671 ## REFACTORING PROTOCOL\n\
672 When modifying existing code or renaming symbols:\n\
673 1. Use `lsp_rename_symbol` for all variable/function renames to ensure project-wide safety.\n\
674 2. After any significant edit, call `lsp_get_diagnostics` on the affected files.\n\
675 3. If errors are found, you MUST fix them. Do not wait for the user to point them out.\n\n");
676
677 sys.push_str(&load_instruction_files());
679 sys.push_str(&load_agent_skill_catalog());
680
681 sys.push_str(&crate::memory::deep_reflect::load_recent_memories());
683
684 if !tools.is_empty() {
686 sys.push_str("\n\n# NATIVE TOOL DECLARATIONS\n");
687 for tool in tools {
688 let schema = serde_json::to_string(&tool.function.parameters)
689 .unwrap_or_else(|_| "{}".to_string());
690 sys.push_str(&format!(
691 "<|tool>declaration:{}{}{}<tool|>\n",
692 tool.function.name, "{", schema
693 ));
694 sys.push_str(&format!("// {})\n", tool.function.description));
695 }
696 }
697
698 sys
699 }
700
701 fn build_system_prompt_compact(
702 &self,
703 brief: bool,
704 professional: bool,
705 tools: &[ToolDefinition],
706 ) -> String {
707 let current_model = self.current_model();
710 let current_context_length = self.current_context_length();
711 let os = std::env::consts::OS;
712
713 let mut sys = format!(
714 "You are Hematite {}, a local coding harness working on the user's machine.\n",
715 crate::hematite_version_display()
716 );
717 if professional {
718 sys.push_str("Be direct, technical, concise, and ASCII-first.\n");
719 } else {
720 sys.push_str(&format!(
721 "You are a [{}] local AI coding system. Be direct, concise, and technical.\n",
722 self.species
723 ));
724 }
725 sys.push_str(&format!(
726 "Model: {} | Context: {} tokens. Keep turns focused.\n",
727 current_model, current_context_length
728 ));
729 if is_hematite_native_model(¤t_model) {
730 sys.push_str(
731 "Sovereign native: use exact tool JSON. No extra prose in tool calls. \
732 Raw regex patterns in grep_files, no slash delimiters.\n",
733 );
734 }
735 if cfg!(target_os = "windows") {
736 sys.push_str(&format!(
737 "OS: {}. Use PowerShell for shell. Never bash or /dev/null.\n",
738 os
739 ));
740 } else {
741 sys.push_str(&format!("OS: {}. Use native Unix shell.\n", os));
742 }
743 if brief {
744 sys.push_str("BRIEF MODE: one concise sentence unless code is required.\n");
745 }
746
747 sys.push_str(
748 "\nCORE RULES:\n\
749 - Read before editing: use `read_file` or `inspect_lines` on a file before mutating it.\n\
750 - Verify after edits: run `verify_build` after code changes, before committing.\n\
751 - One tool at a time. Do not batch unrelated tool calls.\n\
752 - Do not invent tool names, file paths, or symbols not confirmed by tool output.\n\
753 - Built-in tools first: prefer `read_file`, `edit_file`, `grep_files` over MCP filesystem tools.\n\
754 - STARTUP/UI CHANGES: read the owner file first, make one focused edit, then run `verify_build`.\n",
755 );
756
757 if !tools.is_empty() {
758 sys.push_str("\n# AVAILABLE TOOLS\n");
759 for tool in tools {
760 let desc: String = tool.function.description.chars().take(120).collect();
761 sys.push_str(&format!("- {}: {}\n", tool.function.name, desc));
762 }
763 }
764
765 sys
766 }
767
768 fn build_system_prompt_tiny(&self, brief: bool, professional: bool) -> String {
769 let current_model = self.current_model();
770 let current_context_length = self.current_context_length();
771 let os = std::env::consts::OS;
772 let mut sys = format!(
773 "You are Hematite {}, a local coding harness working on the user's machine.\n",
774 crate::hematite_version_display()
775 );
776 if professional {
777 sys.push_str("Be direct, technical, concise, and ASCII-first.\n");
778 } else {
779 sys.push_str(&format!(
780 "You are a [{}] local AI coding system. Be direct, concise, and technical.\n",
781 self.species
782 ));
783 }
784 if !current_model.is_empty() {
785 sys.push_str(&format!(
786 "Loaded model: {} | Context window: {} tokens.\n",
787 current_model, current_context_length
788 ));
789 } else {
790 sys.push_str(&format!(
791 "Context window: {} tokens.\n",
792 current_context_length
793 ));
794 }
795 sys.push_str("Tiny-context mode is active. Keep turns short. Prefer final answers over long analysis. Only use tools when necessary.\n");
796 sys.push_str("Use built-in workspace tools for local inspection and edits. Do not invent tools, files, channels, or symbols.\n");
797 sys.push_str("Before editing an existing file, gather recent file evidence first. After code edits, verify before commit.\n");
798 if cfg!(target_os = "windows") {
799 sys.push_str(&format!(
800 "You are running on {}. Use PowerShell for shell work. Do not assume bash or /dev/null.\n",
801 os
802 ));
803 } else {
804 sys.push_str(&format!(
805 "You are running on {}. Use the native Unix shell conventions.\n",
806 os
807 ));
808 }
809 if brief {
810 sys.push_str("BRIEF MODE: answer in one concise sentence unless code is required.\n");
811 }
812 sys
813 }
814
815 pub fn current_model(&self) -> String {
816 self.cached_model
817 .read()
818 .map(|g| g.clone())
819 .unwrap_or_default()
820 }
821
822 pub fn current_context_length(&self) -> usize {
823 self.cached_context
824 .load(std::sync::atomic::Ordering::Relaxed)
825 }
826
827 pub fn is_compact_context_window(&self) -> bool {
828 let len = self.current_context_length();
829 len <= 16384
830 }
831
832 pub fn gemma_native_formatting_enabled(&self) -> bool {
833 self.gemma_native_formatting
834 .load(std::sync::atomic::Ordering::Relaxed)
835 }
836
837 pub async fn call_with_tools(
838 &self,
839 messages: &[ChatMessage],
840 tools: &[ToolDefinition],
841 model_override: Option<&str>,
843 ) -> Result<
844 (
845 Option<String>,
846 Option<Vec<ToolCallResponse>>,
847 Option<TokenUsage>,
848 Option<String>,
849 ),
850 String,
851 > {
852 let _permit = self
853 .kv_semaphore
854 .acquire()
855 .await
856 .map_err(|e| e.to_string())?;
857
858 let (res, model_name, prepared_messages) = {
859 let p = self.provider.read().await;
860 let model_name = model_override.unwrap_or(&p.current_model()).to_string();
861 let prepared_messages = if should_use_native_formatting(self, &model_name) {
862 prepare_gemma_native_messages(messages)
863 } else {
864 messages.to_vec()
865 };
866 if let Err(detail) = preflight_chat_request(
867 &model_name,
868 &prepared_messages,
869 tools,
870 self.current_context_length(),
871 ) {
872 return Err(format_runtime_failure_message(&detail));
873 }
874 let res = p
875 .call_with_tools(&prepared_messages, tools, model_override)
876 .await
877 .map_err(|e| format_runtime_failure_message(&e))?;
878 (res, model_name, prepared_messages)
879 };
880
881 if let Ok(mut econ) = self.economics.lock() {
882 econ.input_tokens += res.usage.prompt_tokens;
883 econ.output_tokens += res.usage.completion_tokens;
884 }
885
886 let mut content = res.content;
887 let mut tool_calls = res.tool_calls;
888
889 if let Some(text) = &content {
891 if should_use_native_formatting(self, &model_name) {
892 let native_calls = extract_native_tool_calls(text);
893 if !native_calls.is_empty() {
894 let mut existing = tool_calls.unwrap_or_default();
895 existing.extend(native_calls);
896 tool_calls = Some(existing);
897
898 let stripped = strip_native_tool_call_text(text);
899 content = if stripped.trim().is_empty() {
900 None
901 } else {
902 Some(stripped)
903 };
904 }
905 }
906 }
907
908 if should_use_native_formatting(self, &model_name) {
910 if let Some(calls) = tool_calls.as_mut() {
911 for call in calls.iter_mut() {
912 normalize_tool_argument_value(
913 &call.function.name,
914 &mut call.function.arguments,
915 );
916 }
917 }
918 }
919
920 if should_use_native_formatting(self, &model_name)
921 && content.is_none()
922 && tool_calls.is_none()
923 && !prepared_messages.is_empty()
924 {
925 return Err(format_runtime_failure_message(
926 "model returned an empty response after native-format message preparation",
927 ));
928 }
929
930 Ok((content, tool_calls, Some(res.usage), res.finish_reason))
931 }
932
933 pub async fn stream_messages(
937 &self,
938 messages: &[ChatMessage],
939 tx: mpsc::Sender<InferenceEvent>,
940 ) -> Result<(), Box<dyn std::error::Error>> {
941 let provider = self.provider.read().await;
942 provider.stream(messages, tx).await
943 }
944
945 pub async fn stream_generation(
947 &self,
948 prompt: &str,
949 snark: u8,
950 chaos: u8,
951 brief: bool,
952 professional: bool,
953 tx: mpsc::Sender<InferenceEvent>,
954 ) -> Result<(), Box<dyn std::error::Error>> {
955 let system =
956 self.build_system_prompt(snark, chaos, brief, professional, &[], None, None, &[]);
957 let messages = vec![ChatMessage::system(&system), ChatMessage::user(prompt)];
958 self.stream_messages(&messages, tx).await
959 }
960
961 pub async fn generate_task_worker(
965 &self,
966 prompt: &str,
967 professional: bool,
968 ) -> Result<String, String> {
969 let current_model = self.current_model();
970 let model = self
971 .worker_model
972 .as_deref()
973 .unwrap_or(current_model.as_str());
974 self.generate_task_with_model(prompt, 0.1, professional, model)
975 .await
976 }
977
978 pub async fn generate_task(&self, prompt: &str, professional: bool) -> Result<String, String> {
979 self.generate_task_with_temp(prompt, 0.1, professional)
980 .await
981 }
982
983 pub async fn generate_task_with_temp(
984 &self,
985 prompt: &str,
986 temp: f32,
987 professional: bool,
988 ) -> Result<String, String> {
989 let current_model = self.current_model();
990 self.generate_task_with_model(prompt, temp, professional, ¤t_model)
991 .await
992 }
993
994 pub async fn generate_task_with_model(
995 &self,
996 prompt: &str,
997 _temp: f32,
998 professional: bool,
999 model: &str,
1000 ) -> Result<String, String> {
1001 let _permit = self
1002 .kv_semaphore
1003 .acquire()
1004 .await
1005 .map_err(|e| e.to_string())?;
1006
1007 let system =
1008 self.build_system_prompt(self.snark, 50, false, professional, &[], None, None, &[]);
1009 let messages = vec![ChatMessage::system(&system), ChatMessage::user(prompt)];
1010 if let Err(detail) =
1011 preflight_chat_request(model, &messages, &[], self.current_context_length())
1012 {
1013 return Err(format_runtime_failure_message(&detail));
1014 }
1015
1016 let p = self.provider.read().await;
1017 let res = p
1018 .call_with_tools(&messages, &[], Some(model))
1019 .await
1020 .map_err(|e| format_runtime_failure_message(&e))?;
1021
1022 res.content
1023 .ok_or_else(|| "Empty response from model".to_string())
1024 }
1025
1026 #[allow(dead_code)]
1030 pub fn snip_history(
1031 &self,
1032 turns: &[ChatMessage],
1033 max_tokens_estimate: usize,
1034 keep_recent: usize,
1035 ) -> Vec<ChatMessage> {
1036 let total_chars: usize = turns.iter().map(|m| m.content.as_str().len()).sum();
1037 if total_chars / 4 <= max_tokens_estimate {
1038 return turns.to_vec();
1039 }
1040 let keep = keep_recent.min(turns.len());
1041 let mut snipped = vec![turns[0].clone()];
1042 if turns.len() > keep + 1 {
1043 snipped.push(ChatMessage::system(&format!(
1044 "[CONTEXT SNIPPED: {} earlier turns pruned to preserve VRAM]",
1045 turns.len() - keep - 1
1046 )));
1047 snipped.extend_from_slice(&turns[turns.len() - keep..]);
1048 } else {
1049 snipped = turns.to_vec();
1050 }
1051 snipped
1052 }
1053}
1054
1055fn estimate_serialized_tokens<T: Serialize + ?Sized>(value: &T) -> usize {
1056 serde_json::to_vec(value)
1057 .ok()
1058 .map_or(0, |bytes| bytes.len() / 4 + 1)
1059}
1060
/// Flat token estimate charged for each image content part when sizing a message.
const IMAGE_PART_TOKEN_ESTIMATE: usize = 1024;
1062
1063pub fn estimate_message_tokens(message: &ChatMessage) -> usize {
1064 let content_tokens = match &message.content {
1065 MessageContent::Text(s) => s.len() / 4 + 1,
1066 MessageContent::Parts(parts) => parts
1067 .iter()
1068 .map(|part| match part {
1069 ContentPart::Text { text } => text.len() / 4 + 1,
1070 ContentPart::ImageUrl { .. } => IMAGE_PART_TOKEN_ESTIMATE,
1073 })
1074 .sum(),
1075 };
1076 let tool_tokens: usize = message
1077 .tool_calls
1078 .iter()
1079 .flatten()
1080 .map(|call| (call.function.name.len() + call.function.arguments.to_string().len()) / 4 + 4)
1081 .sum();
1082 content_tokens + tool_tokens + 6
1083}
1084
1085pub fn estimate_message_batch_tokens(messages: &[ChatMessage]) -> usize {
1086 messages.iter().map(estimate_message_tokens).sum()
1087}
1088
1089fn reserved_output_tokens(context_length: usize) -> usize {
1090 let proportional = (context_length / 8).max(MIN_RESERVED_OUTPUT_TOKENS);
1091 proportional.min(MAX_RESERVED_OUTPUT_TOKENS)
1092}
1093
1094pub fn estimate_prompt_pressure(
1095 messages: &[ChatMessage],
1096 tools: &[ToolDefinition],
1097 context_length: usize,
1098) -> (usize, usize, usize, u8) {
1099 let estimated_input_tokens =
1100 estimate_message_batch_tokens(messages) + estimate_serialized_tokens(tools) + 32;
1101 let reserved_output = reserved_output_tokens(context_length);
1102 let estimated_total = estimated_input_tokens.saturating_add(reserved_output);
1103 let percent = if context_length == 0 {
1104 0
1105 } else {
1106 ((estimated_total.saturating_mul(100)) / context_length).min(100) as u8
1107 };
1108 (
1109 estimated_input_tokens,
1110 reserved_output,
1111 estimated_total,
1112 percent,
1113 )
1114}
1115
1116fn preflight_chat_request(
1117 model: &str,
1118 messages: &[ChatMessage],
1119 tools: &[ToolDefinition],
1120 context_length: usize,
1121) -> Result<(), String> {
1122 let (estimated_input_tokens, reserved_output, estimated_total, _) =
1123 estimate_prompt_pressure(messages, tools, context_length);
1124
1125 if estimated_total > context_length {
1126 return Err(format!(
1127 "context_window_blocked for {}: estimated input {} + reserved output {} = {} tokens exceeds the {}-token context window; narrow the request, compact the session, or preserve grounded tool output instead of restyling it.",
1128 model, estimated_input_tokens, reserved_output, estimated_total, context_length
1129 ));
1130 }
1131
1132 Ok(())
1133}
1134
1135fn load_instruction_files() -> String {
1140 use std::collections::hash_map::DefaultHasher;
1141 use std::collections::HashSet;
1142 use std::hash::{Hash, Hasher};
1143
1144 let Ok(cwd) = std::env::current_dir() else {
1145 return String::new();
1146 };
1147 let mut result = String::new();
1148 let mut seen: HashSet<u64> = HashSet::new();
1149 let mut total_chars: usize = 0;
1150 const MAX_TOTAL: usize = 12_000;
1151 const MAX_PER_FILE: usize = 4_000;
1152
1153 let mut dir = cwd.clone();
1154 for _ in 0..4 {
1155 for name in crate::agent::instructions::PROJECT_GUIDANCE_FILES {
1156 let path = crate::agent::instructions::resolve_guidance_path(&dir, name);
1157 if !path.exists() {
1158 continue;
1159 }
1160 let Ok(content) = std::fs::read_to_string(&path) else {
1161 continue;
1162 };
1163 if content.trim().is_empty() {
1164 continue;
1165 }
1166
1167 let mut hasher = DefaultHasher::new();
1168 content.hash(&mut hasher);
1169 let h = hasher.finish();
1170 if !seen.insert(h) {
1171 continue;
1172 }
1173
1174 let truncated = if content.len() > MAX_PER_FILE {
1175 format!("{}...[truncated]", &content[..MAX_PER_FILE])
1176 } else {
1177 content
1178 };
1179
1180 if total_chars + truncated.len() > MAX_TOTAL {
1181 break;
1182 }
1183 total_chars += truncated.len();
1184 result.push_str(&format!("\n--- {} ---\n{}\n", path.display(), truncated));
1185 }
1186 match dir.parent().map(|p| p.to_owned()) {
1187 Some(p) => dir = p,
1188 None => break,
1189 }
1190 }
1191
1192 if result.is_empty() {
1193 return String::new();
1194 }
1195 format!("\n\n# Project Instructions And Skills\n{}", result)
1196}
1197
1198fn load_agent_skill_catalog() -> String {
1199 let workspace_root = crate::tools::file_ops::workspace_root();
1200 let config = crate::agent::config::load_config();
1201 let discovery =
1202 crate::agent::instructions::discover_agent_skills(&workspace_root, &config.trust);
1203 crate::agent::instructions::render_skill_catalog(&discovery, 6_000)
1204 .map(|rendered| format!("\n\n{}", rendered))
1205 .unwrap_or_default()
1206}
1207
/// Extracts the reasoning text enclosed by `<|channel>thought` … `<channel|>`.
///
/// Tag matching is ASCII case-insensitive. If the closing tag is missing,
/// everything after the opening tag is treated as the reasoning text.
///
/// Returns `None` when there is no opening tag or the enclosed text is blank;
/// otherwise the trimmed reasoning text.
pub fn extract_think_block(text: &str) -> Option<String> {
    const OPEN_TAG: &str = "<|channel>thought";
    const CLOSE_TAG: &str = "<channel|>";

    // ASCII-only folding keeps every byte offset identical to `text`. Full
    // Unicode lowercasing can change byte lengths, which would make offsets
    // found in the lowered copy invalid for slicing the original.
    let lower = text.to_ascii_lowercase();

    let content_start = lower.find(OPEN_TAG)? + OPEN_TAG.len();
    let content_end = lower[content_start..]
        .find(CLOSE_TAG)
        .map_or(text.len(), |rel| content_start + rel);

    let content = text[content_start..content_end].trim();
    if content.is_empty() {
        None
    } else {
        Some(content.to_string())
    }
}
1230
1231pub fn strip_think_blocks(text: &str) -> String {
1232 let text = {
1236 let t = text.trim_start();
1237 if t.to_lowercase().starts_with("</think>") {
1238 &t[8..]
1239 } else {
1240 text
1241 }
1242 };
1243
1244 let lower = text.to_lowercase();
1245
1246 if let Some(end) = lower.find("<channel|>").map(|i| i + "<channel|>".len()) {
1248 let answer = text[end..]
1249 .replace("<|channel>thought", "")
1250 .replace("<channel|>", "");
1251 return answer.trim().replace("\n\n\n", "\n\n").to_string();
1252 }
1253
1254 let first_open = [
1256 lower.find("<|channel>thought"), lower.find("<think>"),
1258 lower.find("<thinking>"),
1259 lower.find("<thought>"),
1260 lower.find("<|think|>"),
1261 ]
1262 .iter()
1263 .filter_map(|&x| x)
1264 .min();
1265
1266 if let Some(start) = first_open {
1267 if start > 0 {
1268 return text[..start].trim().replace("\n\n\n", "\n\n").to_string();
1269 }
1270 return String::new();
1271 }
1272
1273 let naked_reasoning_phrases: &[&str] = &[
1277 "the user asked",
1278 "the user is asking",
1279 "the user wants",
1280 "i will structure",
1281 "i should provide",
1282 "i should give",
1283 "i should avoid",
1284 "i should note",
1285 "i should focus",
1286 "i should keep",
1287 "i should respond",
1288 "i should present",
1289 "i should display",
1290 "i should show",
1291 "i need to",
1292 "i can see from",
1293 "without being overly",
1294 "let me ",
1295 "necessary information in my identity",
1296 "was computed successfully",
1297 "computed successfully",
1298 ];
1299 let is_naked_reasoning = naked_reasoning_phrases.iter().any(|p| lower.contains(p));
1300 if is_naked_reasoning {
1301 let lines: Vec<&str> = text.lines().collect();
1302 if !lines.is_empty() {
1303 let mut start_idx = 0;
1306 for (i, line) in lines.iter().enumerate() {
1307 let l = line.to_lowercase();
1308 let is_reasoning_line =
1309 naked_reasoning_phrases.iter().any(|p| l.contains(p)) || l.trim().is_empty();
1310 if is_reasoning_line {
1311 start_idx = i + 1;
1312 } else {
1313 break;
1314 }
1315 }
1316 if start_idx < lines.len() {
1317 return lines[start_idx..]
1318 .join("\n")
1319 .trim()
1320 .replace("\n\n\n", "\n\n")
1321 .to_string();
1322 }
1323 return String::new();
1325 }
1326 }
1327
1328 let cleaned = strip_xml_tool_call_artifacts(text);
1331 cleaned.trim().replace("\n\n\n", "\n\n").to_string()
1332}
1333
/// Deletes stray tool-call, reasoning and chat-template markers from `text`.
///
/// Each tag in `XML_ARTIFACTS` is removed wherever it occurs, matched ASCII
/// case-insensitively. Tags are processed in list order, and removal of a tag
/// repeats until no occurrence is left, so an occurrence formed by joining the
/// text around a removed tag is removed as well.
fn strip_xml_tool_call_artifacts(text: &str) -> String {
    const XML_ARTIFACTS: &[&str] = &[
        "</tool_call>",
        "<tool_call>",
        "</function>",
        "<function>",
        "</parameter>",
        "<parameter>",
        "</arguments>",
        "<arguments>",
        "</tool_use>",
        "<tool_use>",
        "</invoke>",
        "<invoke>",
        "</think>",
        "<thinking>",
        "</thought>",
        "</thinking>",
        "<|turn>system",
        "<|turn>user",
        "<|turn>assistant",
        "<|turn>tool",
        "<turn|>",
        "<|think|>",
        "<|im_start|>",
        "<|im_end|>",
        "<|endoftext|>",
    ];

    let mut out = text.to_string();
    // A lowered mirror kept in lock-step with `out`. ASCII folding never changes
    // byte lengths, so an offset in the mirror is the same offset in `out`, and
    // the matched span is pure ASCII and therefore on character boundaries.
    let mut lowered = out.to_ascii_lowercase();

    for tag in XML_ARTIFACTS {
        let needle = tag.to_ascii_lowercase();
        while let Some(pos) = lowered.find(needle.as_str()) {
            let end = pos + needle.len();
            out.replace_range(pos..end, "");
            lowered.replace_range(pos..end, "");
        }
    }
    out
}
1378
1379pub fn extract_native_tool_calls(text: &str) -> Vec<ToolCallResponse> {
1382 use regex::Regex;
1383 let mut results = Vec::new();
1384
1385 let re_call = Regex::new(
1387 r#"(?s)<\|?tool_call\|?>\s*call:([A-Za-z_][A-Za-z0-9_]*)\{(.*?)\}(?:<\|?tool_call\|?>|\[END_TOOL_REQUEST\])"#
1388 ).unwrap();
1389 let re_arg = Regex::new(r#"(\w+):(?:<\|"\|>(.*?)<\|"\|>|([^,}]*))"#).unwrap();
1390
1391 for cap in re_call.captures_iter(text) {
1392 let name = cap[1].to_string();
1393 let args_str = &cap[2];
1394 let mut arguments = serde_json::Map::new();
1395
1396 for arg_cap in re_arg.captures_iter(args_str) {
1397 let key = arg_cap[1].to_string();
1398 let val_raw = arg_cap
1399 .get(2)
1400 .map(|m| m.as_str())
1401 .or_else(|| arg_cap.get(3).map(|m| m.as_str()))
1402 .unwrap_or("")
1403 .trim();
1404 let normalized_raw = normalize_string_arg(&val_raw.replace("\\\"", "\""));
1405
1406 let val = if normalized_raw == "true" {
1407 Value::Bool(true)
1408 } else if normalized_raw == "false" {
1409 Value::Bool(false)
1410 } else if let Ok(n) = normalized_raw.parse::<i64>() {
1411 Value::Number(n.into())
1412 } else if let Ok(n) = normalized_raw.parse::<u64>() {
1413 Value::Number(n.into())
1414 } else if let Ok(n) = normalized_raw.parse::<f64>() {
1415 serde_json::Number::from_f64(n)
1416 .map(Value::Number)
1417 .unwrap_or(Value::String(normalized_raw.clone()))
1418 } else {
1419 Value::String(normalized_raw)
1420 };
1421
1422 arguments.insert(key, val);
1423 }
1424
1425 results.push(ToolCallResponse {
1426 id: format!("call_{}", rand::random::<u32>()),
1427 call_type: "function".to_string(),
1428 function: ToolCallFn {
1429 name,
1430 arguments: Value::Object(arguments),
1431 },
1432 index: None,
1433 });
1434 }
1435
1436 let re_xml_call = Regex::new(
1438 r#"(?s)<tool_call>\s*<function=([A-Za-z_][A-Za-z0-9_]*)>(.*?)(?:</function>)?\s*</tool_call>"#
1439 ).unwrap();
1440 let re_xml_param =
1441 Regex::new(r#"(?s)<parameter=([A-Za-z_][A-Za-z0-9_]*)>(.*?)</parameter>"#).unwrap();
1442
1443 for cap in re_xml_call.captures_iter(text) {
1444 let name = cap[1].to_string();
1445 let body = &cap[2];
1446 let mut arguments = serde_json::Map::new();
1447
1448 for p_cap in re_xml_param.captures_iter(body) {
1449 let key = p_cap[1].to_string();
1450 let val_raw = p_cap[2].trim();
1451 let val = if val_raw == "true" {
1452 Value::Bool(true)
1453 } else if val_raw == "false" {
1454 Value::Bool(false)
1455 } else if let Ok(n) = val_raw.parse::<i64>() {
1456 Value::Number(n.into())
1457 } else if let Ok(n) = val_raw.parse::<u64>() {
1458 Value::Number(n.into())
1459 } else {
1460 Value::String(val_raw.to_string())
1461 };
1462 arguments.insert(key, val);
1463 }
1464
1465 results.push(ToolCallResponse {
1466 id: format!("call_{}", rand::random::<u32>()),
1467 call_type: "function".to_string(),
1468 function: ToolCallFn {
1469 name,
1470 arguments: Value::Object(arguments),
1471 },
1472 index: None,
1473 });
1474 }
1475
1476 let re_short_call =
1478 Regex::new(r#"(?s)<tool_call>\s*([A-Za-z_][A-Za-z0-9_]*)\((.*?)\)\s*</tool_call>"#)
1479 .unwrap();
1480 let re_short_arg = Regex::new(
1481 r#"([A-Za-z_][A-Za-z0-9_]*)\s*=\s*(?:"((?:\\.|[^"])*)"|'((?:\\.|[^'])*)'|([^,\)]+))"#,
1482 )
1483 .unwrap();
1484
1485 for cap in re_short_call.captures_iter(text) {
1486 let name = cap[1].to_string();
1487 let args_str = cap[2].trim();
1488 let mut arguments = serde_json::Map::new();
1489
1490 for arg_cap in re_short_arg.captures_iter(args_str) {
1491 let key = arg_cap[1].to_string();
1492 let val_raw = arg_cap
1493 .get(2)
1494 .or_else(|| arg_cap.get(3))
1495 .or_else(|| arg_cap.get(4))
1496 .map(|m| m.as_str())
1497 .unwrap_or("")
1498 .trim();
1499 let normalized_raw = normalize_string_arg(&val_raw.replace("\\\"", "\""));
1500
1501 let val = if normalized_raw == "true" {
1502 Value::Bool(true)
1503 } else if normalized_raw == "false" {
1504 Value::Bool(false)
1505 } else if let Ok(n) = normalized_raw.parse::<i64>() {
1506 Value::Number(n.into())
1507 } else if let Ok(n) = normalized_raw.parse::<u64>() {
1508 Value::Number(n.into())
1509 } else if let Ok(n) = normalized_raw.parse::<f64>() {
1510 serde_json::Number::from_f64(n)
1511 .map(Value::Number)
1512 .unwrap_or(Value::String(normalized_raw.clone()))
1513 } else {
1514 Value::String(normalized_raw)
1515 };
1516
1517 arguments.insert(key, val);
1518 }
1519
1520 results.push(ToolCallResponse {
1521 id: format!("call_{}", rand::random::<u32>()),
1522 call_type: "function".to_string(),
1523 function: ToolCallFn {
1524 name,
1525 arguments: Value::Object(arguments),
1526 },
1527 index: None,
1528 });
1529 }
1530
1531 results
1532}
1533
1534pub fn normalize_tool_argument_string(tool_name: &str, raw: &str) -> String {
1535 let trimmed = raw.trim();
1536 let candidate = unwrap_json_string_once(trimmed).unwrap_or_else(|| trimmed.to_string());
1537
1538 let mut value = match serde_json::from_str::<Value>(&candidate) {
1539 Ok(v) => v,
1540 Err(_) => return candidate,
1541 };
1542 normalize_tool_argument_value(tool_name, &mut value);
1543 value.to_string()
1544}
1545
1546pub fn normalize_tool_argument_value(tool_name: &str, value: &mut Value) {
1547 match value {
1548 Value::String(s) => *s = normalize_string_arg(s),
1549 Value::Array(items) => {
1550 for item in items {
1551 normalize_tool_argument_value(tool_name, item);
1552 }
1553 }
1554 Value::Object(map) => {
1555 for val in map.values_mut() {
1556 normalize_tool_argument_value(tool_name, val);
1557 }
1558 if tool_name == "grep_files" {
1559 if let Some(Value::String(pattern)) = map.get_mut("pattern") {
1560 *pattern = normalize_regex_pattern(pattern);
1561 }
1562 }
1563 for key in ["path", "extension", "query", "command", "reason"] {
1564 if let Some(Value::String(s)) = map.get_mut(key) {
1565 *s = normalize_string_arg(s);
1566 }
1567 }
1568 }
1569 _ => {}
1570 }
1571}
1572
/// Removes exactly one layer of matching quotes (`"`, `'` or `` ` ``) from `input`.
///
/// Returns `None` unless `input` both starts and ends with the same quote
/// character (as two distinct characters). Inside the quotes, `\"` becomes `"`
/// and then `\\` becomes `\`; the result is trimmed.
fn unwrap_json_string_once(input: &str) -> Option<String> {
    // A lone quote character strips to "" and then has no suffix left to
    // remove, so inputs shorter than two characters fall out as `None`.
    let inner = ['"', '\'', '`']
        .iter()
        .find_map(|&quote| input.strip_prefix(quote)?.strip_suffix(quote))?;

    let unescaped = inner.replace("\\\"", "\"").replace("\\\\", "\\");
    Some(unescaped.trim().to_string())
}
1586
/// Trims `input` and peels off every layer of matching surrounding quotes.
///
/// A layer is a pair of identical `"`, `'` or `` ` `` characters at both ends;
/// the text is re-trimmed after each layer is removed. A single lone quote
/// character is left as-is.
fn normalize_string_arg(input: &str) -> String {
    let mut current = input.trim();
    loop {
        let peeled = ['"', '\'', '`']
            .iter()
            .find_map(|&quote| current.strip_prefix(quote)?.strip_suffix(quote));
        match peeled {
            Some(inner) => current = inner.trim(),
            None => break,
        }
    }
    current.to_string()
}
1604
1605fn normalize_regex_pattern(input: &str) -> String {
1606 let out = normalize_string_arg(input);
1607 if out.len() >= 2 && out.starts_with('/') && out.ends_with('/') {
1608 out[1..out.len() - 1].to_string()
1609 } else {
1610 out
1611 }
1612}
1613
1614fn prepare_gemma_native_messages(messages: &[ChatMessage]) -> Vec<ChatMessage> {
1615 let mut system_blocks = Vec::new();
1616 let mut prepared = Vec::new();
1617 let mut seeded = false;
1618
1619 for message in messages {
1620 if message.role == "system" {
1621 let cleaned = strip_legacy_turn_wrappers(message.content.as_str())
1622 .trim()
1623 .to_string();
1624 if !cleaned.is_empty() {
1625 system_blocks.push(cleaned);
1626 }
1627 continue;
1628 }
1629
1630 let mut clone = message.clone();
1631 clone.content = MessageContent::Text(strip_legacy_turn_wrappers(message.content.as_str()));
1632
1633 if !seeded && message.role == "user" {
1634 let mut merged = String::new();
1635 if !system_blocks.is_empty() {
1636 merged.push_str("System instructions for this turn:\n");
1637 merged.push_str(&system_blocks.join("\n\n"));
1638 merged.push_str("\n\n");
1639 }
1640 merged.push_str(clone.content.as_str());
1641 clone.content = MessageContent::Text(merged);
1642 seeded = true;
1643 }
1644
1645 prepared.push(clone);
1646 }
1647
1648 if !seeded && !system_blocks.is_empty() {
1649 prepared.insert(
1650 0,
1651 ChatMessage::user(&format!(
1652 "System instructions for this turn:\n{}",
1653 system_blocks.join("\n\n")
1654 )),
1655 );
1656 }
1657
1658 prepared
1659}
1660
/// Removes legacy `<|turn>ROLE\n` openers and `<turn|>` closers from `text`
/// and trims the result. The markers are removed one kind at a time, in the
/// order listed.
fn strip_legacy_turn_wrappers(text: &str) -> String {
    const WRAPPERS: [&str; 5] = [
        "<|turn>system\n",
        "<|turn>user\n",
        "<|turn>assistant\n",
        "<|turn>tool\n",
        "<turn|>",
    ];

    let mut cleaned = text.to_string();
    for marker in WRAPPERS {
        cleaned = cleaned.replace(marker, "");
    }
    cleaned.trim().to_string()
}
1670
1671pub fn strip_native_tool_call_text(text: &str) -> String {
1672 use regex::Regex;
1673 let re_call = Regex::new(
1675 r#"(?s)<\|?tool_call\|?>\s*call:[A-Za-z_][A-Za-z0-9_]*\{.*?\}(?:<\|?tool_call\|?>|\[END_TOOL_REQUEST\])"#
1676 ).unwrap();
1677 let re_xml = Regex::new(r#"(?s)<tool_call>\s*<function=.*?>.*?</tool_call>"#).unwrap();
1679 let re_short =
1681 Regex::new(r#"(?s)<tool_call>\s*[A-Za-z_][A-Za-z0-9_]*\(.*?\)\s*</tool_call>"#).unwrap();
1682 let re_response =
1683 Regex::new(r#"(?s)<\|tool_response\|?>.*?(?:<\|tool_response\|?>|<tool_response\|>)"#)
1684 .unwrap();
1685 let without_calls = re_call.replace_all(text, "");
1686 let without_xml = re_xml.replace_all(without_calls.as_ref(), "");
1687 let without_short = re_short.replace_all(without_xml.as_ref(), "");
1688 re_response
1689 .replace_all(without_short.as_ref(), "")
1690 .trim()
1691 .to_string()
1692}
1693
/// Decides which context length to report for the effective model.
///
/// Returns 0 when no model is loaded (the name is blank or the literal
/// `"no model loaded"`). Otherwise a positive `detected_context` wins; failing
/// that, `previous_context` is reused only if the model is unchanged, and 0 is
/// returned for a different model.
fn resolve_runtime_context(
    previous_model: &str,
    previous_context: usize,
    effective_model: &str,
    detected_context: usize,
) -> usize {
    let nothing_loaded =
        effective_model == "no model loaded" || effective_model.trim().is_empty();
    if nothing_loaded {
        return 0;
    }
    if detected_context > 0 {
        return detected_context;
    }
    match effective_model == previous_model {
        true => previous_context,
        false => 0,
    }
}
1710
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Serializes the tests in this module that change the process-wide working
    /// directory; the test harness runs tests on parallel threads.
    static CWD_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());

    /// Switches the working directory for the lifetime of the guard and restores
    /// the previous one on drop, including when the test panics.
    struct CwdGuard {
        previous: std::path::PathBuf,
        _lock: std::sync::MutexGuard<'static, ()>,
    }

    impl CwdGuard {
        fn enter(dir: &std::path::Path) -> Self {
            // A poisoned lock only means another test panicked; the guard it
            // held has already restored the directory, so continuing is safe.
            let lock = CWD_LOCK
                .lock()
                .unwrap_or_else(|poisoned| poisoned.into_inner());
            let previous = std::env::current_dir().unwrap();
            std::env::set_current_dir(dir).unwrap();
            Self {
                previous,
                _lock: lock,
            }
        }
    }

    impl Drop for CwdGuard {
        fn drop(&mut self) {
            // Runs before the lock field is released, so the restore is covered
            // by the lock as well.
            let _ = std::env::set_current_dir(&self.previous);
        }
    }

    #[test]
    fn system_prompt_includes_running_hematite_version() {
        let engine = InferenceEngine::new(
            "http://localhost:1234/v1".to_string(),
            "strategist".to_string(),
            0,
        )
        .expect("engine");

        let system = engine.build_system_prompt(0, 50, false, true, &[], None, None, &[]);
        assert!(system.contains(crate::HEMATITE_VERSION));
    }

    #[test]
    fn extracts_gemma_native_tool_call_with_mixed_tool_call_tags() {
        let text = r#"<|channel>thought
Reading the next chunk.<channel|>The startup banner wording is likely defined within the UI drawing logic.
<|tool_call>call:read_file{limit:100,offset:100,path:\"src/ui/tui.rs\"}<tool_call|>"#;

        let calls = extract_native_tool_calls(text);
        assert_eq!(calls.len(), 1);
        assert_eq!(calls[0].function.name, "read_file");

        let args: Value = calls[0].function.arguments.clone();
        assert_eq!(args.get("limit").and_then(|v| v.as_i64()), Some(100));
        assert_eq!(args.get("offset").and_then(|v| v.as_i64()), Some(100));
        assert_eq!(
            args.get("path").and_then(|v| v.as_str()),
            Some("src/ui/tui.rs")
        );

        let stripped = strip_native_tool_call_text(text);
        assert!(!stripped.contains("<|tool_call"));
        assert!(!stripped.contains("<tool_call|>"));
    }

    #[test]
    fn strips_hallucinated_tool_responses_from_native_tool_transcript() {
        let text = r#"<|channel>thought
Planning.
<channel|><|tool_call>call:list_files{extension:<|\"|>rs<|\"|>,path:<|\"|>src/<|\"|>}<tool_call|><|tool_response>thought
Mapped src.
<channel|><|tool_call>call:read_file{limit:100,offset:0,path:<|\"|>src/main.rs<|\"|>}<tool_call|><|tool_response>thought
Read main.
<channel|>"#;

        let calls = extract_native_tool_calls(text);
        assert_eq!(calls.len(), 2);
        assert_eq!(calls[0].function.name, "list_files");
        assert_eq!(calls[1].function.name, "read_file");

        let stripped = strip_native_tool_call_text(text);
        assert!(!stripped.contains("<|tool_call"));
        assert!(!stripped.contains("<|tool_response"));
        assert!(!stripped.contains("<tool_response|>"));
    }

    #[test]
    fn create_directory_is_treated_as_mutating_repo_write() {
        let metadata = tool_metadata_for_name("create_directory");
        assert!(metadata.mutates_workspace);
        assert!(!metadata.read_only_friendly);
    }

    #[test]
    fn extracts_qwen_xml_tool_calls_from_reasoning() {
        let text = r#"Based on the project structure, I need to check the binary.
<tool_call>
<function=shell>
<parameter=command>
ls -la hematite.exe
</parameter>
<parameter=reason>
Check if the binary exists
</parameter>
</function>
</tool_call>"#;

        let calls = extract_native_tool_calls(text);
        assert_eq!(calls.len(), 1);
        assert_eq!(calls[0].function.name, "shell");

        let args: Value = calls[0].function.arguments.clone();
        assert_eq!(
            args.get("command").and_then(|v| v.as_str()),
            Some("ls -la hematite.exe")
        );
        assert_eq!(
            args.get("reason").and_then(|v| v.as_str()),
            Some("Check if the binary exists")
        );

        let stripped = strip_native_tool_call_text(text);
        assert!(!stripped.contains("<tool_call>"));
        assert!(!stripped.contains("<function=shell>"));
    }

    #[test]
    fn extracts_shorthand_tool_calls_from_reasoning() {
        let text = r#"<thinking>
The user wants a search first.
</thinking>

I'll search before continuing.

<tool_call>research_web(query="uefn toolbelt python automation unreal engine fortnite")</tool_call>"#;

        let calls = extract_native_tool_calls(text);
        assert_eq!(calls.len(), 1);
        assert_eq!(calls[0].function.name, "research_web");

        let args: Value = calls[0].function.arguments.clone();
        assert_eq!(
            args.get("query").and_then(|v| v.as_str()),
            Some("uefn toolbelt python automation unreal engine fortnite")
        );

        let stripped = strip_native_tool_call_text(text);
        assert!(!stripped.contains("<tool_call>"));
        assert!(!stripped.contains("research_web(query="));
    }

    #[test]
    fn strips_thinking_tag_as_reasoning_prefix() {
        let cleaned =
            strip_think_blocks("<thinking>\nThe user wants a search.\n</thinking>\nVisible answer");
        assert_eq!(cleaned, "");
    }

    #[test]
    fn resolve_runtime_context_returns_zero_when_no_model_loaded() {
        assert_eq!(
            resolve_runtime_context("qwen/qwen3.5-9b", 32000, "no model loaded", 0),
            0
        );
    }

    #[test]
    fn resolve_runtime_context_preserves_previous_only_for_same_model() {
        assert_eq!(
            resolve_runtime_context("qwen/qwen3.5-9b", 32000, "qwen/qwen3.5-9b", 0),
            32000
        );
        assert_eq!(
            resolve_runtime_context("qwen/qwen3.5-9b", 32000, "bonsai-8b", 0),
            0
        );
    }

    #[test]
    fn load_instruction_files_includes_workspace_guidance_files() {
        let temp = tempfile::tempdir().unwrap();

        fs::write(
            temp.path().join("SKILLS.md"),
            "# Workspace Skills\n- Prefer API-first changes before UI polish.",
        )
        .unwrap();

        let loaded = {
            let _cwd = CwdGuard::enter(temp.path());
            load_instruction_files()
        };

        assert!(loaded.contains("SKILLS.md"));
        assert!(loaded.contains("Prefer API-first changes before UI polish."));
    }

    #[test]
    fn load_agent_skill_catalog_includes_skill_directory_entries() {
        let temp = tempfile::tempdir().unwrap();

        std::fs::create_dir_all(temp.path().join(".agents/skills/code-review")).unwrap();
        fs::write(
            temp.path().join(".agents/skills/code-review/SKILL.md"),
            "---\nname: code-review\ndescription: Review diffs and flag regressions.\ncompatibility: Requires git\n---\n",
        )
        .unwrap();

        let loaded = {
            let _cwd = CwdGuard::enter(temp.path());
            load_agent_skill_catalog()
        };

        assert!(loaded.contains("Agent Skills Catalog"));
        assert!(loaded.contains("code-review"));
        assert!(loaded.contains("Review diffs and flag regressions."));
    }
}