Skip to main content

hematite/agent/
inference.rs

1use serde::Serialize;
2use serde_json::Value;
3use tokio::sync::{mpsc, Semaphore};
4
5pub use crate::agent::economics::{SessionEconomics, ToolRecord};
6pub use crate::agent::types::*;
7
8// ── Engine ────────────────────────────────────────────────────────────────────
9
/// Shared inference state: the active model provider plus the cached runtime
/// profile, persona settings, and control flags used while building prompts
/// and talking to the backend.
pub struct InferenceEngine {
    /// Active model backend behind an async read/write lock. `new` installs
    /// either an `OllamaProvider` or an `LmsProvider` here.
    pub provider:
        std::sync::Arc<tokio::sync::RwLock<Box<dyn crate::agent::provider::ModelProvider>>>,
    /// Model name last recorded by `set_runtime_profile` (empty until then).
    pub cached_model: std::sync::Arc<std::sync::RwLock<String>>,
    /// Context length last recorded by `set_runtime_profile` (0 until then).
    pub cached_context: std::sync::Arc<std::sync::atomic::AtomicUsize>,
    /// `scheme://authority` portion of the API URL handed to `new`.
    pub base_url: String,
    /// Persona label interpolated into the non-professional system prompts.
    pub species: String,
    /// Persona setting stored at construction; the prompt builders take their
    /// own `snark` argument rather than reading this field.
    pub snark: u8,
    /// Semaphore created with 3 permits in `new`; its acquirers are outside this
    /// view — presumably bounds concurrent KV-cache users, TODO confirm.
    pub kv_semaphore: Semaphore,
    /// Session economics record shared behind a blocking mutex.
    pub economics: std::sync::Arc<std::sync::Mutex<SessionEconomics>>,
    /// Optional model ID for worker-level tasks (Swarms / research).
    pub worker_model: Option<String>,
    /// Opt-in Gemma-native request shaping. Off by default.
    pub gemma_native_formatting: std::sync::Arc<std::sync::atomic::AtomicBool>,
    /// Global cancellation token for hard-interrupting the inference stream.
    pub cancel_token: std::sync::Arc<std::sync::atomic::AtomicBool>,
}
27
/// Returns `true` when the model name contains `gemma-4` or `gemma4`,
/// compared ASCII case-insensitively.
pub fn is_hematite_native_model(model: &str) -> bool {
    let normalized = model.to_ascii_lowercase();
    ["gemma-4", "gemma4"]
        .iter()
        .any(|marker| normalized.contains(*marker))
}
32
33fn should_use_native_formatting(engine: &InferenceEngine, model: &str) -> bool {
34    is_hematite_native_model(model) && engine.gemma_native_formatting_enabled()
35}
36
37// ── OpenAI Tool Definition ────────────────────────────────────────────────────
38
39pub fn tool_metadata_for_name(name: &str) -> ToolMetadata {
40    if name.starts_with("mcp__") {
41        let lower = name.to_ascii_lowercase();
42        let mutates_workspace = [
43            "__edit",
44            "__write",
45            "__create",
46            "__move",
47            "__delete",
48            "__remove",
49            "__rename",
50            "__replace",
51            "__patch",
52        ]
53        .iter()
54        .any(|needle| lower.contains(needle));
55        return ToolMetadata {
56            category: ToolCategory::External,
57            mutates_workspace,
58            external_surface: true,
59            trust_sensitive: true,
60            read_only_friendly: !mutates_workspace,
61            plan_scope: false,
62        };
63    }
64
65    match name {
66        "read_file" | "inspect_lines" | "grep_files" | "list_files" => ToolMetadata {
67            category: ToolCategory::RepoRead,
68            mutates_workspace: false,
69            external_surface: false,
70            trust_sensitive: false,
71            read_only_friendly: true,
72            plan_scope: true,
73        },
74        "create_directory" | "write_file" | "edit_file" | "patch_hunk" | "multi_search_replace" => {
75            ToolMetadata {
76                category: ToolCategory::RepoWrite,
77                mutates_workspace: true,
78                external_surface: false,
79                trust_sensitive: true,
80                read_only_friendly: false,
81                plan_scope: true,
82            }
83        }
84        "trace_runtime_flow" => ToolMetadata {
85            category: ToolCategory::Architecture,
86            mutates_workspace: false,
87            external_surface: false,
88            trust_sensitive: false,
89            read_only_friendly: true,
90            plan_scope: false,
91        },
92        "describe_toolchain" => ToolMetadata {
93            category: ToolCategory::Toolchain,
94            mutates_workspace: false,
95            external_surface: false,
96            trust_sensitive: false,
97            read_only_friendly: true,
98            plan_scope: false,
99        },
100        "shell" => ToolMetadata {
101            category: ToolCategory::Runtime,
102            mutates_workspace: true,
103            external_surface: false,
104            trust_sensitive: true,
105            read_only_friendly: false,
106            plan_scope: false,
107        },
108        "inspect_host" => ToolMetadata {
109            category: ToolCategory::Runtime,
110            mutates_workspace: false,
111            external_surface: false,
112            trust_sensitive: false,
113            read_only_friendly: true,
114            plan_scope: false,
115        },
116        "resolve_host_issue" => ToolMetadata {
117            category: ToolCategory::Runtime,
118            mutates_workspace: true,
119            external_surface: true,
120            trust_sensitive: true,
121            read_only_friendly: false,
122            plan_scope: false,
123        },
124        "run_hematite_maintainer_workflow" => ToolMetadata {
125            category: ToolCategory::Workflow,
126            mutates_workspace: true,
127            external_surface: false,
128            trust_sensitive: true,
129            read_only_friendly: false,
130            plan_scope: false,
131        },
132        "run_workspace_workflow" => ToolMetadata {
133            category: ToolCategory::Workflow,
134            mutates_workspace: true,
135            external_surface: false,
136            trust_sensitive: true,
137            read_only_friendly: false,
138            plan_scope: false,
139        },
140        "verify_build" => ToolMetadata {
141            category: ToolCategory::Verification,
142            mutates_workspace: false,
143            external_surface: false,
144            trust_sensitive: false,
145            read_only_friendly: true,
146            plan_scope: true,
147        },
148        "git_commit" | "git_push" | "git_remote" | "git_onboarding" | "git_worktree" => {
149            ToolMetadata {
150                category: ToolCategory::Git,
151                mutates_workspace: true,
152                external_surface: false,
153                trust_sensitive: true,
154                read_only_friendly: false,
155                plan_scope: false,
156            }
157        }
158        "research_web" | "fetch_docs" => ToolMetadata {
159            category: ToolCategory::Research,
160            mutates_workspace: false,
161            external_surface: false,
162            trust_sensitive: false,
163            read_only_friendly: true,
164            plan_scope: false,
165        },
166        "vision_analyze" => ToolMetadata {
167            category: ToolCategory::Vision,
168            mutates_workspace: false,
169            external_surface: false,
170            trust_sensitive: false,
171            read_only_friendly: true,
172            plan_scope: false,
173        },
174        "lsp_definitions"
175        | "lsp_references"
176        | "lsp_hover"
177        | "lsp_rename_symbol"
178        | "lsp_get_diagnostics"
179        | "lsp_search_symbol" => ToolMetadata {
180            category: ToolCategory::Lsp,
181            mutates_workspace: false,
182            external_surface: false,
183            trust_sensitive: false,
184            read_only_friendly: true,
185            plan_scope: false,
186        },
187        "auto_pin_context" | "list_pinned" | "clarify" => ToolMetadata {
188            category: ToolCategory::Workflow,
189            mutates_workspace: false,
190            external_surface: false,
191            trust_sensitive: false,
192            read_only_friendly: true,
193            plan_scope: true,
194        },
195        "manage_tasks" => ToolMetadata {
196            category: ToolCategory::Workflow,
197            mutates_workspace: false,
198            external_surface: false,
199            trust_sensitive: false,
200            read_only_friendly: true,
201            plan_scope: false,
202        },
203        _ => ToolMetadata {
204            category: ToolCategory::Other,
205            mutates_workspace: false,
206            external_surface: false,
207            trust_sensitive: false,
208            read_only_friendly: true,
209            plan_scope: false,
210        },
211    }
212}
213// ── Message types migrated to types.rs ────────────────────────────────────────
214
215// ── HTTP request / response shapes ───────────────────────────────────────────
216
// Lower and upper bounds, in tokens, on the output reservation (going by the
// names). The code that applies them is outside this view — TODO confirm usage.
const MIN_RESERVED_OUTPUT_TOKENS: usize = 1024;
const MAX_RESERVED_OUTPUT_TOKENS: usize = 4096;
219
/// Tiny tier: context windows of at most 8,192 tokens (including 0).
fn is_tiny_context_window(context_length: usize) -> bool {
    matches!(context_length, 0..=8_192)
}
223
/// Compact tier: context windows larger than 8,192 tokens and no larger than
/// 49,152 tokens.
fn is_compact_context_window(context_length: usize) -> bool {
    (8_193..=49_152).contains(&context_length)
}
227
/// Public wrapper around the private `is_compact_context_window` check, so code
/// outside this module can ask whether a context length falls in the compact
/// tier (more than 8,192 and at most 49,152 tokens).
pub fn is_compact_context_window_pub(context_length: usize) -> bool {
    is_compact_context_window(context_length)
}
231
/// Recognises provider error text that signals a context-limit overflow.
///
/// `lower` must already be lowercased by the caller. A match is either both
/// `n_keep` and `n_ctx` appearing together, or any one of the standalone phrases.
fn is_provider_context_limit_detail(lower: &str) -> bool {
    const STANDALONE_PHRASES: [&str; 4] = [
        "context length",
        "keep from the initial prompt",
        "prompt is greater than the context length",
        "exceeds the context window",
    ];

    let keep_vs_ctx = ["n_keep", "n_ctx"]
        .iter()
        .all(|token| lower.contains(*token));

    keep_vs_ctx
        || STANDALONE_PHRASES
            .iter()
            .any(|phrase| lower.contains(*phrase))
}
239
240fn classify_runtime_failure_tag(detail: &str) -> &'static str {
241    let lower = detail.to_ascii_lowercase();
242    if lower.contains("context_window_blocked")
243        || lower.contains("context ceiling reached")
244        || lower.contains("exceeds the")
245        || is_provider_context_limit_detail(&lower)
246    {
247        "context_window"
248    } else if lower.contains("empty response from model")
249        || lower.contains("model returned an empty response")
250    {
251        "empty_model_response"
252    } else if lower.contains("action blocked:")
253        || lower.contains("access denied")
254        || lower.contains("declined by user")
255    {
256        "tool_policy_blocked"
257    } else {
258        "provider_degraded"
259    }
260}
261
/// Returns the user-facing recovery hint for a failure tag. Tags other than the
/// three recognised ones get the generic retry guidance.
fn runtime_failure_guidance(tag: &str) -> &'static str {
    if tag == "context_window" {
        return "Narrow the request, compact the session, or preserve grounded tool output instead of restyling it. If LM Studio reports a smaller live n_ctx than Hematite expected, reload or re-detect the model budget before retrying.";
    }
    if tag == "empty_model_response" {
        return "Retry once automatically, then narrow the turn or restart LM Studio if the model keeps returning nothing.";
    }
    if tag == "tool_policy_blocked" {
        return "Stay inside the allowed workflow or switch modes before retrying.";
    }
    "Retry once automatically, then narrow the turn or restart LM Studio if it persists."
}
276
277fn format_runtime_failure_message(detail: &str) -> String {
278    let tag = classify_runtime_failure_tag(detail);
279    format!(
280        "[failure:{}] {} Detail: {}",
281        tag,
282        runtime_failure_guidance(tag),
283        detail.trim()
284    )
285}
286
287// ── Events pushed to the TUI (migrated to types.rs) ──────────────────────────
288
289// ── Engine implementation ─────────────────────────────────────────────────────
290
291impl InferenceEngine {
292    pub fn new(
293        api_url: String,
294        species: String,
295        snark: u8,
296    ) -> Result<Self, Box<dyn std::error::Error>> {
297        let client = reqwest::Client::builder()
298            .timeout(std::time::Duration::from_secs(180))
299            .build()?;
300
301        let base_url = {
302            let trimmed = api_url.trim_end_matches('/');
303            if let Some(scheme_end) = trimmed.find("://") {
304                let after_scheme = &trimmed[scheme_end + 3..];
305                if let Some(path_start) = after_scheme.find('/') {
306                    format!(
307                        "{}://{}",
308                        &trimmed[..scheme_end],
309                        &after_scheme[..path_start]
310                    )
311                } else {
312                    trimmed.to_string()
313                }
314            } else {
315                trimmed.to_string()
316            }
317        };
318
319        let api_url_full = if api_url.ends_with("/chat/completions") {
320            api_url
321        } else if api_url.ends_with("/") {
322            format!("{}chat/completions", api_url)
323        } else {
324            format!("{}/chat/completions", api_url)
325        };
326
327        let lms = crate::agent::lms::LmsHarness::new();
328        let ollama_harness = crate::agent::ollama::OllamaHarness::new(&base_url);
329
330        let provider = if base_url.contains("11434") {
331            Box::new(crate::agent::provider::OllamaProvider {
332                client: client.clone(),
333                base_url: base_url.clone(),
334                model: String::new(),
335                context_length: 8192,
336                embed_model: std::sync::Arc::new(std::sync::RwLock::new(None)),
337                ollama: ollama_harness,
338            }) as Box<dyn crate::agent::provider::ModelProvider>
339        } else {
340            Box::new(crate::agent::provider::LmsProvider {
341                client: client.clone(),
342                api_url: api_url_full,
343                base_url: base_url.clone(),
344                model: String::new(),
345                context_length: 0,
346                lms,
347            }) as Box<dyn crate::agent::provider::ModelProvider>
348        };
349
350        Ok(Self {
351            provider: std::sync::Arc::new(tokio::sync::RwLock::new(provider)),
352            cached_model: std::sync::Arc::new(std::sync::RwLock::new(String::new())),
353            cached_context: std::sync::Arc::new(std::sync::atomic::AtomicUsize::new(0)),
354            base_url: base_url.clone(),
355            species: species.clone(),
356            snark,
357            kv_semaphore: Semaphore::new(3),
358            economics: std::sync::Arc::new(std::sync::Mutex::new(SessionEconomics::new())),
359            worker_model: None,
360            gemma_native_formatting: std::sync::Arc::new(std::sync::atomic::AtomicBool::new(false)),
361            cancel_token: std::sync::Arc::new(std::sync::atomic::AtomicBool::new(false)),
362        })
363    }
364
365    pub fn set_gemma_native_formatting(&self, enabled: bool) {
366        self.gemma_native_formatting
367            .store(enabled, std::sync::atomic::Ordering::SeqCst);
368    }
369
370    pub async fn health_check(&self) -> bool {
371        let p = self.provider.read().await;
372        p.health_check().await
373    }
374
375    pub async fn provider_name(&self) -> String {
376        let p = self.provider.read().await;
377        p.name().to_string()
378    }
379
380    pub async fn get_loaded_model(&self) -> Option<String> {
381        let p = self.provider.read().await;
382        match p.detect_model().await {
383            Ok(m) if m.is_empty() => Some("".to_string()),
384            Ok(m) => Some(m),
385            Err(_) => None,
386        }
387    }
388
389    pub async fn get_embedding_model(&self) -> Option<String> {
390        let p = self.provider.read().await;
391        p.get_embedding_model().await
392    }
393
394    pub async fn load_model(&self, model_id: &str) -> Result<(), String> {
395        let p = self.provider.read().await;
396        p.load_model(model_id).await
397    }
398
399    pub async fn load_model_with_context(
400        &self,
401        model_id: &str,
402        context_length: Option<usize>,
403    ) -> Result<(), String> {
404        let p = self.provider.read().await;
405        p.load_model_with_context(model_id, context_length).await
406    }
407
408    pub async fn load_embedding_model(&self, model_id: &str) -> Result<(), String> {
409        let p = self.provider.read().await;
410        p.load_embedding_model(model_id).await
411    }
412
413    pub async fn list_provider_models(
414        &self,
415        kind: crate::agent::provider::ProviderModelKind,
416        loaded_only: bool,
417    ) -> Result<Vec<String>, String> {
418        let p = self.provider.read().await;
419        p.list_models(kind, loaded_only).await
420    }
421
422    pub async fn unload_model(&self, model_id: Option<&str>, all: bool) -> Result<String, String> {
423        let p = self.provider.read().await;
424        p.unload_model(model_id, all).await
425    }
426
427    pub async fn unload_embedding_model(&self, model_id: Option<&str>) -> Result<String, String> {
428        let p = self.provider.read().await;
429        p.unload_embedding_model(model_id).await
430    }
431
432    pub async fn prewarm(&self) -> Result<(), String> {
433        let p = self.provider.read().await;
434        p.prewarm().await
435    }
436
437    pub async fn detect_context_length(&self) -> usize {
438        let p = self.provider.read().await;
439        p.detect_context_length().await
440    }
441
442    pub async fn set_runtime_profile(&self, model: &str, context_length: usize) {
443        if let Ok(mut guard) = self.cached_model.write() {
444            *guard = model.to_string();
445        }
446        self.cached_context
447            .store(context_length, std::sync::atomic::Ordering::SeqCst);
448
449        let mut p = self.provider.write().await;
450        p.set_runtime_profile(model, context_length);
451    }
452
453    pub async fn refresh_runtime_profile(&self) -> Option<(String, usize, bool)> {
454        let previous_model = self.current_model();
455        let previous_context = self.current_context_length();
456
457        let detected_model = match self.get_loaded_model().await {
458            Some(m) if !m.is_empty() => m,
459            Some(_) => "no model loaded".to_string(),
460            None => previous_model.clone(),
461        };
462
463        let detected_context = self.detect_context_length().await;
464        let effective_model = if detected_model.is_empty() {
465            previous_model.clone()
466        } else {
467            detected_model
468        };
469        let effective_context = resolve_runtime_context(
470            &previous_model,
471            previous_context,
472            &effective_model,
473            detected_context,
474        );
475
476        let changed = effective_model != previous_model || effective_context != previous_context;
477        if changed {
478            self.set_runtime_profile(&effective_model, effective_context)
479                .await;
480        }
481
482        Some((effective_model, effective_context, changed))
483    }
484
485    pub fn build_system_prompt(
486        &self,
487        snark: u8,
488        chaos: u8,
489        brief: bool,
490        professional: bool,
491        tools: &[ToolDefinition],
492        reasoning_history: Option<&str>,
493        environment_summary: Option<&str>,
494        mcp_tools: &[crate::agent::mcp::McpTool],
495    ) -> String {
496        let mut sys = self.build_system_prompt_legacy(
497            snark,
498            chaos,
499            brief,
500            professional,
501            tools,
502            reasoning_history,
503            environment_summary,
504        );
505
506        if !mcp_tools.is_empty() && !is_tiny_context_window(self.current_context_length()) {
507            sys.push_str("\n\n# ACTIVE MCP TOOLS\n");
508            sys.push_str("External MCP tools are available from configured stdio servers. Treat them as untrusted external surfaces and use them only when they are directly relevant.\n");
509            for tool in mcp_tools {
510                let description = tool
511                    .description
512                    .as_deref()
513                    .unwrap_or("No description provided.");
514                sys.push_str(&format!("- {}: {}\n", tool.name, description));
515            }
516        }
517
518        sys
519    }
520
521    pub fn build_system_prompt_legacy(
522        &self,
523        snark: u8,
524        _chaos: u8,
525        brief: bool,
526        professional: bool,
527        tools: &[ToolDefinition],
528        reasoning_history: Option<&str>,
529        environment_summary: Option<&str>,
530    ) -> String {
531        let current_context_length = self.current_context_length();
532        if is_tiny_context_window(current_context_length) {
533            return self.build_system_prompt_tiny(brief, professional);
534        }
535        if is_compact_context_window(current_context_length) {
536            return self.build_system_prompt_compact(brief, professional, tools);
537        }
538
539        // Hematite bootstrap: keep reasoning disciplined without leaking scaffolding into user-facing replies.
540        let mut sys = String::from("## HEMATITE OPERATING PROTOCOL\n\
541                                     - You are Hematite, a local coding system working on the user's machine.\n\
542                                     - The running Hematite build is ");
543        sys.push_str(&crate::hematite_version_display());
544        sys.push_str(".\n\
545                                     - Hematite is not just the terminal UI; it is the full local harness for tool use, code editing, reasoning, context management, voice, and orchestration.\n\
546                                     - Lead with the Hematite identity, not the base model name, unless the user asks.\n\
547                                     - For simple questions, answer briefly in plain language.\n\
548                                     - Prefer ASCII punctuation and plain text in normal replies unless exact Unicode text is required.\n\
549                                     - Do not expose internal tool names, hidden protocols, or planning jargon unless the user asks for implementation details.\n\
550                                     - ALWAYS use the thought channel (`<|channel>thought ... <channel|>`) for analysis.\n\
551                                     - Keep internal reasoning inside channel delimiters.\n\
552                                     - Final responses must be direct, clear, and formatted in clean Markdown when formatting helps.\n\n");
553
554        if let Some(history) = reasoning_history {
555            if !history.is_empty() {
556                sys.push_str("# INTERNAL STATE (ACTIVE TURN)\n");
557                sys.push_str(history);
558                sys.push_str("\n\n");
559            }
560        }
561
562        // ADAPTIVE THOUGHT EFFICIENCY (Gemma-4 Native)
563        if brief {
564            sys.push_str("# ADAPTIVE THOUGHT EFFICIENCY: LOW\n\
565                          - Core directive: Think efficiently. Avoid redundant internal derivation.\n\
566                          - Depth: Surface-level verification only.\n\n");
567        } else {
568            sys.push_str("# ADAPTIVE THOUGHT EFFICIENCY: HIGH\n\
569                          - Core directive: Think in depth when the task needs it. Explore edge cases and architectural implications.\n\
570                          - Depth: Full multi-step derivation required.\n\n");
571        }
572
573        // IDENTITY & ENVIRONMENT
574        let os = std::env::consts::OS;
575        if let Some(summary) = environment_summary {
576            sys.push_str("## HOST ENVIRONMENT\n");
577            sys.push_str(summary);
578            sys.push_str("\n\n");
579        }
580
581        if professional {
582            sys.push_str(&format!(
583                "You are Hematite, a local coding system running on {}. \
584                 The TUI is one interface layer, not your whole identity. \
585                 Be direct, practical, technically precise, and ASCII-first in ordinary prose. \
586                 Skip filler and keep the focus on the work.\n",
587                os
588            ));
589        } else {
590            sys.push_str(&format!(
591                "You are Hematite, a [{}] local AI coding system (Snark: {}/100) running on the user's hardware on {}. \
592                 The terminal UI is only one surface of the system. \
593                 Be direct, efficient, technical, and ASCII-first in ordinary prose. \
594                 When the user asks who you are, describe Hematite as the local coding harness and agent, not merely the TUI.\n",
595                self.species, snark, os
596            ));
597        }
598
599        // Inject loaded model and context window so the model knows its own budget.
600        let current_model = self.current_model();
601        if !current_model.is_empty() {
602            sys.push_str(&format!(
603                "Loaded model: {} | Context window: {} tokens. \
604                 Calibrate response length and tool-call depth to fit within this budget.\n\n",
605                current_model, current_context_length
606            ));
607            if is_hematite_native_model(&current_model) {
608                sys.push_str(
609                    "Sovereign native note: prefer exact tool JSON with no extra prose when calling tools. \
610                     Do not wrap `path`, `extension`, or other string arguments in extra quote layers. \
611                     For `grep_files`, provide the raw regex pattern without surrounding slash delimiters.\n\n",
612                );
613            }
614        } else {
615            sys.push_str(&format!(
616                "Context window: {} tokens. Calibrate response length to fit within this budget.\n\n",
617                current_context_length
618            ));
619        }
620
621        // PROTOCOL & TOOLS
622        let shell_desc = if cfg!(target_os = "windows") {
623            "[EXTERNAL SHELL]: `powershell` (Windows).\n\
624             - Use ONLY for builds, tests, or file migrations. \n\
625             - You MUST use the `powershell` tool directly. \n\
626             - NEVER attempt to use `bash`, `sh`, or `/dev/null` on this system. \n\n"
627        } else {
628            "[EXTERNAL SHELL]: `bash` (Unix).\n\
629             - Use ONLY for builds, tests, or file migrations. \n\
630             - NEVER wrap bash in other shells. \n\n"
631        };
632
633        sys.push_str("You distinguish strictly between [INTERNAL TOOLS] and [EXTERNAL SHELL].\n\n\
634                      [INTERNAL TOOLS]: `list_files`, `grep_files`, `read_file`, `edit_file`, `write_file`.\n\
635                      - These are the ONLY way to explore and modify code. \n\
636                      - NEVER attempt to run these as shell commands (e.g. `bash $ grep_files` is FORBIDDEN).\n\n");
637        sys.push_str(shell_desc);
638
639        // ANTI-LOOPING & SELF-AUDIT
640        sys.push_str("ANTI-LOOPING: If a tool returns (no output) or 'not recognized' in a shell, pivot to a different internal tool. \n\
641                      SELF-AUDIT: If you see your own command echoed back as the result, the shell failed; pivot to an internal tool immediately.\n\n");
642
643        sys.push_str("## THE COMPUTATIONAL RESEARCH MANDATE\n\
644                      - You are a Lead Computational Researcher and Senior Scientist.\n\
645                      - ZERO-TRUST MATH: You never guess results for math, physics, or algorithmic complexity.\n\
646                      - UNIT-SAFETY: All physical calculations must use `scientific_compute(mode='units')` to ensure dimensional consistency.\n\
647                      - SYMBOLIC PROOF: Use `scientific_compute(mode='symbolic')` for formal algebraic derivations and multi-variable proofs. Set `latex: true` for formal presentation.\n\
648                      - EMPIRICAL AUDITING: All algorithmic performance claims must be verified with `scientific_compute(mode='complexity')` before being finalized.\n\
649                      - SCIENTIFIC MEMORY (LEDGER): Use `scientific_compute(mode='ledger')` to persist long-form derivations, constants, and theorem steps to `.hematite/docs/scientific_ledger.md`. This ledger is RAG-indexed by The Vein, giving you persistent cross-session memory for project math.\n\
650                      - DATASET COMPUTATION: Use `scientific_compute(mode='dataset')` to perform high-precision calculations on SQL results (CSV/DB/JSON). This bridges data science and formal research.\n\
651                      - LIGHTWEIGHT SANDBOX: Prioritize pure Python implementations for all research tasks. Do NOT attempt to import heavy external libraries like 'numpy', 'scipy', or 'pandas' unless you have verified they are available or the user explicitly asks to work in a specific heavy environment or venv.\n\
652                      - Every result must be backed by the executable logic used to prove it.\n\n");
653
654        // Consolidated: All directives are now handled by the authoritative prompt.rs builder.
655        sys.push_str("## TURN ADVISORY\n");
656        if brief {
657            sys.push_str("- BRIEF MODE: Respond with ONE concise sentence/block unless more code is required.\n");
658        }
659        sys.push_str("- INTERNAL REASONING: Plan your move in the thought channel first.\n");
660
661        // Scaffolding protocol — enforces build validation after project creation.
662        sys.push_str("\n## SCAFFOLDING PROTOCOL\n\
663            2. ALWAYS call verify_build immediately after to confirm the project compiles/runs.\n\
664            3. If verify_build fails, use `lsp_get_diagnostics` to find the exact line and error.\n\
665            4. Fix all errors before declaring success.\n\n\
666            ## PRE-FLIGHT SCOPING PROTOCOL\n\
667            Before attempting any multi-file task or complex refactor:\n\
668            1. Identify 1-3 core files (entry-points, central models, or types) that drive the logic.\n\
669            2. Use `auto_pin_context` to keep those files in active context.\n\
670            3. Only then proceed to deeper edits or research.\n\n\
671            ## REFACTORING PROTOCOL\n\
672            When modifying existing code or renaming symbols:\n\
673            1. Use `lsp_rename_symbol` for all variable/function renames to ensure project-wide safety.\n\
674            2. After any significant edit, call `lsp_get_diagnostics` on the affected files.\n\
675            3. If errors are found, you MUST fix them. Do not wait for the user to point them out.\n\n");
676
677        // Inject CLAUDE.md / instruction files from the project directory.
678        sys.push_str(&load_instruction_files());
679        sys.push_str(&load_agent_skill_catalog());
680
681        // Inject cross-session memories synthesized by DeepReflect.
682        sys.push_str(&crate::memory::deep_reflect::load_recent_memories());
683
684        // Native Gemma-4 Tool Declarations
685        if !tools.is_empty() {
686            sys.push_str("\n\n# NATIVE TOOL DECLARATIONS\n");
687            for tool in tools {
688                let schema = serde_json::to_string(&tool.function.parameters)
689                    .unwrap_or_else(|_| "{}".to_string());
690                sys.push_str(&format!(
691                    "<|tool>declaration:{}{}{}<tool|>\n",
692                    tool.function.name, "{", schema
693                ));
694                sys.push_str(&format!("// {})\n", tool.function.description));
695            }
696        }
697
698        sys
699    }
700
701    fn build_system_prompt_compact(
702        &self,
703        brief: bool,
704        professional: bool,
705        tools: &[ToolDefinition],
706    ) -> String {
707        // Compact tier: fits in 16k context. Keeps tool names + one-line descriptions
708        // but skips full JSON schemas, verbose protocol sections, and CLAUDE.md injection.
709        let current_model = self.current_model();
710        let current_context_length = self.current_context_length();
711        let os = std::env::consts::OS;
712
713        let mut sys = format!(
714            "You are Hematite {}, a local coding harness working on the user's machine.\n",
715            crate::hematite_version_display()
716        );
717        if professional {
718            sys.push_str("Be direct, technical, concise, and ASCII-first.\n");
719        } else {
720            sys.push_str(&format!(
721                "You are a [{}] local AI coding system. Be direct, concise, and technical.\n",
722                self.species
723            ));
724        }
725        sys.push_str(&format!(
726            "Model: {} | Context: {} tokens. Keep turns focused.\n",
727            current_model, current_context_length
728        ));
729        if is_hematite_native_model(&current_model) {
730            sys.push_str(
731                "Sovereign native: use exact tool JSON. No extra prose in tool calls. \
732                 Raw regex patterns in grep_files, no slash delimiters.\n",
733            );
734        }
735        if cfg!(target_os = "windows") {
736            sys.push_str(&format!(
737                "OS: {}. Use PowerShell for shell. Never bash or /dev/null.\n",
738                os
739            ));
740        } else {
741            sys.push_str(&format!("OS: {}. Use native Unix shell.\n", os));
742        }
743        if brief {
744            sys.push_str("BRIEF MODE: one concise sentence unless code is required.\n");
745        }
746
747        sys.push_str(
748            "\nCORE RULES:\n\
749             - Read before editing: use `read_file` or `inspect_lines` on a file before mutating it.\n\
750             - Verify after edits: run `verify_build` after code changes, before committing.\n\
751             - One tool at a time. Do not batch unrelated tool calls.\n\
752             - Do not invent tool names, file paths, or symbols not confirmed by tool output.\n\
753             - Built-in tools first: prefer `read_file`, `edit_file`, `grep_files` over MCP filesystem tools.\n\
754             - STARTUP/UI CHANGES: read the owner file first, make one focused edit, then run `verify_build`.\n",
755        );
756
757        if !tools.is_empty() {
758            sys.push_str("\n# AVAILABLE TOOLS\n");
759            for tool in tools {
760                let desc: String = tool.function.description.chars().take(120).collect();
761                sys.push_str(&format!("- {}: {}\n", tool.function.name, desc));
762            }
763        }
764
765        sys
766    }
767
768    fn build_system_prompt_tiny(&self, brief: bool, professional: bool) -> String {
769        let current_model = self.current_model();
770        let current_context_length = self.current_context_length();
771        let os = std::env::consts::OS;
772        let mut sys = format!(
773            "You are Hematite {}, a local coding harness working on the user's machine.\n",
774            crate::hematite_version_display()
775        );
776        if professional {
777            sys.push_str("Be direct, technical, concise, and ASCII-first.\n");
778        } else {
779            sys.push_str(&format!(
780                "You are a [{}] local AI coding system. Be direct, concise, and technical.\n",
781                self.species
782            ));
783        }
784        if !current_model.is_empty() {
785            sys.push_str(&format!(
786                "Loaded model: {} | Context window: {} tokens.\n",
787                current_model, current_context_length
788            ));
789        } else {
790            sys.push_str(&format!(
791                "Context window: {} tokens.\n",
792                current_context_length
793            ));
794        }
795        sys.push_str("Tiny-context mode is active. Keep turns short. Prefer final answers over long analysis. Only use tools when necessary.\n");
796        sys.push_str("Use built-in workspace tools for local inspection and edits. Do not invent tools, files, channels, or symbols.\n");
797        sys.push_str("Before editing an existing file, gather recent file evidence first. After code edits, verify before commit.\n");
798        if cfg!(target_os = "windows") {
799            sys.push_str(&format!(
800                "You are running on {}. Use PowerShell for shell work. Do not assume bash or /dev/null.\n",
801                os
802            ));
803        } else {
804            sys.push_str(&format!(
805                "You are running on {}. Use the native Unix shell conventions.\n",
806                os
807            ));
808        }
809        if brief {
810            sys.push_str("BRIEF MODE: answer in one concise sentence unless code is required.\n");
811        }
812        sys
813    }
814
815    pub fn current_model(&self) -> String {
816        self.cached_model
817            .read()
818            .map(|g| g.clone())
819            .unwrap_or_default()
820    }
821
822    pub fn current_context_length(&self) -> usize {
823        self.cached_context
824            .load(std::sync::atomic::Ordering::Relaxed)
825    }
826
827    pub fn is_compact_context_window(&self) -> bool {
828        let len = self.current_context_length();
829        len <= 16384
830    }
831
832    pub fn gemma_native_formatting_enabled(&self) -> bool {
833        self.gemma_native_formatting
834            .load(std::sync::atomic::Ordering::Relaxed)
835    }
836
837    pub async fn call_with_tools(
838        &self,
839        messages: &[ChatMessage],
840        tools: &[ToolDefinition],
841        // Override the model ID for this call. None = use the live runtime model.
842        model_override: Option<&str>,
843    ) -> Result<
844        (
845            Option<String>,
846            Option<Vec<ToolCallResponse>>,
847            Option<TokenUsage>,
848            Option<String>,
849        ),
850        String,
851    > {
852        let _permit = self
853            .kv_semaphore
854            .acquire()
855            .await
856            .map_err(|e| e.to_string())?;
857
858        let (res, model_name, prepared_messages) = {
859            let p = self.provider.read().await;
860            let model_name = model_override.unwrap_or(&p.current_model()).to_string();
861            let prepared_messages = if should_use_native_formatting(self, &model_name) {
862                prepare_gemma_native_messages(messages)
863            } else {
864                messages.to_vec()
865            };
866            if let Err(detail) = preflight_chat_request(
867                &model_name,
868                &prepared_messages,
869                tools,
870                self.current_context_length(),
871            ) {
872                return Err(format_runtime_failure_message(&detail));
873            }
874            let res = p
875                .call_with_tools(&prepared_messages, tools, model_override)
876                .await
877                .map_err(|e| format_runtime_failure_message(&e))?;
878            (res, model_name, prepared_messages)
879        };
880
881        if let Ok(mut econ) = self.economics.lock() {
882            econ.input_tokens += res.usage.prompt_tokens;
883            econ.output_tokens += res.usage.completion_tokens;
884        }
885
886        let mut content = res.content;
887        let mut tool_calls = res.tool_calls;
888
889        // Post-processing: Gemma 4 / thinking block extraction
890        if let Some(text) = &content {
891            if should_use_native_formatting(self, &model_name) {
892                let native_calls = extract_native_tool_calls(text);
893                if !native_calls.is_empty() {
894                    let mut existing = tool_calls.unwrap_or_default();
895                    existing.extend(native_calls);
896                    tool_calls = Some(existing);
897
898                    let stripped = strip_native_tool_call_text(text);
899                    content = if stripped.trim().is_empty() {
900                        None
901                    } else {
902                        Some(stripped)
903                    };
904                }
905            }
906        }
907
908        // Normalization: Tool arguments
909        if should_use_native_formatting(self, &model_name) {
910            if let Some(calls) = tool_calls.as_mut() {
911                for call in calls.iter_mut() {
912                    normalize_tool_argument_value(
913                        &call.function.name,
914                        &mut call.function.arguments,
915                    );
916                }
917            }
918        }
919
920        if should_use_native_formatting(self, &model_name)
921            && content.is_none()
922            && tool_calls.is_none()
923            && !prepared_messages.is_empty()
924        {
925            return Err(format_runtime_failure_message(
926                "model returned an empty response after native-format message preparation",
927            ));
928        }
929
930        Ok((content, tool_calls, Some(res.usage), res.finish_reason))
931    }
932
933    // ── Streaming call (used for plain-text responses) ────────────────────────
934
935    /// Stream a conversation (no tools). Emits Token/Done/Error events.
936    pub async fn stream_messages(
937        &self,
938        messages: &[ChatMessage],
939        tx: mpsc::Sender<InferenceEvent>,
940    ) -> Result<(), Box<dyn std::error::Error>> {
941        let provider = self.provider.read().await;
942        provider.stream(messages, tx).await
943    }
944
945    /// Single-turn streaming (legacy helper used by startup sequence).
946    pub async fn stream_generation(
947        &self,
948        prompt: &str,
949        snark: u8,
950        chaos: u8,
951        brief: bool,
952        professional: bool,
953        tx: mpsc::Sender<InferenceEvent>,
954    ) -> Result<(), Box<dyn std::error::Error>> {
955        let system =
956            self.build_system_prompt(snark, chaos, brief, professional, &[], None, None, &[]);
957        let messages = vec![ChatMessage::system(&system), ChatMessage::user(prompt)];
958        self.stream_messages(&messages, tx).await
959    }
960
961    // ── Swarm worker helpers (non-streaming) ──────────────────────────────────
962
963    /// Runs a task using the `worker_model` if set, otherwise falls back to the main `model`.
964    pub async fn generate_task_worker(
965        &self,
966        prompt: &str,
967        professional: bool,
968    ) -> Result<String, String> {
969        let current_model = self.current_model();
970        let model = self
971            .worker_model
972            .as_deref()
973            .unwrap_or(current_model.as_str());
974        self.generate_task_with_model(prompt, 0.1, professional, model)
975            .await
976    }
977
978    pub async fn generate_task(&self, prompt: &str, professional: bool) -> Result<String, String> {
979        self.generate_task_with_temp(prompt, 0.1, professional)
980            .await
981    }
982
983    pub async fn generate_task_with_temp(
984        &self,
985        prompt: &str,
986        temp: f32,
987        professional: bool,
988    ) -> Result<String, String> {
989        let current_model = self.current_model();
990        self.generate_task_with_model(prompt, temp, professional, &current_model)
991            .await
992    }
993
994    pub async fn generate_task_with_model(
995        &self,
996        prompt: &str,
997        _temp: f32,
998        professional: bool,
999        model: &str,
1000    ) -> Result<String, String> {
1001        let _permit = self
1002            .kv_semaphore
1003            .acquire()
1004            .await
1005            .map_err(|e| e.to_string())?;
1006
1007        let system =
1008            self.build_system_prompt(self.snark, 50, false, professional, &[], None, None, &[]);
1009        let messages = vec![ChatMessage::system(&system), ChatMessage::user(prompt)];
1010        if let Err(detail) =
1011            preflight_chat_request(model, &messages, &[], self.current_context_length())
1012        {
1013            return Err(format_runtime_failure_message(&detail));
1014        }
1015
1016        let p = self.provider.read().await;
1017        let res = p
1018            .call_with_tools(&messages, &[], Some(model))
1019            .await
1020            .map_err(|e| format_runtime_failure_message(&e))?;
1021
1022        res.content
1023            .ok_or_else(|| "Empty response from model".to_string())
1024    }
1025
1026    // ── History management ────────────────────────────────────────────────────
1027
1028    /// Prune middle turns when context grows too large, keeping system + recent N.
1029    #[allow(dead_code)]
1030    pub fn snip_history(
1031        &self,
1032        turns: &[ChatMessage],
1033        max_tokens_estimate: usize,
1034        keep_recent: usize,
1035    ) -> Vec<ChatMessage> {
1036        let total_chars: usize = turns.iter().map(|m| m.content.as_str().len()).sum();
1037        if total_chars / 4 <= max_tokens_estimate {
1038            return turns.to_vec();
1039        }
1040        let keep = keep_recent.min(turns.len());
1041        let mut snipped = vec![turns[0].clone()];
1042        if turns.len() > keep + 1 {
1043            snipped.push(ChatMessage::system(&format!(
1044                "[CONTEXT SNIPPED: {} earlier turns pruned to preserve VRAM]",
1045                turns.len() - keep - 1
1046            )));
1047            snipped.extend_from_slice(&turns[turns.len() - keep..]);
1048        } else {
1049            snipped = turns.to_vec();
1050        }
1051        snipped
1052    }
1053}
1054
1055fn estimate_serialized_tokens<T: Serialize + ?Sized>(value: &T) -> usize {
1056    serde_json::to_vec(value)
1057        .ok()
1058        .map_or(0, |bytes| bytes.len() / 4 + 1)
1059}
1060
1061const IMAGE_PART_TOKEN_ESTIMATE: usize = 1024;
1062
1063pub fn estimate_message_tokens(message: &ChatMessage) -> usize {
1064    let content_tokens = match &message.content {
1065        MessageContent::Text(s) => s.len() / 4 + 1,
1066        MessageContent::Parts(parts) => parts
1067            .iter()
1068            .map(|part| match part {
1069                ContentPart::Text { text } => text.len() / 4 + 1,
1070                // Image payloads are transported as data URLs, but their base64
1071                // length should not be treated like plain text context pressure.
1072                ContentPart::ImageUrl { .. } => IMAGE_PART_TOKEN_ESTIMATE,
1073            })
1074            .sum(),
1075    };
1076    let tool_tokens: usize = message
1077        .tool_calls
1078        .iter()
1079        .flatten()
1080        .map(|call| (call.function.name.len() + call.function.arguments.to_string().len()) / 4 + 4)
1081        .sum();
1082    content_tokens + tool_tokens + 6
1083}
1084
1085pub fn estimate_message_batch_tokens(messages: &[ChatMessage]) -> usize {
1086    messages.iter().map(estimate_message_tokens).sum()
1087}
1088
1089fn reserved_output_tokens(context_length: usize) -> usize {
1090    let proportional = (context_length / 8).max(MIN_RESERVED_OUTPUT_TOKENS);
1091    proportional.min(MAX_RESERVED_OUTPUT_TOKENS)
1092}
1093
1094pub fn estimate_prompt_pressure(
1095    messages: &[ChatMessage],
1096    tools: &[ToolDefinition],
1097    context_length: usize,
1098) -> (usize, usize, usize, u8) {
1099    let estimated_input_tokens =
1100        estimate_message_batch_tokens(messages) + estimate_serialized_tokens(tools) + 32;
1101    let reserved_output = reserved_output_tokens(context_length);
1102    let estimated_total = estimated_input_tokens.saturating_add(reserved_output);
1103    let percent = if context_length == 0 {
1104        0
1105    } else {
1106        ((estimated_total.saturating_mul(100)) / context_length).min(100) as u8
1107    };
1108    (
1109        estimated_input_tokens,
1110        reserved_output,
1111        estimated_total,
1112        percent,
1113    )
1114}
1115
1116fn preflight_chat_request(
1117    model: &str,
1118    messages: &[ChatMessage],
1119    tools: &[ToolDefinition],
1120    context_length: usize,
1121) -> Result<(), String> {
1122    let (estimated_input_tokens, reserved_output, estimated_total, _) =
1123        estimate_prompt_pressure(messages, tools, context_length);
1124
1125    if estimated_total > context_length {
1126        return Err(format!(
1127            "context_window_blocked for {}: estimated input {} + reserved output {} = {} tokens exceeds the {}-token context window; narrow the request, compact the session, or preserve grounded tool output instead of restyling it.",
1128            model, estimated_input_tokens, reserved_output, estimated_total, context_length
1129        ));
1130    }
1131
1132    Ok(())
1133}
1134
1135/// Walk from CWD up to 4 parent directories and collect project guidance files.
1136/// Looks for rule files plus optional skill guidance such as CLAUDE.md,
1137/// .hematite/rules.md, SKILLS.md, SKILL.md, and .hematite/instructions.md.
1138/// Deduplicates by content hash; truncates at 4KB per file, 12KB total.
1139fn load_instruction_files() -> String {
1140    use std::collections::hash_map::DefaultHasher;
1141    use std::collections::HashSet;
1142    use std::hash::{Hash, Hasher};
1143
1144    let Ok(cwd) = std::env::current_dir() else {
1145        return String::new();
1146    };
1147    let mut result = String::new();
1148    let mut seen: HashSet<u64> = HashSet::new();
1149    let mut total_chars: usize = 0;
1150    const MAX_TOTAL: usize = 12_000;
1151    const MAX_PER_FILE: usize = 4_000;
1152
1153    let mut dir = cwd.clone();
1154    for _ in 0..4 {
1155        for name in crate::agent::instructions::PROJECT_GUIDANCE_FILES {
1156            let path = crate::agent::instructions::resolve_guidance_path(&dir, name);
1157            if !path.exists() {
1158                continue;
1159            }
1160            let Ok(content) = std::fs::read_to_string(&path) else {
1161                continue;
1162            };
1163            if content.trim().is_empty() {
1164                continue;
1165            }
1166
1167            let mut hasher = DefaultHasher::new();
1168            content.hash(&mut hasher);
1169            let h = hasher.finish();
1170            if !seen.insert(h) {
1171                continue;
1172            }
1173
1174            let truncated = if content.len() > MAX_PER_FILE {
1175                format!("{}...[truncated]", &content[..MAX_PER_FILE])
1176            } else {
1177                content
1178            };
1179
1180            if total_chars + truncated.len() > MAX_TOTAL {
1181                break;
1182            }
1183            total_chars += truncated.len();
1184            result.push_str(&format!("\n--- {} ---\n{}\n", path.display(), truncated));
1185        }
1186        match dir.parent().map(|p| p.to_owned()) {
1187            Some(p) => dir = p,
1188            None => break,
1189        }
1190    }
1191
1192    if result.is_empty() {
1193        return String::new();
1194    }
1195    format!("\n\n# Project Instructions And Skills\n{}", result)
1196}
1197
1198fn load_agent_skill_catalog() -> String {
1199    let workspace_root = crate::tools::file_ops::workspace_root();
1200    let config = crate::agent::config::load_config();
1201    let discovery =
1202        crate::agent::instructions::discover_agent_skills(&workspace_root, &config.trust);
1203    crate::agent::instructions::render_skill_catalog(&discovery, 6_000)
1204        .map(|rendered| format!("\n\n{}", rendered))
1205        .unwrap_or_default()
1206}
1207
/// Extract the first Gemma-4 thought block from `text`.
///
/// Looks for `<|channel>thought` (ASCII case-insensitive) and returns the
/// trimmed text between it and the next `<channel|>`, or up to the end of
/// `text` when the block is never closed. Returns `None` when there is no
/// opening tag or the block is empty after trimming.
pub fn extract_think_block(text: &str) -> Option<String> {
    // Official Gemma-4 native tags.
    const OPEN_TAG: &str = "<|channel>thought";
    const CLOSE_TAG: &str = "<channel|>";

    // ASCII-only lowercasing keeps every byte offset identical to `text`.
    // Full Unicode lowercasing can change byte lengths (e.g. 'İ'), which
    // would make offsets found here invalid for slicing the original.
    let lower = text.to_ascii_lowercase();

    let content_start = lower.find(OPEN_TAG)? + OPEN_TAG.len();
    let content_end = lower[content_start..]
        .find(CLOSE_TAG)
        .map_or(text.len(), |rel| content_start + rel);

    let content = text[content_start..content_end].trim();
    if content.is_empty() {
        None
    } else {
        Some(content.to_string())
    }
}
1230
1231pub fn strip_think_blocks(text: &str) -> String {
1232    // Fast-path: strip a stray </think> the model emits at the start when it skips
1233    // the opening tag (common with Qwen after tool calls). Strip it before the lower
1234    // allocation so it can't slip through any branch below.
1235    let text = {
1236        let t = text.trim_start();
1237        if t.to_lowercase().starts_with("</think>") {
1238            &t[8..]
1239        } else {
1240            text
1241        }
1242    };
1243
1244    let lower = text.to_lowercase();
1245
1246    // Use the official Gemma-4 closing tag — answer is everything after it.
1247    if let Some(end) = lower.find("<channel|>").map(|i| i + "<channel|>".len()) {
1248        let answer = text[end..]
1249            .replace("<|channel>thought", "")
1250            .replace("<channel|>", "");
1251        return answer.trim().replace("\n\n\n", "\n\n").to_string();
1252    }
1253
1254    // No closing tag — if there's an unclosed opening tag, discard everything before and during it.
1255    let first_open = [
1256        lower.find("<|channel>thought"), // Prioritize Gemma-4 native
1257        lower.find("<think>"),
1258        lower.find("<thinking>"),
1259        lower.find("<thought>"),
1260        lower.find("<|think|>"),
1261    ]
1262    .iter()
1263    .filter_map(|&x| x)
1264    .min();
1265
1266    if let Some(start) = first_open {
1267        if start > 0 {
1268            return text[..start].trim().replace("\n\n\n", "\n\n").to_string();
1269        }
1270        return String::new();
1271    }
1272
1273    // If the model outputs 'naked' reasoning without tags:
1274    // Strip leading sentences like "The user asked..." or "I should present..."
1275    // if they appear before actual answer content.
1276    static NAKED_AC: std::sync::OnceLock<aho_corasick::AhoCorasick> = std::sync::OnceLock::new();
1277    let naked_ac = NAKED_AC.get_or_init(|| {
1278        aho_corasick::AhoCorasick::new([
1279            "the user asked",
1280            "the user is asking",
1281            "the user wants",
1282            "i will structure",
1283            "i should provide",
1284            "i should give",
1285            "i should avoid",
1286            "i should note",
1287            "i should focus",
1288            "i should keep",
1289            "i should respond",
1290            "i should present",
1291            "i should display",
1292            "i should show",
1293            "i need to",
1294            "i can see from",
1295            "without being overly",
1296            "let me ",
1297            "necessary information in my identity",
1298            "was computed successfully",
1299            "computed successfully",
1300        ])
1301        .expect("valid patterns")
1302    });
1303    let is_naked_reasoning = naked_ac.find(&lower).is_some();
1304    if is_naked_reasoning {
1305        let lines: Vec<&str> = text.lines().collect();
1306        if !lines.is_empty() {
1307            // Skip leading lines that are themselves reasoning prose or blank.
1308            // Stop skipping at the first line that looks like real answer content.
1309            let mut start_idx = 0;
1310            for (i, line) in lines.iter().enumerate() {
1311                let l = line.to_lowercase();
1312                let is_reasoning_line = naked_ac.find(&l).is_some() || l.trim().is_empty();
1313                if is_reasoning_line {
1314                    start_idx = i + 1;
1315                } else {
1316                    break;
1317                }
1318            }
1319            if start_idx < lines.len() {
1320                return lines[start_idx..]
1321                    .join("\n")
1322                    .trim()
1323                    .replace("\n\n\n", "\n\n")
1324                    .to_string();
1325            }
1326            // Entire response was reasoning prose — return empty.
1327            return String::new();
1328        }
1329    }
1330
1331    // Strip leaked XML tool-call fragments that Qwen sometimes emits when it
1332    // abandons a tool call mid-generation (e.g. </parameter></function></tool_call>).
1333    let cleaned = strip_xml_tool_call_artifacts(text);
1334    cleaned.trim().replace("\n\n\n", "\n\n").to_string()
1335}
1336
1337/// Remove stray XML tool-call closing/opening tags that local models occasionally
1338/// leak into visible output when they start-then-abandon a tool call.
1339fn strip_xml_tool_call_artifacts(text: &str) -> String {
1340    use aho_corasick::AhoCorasick;
1341    use std::sync::OnceLock;
1342
1343    // Tags to remove (both open and close forms, case-insensitive).
1344    const XML_ARTIFACTS: &[&str] = &[
1345        "</tool_call>",
1346        "<tool_call>",
1347        "</function>",
1348        "<function>",
1349        "</parameter>",
1350        "<parameter>",
1351        "</arguments>",
1352        "<arguments>",
1353        "</tool_use>",
1354        "<tool_use>",
1355        "</invoke>",
1356        "<invoke>",
1357        // Stray think/reasoning closing tags that leak after block extraction.
1358        "</think>",
1359        "<thinking>",
1360        "</thought>",
1361        "</thinking>",
1362        // Gemma-style turn markers that Qwen occasionally mirrors back from the system prompt.
1363        "<|turn>system",
1364        "<|turn>user",
1365        "<|turn>assistant",
1366        "<|turn>tool",
1367        "<turn|>",
1368        "<|think|>",
1369        // ChatML EOS/BOS tokens that can leak at end-of-generation.
1370        "<|im_start|>",
1371        "<|im_end|>",
1372        "<|endoftext|>",
1373    ];
1374
1375    // Build AC automaton once from pre-lowercased patterns; zero-cost on every
1376    // subsequent call.  All patterns are ASCII so byte positions are stable after
1377    // lowercasing (no multi-byte expansion).
1378    static ARTIFACT_AC: OnceLock<AhoCorasick> = OnceLock::new();
1379    let ac = ARTIFACT_AC.get_or_init(|| {
1380        let lowered: Vec<String> = XML_ARTIFACTS.iter().map(|s| s.to_lowercase()).collect();
1381        AhoCorasick::new(&lowered).expect("valid XML artifact patterns")
1382    });
1383
1384    // Lowercase once for searching.
1385    let lower = text.to_lowercase();
1386
1387    // Fast path: nothing to strip (common case for clean model output).
1388    if ac.find(&lower).is_none() {
1389        return text.to_string();
1390    }
1391
1392    // Collect all match spans in a single left-to-right AC scan, then drain
1393    // in reverse so earlier byte offsets stay valid as we shorten the string.
1394    let spans: Vec<(usize, usize)> = ac.find_iter(&lower).map(|m| (m.start(), m.end())).collect();
1395    let mut out = text.to_string();
1396    for (start, end) in spans.into_iter().rev() {
1397        out.drain(start..end);
1398    }
1399    out
1400}
1401
1402// ── Cached regex accessors for tool-call parsing ─────────────────────────────
1403// Each regex is compiled once via OnceLock; subsequent calls are zero-cost.
1404
1405fn re_gemma_call() -> &'static regex::Regex {
1406    use std::sync::OnceLock;
1407    static RE: OnceLock<regex::Regex> = OnceLock::new();
1408    RE.get_or_init(|| {
1409        regex::Regex::new(r#"(?s)<\|?tool_call\|?>\s*call:([A-Za-z_][A-Za-z0-9_]*)\{(.*?)\}(?:<\|?tool_call\|?>|\[END_TOOL_REQUEST\])"#)
1410            .expect("valid gemma call regex")
1411    })
1412}
1413fn re_gemma_arg() -> &'static regex::Regex {
1414    use std::sync::OnceLock;
1415    static RE: OnceLock<regex::Regex> = OnceLock::new();
1416    RE.get_or_init(|| {
1417        regex::Regex::new(r#"(\w+):(?:<\|"\|>(.*?)<\|"\|>|([^,}]*))"#)
1418            .expect("valid gemma arg regex")
1419    })
1420}
1421fn re_xml_call() -> &'static regex::Regex {
1422    use std::sync::OnceLock;
1423    static RE: OnceLock<regex::Regex> = OnceLock::new();
1424    RE.get_or_init(|| {
1425        regex::Regex::new(r#"(?s)<tool_call>\s*<function=([A-Za-z_][A-Za-z0-9_]*)>(.*?)(?:</function>)?\s*</tool_call>"#)
1426            .expect("valid xml call regex")
1427    })
1428}
1429fn re_xml_param() -> &'static regex::Regex {
1430    use std::sync::OnceLock;
1431    static RE: OnceLock<regex::Regex> = OnceLock::new();
1432    RE.get_or_init(|| {
1433        regex::Regex::new(r#"(?s)<parameter=([A-Za-z_][A-Za-z0-9_]*)>(.*?)</parameter>"#)
1434            .expect("valid xml param regex")
1435    })
1436}
1437fn re_short_call() -> &'static regex::Regex {
1438    use std::sync::OnceLock;
1439    static RE: OnceLock<regex::Regex> = OnceLock::new();
1440    RE.get_or_init(|| {
1441        regex::Regex::new(r#"(?s)<tool_call>\s*([A-Za-z_][A-Za-z0-9_]*)\((.*?)\)\s*</tool_call>"#)
1442            .expect("valid short call regex")
1443    })
1444}
1445fn re_short_arg() -> &'static regex::Regex {
1446    use std::sync::OnceLock;
1447    static RE: OnceLock<regex::Regex> = OnceLock::new();
1448    RE.get_or_init(|| {
1449        regex::Regex::new(
1450            r#"([A-Za-z_][A-Za-z0-9_]*)\s*=\s*(?:"((?:\\.|[^"])*)"|'((?:\\.|[^'])*)'|([^,\)]+))"#,
1451        )
1452        .expect("valid short arg regex")
1453    })
1454}
1455fn re_strip_gemma_call() -> &'static regex::Regex {
1456    use std::sync::OnceLock;
1457    static RE: OnceLock<regex::Regex> = OnceLock::new();
1458    RE.get_or_init(|| {
1459        regex::Regex::new(r#"(?s)<\|?tool_call\|?>\s*call:[A-Za-z_][A-Za-z0-9_]*\{.*?\}(?:<\|?tool_call\|?>|\[END_TOOL_REQUEST\])"#)
1460            .expect("valid strip gemma call regex")
1461    })
1462}
1463fn re_strip_xml() -> &'static regex::Regex {
1464    use std::sync::OnceLock;
1465    static RE: OnceLock<regex::Regex> = OnceLock::new();
1466    RE.get_or_init(|| {
1467        regex::Regex::new(r#"(?s)<tool_call>\s*<function=.*?>.*?</tool_call>"#)
1468            .expect("valid strip xml regex")
1469    })
1470}
1471fn re_strip_short() -> &'static regex::Regex {
1472    use std::sync::OnceLock;
1473    static RE: OnceLock<regex::Regex> = OnceLock::new();
1474    RE.get_or_init(|| {
1475        regex::Regex::new(r#"(?s)<tool_call>\s*[A-Za-z_][A-Za-z0-9_]*\(.*?\)\s*</tool_call>"#)
1476            .expect("valid strip short regex")
1477    })
1478}
1479fn re_strip_response() -> &'static regex::Regex {
1480    use std::sync::OnceLock;
1481    static RE: OnceLock<regex::Regex> = OnceLock::new();
1482    RE.get_or_init(|| {
1483        regex::Regex::new(
1484            r#"(?s)<\|tool_response\|?>.*?(?:<\|tool_response\|?>|<tool_response\|>)"#,
1485        )
1486        .expect("valid strip response regex")
1487    })
1488}
1489
1490/// Extract native Gemma-4 <|tool_call|> tags from text.
1491/// Format: <|tool_call|>call:func_name{key:<|"|>value<|"|>, key2:value2}<tool_call|>
1492pub fn extract_native_tool_calls(text: &str) -> Vec<ToolCallResponse> {
1493    let mut results = Vec::new();
1494
1495    // -- Format 1: Gemma 4 Native (call:name{args}) --
1496    let re_call = re_gemma_call();
1497    let re_arg = re_gemma_arg();
1498
1499    for cap in re_call.captures_iter(text) {
1500        let name = cap[1].to_string();
1501        let args_str = &cap[2];
1502        let mut arguments = serde_json::Map::new();
1503
1504        for arg_cap in re_arg.captures_iter(args_str) {
1505            let key = arg_cap[1].to_string();
1506            let val_raw = arg_cap
1507                .get(2)
1508                .map(|m| m.as_str())
1509                .or_else(|| arg_cap.get(3).map(|m| m.as_str()))
1510                .unwrap_or("")
1511                .trim();
1512            let normalized_raw = normalize_string_arg(&val_raw.replace("\\\"", "\""));
1513
1514            let val = if normalized_raw == "true" {
1515                Value::Bool(true)
1516            } else if normalized_raw == "false" {
1517                Value::Bool(false)
1518            } else if let Ok(n) = normalized_raw.parse::<i64>() {
1519                Value::Number(n.into())
1520            } else if let Ok(n) = normalized_raw.parse::<u64>() {
1521                Value::Number(n.into())
1522            } else if let Ok(n) = normalized_raw.parse::<f64>() {
1523                serde_json::Number::from_f64(n)
1524                    .map(Value::Number)
1525                    .unwrap_or(Value::String(normalized_raw.clone()))
1526            } else {
1527                Value::String(normalized_raw)
1528            };
1529
1530            arguments.insert(key, val);
1531        }
1532
1533        results.push(ToolCallResponse {
1534            id: format!("call_{}", rand::random::<u32>()),
1535            call_type: "function".to_string(),
1536            function: ToolCallFn {
1537                name,
1538                arguments: Value::Object(arguments),
1539            },
1540            index: None,
1541        });
1542    }
1543
1544    // -- Format 2: XML (Qwen/Claude style) --
1545    for cap in re_xml_call().captures_iter(text) {
1546        let name = cap[1].to_string();
1547        let body = &cap[2];
1548        let mut arguments = serde_json::Map::new();
1549
1550        for p_cap in re_xml_param().captures_iter(body) {
1551            let key = p_cap[1].to_string();
1552            let val_raw = p_cap[2].trim();
1553            let val = if val_raw == "true" {
1554                Value::Bool(true)
1555            } else if val_raw == "false" {
1556                Value::Bool(false)
1557            } else if let Ok(n) = val_raw.parse::<i64>() {
1558                Value::Number(n.into())
1559            } else if let Ok(n) = val_raw.parse::<u64>() {
1560                Value::Number(n.into())
1561            } else {
1562                Value::String(val_raw.to_string())
1563            };
1564            arguments.insert(key, val);
1565        }
1566
1567        results.push(ToolCallResponse {
1568            id: format!("call_{}", rand::random::<u32>()),
1569            call_type: "function".to_string(),
1570            function: ToolCallFn {
1571                name,
1572                arguments: Value::Object(arguments),
1573            },
1574            index: None,
1575        });
1576    }
1577
1578    // -- Format 3: shorthand XML wrapper (<tool_call>name(key="value")</tool_call>) --
1579    for cap in re_short_call().captures_iter(text) {
1580        let name = cap[1].to_string();
1581        let args_str = cap[2].trim();
1582        let mut arguments = serde_json::Map::new();
1583
1584        for arg_cap in re_short_arg().captures_iter(args_str) {
1585            let key = arg_cap[1].to_string();
1586            let val_raw = arg_cap
1587                .get(2)
1588                .or_else(|| arg_cap.get(3))
1589                .or_else(|| arg_cap.get(4))
1590                .map(|m| m.as_str())
1591                .unwrap_or("")
1592                .trim();
1593            let normalized_raw = normalize_string_arg(&val_raw.replace("\\\"", "\""));
1594
1595            let val = if normalized_raw == "true" {
1596                Value::Bool(true)
1597            } else if normalized_raw == "false" {
1598                Value::Bool(false)
1599            } else if let Ok(n) = normalized_raw.parse::<i64>() {
1600                Value::Number(n.into())
1601            } else if let Ok(n) = normalized_raw.parse::<u64>() {
1602                Value::Number(n.into())
1603            } else if let Ok(n) = normalized_raw.parse::<f64>() {
1604                serde_json::Number::from_f64(n)
1605                    .map(Value::Number)
1606                    .unwrap_or(Value::String(normalized_raw.clone()))
1607            } else {
1608                Value::String(normalized_raw)
1609            };
1610
1611            arguments.insert(key, val);
1612        }
1613
1614        results.push(ToolCallResponse {
1615            id: format!("call_{}", rand::random::<u32>()),
1616            call_type: "function".to_string(),
1617            function: ToolCallFn {
1618                name,
1619                arguments: Value::Object(arguments),
1620            },
1621            index: None,
1622        });
1623    }
1624
1625    results
1626}
1627
1628pub fn normalize_tool_argument_string(tool_name: &str, raw: &str) -> String {
1629    let trimmed = raw.trim();
1630    let candidate = unwrap_json_string_once(trimmed).unwrap_or_else(|| trimmed.to_string());
1631
1632    let mut value = match serde_json::from_str::<Value>(&candidate) {
1633        Ok(v) => v,
1634        Err(_) => return candidate,
1635    };
1636    normalize_tool_argument_value(tool_name, &mut value);
1637    value.to_string()
1638}
1639
1640pub fn normalize_tool_argument_value(tool_name: &str, value: &mut Value) {
1641    match value {
1642        Value::String(s) => *s = normalize_string_arg(s),
1643        Value::Array(items) => {
1644            for item in items {
1645                normalize_tool_argument_value(tool_name, item);
1646            }
1647        }
1648        Value::Object(map) => {
1649            for val in map.values_mut() {
1650                normalize_tool_argument_value(tool_name, val);
1651            }
1652            if tool_name == "grep_files" {
1653                if let Some(Value::String(pattern)) = map.get_mut("pattern") {
1654                    *pattern = normalize_regex_pattern(pattern);
1655                }
1656            }
1657            for key in ["path", "extension", "query", "command", "reason"] {
1658                if let Some(Value::String(s)) = map.get_mut(key) {
1659                    *s = normalize_string_arg(s);
1660                }
1661            }
1662        }
1663        _ => {}
1664    }
1665}
1666
/// Remove one layer of matching `"…"`, `'…'` or `` `…` `` quoting.
///
/// Returns `None` when `input` is not wrapped in a matching pair (a lone
/// quote character does not count). Otherwise the inner text is returned with
/// `\"` turned into `"`, then `\\` turned into `\`, and finally trimmed.
fn unwrap_json_string_once(input: &str) -> Option<String> {
    let inner = ['"', '\'', '`']
        .iter()
        .find_map(|&quote| input.strip_prefix(quote)?.strip_suffix(quote))?;

    let without_escaped_quotes = inner.replace("\\\"", "\"");
    let without_escaped_slashes = without_escaped_quotes.replace("\\\\", "\\");
    Some(without_escaped_slashes.trim().to_string())
}
1680
/// Trim `input` and peel away every layer of matching wrapping quotes.
///
/// A layer is a leading and trailing `"`, `'` or `` ` `` of the same kind;
/// the remainder is trimmed after each layer is removed. A lone quote
/// character is left untouched.
fn normalize_string_arg(input: &str) -> String {
    let mut rest = input.trim();
    loop {
        let mut peeled = None;
        for quote in ['"', '\'', '`'] {
            if let Some(inner) = rest.strip_prefix(quote).and_then(|r| r.strip_suffix(quote)) {
                peeled = Some(inner.trim());
                break;
            }
        }
        match peeled {
            Some(inner) => rest = inner,
            None => return rest.to_string(),
        }
    }
}
1698
1699fn normalize_regex_pattern(input: &str) -> String {
1700    let out = normalize_string_arg(input);
1701    if out.len() >= 2 && out.starts_with('/') && out.ends_with('/') {
1702        out[1..out.len() - 1].to_string()
1703    } else {
1704        out
1705    }
1706}
1707
1708fn prepare_gemma_native_messages(messages: &[ChatMessage]) -> Vec<ChatMessage> {
1709    let mut system_blocks = Vec::new();
1710    let mut prepared = Vec::new();
1711    let mut seeded = false;
1712
1713    for message in messages {
1714        if message.role == "system" {
1715            let cleaned = strip_legacy_turn_wrappers(message.content.as_str())
1716                .trim()
1717                .to_string();
1718            if !cleaned.is_empty() {
1719                system_blocks.push(cleaned);
1720            }
1721            continue;
1722        }
1723
1724        let mut clone = message.clone();
1725        clone.content = MessageContent::Text(strip_legacy_turn_wrappers(message.content.as_str()));
1726
1727        if !seeded && message.role == "user" {
1728            let mut merged = String::new();
1729            if !system_blocks.is_empty() {
1730                merged.push_str("System instructions for this turn:\n");
1731                merged.push_str(&system_blocks.join("\n\n"));
1732                merged.push_str("\n\n");
1733            }
1734            merged.push_str(clone.content.as_str());
1735            clone.content = MessageContent::Text(merged);
1736            seeded = true;
1737        }
1738
1739        prepared.push(clone);
1740    }
1741
1742    if !seeded && !system_blocks.is_empty() {
1743        prepared.insert(
1744            0,
1745            ChatMessage::user(&format!(
1746                "System instructions for this turn:\n{}",
1747                system_blocks.join("\n\n")
1748            )),
1749        );
1750    }
1751
1752    prepared
1753}
1754
/// Delete legacy turn markers from `text` and trim the result.
///
/// The markers removed, in this order, are the `<|turn>{role}\n` openers for
/// the system, user, assistant and tool roles, followed by the `<turn|>`
/// closer.
fn strip_legacy_turn_wrappers(text: &str) -> String {
    let mut cleaned = text.to_string();
    for marker in [
        "<|turn>system\n",
        "<|turn>user\n",
        "<|turn>assistant\n",
        "<|turn>tool\n",
        "<turn|>",
    ] {
        cleaned = cleaned.replace(marker, "");
    }
    cleaned.trim().to_string()
}
1764
1765pub fn strip_native_tool_call_text(text: &str) -> String {
1766    let without_calls = re_strip_gemma_call().replace_all(text, "");
1767    let without_xml = re_strip_xml().replace_all(without_calls.as_ref(), "");
1768    let without_short = re_strip_short().replace_all(without_xml.as_ref(), "");
1769    re_strip_response()
1770        .replace_all(without_short.as_ref(), "")
1771        .trim()
1772        .to_string()
1773}
1774
/// Decide which context length to report for the currently effective model.
///
/// * No model (`"no model loaded"` or a blank name) always yields `0`.
/// * A non-zero `detected_context` is used as-is.
/// * Otherwise `previous_context` is reused only when the model name is
///   unchanged from `previous_model`; a different model yields `0`.
fn resolve_runtime_context(
    previous_model: &str,
    previous_context: usize,
    effective_model: &str,
    detected_context: usize,
) -> usize {
    let model_missing =
        effective_model.trim().is_empty() || effective_model == "no model loaded";
    if model_missing {
        return 0;
    }

    match detected_context {
        0 if effective_model == previous_model => previous_context,
        0 => 0,
        detected => detected,
    }
}
1791
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Serializes tests that change the process-wide current directory.
    /// The test harness runs tests on parallel threads, and the working
    /// directory is shared by all of them.
    static CWD_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());

    /// RAII guard that switches the current directory and restores the
    /// previous one on drop, including when the test body panics.
    struct CwdGuard {
        previous: std::path::PathBuf,
        // Held for the guard's lifetime; released after `drop` restores the cwd.
        _lock: std::sync::MutexGuard<'static, ()>,
    }

    impl CwdGuard {
        /// Take the cwd lock and change into `dir`.
        fn enter(dir: &std::path::Path) -> Self {
            // A panic in another cwd test poisons the lock; the guarded data
            // is `()`, so continuing is safe.
            let lock = CWD_LOCK
                .lock()
                .unwrap_or_else(|poisoned| poisoned.into_inner());
            let previous = std::env::current_dir().expect("current dir");
            std::env::set_current_dir(dir).expect("enter test dir");
            Self {
                previous,
                _lock: lock,
            }
        }
    }

    impl Drop for CwdGuard {
        fn drop(&mut self) {
            // Best effort: never panic inside drop.
            let _ = std::env::set_current_dir(&self.previous);
        }
    }

    #[test]
    fn system_prompt_includes_running_hematite_version() {
        let engine = InferenceEngine::new(
            "http://localhost:1234/v1".to_string(),
            "strategist".to_string(),
            0,
        )
        .expect("engine");

        let system = engine.build_system_prompt(0, 50, false, true, &[], None, None, &[]);
        assert!(system.contains(crate::HEMATITE_VERSION));
    }

    #[test]
    fn extracts_gemma_native_tool_call_with_mixed_tool_call_tags() {
        let text = r#"<|channel>thought
Reading the next chunk.<channel|>The startup banner wording is likely defined within the UI drawing logic.
<|tool_call>call:read_file{limit:100,offset:100,path:\"src/ui/tui.rs\"}<tool_call|>"#;

        let calls = extract_native_tool_calls(text);
        assert_eq!(calls.len(), 1);
        assert_eq!(calls[0].function.name, "read_file");

        let args: Value = calls[0].function.arguments.clone();
        assert_eq!(args.get("limit").and_then(|v| v.as_i64()), Some(100));
        assert_eq!(args.get("offset").and_then(|v| v.as_i64()), Some(100));
        assert_eq!(
            args.get("path").and_then(|v| v.as_str()),
            Some("src/ui/tui.rs")
        );

        let stripped = strip_native_tool_call_text(text);
        assert!(!stripped.contains("<|tool_call"));
        assert!(!stripped.contains("<tool_call|>"));
    }

    #[test]
    fn strips_hallucinated_tool_responses_from_native_tool_transcript() {
        let text = r#"<|channel>thought
Planning.
<channel|><|tool_call>call:list_files{extension:<|\"|>rs<|\"|>,path:<|\"|>src/<|\"|>}<tool_call|><|tool_response>thought
Mapped src.
<channel|><|tool_call>call:read_file{limit:100,offset:0,path:<|\"|>src/main.rs<|\"|>}<tool_call|><|tool_response>thought
Read main.
<channel|>"#;

        let calls = extract_native_tool_calls(text);
        assert_eq!(calls.len(), 2);
        assert_eq!(calls[0].function.name, "list_files");
        assert_eq!(calls[1].function.name, "read_file");

        let stripped = strip_native_tool_call_text(text);
        assert!(!stripped.contains("<|tool_call"));
        assert!(!stripped.contains("<|tool_response"));
        assert!(!stripped.contains("<tool_response|>"));
    }

    #[test]
    fn create_directory_is_treated_as_mutating_repo_write() {
        let metadata = tool_metadata_for_name("create_directory");
        assert!(metadata.mutates_workspace);
        assert!(!metadata.read_only_friendly);
    }

    #[test]
    fn extracts_qwen_xml_tool_calls_from_reasoning() {
        let text = r#"Based on the project structure, I need to check the binary.
<tool_call>
<function=shell>
<parameter=command>
ls -la hematite.exe
</parameter>
<parameter=reason>
Check if the binary exists
</parameter>
</function>
</tool_call>"#;

        let calls = extract_native_tool_calls(text);
        assert_eq!(calls.len(), 1);
        assert_eq!(calls[0].function.name, "shell");

        let args: Value = calls[0].function.arguments.clone();
        assert_eq!(
            args.get("command").and_then(|v| v.as_str()),
            Some("ls -la hematite.exe")
        );
        assert_eq!(
            args.get("reason").and_then(|v| v.as_str()),
            Some("Check if the binary exists")
        );

        let stripped = strip_native_tool_call_text(text);
        assert!(!stripped.contains("<tool_call>"));
        assert!(!stripped.contains("<function=shell>"));
    }

    #[test]
    fn extracts_shorthand_tool_calls_from_reasoning() {
        let text = r#"<thinking>
The user wants a search first.
</thinking>

I'll search before continuing.

<tool_call>research_web(query="uefn toolbelt python automation unreal engine fortnite")</tool_call>"#;

        let calls = extract_native_tool_calls(text);
        assert_eq!(calls.len(), 1);
        assert_eq!(calls[0].function.name, "research_web");

        let args: Value = calls[0].function.arguments.clone();
        assert_eq!(
            args.get("query").and_then(|v| v.as_str()),
            Some("uefn toolbelt python automation unreal engine fortnite")
        );

        let stripped = strip_native_tool_call_text(text);
        assert!(!stripped.contains("<tool_call>"));
        assert!(!stripped.contains("research_web(query="));
    }

    #[test]
    fn strips_thinking_tag_as_reasoning_prefix() {
        let cleaned =
            strip_think_blocks("<thinking>\nThe user wants a search.\n</thinking>\nVisible answer");
        assert_eq!(cleaned, "");
    }

    #[test]
    fn resolve_runtime_context_returns_zero_when_no_model_loaded() {
        assert_eq!(
            resolve_runtime_context("qwen/qwen3.5-9b", 32000, "no model loaded", 0),
            0
        );
    }

    #[test]
    fn resolve_runtime_context_preserves_previous_only_for_same_model() {
        assert_eq!(
            resolve_runtime_context("qwen/qwen3.5-9b", 32000, "qwen/qwen3.5-9b", 0),
            32000
        );
        assert_eq!(
            resolve_runtime_context("qwen/qwen3.5-9b", 32000, "bonsai-8b", 0),
            0
        );
    }

    #[test]
    fn load_instruction_files_includes_workspace_guidance_files() {
        let temp = tempfile::tempdir().unwrap();

        fs::write(
            temp.path().join("SKILLS.md"),
            "# Workspace Skills\n- Prefer API-first changes before UI polish.",
        )
        .unwrap();

        // The guard is scoped so the cwd is restored before `temp` is deleted.
        let loaded = {
            let _cwd = CwdGuard::enter(temp.path());
            load_instruction_files()
        };

        assert!(loaded.contains("SKILLS.md"));
        assert!(loaded.contains("Prefer API-first changes before UI polish."));
    }

    #[test]
    fn load_agent_skill_catalog_includes_skill_directory_entries() {
        let temp = tempfile::tempdir().unwrap();

        std::fs::create_dir_all(temp.path().join(".agents/skills/code-review")).unwrap();
        fs::write(
            temp.path().join(".agents/skills/code-review/SKILL.md"),
            "---\nname: code-review\ndescription: Review diffs and flag regressions.\ncompatibility: Requires git\n---\n",
        )
        .unwrap();

        // The guard is scoped so the cwd is restored before `temp` is deleted.
        let loaded = {
            let _cwd = CwdGuard::enter(temp.path());
            load_agent_skill_catalog()
        };

        assert!(loaded.contains("Agent Skills Catalog"));
        assert!(loaded.contains("code-review"));
        assert!(loaded.contains("Review diffs and flag regressions."));
    }
}