// hematite/agent/prompt.rs

use std::fs;
use std::path::{Path, PathBuf};

use crate::agent::git;
5
/// Coarse classification of a workspace directory, used to pick the framing
/// sentence of the system prompt.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum WorkspaceMode {
    /// A project marker or at least one source-code file was found.
    Coding,
    /// No code was found, but at least one document-type file was.
    Document,
    /// Neither code nor documents were found (or the directory is unreadable).
    General,
}

/// Classifies the directory `root` by looking only at its immediate children.
///
/// The decision is made in two passes:
/// 1. If any well-known project marker (manifest file, `.git`, `src`, `lib`)
///    exists directly under `root`, the workspace is `Coding`.
/// 2. Otherwise the regular files directly under `root` are inspected by
///    extension (case-insensitively): any code extension yields `Coding`,
///    else any document extension yields `Document`, else `General`.
///
/// A directory that cannot be read is treated as containing no files.
fn detect_workspace_mode(root: &Path) -> WorkspaceMode {
    // Strong coding signals: the presence of any one of these is decisive.
    const CODING_MARKERS: [&str; 11] = [
        "Cargo.toml",
        "package.json",
        "pyproject.toml",
        "setup.py",
        "go.mod",
        "pom.xml",
        "build.gradle",
        "CMakeLists.txt",
        ".git",
        "src",
        "lib",
    ];
    const CODE_EXTS: [&str; 12] = [
        "rs", "py", "ts", "js", "go", "cpp", "c", "java", "cs", "rb", "swift", "kt",
    ];
    const DOC_EXTS: [&str; 6] = ["pdf", "md", "txt", "docx", "epub", "rst"];

    if CODING_MARKERS
        .iter()
        .any(|marker| root.join(marker).exists())
    {
        return WorkspaceMode::Coding;
    }

    // No strong signal: fall back to the extensions of top-level files.
    let mut saw_document = false;
    if let Ok(entries) = fs::read_dir(root) {
        for entry in entries.flatten() {
            let path = entry.path();
            if !path.is_file() {
                continue;
            }
            let ext = match path.extension().and_then(|e| e.to_str()) {
                Some(ext) => ext.to_lowercase(),
                None => continue,
            };
            // A single code file outranks any number of documents, so there
            // is no need to keep scanning once one is seen.
            if CODE_EXTS.contains(&ext.as_str()) {
                return WorkspaceMode::Coding;
            }
            if DOC_EXTS.contains(&ext.as_str()) {
                saw_document = true;
            }
        }
    }

    if saw_document {
        WorkspaceMode::Document
    } else {
        WorkspaceMode::General
    }
}
66
/// Assembles the agent's system prompt for a given workspace.
#[derive(Debug, Clone)]
pub struct SystemPromptBuilder {
    /// Directory that is classified to choose the prompt framing and that is
    /// searched for project rule files and git metadata.
    pub workspace_root: PathBuf,
}
70
71impl SystemPromptBuilder {
72    pub fn new(root: PathBuf) -> Self {
73        Self {
74            workspace_root: root,
75        }
76    }
77
78    /// Build the full system prompt with Rule Hierarchy and Gemma-4 Optimization.
79    /// Hierarchy: Global ($HOME) -> Project (Root) -> Local (Ignored).
80    pub fn build(
81        &self,
82        base_instructions: &str,
83        memory: Option<&str>,
84        summary: Option<&str>,
85        mcp_tools: &[crate::agent::mcp::McpTool],
86    ) -> String {
87        let config = crate::agent::config::load_config();
88        let mut static_sections = Vec::new();
89
90        let workspace_framing = match detect_workspace_mode(&self.workspace_root) {
91            WorkspaceMode::Coding => "- **Authoritative Identity**: You are a Senior SysAdmin, Network Admin, and Software Engineer. Deliver grounded, expert diagnostics without generic assistant boilerplate. You have 100% workstation visibility via native tools.\n\
92                                       - **Hardware Truth & Tool Discipline**: For any hardware, silicon, or performance query (GPU Vitals, CPU Thermals, Throttling), you MUST use `inspect_host` (topic=\"overclocker\", \"thermal\", \"hardware\").\n\
93                                       - **Forbidden Regressions**: NEVER call raw shell commands like `nvidia-smi`, `wmic`, or `tasklist` for telemetry if a native `inspect_host` topic covers it.\n\
94                                       - **Session History Awareness**: Use the RAM-only Silicon Historian trends reported by `inspect_host` to identify anomalies since the start of the session.\n\
95                                       The current directory is a software project — lean into code editing, build verification, and repo-aware tooling.",
96            WorkspaceMode::Document => "- **Authoritative Identity**: You are a Senior SysAdmin, Network Admin, and Software Engineer. Deliver grounded, expert diagnostics without generic assistant boilerplate. You have 100% workstation visibility via native tools.\n\
97                                        - **Hardware Truth & Tool Discipline**: For any hardware, silicon, or performance query (GPU Vitals, CPU Thermals, Throttling), you MUST use `inspect_host` (topic=\"overclocker\", \"thermal\", \"hardware\").\n\
98                                        - **Forbidden Regressions**: NEVER call raw shell commands like `nvidia-smi`, `wmic`, or `tasklist` for telemetry if a native `inspect_host` topic covers it.\n\
99                                        - **Session History Awareness**: Use the RAM-only Silicon Historian trends reported by `inspect_host` to identify anomalies since the start of the session.\n\
100                                        The current directory contains documents and files — lean into reading, summarizing, and hardware/network diagnostics.",
101            WorkspaceMode::General => "- **Authoritative Identity**: You are a Senior SysAdmin, Network Admin, and Software Engineer. Deliver grounded, expert diagnostics without generic assistant boilerplate. You have 100% workstation visibility via native tools.\n\
102                                       - **Hardware Truth & Tool Discipline**: For any hardware, silicon, or performance query (GPU Vitals, CPU Thermals, Throttling), you MUST use `inspect_host` (topic=\"overclocker\", \"thermal\", \"hardware\").\n\
103                                       - **Forbidden Regressions**: NEVER call raw shell commands like `nvidia-smi`, `wmic`, or `tasklist` for telemetry if a native `inspect_host` topic covers it.\n\
104                                       - **Session History Awareness**: Use the RAM-only Silicon Historian trends reported by `inspect_host` to identify anomalies since the start of the session.\n\
105                                       No specific project or document context is loaded — focus on general machine health, system diagnostics, and shell-based tasks.",
106        };
107
108        static_sections.push("# IDENTITY & TONE".to_string());
109        static_sections.push(format!("{} \
110                             Be direct, practical, technically precise, and ASCII-first in ordinary prose. \
111                             You provide 100% workstation visibility across 81+ read-only diagnostic topics (Hardware, Network, Security, OS). \
112                             For simple questions, answer briefly in plain language. \
113                             Do not expose internal tool names, hidden protocols, or planning jargon unless the user asks.", workspace_framing));
114        static_sections.push(format!(
115            "- Running Hematite build: {}",
116            crate::hematite_version_display()
117        ));
118        static_sections.push(format!(
119            "- Hematite author and maintainer: {}",
120            crate::HEMATITE_AUTHOR
121        ));
122        static_sections.push(format!(
123            "- Hematite repository: {}",
124            crate::HEMATITE_REPOSITORY_URL
125        ));
126
127        static_sections.push(format!("\n# BASE INSTRUCTIONS\n{base_instructions}"));
128
129        if let Some(home) = std::env::var_os("USERPROFILE") {
130            let global_path = PathBuf::from(home).join(".hematite").join("CLAUDE.md");
131            if global_path.exists() {
132                if let Ok(content) = fs::read_to_string(&global_path) {
133                    static_sections.push(format!("\n# GLOBAL USER PREFERENCES\n{content}"));
134                }
135            }
136        }
137
138        let project_rule_files = [
139            "CLAUDE.md",
140            ".claude.md",
141            "CLAUDE.local.md",
142            "HEMATITE.md",
143            ".hematite/rules.md",
144            ".hematite/rules.local.md",
145        ];
146
147        for name in &project_rule_files {
148            let path = self.workspace_root.join(name);
149            if path.exists() {
150                if let Ok(content) = fs::read_to_string(&path) {
151                    let content = if content.len() > 6000 {
152                        format!("{}...[Rules Truncated]", &content[..6000])
153                    } else {
154                        content
155                    };
156                    static_sections.push(format!("\n# PROJECT RULES ({})\n{}", name, content));
157                }
158            }
159        }
160
161        let instructions_dir = crate::tools::file_ops::hematite_dir().join("instructions");
162        if instructions_dir.exists() && instructions_dir.is_dir() {
163            if let Ok(entries) = fs::read_dir(instructions_dir) {
164                for entry in entries.flatten() {
165                    let path = entry.path();
166                    if path.extension().map(|e| e == "md").unwrap_or(false) {
167                        let stem = path.file_stem().and_then(|s| s.to_str()).unwrap_or("");
168                        let include = if let Some(mem) = memory {
169                            mem.to_lowercase().contains(&stem.to_lowercase())
170                        } else {
171                            false
172                        };
173
174                        if include {
175                            if let Ok(content) = fs::read_to_string(&path) {
176                                static_sections.push(format!(
177                                    "\n# DEEP CONTEXT RULES ({}.md)\n{}",
178                                    stem, content
179                                ));
180                            }
181                        }
182                    }
183                }
184            }
185        }
186
187        let mut prompt = static_sections.join("\n");
188        prompt.push_str(
189            "\n\n###############################################################################\n",
190        );
191        prompt.push_str(
192            "# DYNAMIC CONTEXT (Changes every turn)                                        #\n",
193        );
194        prompt.push_str(
195            "###############################################################################\n",
196        );
197
198        if let Some(s) = summary {
199            prompt.push_str(&format!(
200                "\n# COMPACTED HISTORY SUMMARY\n{}\nRecent messages are preserved below.",
201                s
202            ));
203        }
204
205        if let Some(mem) = memory {
206            prompt.push_str(&format!("\n# SESSION MEMORY\n{mem}"));
207        }
208
209        prompt.push_str("\n# ENVIRONMENT");
210        prompt.push_str(&format!(
211            "\n- Local Time: {}",
212            chrono::Local::now().format("%Y-%m-%d %H:%M:%S")
213        ));
214        prompt.push_str(&format!(
215            "\n- Hematite Build: {}",
216            crate::hematite_version_display()
217        ));
218        if let Ok(user) = std::env::var("USERPROFILE") {
219            prompt.push_str(&format!("\n- USERPROFILE (Authoritative): {user}"));
220        }
221        if let Ok(comp) = std::env::var("COMPUTERNAME") {
222            prompt.push_str(&format!("\n- COMPUTERNAME (Authoritative): {comp}"));
223        }
224        prompt.push_str("\n- Operating System: Windows (User workspace)");
225
226        if git::is_git_repo(&self.workspace_root) {
227            if let Ok(branch) = git::get_active_branch(&self.workspace_root) {
228                prompt.push_str(&format!("\n- Git Branch: {branch}"));
229            }
230        }
231
232        let hematite_dir = crate::tools::file_ops::hematite_dir();
233        for (name, path) in [
234            ("TASK", hematite_dir.join("TASK.md")),
235            ("PLAN", hematite_dir.join("PLAN.md")),
236        ] {
237            if path.exists() {
238                if let Ok(content) = fs::read_to_string(&path) {
239                    if !content.trim().is_empty() {
240                        let content = if content.len() > 3000 {
241                            format!("{}...[Truncated]", &content[..3000])
242                        } else {
243                            content
244                        };
245                        prompt.push_str(&format!(
246                            "\n\n# ACTIVE TASK {} (.hematite/)\n{}",
247                            name, content
248                        ));
249                    }
250                }
251            }
252        }
253
254        if !mcp_tools.is_empty() {
255            prompt.push_str("\n\n# ACTIVE MCP TOOLS");
256            for tool in mcp_tools {
257                let mut description = tool
258                    .description
259                    .clone()
260                    .unwrap_or_else(|| "No description provided.".to_string());
261                if description.len() > 180 {
262                    description.truncate(180);
263                    description.push_str("...");
264                }
265                prompt.push_str(&format!("\n- {}: {}", tool.name, description));
266            }
267        }
268
269        if let Some(hint) = &config.context_hint {
270            prompt.push_str(&format!("\n## PROJECT CONTEXT HINT\n{}\n", hint));
271        }
272
273        prompt.push_str("\n## OPERATIONAL PROTOCOL (Gemma-4-E4B Native)\n");
274        prompt.push_str("1. **Thinking Mode**: ALWAYS use the thought channel (`<|channel>thought ... <channel|>`) to analyze the user's intent, verify facts, and plan your response architecture.\n");
275        prompt.push_str("2. **Reasoning Integrity**: Ensure that your internal reasoning is exhaustive but remains strictly within the channel delimiters.\n");
276        prompt.push_str("3. **Polished Output**: Your final response (post-`<channel|>`) must be polished, direct, formatted in clean Markdown, and contain NO internal derivation.\n");
277        prompt.push_str("4. **Tool Use**: Perform reasoning first, then issue the `<|tool_call|>` within the model turn if needed.\n");
278        prompt.push_str("5. **Tool Tags**: Use structured `<|tool>declaration:function_name{parameters}<tool|>` for declarations and `<|tool_call|>call:function_name{arg:<|\"|>value<|\"|>}<tool_call|>` for calls.\n");
279        prompt.push_str("6. **Safety**: String values MUST use the `<|\"|>` wrapper for safety.\n");
280        prompt.push_str("7. **Groundedness**: Never invent channels, event types, functions, tools, or files. If a detail is not verified from the repo or tool output, say `uncertain`.\n");
281        prompt.push_str("8. **Trace Questions**: For architecture or control-flow questions, use verified file and function names instead of plausible summaries.\n");
282        prompt.push_str("9. **Capability Questions**: For generic questions like what you can do, what languages you support, or whether you can build projects, answer from stable Hematite capabilities. Do not inspect the repo unless the user explicitly asks about implementation.\n");
283        prompt.push_str("10. **Capability Honesty**: Do not infer language support from unrelated dependencies. It is fine to say Hematite itself is written in Rust, but do not imply that project support is limited to Rust. Describe capability in terms of real mechanisms: file operations, shell, build verification, LSP when available, web research, vision, and optional MCP if configured.\n");
284        prompt.push_str("11. **Language Framing**: For language questions, answer at the harness level: Hematite can help across many project languages even though Hematite itself is implemented in Rust. Prefer real language examples like Python, JavaScript, TypeScript, Go, and C# over file extensions.\n");
285        prompt.push_str("12. **Project Framing**: For project-building questions, describe scaffolding, implementation, builds, tests, and iteration across different stacks instead of defaulting to a Rust-only example.\n");
286        prompt.push_str("13. **Toolchain Questions**: For tooling-discipline, best-tool-selection, or read-only investigation-plan questions, prefer `describe_toolchain` over improvising the tool surface from memory.\n");
287        prompt.push_str("14. **Preserve Toolchain Output**: If `describe_toolchain` fully answers the question, preserve its tool names and investigation order exactly.\n");
288        prompt.push_str("15. **Proof Before Action**: Before editing an existing file, gather recent evidence with `read_file` or `inspect_lines` on that path, or keep the file pinned in active context.\n");
289        prompt.push_str("16. **Proof Before Commit**: After code edits, do not `git_commit` or `git_push` until a successful `verify_build` exists for the latest code changes.\n");
290        prompt.push_str("17. **Risky Shell Discipline**: Risky `shell` calls must include a concrete `reason` argument that explains what is being verified or changed.\n");
291        prompt.push_str("18. **Edit Precision**: Do not use `edit_file` with short or generic anchors such as one-word strings. Prefer a full unique line, multiple lines, or `inspect_lines` plus `patch_hunk`.\n");
292        prompt.push_str("19. **Built-In First (MANDATORY)**: For all local workspace filesystem mutations (mkdir, touch, mv, rm, create, edit), you MUST use Hematite's built-in surgical tools (`create_directory`, `write_file`, `update_file`, `patch_hunk`). External `mcp__filesystem__*` mutation tools are BLOCKED by safety guards for these actions and will fail. Only reach for MCP if the user explicitly requests an MCP-specific server action.\n");
293        prompt.push_str("20. **Deep Sync**: Every 6th turn, review the full TASK.md.\n\n21. **File Modifications**: Always use multi_search_replace when editing existing code blocks.\n");
294        prompt.push_str("22. **Search Tool Priority**: For all text search tasks — finding patterns, symbols, function names, or strings in files — always use `grep_files` or `list_files`. Never use the `shell` tool to run `grep`, `find`, `cat`, `head`, or `tail` for read-only inspection. Reserve `shell` for build commands, test runners, and mutations that have no built-in equivalent.");
295
296        prompt.push_str(concat!(
297            "23. **Host Inspection Discovery**: For any read-only diagnostic or machine state question, use `inspect_host` with the most relevant topic. Available topics include: hardware, overclocker, thermal, resource_load, processes, services, ports, connections, network, lan_discovery, audio, bluetooth, connectivity, wifi, vpn, security, updates, health_report, storage, disk_health, battery, recent_crashes, scheduled_tasks, ad_user, dns_lookup, hyperv, ip_config, docker, wsl, ssh, git_config, env, registry_audit, and fix_plan.\n",
298            "24. **Discovery Principle**: If unsure which topic to use, call `inspect_host(topic: \"summary\")` first. NEVER use `shell` for read-only workstation investigations.\n",
299            "25. **Sequential Multi-Topic**: When asked for distinct subsystems (e.g. 'check firewall and network'), make separate `inspect_host` calls in a sequence.\n",
300            "26. **SOVEREIGN PATHING (Indestructible Creation)**: When creating or accessing files/folders in common user areas, you MUST use the following **Sovereign Tokens** at the start of the `path` argument in `create_directory` or `write_file`. This guarantees 100% path accuracy and prevents shell errors:\n",
301            "    - `@DESKTOP/` -> Use for everything on the Desktop.\n",
302            "    - `@DOCUMENTS/` -> Use for the Documents folder.\n",
303            "    - `@DOWNLOADS/` -> Use for the Downloads folder.\n",
304            "    - `@HOME/` or `~/` -> Use for the user home directory.\n",
305            "    - `@TEMP/` -> Use for the system temp directory.\n",
306            "    Example: To create a folder on the Desktop, use `create_directory(path: \"@DESKTOP/MyFolder\")`.\n"
307        ));
308
309        prompt.push_str(concat!(
310            "\n24. **Teacher Mode — Grounded Walkthroughs for Write/Admin Tasks**: ",
311            "When the user asks how to install a driver, edit Group Policy, create a firewall rule, set up SSH keys, configure WSL, edit the registry, manage a service, create a scheduled task, edit the PATH, or perform any other write/admin/config operation that Hematite cannot safely execute itself: ",
312            "For storage and mount triage, prefer `inspect_host(topic='docker_filesystems')` for bind mounts, named volumes, and Docker Desktop disk bloat, and `inspect_host(topic='wsl_filesystems')` for WSL rootfs, VHDX growth, and /mnt/c bridge issues. ",
313            "For printer/NAS visibility, neighborhood discovery, or mDNS/SSDP/UPnP issues, prefer `inspect_host(topic='lan_discovery')`. ",
314            "For speaker, microphone, playback-device, or Windows Audio service issues, prefer `inspect_host(topic='audio')`. ",
315            "For Bluetooth radios, pairing failures, reconnect issues, or headset-role confusion, prefer `inspect_host(topic='bluetooth')`. ",
316            "(1) FIRST call inspect_host with the most relevant topic(s) to observe the actual machine state — e.g. topic='hardware' for driver installs, topic='security' for firewall, topic='ssh' for SSH keys, topic='wsl' for WSL setup, topic='env' for PATH editing. ",
317            "(2) THEN deliver a numbered step-by-step walkthrough that references what you actually observed — not generic advice. ",
318            "(3) Each step must be concrete and machine-specific: include exact PowerShell commands, exact paths, exact values the user should type. ",
319            "(4) End with a verification step the user can run to confirm success. ",
320            "You are a senior technician who has just examined the real machine. Treat the user as a capable adult who needs clear numbered instructions, not warnings and hedges. ",
321            "In /teach workflow mode, this rule is ALWAYS active for every admin/config/write question. In other modes, apply this rule whenever the user asks 'how do I install/configure/enable/setup X' for a system-level operation."
322        ));
323
324        prompt.push_str(concat!(
325            "\n25. **Computation Integrity — Use run_code for Precise Math**: ",
326            "Never answer from training-data memory when the result must be exact. ",
327            "For any of the following, use `run_code` (JavaScript/Deno or Python) and return the real output: ",
328            "checksums or hashes (SHA-256, MD5, CRC), ",
329            "financial or percentage calculations, ",
330            "statistical analysis (mean, median, std dev, regression), ",
331            "unit conversions where precision matters (bytes to MB/GB, time zones, scientific units), ",
332            "algorithmic verification (sorting, searching, graph traversal), ",
333            "date/time arithmetic (days between dates, Unix timestamps, durations), ",
334            "prime checks or factorization, ",
335            "and any calculation where being wrong by even a small amount would matter. ",
336            "A model answer for these is a guess. A run_code answer is a proof. ",
337            "When in doubt: write the code, run it, return the result."
338        ));
339        prompt.push_str("28. **Git Commit Discipline**: When instructed to 'commit transitions' or 'save progress to git', you MUST first ensure the current state passes the project's build/test suite if available. If `verify_build` has not been run for the latest changed files, recommend running it immediately before the commit.\n");
340        prompt.push_str("29. **Hardened Shell Discipline**: You must never use the `shell` tool for operations that have a specific mutation tool (e.g. `write_file`, `create_directory`, `patch_hunk`). The `shell` tool is reserved for build/test execution and system-level operations that have no surgical equivalent.\n");
341        prompt.push_str("30. **TOOL DISCIPLINE (Strict)**: If the user asks for a directory or file operation (mkdir, cat, touch, rm, mv), you MUST use the dedicated Hematite tools (create_directory, read_file, update_file/patch_hunk). NEVER improvise with `shell` for these tasks. This prevents path-hallucination and ensures machine-aware safety.\n");
342        prompt.push_str("31. **Isolation Guard (Mega-Directory Avoidance)**: If the current workspace root is a broad user-owned directory (Desktop, Downloads, Documents, Pictures, Videos, Music, Home, or a drive root like C:\\), you MUST nudge the user to move the project into a dedicated subdirectory. This prevents workspace pollution and keeps session indexing scoped to the right project.\n");
343
344        prompt
345    }
346}