Skip to main content

hematite/agent/
prompt.rs

1use std::fs;
2use std::path::PathBuf;
3
4use crate::agent::git;
5
/// Coarse classification of the workspace directory, used to pick the
/// identity framing sentence at the top of the system prompt.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum WorkspaceMode {
    /// A project marker or at least one source-code file was found.
    Coding,
    /// No code was found, but at least one document-like file was.
    Document,
    /// Neither code nor documents were detected.
    General,
}
11
12fn detect_workspace_mode(root: &PathBuf) -> WorkspaceMode {
13    // Strong coding signals — any of these present means it's a coding workspace
14    let coding_markers = [
15        "Cargo.toml",
16        "package.json",
17        "pyproject.toml",
18        "setup.py",
19        "go.mod",
20        "pom.xml",
21        "build.gradle",
22        "CMakeLists.txt",
23        ".git",
24        "src",
25        "lib",
26    ];
27    for marker in &coding_markers {
28        if root.join(marker).exists() {
29            return WorkspaceMode::Coding;
30        }
31    }
32
33    // No strong coding signal — check file extensions
34    let code_exts = [
35        "rs", "py", "ts", "js", "go", "cpp", "c", "java", "cs", "rb", "swift", "kt",
36    ];
37    let doc_exts = ["pdf", "md", "txt", "docx", "epub", "rst"];
38    let mut code_count = 0usize;
39    let mut doc_count = 0usize;
40
41    if let Ok(entries) = fs::read_dir(root) {
42        for entry in entries.flatten() {
43            let path = entry.path();
44            if path.is_file() {
45                if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
46                    let ext = ext.to_lowercase();
47                    if code_exts.contains(&ext.as_str()) {
48                        code_count += 1;
49                    }
50                    if doc_exts.contains(&ext.as_str()) {
51                        doc_count += 1;
52                    }
53                }
54            }
55        }
56    }
57
58    if code_count > 0 {
59        WorkspaceMode::Coding
60    } else if doc_count > 0 {
61        WorkspaceMode::Document
62    } else {
63        WorkspaceMode::General
64    }
65}
66
/// Assembles the system prompt for a session rooted at a workspace directory.
#[derive(Debug, Clone)]
pub struct SystemPromptBuilder {
    /// Directory whose rule files, `.hematite/` state, and git status are
    /// read when the prompt is built.
    pub workspace_root: PathBuf,
}
70
71impl SystemPromptBuilder {
72    pub fn new(root: PathBuf) -> Self {
73        Self {
74            workspace_root: root,
75        }
76    }
77
78    /// Build the full system prompt with Rule Hierarchy and Gemma-4 Optimization.
79    /// Hierarchy: Global ($HOME) -> Project (Root) -> Local (Ignored).
80    pub fn build(
81        &self,
82        base_instructions: &str,
83        memory: Option<&str>,
84        summary: Option<&str>,
85        mcp_tools: &[crate::agent::mcp::McpTool],
86    ) -> String {
87        let config = crate::agent::config::load_config();
88        let mut static_sections = Vec::new();
89
90        let workspace_framing = match detect_workspace_mode(&self.workspace_root) {
91            WorkspaceMode::Coding => "You are Hematite, a local AI coding agent running on the user's machine. \
92                             Hematite is more than the terminal UI: it is the full local harness for tool use, code editing, context management, voice, and orchestration. \
93                             The current directory is a software project — lean into code editing, build verification, and repo-aware tooling.",
94            WorkspaceMode::Document => "You are Hematite, a local AI assistant running on the user's machine. \
95                             Hematite is more than the terminal UI: it is the full local harness for tool use, file analysis, context management, voice, and orchestration. \
96                             The current directory contains documents and files — lean into reading, summarizing, explaining, and answering questions about the content here.",
97            WorkspaceMode::General => "You are Hematite, a local AI assistant running on the user's machine. \
98                             Hematite is more than the terminal UI: it is the full local harness for tool use, file operations, context management, voice, and orchestration.",
99        };
100
101        static_sections.push("# IDENTITY & TONE".to_string());
102        static_sections.push(format!("{} \
103                             Be direct, practical, technically precise, and ASCII-first in ordinary prose. \
104                             For simple questions, answer briefly in plain language. \
105                             Do not expose internal tool names, hidden protocols, or planning jargon unless the user asks.", workspace_framing));
106        static_sections.push(format!(
107            "- Running Hematite build: {}",
108            crate::hematite_version_display()
109        ));
110        static_sections.push(format!(
111            "- Hematite author and maintainer: {}",
112            crate::HEMATITE_AUTHOR
113        ));
114        static_sections.push(format!(
115            "- Hematite repository: {}",
116            crate::HEMATITE_REPOSITORY_URL
117        ));
118
119        static_sections.push(format!("\n# BASE INSTRUCTIONS\n{base_instructions}"));
120
121        if let Some(home) = std::env::var_os("USERPROFILE") {
122            let global_path = PathBuf::from(home).join(".hematite").join("CLAUDE.md");
123            if global_path.exists() {
124                if let Ok(content) = fs::read_to_string(&global_path) {
125                    static_sections.push(format!("\n# GLOBAL USER PREFERENCES\n{content}"));
126                }
127            }
128        }
129
130        let project_rule_files = [
131            "CLAUDE.md",
132            ".claude.md",
133            "CLAUDE.local.md",
134            "HEMATITE.md",
135            ".hematite/rules.md",
136            ".hematite/rules.local.md",
137        ];
138
139        for name in &project_rule_files {
140            let path = self.workspace_root.join(name);
141            if path.exists() {
142                if let Ok(content) = fs::read_to_string(&path) {
143                    let content = if content.len() > 6000 {
144                        format!("{}...[Rules Truncated]", &content[..6000])
145                    } else {
146                        content
147                    };
148                    static_sections.push(format!("\n# PROJECT RULES ({})\n{}", name, content));
149                }
150            }
151        }
152
153        let instructions_dir = self.workspace_root.join(".hematite").join("instructions");
154        if instructions_dir.exists() && instructions_dir.is_dir() {
155            if let Ok(entries) = fs::read_dir(instructions_dir) {
156                for entry in entries.flatten() {
157                    let path = entry.path();
158                    if path.extension().map(|e| e == "md").unwrap_or(false) {
159                        let stem = path.file_stem().and_then(|s| s.to_str()).unwrap_or("");
160                        let include = if let Some(mem) = memory {
161                            mem.to_lowercase().contains(&stem.to_lowercase())
162                        } else {
163                            false
164                        };
165
166                        if include {
167                            if let Ok(content) = fs::read_to_string(&path) {
168                                static_sections.push(format!(
169                                    "\n# DEEP CONTEXT RULES ({}.md)\n{}",
170                                    stem, content
171                                ));
172                            }
173                        }
174                    }
175                }
176            }
177        }
178
179        let mut prompt = static_sections.join("\n");
180        prompt.push_str(
181            "\n\n###############################################################################\n",
182        );
183        prompt.push_str(
184            "# DYNAMIC CONTEXT (Changes every turn)                                        #\n",
185        );
186        prompt.push_str(
187            "###############################################################################\n",
188        );
189
190        if let Some(s) = summary {
191            prompt.push_str(&format!(
192                "\n# COMPACTED HISTORY SUMMARY\n{}\nRecent messages are preserved below.",
193                s
194            ));
195        }
196
197        if let Some(mem) = memory {
198            prompt.push_str(&format!("\n# SESSION MEMORY\n{mem}"));
199        }
200
201        prompt.push_str("\n# ENVIRONMENT");
202        prompt.push_str(&format!(
203            "\n- Local Time: {}",
204            chrono::Local::now().format("%Y-%m-%d %H:%M:%S")
205        ));
206        prompt.push_str(&format!(
207            "\n- Hematite Build: {}",
208            crate::hematite_version_display()
209        ));
210        prompt.push_str("\n- Operating System: Windows (User workspace)");
211
212        if git::is_git_repo(&self.workspace_root) {
213            if let Ok(branch) = git::get_active_branch(&self.workspace_root) {
214                prompt.push_str(&format!("\n- Git Branch: {branch}"));
215            }
216        }
217
218        let hematite_dir = self.workspace_root.join(".hematite");
219        for (name, path) in [
220            ("TASK", hematite_dir.join("TASK.md")),
221            ("PLAN", hematite_dir.join("PLAN.md")),
222        ] {
223            if path.exists() {
224                if let Ok(content) = fs::read_to_string(&path) {
225                    if !content.trim().is_empty() {
226                        let content = if content.len() > 3000 {
227                            format!("{}...[Truncated]", &content[..3000])
228                        } else {
229                            content
230                        };
231                        prompt.push_str(&format!(
232                            "\n\n# ACTIVE TASK {} (.hematite/)\n{}",
233                            name, content
234                        ));
235                    }
236                }
237            }
238        }
239
240        if !mcp_tools.is_empty() {
241            prompt.push_str("\n\n# ACTIVE MCP TOOLS");
242            for tool in mcp_tools {
243                let mut description = tool
244                    .description
245                    .clone()
246                    .unwrap_or_else(|| "No description provided.".to_string());
247                if description.len() > 180 {
248                    description.truncate(180);
249                    description.push_str("...");
250                }
251                prompt.push_str(&format!("\n- {}: {}", tool.name, description));
252            }
253        }
254
255        if let Some(hint) = &config.context_hint {
256            prompt.push_str(&format!("\n## PROJECT CONTEXT HINT\n{}\n", hint));
257        }
258
259        prompt.push_str("\n## OPERATIONAL PROTOCOL (Gemma-4-E4B Native)\n");
260        prompt.push_str("1. **Thinking Mode**: ALWAYS use the thought channel (`<|channel>thought ... <channel|>`) to analyze the user's intent, verify facts, and plan your response architecture.\n");
261        prompt.push_str("2. **Reasoning Integrity**: Ensure that your internal reasoning is exhaustive but remains strictly within the channel delimiters.\n");
262        prompt.push_str("3. **Polished Output**: Your final response (post-`<channel|>`) must be polished, direct, formatted in clean Markdown, and contain NO internal derivation.\n");
263        prompt.push_str("4. **Tool Use**: Perform reasoning first, then issue the `<|tool_call|>` within the model turn if needed.\n");
264        prompt.push_str("5. **Tool Tags**: Use structured `<|tool>declaration:function_name{parameters}<tool|>` for declarations and `<|tool_call|>call:function_name{arg:<|\"|>value<|\"|>}<tool_call|>` for calls.\n");
265        prompt.push_str("6. **Safety**: String values MUST use the `<|\"|>` wrapper for safety.\n");
266        prompt.push_str("7. **Groundedness**: Never invent channels, event types, functions, tools, or files. If a detail is not verified from the repo or tool output, say `uncertain`.\n");
267        prompt.push_str("8. **Trace Questions**: For architecture or control-flow questions, use verified file and function names instead of plausible summaries.\n");
268        prompt.push_str("9. **Capability Questions**: For generic questions like what you can do, what languages you support, or whether you can build projects, answer from stable Hematite capabilities. Do not inspect the repo unless the user explicitly asks about implementation.\n");
269        prompt.push_str("10. **Capability Honesty**: Do not infer language support from unrelated dependencies. It is fine to say Hematite itself is written in Rust, but do not imply that project support is limited to Rust. Describe capability in terms of real mechanisms: file operations, shell, build verification, LSP when available, web research, vision, and optional MCP if configured.\n");
270        prompt.push_str("11. **Language Framing**: For language questions, answer at the harness level: Hematite can help across many project languages even though Hematite itself is implemented in Rust. Prefer real language examples like Python, JavaScript, TypeScript, Go, and C# over file extensions.\n");
271        prompt.push_str("12. **Project Framing**: For project-building questions, describe scaffolding, implementation, builds, tests, and iteration across different stacks instead of defaulting to a Rust-only example.\n");
272        prompt.push_str("13. **Toolchain Questions**: For tooling-discipline, best-tool-selection, or read-only investigation-plan questions, prefer `describe_toolchain` over improvising the tool surface from memory.\n");
273        prompt.push_str("14. **Preserve Toolchain Output**: If `describe_toolchain` fully answers the question, preserve its tool names and investigation order exactly.\n");
274        prompt.push_str("15. **Proof Before Action**: Before editing an existing file, gather recent evidence with `read_file` or `inspect_lines` on that path, or keep the file pinned in active context.\n");
275        prompt.push_str("16. **Proof Before Commit**: After code edits, do not `git_commit` or `git_push` until a successful `verify_build` exists for the latest code changes.\n");
276        prompt.push_str("17. **Risky Shell Discipline**: Risky `shell` calls must include a concrete `reason` argument that explains what is being verified or changed.\n");
277        prompt.push_str("18. **Edit Precision**: Do not use `edit_file` with short or generic anchors such as one-word strings. Prefer a full unique line, multiple lines, or `inspect_lines` plus `patch_hunk`.\n");
278        prompt.push_str("19. **Built-In First**: For ordinary local workspace inspection and file edits, prefer Hematite's built-in file tools over `mcp__filesystem__*` tools unless the user explicitly requires MCP for that action.\n");
279        prompt.push_str("20. **Deep Sync**: Every 6th turn, review the full TASK.md.\n\n21. **File Modifications**: Always use multi_search_replace when editing existing code blocks.\n");
280        prompt.push_str("22. **Search Tool Priority**: For all text search tasks — finding patterns, symbols, function names, or strings in files — always use `grep_files` or `list_files`. Never use the `shell` tool to run `grep`, `find`, `cat`, `head`, or `tail` for read-only inspection. Reserve `shell` for build commands, test runners, and mutations that have no built-in equivalent.");
281
282        prompt.push_str(concat!(
283            "23. **Host Inspection Priority**: NEVER use `shell` for any read-only question about the machine or operating system. ",
284            "Always use `inspect_host` with the correct topic. ",
285            "When the user asks for multiple things, make one `inspect_host` call per topic in sequence — NEVER collapse multiple distinct topics into a single generic topic like 'network'. ",
286            "Example: 'show route table, ARP, DNS cache, and traceroute' → four separate inspect_host calls: route_table, arp, dns_cache, traceroute. ",
287            "Topic routing rules (MANDATORY — no exceptions):\n",
288            "  - 'is my PC up to date?' / 'pending updates?' / 'Windows Update' → topic='updates'\n",
289            "  - 'is antivirus on?' / 'Defender running?' / 'is my PC protected?' / 'Windows activated?' / 'UAC' → topic='security'\n",
290            "  - 'do I need to restart?' / 'reboot required?' / 'pending restart?' → topic='pending_reboot'\n",
291            "  - 'is my drive healthy?' / 'SMART status' / 'hard drive dying?' / 'SSD healthy?' → topic='disk_health'\n",
292            "  - 'battery' / 'battery life' / 'charge level' / 'battery wear' → topic='battery'\n",
293            "  - 'why did PC restart?' / 'BSOD?' / 'blue screen' / 'app crash' / 'crash history' → topic='recent_crashes'\n",
294            "  - 'scheduled tasks' / 'task scheduler' / 'what runs on a timer?' → topic='scheduled_tasks'\n",
295            "  - 'dev conflict' / 'toolchain conflict' / 'python wrong version' / 'duplicate PATH' → topic='dev_conflicts'\n",
296            "  - 'disk space' / 'drive capacity' / 'cache size' / 'storage' → topic='storage'\n",
297            "  - 'CPU model' / 'RAM size' / 'GPU' / 'hardware specs' / 'what hardware do I have?' → topic='hardware'\n",
298            "  - 'system health' / 'overall status' → topic='health_report'\n",
299            "  - 'network adapters' / 'IP address' / 'DNS' / 'wifi' → topic='network'\n",
300            "  - 'am I connected?' / 'internet access?' / 'ping google' / 'DNS resolving?' / 'no internet' → topic='connectivity'\n",
301            "  - 'wifi signal' / 'wireless network' / 'what SSID am I on?' / 'access point' → topic='wifi'\n",
302            "  - 'active connections' / 'tcp connections' / 'netstat' / 'open sockets' → topic='connections'\n",
303            "  - 'vpn connected?' / 'is VPN on?' / 'virtual private network' → topic='vpn'\n",
304            "  - 'proxy settings' / 'system proxy' / 'winhttp proxy' → topic='proxy'\n",
305            "  - 'firewall rules' / 'what does the firewall block?' / 'inbound rules' / 'outbound rules' → topic='firewall_rules'\n",
306            "  - 'traceroute' / 'trace route' / 'how many hops?' / 'network path to X' → topic='traceroute' (optional: host arg defaults to 8.8.8.8)\n",
307            "  - 'dns cache' / 'cached dns entries' / 'what dns lookups are cached?' → topic='dns_cache'\n",
308            "  - 'arp table' / 'arp cache' / 'mac addresses on network' / 'ip to mac' → topic='arp'\n",
309            "  - 'route table' / 'routing table' / 'default gateway' / 'network routes' / 'next hop' → topic='route_table'\n",
310            "  - 'running services' / 'service status' → topic='services'\n",
311            "  - 'running processes' / 'what is using RAM?' / 'CPU usage by process' → topic='processes'\n",
312            "  - 'listening ports' / 'what is on port 3000?' → topic='ports'\n",
313            "  - 'resource load' / 'CPU %' / 'RAM %' / 'performance' → topic='resource_load'\n",
314            "  - 'fix cargo not found' / 'fix port in use' → topic='fix_plan'\n",
315            "  - 'how do I install a driver' / 'update GPU driver' → topic='fix_plan' with issue='install driver'\n",
316            "  - 'how do I create a firewall rule' / 'open a port in the firewall' → topic='fix_plan' with issue='create firewall rule'\n",
317            "  - 'how do I generate SSH keys' / 'set up SSH key pair' → topic='fix_plan' with issue='generate ssh key'\n",
318            "  - 'how do I install WSL' / 'set up Windows Subsystem for Linux' → topic='fix_plan' with issue='set up wsl'\n",
319            "  - 'how do I start/stop a service' / 'enable a service at startup' → topic='fix_plan' with issue='configure service'\n",
320            "  - 'how do I activate Windows' / 'windows not activated' → topic='fix_plan' with issue='activate windows'\n",
321            "  - 'how do I edit the registry' / 'add a registry key' → topic='fix_plan' with issue='edit registry'\n",
322            "  - 'how do I create a scheduled task' / 'run script on startup' → topic='fix_plan' with issue='create scheduled task'\n",
323            "  - 'free up disk space' / 'disk full' / 'reclaim space' → topic='fix_plan' with issue='free up disk space'\n",
324            "  - 'how do I edit Group Policy' / 'gpedit' → topic='fix_plan' with issue='edit group policy'\n",
325            "  - 'PATH entries' / 'which tools are installed?' → topic='toolchains' or 'path'\n",
326            "  - 'docker running?' / 'show containers' / 'docker images' / 'compose projects' → topic='docker'\n",
327            "  - 'wsl distros' / 'ubuntu on windows' / 'windows subsystem for linux' → topic='wsl'\n",
328            "  - 'ssh config' / 'ssh keys' / 'sshd running?' / 'known_hosts' / 'authorized_keys' → topic='ssh'\n",
329            "  - 'git config' / 'git global settings' / 'git user.name' / 'git aliases' → topic='git_config'\n",
330            "  - 'installed software' / 'installed programs' / 'what is installed?' / 'winget list' → topic='installed_software'\n",
331            "  - 'environment variables' / 'env vars' / 'show env' / 'JAVA_HOME set?' → topic='env'\n",
332            "  - 'hosts file' / '/etc/hosts' / 'host entries' / 'custom domain redirect' → topic='hosts_file'\n",
333            "  - 'is postgres running?' / 'mysql service' / 'redis up?' / 'local database engines' / 'mongodb' / 'sqlite' → topic='databases'\n",
334            "  - 'local users' / 'who is logged in?' / 'admin group members' / 'who has admin rights?' / 'is this elevated?' / 'active sessions' / 'net user' → topic='user_accounts'\n",
335            "  - 'audit policy' / 'what is being logged?' / 'is auditing enabled?' / 'auditpol' / 'security audit' / 'event auditing' → topic='audit_policy'\n",
336            "  - 'SMB shares' / 'network shares' / 'shared folders' / 'mapped drives' / 'what is this machine sharing?' / 'SMB1 enabled?' → topic='shares'\n",
337            "  - 'what DNS servers am I using?' / 'configured DNS resolver' / 'nameservers' / 'DNS over HTTPS' / 'DoH configured?' → topic='dns_servers'\n",
338            "  Do NOT use shell, Get-ItemProperty, registry reads, wmic, Get-CimInstance, Get-WinEvent, Get-PhysicalDisk, Get-MpComputerStatus, Get-ScheduledTask, docker CLI, wsl CLI, git config, winget, dpkg, or any shell diagnostic command. ",
339            "Use inspect_host exclusively. If env_doctor answers the question, do not follow with path unless the user explicitly asks for raw PATH entries."
340        ));
341
342        prompt.push_str(concat!(
343            "\n24. **Teacher Mode — Grounded Walkthroughs for Write/Admin Tasks**: ",
344            "When the user asks how to install a driver, edit Group Policy, create a firewall rule, set up SSH keys, configure WSL, edit the registry, manage a service, create a scheduled task, edit the PATH, or perform any other write/admin/config operation that Hematite cannot safely execute itself: ",
345            "(1) FIRST call inspect_host with the most relevant topic(s) to observe the actual machine state — e.g. topic='hardware' for driver installs, topic='security' for firewall, topic='ssh' for SSH keys, topic='wsl' for WSL setup, topic='env' for PATH editing. ",
346            "(2) THEN deliver a numbered step-by-step walkthrough that references what you actually observed — not generic advice. ",
347            "(3) Each step must be concrete and machine-specific: include exact PowerShell commands, exact paths, exact values the user should type. ",
348            "(4) End with a verification step the user can run to confirm success. ",
349            "You are a senior technician who has just examined the real machine. Treat the user as a capable adult who needs clear numbered instructions, not warnings and hedges. ",
350            "In /teach workflow mode, this rule is ALWAYS active for every admin/config/write question. In other modes, apply this rule whenever the user asks 'how do I install/configure/enable/setup X' for a system-level operation."
351        ));
352
353        prompt.push_str(concat!(
354            "\n25. **Computation Integrity — Use run_code for Precise Math**: ",
355            "Never answer from training-data memory when the result must be exact. ",
356            "For any of the following, use `run_code` (JavaScript/Deno or Python) and return the real output: ",
357            "checksums or hashes (SHA-256, MD5, CRC), ",
358            "financial or percentage calculations, ",
359            "statistical analysis (mean, median, std dev, regression), ",
360            "unit conversions where precision matters (bytes to MB/GB, time zones, scientific units), ",
361            "algorithmic verification (sorting, searching, graph traversal), ",
362            "date/time arithmetic (days between dates, Unix timestamps, durations), ",
363            "prime checks or factorization, ",
364            "and any calculation where being wrong by even a small amount would matter. ",
365            "A model answer for these is a guess. A run_code answer is a proof. ",
366            "When in doubt: write the code, run it, return the real result."
367        ));
368
369        prompt
370    }
371}