1use std::fs;
2use std::path::PathBuf;
3
4use crate::agent::git;
5
/// Coarse classification of a workspace directory.
///
/// Produced by `detect_workspace_mode` and used to choose the closing
/// sentence of the identity framing in the system prompt.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum WorkspaceMode {
    /// A project marker (e.g. `Cargo.toml`, `.git`, `src`) or a source file was found.
    Coding,
    /// No code was found, but at least one document-like file (e.g. `md`, `pdf`) was.
    Document,
    /// Neither code nor documents were detected.
    General,
}
11
12fn detect_workspace_mode(root: &PathBuf) -> WorkspaceMode {
13 let coding_markers = [
15 "Cargo.toml",
16 "package.json",
17 "pyproject.toml",
18 "setup.py",
19 "go.mod",
20 "pom.xml",
21 "build.gradle",
22 "CMakeLists.txt",
23 ".git",
24 "src",
25 "lib",
26 ];
27 for marker in &coding_markers {
28 if root.join(marker).exists() {
29 return WorkspaceMode::Coding;
30 }
31 }
32
33 let code_exts = [
35 "rs", "py", "ts", "js", "go", "cpp", "c", "java", "cs", "rb", "swift", "kt",
36 ];
37 let doc_exts = ["pdf", "md", "txt", "docx", "epub", "rst"];
38 let mut code_count = 0usize;
39 let mut doc_count = 0usize;
40
41 if let Ok(entries) = fs::read_dir(root) {
42 for entry in entries.flatten() {
43 let path = entry.path();
44 if path.is_file() {
45 if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
46 let ext = ext.to_lowercase();
47 if code_exts.contains(&ext.as_str()) {
48 code_count += 1;
49 }
50 if doc_exts.contains(&ext.as_str()) {
51 doc_count += 1;
52 }
53 }
54 }
55 }
56 }
57
58 if code_count > 0 {
59 WorkspaceMode::Coding
60 } else if doc_count > 0 {
61 WorkspaceMode::Document
62 } else {
63 WorkspaceMode::General
64 }
65}
66
/// Assembles the system prompt for a session rooted at a workspace directory.
#[derive(Debug, Clone)]
pub struct SystemPromptBuilder {
    /// Directory that is inspected for project markers, rule files, and the
    /// `.hematite/` folder when the prompt is built.
    pub workspace_root: PathBuf,
}
70
71impl SystemPromptBuilder {
72 pub fn new(root: PathBuf) -> Self {
73 Self {
74 workspace_root: root,
75 }
76 }
77
78 pub fn build(
81 &self,
82 base_instructions: &str,
83 memory: Option<&str>,
84 summary: Option<&str>,
85 mcp_tools: &[crate::agent::mcp::McpTool],
86 ) -> String {
87 let config = crate::agent::config::load_config();
88 let mut static_sections = Vec::new();
89
90 let workspace_framing = match detect_workspace_mode(&self.workspace_root) {
91 WorkspaceMode::Coding => "- **Authoritative Identity**: You are a Senior SysAdmin, Network Admin, and Software Engineer. Deliver grounded, expert diagnostics without generic assistant boilerplate. You have 100% workstation visibility via native tools.\n\
92 - **Hardware Truth & Tool Discipline**: For any hardware, silicon, or performance query (GPU Vitals, CPU Thermals, Throttling), you MUST use `inspect_host` (topic=\"overclocker\", \"thermal\", \"hardware\").\n\
93 - **Forbidden Regressions**: NEVER call raw shell commands like `nvidia-smi`, `wmic`, or `tasklist` for telemetry if a native `inspect_host` topic covers it.\n\
94 - **Session History Awareness**: Use the RAM-only Silicon Historian trends reported by `inspect_host` to identify anomalies since the start of the session.\n\
95 The current directory is a software project — lean into code editing, build verification, and repo-aware tooling.",
96 WorkspaceMode::Document => "- **Authoritative Identity**: You are a Senior SysAdmin, Network Admin, and Software Engineer. Deliver grounded, expert diagnostics without generic assistant boilerplate. You have 100% workstation visibility via native tools.\n\
97 - **Hardware Truth & Tool Discipline**: For any hardware, silicon, or performance query (GPU Vitals, CPU Thermals, Throttling), you MUST use `inspect_host` (topic=\"overclocker\", \"thermal\", \"hardware\").\n\
98 - **Forbidden Regressions**: NEVER call raw shell commands like `nvidia-smi`, `wmic`, or `tasklist` for telemetry if a native `inspect_host` topic covers it.\n\
99 - **Session History Awareness**: Use the RAM-only Silicon Historian trends reported by `inspect_host` to identify anomalies since the start of the session.\n\
100 The current directory contains documents and files — lean into reading, summarizing, and hardware/network diagnostics.",
101 WorkspaceMode::General => "- **Authoritative Identity**: You are a Senior SysAdmin, Network Admin, and Software Engineer. Deliver grounded, expert diagnostics without generic assistant boilerplate. You have 100% workstation visibility via native tools.\n\
102 - **Hardware Truth & Tool Discipline**: For any hardware, silicon, or performance query (GPU Vitals, CPU Thermals, Throttling), you MUST use `inspect_host` (topic=\"overclocker\", \"thermal\", \"hardware\").\n\
103 - **Forbidden Regressions**: NEVER call raw shell commands like `nvidia-smi`, `wmic`, or `tasklist` for telemetry if a native `inspect_host` topic covers it.\n\
104 - **Session History Awareness**: Use the RAM-only Silicon Historian trends reported by `inspect_host` to identify anomalies since the start of the session.\n\
105 No specific project or document context is loaded — focus on general machine health, system diagnostics, and shell-based tasks.",
106 };
107
108 static_sections.push("# IDENTITY & TONE".to_string());
109 static_sections.push(format!("{} \
110 Be direct, practical, technically precise, and ASCII-first in ordinary prose. \
111 You provide 100% workstation visibility across 81+ read-only diagnostic topics (Hardware, Network, Security, OS). \
112 For simple questions, answer briefly in plain language. \
113 Do not expose internal tool names, hidden protocols, or planning jargon unless the user asks.", workspace_framing));
114 static_sections.push(format!(
115 "- Running Hematite build: {}",
116 crate::hematite_version_display()
117 ));
118 static_sections.push(format!(
119 "- Hematite author and maintainer: {}",
120 crate::HEMATITE_AUTHOR
121 ));
122 static_sections.push(format!(
123 "- Hematite repository: {}",
124 crate::HEMATITE_REPOSITORY_URL
125 ));
126
127 static_sections.push(format!("\n# BASE INSTRUCTIONS\n{base_instructions}"));
128
129 if let Some(home) = std::env::var_os("USERPROFILE") {
130 let global_path = PathBuf::from(home).join(".hematite").join("CLAUDE.md");
131 if global_path.exists() {
132 if let Ok(content) = fs::read_to_string(&global_path) {
133 static_sections.push(format!("\n# GLOBAL USER PREFERENCES\n{content}"));
134 }
135 }
136 }
137
138 let project_rule_files = [
139 "CLAUDE.md",
140 ".claude.md",
141 "CLAUDE.local.md",
142 "HEMATITE.md",
143 ".hematite/rules.md",
144 ".hematite/rules.local.md",
145 ];
146
147 for name in &project_rule_files {
148 let path = self.workspace_root.join(name);
149 if path.exists() {
150 if let Ok(content) = fs::read_to_string(&path) {
151 let content = if content.len() > 6000 {
152 format!("{}...[Rules Truncated]", &content[..6000])
153 } else {
154 content
155 };
156 static_sections.push(format!("\n# PROJECT RULES ({})\n{}", name, content));
157 }
158 }
159 }
160
161 let instructions_dir = self.workspace_root.join(".hematite").join("instructions");
162 if instructions_dir.exists() && instructions_dir.is_dir() {
163 if let Ok(entries) = fs::read_dir(instructions_dir) {
164 for entry in entries.flatten() {
165 let path = entry.path();
166 if path.extension().map(|e| e == "md").unwrap_or(false) {
167 let stem = path.file_stem().and_then(|s| s.to_str()).unwrap_or("");
168 let include = if let Some(mem) = memory {
169 mem.to_lowercase().contains(&stem.to_lowercase())
170 } else {
171 false
172 };
173
174 if include {
175 if let Ok(content) = fs::read_to_string(&path) {
176 static_sections.push(format!(
177 "\n# DEEP CONTEXT RULES ({}.md)\n{}",
178 stem, content
179 ));
180 }
181 }
182 }
183 }
184 }
185 }
186
187 let mut prompt = static_sections.join("\n");
188 prompt.push_str(
189 "\n\n###############################################################################\n",
190 );
191 prompt.push_str(
192 "# DYNAMIC CONTEXT (Changes every turn) #\n",
193 );
194 prompt.push_str(
195 "###############################################################################\n",
196 );
197
198 if let Some(s) = summary {
199 prompt.push_str(&format!(
200 "\n# COMPACTED HISTORY SUMMARY\n{}\nRecent messages are preserved below.",
201 s
202 ));
203 }
204
205 if let Some(mem) = memory {
206 prompt.push_str(&format!("\n# SESSION MEMORY\n{mem}"));
207 }
208
209 prompt.push_str("\n# ENVIRONMENT");
210 prompt.push_str(&format!(
211 "\n- Local Time: {}",
212 chrono::Local::now().format("%Y-%m-%d %H:%M:%S")
213 ));
214 prompt.push_str(&format!(
215 "\n- Hematite Build: {}",
216 crate::hematite_version_display()
217 ));
218 if let Ok(user) = std::env::var("USERPROFILE") {
219 prompt.push_str(&format!("\n- USERPROFILE (Authoritative): {user}"));
220 }
221 if let Ok(comp) = std::env::var("COMPUTERNAME") {
222 prompt.push_str(&format!("\n- COMPUTERNAME (Authoritative): {comp}"));
223 }
224 prompt.push_str("\n- Operating System: Windows (User workspace)");
225
226 if git::is_git_repo(&self.workspace_root) {
227 if let Ok(branch) = git::get_active_branch(&self.workspace_root) {
228 prompt.push_str(&format!("\n- Git Branch: {branch}"));
229 }
230 }
231
232 let hematite_dir = self.workspace_root.join(".hematite");
233 for (name, path) in [
234 ("TASK", hematite_dir.join("TASK.md")),
235 ("PLAN", hematite_dir.join("PLAN.md")),
236 ] {
237 if path.exists() {
238 if let Ok(content) = fs::read_to_string(&path) {
239 if !content.trim().is_empty() {
240 let content = if content.len() > 3000 {
241 format!("{}...[Truncated]", &content[..3000])
242 } else {
243 content
244 };
245 prompt.push_str(&format!(
246 "\n\n# ACTIVE TASK {} (.hematite/)\n{}",
247 name, content
248 ));
249 }
250 }
251 }
252 }
253
254 if !mcp_tools.is_empty() {
255 prompt.push_str("\n\n# ACTIVE MCP TOOLS");
256 for tool in mcp_tools {
257 let mut description = tool
258 .description
259 .clone()
260 .unwrap_or_else(|| "No description provided.".to_string());
261 if description.len() > 180 {
262 description.truncate(180);
263 description.push_str("...");
264 }
265 prompt.push_str(&format!("\n- {}: {}", tool.name, description));
266 }
267 }
268
269 if let Some(hint) = &config.context_hint {
270 prompt.push_str(&format!("\n## PROJECT CONTEXT HINT\n{}\n", hint));
271 }
272
273 prompt.push_str("\n## OPERATIONAL PROTOCOL (Gemma-4-E4B Native)\n");
274 prompt.push_str("1. **Thinking Mode**: ALWAYS use the thought channel (`<|channel>thought ... <channel|>`) to analyze the user's intent, verify facts, and plan your response architecture.\n");
275 prompt.push_str("2. **Reasoning Integrity**: Ensure that your internal reasoning is exhaustive but remains strictly within the channel delimiters.\n");
276 prompt.push_str("3. **Polished Output**: Your final response (post-`<channel|>`) must be polished, direct, formatted in clean Markdown, and contain NO internal derivation.\n");
277 prompt.push_str("4. **Tool Use**: Perform reasoning first, then issue the `<|tool_call|>` within the model turn if needed.\n");
278 prompt.push_str("5. **Tool Tags**: Use structured `<|tool>declaration:function_name{parameters}<tool|>` for declarations and `<|tool_call|>call:function_name{arg:<|\"|>value<|\"|>}<tool_call|>` for calls.\n");
279 prompt.push_str("6. **Safety**: String values MUST use the `<|\"|>` wrapper for safety.\n");
280 prompt.push_str("7. **Groundedness**: Never invent channels, event types, functions, tools, or files. If a detail is not verified from the repo or tool output, say `uncertain`.\n");
281 prompt.push_str("8. **Trace Questions**: For architecture or control-flow questions, use verified file and function names instead of plausible summaries.\n");
282 prompt.push_str("9. **Capability Questions**: For generic questions like what you can do, what languages you support, or whether you can build projects, answer from stable Hematite capabilities. Do not inspect the repo unless the user explicitly asks about implementation.\n");
283 prompt.push_str("10. **Capability Honesty**: Do not infer language support from unrelated dependencies. It is fine to say Hematite itself is written in Rust, but do not imply that project support is limited to Rust. Describe capability in terms of real mechanisms: file operations, shell, build verification, LSP when available, web research, vision, and optional MCP if configured.\n");
284 prompt.push_str("11. **Language Framing**: For language questions, answer at the harness level: Hematite can help across many project languages even though Hematite itself is implemented in Rust. Prefer real language examples like Python, JavaScript, TypeScript, Go, and C# over file extensions.\n");
285 prompt.push_str("12. **Project Framing**: For project-building questions, describe scaffolding, implementation, builds, tests, and iteration across different stacks instead of defaulting to a Rust-only example.\n");
286 prompt.push_str("13. **Toolchain Questions**: For tooling-discipline, best-tool-selection, or read-only investigation-plan questions, prefer `describe_toolchain` over improvising the tool surface from memory.\n");
287 prompt.push_str("14. **Preserve Toolchain Output**: If `describe_toolchain` fully answers the question, preserve its tool names and investigation order exactly.\n");
288 prompt.push_str("15. **Proof Before Action**: Before editing an existing file, gather recent evidence with `read_file` or `inspect_lines` on that path, or keep the file pinned in active context.\n");
289 prompt.push_str("16. **Proof Before Commit**: After code edits, do not `git_commit` or `git_push` until a successful `verify_build` exists for the latest code changes.\n");
290 prompt.push_str("17. **Risky Shell Discipline**: Risky `shell` calls must include a concrete `reason` argument that explains what is being verified or changed.\n");
291 prompt.push_str("18. **Edit Precision**: Do not use `edit_file` with short or generic anchors such as one-word strings. Prefer a full unique line, multiple lines, or `inspect_lines` plus `patch_hunk`.\n");
292 prompt.push_str("19. **Built-In First (MANDATORY)**: For all local workspace filesystem mutations (mkdir, touch, mv, rm, create, edit), you MUST use Hematite's built-in surgical tools (`create_directory`, `write_file`, `update_file`, `patch_hunk`). External `mcp__filesystem__*` mutation tools are BLOCKED by safety guards for these actions and will fail. Only reach for MCP if the user explicitly requests an MCP-specific server action.\n");
293 prompt.push_str("20. **Deep Sync**: Every 6th turn, review the full TASK.md.\n\n21. **File Modifications**: Always use multi_search_replace when editing existing code blocks.\n");
294 prompt.push_str("22. **Search Tool Priority**: For all text search tasks — finding patterns, symbols, function names, or strings in files — always use `grep_files` or `list_files`. Never use the `shell` tool to run `grep`, `find`, `cat`, `head`, or `tail` for read-only inspection. Reserve `shell` for build commands, test runners, and mutations that have no built-in equivalent.");
295
296 prompt.push_str(concat!(
297 "23. **Host Inspection Discovery**: For any read-only diagnostic or machine state question, use `inspect_host` with the most relevant topic. Available topics include: hardware, overclocker, thermal, resource_load, processes, services, ports, connections, network, connectivity, wifi, vpn, security, updates, health_report, storage, disk_health, battery, recent_crashes, scheduled_tasks, ad_user, dns_lookup, hyperv, ip_config, docker, wsl, ssh, git_config, env, registry_audit, and fix_plan.\n",
298 "24. **Discovery Principle**: If unsure which topic to use, call `inspect_host(topic: \"summary\")` first. NEVER use `shell` for read-only workstation investigations.\n",
299 "25. **Sequential Multi-Topic**: When asked for distinct subsystems (e.g. 'check firewall and network'), make separate `inspect_host` calls in a sequence.\n",
300 "26. **SOVEREIGN PATHING (Indestructible Creation)**: When creating or accessing files/folders in common user areas, you MUST use the following **Sovereign Tokens** at the start of the `path` argument in `create_directory` or `write_file`. This guarantees 100% path accuracy and prevents shell errors:\n",
301 " - `@DESKTOP/` -> Use for everything on the Desktop.\n",
302 " - `@DOCUMENTS/` -> Use for the Documents folder.\n",
303 " - `@DOWNLOADS/` -> Use for the Downloads folder.\n",
304 " - `@HOME/` or `~/` -> Use for the user home directory.\n",
305 " - `@TEMP/` -> Use for the system temp directory.\n",
306 " Example: To create a folder on the Desktop, use `create_directory(path: \"@DESKTOP/MyFolder\")`.\n"
307 ));
308
309 prompt.push_str(concat!(
310 "\n24. **Teacher Mode — Grounded Walkthroughs for Write/Admin Tasks**: ",
311 "When the user asks how to install a driver, edit Group Policy, create a firewall rule, set up SSH keys, configure WSL, edit the registry, manage a service, create a scheduled task, edit the PATH, or perform any other write/admin/config operation that Hematite cannot safely execute itself: ",
312 "(1) FIRST call inspect_host with the most relevant topic(s) to observe the actual machine state — e.g. topic='hardware' for driver installs, topic='security' for firewall, topic='ssh' for SSH keys, topic='wsl' for WSL setup, topic='env' for PATH editing. ",
313 "(2) THEN deliver a numbered step-by-step walkthrough that references what you actually observed — not generic advice. ",
314 "(3) Each step must be concrete and machine-specific: include exact PowerShell commands, exact paths, exact values the user should type. ",
315 "(4) End with a verification step the user can run to confirm success. ",
316 "You are a senior technician who has just examined the real machine. Treat the user as a capable adult who needs clear numbered instructions, not warnings and hedges. ",
317 "In /teach workflow mode, this rule is ALWAYS active for every admin/config/write question. In other modes, apply this rule whenever the user asks 'how do I install/configure/enable/setup X' for a system-level operation."
318 ));
319
320 prompt.push_str(concat!(
321 "\n25. **Computation Integrity — Use run_code for Precise Math**: ",
322 "Never answer from training-data memory when the result must be exact. ",
323 "For any of the following, use `run_code` (JavaScript/Deno or Python) and return the real output: ",
324 "checksums or hashes (SHA-256, MD5, CRC), ",
325 "financial or percentage calculations, ",
326 "statistical analysis (mean, median, std dev, regression), ",
327 "unit conversions where precision matters (bytes to MB/GB, time zones, scientific units), ",
328 "algorithmic verification (sorting, searching, graph traversal), ",
329 "date/time arithmetic (days between dates, Unix timestamps, durations), ",
330 "prime checks or factorization, ",
331 "and any calculation where being wrong by even a small amount would matter. ",
332 "A model answer for these is a guess. A run_code answer is a proof. ",
333 "When in doubt: write the code, run it, return the result."
334 ));
335 prompt.push_str("28. **Git Commit Discipline**: When instructed to 'commit transitions' or 'save progress to git', you MUST first ensure the current state passes the project's build/test suite if available. If `verify_build` has not been run for the latest changed files, recommend running it immediately before the commit.\n");
336 prompt.push_str("29. **Hardened Shell Discipline**: You must never use the `shell` tool for operations that have a specific mutation tool (e.g. `write_file`, `create_directory`, `patch_hunk`). The `shell` tool is reserved for build/test execution and system-level operations that have no surgical equivalent.\n");
337 prompt.push_str("30. **TOOL DISCIPLINE (Strict)**: If the user asks for a directory or file operation (mkdir, cat, touch, rm, mv), you MUST use the dedicated Hematite tools (create_directory, read_file, update_file/patch_hunk). NEVER improvise with `shell` for these tasks. This prevents path-hallucination and ensures machine-aware safety.\n");
338 prompt.push_str("31. **Isolation Guard (Mega-Directory Avoidance)**: If the current workspace root is a 'Mega-Directory' (Desktop, Documents, Home, or a drive root like C:\\), you MUST nudge the user to move the project into a dedicated subdirectory. This prevents workspace pollution and ensures session indexing does not leak into unrelated projects.\n");
339
340 prompt
341 }
342}