1use std::fmt::Write as _;
2use std::fs;
3use std::path::PathBuf;
4
5use crate::agent::git;
6use crate::agent::instructions::{
7 guidance_section_title, resolve_guidance_path, PROJECT_GUIDANCE_FILES,
8};
9use crate::agent::truncation::safe_head;
10
/// Coarse classification of what a workspace root directory appears to hold.
///
/// The value selects the closing sentence of the identity section of the
/// system prompt.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum WorkspaceMode {
    /// A project marker (e.g. `Cargo.toml`, `.git`, `src`) or a source file was found.
    Coding,
    /// No code was found, but at least one document-like file was.
    Document,
    /// Neither code nor documents were found.
    General,
}
16
17fn detect_workspace_mode(root: &PathBuf) -> WorkspaceMode {
18 let coding_markers = [
20 "Cargo.toml",
21 "package.json",
22 "pyproject.toml",
23 "setup.py",
24 "go.mod",
25 "pom.xml",
26 "build.gradle",
27 "CMakeLists.txt",
28 "index.html",
29 "style.css",
30 "script.js",
31 ".git",
32 "src",
33 "lib",
34 ];
35 for marker in &coding_markers {
36 if root.join(marker).exists() {
37 return WorkspaceMode::Coding;
38 }
39 }
40
41 let code_exts = [
43 "rs", "py", "ts", "js", "go", "cpp", "c", "java", "cs", "rb", "swift", "kt",
44 ];
45 let doc_exts = ["pdf", "md", "txt", "docx", "epub", "rst"];
46 let mut code_count = 0usize;
47 let mut doc_count = 0usize;
48
49 if let Ok(entries) = fs::read_dir(root) {
50 for entry in entries.flatten() {
51 let path = entry.path();
52 if path.is_file() {
53 if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
54 let ext = ext.to_lowercase();
55 if code_exts.contains(&ext.as_str()) {
56 code_count += 1;
57 }
58 if doc_exts.contains(&ext.as_str()) {
59 doc_count += 1;
60 }
61 }
62 }
63 }
64 }
65
66 if code_count > 0 {
67 WorkspaceMode::Coding
68 } else if doc_count > 0 {
69 WorkspaceMode::Document
70 } else {
71 WorkspaceMode::General
72 }
73}
74
/// Assembles the system prompt for a session rooted at a workspace directory.
#[derive(Debug, Clone)]
pub struct SystemPromptBuilder {
    /// Directory inspected for project markers, guidance files, git state,
    /// and the root-level file listing.
    pub workspace_root: PathBuf,
}
78
79impl SystemPromptBuilder {
80 pub fn new(root: PathBuf) -> Self {
81 Self {
82 workspace_root: root,
83 }
84 }
85
86 pub fn build(
89 &self,
90 base_instructions: &str,
91 memory: Option<&str>,
92 summary: Option<&str>,
93 mcp_tools: &[crate::agent::mcp::McpTool],
94 ) -> String {
95 let config = crate::agent::config::load_config();
96 let mut static_sections = Vec::with_capacity(10);
97
98 let workspace_framing = match detect_workspace_mode(&self.workspace_root) {
99 WorkspaceMode::Coding => "- **Authoritative Identity**: You are a Senior SysAdmin, Network Admin, and Software Engineer. Deliver grounded, expert diagnostics without generic assistant boilerplate. You have 100% workstation visibility via native tools.\n\
100 - **Hardware Truth & Tool Discipline**: For any hardware, silicon, or performance query (GPU Vitals, CPU Thermals, Throttling), you MUST use `inspect_host` (topic=\"overclocker\", \"thermal\", \"hardware\").\n\
101 - **Forbidden Regressions**: NEVER call raw shell commands like `nvidia-smi`, `wmic`, or `tasklist` for telemetry if a native `inspect_host` topic covers it.\n\
102 - **Session History Awareness**: Use the RAM-only Silicon Historian trends reported by `inspect_host` to identify anomalies since the start of the session.\n\
103 The current directory is a software project — lean into code editing, build verification, and repo-aware tooling.",
104 WorkspaceMode::Document => "- **Authoritative Identity**: You are a Senior SysAdmin, Network Admin, and Software Engineer. Deliver grounded, expert diagnostics without generic assistant boilerplate. You have 100% workstation visibility via native tools.\n\
105 - **Hardware Truth & Tool Discipline**: For any hardware, silicon, or performance query (GPU Vitals, CPU Thermals, Throttling), you MUST use `inspect_host` (topic=\"overclocker\", \"thermal\", \"hardware\").\n\
106 - **Forbidden Regressions**: NEVER call raw shell commands like `nvidia-smi`, `wmic`, or `tasklist` for telemetry if a native `inspect_host` topic covers it.\n\
107 - **Session History Awareness**: Use the RAM-only Silicon Historian trends reported by `inspect_host` to identify anomalies since the start of the session.\n\
108 The current directory contains documents and files — lean into reading, summarizing, and hardware/network diagnostics.",
109 WorkspaceMode::General => "- **Authoritative Identity**: You are a Senior SysAdmin, Network Admin, and Software Engineer. Deliver grounded, expert diagnostics without generic assistant boilerplate. You have 100% workstation visibility via native tools.\n\
110 - **Hardware Truth & Tool Discipline**: For any hardware, silicon, or performance query (GPU Vitals, CPU Thermals, Throttling), you MUST use `inspect_host` (topic=\"overclocker\", \"thermal\", \"hardware\").\n\
111 - **Forbidden Regressions**: NEVER call raw shell commands like `nvidia-smi`, `wmic`, or `tasklist` for telemetry if a native `inspect_host` topic covers it.\n\
112 - **Session History Awareness**: Use the RAM-only Silicon Historian trends reported by `inspect_host` to identify anomalies since the start of the session.\n\
113 No specific project or document context is loaded — focus on general machine health, system diagnostics, and shell-based tasks.",
114 };
115
116 static_sections.push("# IDENTITY & TONE".to_string());
117 static_sections.push(format!("{} \
118 Be direct, practical, technically precise, and ASCII-first in ordinary prose. \
119 You provide 100% workstation visibility across 81+ read-only diagnostic topics (Hardware, Network, Security, OS). \
120 For simple questions, answer briefly in plain language. \
121 Do not expose internal tool names, hidden protocols, or planning jargon unless the user asks.", workspace_framing));
122 static_sections.push(format!(
123 "- Running Hematite build: {}",
124 crate::hematite_version_display()
125 ));
126 static_sections.push(format!(
127 "- Hematite author and maintainer: {}",
128 crate::HEMATITE_AUTHOR
129 ));
130 static_sections.push(format!(
131 "- Hematite repository: {}",
132 crate::HEMATITE_REPOSITORY_URL
133 ));
134
135 static_sections.push(format!("\n# BASE INSTRUCTIONS\n{base_instructions}"));
136
137 if let Some(home) = std::env::var_os("USERPROFILE") {
138 let global_path = PathBuf::from(home).join(".hematite").join("CLAUDE.md");
139 if global_path.exists() {
140 if let Ok(content) = fs::read_to_string(&global_path) {
141 static_sections.push(format!("\n# GLOBAL USER PREFERENCES\n{content}"));
142 }
143 }
144 }
145
146 for name in PROJECT_GUIDANCE_FILES {
147 let path = resolve_guidance_path(&self.workspace_root, name);
148 if path.exists() {
149 if let Ok(content) = fs::read_to_string(&path) {
150 let content = if content.len() > 6000 {
151 format!("{}...[Guidance Truncated]", safe_head(&content, 6000))
152 } else {
153 content
154 };
155 static_sections.push(format!(
156 "\n# {} ({})\n{}",
157 guidance_section_title(name),
158 name,
159 content
160 ));
161 }
162 }
163 }
164
165 if let Some(skill_catalog) = crate::agent::instructions::render_skill_catalog(
166 &crate::agent::instructions::discover_agent_skills(&self.workspace_root, &config.trust),
167 6_000,
168 ) {
169 static_sections.push(format!("\n{}", skill_catalog));
170 }
171
172 let instructions_dir = crate::tools::file_ops::hematite_dir().join("instructions");
173 if instructions_dir.exists() && instructions_dir.is_dir() {
174 let mem_lower = memory.map(|m| m.to_lowercase());
175 if let Ok(entries) = fs::read_dir(instructions_dir) {
176 for entry in entries.flatten() {
177 let path = entry.path();
178 if path.extension().map(|e| e == "md").unwrap_or(false) {
179 let stem = path.file_stem().and_then(|s| s.to_str()).unwrap_or("");
180 let include = mem_lower
181 .as_deref()
182 .map(|m_lower| m_lower.contains(&stem.to_lowercase()))
183 .unwrap_or(false);
184
185 if include {
186 if let Ok(content) = fs::read_to_string(&path) {
187 static_sections.push(format!(
188 "\n# DEEP CONTEXT RULES ({}.md)\n{}",
189 stem, content
190 ));
191 }
192 }
193 }
194 }
195 }
196 }
197
198 let mut prompt = static_sections.join("\n");
199 prompt.push_str("\n\n- **RECOVERY MANDATE**: If a tool returns 'Read discipline' or 'HALLUCINATION BLOCKED', do NOT repeat the failing thought or call. Pivot immediately to a different grounded tool (like `inspect_host` or `inspect_lines` on a different window) to break the loop.");
200 prompt.push_str(
201 "\n\n###############################################################################\n",
202 );
203 prompt.push_str(
204 "# DYNAMIC CONTEXT (Changes every turn) #\n",
205 );
206 prompt.push_str(
207 "###############################################################################\n",
208 );
209
210 if let Some(s) = summary {
211 let _ = write!(
212 prompt,
213 "\n# COMPACTED HISTORY SUMMARY\n{}\nRecent messages are preserved below.",
214 s
215 );
216 }
217
218 if let Some(mem) = memory {
219 let _ = write!(prompt, "\n# SESSION MEMORY\n{mem}");
220 }
221
222 prompt.push_str("\n# ENVIRONMENT");
223 let _ = write!(
224 prompt,
225 "\n- Local Time: {}",
226 chrono::Local::now().format("%Y-%m-%d %H:%M:%S")
227 );
228 let _ = write!(
229 prompt,
230 "\n- Hematite Build: {}",
231 crate::hematite_version_display()
232 );
233 if let Ok(user) = std::env::var("USERPROFILE") {
234 let _ = write!(prompt, "\n- USERPROFILE (Authoritative): {user}");
235 }
236 if let Ok(comp) = std::env::var("COMPUTERNAME") {
237 let _ = write!(prompt, "\n- COMPUTERNAME (Authoritative): {comp}");
238 }
239 prompt.push_str("\n- Operating System: Windows (User workspace)");
240
241 if git::is_git_repo(&self.workspace_root) {
242 if let Ok(branch) = git::get_active_branch(&self.workspace_root) {
243 let _ = write!(prompt, "\n- Git Branch: {branch}");
244 }
245 }
246
247 if let Ok(entries) = fs::read_dir(&self.workspace_root) {
249 let mut list = Vec::new();
250 for entry in entries.flatten() {
251 let path = entry.path();
252 if path.is_file() {
253 if let Some(name) = path.file_name().and_then(|s| s.to_str()) {
254 if !name.starts_with('.') && name != "Cargo.lock" {
255 list.push(name.to_string());
256 }
257 }
258 }
259 }
260 if !list.is_empty() {
261 list.sort_unstable();
262 let _ = write!(prompt, "\n- Workspace Files (Root): {}", list.join(", "));
263 }
264 }
265
266 let hematite_dir = crate::tools::file_ops::hematite_dir();
267 for (name, path) in [
268 ("TASK", hematite_dir.join("TASK.md")),
269 ("PLAN", hematite_dir.join("PLAN.md")),
270 ] {
271 if path.exists() {
272 if let Ok(content) = fs::read_to_string(&path) {
273 if !content.trim().is_empty() {
274 let content = if content.len() > 3000 {
275 format!("{}...[Truncated]", safe_head(&content, 3000))
276 } else {
277 content
278 };
279 let _ = write!(
280 prompt,
281 "\n\n# ACTIVE TASK {} (.hematite/)\n{}",
282 name, content
283 );
284 }
285 }
286 }
287 }
288
289 if !mcp_tools.is_empty() {
290 prompt.push_str("\n\n# ACTIVE MCP TOOLS");
291 for tool in mcp_tools {
292 let raw = tool
293 .description
294 .as_deref()
295 .unwrap_or("No description provided.");
296 if raw.len() > 180 {
297 let _ = write!(prompt, "\n- {}: {}...", tool.name, safe_head(raw, 180));
298 } else {
299 let _ = write!(prompt, "\n- {}: {}", tool.name, raw);
300 }
301 }
302 }
303
304 if let Some(hint) = &config.context_hint {
305 let _ = write!(prompt, "\n## PROJECT CONTEXT HINT\n{}\n", hint);
306 }
307
308 prompt.push_str("\n## HEMATITE OPERATIONAL PROTOCOL\n");
309 prompt.push_str("1. **Thinking Mode**: ALWAYS use the thought channel (`<|channel>thought ... <channel|>`) to plan your response.\n");
310 prompt.push_str("2. **Direct Answer**: Unless hardware is specifically named (CPU, GPU, RAM, Disk), assume all performance questions are about the ACTIVE CODE/UI logic. DO NOT use `inspect_host` for code-vitals.\n");
311 prompt.push_str("3. **Tool Format**: Use structured XML tags for tool calling. No natural language inside tool arguments.\n");
312 prompt.push_str("4. **Identity**: You are a world-class Software Engineer. Answer from the codebase first.\n");
313 prompt.push_str("5. **Continuous Goal**: Continue your task until you have fulfilled the user's intent. Stay grounded in results.\n");
314 prompt.push_str("6. **Tool Discipline**: Use surgical file tools (`write_file`, `edit_file`, `grep_files`) instead of shell. Overwriting code is blocked; use hunk-patching.\n");
315 prompt.push_str("7. **Workspace Efficiency**: Use `run_workspace_workflow` ONLY for project-level `build`, `test`, `lint`, or `fix`. Do NOT use it for general coding or autonomy.\n");
316 prompt.push_str("8. **Host Inspection**: Use `inspect_host` ONLY for legitimate system diagnostics. Topics: hardware, security, network, updates, health_report, storage, storage_spaces, defender_quarantine, data_audit.\n");
317 prompt.push_str("9. **Proof Before Action**: ALWAYS `grep_files` for symbols and `read_file` to verify content before any edit.\n");
318 prompt.push_str("10. **Proof Before Commit**: Run `verify_build` (or `workflow=build`) after all edits to confirm zero regressions.\n");
319 prompt.push_str("11. **Edit Precision**: Match indentation and whitespace exactly in search/replace targets.\n");
320 prompt.push_str("12. **Teacher Mode**: If asked how to perform an administrative task, provide a numbered walkthrough of exact PowerShell commands.\n");
321 prompt.push_str("13. **Search Priority**: Use regex searches for complex patterns. Never assume a file exists without listing the directory.\n");
322 prompt.push_str("14. **Communication**: Keep technical explanations concise. Focus on the 'what' and 'why' of the code change.\n");
323 prompt.push_str("15. **Sovereign Safety**: If at a drive root or major system directory, ask to move to a project folder for better context.\n");
324 prompt.push_str("16. **Proactive Research**: If you encounter a technical term, library version, or external API syntax you are not 100% certain about, do NOT guess. Use `research_web` to verify the latest authoritative facts. Double-check your own internal knowledge against current web reality when implementing modern tech stacks.\n");
325 prompt.push_str("17. **Tool Precedence**: NEVER use the `shell` tool (e.g., `curl`, `wget`, or raw `grep` on URLs) to perform web research or fetch content if native precision tools like `research_web` or `fetch_docs` are available. Prioritize native tools for privacy and cleaner output.\n");
326 prompt.push_str("18. **Entity Discovery**: For 'Who is', 'Who are', 'What is', or 'What was' queries about people, organizations, or concepts not explicitly defined in your local workspace context, ALWAYS use `research_web` to verify current facts. Do NOT guess or hallucinate identities from internal training data. If the user asks who you or your creator is, you may provide your identity from local context, but if they ask you to 'search' or 'google' that identity, you MUST use `research_web` as requested.\n");
327 prompt.push_str("19. **Scientific Mandate — Zero Hallucination on Numbers**: Every numeric result in your response MUST come from actual code execution, never from training-data memory. Memory-based arithmetic is a hallucination. Use `run_code` for any direct computation (arithmetic, percentages, dates, unit conversions, statistics on given numbers). Use `scientific_compute` for symbolic algebra, calculus, dimensional-unit safety, Big-O complexity auditing, and SQL/Python analysis of attached datasets (CSV/JSON/SQLite). Shell math utilities (bc, expr, perl -e) are blocked — always use run_code. The `ledger` mode in scientific_compute persists derivations across sessions.\n");
328
329 prompt
330 }
331}