Skip to main content

hematite/agent/
prompt.rs

1use std::fmt::Write as _;
2use std::fs;
3use std::path::PathBuf;
4
5use crate::agent::git;
6use crate::agent::instructions::{
7    guidance_section_title, resolve_guidance_path, PROJECT_GUIDANCE_FILES,
8};
9use crate::agent::truncation::safe_head;
10
/// Coarse classification of a workspace root directory.
///
/// Produced by `detect_workspace_mode` and used to choose the closing
/// sentence of the identity framing in the system prompt.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum WorkspaceMode {
    /// A project marker or at least one source-code file was found in the root.
    Coding,
    /// No code signal was found, but at least one document-like file exists.
    Document,
    /// Neither code nor document files were detected.
    General,
}
16
17fn detect_workspace_mode(root: &PathBuf) -> WorkspaceMode {
18    // Strong coding signals — any of these present means it's a coding workspace
19    let coding_markers = [
20        "Cargo.toml",
21        "package.json",
22        "pyproject.toml",
23        "setup.py",
24        "go.mod",
25        "pom.xml",
26        "build.gradle",
27        "CMakeLists.txt",
28        "index.html",
29        "style.css",
30        "script.js",
31        ".git",
32        "src",
33        "lib",
34    ];
35    for marker in &coding_markers {
36        if root.join(marker).exists() {
37            return WorkspaceMode::Coding;
38        }
39    }
40
41    // No strong coding signal — check file extensions
42    let code_exts = [
43        "rs", "py", "ts", "js", "go", "cpp", "c", "java", "cs", "rb", "swift", "kt",
44    ];
45    let doc_exts = ["pdf", "md", "txt", "docx", "epub", "rst"];
46    let mut code_count = 0usize;
47    let mut doc_count = 0usize;
48
49    if let Ok(entries) = fs::read_dir(root) {
50        for entry in entries.flatten() {
51            let path = entry.path();
52            if path.is_file() {
53                if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
54                    let ext = ext.to_lowercase();
55                    if code_exts.contains(&ext.as_str()) {
56                        code_count += 1;
57                    }
58                    if doc_exts.contains(&ext.as_str()) {
59                        doc_count += 1;
60                    }
61                }
62            }
63        }
64    }
65
66    if code_count > 0 {
67        WorkspaceMode::Coding
68    } else if doc_count > 0 {
69        WorkspaceMode::Document
70    } else {
71        WorkspaceMode::General
72    }
73}
74
/// Assembles the system prompt for a session rooted at a workspace directory.
#[derive(Debug, Clone)]
pub struct SystemPromptBuilder {
    /// Directory used for workspace-mode detection, project guidance lookup,
    /// git branch detection and the root-level file inventory.
    pub workspace_root: PathBuf,
}
78
79impl SystemPromptBuilder {
80    pub fn new(root: PathBuf) -> Self {
81        Self {
82            workspace_root: root,
83        }
84    }
85
86    /// Build the full system prompt with Rule Hierarchy and Gemma-4 Optimization.
87    /// Hierarchy: Global ($HOME) -> Project (Root) -> Local (Ignored).
88    pub fn build(
89        &self,
90        base_instructions: &str,
91        memory: Option<&str>,
92        summary: Option<&str>,
93        mcp_tools: &[crate::agent::mcp::McpTool],
94    ) -> String {
95        let config = crate::agent::config::load_config();
96        let mut static_sections = Vec::with_capacity(10);
97
98        let workspace_framing = match detect_workspace_mode(&self.workspace_root) {
99            WorkspaceMode::Coding => "- **Authoritative Identity**: You are a Senior SysAdmin, Network Admin, and Software Engineer. Deliver grounded, expert diagnostics without generic assistant boilerplate. You have 100% workstation visibility via native tools.\n\
100                                       - **Hardware Truth & Tool Discipline**: For any hardware, silicon, or performance query (GPU Vitals, CPU Thermals, Throttling), you MUST use `inspect_host` (topic=\"overclocker\", \"thermal\", \"hardware\").\n\
101                                       - **Forbidden Regressions**: NEVER call raw shell commands like `nvidia-smi`, `wmic`, or `tasklist` for telemetry if a native `inspect_host` topic covers it.\n\
102                                       - **Session History Awareness**: Use the RAM-only Silicon Historian trends reported by `inspect_host` to identify anomalies since the start of the session.\n\
103                                       The current directory is a software project — lean into code editing, build verification, and repo-aware tooling.",
104            WorkspaceMode::Document => "- **Authoritative Identity**: You are a Senior SysAdmin, Network Admin, and Software Engineer. Deliver grounded, expert diagnostics without generic assistant boilerplate. You have 100% workstation visibility via native tools.\n\
105                                         - **Hardware Truth & Tool Discipline**: For any hardware, silicon, or performance query (GPU Vitals, CPU Thermals, Throttling), you MUST use `inspect_host` (topic=\"overclocker\", \"thermal\", \"hardware\").\n\
106                                         - **Forbidden Regressions**: NEVER call raw shell commands like `nvidia-smi`, `wmic`, or `tasklist` for telemetry if a native `inspect_host` topic covers it.\n\
107                                         - **Session History Awareness**: Use the RAM-only Silicon Historian trends reported by `inspect_host` to identify anomalies since the start of the session.\n\
108                                         The current directory contains documents and files — lean into reading, summarizing, and hardware/network diagnostics.",
109            WorkspaceMode::General => "- **Authoritative Identity**: You are a Senior SysAdmin, Network Admin, and Software Engineer. Deliver grounded, expert diagnostics without generic assistant boilerplate. You have 100% workstation visibility via native tools.\n\
110                                       - **Hardware Truth & Tool Discipline**: For any hardware, silicon, or performance query (GPU Vitals, CPU Thermals, Throttling), you MUST use `inspect_host` (topic=\"overclocker\", \"thermal\", \"hardware\").\n\
111                                       - **Forbidden Regressions**: NEVER call raw shell commands like `nvidia-smi`, `wmic`, or `tasklist` for telemetry if a native `inspect_host` topic covers it.\n\
112                                       - **Session History Awareness**: Use the RAM-only Silicon Historian trends reported by `inspect_host` to identify anomalies since the start of the session.\n\
113                                       No specific project or document context is loaded — focus on general machine health, system diagnostics, and shell-based tasks.",
114        };
115
116        static_sections.push("# IDENTITY & TONE".to_string());
117        static_sections.push(format!("{} \
118                             Be direct, practical, technically precise, and ASCII-first in ordinary prose. \
119                             You provide 100% workstation visibility across 81+ read-only diagnostic topics (Hardware, Network, Security, OS). \
120                             For simple questions, answer briefly in plain language. \
121                             Do not expose internal tool names, hidden protocols, or planning jargon unless the user asks.", workspace_framing));
122        static_sections.push(format!(
123            "- Running Hematite build: {}",
124            crate::hematite_version_display()
125        ));
126        static_sections.push(format!(
127            "- Hematite author and maintainer: {}",
128            crate::HEMATITE_AUTHOR
129        ));
130        static_sections.push(format!(
131            "- Hematite repository: {}",
132            crate::HEMATITE_REPOSITORY_URL
133        ));
134
135        static_sections.push(format!("\n# BASE INSTRUCTIONS\n{base_instructions}"));
136
137        if let Some(home) = std::env::var_os("USERPROFILE") {
138            let global_path = PathBuf::from(home).join(".hematite").join("CLAUDE.md");
139            if global_path.exists() {
140                if let Ok(content) = fs::read_to_string(&global_path) {
141                    static_sections.push(format!("\n# GLOBAL USER PREFERENCES\n{content}"));
142                }
143            }
144        }
145
146        for name in PROJECT_GUIDANCE_FILES {
147            let path = resolve_guidance_path(&self.workspace_root, name);
148            if path.exists() {
149                if let Ok(content) = fs::read_to_string(&path) {
150                    let content = if content.len() > 6000 {
151                        format!("{}...[Guidance Truncated]", safe_head(&content, 6000))
152                    } else {
153                        content
154                    };
155                    static_sections.push(format!(
156                        "\n# {} ({})\n{}",
157                        guidance_section_title(name),
158                        name,
159                        content
160                    ));
161                }
162            }
163        }
164
165        if let Some(skill_catalog) = crate::agent::instructions::render_skill_catalog(
166            &crate::agent::instructions::discover_agent_skills(&self.workspace_root, &config.trust),
167            6_000,
168        ) {
169            static_sections.push(format!("\n{}", skill_catalog));
170        }
171
172        let instructions_dir = crate::tools::file_ops::hematite_dir().join("instructions");
173        if instructions_dir.exists() && instructions_dir.is_dir() {
174            let mem_lower = memory.map(|m| m.to_lowercase());
175            if let Ok(entries) = fs::read_dir(instructions_dir) {
176                for entry in entries.flatten() {
177                    let path = entry.path();
178                    if path.extension().map(|e| e == "md").unwrap_or(false) {
179                        let stem = path.file_stem().and_then(|s| s.to_str()).unwrap_or("");
180                        let include = mem_lower
181                            .as_deref()
182                            .map(|m_lower| m_lower.contains(&stem.to_lowercase()))
183                            .unwrap_or(false);
184
185                        if include {
186                            if let Ok(content) = fs::read_to_string(&path) {
187                                static_sections.push(format!(
188                                    "\n# DEEP CONTEXT RULES ({}.md)\n{}",
189                                    stem, content
190                                ));
191                            }
192                        }
193                    }
194                }
195            }
196        }
197
198        let mut prompt = static_sections.join("\n");
199        prompt.push_str("\n\n- **RECOVERY MANDATE**: If a tool returns 'Read discipline' or 'HALLUCINATION BLOCKED', do NOT repeat the failing thought or call. Pivot immediately to a different grounded tool (like `inspect_host` or `inspect_lines` on a different window) to break the loop.");
200        prompt.push_str(
201            "\n\n###############################################################################\n",
202        );
203        prompt.push_str(
204            "# DYNAMIC CONTEXT (Changes every turn)                                        #\n",
205        );
206        prompt.push_str(
207            "###############################################################################\n",
208        );
209
210        if let Some(s) = summary {
211            let _ = write!(
212                prompt,
213                "\n# COMPACTED HISTORY SUMMARY\n{}\nRecent messages are preserved below.",
214                s
215            );
216        }
217
218        if let Some(mem) = memory {
219            let _ = write!(prompt, "\n# SESSION MEMORY\n{mem}");
220        }
221
222        prompt.push_str("\n# ENVIRONMENT");
223        let _ = write!(
224            prompt,
225            "\n- Local Time: {}",
226            chrono::Local::now().format("%Y-%m-%d %H:%M:%S")
227        );
228        let _ = write!(
229            prompt,
230            "\n- Hematite Build: {}",
231            crate::hematite_version_display()
232        );
233        if let Ok(user) = std::env::var("USERPROFILE") {
234            let _ = write!(prompt, "\n- USERPROFILE (Authoritative): {user}");
235        }
236        if let Ok(comp) = std::env::var("COMPUTERNAME") {
237            let _ = write!(prompt, "\n- COMPUTERNAME (Authoritative): {comp}");
238        }
239        prompt.push_str("\n- Operating System: Windows (User workspace)");
240
241        if git::is_git_repo(&self.workspace_root) {
242            if let Ok(branch) = git::get_active_branch(&self.workspace_root) {
243                let _ = write!(prompt, "\n- Git Branch: {branch}");
244            }
245        }
246
247        // --- Intelligence Injection: Flat File Inventory ---
248        if let Ok(entries) = fs::read_dir(&self.workspace_root) {
249            let mut list = Vec::new();
250            for entry in entries.flatten() {
251                let path = entry.path();
252                if path.is_file() {
253                    if let Some(name) = path.file_name().and_then(|s| s.to_str()) {
254                        if !name.starts_with('.') && name != "Cargo.lock" {
255                            list.push(name.to_string());
256                        }
257                    }
258                }
259            }
260            if !list.is_empty() {
261                list.sort_unstable();
262                let _ = write!(prompt, "\n- Workspace Files (Root): {}", list.join(", "));
263            }
264        }
265
266        let hematite_dir = crate::tools::file_ops::hematite_dir();
267        for (name, path) in [
268            ("TASK", hematite_dir.join("TASK.md")),
269            ("PLAN", hematite_dir.join("PLAN.md")),
270        ] {
271            if path.exists() {
272                if let Ok(content) = fs::read_to_string(&path) {
273                    if !content.trim().is_empty() {
274                        let content = if content.len() > 3000 {
275                            format!("{}...[Truncated]", safe_head(&content, 3000))
276                        } else {
277                            content
278                        };
279                        let _ = write!(
280                            prompt,
281                            "\n\n# ACTIVE TASK {} (.hematite/)\n{}",
282                            name, content
283                        );
284                    }
285                }
286            }
287        }
288
289        if !mcp_tools.is_empty() {
290            prompt.push_str("\n\n# ACTIVE MCP TOOLS");
291            for tool in mcp_tools {
292                let raw = tool
293                    .description
294                    .as_deref()
295                    .unwrap_or("No description provided.");
296                if raw.len() > 180 {
297                    let _ = write!(prompt, "\n- {}: {}...", tool.name, safe_head(raw, 180));
298                } else {
299                    let _ = write!(prompt, "\n- {}: {}", tool.name, raw);
300                }
301            }
302        }
303
304        if let Some(hint) = &config.context_hint {
305            let _ = write!(prompt, "\n## PROJECT CONTEXT HINT\n{}\n", hint);
306        }
307
308        prompt.push_str("\n## HEMATITE OPERATIONAL PROTOCOL\n");
309        prompt.push_str("1. **Thinking Mode**: ALWAYS use the thought channel (`<|channel>thought ... <channel|>`) to plan your response.\n");
310        prompt.push_str("2. **Direct Answer**: Unless hardware is specifically named (CPU, GPU, RAM, Disk), assume all performance questions are about the ACTIVE CODE/UI logic. DO NOT use `inspect_host` for code-vitals.\n");
311        prompt.push_str("3. **Tool Format**: Use structured XML tags for tool calling. No natural language inside tool arguments.\n");
312        prompt.push_str("4. **Identity**: You are a world-class Software Engineer. Answer from the codebase first.\n");
313        prompt.push_str("5. **Continuous Goal**: Continue your task until you have fulfilled the user's intent. Stay grounded in results.\n");
314        prompt.push_str("6. **Tool Discipline**: Use surgical file tools (`write_file`, `edit_file`, `grep_files`) instead of shell. Overwriting code is blocked; use hunk-patching.\n");
315        prompt.push_str("7. **Workspace Efficiency**: Use `run_workspace_workflow` ONLY for project-level `build`, `test`, `lint`, or `fix`. Do NOT use it for general coding or autonomy.\n");
316        prompt.push_str("8. **Host Inspection**: Use `inspect_host` ONLY for legitimate system diagnostics. Topics: hardware, security, network, updates, health_report, storage, storage_spaces, defender_quarantine, data_audit.\n");
317        prompt.push_str("9. **Proof Before Action**: ALWAYS `grep_files` for symbols and `read_file` to verify content before any edit.\n");
318        prompt.push_str("10. **Proof Before Commit**: Run `verify_build` (or `workflow=build`) after all edits to confirm zero regressions.\n");
319        prompt.push_str("11. **Edit Precision**: Match indentation and whitespace exactly in search/replace targets.\n");
320        prompt.push_str("12. **Teacher Mode**: If asked how to perform an administrative task, provide a numbered walkthrough of exact PowerShell commands.\n");
321        prompt.push_str("13. **Search Priority**: Use regex searches for complex patterns. Never assume a file exists without listing the directory.\n");
322        prompt.push_str("14. **Communication**: Keep technical explanations concise. Focus on the 'what' and 'why' of the code change.\n");
323        prompt.push_str("15. **Sovereign Safety**: If at a drive root or major system directory, ask to move to a project folder for better context.\n");
324        prompt.push_str("16. **Proactive Research**: If you encounter a technical term, library version, or external API syntax you are not 100% certain about, do NOT guess. Use `research_web` to verify the latest authoritative facts. Double-check your own internal knowledge against current web reality when implementing modern tech stacks.\n");
325        prompt.push_str("17. **Tool Precedence**: NEVER use the `shell` tool (e.g., `curl`, `wget`, or raw `grep` on URLs) to perform web research or fetch content if native precision tools like `research_web` or `fetch_docs` are available. Prioritize native tools for privacy and cleaner output.\n");
326        prompt.push_str("18. **Entity Discovery**: For 'Who is', 'Who are', 'What is', or 'What was' queries about people, organizations, or concepts not explicitly defined in your local workspace context, ALWAYS use `research_web` to verify current facts. Do NOT guess or hallucinate identities from internal training data. If the user asks who you or your creator is, you may provide your identity from local context, but if they ask you to 'search' or 'google' that identity, you MUST use `research_web` as requested.\n");
327        prompt.push_str("19. **Scientific Mandate**: You are a Lead Computational Researcher. NEVER guess results for math, physics, or algorithmic complexity. Use `scientific_compute` for formal proofs, unit-safety, and empirical Big-O auditing. Use the `ledger` mode to persist cross-session math memory.\n");
328
329        prompt
330    }
331}