hematite/agent/
prompt.rs

1use std::fs;
2use std::path::PathBuf;
3
4use crate::agent::git;
5
/// Coarse classification of a workspace directory.
///
/// Produced by `detect_workspace_mode` and used to choose how the assistant
/// introduces itself at the top of the system prompt.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum WorkspaceMode {
    /// A project marker (e.g. `Cargo.toml`, `.git`, `src`) or a source file was found.
    Coding,
    /// No code signals, but at least one document-like file (e.g. `.md`, `.pdf`) was found.
    Document,
    /// Neither code nor document signals were found.
    General,
}
11
12fn detect_workspace_mode(root: &PathBuf) -> WorkspaceMode {
13    // Strong coding signals — any of these present means it's a coding workspace
14    let coding_markers = [
15        "Cargo.toml",
16        "package.json",
17        "pyproject.toml",
18        "setup.py",
19        "go.mod",
20        "pom.xml",
21        "build.gradle",
22        "CMakeLists.txt",
23        ".git",
24        "src",
25        "lib",
26    ];
27    for marker in &coding_markers {
28        if root.join(marker).exists() {
29            return WorkspaceMode::Coding;
30        }
31    }
32
33    // No strong coding signal — check file extensions
34    let code_exts = [
35        "rs", "py", "ts", "js", "go", "cpp", "c", "java", "cs", "rb", "swift", "kt",
36    ];
37    let doc_exts = ["pdf", "md", "txt", "docx", "epub", "rst"];
38    let mut code_count = 0usize;
39    let mut doc_count = 0usize;
40
41    if let Ok(entries) = fs::read_dir(root) {
42        for entry in entries.flatten() {
43            let path = entry.path();
44            if path.is_file() {
45                if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
46                    let ext = ext.to_lowercase();
47                    if code_exts.contains(&ext.as_str()) {
48                        code_count += 1;
49                    }
50                    if doc_exts.contains(&ext.as_str()) {
51                        doc_count += 1;
52                    }
53                }
54            }
55        }
56    }
57
58    if code_count > 0 {
59        WorkspaceMode::Coding
60    } else if doc_count > 0 {
61        WorkspaceMode::Document
62    } else {
63        WorkspaceMode::General
64    }
65}
66
/// Assembles the system prompt for a session rooted at a workspace directory.
#[derive(Debug, Clone)]
pub struct SystemPromptBuilder {
    /// Directory whose rule files and `.hematite/` contents are read when the
    /// prompt is built.
    pub workspace_root: PathBuf,
}
70
71impl SystemPromptBuilder {
72    pub fn new(root: PathBuf) -> Self {
73        Self {
74            workspace_root: root,
75        }
76    }
77
78    /// Build the full system prompt with Rule Hierarchy and Gemma-4 Optimization.
79    /// Hierarchy: Global ($HOME) -> Project (Root) -> Local (Ignored).
80    pub fn build(
81        &self,
82        base_instructions: &str,
83        memory: Option<&str>,
84        summary: Option<&str>,
85        mcp_tools: &[crate::agent::mcp::McpTool],
86    ) -> String {
87        let config = crate::agent::config::load_config();
88        let mut static_sections = Vec::new();
89
90        let workspace_framing = match detect_workspace_mode(&self.workspace_root) {
91            WorkspaceMode::Coding => "You are Hematite, a local AI coding agent running on the user's machine. \
92                             Hematite is more than the terminal UI: it is the full local harness for tool use, code editing, context management, voice, and orchestration. \
93                             The current directory is a software project — lean into code editing, build verification, and repo-aware tooling.",
94            WorkspaceMode::Document => "You are Hematite, a local AI assistant running on the user's machine. \
95                             Hematite is more than the terminal UI: it is the full local harness for tool use, file analysis, context management, voice, and orchestration. \
96                             The current directory contains documents and files — lean into reading, summarizing, explaining, and answering questions about the content here.",
97            WorkspaceMode::General => "You are Hematite, a local AI assistant running on the user's machine. \
98                             Hematite is more than the terminal UI: it is the full local harness for tool use, file operations, context management, voice, and orchestration.",
99        };
100
101        static_sections.push("# IDENTITY & TONE".to_string());
102        static_sections.push(format!("{} \
103                             Be direct, practical, technically precise, and ASCII-first in ordinary prose. \
104                             For simple questions, answer briefly in plain language. \
105                             Do not expose internal tool names, hidden protocols, or planning jargon unless the user asks.", workspace_framing));
106        static_sections.push(format!(
107            "- Running Hematite build: {}",
108            crate::hematite_version_display()
109        ));
110        static_sections.push(format!(
111            "- Hematite author and maintainer: {}",
112            crate::HEMATITE_AUTHOR
113        ));
114        static_sections.push(format!(
115            "- Hematite repository: {}",
116            crate::HEMATITE_REPOSITORY_URL
117        ));
118
119        static_sections.push(format!("\n# BASE INSTRUCTIONS\n{base_instructions}"));
120
121        if let Some(home) = std::env::var_os("USERPROFILE") {
122            let global_path = PathBuf::from(home).join(".hematite").join("CLAUDE.md");
123            if global_path.exists() {
124                if let Ok(content) = fs::read_to_string(&global_path) {
125                    static_sections.push(format!("\n# GLOBAL USER PREFERENCES\n{content}"));
126                }
127            }
128        }
129
130        for name in &["CLAUDE.md", ".claude.md", "CLAUDE.local.md"] {
131            let path = self.workspace_root.join(name);
132            if path.exists() {
133                if let Ok(content) = fs::read_to_string(&path) {
134                    let content = if content.len() > 6000 {
135                        format!("{}...[Rules Truncated]", &content[..6000])
136                    } else {
137                        content
138                    };
139                    static_sections.push(format!("\n# PROJECT RULES ({})\n{}", name, content));
140                }
141            }
142        }
143
144        let instructions_dir = self.workspace_root.join(".hematite").join("instructions");
145        if instructions_dir.exists() && instructions_dir.is_dir() {
146            if let Ok(entries) = fs::read_dir(instructions_dir) {
147                for entry in entries.flatten() {
148                    let path = entry.path();
149                    if path.extension().map(|e| e == "md").unwrap_or(false) {
150                        let stem = path.file_stem().and_then(|s| s.to_str()).unwrap_or("");
151                        let include = if let Some(mem) = memory {
152                            mem.to_lowercase().contains(&stem.to_lowercase())
153                        } else {
154                            false
155                        };
156
157                        if include {
158                            if let Ok(content) = fs::read_to_string(&path) {
159                                static_sections.push(format!(
160                                    "\n# DEEP CONTEXT RULES ({}.md)\n{}",
161                                    stem, content
162                                ));
163                            }
164                        }
165                    }
166                }
167            }
168        }
169
170        let mut prompt = static_sections.join("\n");
171        prompt.push_str(
172            "\n\n###############################################################################\n",
173        );
174        prompt.push_str(
175            "# DYNAMIC CONTEXT (Changes every turn)                                        #\n",
176        );
177        prompt.push_str(
178            "###############################################################################\n",
179        );
180
181        if let Some(s) = summary {
182            prompt.push_str(&format!(
183                "\n# COMPACTED HISTORY SUMMARY\n{}\nRecent messages are preserved below.",
184                s
185            ));
186        }
187
188        if let Some(mem) = memory {
189            prompt.push_str(&format!("\n# SESSION MEMORY\n{mem}"));
190        }
191
192        prompt.push_str("\n# ENVIRONMENT");
193        prompt.push_str(&format!(
194            "\n- Local Time: {}",
195            chrono::Local::now().format("%Y-%m-%d %H:%M:%S")
196        ));
197        prompt.push_str(&format!(
198            "\n- Hematite Build: {}",
199            crate::hematite_version_display()
200        ));
201        prompt.push_str("\n- Operating System: Windows (User workspace)");
202
203        if git::is_git_repo(&self.workspace_root) {
204            if let Ok(branch) = git::get_active_branch(&self.workspace_root) {
205                prompt.push_str(&format!("\n- Git Branch: {branch}"));
206            }
207        }
208
209        let hematite_dir = self.workspace_root.join(".hematite");
210        for (name, path) in [
211            ("TASK", hematite_dir.join("TASK.md")),
212            ("PLAN", hematite_dir.join("PLAN.md")),
213        ] {
214            if path.exists() {
215                if let Ok(content) = fs::read_to_string(&path) {
216                    if !content.trim().is_empty() {
217                        let content = if content.len() > 3000 {
218                            format!("{}...[Truncated]", &content[..3000])
219                        } else {
220                            content
221                        };
222                        prompt.push_str(&format!(
223                            "\n\n# ACTIVE TASK {} (.hematite/)\n{}",
224                            name, content
225                        ));
226                    }
227                }
228            }
229        }
230
231        if !mcp_tools.is_empty() {
232            prompt.push_str("\n\n# ACTIVE MCP TOOLS");
233            for tool in mcp_tools {
234                let mut description = tool
235                    .description
236                    .clone()
237                    .unwrap_or_else(|| "No description provided.".to_string());
238                if description.len() > 180 {
239                    description.truncate(180);
240                    description.push_str("...");
241                }
242                prompt.push_str(&format!("\n- {}: {}", tool.name, description));
243            }
244        }
245
246        if let Some(hint) = &config.context_hint {
247            prompt.push_str(&format!("\n## PROJECT CONTEXT HINT\n{}\n", hint));
248        }
249
250        prompt.push_str("\n## OPERATIONAL PROTOCOL (Gemma-4-E4B Native)\n");
251        prompt.push_str("1. **Thinking Mode**: ALWAYS use the thought channel (`<|channel>thought ... <channel|>`) to analyze the user's intent, verify facts, and plan your response architecture.\n");
252        prompt.push_str("2. **Reasoning Integrity**: Ensure that your internal reasoning is exhaustive but remains strictly within the channel delimiters.\n");
253        prompt.push_str("3. **Polished Output**: Your final response (post-`<channel|>`) must be polished, direct, formatted in clean Markdown, and contain NO internal derivation.\n");
254        prompt.push_str("4. **Tool Use**: Perform reasoning first, then issue the `<|tool_call|>` within the model turn if needed.\n");
255        prompt.push_str("5. **Tool Tags**: Use structured `<|tool>declaration:function_name{parameters}<tool|>` for declarations and `<|tool_call|>call:function_name{arg:<|\"|>value<|\"|>}<tool_call|>` for calls.\n");
256        prompt.push_str("6. **Safety**: String values MUST use the `<|\"|>` wrapper for safety.\n");
257        prompt.push_str("7. **Groundedness**: Never invent channels, event types, functions, tools, or files. If a detail is not verified from the repo or tool output, say `uncertain`.\n");
258        prompt.push_str("8. **Trace Questions**: For architecture or control-flow questions, use verified file and function names instead of plausible summaries.\n");
259        prompt.push_str("9. **Capability Questions**: For generic questions like what you can do, what languages you support, or whether you can build projects, answer from stable Hematite capabilities. Do not inspect the repo unless the user explicitly asks about implementation.\n");
260        prompt.push_str("10. **Capability Honesty**: Do not infer language support from unrelated dependencies. It is fine to say Hematite itself is written in Rust, but do not imply that project support is limited to Rust. Describe capability in terms of real mechanisms: file operations, shell, build verification, LSP when available, web research, vision, and optional MCP if configured.\n");
261        prompt.push_str("11. **Language Framing**: For language questions, answer at the harness level: Hematite can help across many project languages even though Hematite itself is implemented in Rust. Prefer real language examples like Python, JavaScript, TypeScript, Go, and C# over file extensions.\n");
262        prompt.push_str("12. **Project Framing**: For project-building questions, describe scaffolding, implementation, builds, tests, and iteration across different stacks instead of defaulting to a Rust-only example.\n");
263        prompt.push_str("13. **Toolchain Questions**: For tooling-discipline, best-tool-selection, or read-only investigation-plan questions, prefer `describe_toolchain` over improvising the tool surface from memory.\n");
264        prompt.push_str("14. **Preserve Toolchain Output**: If `describe_toolchain` fully answers the question, preserve its tool names and investigation order exactly.\n");
265        prompt.push_str("15. **Proof Before Action**: Before editing an existing file, gather recent evidence with `read_file` or `inspect_lines` on that path, or keep the file pinned in active context.\n");
266        prompt.push_str("16. **Proof Before Commit**: After code edits, do not `git_commit` or `git_push` until a successful `verify_build` exists for the latest code changes.\n");
267        prompt.push_str("17. **Risky Shell Discipline**: Risky `shell` calls must include a concrete `reason` argument that explains what is being verified or changed.\n");
268        prompt.push_str("18. **Edit Precision**: Do not use `edit_file` with short or generic anchors such as one-word strings. Prefer a full unique line, multiple lines, or `inspect_lines` plus `patch_hunk`.\n");
269        prompt.push_str("19. **Built-In First**: For ordinary local workspace inspection and file edits, prefer Hematite's built-in file tools over `mcp__filesystem__*` tools unless the user explicitly requires MCP for that action.\n");
270        prompt.push_str("20. **Deep Sync**: Every 6th turn, review the full TASK.md.\n\n21. **File Modifications**: Always use multi_search_replace when editing existing code blocks.\n");
271        prompt.push_str("22. **Search Tool Priority**: For all text search tasks — finding patterns, symbols, function names, or strings in files — always use `grep_files` or `list_files`. Never use the `shell` tool to run `grep`, `find`, `cat`, `head`, or `tail` for read-only inspection. Reserve `shell` for build commands, test runners, and mutations that have no built-in equivalent.");
272
273        prompt.push_str("23. **Host Inspection Priority**: For read-only questions about installed tools, PATH entries, environment/package-manager health, grounded fix plans for common workstation failures, network state, service state, desktop items, Downloads size, listening ports, repo-health summaries, or directory/disk reports, prefer `inspect_host` over raw `shell` when it can answer directly. If the user asks how to fix a common workstation problem such as `cargo not found`, `port 3000 already in use`, or `LM Studio not reachable`, use `fix_plan` first instead of `env_doctor`, `path`, or `ports`. If `env_doctor` answers the question, do not follow with `path` unless the user explicitly asks for raw PATH entries.");
274
275        prompt
276    }
277}
hematite/agent/prompt.rs

hematite/agent/
prompt.rs