1use std::fs;
2use std::path::PathBuf;
3
4use crate::agent::git;
5
/// Coarse classification of a workspace directory, used to pick how the
/// system prompt frames the assistant.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum WorkspaceMode {
    /// The directory looks like a software project.
    Coding,
    /// The directory holds document-style files and no code.
    Document,
    /// Neither code nor documents were recognised.
    General,
}
11
12fn detect_workspace_mode(root: &PathBuf) -> WorkspaceMode {
13 let coding_markers = [
15 "Cargo.toml",
16 "package.json",
17 "pyproject.toml",
18 "setup.py",
19 "go.mod",
20 "pom.xml",
21 "build.gradle",
22 "CMakeLists.txt",
23 ".git",
24 "src",
25 "lib",
26 ];
27 for marker in &coding_markers {
28 if root.join(marker).exists() {
29 return WorkspaceMode::Coding;
30 }
31 }
32
33 let code_exts = [
35 "rs", "py", "ts", "js", "go", "cpp", "c", "java", "cs", "rb", "swift", "kt",
36 ];
37 let doc_exts = ["pdf", "md", "txt", "docx", "epub", "rst"];
38 let mut code_count = 0usize;
39 let mut doc_count = 0usize;
40
41 if let Ok(entries) = fs::read_dir(root) {
42 for entry in entries.flatten() {
43 let path = entry.path();
44 if path.is_file() {
45 if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
46 let ext = ext.to_lowercase();
47 if code_exts.contains(&ext.as_str()) {
48 code_count += 1;
49 }
50 if doc_exts.contains(&ext.as_str()) {
51 doc_count += 1;
52 }
53 }
54 }
55 }
56 }
57
58 if code_count > 0 {
59 WorkspaceMode::Coding
60 } else if doc_count > 0 {
61 WorkspaceMode::Document
62 } else {
63 WorkspaceMode::General
64 }
65}
66
/// Builds the system prompt for a session anchored at a workspace directory.
#[derive(Debug, Clone)]
pub struct SystemPromptBuilder {
    /// Directory whose contents (project rules, `.hematite/` files, git state)
    /// are read when the prompt is assembled.
    pub workspace_root: PathBuf,
}
70
71impl SystemPromptBuilder {
72 pub fn new(root: PathBuf) -> Self {
73 Self {
74 workspace_root: root,
75 }
76 }
77
78 pub fn build(
81 &self,
82 base_instructions: &str,
83 memory: Option<&str>,
84 summary: Option<&str>,
85 mcp_tools: &[crate::agent::mcp::McpTool],
86 ) -> String {
87 let config = crate::agent::config::load_config();
88 let mut static_sections = Vec::new();
89
90 let workspace_framing = match detect_workspace_mode(&self.workspace_root) {
91 WorkspaceMode::Coding => "You are Hematite, a local AI coding agent running on the user's machine. \
92 Hematite is more than the terminal UI: it is the full local harness for tool use, code editing, context management, voice, and orchestration. \
93 The current directory is a software project — lean into code editing, build verification, and repo-aware tooling.",
94 WorkspaceMode::Document => "You are Hematite, a local AI assistant running on the user's machine. \
95 Hematite is more than the terminal UI: it is the full local harness for tool use, file analysis, context management, voice, and orchestration. \
96 The current directory contains documents and files — lean into reading, summarizing, explaining, and answering questions about the content here.",
97 WorkspaceMode::General => "You are Hematite, a local AI assistant running on the user's machine. \
98 Hematite is more than the terminal UI: it is the full local harness for tool use, file operations, context management, voice, and orchestration.",
99 };
100
101 static_sections.push("# IDENTITY & TONE".to_string());
102 static_sections.push(format!("{} \
103 Be direct, practical, technically precise, and ASCII-first in ordinary prose. \
104 For simple questions, answer briefly in plain language. \
105 Do not expose internal tool names, hidden protocols, or planning jargon unless the user asks.", workspace_framing));
106 static_sections.push(format!(
107 "- Running Hematite build: {}",
108 crate::hematite_version_display()
109 ));
110
111 static_sections.push(format!("\n# BASE INSTRUCTIONS\n{base_instructions}"));
112
113 if let Some(home) = std::env::var_os("USERPROFILE") {
114 let global_path = PathBuf::from(home).join(".hematite").join("CLAUDE.md");
115 if global_path.exists() {
116 if let Ok(content) = fs::read_to_string(&global_path) {
117 static_sections.push(format!("\n# GLOBAL USER PREFERENCES\n{content}"));
118 }
119 }
120 }
121
122 for name in &["CLAUDE.md", ".claude.md", "CLAUDE.local.md"] {
123 let path = self.workspace_root.join(name);
124 if path.exists() {
125 if let Ok(content) = fs::read_to_string(&path) {
126 let content = if content.len() > 6000 {
127 format!("{}...[Rules Truncated]", &content[..6000])
128 } else {
129 content
130 };
131 static_sections.push(format!("\n# PROJECT RULES ({})\n{}", name, content));
132 }
133 }
134 }
135
136 let instructions_dir = self.workspace_root.join(".hematite").join("instructions");
137 if instructions_dir.exists() && instructions_dir.is_dir() {
138 if let Ok(entries) = fs::read_dir(instructions_dir) {
139 for entry in entries.flatten() {
140 let path = entry.path();
141 if path.extension().map(|e| e == "md").unwrap_or(false) {
142 let stem = path.file_stem().and_then(|s| s.to_str()).unwrap_or("");
143 let include = if let Some(mem) = memory {
144 mem.to_lowercase().contains(&stem.to_lowercase())
145 } else {
146 false
147 };
148
149 if include {
150 if let Ok(content) = fs::read_to_string(&path) {
151 static_sections.push(format!(
152 "\n# DEEP CONTEXT RULES ({}.md)\n{}",
153 stem, content
154 ));
155 }
156 }
157 }
158 }
159 }
160 }
161
162 let mut prompt = static_sections.join("\n");
163 prompt.push_str(
164 "\n\n###############################################################################\n",
165 );
166 prompt.push_str(
167 "# DYNAMIC CONTEXT (Changes every turn) #\n",
168 );
169 prompt.push_str(
170 "###############################################################################\n",
171 );
172
173 if let Some(s) = summary {
174 prompt.push_str(&format!(
175 "\n# COMPACTED HISTORY SUMMARY\n{}\nRecent messages are preserved below.",
176 s
177 ));
178 }
179
180 if let Some(mem) = memory {
181 prompt.push_str(&format!("\n# SESSION MEMORY\n{mem}"));
182 }
183
184 prompt.push_str("\n# ENVIRONMENT");
185 prompt.push_str(&format!(
186 "\n- Local Time: {}",
187 chrono::Local::now().format("%Y-%m-%d %H:%M:%S")
188 ));
189 prompt.push_str(&format!(
190 "\n- Hematite Build: {}",
191 crate::hematite_version_display()
192 ));
193 prompt.push_str("\n- Operating System: Windows (User workspace)");
194
195 if git::is_git_repo(&self.workspace_root) {
196 if let Ok(branch) = git::get_active_branch(&self.workspace_root) {
197 prompt.push_str(&format!("\n- Git Branch: {branch}"));
198 }
199 }
200
201 let hematite_dir = self.workspace_root.join(".hematite");
202 for (name, path) in [
203 ("TASK", hematite_dir.join("TASK.md")),
204 ("PLAN", hematite_dir.join("PLAN.md")),
205 ] {
206 if path.exists() {
207 if let Ok(content) = fs::read_to_string(&path) {
208 if !content.trim().is_empty() {
209 let content = if content.len() > 3000 {
210 format!("{}...[Truncated]", &content[..3000])
211 } else {
212 content
213 };
214 prompt.push_str(&format!(
215 "\n\n# ACTIVE TASK {} (.hematite/)\n{}",
216 name, content
217 ));
218 }
219 }
220 }
221 }
222
223 if !mcp_tools.is_empty() {
224 prompt.push_str("\n\n# ACTIVE MCP TOOLS");
225 for tool in mcp_tools {
226 let mut description = tool
227 .description
228 .clone()
229 .unwrap_or_else(|| "No description provided.".to_string());
230 if description.len() > 180 {
231 description.truncate(180);
232 description.push_str("...");
233 }
234 prompt.push_str(&format!("\n- {}: {}", tool.name, description));
235 }
236 }
237
238 if let Some(hint) = &config.context_hint {
239 prompt.push_str(&format!("\n## PROJECT CONTEXT HINT\n{}\n", hint));
240 }
241
242 prompt.push_str("\n## OPERATIONAL PROTOCOL (Gemma-4-E4B Native)\n");
243 prompt.push_str("1. **Thinking Mode**: ALWAYS use the thought channel (`<|channel>thought ... <channel|>`) to analyze the user's intent, verify facts, and plan your response architecture.\n");
244 prompt.push_str("2. **Reasoning Integrity**: Ensure that your internal reasoning is exhaustive but remains strictly within the channel delimiters.\n");
245 prompt.push_str("3. **Polished Output**: Your final response (post-`<channel|>`) must be polished, direct, formatted in clean Markdown, and contain NO internal derivation.\n");
246 prompt.push_str("4. **Tool Use**: Perform reasoning first, then issue the `<|tool_call|>` within the model turn if needed.\n");
247 prompt.push_str("5. **Tool Tags**: Use structured `<|tool>declaration:function_name{parameters}<tool|>` for declarations and `<|tool_call|>call:function_name{arg:<|\"|>value<|\"|>}<tool_call|>` for calls.\n");
248 prompt.push_str("6. **Safety**: String values MUST use the `<|\"|>` wrapper for safety.\n");
249 prompt.push_str("7. **Groundedness**: Never invent channels, event types, functions, tools, or files. If a detail is not verified from the repo or tool output, say `uncertain`.\n");
250 prompt.push_str("8. **Trace Questions**: For architecture or control-flow questions, use verified file and function names instead of plausible summaries.\n");
251 prompt.push_str("9. **Capability Questions**: For generic questions like what you can do, what languages you support, or whether you can build projects, answer from stable Hematite capabilities. Do not inspect the repo unless the user explicitly asks about implementation.\n");
252 prompt.push_str("10. **Capability Honesty**: Do not infer language support from unrelated dependencies. It is fine to say Hematite itself is written in Rust, but do not imply that project support is limited to Rust. Describe capability in terms of real mechanisms: file operations, shell, build verification, LSP when available, web research, vision, and optional MCP if configured.\n");
253 prompt.push_str("11. **Language Framing**: For language questions, answer at the harness level: Hematite can help across many project languages even though Hematite itself is implemented in Rust. Prefer real language examples like Python, JavaScript, TypeScript, Go, and C# over file extensions.\n");
254 prompt.push_str("12. **Project Framing**: For project-building questions, describe scaffolding, implementation, builds, tests, and iteration across different stacks instead of defaulting to a Rust-only example.\n");
255 prompt.push_str("13. **Toolchain Questions**: For tooling-discipline, best-tool-selection, or read-only investigation-plan questions, prefer `describe_toolchain` over improvising the tool surface from memory.\n");
256 prompt.push_str("14. **Preserve Toolchain Output**: If `describe_toolchain` fully answers the question, preserve its tool names and investigation order exactly.\n");
257 prompt.push_str("15. **Proof Before Action**: Before editing an existing file, gather recent evidence with `read_file` or `inspect_lines` on that path, or keep the file pinned in active context.\n");
258 prompt.push_str("16. **Proof Before Commit**: After code edits, do not `git_commit` or `git_push` until a successful `verify_build` exists for the latest code changes.\n");
259 prompt.push_str("17. **Risky Shell Discipline**: Risky `shell` calls must include a concrete `reason` argument that explains what is being verified or changed.\n");
260 prompt.push_str("18. **Edit Precision**: Do not use `edit_file` with short or generic anchors such as one-word strings. Prefer a full unique line, multiple lines, or `inspect_lines` plus `patch_hunk`.\n");
261 prompt.push_str("19. **Built-In First**: For ordinary local workspace inspection and file edits, prefer Hematite's built-in file tools over `mcp__filesystem__*` tools unless the user explicitly requires MCP for that action.\n");
262 prompt.push_str("20. **Deep Sync**: Every 6th turn, review the full TASK.md.\n\n21. **File Modifications**: Always use multi_search_replace when editing existing code blocks.\n");
263 prompt.push_str("22. **Search Tool Priority**: For all text search tasks — finding patterns, symbols, function names, or strings in files — always use `grep_files` or `list_files`. Never use the `shell` tool to run `grep`, `find`, `cat`, `head`, or `tail` for read-only inspection. Reserve `shell` for build commands, test runners, and mutations that have no built-in equivalent.");
264
265 prompt.push_str("23. **Host Inspection Priority**: For read-only questions about installed tools, PATH entries, desktop items, Downloads size, listening ports, repo-health summaries, or directory/disk reports, prefer `inspect_host` over raw `shell` when it can answer directly.");
266
267 prompt
268 }
269}