1use serde::{Deserialize, Serialize};
2use std::collections::HashSet;
3use std::path::{Path, PathBuf};
4
/// Largest number of bytes kept from a single instruction file before its
/// content is cut and a truncation marker is appended.
const MAX_INSTRUCTION_FILE_BYTES: usize = 16 * 1_000;
/// Byte budget shared by all instruction documents returned together.
const MAX_TOTAL_INSTRUCTION_BYTES: usize = 48 * 1_000;
7
8#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
9pub enum InstructionScope {
10 User,
11 Workspace,
12 Directory,
13}
14
15#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
16pub struct InstructionDoc {
17 pub scope: InstructionScope,
18 pub path: PathBuf,
19 pub relative_path: String,
20 pub depth: usize,
21 pub content: String,
22}
23
24pub fn discover_workspace_instructions(root: &Path, task: &str) -> Vec<InstructionDoc> {
25 let root = root.canonicalize().unwrap_or_else(|_| root.to_path_buf());
26 let mut docs = Vec::new();
27 let mut seen = HashSet::new();
28
29 if let Some(home) = dirs::home_dir() {
30 for path in instruction_candidates_in_dir(&home, false) {
31 push_doc(&root, &path, InstructionScope::User, &mut seen, &mut docs);
32 }
33 }
34
35 for path in instruction_candidates_in_dir(&root, true) {
36 push_doc(
37 &root,
38 &path,
39 InstructionScope::Workspace,
40 &mut seen,
41 &mut docs,
42 );
43 }
44
45 scan_instruction_dirs(&root, &root, &mut seen, &mut docs);
46 sort_instruction_docs(&root, task, &mut docs);
47 enforce_total_limit(docs)
48}
49
/// Lists the instruction file paths worth probing for `dir`.
///
/// Always yields `AGENTS.md` and `CLAUDE.md` inside `dir`. When `dir` is
/// itself named `.sparrow`, `INSTRUCTIONS.md` inside it is added. When
/// `include_sparrow` is set, `dir/.sparrow/INSTRUCTIONS.md` is added last.
/// The paths are only built here; nothing checks that they exist.
fn instruction_candidates_in_dir(dir: &Path, include_sparrow: bool) -> Vec<PathBuf> {
    let dir_is_sparrow = matches!(
        dir.file_name().and_then(|name| name.to_str()),
        Some(".sparrow")
    );

    let mut candidates: Vec<PathBuf> = ["AGENTS.md", "CLAUDE.md"]
        .iter()
        .map(|file| dir.join(file))
        .collect();
    if dir_is_sparrow {
        candidates.push(dir.join("INSTRUCTIONS.md"));
    }
    if include_sparrow {
        let mut nested = dir.join(".sparrow");
        nested.push("INSTRUCTIONS.md");
        candidates.push(nested);
    }
    candidates
}
60
61fn scan_instruction_dirs(
62 root: &Path,
63 dir: &Path,
64 seen: &mut HashSet<PathBuf>,
65 docs: &mut Vec<InstructionDoc>,
66) {
67 let entries = match std::fs::read_dir(dir) {
68 Ok(entries) => entries,
69 Err(_) => return,
70 };
71
72 for entry in entries.flatten() {
73 let path = entry.path();
74 let name = entry.file_name().to_string_lossy().to_string();
75 if should_skip_entry(&name) {
76 continue;
77 }
78
79 if path.is_dir() {
80 for candidate in instruction_candidates_in_dir(&path, true) {
81 push_doc(root, &candidate, InstructionScope::Directory, seen, docs);
82 }
83 scan_instruction_dirs(root, &path, seen, docs);
84 }
85 }
86}
87
/// Returns `true` when a directory entry with this exact name must not be
/// scanned for instruction files. Matching is case-sensitive.
fn should_skip_entry(name: &str) -> bool {
    const SKIPPED_NAMES: [&str; 7] = [
        ".git",
        "target",
        "node_modules",
        "dist",
        "build",
        ".claude",
        ".codex-remote-attachments",
    ];
    SKIPPED_NAMES.iter().any(|skipped| *skipped == name)
}
100
101fn push_doc(
102 root: &Path,
103 path: &Path,
104 scope: InstructionScope,
105 seen: &mut HashSet<PathBuf>,
106 docs: &mut Vec<InstructionDoc>,
107) {
108 if !path.is_file() {
109 return;
110 }
111 let canonical = path.canonicalize().unwrap_or_else(|_| path.to_path_buf());
112 if !seen.insert(canonical.clone()) {
113 return;
114 }
115 let mut content = match std::fs::read_to_string(path) {
116 Ok(content) => content,
117 Err(_) => return,
118 };
119 if content.len() > MAX_INSTRUCTION_FILE_BYTES {
120 content.truncate(MAX_INSTRUCTION_FILE_BYTES);
121 content.push_str("\n\n[truncated by Sparrow instruction loader]");
122 }
123 let relative_path = canonical
124 .strip_prefix(root)
125 .map(|p| p.to_string_lossy().to_string())
126 .unwrap_or_else(|_| canonical.to_string_lossy().to_string())
127 .replace('\\', "/");
128 let depth = relative_path
129 .split(['/', '\\'])
130 .filter(|part| !part.is_empty())
131 .count();
132 docs.push(InstructionDoc {
133 scope,
134 path: canonical,
135 relative_path,
136 depth,
137 content,
138 });
139}
140
141fn sort_instruction_docs(root: &Path, task: &str, docs: &mut [InstructionDoc]) {
142 let task_lower = task.to_lowercase();
143 docs.sort_by(|a, b| {
144 let a_relevant = instruction_relevance(root, a, &task_lower);
145 let b_relevant = instruction_relevance(root, b, &task_lower);
146 a.scope_rank()
147 .cmp(&b.scope_rank())
148 .then_with(|| b_relevant.cmp(&a_relevant))
149 .then_with(|| a.depth.cmp(&b.depth))
150 .then_with(|| a.relative_path.cmp(&b.relative_path))
151 });
152}
153
154fn instruction_relevance(root: &Path, doc: &InstructionDoc, task_lower: &str) -> bool {
155 let rel = doc.relative_path.to_lowercase();
156 if rel != doc.path.to_string_lossy().to_lowercase() && task_lower.contains(&rel) {
157 return true;
158 }
159 let parent = doc.path.parent().unwrap_or(root);
160 if let Ok(parent_rel) = parent.strip_prefix(root) {
161 let parent_rel = parent_rel.to_string_lossy().to_lowercase();
162 return !parent_rel.is_empty() && task_lower.contains(&parent_rel);
163 }
164 false
165}
166
167impl InstructionDoc {
168 fn scope_rank(&self) -> u8 {
169 match self.scope {
170 InstructionScope::User => 0,
171 InstructionScope::Workspace => 1,
172 InstructionScope::Directory => 2,
173 }
174 }
175}
176
177fn enforce_total_limit(docs: Vec<InstructionDoc>) -> Vec<InstructionDoc> {
178 let mut total = 0usize;
179 let mut out = Vec::new();
180 for mut doc in docs {
181 if total >= MAX_TOTAL_INSTRUCTION_BYTES {
182 break;
183 }
184 let remaining = MAX_TOTAL_INSTRUCTION_BYTES - total;
185 if doc.content.len() > remaining {
186 doc.content.truncate(remaining);
187 doc.content
188 .push_str("\n\n[truncated by Sparrow instruction loader]");
189 }
190 total += doc.content.len();
191 out.push(doc);
192 }
193 out
194}
195
#[cfg(test)]
mod tests {
    use super::*;

    /// Creates an empty scratch directory whose name combines `name` with the
    /// current process id.
    fn temp_dir(name: &str) -> PathBuf {
        let unique = format!("sparrow-instructions-{}-{}", name, std::process::id());
        let dir = std::env::temp_dir().join(unique);
        // Clear leftovers from an earlier run; a missing directory is fine.
        let _ = std::fs::remove_dir_all(&dir);
        std::fs::create_dir_all(&dir).unwrap();
        dir
    }

    /// Root-level and nested instruction files are all reported, with paths
    /// relative to the workspace root.
    #[test]
    fn discovers_workspace_and_nested_instruction_files() {
        let root = temp_dir("nested");
        std::fs::create_dir_all(root.join("src/.sparrow")).unwrap();
        let fixtures = [
            ("AGENTS.md", "root agents"),
            ("src/.sparrow/INSTRUCTIONS.md", "src instructions"),
            ("src/CLAUDE.md", "src claude"),
        ];
        for &(file, text) in fixtures.iter() {
            std::fs::write(root.join(file), text).unwrap();
        }

        let docs = discover_workspace_instructions(&root, "edit src/main.rs");

        let rels: Vec<_> = docs.iter().map(|doc| doc.relative_path.as_str()).collect();
        for &(file, _) in fixtures.iter() {
            assert!(rels.contains(&file));
        }
        let has_src_instructions = docs
            .iter()
            .any(|doc| doc.content.contains("src instructions"));
        assert!(has_src_instructions);

        let _ = std::fs::remove_dir_all(root);
    }

    /// Files below `target` and `node_modules` never show up in the result.
    #[test]
    fn skips_target_and_node_modules() {
        let root = temp_dir("skip");
        for dir in ["target", "node_modules/pkg"].iter() {
            std::fs::create_dir_all(root.join(dir)).unwrap();
        }
        std::fs::write(root.join("target/AGENTS.md"), "nope").unwrap();
        std::fs::write(root.join("node_modules/pkg/CLAUDE.md"), "nope").unwrap();

        let docs = discover_workspace_instructions(&root, "");

        for doc in &docs {
            assert!(!doc.content.contains("nope") && !doc.relative_path.contains("target"));
        }

        let _ = std::fs::remove_dir_all(root);
    }
}