Skip to main content

sparrow/
instructions.rs

use serde::{Deserialize, Serialize};
use std::cmp::Reverse;
use std::collections::HashSet;
use std::path::{Path, PathBuf};
4
/// Upper bound, in bytes, on the content kept from a single instruction file;
/// longer files are cut down when they are loaded.
const MAX_INSTRUCTION_FILE_BYTES: usize = 16_000;
/// Byte budget shared by the content of all instruction documents returned together.
const MAX_TOTAL_INSTRUCTION_BYTES: usize = 48_000;
7
/// Where an instruction document was found, which also decides its sort priority.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum InstructionScope {
    /// Found directly in the user's home directory.
    User,
    /// Found directly in the workspace root (including its `.sparrow` folder).
    Workspace,
    /// Found in a subdirectory while walking the workspace tree.
    Directory,
}
14
/// One loaded instruction file together with the metadata used to order it.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct InstructionDoc {
    /// Scope the file was discovered under.
    pub scope: InstructionScope,
    /// Canonical path of the file (the probed path if canonicalization failed).
    pub path: PathBuf,
    /// Path relative to the workspace root with `/` separators; the full path
    /// when the file does not live under the root.
    pub relative_path: String,
    /// Number of non-empty components in `relative_path`.
    pub depth: usize,
    /// File text, possibly truncated to respect the loader's size limits.
    pub content: String,
}
23
24pub fn discover_workspace_instructions(root: &Path, task: &str) -> Vec<InstructionDoc> {
25    let root = root.canonicalize().unwrap_or_else(|_| root.to_path_buf());
26    let mut docs = Vec::new();
27    let mut seen = HashSet::new();
28
29    if let Some(home) = dirs::home_dir() {
30        for path in instruction_candidates_in_dir(&home, false) {
31            push_doc(&root, &path, InstructionScope::User, &mut seen, &mut docs);
32        }
33    }
34
35    for path in instruction_candidates_in_dir(&root, true) {
36        push_doc(
37            &root,
38            &path,
39            InstructionScope::Workspace,
40            &mut seen,
41            &mut docs,
42        );
43    }
44
45    scan_instruction_dirs(&root, &root, &mut seen, &mut docs);
46    sort_instruction_docs(&root, task, &mut docs);
47    enforce_total_limit(docs)
48}
49
/// Lists the instruction file paths worth probing inside `dir`.
///
/// The result always starts with `AGENTS.md` and `CLAUDE.md`. When `dir` is
/// itself named `.sparrow`, its own `INSTRUCTIONS.md` follows. When
/// `include_sparrow` is set, `.sparrow/INSTRUCTIONS.md` beneath `dir` is
/// appended last. Paths are only joined here; nothing is checked on disk.
fn instruction_candidates_in_dir(dir: &Path, include_sparrow: bool) -> Vec<PathBuf> {
    let is_sparrow_dir = matches!(
        dir.file_name().and_then(|name| name.to_str()),
        Some(".sparrow")
    );

    let always = ["AGENTS.md", "CLAUDE.md"]
        .iter()
        .map(|file_name| dir.join(file_name));
    let own_instructions = is_sparrow_dir.then(|| dir.join("INSTRUCTIONS.md"));
    let nested_instructions =
        include_sparrow.then(|| dir.join(".sparrow").join("INSTRUCTIONS.md"));

    always
        .chain(own_instructions)
        .chain(nested_instructions)
        .collect()
}
60
61fn scan_instruction_dirs(
62    root: &Path,
63    dir: &Path,
64    seen: &mut HashSet<PathBuf>,
65    docs: &mut Vec<InstructionDoc>,
66) {
67    let entries = match std::fs::read_dir(dir) {
68        Ok(entries) => entries,
69        Err(_) => return,
70    };
71
72    for entry in entries.flatten() {
73        let path = entry.path();
74        let name = entry.file_name().to_string_lossy().to_string();
75        if should_skip_entry(&name) {
76            continue;
77        }
78
79        if path.is_dir() {
80            for candidate in instruction_candidates_in_dir(&path, true) {
81                push_doc(root, &candidate, InstructionScope::Directory, seen, docs);
82            }
83            scan_instruction_dirs(root, &path, seen, docs);
84        }
85    }
86}
87
/// Reports whether a directory entry called `name` is excluded from the walk.
///
/// The comparison is exact and case-sensitive against a fixed list of
/// entry names.
fn should_skip_entry(name: &str) -> bool {
    const SKIPPED_NAMES: [&str; 7] = [
        ".git",
        "target",
        "node_modules",
        "dist",
        "build",
        ".claude",
        ".codex-remote-attachments",
    ];

    SKIPPED_NAMES.iter().any(|skipped| *skipped == name)
}
100
101fn push_doc(
102    root: &Path,
103    path: &Path,
104    scope: InstructionScope,
105    seen: &mut HashSet<PathBuf>,
106    docs: &mut Vec<InstructionDoc>,
107) {
108    if !path.is_file() {
109        return;
110    }
111    let canonical = path.canonicalize().unwrap_or_else(|_| path.to_path_buf());
112    if !seen.insert(canonical.clone()) {
113        return;
114    }
115    let mut content = match std::fs::read_to_string(path) {
116        Ok(content) => content,
117        Err(_) => return,
118    };
119    if content.len() > MAX_INSTRUCTION_FILE_BYTES {
120        content.truncate(MAX_INSTRUCTION_FILE_BYTES);
121        content.push_str("\n\n[truncated by Sparrow instruction loader]");
122    }
123    let relative_path = canonical
124        .strip_prefix(root)
125        .map(|p| p.to_string_lossy().to_string())
126        .unwrap_or_else(|_| canonical.to_string_lossy().to_string())
127        .replace('\\', "/");
128    let depth = relative_path
129        .split(['/', '\\'])
130        .filter(|part| !part.is_empty())
131        .count();
132    docs.push(InstructionDoc {
133        scope,
134        path: canonical,
135        relative_path,
136        depth,
137        content,
138    });
139}
140
141fn sort_instruction_docs(root: &Path, task: &str, docs: &mut [InstructionDoc]) {
142    let task_lower = task.to_lowercase();
143    docs.sort_by(|a, b| {
144        let a_relevant = instruction_relevance(root, a, &task_lower);
145        let b_relevant = instruction_relevance(root, b, &task_lower);
146        a.scope_rank()
147            .cmp(&b.scope_rank())
148            .then_with(|| b_relevant.cmp(&a_relevant))
149            .then_with(|| a.depth.cmp(&b.depth))
150            .then_with(|| a.relative_path.cmp(&b.relative_path))
151    });
152}
153
154fn instruction_relevance(root: &Path, doc: &InstructionDoc, task_lower: &str) -> bool {
155    let rel = doc.relative_path.to_lowercase();
156    if rel != doc.path.to_string_lossy().to_lowercase() && task_lower.contains(&rel) {
157        return true;
158    }
159    let parent = doc.path.parent().unwrap_or(root);
160    if let Ok(parent_rel) = parent.strip_prefix(root) {
161        let parent_rel = parent_rel.to_string_lossy().to_lowercase();
162        return !parent_rel.is_empty() && task_lower.contains(&parent_rel);
163    }
164    false
165}
166
167impl InstructionDoc {
168    fn scope_rank(&self) -> u8 {
169        match self.scope {
170            InstructionScope::User => 0,
171            InstructionScope::Workspace => 1,
172            InstructionScope::Directory => 2,
173        }
174    }
175}
176
177fn enforce_total_limit(docs: Vec<InstructionDoc>) -> Vec<InstructionDoc> {
178    let mut total = 0usize;
179    let mut out = Vec::new();
180    for mut doc in docs {
181        if total >= MAX_TOTAL_INSTRUCTION_BYTES {
182            break;
183        }
184        let remaining = MAX_TOTAL_INSTRUCTION_BYTES - total;
185        if doc.content.len() > remaining {
186            doc.content.truncate(remaining);
187            doc.content
188                .push_str("\n\n[truncated by Sparrow instruction loader]");
189        }
190        total += doc.content.len();
191        out.push(doc);
192    }
193    out
194}
195
#[cfg(test)]
mod tests {
    use super::*;

    /// Creates an empty scratch directory unique to this test name and process.
    fn temp_dir(name: &str) -> PathBuf {
        let dir = std::env::temp_dir().join(format!(
            "sparrow-instructions-{}-{}",
            name,
            std::process::id()
        ));
        // Clear leftovers from an earlier run that did not clean up.
        let _ = std::fs::remove_dir_all(&dir);
        std::fs::create_dir_all(&dir).unwrap();
        dir
    }

    #[test]
    fn discovers_workspace_and_nested_instruction_files() {
        let root = temp_dir("nested");
        std::fs::write(root.join("AGENTS.md"), "root agents").unwrap();
        std::fs::create_dir_all(root.join("src/.sparrow")).unwrap();
        std::fs::write(
            root.join("src/.sparrow/INSTRUCTIONS.md"),
            "src instructions",
        )
        .unwrap();
        std::fs::write(root.join("src/CLAUDE.md"), "src claude").unwrap();

        let docs = discover_workspace_instructions(&root, "edit src/main.rs");
        let rels: Vec<_> = docs.iter().map(|doc| doc.relative_path.as_str()).collect();
        assert!(rels.contains(&"AGENTS.md"));
        assert!(rels.contains(&"src/.sparrow/INSTRUCTIONS.md"));
        assert!(rels.contains(&"src/CLAUDE.md"));
        assert!(
            docs.iter()
                .any(|doc| doc.content.contains("src instructions"))
        );

        let _ = std::fs::remove_dir_all(root);
    }

    #[test]
    fn skips_target_and_node_modules() {
        let root = temp_dir("skip");
        std::fs::create_dir_all(root.join("target")).unwrap();
        std::fs::create_dir_all(root.join("node_modules/pkg")).unwrap();
        std::fs::write(root.join("target/AGENTS.md"), "nope").unwrap();
        std::fs::write(root.join("node_modules/pkg/CLAUDE.md"), "nope").unwrap();

        let docs = discover_workspace_instructions(&root, "");
        // User-scope documents come from the real home directory of whoever
        // runs the test; their paths and contents are outside this fixture,
        // so only documents found under the temporary root are checked.
        assert!(
            docs.iter()
                .filter(|doc| doc.scope != InstructionScope::User)
                .all(|doc| !doc.content.contains("nope")
                    && !doc.relative_path.contains("target"))
        );

        let _ = std::fs::remove_dir_all(root);
    }
}
252}