Skip to main content

caliban_memory/
rules.rs

//! `.caliban/rules/<topic>.md` — path-scoped rule files with optional
//! `paths:` glob frontmatter.
//!
//! Part of ADR 0036. Rules behave like miniature CLAUDE.md addenda that the
//! agent activates lazily once the model touches a matching file (or eagerly
//! at startup when `paths:` is absent).
7
8use std::path::{Path, PathBuf};
9
10use globset::{Glob, GlobSet, GlobSetBuilder};
11use serde::Deserialize;
12
13/// A loaded rule file (frontmatter parsed; body raw).
14#[derive(Debug, Clone, PartialEq, Eq)]
15pub struct Rule {
16    /// Logical name (kebab-case, defaults to the file stem).
17    pub name: String,
18    /// Optional one-line description.
19    pub description: Option<String>,
20    /// Optional glob patterns for lazy activation. When empty, the rule is
21    /// always active (loaded at startup).
22    pub paths: Vec<String>,
23    /// File body (everything after the closing `---`).
24    pub body: String,
25    /// Absolute path on disk.
26    pub path: PathBuf,
27    /// Source scope (project vs user).
28    pub scope: RuleScope,
29}
30
/// Origin of a rule: the per-user directory (`~/.caliban/rules/`) or the
/// per-workspace directory (`<workspace>/.caliban/rules/`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RuleScope {
    /// Loaded from `~/.caliban/rules/`; applies across workspaces.
    User,
    /// Loaded from `<workspace>/.caliban/rules/`; applies to one workspace.
    Project,
}

impl RuleScope {
    /// Lowercase label for this scope (`"user"` or `"project"`), used as the
    /// splice attribute value.
    #[must_use]
    pub fn as_str(self) -> &'static str {
        // Exhaustive on purpose: adding a variant must force a label choice.
        match self {
            Self::Project => "project",
            Self::User => "user",
        }
    }
}
51
52/// A set of loaded rules with a pre-built `GlobSet` for fast path matching.
53#[derive(Debug)]
54pub struct RuleSet {
55    rules: Vec<Rule>,
56    matcher: GlobSet,
57    /// Maps a glob-set index back to the rule index that owns it.
58    glob_to_rule: Vec<usize>,
59}
60
61impl RuleSet {
62    /// Empty set (no rules loaded).
63    #[must_use]
64    pub fn empty() -> Self {
65        Self {
66            rules: Vec::new(),
67            matcher: GlobSet::empty(),
68            glob_to_rule: Vec::new(),
69        }
70    }
71
72    /// All loaded rules.
73    #[must_use]
74    pub fn rules(&self) -> &[Rule] {
75        &self.rules
76    }
77
78    /// Rules that have no `paths:` filter — always loaded into the prompt.
79    #[must_use]
80    pub fn always_active(&self) -> Vec<&Rule> {
81        self.rules.iter().filter(|r| r.paths.is_empty()).collect()
82    }
83
84    /// Return the **indexes** of every rule whose `paths:` filter matches
85    /// `path`. Always-active rules (no `paths:` filter) are not returned here
86    /// — they're loaded eagerly via [`Self::always_active`].
87    #[must_use]
88    pub fn matching(&self, path: &Path) -> Vec<usize> {
89        let mut hits = self.matcher.matches(path);
90        hits.sort_unstable();
91        hits.dedup();
92        hits.into_iter()
93            .map(|gi| self.glob_to_rule[gi])
94            .collect::<std::collections::BTreeSet<_>>()
95            .into_iter()
96            .collect()
97    }
98
99    /// Return the rule at index `i` (used after a [`Self::matching`] hit).
100    #[must_use]
101    pub fn get(&self, i: usize) -> Option<&Rule> {
102        self.rules.get(i)
103    }
104
105    /// Build a `RuleSet` from owned rules, building a `GlobSet` from their
106    /// `paths:` patterns.
107    #[must_use]
108    pub fn build(rules: Vec<Rule>) -> Self {
109        let mut builder = GlobSetBuilder::new();
110        let mut glob_to_rule = Vec::new();
111        for (idx, r) in rules.iter().enumerate() {
112            for pat in &r.paths {
113                if let Ok(g) = Glob::new(pat) {
114                    builder.add(g);
115                    glob_to_rule.push(idx);
116                } else {
117                    tracing::warn!(
118                        target: caliban_common::tracing_targets::TARGET_MEMORY_RULES,
119                        rule = %r.name,
120                        pattern = %pat,
121                        "invalid glob pattern in rule",
122                    );
123                }
124            }
125        }
126        let matcher = builder.build().unwrap_or_else(|e| {
127            tracing::warn!(
128                target: caliban_common::tracing_targets::TARGET_MEMORY_RULES,
129                error = %e,
130                "rule globset build failed; falling back to empty matcher",
131            );
132            GlobSet::empty()
133        });
134        Self {
135            rules,
136            matcher,
137            glob_to_rule,
138        }
139    }
140}
141
142/// Scan both the user dir (`~/.caliban/rules/`) and the project dir
143/// (`<workspace>/.caliban/rules/`) for `*.md` rule files. Malformed files are
144/// skipped with a warning.
145#[must_use]
146pub fn scan_caliban_rules(workspace_root: &Path) -> RuleSet {
147    let mut rules = Vec::new();
148    if let Some(home) = dirs::home_dir() {
149        scan_dir(
150            &home.join(".caliban").join("rules"),
151            RuleScope::User,
152            &mut rules,
153        );
154    }
155    scan_dir(
156        &workspace_root.join(".caliban").join("rules"),
157        RuleScope::Project,
158        &mut rules,
159    );
160    rules.sort_by(|a, b| a.name.cmp(&b.name));
161    RuleSet::build(rules)
162}
163
164fn scan_dir(dir: &Path, scope: RuleScope, out: &mut Vec<Rule>) {
165    let Ok(entries) = std::fs::read_dir(dir) else {
166        return;
167    };
168    for entry in entries.flatten() {
169        let p = entry.path();
170        if !p.is_file() {
171            continue;
172        }
173        if p.extension().and_then(|s| s.to_str()) != Some("md") {
174            continue;
175        }
176        let Some(stem) = p.file_stem().and_then(|s| s.to_str()) else {
177            continue;
178        };
179        // Skip a README.md per convention.
180        if stem.eq_ignore_ascii_case("README") {
181            continue;
182        }
183        match parse_rule(&p, scope, stem) {
184            Ok(r) => out.push(r),
185            Err(e) => tracing::warn!(
186                target: caliban_common::tracing_targets::TARGET_MEMORY_RULES,
187                path = %p.display(),
188                error = %e,
189                "skipping malformed rule file",
190            ),
191        }
192    }
193}
194
195#[derive(Debug, Deserialize, Default)]
196struct RawRuleFrontmatter {
197    #[serde(default)]
198    name: Option<String>,
199    #[serde(default)]
200    description: Option<String>,
201    #[serde(default)]
202    paths: Vec<String>,
203}
204
205fn parse_rule(path: &Path, scope: RuleScope, stem: &str) -> Result<Rule, String> {
206    let raw = std::fs::read_to_string(path).map_err(|e| format!("io: {e}"))?;
207    let trimmed = raw.trim_start_matches('\u{feff}');
208    let body_start = "---\n";
209    if !trimmed.starts_with(body_start) {
210        // No frontmatter — entire file is the body; rule is always-active.
211        return Ok(Rule {
212            name: stem.to_string(),
213            description: None,
214            paths: Vec::new(),
215            body: trimmed.to_string(),
216            path: path.to_path_buf(),
217            scope,
218        });
219    }
220    let after = &trimmed[body_start.len()..];
221    let Some(end) = after.find("\n---\n").or_else(|| {
222        let i = after.find("\n---")?;
223        if after[i..].starts_with("\n---") {
224            Some(i)
225        } else {
226            None
227        }
228    }) else {
229        return Err("missing closing `---` frontmatter delimiter".into());
230    };
231    let yaml = &after[..end];
232    let body_off = end + "\n---\n".len();
233    let body = if body_off >= after.len() {
234        ""
235    } else {
236        &after[body_off..]
237    };
238    let fm: RawRuleFrontmatter = serde_yaml::from_str(yaml).map_err(|e| format!("yaml: {e}"))?;
239    Ok(Rule {
240        name: fm.name.unwrap_or_else(|| stem.to_string()),
241        description: fm.description,
242        paths: fm.paths,
243        body: body.to_string(),
244        path: path.to_path_buf(),
245        scope,
246    })
247}
248
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Write `<dir>/<name>.md`. When `fm` is non-empty it is wrapped in
    /// `---` delimiters and followed by a blank line before `body`.
    fn write_rule(dir: &Path, name: &str, fm: &str, body: &str) {
        let mut s = String::new();
        if !fm.is_empty() {
            s.push_str("---\n");
            s.push_str(fm);
            if !fm.ends_with('\n') {
                s.push('\n');
            }
            s.push_str("---\n\n");
        }
        s.push_str(body);
        fs::write(dir.join(format!("{name}.md")), s).unwrap();
    }

    /// Create and return `<workspace>/.caliban/rules/`.
    fn make_rules_dir(workspace: &Path) -> PathBuf {
        let rules_dir = workspace.join(".caliban").join("rules");
        fs::create_dir_all(&rules_dir).unwrap();
        rules_dir
    }

    /// Keep only project-scoped rules.
    ///
    /// `scan_caliban_rules` also reads the real `~/.caliban/rules/`, so
    /// assertions must ignore whatever user rules exist on the machine
    /// running the tests.
    fn project_only<'a>(rules: impl IntoIterator<Item = &'a Rule>) -> Vec<&'a Rule> {
        rules
            .into_iter()
            .filter(|r| r.scope == RuleScope::Project)
            .collect()
    }

    /// Project-scoped rules activated by `path`.
    fn project_matches<'a>(set: &'a RuleSet, path: &str) -> Vec<&'a Rule> {
        project_only(
            set.matching(Path::new(path))
                .into_iter()
                .filter_map(|i| set.get(i)),
        )
    }

    #[test]
    fn scan_loads_project_rules_and_builds_globset() {
        let tmp = TempDir::new().unwrap();
        let workspace = tmp.path();
        let rules_dir = make_rules_dir(workspace);
        write_rule(
            &rules_dir,
            "python-style",
            "name: python-style\npaths:\n  - \"**/*.py\"\n  - \"scripts/**\"\n",
            "Use black + ruff.\n",
        );
        write_rule(
            &rules_dir,
            "always-on",
            "name: always-on\n",
            "Always loaded.\n",
        );

        let set = scan_caliban_rules(workspace);
        assert_eq!(project_only(set.rules()).len(), 2);

        // python-style activates on .py paths.
        let hits = project_matches(&set, "src/foo.py");
        assert_eq!(hits.len(), 1);
        assert_eq!(hits[0].name, "python-style");

        // always-on shows up in always_active().
        let always: Vec<_> = project_only(set.always_active())
            .into_iter()
            .map(|r| r.name.as_str())
            .collect();
        assert!(always.contains(&"always-on"));
    }

    #[test]
    fn rule_without_paths_is_always_active() {
        let tmp = TempDir::new().unwrap();
        let workspace = tmp.path();
        let rules_dir = make_rules_dir(workspace);
        write_rule(&rules_dir, "convs", "name: convs\n", "Conventions.\n");
        let set = scan_caliban_rules(workspace);
        assert_eq!(project_only(set.always_active()).len(), 1);
        // Path-touch should NOT match an always-active rule (it's always-on already).
        assert!(project_matches(&set, "anything.txt").is_empty());
    }

    #[test]
    fn rules_skip_readme_by_convention() {
        let tmp = TempDir::new().unwrap();
        let workspace = tmp.path();
        let rules_dir = make_rules_dir(workspace);
        write_rule(&rules_dir, "README", "name: README\n", "noise");
        write_rule(&rules_dir, "actual", "name: actual\n", "ok");
        let set = scan_caliban_rules(workspace);
        let names: Vec<_> = project_only(set.rules())
            .into_iter()
            .map(|r| r.name.as_str())
            .collect();
        assert!(!names.contains(&"README"));
        assert!(names.contains(&"actual"));
    }

    #[test]
    fn scan_emits_both_user_and_project_scopes() {
        // `scan_caliban_rules` resolves the user directory through
        // `dirs::home_dir()`, which this test does not override. Instead it
        // drives `scan_dir` directly against a stand-in home directory and a
        // workspace, both inside one tempdir, and checks the scope tags.
        let tmp = TempDir::new().unwrap();
        let home = tmp.path().join("home");
        let workspace = tmp.path().join("ws");
        let user_rules = make_rules_dir(&home);
        let proj_rules = make_rules_dir(&workspace);
        write_rule(&user_rules, "user-a", "name: user-a\n", "U");
        write_rule(&proj_rules, "proj-a", "name: proj-a\n", "P");

        let mut all = Vec::new();
        scan_dir(&user_rules, RuleScope::User, &mut all);
        scan_dir(&proj_rules, RuleScope::Project, &mut all);
        let set = RuleSet::build(all);

        let names: Vec<_> = set.rules().iter().map(|r| r.name.as_str()).collect();
        assert!(names.contains(&"user-a"));
        assert!(names.contains(&"proj-a"));

        let user_count = set
            .rules()
            .iter()
            .filter(|r| matches!(r.scope, RuleScope::User))
            .count();
        let proj_count = set
            .rules()
            .iter()
            .filter(|r| matches!(r.scope, RuleScope::Project))
            .count();
        assert_eq!(user_count, 1);
        assert_eq!(proj_count, 1);
    }
}