Skip to main content

caliban_memory/
config.rs

1//! `MemoryConfig` — paths, dirs, and token budget for tier loading.
2
3use std::path::{Path, PathBuf};
4
5use globset::{Glob, GlobSet, GlobSetBuilder};
6
7use caliban_common::paths::sanitize_cwd_for_path;
8
9use crate::project_walk::WalkStop;
10
/// Combined memory-prefix token budget used by [`MemoryConfig::from_env`] when
/// `CALIBAN_MEMORY_BUDGET_TOKENS` is unset or does not parse as a `usize`.
const DEFAULT_BUDGET_TOKENS: usize = 32_000;
12
13/// Resolved configuration for one memory-load invocation.
14///
15/// Holds a handful of boolean knobs (regression escape + non-interactive +
16/// additional-dirs + approve-imports). Clippy's `struct_excessive_bools`
17/// would otherwise nudge us to bucket them, but their semantics are distinct
18/// enough that operators benefit from the flat list — keep them inline.
19#[allow(clippy::struct_excessive_bools)]
20#[derive(Debug, Clone)]
21pub struct MemoryConfig {
22    /// Path to the operator-global `CLAUDE.md`. `None` if none was discoverable.
23    pub global_path: Option<PathBuf>,
24    /// Legacy single-file project tier path. Still honored when
25    /// [`MemoryConfig::disable_walk`] is `true` (regression escape).
26    pub project_path: Option<PathBuf>,
27    /// Starting directory for the project-tier ancestor walk (typically cwd).
28    pub project_walk_root: PathBuf,
29    /// Where the walk stops (defaults to `Both`).
30    pub project_walk_stop: WalkStop,
31    /// Additional `--add-dir` paths. Each contributes its own ancestor walk
32    /// when [`MemoryConfig::additional_directories_claude_md`] is `true`.
33    pub additional_dirs: Vec<PathBuf>,
34    /// Gitignore-style patterns evaluated against paths relative to
35    /// `project_walk_root` to skip CLAUDE.md / AGENTS.md / `.caliban.md` files.
36    pub claude_md_excludes: GlobSet,
37    /// `CALIBAN_ADDITIONAL_DIRECTORIES_CLAUDE_MD` — load CLAUDE.md from
38    /// `--add-dir` paths too.
39    pub additional_directories_claude_md: bool,
40    /// `CALIBAN_DISABLE_CLAUDE_MD_WALK` — fall back to the legacy single-file
41    /// project tier (regression escape).
42    pub disable_walk: bool,
43    /// `CALIBAN_APPROVE_IMPORTS` — auto-approve every external `@`-import.
44    pub approve_imports: bool,
45    /// `--print` / `--bare` / similar — short-circuit the import dialog to
46    /// auto-deny. Defaults to `false` (interactive). Set by the binary based
47    /// on its run mode.
48    pub non_interactive: bool,
49    /// Path to the imports-allowlist JSON (`~/.caliban/imports-allowlist.json`).
50    pub imports_allowlist_path: PathBuf,
51    /// Per-workspace auto-memory directory. Always set; may not exist yet.
52    pub auto_memory_dir: PathBuf,
53    /// Approximate token budget for the combined memory prefix.
54    pub max_tokens: usize,
55    /// Optional per-scope cap for the auto-memory tier. When set, the auto
56    /// tier is truncated to fit this cap before the combined `max_tokens`
57    /// ceiling is applied. `None` means "no per-scope cap; only the combined
58    /// ceiling applies".
59    pub cap_tokens_auto: Option<usize>,
60    /// Optional per-scope cap for the combined CLAUDE.md tier (global +
61    /// project). When set, truncates project first, then global, to fit. `None`
62    /// means "no per-scope cap".
63    pub cap_tokens_claude_md: Option<usize>,
64    /// `CALIBAN_DISABLE_AUTO_MEMORY` — kill-switch: drop the auto-memory tier
65    /// from the prefix entirely. Resolved from the environment once in
66    /// [`MemoryConfig::from_env`] and defaulted to `false` in
67    /// [`MemoryConfig::for_test`], so [`crate::loader::load`] never reads the
68    /// process environment directly (which previously raced with env-mutating
69    /// tests under parallel execution).
70    pub disable_auto: bool,
71}
72
73impl MemoryConfig {
74    /// Resolve a `MemoryConfig` from the environment + the given workspace root.
75    ///
76    /// Env vars honored:
77    /// - `XDG_CONFIG_HOME` / `XDG_DATA_HOME` for global + auto-memory paths.
78    /// - `CALIBAN_MEMORY_DIR` / `CALIBAN_AUTO_MEMORY_DIRECTORY` for auto-memory.
79    /// - `CALIBAN_MEMORY_BUDGET_TOKENS` overrides the default `32_000` budget.
80    /// - `CALIBAN_MEMORY_CAP_TOKENS_AUTO` sets the per-scope cap for the auto
81    ///   tier (unset = no per-scope cap).
82    /// - `CALIBAN_MEMORY_CAP_TOKENS_CLAUDE_MD` sets the per-scope cap for the
83    ///   combined CLAUDE.md tier (global + project; unset = no per-scope cap).
84    /// - `CALIBAN_ADDITIONAL_DIRECTORIES_CLAUDE_MD=1` enables CLAUDE.md load
85    ///   from `--add-dir` paths.
86    /// - `CALIBAN_DISABLE_CLAUDE_MD_WALK=1` reverts to the single-file project
87    ///   tier (regression escape).
88    /// - `CALIBAN_APPROVE_IMPORTS=1` auto-approves every external `@`-import.
89    /// - `CALIBAN_CLAUDE_MD_EXCLUDES` is a colon-or-newline-separated list of
90    ///   gitignore-style patterns to skip during the ancestor walk.
91    #[must_use]
92    pub fn from_env(workspace_root: &Path) -> Self {
93        let config_home = xdg_dir("XDG_CONFIG_HOME", dirs::config_dir);
94        let data_home = xdg_dir("XDG_DATA_HOME", dirs::data_local_dir);
95
96        let global_path = config_home.map(|d| d.join("caliban").join("CLAUDE.md"));
97        let project_path = Some(workspace_root.join("CLAUDE.md"));
98
99        let auto_memory_dir = if let Some(dir) = std::env::var_os("CALIBAN_AUTO_MEMORY_DIRECTORY") {
100            PathBuf::from(dir)
101        } else {
102            let auto_memory_root = std::env::var_os("CALIBAN_MEMORY_DIR")
103                .map(PathBuf::from)
104                .or_else(|| data_home.map(|d| d.join("caliban").join("projects")));
105            let slug = sanitize_cwd_for_path(workspace_root);
106            auto_memory_root
107                .unwrap_or_else(|| PathBuf::from("./.caliban/projects"))
108                .join(slug)
109                .join("memory")
110        };
111
112        let max_tokens = std::env::var("CALIBAN_MEMORY_BUDGET_TOKENS")
113            .ok()
114            .and_then(|s| s.parse::<usize>().ok())
115            .unwrap_or(DEFAULT_BUDGET_TOKENS);
116
117        let cap_tokens_auto = std::env::var("CALIBAN_MEMORY_CAP_TOKENS_AUTO")
118            .ok()
119            .and_then(|s| s.parse::<usize>().ok());
120        let cap_tokens_claude_md = std::env::var("CALIBAN_MEMORY_CAP_TOKENS_CLAUDE_MD")
121            .ok()
122            .and_then(|s| s.parse::<usize>().ok());
123
124        let claude_md_excludes =
125            parse_exclude_patterns(std::env::var("CALIBAN_CLAUDE_MD_EXCLUDES").ok().as_deref());
126
127        let imports_allowlist_path = dirs::home_dir()
128            .unwrap_or_else(|| PathBuf::from("."))
129            .join(".caliban")
130            .join("imports-allowlist.json");
131
132        Self {
133            global_path,
134            project_path,
135            project_walk_root: workspace_root.to_path_buf(),
136            project_walk_stop: WalkStop::default(),
137            additional_dirs: Vec::new(),
138            claude_md_excludes,
139            additional_directories_claude_md: env_truthy(
140                "CALIBAN_ADDITIONAL_DIRECTORIES_CLAUDE_MD",
141            ),
142            disable_walk: env_truthy("CALIBAN_DISABLE_CLAUDE_MD_WALK"),
143            approve_imports: env_truthy("CALIBAN_APPROVE_IMPORTS"),
144            non_interactive: false,
145            imports_allowlist_path,
146            auto_memory_dir,
147            max_tokens,
148            cap_tokens_auto,
149            cap_tokens_claude_md,
150            disable_auto: env_truthy("CALIBAN_DISABLE_AUTO_MEMORY"),
151        }
152    }
153}
154
155impl MemoryConfig {
156    /// Construct a minimal config for unit tests / library callers that don't
157    /// want to read from the process environment. All env-driven fields take
158    /// their defaults; only the auto-memory directory and the token budget are
159    /// caller-controlled.
160    #[must_use]
161    pub fn for_test(auto_memory_dir: PathBuf) -> Self {
162        Self {
163            global_path: None,
164            project_path: None,
165            project_walk_root: PathBuf::from("/tmp"),
166            project_walk_stop: WalkStop::default(),
167            additional_dirs: Vec::new(),
168            claude_md_excludes: GlobSet::empty(),
169            additional_directories_claude_md: false,
170            disable_walk: true, // tests opt out of the walk by default
171            approve_imports: false,
172            non_interactive: false,
173            imports_allowlist_path: PathBuf::from("/tmp/.caliban/imports-allowlist.json"),
174            auto_memory_dir,
175            max_tokens: 100_000,
176            cap_tokens_auto: None,
177            cap_tokens_claude_md: None,
178            disable_auto: false,
179        }
180    }
181
182    /// Builder-style setter for the per-scope auto-tier cap. Allows callers
183    /// (typically the binary at startup, reading from `[memory]` settings) to
184    /// override the env-driven value.
185    #[must_use]
186    pub fn with_cap_tokens_auto(mut self, n: usize) -> Self {
187        self.cap_tokens_auto = Some(n);
188        self
189    }
190
191    /// Builder-style setter for the per-scope CLAUDE.md-tier cap.
192    #[must_use]
193    pub fn with_cap_tokens_claude_md(mut self, n: usize) -> Self {
194        self.cap_tokens_claude_md = Some(n);
195        self
196    }
197
198    /// Compute the effective per-scope cap accounting for the combined
199    /// ceiling. When the sum of both per-scope caps would exceed `max_tokens`,
200    /// each is scaled down proportionally so the sum equals `max_tokens`.
201    ///
202    /// `this_cap` is the per-scope cap being computed; `other_cap` is the
203    /// other per-scope cap (used to compute the per-scope sum). When the
204    /// other cap is unset, the combined ceiling is treated as its value.
205    #[must_use]
206    pub fn effective_cap(&self, this_cap: usize, other_cap: Option<usize>) -> usize {
207        let other = other_cap.unwrap_or(self.max_tokens);
208        let per_scope_sum = this_cap.saturating_add(other);
209        if per_scope_sum <= self.max_tokens {
210            this_cap
211        } else {
212            // Proportional scale-down so the sum fits the combined ceiling.
213            ((this_cap as u128) * (self.max_tokens as u128) / (per_scope_sum as u128)) as usize
214        }
215    }
216}
217
/// Report whether the environment variable `key` holds a truthy value.
///
/// The accepted values are `1`, `true`, and `yes`, compared ASCII
/// case-insensitively so that every capitalization is treated alike (`Yes`
/// is accepted just like `True`). No trimming is performed. A variable that
/// is unset, not valid Unicode, or holds any other value is falsy.
fn env_truthy(key: &str) -> bool {
    const TRUTHY: [&str; 3] = ["1", "true", "yes"];
    std::env::var(key).is_ok_and(|value| {
        TRUTHY
            .iter()
            .any(|accepted| value.eq_ignore_ascii_case(accepted))
    })
}
224
225/// Parse a colon-or-newline-separated list of gitignore-style patterns into a
226/// `GlobSet`. Invalid patterns are dropped with a `warn!` log.
227fn parse_exclude_patterns(raw: Option<&str>) -> GlobSet {
228    let mut builder = GlobSetBuilder::new();
229    let Some(s) = raw else {
230        return GlobSet::empty();
231    };
232    for raw in s.split(['\n', ':']) {
233        let pat = raw.trim();
234        if pat.is_empty() {
235            continue;
236        }
237        match Glob::new(pat) {
238            Ok(g) => {
239                builder.add(g);
240            }
241            Err(e) => tracing::warn!(
242                target: caliban_common::tracing_targets::TARGET_MEMORY,
243                pattern = %pat,
244                error = %e,
245                "skipping invalid claude_md_excludes pattern",
246            ),
247        }
248    }
249    builder.build().unwrap_or_else(|e| {
250        tracing::warn!(
251            target: caliban_common::tracing_targets::TARGET_MEMORY,
252            error = %e,
253            "claude_md_excludes globset build failed; using empty matcher",
254        );
255        GlobSet::empty()
256    })
257}
258
259/// Public helper: build a `GlobSet` from an iterable of patterns. Used by
260/// downstream callers that load patterns from `settings.toml`.
261///
262/// # Errors
263///
264/// Returns the first [`globset::Error`] encountered if a pattern fails to
265/// parse. Builder errors during finalization are also surfaced.
266pub fn build_excludes<I, S>(patterns: I) -> std::result::Result<GlobSet, globset::Error>
267where
268    I: IntoIterator<Item = S>,
269    S: AsRef<str>,
270{
271    let mut builder = GlobSetBuilder::new();
272    for p in patterns {
273        builder.add(Glob::new(p.as_ref())?);
274    }
275    builder.build()
276}
277
/// Look up an XDG base directory.
///
/// The value of `env_var` wins when the variable is set to a non-empty
/// value; otherwise the result is whatever `fallback` returns (in this file,
/// one of the `dirs` crate's platform-default lookups).
fn xdg_dir(env_var: &str, fallback: fn() -> Option<PathBuf>) -> Option<PathBuf> {
    std::env::var_os(env_var)
        .filter(|value| !value.is_empty())
        .map(PathBuf::from)
        .or_else(fallback)
}
288
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn default_budget_constant_matches() {
        assert_eq!(DEFAULT_BUDGET_TOKENS, 32_000);
    }

    #[test]
    fn with_cap_tokens_auto_sets_value() {
        let cfg = MemoryConfig::for_test(PathBuf::from("/tmp/m")).with_cap_tokens_auto(4_096);
        assert_eq!(cfg.cap_tokens_auto, Some(4_096));
    }

    #[test]
    fn effective_cap_returns_raw_when_sum_fits_combined() {
        let cfg = MemoryConfig::for_test(PathBuf::from("/tmp/m"));
        // max_tokens=100_000; auto=16K + claude_md=16K = 32K < 100K → no scale.
        assert_eq!(cfg.effective_cap(16_000, Some(16_000)), 16_000);
    }

    #[test]
    fn effective_cap_scales_proportionally_when_sum_exceeds_combined() {
        let cfg = MemoryConfig::for_test(PathBuf::from("/tmp/m"))
            .with_cap_tokens_auto(20_000)
            .with_cap_tokens_claude_md(20_000);
        // Force combined ceiling below the per-scope sum.
        let cfg = MemoryConfig {
            max_tokens: 20_000,
            ..cfg
        };
        // per_scope_sum=40_000 > max=20_000 → scale to 50%: each gets 10_000.
        assert_eq!(cfg.effective_cap(20_000, Some(20_000)), 10_000);
    }

    #[test]
    fn effective_cap_treats_missing_other_as_combined_ceiling() {
        let cfg = MemoryConfig::for_test(PathBuf::from("/tmp/m"));
        // other=None → treated as max_tokens=100_000.
        // per_scope_sum = 50_000 + 100_000 = 150_000 > 100_000 → scale.
        // Expected: 50_000 * 100_000 / 150_000 = 33_333.
        assert_eq!(cfg.effective_cap(50_000, None), 33_333);
    }

    #[test]
    fn project_path_joins_workspace_root() {
        let cfg = MemoryConfig::from_env(Path::new("/tmp/my-workspace"));
        assert_eq!(
            cfg.project_path.as_deref(),
            Some(Path::new("/tmp/my-workspace/CLAUDE.md")),
        );
        assert_eq!(
            cfg.project_walk_root.as_path(),
            Path::new("/tmp/my-workspace"),
        );
        assert_eq!(cfg.project_walk_stop, WalkStop::Both);
    }

    #[test]
    fn parse_exclude_patterns_handles_colon_and_newline_lists() {
        let g = parse_exclude_patterns(Some("node_modules/**\nvendor/**:third_party/**/CLAUDE.md"));
        assert!(g.is_match("node_modules/foo/CLAUDE.md"));
        assert!(g.is_match("vendor/x/y/AGENTS.md"));
        assert!(g.is_match("third_party/lib/CLAUDE.md"));
        assert!(!g.is_match("src/foo.rs"));
    }

    #[test]
    fn parse_exclude_patterns_drops_invalid_patterns_and_empties() {
        let g = parse_exclude_patterns(Some(""));
        assert!(g.is_empty());
        let g2 = parse_exclude_patterns(None);
        assert!(g2.is_empty());
        // An unclosed character class is not a valid glob: it must be dropped
        // without discarding the valid pattern that follows it.
        let g3 = parse_exclude_patterns(Some("[unclosed:ok/**"));
        assert_eq!(g3.len(), 1);
        assert!(g3.is_match("ok/file"));
        // Separator-only / whitespace-only input contributes no patterns.
        let g4 = parse_exclude_patterns(Some(" : \n :"));
        assert!(g4.is_empty());
    }

    #[test]
    fn build_excludes_helper_round_trips_patterns() {
        let g = build_excludes(["a/**", "b/**.md"]).unwrap();
        assert!(g.is_match("a/x"));
        assert!(g.is_match("b/x.md"));
        assert!(!g.is_match("c/x"));
    }
}