oxios_kernel/mount/path_promotion.rs
1//! Path promotion: auto-create Mounts for frequently-used paths (RFC-025 Phase 5).
2//!
3//! Unlike runtime detection (which only matches *existing* Mounts), this
4//! module scans session history to find paths the agent has *actually worked
5//! on* (via tool calls) or the user has *explicitly mentioned*, counts them,
6//! and promotes paths that cross a frequency threshold into Mounts.
7//!
8//! ## Pipeline
9//!
10//! 1. **Extract** raw paths from session trajectories (`tool_args`) and user
11//! messages.
12//! 2. **Normalize** each path to its project root by walking up the directory
13//! tree until a marker file (`Cargo.toml`, `package.json`, `.git`, …) is
14//! found. This collapses `/proj/src/main.rs` and `/proj/Cargo.toml` into
15//! the single root `/proj`.
16//! 3. **Count** normalized roots over a sliding window.
17//! 4. **Promote** roots that cross the threshold into Mounts (unless one
18//! already covers that root).
19//!
20//! ## Why background, not realtime
21//!
22//! Every extraction requires walking the filesystem (to find markers), so we
23//! run this on a cadence (alongside Dream consolidation) rather than on every
24//! message. The threshold naturally debounces one-off mentions.
25
26use std::collections::{HashMap, HashSet};
27use std::path::{Path, PathBuf};
28
29use chrono::{DateTime, Duration, Utc};
30
31use crate::state_store::Session;
32
/// File and directory names whose presence marks a directory as a project
/// root, i.e. a self-contained project.
///
/// Mirrors `meta_detection::MARKERS` but adds `.git` (VCS root). The list is
/// deliberately duplicated rather than shared so that promotion logic can
/// evolve independently.
const ROOT_MARKERS: &[&str] = &[
    // Language / package-manager manifests.
    "Cargo.toml",
    "package.json",
    "go.mod",
    "pyproject.toml",
    "setup.py",
    "pom.xml",
    "build.gradle",
    "build.gradle.kts",
    "Gemfile",
    "composer.json",
    "mix.exs",
    // Build-system entry points.
    "CMakeLists.txt",
    "Makefile",
    // Repository-level markers.
    "AGENTS.md",
    ".git",
];
55
56/// A path's frequency tally within the promotion window.
57#[derive(Debug, Clone, Default)]
58pub struct PathFrequency {
59 /// Number of times a path under this root was touched/mentioned.
60 pub count: usize,
61 /// Most recent timestamp among the contributing events.
62 pub last_seen: Option<DateTime<Utc>>,
63}
64
/// Tunable settings for path promotion (mirrors `MountsConfig`).
#[derive(Clone, Debug)]
pub struct PromotionConfig {
    /// Master switch: set to `false` to disable promotion entirely.
    pub enabled: bool,
    /// Minimum tally a root must reach within the window to trigger
    /// promotion.
    pub threshold: usize,
    /// Look-back window in days. Events older than this are ignored.
    pub window_days: i64,
}

impl Default for PromotionConfig {
    /// Promotion enabled, threshold of 3, two-week window.
    fn default() -> Self {
        const DEFAULT_THRESHOLD: usize = 3;
        const DEFAULT_WINDOW_DAYS: i64 = 14;

        PromotionConfig {
            enabled: true,
            threshold: DEFAULT_THRESHOLD,
            window_days: DEFAULT_WINDOW_DAYS,
        }
    }
}
85
86/// Extract raw path strings from a single session.
87///
88/// Sources (most-trusted first):
89/// - `tool_args` of trajectory steps (paths the agent actually operated on)
90/// - user messages (explicit mentions)
91///
92/// Each returned `(path, timestamp)` pair is one "touch". A single message
93/// may contribute several touches if it references several paths.
94pub fn extract_paths(session: &Session) -> Vec<(String, DateTime<Utc>)> {
95 let mut out = Vec::new();
96
97 // Trajectory tool_args: pull any string value that looks like a path.
98 for step in &session.trajectory_steps {
99 collect_path_like_strings(&step.tool_args, &mut out, step.timestamp);
100 }
101
102 // User messages: word-level path extraction.
103 for msg in &session.user_messages {
104 for word in msg.content.split_whitespace() {
105 if looks_like_path(word) {
106 out.push((word.trim_matches(punct).to_string(), msg.timestamp));
107 }
108 }
109 }
110
111 out
112}
113
114/// Normalize a raw path to its project root, then tally frequencies.
115///
116/// Returns a map of `root -> PathFrequency` restricted to events within the
117/// configured window.
118///
119/// Frequency is computed **per distinct root per session**: a single session
120/// that mentions the same project root ten times counts once, not ten times.
121/// This prevents one chatty session from inflating a root across the
122/// threshold (Promo-7).
123pub fn tally_frequencies(
124 sessions: &[Session],
125 config: &PromotionConfig,
126) -> HashMap<PathBuf, PathFrequency> {
127 let cutoff = Utc::now() - Duration::days(config.window_days);
128 let mut freqs: HashMap<PathBuf, PathFrequency> = HashMap::new();
129
130 for session in sessions {
131 // Deduplicate roots within a single session before counting: each
132 // distinct root contributes at most one touch per session.
133 let mut distinct_roots: HashSet<PathBuf> = HashSet::new();
134 let mut root_last_seen: HashMap<PathBuf, DateTime<Utc>> = HashMap::new();
135 for (raw, ts) in extract_paths(session) {
136 if ts < cutoff {
137 continue;
138 }
139 let Some(root) = normalize_to_root(Path::new(&raw)) else {
140 continue;
141 };
142 distinct_roots.insert(root.clone());
143 // Track the most recent touch for this root in this session.
144 root_last_seen
145 .entry(root)
146 .and_modify(|prev| *prev = (*prev).max(ts))
147 .or_insert(ts);
148 }
149
150 for root in distinct_roots {
151 let ts = root_last_seen[&root];
152 let entry = freqs.entry(root).or_default();
153 entry.count += 1; // +1 per distinct root per session
154 entry.last_seen = Some(
155 entry
156 .last_seen
157 .map_or(ts, |prev: DateTime<Utc>| prev.max(ts)),
158 );
159 }
160 }
161
162 freqs
163}
164
165/// Find the project root for `path` by walking up until a marker is found.
166///
167/// - If `path` itself is a directory containing a marker, it is its own root.
168/// - Otherwise we walk up ancestors looking for the first directory that
169/// contains a marker.
170/// - Returns `None` if no marker is found within the filesystem (e.g. the
171/// path doesn't exist, or it's a loose file with no project context).
172pub fn normalize_to_root(path: &Path) -> Option<PathBuf> {
173 // Expand a leading `~` to the home directory before canonicalizing so
174 // that home-relative paths (`~/projects/foo`) resolve correctly (Promo-4).
175 // `std::fs::canonicalize` does *not* expand `~`, so without this those
176 // paths would fall back to the raw form and fail to find markers.
177 let expanded = expand_tilde(path);
178
179 // Canonicalize to resolve `..` and symlinks. Fall back to the raw path
180 // if the file no longer exists (it may still be a meaningful prefix).
181 let canonical = std::fs::canonicalize(&expanded).unwrap_or(expanded);
182
183 // Start from the path itself if it's a directory, else from its parent.
184 let start = if canonical.is_dir() {
185 canonical.clone()
186 } else {
187 canonical.parent()?.to_path_buf()
188 };
189
190 // Walk up looking for a marker.
191 let mut candidate = Some(start.as_path());
192 while let Some(dir) = candidate {
193 if has_marker(dir) {
194 return Some(dir.to_path_buf());
195 }
196 candidate = dir.parent();
197 }
198 None
199}
200
201/// Returns `true` if `dir` contains any root marker file.
202fn has_marker(dir: &Path) -> bool {
203 ROOT_MARKERS.iter().any(|m| dir.join(m).exists())
204}
205
206/// Expand a leading `~` (or `~user`, though only `~` is common) to the home
207/// directory. Returns the original path unchanged if it doesn't start with `~`
208/// or if `HOME` is unavailable (Promo-4).
209fn expand_tilde(path: &Path) -> PathBuf {
210 expand_tilde_with_home(path, std::env::var_os("HOME"))
211}
212
/// Pure helper: expand a leading `~` component using an explicitly supplied
/// home directory.
///
/// Split out from [`expand_tilde`] so tests can exercise the prefix logic
/// without mutating the process-wide `HOME` environment variable (Promo-4).
///
/// Behavior:
/// - `~` becomes `home`.
/// - `~/rest` becomes `home/rest`. Matching is done on path *components*,
///   so redundant separators (`~//rest`) still land under `home`, and
///   non-UTF-8 bytes in `rest` are preserved exactly.
/// - Anything else, including `~user/...`, absolute paths and relative
///   paths, is returned unchanged.
/// - If `home` is `None` or empty there is nothing to expand to, so the
///   path is returned unchanged.
fn expand_tilde_with_home(path: &Path, home: Option<std::ffi::OsString>) -> PathBuf {
    // Treat an empty HOME like a missing one: joining onto "" would silently
    // turn `~/foo` into the relative path `foo`.
    let Some(home) = home.filter(|h| !h.is_empty()) else {
        return path.to_path_buf();
    };

    // `Path::strip_prefix` compares whole components, so it matches `~` and
    // `~/...` but not `~user/...`, and the remainder is always relative.
    // Joining a relative remainder can never replace `home`, unlike joining
    // the text after a literal "~/" (which is absolute for `~//x`).
    match path.strip_prefix("~") {
        Ok(rest) if rest.as_os_str().is_empty() => PathBuf::from(home),
        Ok(rest) => PathBuf::from(home).join(rest),
        Err(_) => path.to_path_buf(),
    }
}
230
231/// Collect path-like string values from a JSON value (recursively).
232///
233/// `depth` bounds the recursion to prevent stack overflow on pathologically
234/// nested JSON (Promo-10). The bound of 32 comfortably exceeds any
235/// realistic tool_args payload.
236fn collect_path_like_strings(
237 value: &serde_json::Value,
238 out: &mut Vec<(String, DateTime<Utc>)>,
239 ts: DateTime<Utc>,
240) {
241 collect_path_like_strings_inner(value, out, ts, 0);
242}
243
244/// Inner recursive worker carrying the current `depth`.
245fn collect_path_like_strings_inner(
246 value: &serde_json::Value,
247 out: &mut Vec<(String, DateTime<Utc>)>,
248 ts: DateTime<Utc>,
249 depth: u32,
250) {
251 // Promo-10: bound recursion depth to avoid stack overflow on deeply
252 // (or cyclically) nested JSON.
253 const MAX_DEPTH: u32 = 32;
254 if depth > MAX_DEPTH {
255 return;
256 }
257 match value {
258 serde_json::Value::String(s) => {
259 if looks_like_path(s) {
260 out.push((s.trim_matches(punct).to_string(), ts));
261 }
262 }
263 serde_json::Value::Array(arr) => {
264 for v in arr {
265 collect_path_like_strings_inner(v, out, ts, depth + 1);
266 }
267 }
268 serde_json::Value::Object(map) => {
269 for v in map.values() {
270 collect_path_like_strings_inner(v, out, ts, depth + 1);
271 }
272 }
273 _ => {}
274 }
275}
276
277/// Heuristic: does this string look like an absolute or home-relative path?
278///
279/// Note: `normalize_to_root` filters out most false positives anyway, so this
280/// only needs to be a coarse pre-filter.
281fn looks_like_path(s: &str) -> bool {
282 let s = s.trim_matches(punct);
283 // Reject UNC paths (//host/share) — not local filesystem paths.
284 if s.starts_with("//") {
285 return false;
286 }
287 // Absolute unix path with at least one separator and some depth.
288 (s.starts_with('/') && s.matches('/').count() >= 2 && s.len() > 3)
289 || (s.starts_with("~/") && s.len() > 3)
290}
291
/// Returns `true` for characters that should be stripped from the edges of
/// a path-like token before parsing: quotes, backticks, commas, semicolons
/// and the three bracket pairs.
fn punct(c: char) -> bool {
    const EDGE_CHARS: [char; 11] = [
        '"', '\'', '`', ',', ';', ')', ']', '}', '(', '[', '{',
    ];
    EDGE_CHARS.contains(&c)
}
300
#[cfg(test)]
mod tests {
    use super::*;
    use crate::state_store::{Session, SessionId, UserMessage};

    /// Build a session whose user messages carry caller-chosen timestamps.
    ///
    /// Messages are pushed directly because `Session::add_user_message`
    /// stamps `Utc::now()`, which would defeat the window tests.
    fn make_session(msgs: Vec<(&str, DateTime<Utc>)>) -> Session {
        let mut s = Session::new("test");
        s.id = SessionId("s1".into());
        s.user_messages
            .extend(msgs.into_iter().map(|(content, timestamp)| UserMessage {
                content: content.into(),
                timestamp,
            }));
        s
    }

    /// Path to this crate's root, used instead of a hardcoded developer
    /// path so the tests are portable (Promo-2). It has `Cargo.toml` at its
    /// root, so `normalize_to_root` collapses any child to it.
    fn crate_root() -> PathBuf {
        PathBuf::from(env!("CARGO_MANIFEST_DIR"))
    }

    /// The crate root in the canonical form that `normalize_to_root`
    /// produces for existing paths (symlinks and `..` resolved).
    fn canonical_crate_root() -> PathBuf {
        std::fs::canonicalize(crate_root()).expect("crate root should exist")
    }

    #[test]
    fn test_looks_like_path() {
        assert!(looks_like_path("/usr/local/bin"));
        assert!(looks_like_path("~/projects/foo"));
        assert!(!looks_like_path("hello world"));
        assert!(!looks_like_path("/x")); // too shallow
        assert!(!looks_like_path("no-slash"));
    }

    #[test]
    fn test_tally_counts_repeated_paths() {
        let now = Utc::now();
        let root = crate_root();
        let sessions = vec![make_session(vec![
            (format!("fix {}/src/lib.rs", root.display()).as_str(), now),
            (
                format!("also check {}/Cargo.toml", root.display()).as_str(),
                now,
            ),
            (format!("again {}", root.display()).as_str(), now),
        ])];

        let config = PromotionConfig {
            threshold: 1,
            ..Default::default()
        };
        let freqs = tally_frequencies(&sessions, &config);

        // All three mentions collapse to the same project root (Cargo.toml).
        // Look it up by its exact canonical path rather than by directory
        // name, so the test does not depend on what the checkout is called.
        let expected_root = canonical_crate_root();
        let freq = freqs
            .get(&expected_root)
            .unwrap_or_else(|| panic!("expected {:?} in {:?}", expected_root, freqs));
        // Promo-7: a single session counts each distinct root once, so the
        // count is exactly 1 regardless of how many times it was mentioned.
        assert_eq!(freq.count, 1);
        assert_eq!(freq.last_seen, Some(now));
    }

    #[test]
    fn test_tally_respects_window() {
        let now = Utc::now();
        let old = now - Duration::days(30);
        let root = crate_root();
        // Use a real project root (the crate itself) so that the *only* reason
        // the tally is empty is the window filter, not `normalize_to_root`
        // returning `None` (Promo-2). Three old touches of the same root.
        let sessions = vec![make_session(vec![
            (
                format!("work on {}/src/lib.rs", root.display()).as_str(),
                old,
            ),
            (
                format!("work on {}/Cargo.toml", root.display()).as_str(),
                old,
            ),
            (format!("work on {}", root.display()).as_str(), old),
        ])];
        let config = PromotionConfig {
            window_days: 14,
            ..Default::default()
        };
        let freqs = tally_frequencies(&sessions, &config);
        // Old events outside the window must not appear.
        assert!(freqs.is_empty(), "expected empty freqs, got {:?}", freqs);
    }

    #[test]
    fn test_normalize_collapses_files_to_root() {
        // This crate has Cargo.toml at its root, so a file below it must
        // collapse to the (canonical) crate root, whatever the checkout
        // directory happens to be named (Promo-2).
        let file = crate_root().join("src/lib.rs");
        let root = normalize_to_root(&file).expect("should find root");
        assert_eq!(root, canonical_crate_root());
    }

    #[test]
    fn test_normalize_expands_tilde() {
        // Promo-4: test the pure prefix-expansion logic without touching the
        // process environment (avoids unsafe set_var + parallel-test races).
        let home = std::ffi::OsString::from("/Users/test");
        // `~/foo` → `$HOME/foo`
        assert_eq!(
            expand_tilde_with_home(Path::new("~/foo"), Some(home.clone())),
            PathBuf::from("/Users/test/foo")
        );
        // bare `~` → `$HOME`
        assert_eq!(
            expand_tilde_with_home(Path::new("~"), Some(home.clone())),
            PathBuf::from("/Users/test")
        );
        // absolute path unchanged
        assert_eq!(
            expand_tilde_with_home(Path::new("/etc/passwd"), Some(home.clone())),
            PathBuf::from("/etc/passwd")
        );
        // relative path unchanged
        assert_eq!(
            expand_tilde_with_home(Path::new("relative/path"), Some(home.clone())),
            PathBuf::from("relative/path")
        );
        // No HOME available → `~` paths pass through untouched.
        assert_eq!(
            expand_tilde_with_home(Path::new("~/foo"), None),
            PathBuf::from("~/foo")
        );
    }

    #[test]
    fn test_collect_path_like_bounds_recursion_depth() {
        // Promo-10: deeply nested JSON must not overflow the stack.
        // Build a value nested far beyond MAX_DEPTH and confirm collection
        // returns without panicking.
        let mut value = serde_json::json!({"path": "/usr/local/bin"});
        for _ in 0..100 {
            value = serde_json::json!({ "nested": value });
        }
        let mut out = Vec::new();
        collect_path_like_strings(&value, &mut out, Utc::now());
        // The deep path is unreachable (>32 levels) so nothing is collected.
        assert!(out.is_empty(), "expected no paths past depth bound");

        // A shallow path IS collected.
        let shallow = serde_json::json!({
            "a": { "b": { "file": "/usr/local/bin/oxios" } }
        });
        let mut out2 = Vec::new();
        collect_path_like_strings(&shallow, &mut out2, Utc::now());
        assert_eq!(out2.len(), 1);
        assert_eq!(out2[0].0, "/usr/local/bin/oxios");
    }
}