// oxios_kernel/mount/path_promotion.rs

//! Path promotion: auto-create Mounts for frequently-used paths (RFC-025 Phase 5).
//!
//! Unlike runtime detection (which only matches *existing* Mounts), this
//! module scans session history to find paths the agent has *actually worked
//! on* (via tool calls) or the user has *explicitly mentioned*, counts them,
//! and promotes paths that cross a frequency threshold into Mounts.
//!
//! ## Pipeline
//!
//! 1. **Extract** raw paths from session trajectories (`tool_args`) and user
//!    messages.
//! 2. **Normalize** each path to its project root by walking up the directory
//!    tree until a marker file (`Cargo.toml`, `package.json`, `.git`, …) is
//!    found. This collapses `/proj/src/main.rs` and `/proj/Cargo.toml` into
//!    the single root `/proj`.
//! 3. **Count** normalized roots over a sliding window, at most once per
//!    session.
//! 4. **Promote** roots that cross the threshold into Mounts (unless one
//!    already covers that root).
//!
//! ## Why background, not realtime
//!
//! Normalizing each extracted path requires walking the filesystem (to find
//! markers), so we run this on a cadence (alongside Dream consolidation)
//! rather than on every message. The threshold naturally debounces one-off
//! mentions.
25
26use std::collections::{HashMap, HashSet};
27use std::path::{Path, PathBuf};
28
29use chrono::{DateTime, Duration, Utc};
30
31use crate::state_store::Session;
32
/// File and directory names that identify the top of a project.
///
/// A directory holding at least one of these entries is treated as a
/// self-contained project root.
///
/// The list mirrors `meta_detection::MARKERS` with `.git` (VCS root) added. It
/// is deliberately a separate constant so promotion logic can evolve on its
/// own.
const ROOT_MARKERS: &[&str] = &[
    // Language / build-system manifests.
    "Cargo.toml",
    "package.json",
    "go.mod",
    "pyproject.toml",
    "setup.py",
    "pom.xml",
    "build.gradle",
    "build.gradle.kts",
    "Gemfile",
    "composer.json",
    "mix.exs",
    "CMakeLists.txt",
    "Makefile",
    // Agent instructions file.
    "AGENTS.md",
    // Version-control root.
    ".git",
];
55
56/// A path's frequency tally within the promotion window.
57#[derive(Debug, Clone, Default)]
58pub struct PathFrequency {
59    /// Number of times a path under this root was touched/mentioned.
60    pub count: usize,
61    /// Most recent timestamp among the contributing events.
62    pub last_seen: Option<DateTime<Utc>>,
63}
64
/// Tunables for path promotion (mirrors `MountsConfig`).
#[derive(Debug, Clone)]
pub struct PromotionConfig {
    /// Master switch: when `false`, promotion is disabled entirely.
    pub enabled: bool,
    /// Minimum number of distinct touches inside the window that a root needs
    /// before it is promoted.
    pub threshold: usize,
    /// Length of the look-back window in days. Events older than this are
    /// ignored.
    pub window_days: i64,
}

impl Default for PromotionConfig {
    /// Promotion enabled, three touches required, two-week window.
    fn default() -> Self {
        PromotionConfig {
            window_days: 14,
            threshold: 3,
            enabled: true,
        }
    }
}
85
86/// Extract raw path strings from a single session.
87///
88/// Sources (most-trusted first):
89/// - `tool_args` of trajectory steps (paths the agent actually operated on)
90/// - user messages (explicit mentions)
91///
92/// Each returned `(path, timestamp)` pair is one "touch". A single message
93/// may contribute several touches if it references several paths.
94pub fn extract_paths(session: &Session) -> Vec<(String, DateTime<Utc>)> {
95    let mut out = Vec::new();
96
97    // Trajectory tool_args: pull any string value that looks like a path.
98    for step in &session.trajectory_steps {
99        collect_path_like_strings(&step.tool_args, &mut out, step.timestamp);
100    }
101
102    // User messages: word-level path extraction.
103    for msg in &session.user_messages {
104        for word in msg.content.split_whitespace() {
105            if looks_like_path(word) {
106                out.push((word.trim_matches(punct).to_string(), msg.timestamp));
107            }
108        }
109    }
110
111    out
112}
113
114/// Normalize a raw path to its project root, then tally frequencies.
115///
116/// Returns a map of `root -> PathFrequency` restricted to events within the
117/// configured window.
118///
119/// Frequency is computed **per distinct root per session**: a single session
120/// that mentions the same project root ten times counts once, not ten times.
121/// This prevents one chatty session from inflating a root across the
122/// threshold (Promo-7).
123pub fn tally_frequencies(
124    sessions: &[Session],
125    config: &PromotionConfig,
126) -> HashMap<PathBuf, PathFrequency> {
127    let cutoff = Utc::now() - Duration::days(config.window_days);
128    let mut freqs: HashMap<PathBuf, PathFrequency> = HashMap::new();
129
130    for session in sessions {
131        // Deduplicate roots within a single session before counting: each
132        // distinct root contributes at most one touch per session.
133        let mut distinct_roots: HashSet<PathBuf> = HashSet::new();
134        let mut root_last_seen: HashMap<PathBuf, DateTime<Utc>> = HashMap::new();
135        for (raw, ts) in extract_paths(session) {
136            if ts < cutoff {
137                continue;
138            }
139            let Some(root) = normalize_to_root(Path::new(&raw)) else {
140                continue;
141            };
142            distinct_roots.insert(root.clone());
143            // Track the most recent touch for this root in this session.
144            root_last_seen
145                .entry(root)
146                .and_modify(|prev| *prev = (*prev).max(ts))
147                .or_insert(ts);
148        }
149
150        for root in distinct_roots {
151            let ts = root_last_seen[&root];
152            let entry = freqs.entry(root).or_default();
153            entry.count += 1; // +1 per distinct root per session
154            entry.last_seen = Some(
155                entry
156                    .last_seen
157                    .map_or(ts, |prev: DateTime<Utc>| prev.max(ts)),
158            );
159        }
160    }
161
162    freqs
163}
164
165/// Find the project root for `path` by walking up until a marker is found.
166///
167/// - If `path` itself is a directory containing a marker, it is its own root.
168/// - Otherwise we walk up ancestors looking for the first directory that
169///   contains a marker.
170/// - Returns `None` if no marker is found within the filesystem (e.g. the
171///   path doesn't exist, or it's a loose file with no project context).
172pub fn normalize_to_root(path: &Path) -> Option<PathBuf> {
173    // Expand a leading `~` to the home directory before canonicalizing so
174    // that home-relative paths (`~/projects/foo`) resolve correctly (Promo-4).
175    // `std::fs::canonicalize` does *not* expand `~`, so without this those
176    // paths would fall back to the raw form and fail to find markers.
177    let expanded = expand_tilde(path);
178
179    // Canonicalize to resolve `..` and symlinks. Fall back to the raw path
180    // if the file no longer exists (it may still be a meaningful prefix).
181    let canonical = std::fs::canonicalize(&expanded).unwrap_or(expanded);
182
183    // Start from the path itself if it's a directory, else from its parent.
184    let start = if canonical.is_dir() {
185        canonical.clone()
186    } else {
187        canonical.parent()?.to_path_buf()
188    };
189
190    // Walk up looking for a marker.
191    let mut candidate = Some(start.as_path());
192    while let Some(dir) = candidate {
193        if has_marker(dir) {
194            return Some(dir.to_path_buf());
195        }
196        candidate = dir.parent();
197    }
198    None
199}
200
201/// Returns `true` if `dir` contains any root marker file.
202fn has_marker(dir: &Path) -> bool {
203    ROOT_MARKERS.iter().any(|m| dir.join(m).exists())
204}
205
206/// Expand a leading `~` (or `~user`, though only `~` is common) to the home
207/// directory. Returns the original path unchanged if it doesn't start with `~`
208/// or if `HOME` is unavailable (Promo-4).
209fn expand_tilde(path: &Path) -> PathBuf {
210    expand_tilde_with_home(path, std::env::var_os("HOME"))
211}
212
/// Pure helper: same as `expand_tilde` but with the home directory passed
/// in explicitly. Split out so tests can exercise the prefix logic without
/// mutating the process-wide `HOME` environment variable (Promo-4).
///
/// Behaviour:
/// - `~` → `home`
/// - `~/rest` → `home/rest`
/// - anything else (absolute paths, relative paths, `~user/…`) → unchanged
/// - `home` is `None` or empty → unchanged, because joining onto an empty
///   home would silently turn `~/x` into the relative path `x`
///
/// The match is done on path *components* rather than on a lossily decoded
/// string, so non-UTF-8 bytes after the `~` are preserved exactly. A doubled
/// separator (`~//x`) cannot produce an absolute remainder that would discard
/// `home` when joined.
fn expand_tilde_with_home(path: &Path, home: Option<std::ffi::OsString>) -> PathBuf {
    let Some(home) = home.filter(|h| !h.is_empty()) else {
        // No usable HOME → leave `~` paths untouched.
        return path.to_path_buf();
    };

    // `strip_prefix` succeeds only when the first component is exactly `~`.
    match path.strip_prefix("~") {
        // Bare `~`: return home itself, without a trailing separator.
        Ok(rest) if rest.as_os_str().is_empty() => PathBuf::from(home),
        Ok(rest) => PathBuf::from(home).join(rest),
        Err(_) => path.to_path_buf(),
    }
}
230
231/// Collect path-like string values from a JSON value (recursively).
232///
233/// `depth` bounds the recursion to prevent stack overflow on pathologically
234/// nested JSON (Promo-10). The bound of 32 comfortably exceeds any
235/// realistic tool_args payload.
236fn collect_path_like_strings(
237    value: &serde_json::Value,
238    out: &mut Vec<(String, DateTime<Utc>)>,
239    ts: DateTime<Utc>,
240) {
241    collect_path_like_strings_inner(value, out, ts, 0);
242}
243
244/// Inner recursive worker carrying the current `depth`.
245fn collect_path_like_strings_inner(
246    value: &serde_json::Value,
247    out: &mut Vec<(String, DateTime<Utc>)>,
248    ts: DateTime<Utc>,
249    depth: u32,
250) {
251    // Promo-10: bound recursion depth to avoid stack overflow on deeply
252    // (or cyclically) nested JSON.
253    const MAX_DEPTH: u32 = 32;
254    if depth > MAX_DEPTH {
255        return;
256    }
257    match value {
258        serde_json::Value::String(s) => {
259            if looks_like_path(s) {
260                out.push((s.trim_matches(punct).to_string(), ts));
261            }
262        }
263        serde_json::Value::Array(arr) => {
264            for v in arr {
265                collect_path_like_strings_inner(v, out, ts, depth + 1);
266            }
267        }
268        serde_json::Value::Object(map) => {
269            for v in map.values() {
270                collect_path_like_strings_inner(v, out, ts, depth + 1);
271            }
272        }
273        _ => {}
274    }
275}
276
277/// Heuristic: does this string look like an absolute or home-relative path?
278///
279/// Note: `normalize_to_root` filters out most false positives anyway, so this
280/// only needs to be a coarse pre-filter.
281fn looks_like_path(s: &str) -> bool {
282    let s = s.trim_matches(punct);
283    // Reject UNC paths (//host/share) — not local filesystem paths.
284    if s.starts_with("//") {
285        return false;
286    }
287    // Absolute unix path with at least one separator and some depth.
288    (s.starts_with('/') && s.matches('/').count() >= 2 && s.len() > 3)
289        || (s.starts_with("~/") && s.len() > 3)
290}
291
/// Predicate for characters stripped from both ends of a path-like token
/// before it is parsed: quotes, backtick, comma, semicolon, and every kind of
/// bracket.
fn punct(c: char) -> bool {
    const EDGE_CHARS: [char; 11] = [
        '"', '\'', '`', ',', ';', ')', ']', '}', '(', '[', '{',
    ];
    EDGE_CHARS.contains(&c)
}
300
#[cfg(test)]
mod tests {
    use super::*;
    use crate::state_store::{Session, SessionId, UserMessage};

    /// Build a session whose user messages carry the given timestamps.
    ///
    /// Messages are pushed straight onto `user_messages` because
    /// `Session::add_user_message` stamps `Utc::now()`, which would defeat
    /// the window tests.
    fn make_session(msgs: Vec<(&str, DateTime<Utc>)>) -> Session {
        let mut s = Session::new("test");
        s.id = SessionId("s1".into());
        s.user_messages
            .extend(msgs.into_iter().map(|(content, timestamp)| UserMessage {
                content: content.into(),
                timestamp,
            }));
        s
    }

    #[test]
    fn test_looks_like_path() {
        assert!(looks_like_path("/usr/local/bin"));
        assert!(looks_like_path("~/projects/foo"));
        assert!(!looks_like_path("hello world"));
        assert!(!looks_like_path("/x")); // too shallow
        assert!(!looks_like_path("no-slash"));
    }

    /// Path to this crate's root — used instead of a hardcoded developer
    /// path so the tests are portable (Promo-2). It has `Cargo.toml` at its
    /// root, so `normalize_to_root` collapses any child to it.
    fn crate_root() -> PathBuf {
        PathBuf::from(env!("CARGO_MANIFEST_DIR"))
    }

    /// The crate root in the canonical form `normalize_to_root` returns for
    /// existing paths. Comparing against this avoids depending on what the
    /// checkout directory happens to be called.
    fn canonical_crate_root() -> PathBuf {
        std::fs::canonicalize(crate_root()).expect("crate root exists while tests run")
    }

    #[test]
    fn test_tally_counts_repeated_paths() {
        let now = Utc::now();
        let root = crate_root();
        let sessions = vec![make_session(vec![
            (format!("fix {}/src/lib.rs", root.display()).as_str(), now),
            (
                format!("also check {}/Cargo.toml", root.display()).as_str(),
                now,
            ),
            (format!("again {}", root.display()).as_str(), now),
        ])];

        let config = PromotionConfig {
            threshold: 1,
            ..Default::default()
        };
        let freqs = tally_frequencies(&sessions, &config);

        // All three mentions collapse to the same project root (Cargo.toml).
        let freq = freqs
            .get(&canonical_crate_root())
            .unwrap_or_else(|| panic!("expected root in {:?}", freqs));
        // Promo-7: a single session counts each distinct root once, so the
        // count is exactly 1 regardless of how many times it was mentioned.
        assert_eq!(freq.count, 1);
        assert_eq!(freq.last_seen, Some(now));
    }

    #[test]
    fn test_tally_respects_window() {
        let now = Utc::now();
        let old = now - Duration::days(30);
        let root = crate_root();
        // Use a real project root (the crate itself) so that the *only* reason
        // the tally is empty is the window filter, not `normalize_to_root`
        // returning `None` (Promo-2). Three old touches of the same root.
        let sessions = vec![make_session(vec![
            (
                format!("work on {}/src/lib.rs", root.display()).as_str(),
                old,
            ),
            (
                format!("work on {}/Cargo.toml", root.display()).as_str(),
                old,
            ),
            (format!("work on {}", root.display()).as_str(), old),
        ])];
        let config = PromotionConfig {
            window_days: 14,
            ..Default::default()
        };
        let freqs = tally_frequencies(&sessions, &config);
        // Old events outside the window must not appear.
        assert!(freqs.is_empty(), "expected empty freqs, got {:?}", freqs);
    }

    #[test]
    fn test_normalize_collapses_files_to_root() {
        // This crate has `Cargo.toml` at its root, so a source file inside it
        // must normalize to the (canonical) crate root.
        let file = crate_root().join("src/lib.rs");
        let root = normalize_to_root(&file).expect("should find root");
        assert_eq!(root, canonical_crate_root());
    }

    #[test]
    fn test_normalize_expands_tilde() {
        // Promo-4: test the pure prefix-expansion logic without touching the
        // process environment (avoids unsafe set_var + parallel-test races).
        let home = std::ffi::OsString::from("/Users/test");
        // `~/foo` → `$HOME/foo`
        assert_eq!(
            expand_tilde_with_home(Path::new("~/foo"), Some(home.clone())),
            PathBuf::from("/Users/test/foo")
        );
        // bare `~` → `$HOME`
        assert_eq!(
            expand_tilde_with_home(Path::new("~"), Some(home.clone())),
            PathBuf::from("/Users/test")
        );
        // absolute path unchanged
        assert_eq!(
            expand_tilde_with_home(Path::new("/etc/passwd"), Some(home.clone())),
            PathBuf::from("/etc/passwd")
        );
        // relative path unchanged
        assert_eq!(
            expand_tilde_with_home(Path::new("relative/path"), Some(home.clone())),
            PathBuf::from("relative/path")
        );
        // No HOME available → `~` paths pass through untouched.
        assert_eq!(
            expand_tilde_with_home(Path::new("~/foo"), None),
            PathBuf::from("~/foo")
        );
    }

    #[test]
    fn test_collect_path_like_bounds_recursion_depth() {
        // Promo-10: deeply nested JSON must not overflow the stack.
        // Build a value nested far beyond MAX_DEPTH and confirm collection
        // returns without panicking.
        let mut value = serde_json::json!({"path": "/usr/local/bin"});
        for _ in 0..100 {
            value = serde_json::json!({ "nested": value });
        }
        let mut out = Vec::new();
        collect_path_like_strings(&value, &mut out, Utc::now());
        // The deep path is unreachable (>32 levels) so nothing is collected.
        assert!(out.is_empty(), "expected no paths past depth bound");

        // A shallow path IS collected.
        let shallow = serde_json::json!({
            "a": { "b": { "file": "/usr/local/bin/oxios" } }
        });
        let mut out2 = Vec::new();
        collect_path_like_strings(&shallow, &mut out2, Utc::now());
        assert_eq!(out2.len(), 1);
        assert_eq!(out2[0].0, "/usr/local/bin/oxios");
    }
}