Skip to main content

oxios_kernel/mount/
detection.rs

1//! Mount detection: find a Mount matching user input (RFC-025).
2//!
3//! Replaces RFC-011's tag-based detection layer 3 with `auto_meta` keyword
4//! matching. Layers:
5//! 1. Direct name match ("oxios" → Mount named "oxios")
6//! 2. Path extraction + prefix match (most specific wins)
7//! 3. `auto_meta` keyword match (languages / stack / summary keywords)
8
9use std::path::PathBuf;
10
11use super::{Mount, MountId};
12
/// Reports whether `needle` occurs in `haystack` as a standalone token,
/// ignoring case.
///
/// Only ASCII alphanumerics and `_` glue characters together into a word:
///   - A match flanked by an ASCII letter/digit/`_` is rejected, so "go" is
///     not found in "going" and "rust" is not found in "trust".
///   - Any other neighbour (whitespace, punctuation, or a non-ASCII script
///     character) counts as a boundary, so a Latin name immediately followed
///     by a Korean/Japanese postposition ("oxios에서", "oxios로") still
///     matches.
///
/// An empty `needle` never matches.
///
/// Unicode-safe: candidate positions are taken from `char_indices`, so every
/// slice is made on a character boundary even for multi-byte haystacks.
/// Every character position is tried, which means overlapping occurrences
/// of `needle` are all considered.
fn contains_word(haystack: &str, needle: &str) -> bool {
    /// Word-continuation test: ASCII letters, digits and `_` extend a token;
    /// every other character terminates it.
    fn is_word_char(c: char) -> bool {
        c == '_' || c.is_ascii_alphanumeric()
    }

    if needle.is_empty() {
        return false;
    }
    let text = haystack.to_lowercase();
    let token = needle.to_lowercase();

    text.char_indices().any(|(at, _)| {
        // Does the token start at this character position?
        let Some(rest) = text[at..].strip_prefix(token.as_str()) else {
            return false;
        };
        // The characters on either side (when present) must not continue a word.
        let glued_left = text[..at].chars().next_back().is_some_and(is_word_char);
        let glued_right = rest.chars().next().is_some_and(is_word_char);
        !glued_left && !glued_right
    })
}
75
76/// Result of a Mount lookup attempt.
77#[derive(Debug, Clone)]
78pub enum DetectionResult {
79    /// Found a matching Mount.
80    Found(MountId),
81    /// No Mount matched. Optionally, a path was detected.
82    NoMatch { detected_path: Option<PathBuf> },
83}
84
85/// Try to detect a Mount from a user message.
86///
87/// Detection considers **only Mounts**, never Projects (RFC-025: Projects
88/// always carry user-written instructions and shouldn't be guessed).
89pub fn detect_mounts(message: &str, mounts: &[Mount]) -> DetectionResult {
90    let lower = message.to_lowercase();
91
92    // Layer 1: Direct name match (case-insensitive, whole-word match).
93    // Match the longest name first so "oxios-dev" wins over "oxios".
94    // Names shorter than 3 chars are too ambiguous for Layer 1 ("go", "ai",
95    // "os", "pi") — they are skipped here (mirrors Layer 3's `kw.len() >= 3`).
96    let mut by_name: Vec<&Mount> = mounts
97        .iter()
98        .filter(|m| m.name.len() >= 3 && contains_word(&lower, &m.name))
99        .collect();
100    by_name.sort_by_key(|m| std::cmp::Reverse(m.name.len()));
101    if let Some(m) = by_name.first() {
102        return DetectionResult::Found(m.id);
103    }
104
105    // Layer 2: Path extraction + prefix match (most specific path wins).
106    if let Some(path) = extract_path(message) {
107        let matching: Vec<&Mount> = mounts
108            .iter()
109            .filter(|m| {
110                m.paths
111                    .iter()
112                    .any(|p| path.starts_with(p) || p.starts_with(&path))
113            })
114            .collect();
115        if matching.len() == 1 {
116            return DetectionResult::Found(matching[0].id);
117        }
118        if matching.len() > 1 {
119            // Prefer the most specific path (longest matching prefix).
120            let best = matching
121                .into_iter()
122                .max_by_key(|m| {
123                    m.paths
124                        .iter()
125                        .filter(|p| path.starts_with(p))
126                        .map(|p| p.components().count())
127                        .max()
128                        .unwrap_or(0)
129                })
130                .expect("non-empty");
131            return DetectionResult::Found(best.id);
132        }
133        return DetectionResult::NoMatch {
134            detected_path: Some(path),
135        };
136    }
137
138    // Layer 3: auto_meta keyword match (languages / stack / summary).
139    //
140    // Iterate in deterministic order: most recently active first, then by
141    // name. The caller-supplied `mounts` slice order is not guaranteed stable
142    // (MountManager builds it from a HashMap), so without sorting the winner
143    // among mounts sharing a keyword would be non-deterministic.
144    let mut sorted: Vec<&Mount> = mounts.iter().collect();
145    sorted.sort_by(|a, b| {
146        b.last_active_at
147            .cmp(&a.last_active_at)
148            .then_with(|| a.name.cmp(&b.name))
149    });
150    for mount in &sorted {
151        // Split the summary into individual words so that a multi-word summary
152        // (e.g. "Agent OS in Rust") does not have to match verbatim.
153        let keywords: Vec<String> = mount
154            .auto_meta
155            .languages
156            .iter()
157            .chain(mount.auto_meta.stack.iter())
158            .cloned()
159            .chain(mount.auto_meta.summary.split_whitespace().map(String::from))
160            .collect();
161        for kw in keywords {
162            let kw = kw.trim().to_lowercase();
163            if kw.len() >= 3 && contains_word(&lower, &kw) {
164                return DetectionResult::Found(mount.id);
165            }
166        }
167    }
168
169    DetectionResult::NoMatch {
170        detected_path: None,
171    }
172}
173
/// Extract a filesystem path from a message string.
///
/// Looks for whitespace-separated tokens shaped like `/path/to/something` or
/// `~/path`, after stripping surrounding punctuation such as quotes,
/// brackets and commas.
///
/// Absolute paths take precedence: the first absolute path anywhere in the
/// message is returned before any `~/` token is considered. A `~/` token is
/// expanded using the `HOME` environment variable; if `HOME` is unset, tilde
/// tokens yield nothing.
///
/// Returns `None` when no token looks like a path.
pub fn extract_path(message: &str) -> Option<PathBuf> {
    /// Characters allowed to remain at either end of a path token.
    fn is_path_char(c: char) -> bool {
        c.is_alphanumeric() || matches!(c, '/' | '.' | '-' | '_')
    }

    // Pass 1: absolute paths.
    //
    // A leading `~` is deliberately kept while trimming. Stripping it would
    // turn `~/notes` into `/notes` and misreport a home-relative path as an
    // absolute one. Trailing `~` is still stripped.
    let absolute = message
        .split_whitespace()
        .map(|word| {
            word.trim_start_matches(|c: char| !(is_path_char(c) || c == '~'))
                .trim_end_matches(|c: char| !is_path_char(c))
        })
        .filter(|token| token.starts_with('/') && token.len() > 2)
        .map(PathBuf::from)
        // Rejects tokens that are nothing but the root (e.g. "///").
        .find(|candidate| candidate.parent().is_some());
    if absolute.is_some() {
        return absolute;
    }

    // Pass 2: `~/`-prefixed paths, expanded against $HOME.
    let relative = message
        .split_whitespace()
        .map(|word| word.trim_matches(|c: char| !(is_path_char(c) || c == '~')))
        .find_map(|token| token.strip_prefix("~/").filter(|rest| !rest.is_empty()))?;
    let home = std::env::var_os("HOME")?;
    // Leading slashes are dropped so `join` appends to the home directory
    // rather than replacing it with an absolute path.
    Some(PathBuf::from(home).join(relative.trim_start_matches('/')))
}
205
206/// Find a Mount by exact ID.
207pub fn find_by_id(mounts: &[Mount], id: MountId) -> Option<&Mount> {
208    mounts.iter().find(|m| m.id == id)
209}
210
211/// Find a Mount by name (case-insensitive).
212pub fn find_by_name<'a>(mounts: &'a [Mount], name: &str) -> Option<&'a Mount> {
213    let lower = name.to_lowercase();
214    mounts.iter().find(|m| m.name.to_lowercase() == lower)
215}
216
#[cfg(test)]
mod tests {
    use super::*;

    /// Shared fixture, in this order: `oxios` (rust / tokio), `oxi` (rust,
    /// with a summary), and `my-blog` (typescript / nextjs).
    fn make_mounts() -> Vec<Mount> {
        let mut oxios =
            Mount::from_name_and_path("oxios", PathBuf::from("/Volumes/MERCURY/PROJECTS/oxios"));
        oxios.auto_meta.languages = vec![String::from("rust")];
        oxios.auto_meta.stack = vec![String::from("tokio")];

        let mut oxi =
            Mount::from_name_and_path("oxi", PathBuf::from("/Volumes/MERCURY/PROJECTS/oxi"));
        oxi.auto_meta.languages = vec![String::from("rust")];
        oxi.auto_meta.summary = String::from("SDK for Oxios agents");

        let mut blog = Mount::from_name_and_path("my-blog", PathBuf::from("/Users/me/blog"));
        blog.auto_meta.languages = vec![String::from("typescript")];
        blog.auto_meta.stack = vec![String::from("nextjs")];

        vec![oxios, oxi, blog]
    }

    #[test]
    fn test_detect_by_name() {
        let mounts = make_mounts();
        let outcome = detect_mounts("oxios 코드리뷰해줘", &mounts);
        assert!(matches!(outcome, DetectionResult::Found(id) if id == mounts[0].id));
    }

    #[test]
    fn test_detect_longest_name_wins() {
        // Both "oxios-dev" and "oxios" occur in the message; the longer name
        // must be the one reported.
        let mut mounts = make_mounts();
        let dev = Mount::from_name_and_path("oxios-dev", PathBuf::from("/dev"));
        mounts.push(dev);

        let id = match detect_mounts("working on oxios-dev now", &mounts) {
            DetectionResult::Found(id) => id,
            other => panic!("expected Found, got {other:?}"),
        };
        let winner = find_by_id(&mounts, id).unwrap();
        assert_eq!(winner.name, "oxios-dev");
    }

    #[test]
    fn test_detect_by_path() {
        let mounts = make_mounts();
        let outcome = detect_mounts("/Volumes/MERCURY/PROJECTS/oxios에서 작업", &mounts);
        assert!(matches!(outcome, DetectionResult::Found(id) if id == mounts[0].id));
    }

    #[test]
    fn test_detect_by_meta_keyword() {
        let mounts = make_mounts();
        // "nextjs" is a stack keyword that only my-blog carries.
        let id = match detect_mounts("nextjs 관련 도움이 필요해", &mounts) {
            DetectionResult::Found(id) => id,
            other => panic!("expected Found (my-blog), got {other:?}"),
        };
        let winner = find_by_id(&mounts, id).unwrap();
        assert_eq!(winner.name, "my-blog");
    }

    #[test]
    fn test_detect_no_match_with_path() {
        let mounts = make_mounts();
        let outcome = detect_mounts("/Volumes/MERCURY/PROJECTS/unknown 에서 작업", &mounts);
        assert!(matches!(
            outcome,
            DetectionResult::NoMatch {
                detected_path: Some(_)
            }
        ));
    }

    #[test]
    fn test_detect_no_match() {
        let mounts = make_mounts();
        let outcome = detect_mounts("오늘 점심 뭐 먹지?", &mounts);
        assert!(matches!(
            outcome,
            DetectionResult::NoMatch {
                detected_path: None
            }
        ));
    }

    #[test]
    fn test_extract_path() {
        let expected = PathBuf::from("/Volumes/MERCURY/PROJECTS/oxios");
        assert_eq!(
            extract_path("/Volumes/MERCURY/PROJECTS/oxios"),
            Some(expected)
        );
        assert_eq!(extract_path("no path here"), None);
    }

    #[test]
    fn test_find_by_name() {
        let mounts = make_mounts();
        assert!(find_by_name(&mounts, "oxios").is_some());
        assert!(find_by_name(&mounts, "Oxios").is_some());
        assert!(find_by_name(&mounts, "nonexistent").is_none());
    }

    // --- RFC-025 detection hardening (issues M1/M2/M3) ---

    #[test]
    fn test_short_name_not_substring_matched() {
        // A Mount called "go" (fewer than 3 characters) must NOT be detected
        // when those letters only occur inside longer words ("going",
        // "again").
        let mounts = vec![Mount::from_name_and_path("go", PathBuf::from("/p/go"))];
        let outcome = detect_mounts("i am going there again", &mounts);
        assert!(
            matches!(outcome, DetectionResult::NoMatch { .. }),
            "short name 'go' must not substring-match 'going'/'again'"
        );
    }

    #[test]
    fn test_name_word_boundary_no_substring() {
        // A name of 3+ characters must not be detected inside a larger
        // token: "ring" is embedded in "during", "string" and "brings".
        let mounts = vec![Mount::from_name_and_path("ring", PathBuf::from("/p/ring"))];
        let embedded = detect_mounts("during the string test it brings results", &mounts);
        assert!(
            matches!(embedded, DetectionResult::NoMatch { .. }),
            "name 'ring' must not substring-match 'during'/'string'/'brings'"
        );
        // As a standalone word it must be detected.
        let standalone = detect_mounts("let's talk about ring design", &mounts);
        assert!(matches!(standalone, DetectionResult::Found(_)));
    }

    #[test]
    fn test_keyword_word_boundary_no_substring() {
        // The Layer 3 keyword "rust" must not be detected inside "trust".
        let mounts = make_mounts();
        let outcome = detect_mounts("i really trust you on this", &mounts);
        assert!(
            matches!(outcome, DetectionResult::NoMatch { .. }),
            "keyword 'rust' must not substring-match 'trust'"
        );
    }

    #[test]
    fn test_word_boundary_with_cjk_after() {
        // A name followed by a space and then CJK text is still a whole word.
        let mounts = make_mounts();
        let outcome = detect_mounts("oxios 코드리뷰", &mounts);
        assert!(matches!(outcome, DetectionResult::Found(id) if id == mounts[0].id));
    }

    #[test]
    fn test_layer3_most_recent_active_wins() {
        // Two Mounts carry the "rust" keyword. Whichever was active most
        // recently must win, independent of their position in the input
        // slice (deterministic tie-break on shared keywords — issue M3).
        let mut oxios = Mount::from_name_and_path("oxios", PathBuf::from("/p/oxios"));
        oxios.auto_meta.languages = vec![String::from("rust")];

        let mut oxi = Mount::from_name_and_path("oxi", PathBuf::from("/p/oxi"));
        oxi.auto_meta.languages = vec![String::from("rust")];
        // `oxi` becomes the more recently active of the two.
        oxi.last_active_at = oxios.last_active_at + chrono::Duration::seconds(60);

        // Least-recent first, on purpose.
        let mounts = vec![oxios, oxi];
        let recent_id = mounts[1].id;
        let id = match detect_mounts("help with a rust project", &mounts) {
            DetectionResult::Found(id) => id,
            other => panic!("expected Found, got {other:?}"),
        };
        assert_eq!(
            id, recent_id,
            "most recently active mount should win on shared keyword"
        );
    }
}