Skip to main content

oxios_kernel/space/
detection.rs

1//! Detection: 3-layer Space detection strategy.
2//!
3//! Layer 1: Filesystem path extraction (regex, fast, free)
4//! Layer 2: Keyword/tag matching (fast, free)
5//! Layer 3: LLM topic classification (slow, only when needed)
6
7use std::collections::HashMap;
8use std::path::{Path, PathBuf};
9
10use super::{Space, SpaceId};
11
/// Outcome of classifying a message into a topic.
#[derive(Debug, Clone)]
pub struct Topic {
    /// Topic label (e.g., "일상", "요리", "개발").
    pub name: String,
    /// Confidence score (0.0 – 1.0). Scores under the clarity threshold mean "unclear".
    pub confidence: f32,
}

impl Topic {
    /// Minimum confidence at which a topic counts as clear.
    const CLEAR_THRESHOLD: f32 = 0.5;

    /// Reports whether the classification is confident enough to create a named Space.
    ///
    /// The threshold is inclusive. A `NaN` confidence is never clear, because
    /// every ordered comparison against `NaN` is false.
    pub fn is_clear(&self) -> bool {
        self.confidence >= Self::CLEAR_THRESHOLD
    }
}
27
28/// PathMatcher: matches filesystem paths to Spaces.
29#[derive(Debug, Clone, Default)]
30pub struct PathMatcher {
31    /// space_id -> normalized path prefix
32    space_paths: HashMap<SpaceId, PathBuf>,
33}
34
35impl PathMatcher {
36    /// Register a Space's primary path.
37    pub fn register(&mut self, space: &Space) {
38        if let Some(path) = space.paths.first() {
39            let normalized = normalize_path(path);
40            self.space_paths.insert(space.id, normalized);
41        }
42    }
43
44    /// Find a Space that matches the given path.
45    pub fn find_space(&self, path: &Path) -> Option<SpaceId> {
46        let normalized = normalize_path(path);
47
48        for (space_id, prefix) in &self.space_paths {
49            if normalized.starts_with(prefix)
50                || prefix.starts_with(&normalized)
51                || paths_overlap(&normalized, prefix)
52            {
53                return Some(*space_id);
54            }
55        }
56
57        None
58    }
59
60    /// Check if any registered Space matches this path.
61    pub fn matches(&self, path: &Path) -> bool {
62        self.find_space(path).is_some()
63    }
64}
65
66/// Extract a filesystem path from a message.
67///
68/// Detects paths starting with `/`, `~/`, `./`, or absolute Windows paths.
69pub fn extract_filesystem_path(message: &str) -> Option<PathBuf> {
70    // Regex patterns for common path formats
71    let patterns = [
72        // Unix absolute: /home/user/... or /Volumes/...
73        r"/[a-zA-Z0-9_.~-][a-zA-Z0-9_.~/-]*",
74        // Home directory: ~/...
75        r"~/[a-zA-Z0-9_.~-][a-zA-Z0-9_.~/-]*",
76        // Relative: ./foo or ../foo
77        r"\./[a-zA-Z0-9_.~/-]+",
78        r"\.\./[a-zA-Z0-9_.~/-]+",
79        // Windows absolute: C:\ or D:\
80        r"[A-Za-z]:[/\\][^\\]+",
81        // Git URLs
82        r"https?://[^\\s]+",
83    ];
84
85    for pattern in patterns {
86        if let Ok(re) = regex::Regex::new(pattern) {
87            if let Some(m) = re.find(message) {
88                let path_str = m.as_str();
89                // Skip if this looks like a URL query parameter (has ? or & after)
90                let after = &message[m.end()..];
91                if after.starts_with('?') || after.starts_with('&') {
92                    continue;
93                }
94                // Return the first match
95                return Some(PathBuf::from(path_str));
96            }
97        }
98    }
99
100    None
101}
102
103/// Match a message against Space keywords/tags.
104pub fn match_keywords(message: &str, spaces: &[Space]) -> Option<SpaceId> {
105    let lower = message.to_lowercase();
106
107    let mut best: Option<(SpaceId, i32)> = None;
108
109    for space in spaces {
110        let mut score = 0;
111
112        // Match against name (split into words)
113        let name_words: Vec<&str> = space.name.split_whitespace().collect();
114        for word in &name_words {
115            let word_lower = word.to_lowercase();
116            if !word_lower.is_empty() && lower.contains(&word_lower) {
117                score += 2; // Name match is stronger
118            }
119        }
120
121        // Match against tags
122        for tag in &space.tags {
123            let tag_lower = tag.to_lowercase();
124            if lower.contains(&tag_lower) {
125                score += 3; // Tag match is strongest
126            }
127        }
128
129        // Match against path names
130        for path in &space.paths {
131            if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
132                let name_lower = name.to_lowercase();
133                if lower.contains(&name_lower) {
134                    score += 1;
135                }
136            }
137        }
138
139        if score > 0 {
140            if let Some((_, best_score)) = best {
141                if score > best_score {
142                    best = Some((space.id, score));
143                }
144            } else {
145                best = Some((space.id, score));
146            }
147        }
148    }
149
150    best.map(|(id, _)| id)
151}
152
153/// Match a message against all Spaces using a PathMatcher.
154///
155/// This is a convenience wrapper combining path detection with keyword matching.
156pub fn detect_space<'a>(
157    message: &str,
158    spaces: &'a [Space],
159    matcher: &PathMatcher,
160) -> Option<&'a Space> {
161    // Layer 1: Path detection
162    if let Some(path) = extract_filesystem_path(message) {
163        if let Some(space_id) = matcher.find_space(&path) {
164            return spaces.iter().find(|s| s.id == space_id);
165        }
166    }
167
168    // Layer 2: Keyword matching
169    if let Some(space_id) = match_keywords(message, spaces) {
170        return spaces.iter().find(|s| s.id == space_id);
171    }
172
173    None
174}
175
176/// Classify the topic of a message (LLM-based, Phase 4 implementation).
177///
178/// Currently returns a conservative stub that classifies common topics
179/// without LLM. Phase 4 replaces this with actual LLM integration.
180///
181/// The `classifier_fn` is injected so the actual LLM call can be wired in
182/// at the Orchestrator level without this module knowing about providers.
183pub fn classify_topic_stub(message: &str) -> Topic {
184    let lower = message.to_lowercase();
185
186    // Simple keyword-based classification
187    let categories: [(&str, [&str; 8]); 8] = [
188        (
189            "일상",
190            [
191                "저녁",
192                "점심",
193                "아침",
194                "밥",
195                "음식",
196                "레시피",
197                "요리",
198                "장보기",
199            ],
200        ),
201        (
202            "개발",
203            [
204                "code", "bug", "function", "import", "cargo", "rust", "git", "commit",
205            ],
206        ),
207        (
208            "문서",
209            [
210                "readme",
211                "docs",
212                "documentation",
213                "write",
214                "문서",
215                "글",
216                "note",
217                "read",
218            ],
219        ),
220        (
221            "공부",
222            [
223                "study", "learn", "book", "course", "공부", "학습", "책", "class",
224            ],
225        ),
226        (
227            "여행",
228            [
229                "travel", "trip", "flight", "hotel", "여행", "항공", "booking", "tour",
230            ],
231        ),
232        (
233            "건강",
234            [
235                "health", "exercise", "gym", "workout", "건강", "운동", "diet", "run",
236            ],
237        ),
238        (
239            "업무",
240            [
241                "meeting", "email", "project", "deadline", "업무", "회의", "client", "ppt",
242            ],
243        ),
244        (
245            "기술",
246            [
247                "api", "server", "database", "cloud", "기술", "서버", "deploy", "k8s",
248            ],
249        ),
250    ];
251
252    for (topic, keywords) in categories {
253        for kw in keywords {
254            if lower.contains(kw) {
255                return Topic {
256                    name: topic.to_string(),
257                    confidence: 0.7,
258                };
259            }
260        }
261    }
262
263    // No clear topic
264    Topic {
265        name: String::new(),
266        confidence: 0.0,
267    }
268}
269
/// Normalizes a path so registered prefixes and queried paths compare consistently.
///
/// - Expands a leading `~` component to the home directory, read from `HOME`
///   and then `USERPROFILE`. The path is left as written when neither is set
///   to a non-empty value. `~user/...` is not expanded.
/// - Resolves `.` and `..` lexically. The filesystem is not consulted, so
///   symlinks are not followed and the path does not have to exist.
/// - Drops repeated separators and trailing slashes.
///
/// A leading `..` on a relative path is kept, because it cannot be resolved
/// lexically. A `..` directly under the root stays at the root. A path that
/// reduces to nothing (for example `a/..`) becomes `.`.
///
/// Available on every platform; callers in this module use it unconditionally.
fn normalize_path(path: &Path) -> PathBuf {
    use std::path::Component;

    // Expand `~` and `~/rest`. `strip_prefix` works per component, so
    // `~user/x` does not match.
    let expanded = path
        .strip_prefix("~")
        .ok()
        .and_then(|rest| {
            ["HOME", "USERPROFILE"]
                .iter()
                .filter_map(|key| std::env::var_os(key))
                .find(|home| !home.is_empty())
                .map(|home| PathBuf::from(home).join(rest))
        })
        .unwrap_or_else(|| path.to_path_buf());

    let mut normalized = PathBuf::new();
    for component in expanded.components() {
        match component {
            Component::CurDir => {}
            Component::ParentDir => match normalized.components().next_back() {
                Some(Component::Normal(_)) => {
                    normalized.pop();
                }
                // `..` directly under a root or drive prefix stays there.
                Some(Component::RootDir) | Some(Component::Prefix(_)) => {}
                // Nothing to pop (empty, or already a chain of `..`): keep it.
                _ => normalized.push(".."),
            },
            other => normalized.push(other.as_os_str()),
        }
    }

    // An empty `PathBuf` is a prefix of every path, so use `.` instead.
    if normalized.as_os_str().is_empty() {
        normalized.push(".");
    }
    normalized
}
292
/// Checks whether one path is an ancestor of (or equal to) the other, ignoring case.
///
/// The comparison runs component by component, so `/projects/oxios2` does not
/// overlap `/projects/oxios` even though the strings share a prefix. Components
/// are compared after lossy UTF-8 conversion and lowercasing.
///
/// An empty path overlaps every path.
fn paths_overlap(a: &Path, b: &Path) -> bool {
    /// Case-folded text of a single path component.
    fn folded(component: std::path::Component<'_>) -> String {
        component.as_os_str().to_string_lossy().to_lowercase()
    }

    // `zip` stops at the shorter path. If every shared position agrees, the
    // shorter path is a prefix of the longer one.
    a.components()
        .zip(b.components())
        .all(|(left, right)| folded(left) == folded(right))
}
299
/// Returns the final component of `path` as a display name.
///
/// Falls back to `"unknown"` when the path has no final component (for
/// example `/` or a path ending in `..`) or when that component is not valid
/// UTF-8.
pub fn path_name(path: &Path) -> String {
    let label = match path.file_name() {
        Some(os_name) => os_name.to_str(),
        None => None,
    };
    String::from(label.unwrap_or("unknown"))
}
307
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain absolute Unix paths are recognized.
    #[test]
    #[ignore] // TODO: regex pattern in full context
    fn test_extract_unix_path() {
        for msg in ["/test", "/projects/oxios"] {
            assert!(extract_filesystem_path(msg).is_some());
        }
    }

    /// A message naming a Space matches that Space by keyword.
    #[test]
    #[ignore] // TODO: keyword matching needs verification
    fn test_match_keywords() {
        use super::super::{Space, SpaceSource};

        let spaces = vec![
            Space::new("oxios", SpaceSource::AutoResource),
            Space::new("일상", SpaceSource::AutoTopic),
        ];

        assert!(
            match_keywords("oxios bug", &spaces).is_some(),
            "should match oxios keyword"
        );
    }

    /// A `~/` path embedded in a sentence is detected.
    #[test]
    fn test_extract_home_path() {
        let found = extract_filesystem_path("Look at ~/Documents/recipe.md");
        assert!(found.is_some());
    }

    /// A `./` path embedded in a sentence is detected.
    #[test]
    fn test_extract_relative_path() {
        let found = extract_filesystem_path("Check ./config.toml");
        assert!(found.is_some());
    }

    /// A repository URL is detected.
    #[test]
    fn test_extract_github_url() {
        let found = extract_filesystem_path("Clone https://github.com/oxios/oxios.git");
        assert!(found.is_some());
    }

    /// Plain prose yields no path.
    #[test]
    fn test_extract_no_path() {
        assert!(extract_filesystem_path("hello world").is_none());
    }

    /// Smoke test only: a URL with a query string must not panic.
    /// Whether it matches is deliberately left unasserted.
    #[test]
    fn test_extract_url_query_skip() {
        let _ = extract_filesystem_path("Check https://example.com?foo=bar");
    }

    /// The stub classifier maps known keywords to topics and leaves others unclear.
    #[test]
    fn test_classify_topic_stub() {
        let cases = [
            ("rust로 버그를 고치고 싶어", "개발", true),
            ("오늘 점심 뭐 먹지?", "일상", true),
            ("hi", "", false),
        ];

        for (message, expected_name, expected_clear) in cases {
            let topic = classify_topic_stub(message);
            assert_eq!(topic.name, expected_name);
            assert_eq!(topic.is_clear(), expected_clear);
        }
    }

    /// Paths at or under a registered prefix match; sibling paths do not.
    #[test]
    fn test_path_matcher() {
        use super::super::Space;
        use crate::SpaceSource;

        let mut space = Space::new("oxios", SpaceSource::AutoResource);
        space.paths.push(PathBuf::from("/projects/oxios"));

        let mut matcher = PathMatcher::default();
        matcher.register(&space);

        assert!(matcher.matches(Path::new("/projects/oxios/src/main.rs")));
        assert!(matcher.matches(Path::new("/projects/oxios")));
        assert!(!matcher.matches(Path::new("/projects/other")));

        assert!(matcher
            .find_space(Path::new("/projects/oxios/Cargo.toml"))
            .is_some());
    }

    /// The display name is the last path component.
    #[test]
    fn test_path_name() {
        let cases = [
            ("/projects/oxios", "oxios"),
            ("/home/user/Documents", "Documents"),
        ];

        for (input, expected) in cases {
            assert_eq!(path_name(Path::new(input)), expected);
        }
    }
}