Skip to main content

oxios_kernel/mount/
meta_detection.rs

1//! Auto-meta detection: cheap heuristics on marker files (RFC-025 §Auto-Meta).
2//!
3//! Seeds [`MountMeta`](super::MountMeta) from filesystem markers, then the
4//! agent refines it during enrichment. Detection runs at drift-detection time
5//! (cheap `stat` + tiny reads), not on every message.
6
7use std::path::Path;
8use std::time::{Duration, UNIX_EPOCH};
9
10use super::MountMeta;
11
/// Marker files that imply a language / stack. Checked against a Mount's
/// primary path.
///
/// Each entry is `(file name, language)`. [`detect_meta`] walks this table in
/// order, so the order here is the order in which markers and (de-duplicated)
/// languages are reported. Several markers may map to the same language.
const MARKERS: &[(&str, &str)] = &[
    ("Cargo.toml", "rust"),
    // NOTE(review): `package.json` is also present in plain JavaScript
    // projects — confirm that labelling them "typescript" is intended.
    ("package.json", "typescript"),
    ("go.mod", "go"),
    ("pyproject.toml", "python"),
    ("requirements.txt", "python"),
    ("setup.py", "python"),
    ("pom.xml", "java"),
    ("build.gradle", "java"),
    ("build.gradle.kts", "kotlin"),
    ("Gemfile", "ruby"),
    ("composer.json", "php"),
    ("mix.exs", "elixir"),
    ("CMakeLists.txt", "cpp"),
    // NOTE(review): a `Makefile` exists in projects of many languages —
    // confirm that "c" is the desired default.
    ("Makefile", "c"),
];
30
/// Docs/agent markers — recorded but don't imply a language.
///
/// [`detect_meta`] checks these in order. `AGENTS.md` and `README.md` also
/// seed the summary while it is still empty; `AGENTS.md` is listed first, so
/// it is consulted before `README.md`.
const DOC_MARKERS: &[&str] = &[
    "AGENTS.md",
    "CLAUDE.md",
    ".cursorrules",
    "README.md",
    "GEMINI.md",
    ".windsurfrules",
];
40
/// Structure hints from top-level directories.
///
/// Each entry is `(directory name, hint)`. When the directory exists directly
/// under the mount path, [`detect_meta`] appends the hint to the stack list
/// (once, even if several directories map to the same hint).
const STRUCTURE_HINTS: &[(&str, &str)] = &[
    ("crates", "cargo-workspace"),
    ("packages", "monorepo"),
    ("apps", "monorepo"),
    ("libs", "monorepo"),
];
48
49/// Detect [`MountMeta`] from the filesystem at `path`.
50///
51/// This is a **draft** — the agent refines it during enrichment. We never make
52/// an LLM call here; everything is cheap `stat`/`read` on small files.
53pub fn detect_meta(path: &Path) -> MountMeta {
54    let mut meta = MountMeta::default();
55
56    let mut found_languages: Vec<String> = Vec::new();
57    let mut found_markers: Vec<String> = Vec::new();
58
59    // Language + stack markers.
60    for (marker, lang) in MARKERS {
61        let marker_path = path.join(marker);
62        if marker_path.is_file() {
63            if !found_languages.contains(&lang.to_string()) {
64                found_languages.push(lang.to_string());
65            }
66            found_markers.push(marker.to_string());
67
68            // Extract stack hints for well-known markers.
69            extract_stack(marker, &marker_path, &mut meta.stack);
70        }
71    }
72
73    // Doc / agent markers (no language, but recorded + seed summary).
74    for marker in DOC_MARKERS {
75        let marker_path = path.join(marker);
76        if marker_path.is_file() {
77            found_markers.push(marker.to_string());
78            // AGENTS.md / README.md seed the summary (first paragraph).
79            if (marker == &"AGENTS.md" || marker == &"README.md")
80                && meta.summary.is_empty()
81                && let Ok(content) = std::fs::read_to_string(&marker_path)
82            {
83                meta.summary = first_meaningful_line(&content);
84            }
85        }
86    }
87
88    // Structure hints.
89    for (dir, hint) in STRUCTURE_HINTS {
90        if path.join(dir).is_dir() && !meta.stack.contains(&hint.to_string()) {
91            meta.stack.push(hint.to_string());
92        }
93    }
94
95    meta.languages = found_languages;
96    meta.markers = found_markers;
97
98    // If no summary yet, derive one from languages.
99    if meta.summary.is_empty() && !meta.languages.is_empty() {
100        meta.summary = meta.languages.join(" + ");
101    }
102
103    meta
104}
105
106/// Compute the set of marker files to watch for drift, given a path.
107///
108/// Returns `(path, mtime)` pairs for existing markers — this is the snapshot
109/// the drift detector compares against on the next session.
110pub fn snapshot_markers(path: &Path) -> Vec<(std::path::PathBuf, std::time::SystemTime)> {
111    let all: Vec<&str> = MARKERS
112        .iter()
113        .map(|(m, _)| *m)
114        .chain(DOC_MARKERS.iter().copied())
115        .collect();
116
117    all.into_iter()
118        .filter_map(|m| {
119            let p = path.join(m);
120            p.metadata()
121                .and_then(|md| md.modified())
122                .ok()
123                // Truncate to whole seconds so the freshly-read mtime matches
124                // the precision stored in the DB (u64 seconds). Without this,
125                // drift would fire on every restart: the DB reconstructs a
126                // whole-second SystemTime, but a fresh `stat()` yields
127                // nanosecond precision.
128                .map(|t| {
129                    let truncated = t
130                        .duration_since(UNIX_EPOCH)
131                        .map(|d| UNIX_EPOCH + Duration::from_secs(d.as_secs()))
132                        .unwrap_or(t);
133                    (p, truncated)
134                })
135        })
136        .collect()
137}
138
139/// Extract stack keywords from a marker file's contents.
140///
141/// Reads only the marker file (small), scans for dependency names. Keeps the
142/// result bounded — at most ~8 entries.
143fn extract_stack(marker: &str, path: &Path, stack: &mut Vec<String>) {
144    let Ok(content) = std::fs::read_to_string(path) else {
145        return;
146    };
147    let push = |stack: &mut Vec<String>, s: &str| {
148        if s.len() >= 2 && !stack.iter().any(|e| e.eq_ignore_ascii_case(s)) {
149            stack.push(s.to_string());
150        }
151    };
152
153    match marker {
154        "Cargo.toml" => {
155            // Track the current TOML section so we only extract crate names
156            // from dependency sections. Without this, fields from `[package]`
157            // (name, edition, authors, license, …) leak into the stack.
158            let mut current_section = String::new();
159            let dep_sections = ["dependencies", "dev-dependencies", "build-dependencies"];
160
161            for line in content.lines() {
162                let trimmed = line.trim();
163                // Track section headers like `[dependencies]` or
164                // `[dependencies.serde]` (also `[workspace.dependencies]`).
165                if trimmed.starts_with('[') && trimmed.ends_with(']') {
166                    current_section = trimmed
167                        .trim_start_matches('[')
168                        .trim_end_matches(']')
169                        .to_string();
170                    // Dotted-table form: `[dependencies.serde]` or
171                    // `[workspace.dependencies.serde]` — the trailing segment
172                    // after the dependency-section name is the crate name.
173                    if let Some(suffix) =
174                        crate_suffix_of_dep_section(&current_section, &dep_sections)
175                    {
176                        let crate_name = suffix.split('.').next().unwrap_or(suffix);
177                        push(stack, crate_name);
178                    }
179                    continue;
180                }
181                // For *bare* dependency sections (e.g. `[dependencies]`),
182                // the `=` keys are crate names. Dotted tables were handled
183                // above via their section header; their sub-keys (version,
184                // path, features, …) must not be pushed.
185                let is_bare_dep_section = dep_sections.iter().any(|ds| {
186                    current_section == *ds || current_section == format!("workspace.{ds}")
187                });
188                if !is_bare_dep_section {
189                    continue;
190                }
191                if let Some(eq_pos) = trimmed.find('=') {
192                    let name = trimmed[..eq_pos].trim();
193                    if !name.is_empty() {
194                        push(stack, name);
195                    }
196                }
197            }
198        }
199        "package.json" => {
200            // Parse JSON, pull keys from dependencies + devDependencies.
201            if let Ok(val) = serde_json::from_str::<serde_json::Value>(&content) {
202                for key in &["dependencies", "devDependencies", "peerDependencies"] {
203                    if let Some(obj) = val.get(key).and_then(|v| v.as_object()) {
204                        for dep in obj.keys() {
205                            push(stack, dep);
206                        }
207                    }
208                }
209            }
210        }
211        "go.mod" => {
212            // Lines like `\tgithub.com/foo/bar v1.2.3`.
213            for line in content.lines() {
214                let trimmed = line.trim();
215                if trimmed.starts_with("require ") || trimmed.contains(" v") {
216                    let parts: Vec<&str> = trimmed.split_whitespace().collect();
217                    for part in parts {
218                        if part.contains('/') && part.contains('.') && !part.starts_with("require")
219                        {
220                            // Take the last path segment as the stack name.
221                            if let Some(name) = part.rsplit('/').next() {
222                                push(stack, name);
223                            }
224                        }
225                    }
226                }
227            }
228        }
229        "pyproject.toml" | "requirements.txt" => {
230            for line in content.lines() {
231                let trimmed = line.trim();
232                if trimmed.is_empty() || trimmed.starts_with('#') || trimmed.starts_with('[') {
233                    continue;
234                }
235                let name = trimmed
236                    .split(['=', '<', '>', ';', '[', ' '])
237                    .next()
238                    .unwrap_or("")
239                    .trim();
240                if !name.is_empty() {
241                    push(stack, name);
242                }
243            }
244        }
245        _ => {}
246    }
247
248    // Bound the stack list.
249    stack.truncate(8);
250}
251
/// Resolve the crate part of a *dotted* dependency table header.
///
/// `section` is a TOML header without its brackets. When it has the shape
/// `<dep-section>.<rest>` or `workspace.<dep-section>.<rest>` for one of the
/// names in `dep_sections`, `<rest>` is returned (the crate name, possibly
/// followed by further dotted segments). Bare sections such as
/// `dependencies` and unrelated sections such as `package` yield `None`.
fn crate_suffix_of_dep_section<'a>(section: &'a str, dep_sections: &[&str]) -> Option<&'a str> {
    /// Strip `table` followed by a dot from the front of `header`.
    fn after_table<'s>(header: &'s str, table: &str) -> Option<&'s str> {
        header.strip_prefix(table)?.strip_prefix('.')
    }

    dep_sections.iter().find_map(|table| {
        after_table(section, table).or_else(|| {
            section
                .strip_prefix("workspace.")
                .and_then(|inner| after_table(inner, table))
        })
    })
}
268
/// Take the first non-heading, non-empty line as a summary seed.
///
/// Lines that are blank, start with `#` (headings) or start with a code fence
/// are skipped, as are lines that contain nothing but markup once leading
/// `>` markers, `*` and backticks have been removed. The cleaned line is
/// returned; if it is longer than 120 bytes it is cut at the closest
/// character boundary at or below 120 bytes and `…` is appended.
///
/// Returns an empty string when no line qualifies.
fn first_meaningful_line(content: &str) -> String {
    const MAX_BYTES: usize = 120;

    for line in content.lines() {
        let trimmed = line.trim();
        if trimmed.is_empty() || trimmed.starts_with('#') || trimmed.starts_with("```") {
            continue;
        }
        // Strip markdown emphasis for a cleaner summary.
        let stripped = trimmed.trim_start_matches('>').replace(['*', '`'], "");
        let clean = stripped.trim();
        if clean.is_empty() {
            // Only markup (e.g. a `***` rule): not a usable summary.
            continue;
        }
        if clean.len() <= MAX_BYTES {
            return clean.to_string();
        }
        // Find a safe UTF-8 boundary *before* slicing: cutting at a fixed
        // byte offset panics when it lands inside a multi-byte character.
        let mut end = MAX_BYTES;
        while !clean.is_char_boundary(end) {
            end -= 1;
        }
        return format!("{}…", &clean[..end]);
    }
    String::new()
}
297
// Tests run against real temporary directories so the filesystem probing in
// `detect_meta` / `snapshot_markers` is exercised end to end.
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// A Cargo.toml plus AGENTS.md yields the rust language, both markers,
    /// the listed crates in the stack, and a non-empty summary.
    #[test]
    fn test_detect_rust_project() {
        let dir = TempDir::new().unwrap();
        fs::write(
            dir.path().join("Cargo.toml"),
            "[package]\nname = \"oxios\"\n\n[dependencies]\ntokio = \"1\"\nserde = \"1\"\naxum = \"0.7\"\n",
        )
        .unwrap();
        fs::write(dir.path().join("AGENTS.md"), "# Oxios\nAgent OS in Rust.").unwrap();

        let meta = detect_meta(dir.path());
        assert!(meta.languages.contains(&"rust".to_string()));
        assert!(meta.markers.contains(&"Cargo.toml".to_string()));
        assert!(meta.markers.contains(&"AGENTS.md".to_string()));
        assert!(meta.stack.iter().any(|s| s == "tokio"));
        assert!(meta.stack.iter().any(|s| s == "axum"));
        assert!(!meta.summary.is_empty());
    }

    /// Only dependency tables contribute crate names: bare tables via their
    /// keys, dotted tables via their header.
    #[test]
    fn test_extract_stack_ignores_non_dependency_sections() {
        // RFC-025 fix: `[package]` fields must not leak into the stack.
        let dir = TempDir::new().unwrap();
        fs::write(
            dir.path().join("Cargo.toml"),
            [
                "[package]",
                "name = \"foo\"",
                "edition = \"2021\"",
                "authors = [\"a\"]",
                "description = \"desc\"",
                "license = \"MIT\"",
                "",
                "[dependencies]",
                "tokio = { version = \"1\", features = [\"full\"] }",
                "serde = \"1.0\"",
                "",
                "[dev-dependencies]",
                "pretty_assertions = \"1\"",
                "",
                "[dependencies.axum]",
                "version = \"0.7\"",
                "features = [\"json\"]",
            ]
            .join("\n"),
        )
        .unwrap();

        let meta = detect_meta(dir.path());
        // Real deps are captured (bare section + dotted table).
        assert!(
            meta.stack.iter().any(|s| s == "tokio"),
            "tokio missing: {meta:?}"
        );
        assert!(
            meta.stack.iter().any(|s| s == "serde"),
            "serde missing: {meta:?}"
        );
        assert!(
            meta.stack.iter().any(|s| s == "axum"),
            "dotted-table crate name missing: {meta:?}"
        );
        assert!(
            meta.stack.iter().any(|s| s == "pretty_assertions"),
            "dev-dep missing: {meta:?}"
        );
        // `[package]` fields must NOT appear.
        assert!(
            !meta.stack.iter().any(|s| s == "name"),
            "name leaked: {meta:?}"
        );
        assert!(
            !meta.stack.iter().any(|s| s == "edition"),
            "edition leaked: {meta:?}"
        );
        assert!(
            !meta.stack.iter().any(|s| s == "authors"),
            "authors leaked: {meta:?}"
        );
        // Dotted-table sub-keys must NOT appear.
        assert!(
            !meta.stack.iter().any(|s| s == "version"),
            "version leaked: {meta:?}"
        );
        assert!(
            !meta.stack.iter().any(|s| s == "features"),
            "features leaked: {meta:?}"
        );
    }

    /// A package.json maps to the typescript language and exposes its
    /// dependency keys in the stack.
    #[test]
    fn test_detect_node_project() {
        let dir = TempDir::new().unwrap();
        fs::write(
            dir.path().join("package.json"),
            r#"{"dependencies": {"react": "^18", "next": "^14"}, "devDependencies": {"typescript": "^5"}}"#,
        )
        .unwrap();

        let meta = detect_meta(dir.path());
        assert!(meta.languages.contains(&"typescript".to_string()));
        assert!(meta.stack.iter().any(|s| s == "react"));
        assert!(meta.stack.iter().any(|s| s == "next"));
    }

    /// With no markers at all, nothing is detected and no summary is made up.
    #[test]
    fn test_detect_empty_dir() {
        let dir = TempDir::new().unwrap();
        let meta = detect_meta(dir.path());
        assert!(meta.languages.is_empty());
        assert!(meta.markers.is_empty());
        assert!(meta.summary.is_empty());
    }

    /// A top-level `crates` directory adds the `cargo-workspace` hint.
    #[test]
    fn test_structure_hints() {
        let dir = TempDir::new().unwrap();
        fs::create_dir(dir.path().join("crates")).unwrap();
        let meta = detect_meta(dir.path());
        assert!(meta.stack.contains(&"cargo-workspace".to_string()));
    }

    /// The snapshot lists exactly the markers that exist on disk.
    #[test]
    fn test_snapshot_markers() {
        let dir = TempDir::new().unwrap();
        fs::write(dir.path().join("Cargo.toml"), "[package]\nname = \"x\"").unwrap();
        let snap = snapshot_markers(dir.path());
        assert!(
            snap.iter()
                .any(|(p, _)| p.file_name().unwrap() == "Cargo.toml")
        );
        // Non-existent markers are excluded.
        assert!(
            !snap
                .iter()
                .any(|(p, _)| p.file_name().unwrap() == "package.json")
        );
    }

    /// Headings and blank lines are skipped and emphasis markers are removed.
    #[test]
    fn test_first_meaningful_line() {
        assert_eq!(
            first_meaningful_line("# Title\n\nThis is the **summary**.\nMore."),
            "This is the summary."
        );
    }
}