Skip to main content

tokmd_analysis_archetype/
lib.rs

1//! # tokmd-analysis-archetype
2//!
3//! Microcrate for repository archetype inference in analysis receipts.
4
5use std::collections::BTreeSet;
6
7use tokmd_analysis_types::Archetype;
8use tokmd_types::{ExportData, FileKind, FileRow};
9
10/// Infer a high-level project archetype from normalized export metadata.
11pub fn detect_archetype(export: &ExportData) -> Option<Archetype> {
12    let parents: Vec<&FileRow> = export
13        .rows
14        .iter()
15        .filter(|r| r.kind == FileKind::Parent)
16        .collect();
17
18    let mut files: BTreeSet<String> = BTreeSet::new();
19    for row in parents {
20        files.insert(row.path.replace('\\', "/"));
21    }
22
23    if let Some(archetype) = rust_workspace(&files) {
24        return Some(archetype);
25    }
26    if let Some(archetype) = nextjs_app(&files) {
27        return Some(archetype);
28    }
29    if let Some(archetype) = containerized_service(&files) {
30        return Some(archetype);
31    }
32    if let Some(archetype) = iac_project(&files) {
33        return Some(archetype);
34    }
35    if let Some(archetype) = python_package(&files) {
36        return Some(archetype);
37    }
38    if files.contains("package.json") {
39        return Some(Archetype {
40            kind: "Node package".to_string(),
41            evidence: vec!["package.json".to_string()],
42        });
43    }
44
45    None
46}
47
48fn rust_workspace(files: &BTreeSet<String>) -> Option<Archetype> {
49    let has_manifest = files.contains("Cargo.toml");
50    let has_workspace_dir = files
51        .iter()
52        .any(|p| p.starts_with("crates/") || p.starts_with("packages/"));
53    if !has_manifest || !has_workspace_dir {
54        return None;
55    }
56
57    let mut evidence = vec!["Cargo.toml".to_string()];
58    if let Some(path) = files
59        .iter()
60        .find(|p| p.starts_with("crates/") || p.starts_with("packages/"))
61    {
62        evidence.push(path.clone());
63    }
64
65    let is_cli = files
66        .iter()
67        .any(|p| p.ends_with("src/main.rs") || p.contains("/src/bin/"));
68    let kind = if is_cli {
69        "Rust workspace (CLI)"
70    } else {
71        "Rust workspace"
72    };
73
74    Some(Archetype {
75        kind: kind.to_string(),
76        evidence,
77    })
78}
79
80fn nextjs_app(files: &BTreeSet<String>) -> Option<Archetype> {
81    let has_package = files.contains("package.json");
82    let has_next_config = files.iter().any(|p| {
83        p.starts_with("next.config.")
84            || p.ends_with("/next.config.js")
85            || p.ends_with("/next.config.mjs")
86            || p.ends_with("/next.config.ts")
87    });
88    if has_package && has_next_config {
89        let mut evidence = vec!["package.json".to_string()];
90        if let Some(cfg) = files.iter().find(|p| {
91            p.ends_with("next.config.js")
92                || p.ends_with("next.config.mjs")
93                || p.ends_with("next.config.ts")
94        }) {
95            evidence.push(cfg.clone());
96        }
97        return Some(Archetype {
98            kind: "Next.js app".to_string(),
99            evidence,
100        });
101    }
102    None
103}
104
105fn containerized_service(files: &BTreeSet<String>) -> Option<Archetype> {
106    let has_docker = files.contains("Dockerfile");
107    let has_k8s = files
108        .iter()
109        .any(|p| p.starts_with("k8s/") || p.starts_with("kubernetes/"));
110    if has_docker && has_k8s {
111        return Some(Archetype {
112            kind: "Containerized service".to_string(),
113            evidence: vec!["Dockerfile".to_string()],
114        });
115    }
116    None
117}
118
119fn iac_project(files: &BTreeSet<String>) -> Option<Archetype> {
120    let has_tf = files
121        .iter()
122        .any(|p| p.ends_with(".tf") || p.starts_with("terraform/"));
123    if has_tf {
124        return Some(Archetype {
125            kind: "Infrastructure as code".to_string(),
126            evidence: vec!["terraform/".to_string()],
127        });
128    }
129    None
130}
131
132fn python_package(files: &BTreeSet<String>) -> Option<Archetype> {
133    if files.contains("pyproject.toml") {
134        return Some(Archetype {
135            kind: "Python package".to_string(),
136            evidence: vec!["pyproject.toml".to_string()],
137        });
138    }
139    None
140}
141
#[cfg(test)]
mod tests {
    use super::*;
    use tokmd_types::{ChildIncludeMode, ExportData, FileKind, FileRow};

    /// Build an export whose rows are all parent files with the given paths.
    fn export_with_paths(paths: &[&str]) -> ExportData {
        let rows = paths
            .iter()
            .copied()
            .map(|path| FileRow {
                path: path.to_string(),
                module: "(root)".to_string(),
                lang: "Rust".to_string(),
                kind: FileKind::Parent,
                code: 1,
                comments: 0,
                blanks: 0,
                lines: 1,
                bytes: 10,
                tokens: 2,
            })
            .collect();

        ExportData {
            rows,
            module_roots: vec!["crates".to_string()],
            module_depth: 2,
            children: ChildIncludeMode::Separate,
        }
    }

    /// Build the ordered path set that the individual detectors take.
    fn files_set(paths: &[&str]) -> BTreeSet<String> {
        paths.iter().copied().map(String::from).collect()
    }

    // ---- rust_workspace ---------------------------------------------------

    #[test]
    fn detects_rust_workspace_cli() {
        let data = export_with_paths(&[
            "Cargo.toml",
            "crates/core/Cargo.toml",
            "crates/core/src/lib.rs",
            "src/main.rs",
        ]);
        let found = detect_archetype(&data).unwrap();
        assert!(found.kind.contains("Rust workspace"));
        assert!(found.kind.contains("CLI"));

        // Guards against an OR→AND mutation: a crates/ or packages/ path must
        // show up in the evidence.
        let cites_member = found
            .evidence
            .iter()
            .any(|e| e.starts_with("crates/") || e.starts_with("packages/"));
        assert!(
            cites_member,
            "evidence must contain workspace dir path: {:?}",
            found.evidence
        );
    }

    #[test]
    fn rust_workspace_needs_cargo_toml() {
        // No root manifest, so nothing is detected.
        let paths = files_set(&["crates/core/src/lib.rs"]);
        assert!(rust_workspace(&paths).is_none());
    }

    #[test]
    fn rust_workspace_needs_workspace_dir() {
        // Root manifest present, but neither crates/ nor packages/.
        let paths = files_set(&["Cargo.toml", "src/lib.rs"]);
        assert!(rust_workspace(&paths).is_none());
    }

    #[test]
    fn rust_workspace_with_packages_dir() {
        // packages/ is accepted in place of crates/.
        let paths = files_set(&["Cargo.toml", "packages/foo/src/lib.rs"]);
        let found = rust_workspace(&paths).unwrap();
        assert_eq!(found.kind, "Rust workspace");

        // Guards against an OR→AND mutation: the packages/ path must be cited.
        let cites_packages = found.evidence.iter().any(|e| e.starts_with("packages/"));
        assert!(
            cites_packages,
            "evidence must contain packages/ path: {:?}",
            found.evidence
        );
    }

    #[test]
    fn rust_workspace_detects_cli_with_main_rs() {
        let paths = files_set(&["Cargo.toml", "crates/foo/src/lib.rs", "src/main.rs"]);
        let found = rust_workspace(&paths).unwrap();
        assert!(found.kind.contains("CLI"));
    }

    #[test]
    fn rust_workspace_detects_cli_with_bin_dir() {
        let paths = files_set(&[
            "Cargo.toml",
            "crates/foo/src/lib.rs",
            "crates/foo/src/bin/cli.rs",
        ]);
        let found = rust_workspace(&paths).unwrap();
        assert!(found.kind.contains("CLI"));
    }

    #[test]
    fn rust_workspace_library_only() {
        // Without main.rs or a bin/ directory the plain kind is reported.
        let paths = files_set(&["Cargo.toml", "crates/foo/src/lib.rs"]);
        let found = rust_workspace(&paths).unwrap();
        assert_eq!(found.kind, "Rust workspace");
        assert!(!found.kind.contains("CLI"));
    }

    // ---- nextjs_app -------------------------------------------------------

    #[test]
    fn detects_nextjs() {
        let data = export_with_paths(&["package.json", "next.config.js", "pages/index.tsx"]);
        let found = detect_archetype(&data).unwrap();
        assert_eq!(found.kind, "Next.js app");

        // Guards against an OR→AND mutation: next.config.js must be cited.
        let cites_config = found.evidence.iter().any(|e| e.ends_with("next.config.js"));
        assert!(
            cites_config,
            "evidence must contain next.config.js: {:?}",
            found.evidence
        );
    }

    #[test]
    fn nextjs_needs_package_json() {
        // Config file present, package.json missing.
        let paths = files_set(&["next.config.js", "pages/index.tsx"]);
        assert!(nextjs_app(&paths).is_none());
    }

    #[test]
    fn nextjs_needs_next_config() {
        // package.json present, config file missing.
        let paths = files_set(&["package.json", "pages/index.tsx"]);
        assert!(nextjs_app(&paths).is_none());
    }

    #[test]
    fn nextjs_with_mjs_config() {
        let paths = files_set(&["package.json", "next.config.mjs"]);
        let found = nextjs_app(&paths).unwrap();
        assert_eq!(found.kind, "Next.js app");

        // Guards against an OR→AND mutation: next.config.mjs must be cited.
        let cites_config = found
            .evidence
            .iter()
            .any(|e| e.ends_with("next.config.mjs"));
        assert!(
            cites_config,
            "evidence must contain next.config.mjs: {:?}",
            found.evidence
        );
    }

    #[test]
    fn nextjs_with_ts_config() {
        let paths = files_set(&["package.json", "next.config.ts"]);
        let found = nextjs_app(&paths).unwrap();
        assert_eq!(found.kind, "Next.js app");

        // Guards against an OR→AND mutation: next.config.ts must be cited.
        let cites_config = found.evidence.iter().any(|e| e.ends_with("next.config.ts"));
        assert!(
            cites_config,
            "evidence must contain next.config.ts: {:?}",
            found.evidence
        );
    }

    #[test]
    fn nextjs_with_subdir_next_config_mjs() {
        // Guards against an OR→AND mutation: drives the
        // ends_with("/next.config.mjs") clause of the detection predicate.
        let paths = files_set(&["package.json", "apps/web/next.config.mjs"]);
        let found = nextjs_app(&paths).unwrap();
        assert_eq!(found.kind, "Next.js app");

        let cites_config = found
            .evidence
            .iter()
            .any(|e| e.as_str() == "apps/web/next.config.mjs");
        assert!(
            cites_config,
            "evidence must contain apps/web/next.config.mjs: {:?}",
            found.evidence
        );
    }

    #[test]
    fn nextjs_with_nested_config() {
        // The config file lives in a subdirectory.
        let paths = files_set(&["package.json", "app/next.config.js"]);
        let found = nextjs_app(&paths).unwrap();
        assert_eq!(found.kind, "Next.js app");

        // Guards against an OR→AND mutation: the nested path must be cited.
        let cites_config = found
            .evidence
            .iter()
            .any(|e| e.as_str() == "app/next.config.js");
        assert!(
            cites_config,
            "evidence must contain app/next.config.js: {:?}",
            found.evidence
        );
    }

    #[test]
    fn nextjs_with_subdir_next_config_ts() {
        // Guards against an OR→AND mutation: drives the
        // ends_with("/next.config.ts") clause.
        let paths = files_set(&["package.json", "apps/web/next.config.ts"]);
        let found = nextjs_app(&paths).unwrap();
        assert_eq!(found.kind, "Next.js app");

        let cites_config = found
            .evidence
            .iter()
            .any(|e| e.as_str() == "apps/web/next.config.ts");
        assert!(
            cites_config,
            "evidence must contain apps/web/next.config.ts: {:?}",
            found.evidence
        );
    }

    // ---- containerized_service --------------------------------------------

    #[test]
    fn containerized_service_needs_dockerfile() {
        // k8s/ present, Dockerfile missing.
        let paths = files_set(&["k8s/deployment.yaml"]);
        assert!(containerized_service(&paths).is_none());
    }

    #[test]
    fn containerized_service_needs_k8s() {
        // Dockerfile present, k8s/ missing.
        let paths = files_set(&["Dockerfile", "src/main.rs"]);
        assert!(containerized_service(&paths).is_none());
    }

    #[test]
    fn containerized_service_detected() {
        let paths = files_set(&["Dockerfile", "k8s/deployment.yaml"]);
        let found = containerized_service(&paths).unwrap();
        assert_eq!(found.kind, "Containerized service");
    }

    #[test]
    fn containerized_service_with_kubernetes_dir() {
        let paths = files_set(&["Dockerfile", "kubernetes/deployment.yaml"]);
        let found = containerized_service(&paths).unwrap();
        assert_eq!(found.kind, "Containerized service");
    }

    // ---- iac_project ------------------------------------------------------

    #[test]
    fn iac_project_with_tf_file() {
        let paths = files_set(&["main.tf"]);
        let found = iac_project(&paths).unwrap();
        assert_eq!(found.kind, "Infrastructure as code");
    }

    #[test]
    fn iac_project_with_terraform_dir() {
        let paths = files_set(&["terraform/main.tf"]);
        let found = iac_project(&paths).unwrap();
        assert_eq!(found.kind, "Infrastructure as code");
    }

    #[test]
    fn iac_project_not_detected_without_tf() {
        let paths = files_set(&["src/main.rs", "Cargo.toml"]);
        assert!(iac_project(&paths).is_none());
    }

    // ---- python_package ---------------------------------------------------

    #[test]
    fn python_package_detected() {
        let paths = files_set(&["pyproject.toml", "src/main.py"]);
        let found = python_package(&paths).unwrap();
        assert_eq!(found.kind, "Python package");
    }

    #[test]
    fn python_package_not_detected_without_pyproject() {
        let paths = files_set(&["setup.py", "src/main.py"]);
        assert!(python_package(&paths).is_none());
    }

    // ---- Node package fallback --------------------------------------------

    #[test]
    fn node_package_detected() {
        let data = export_with_paths(&["package.json", "src/index.js"]);
        let found = detect_archetype(&data).unwrap();
        assert_eq!(found.kind, "Node package");
    }

    // ---- detector priority ------------------------------------------------

    #[test]
    fn rust_workspace_takes_priority_over_node() {
        // Both a Cargo workspace layout and a package.json are present.
        let data = export_with_paths(&["Cargo.toml", "crates/foo/src/lib.rs", "package.json"]);
        let found = detect_archetype(&data).unwrap();
        assert!(found.kind.contains("Rust workspace"));
    }

    #[test]
    fn nextjs_takes_priority_over_node() {
        let data = export_with_paths(&["package.json", "next.config.js"]);
        let found = detect_archetype(&data).unwrap();
        assert_eq!(found.kind, "Next.js app");
    }

    #[test]
    fn no_archetype_for_empty() {
        let data = export_with_paths(&[]);
        assert!(detect_archetype(&data).is_none());
    }

    #[test]
    fn no_archetype_for_generic_files() {
        let data = export_with_paths(&["README.md", "src/lib.rs"]);
        assert!(detect_archetype(&data).is_none());
    }
}