tokmd_analysis_archetype/
lib.rs1use std::collections::BTreeSet;
6
7use tokmd_analysis_types::Archetype;
8use tokmd_types::{ExportData, FileKind, FileRow};
9
10pub fn detect_archetype(export: &ExportData) -> Option<Archetype> {
12 let parents: Vec<&FileRow> = export
13 .rows
14 .iter()
15 .filter(|r| r.kind == FileKind::Parent)
16 .collect();
17
18 let mut files: BTreeSet<String> = BTreeSet::new();
19 for row in parents {
20 files.insert(row.path.replace('\\', "/"));
21 }
22
23 if let Some(archetype) = rust_workspace(&files) {
24 return Some(archetype);
25 }
26 if let Some(archetype) = nextjs_app(&files) {
27 return Some(archetype);
28 }
29 if let Some(archetype) = containerized_service(&files) {
30 return Some(archetype);
31 }
32 if let Some(archetype) = iac_project(&files) {
33 return Some(archetype);
34 }
35 if let Some(archetype) = python_package(&files) {
36 return Some(archetype);
37 }
38 if files.contains("package.json") {
39 return Some(Archetype {
40 kind: "Node package".to_string(),
41 evidence: vec!["package.json".to_string()],
42 });
43 }
44
45 None
46}
47
48fn rust_workspace(files: &BTreeSet<String>) -> Option<Archetype> {
49 let has_manifest = files.contains("Cargo.toml");
50 let has_workspace_dir = files
51 .iter()
52 .any(|p| p.starts_with("crates/") || p.starts_with("packages/"));
53 if !has_manifest || !has_workspace_dir {
54 return None;
55 }
56
57 let mut evidence = vec!["Cargo.toml".to_string()];
58 if let Some(path) = files
59 .iter()
60 .find(|p| p.starts_with("crates/") || p.starts_with("packages/"))
61 {
62 evidence.push(path.clone());
63 }
64
65 let is_cli = files
66 .iter()
67 .any(|p| p.ends_with("src/main.rs") || p.contains("/src/bin/"));
68 let kind = if is_cli {
69 "Rust workspace (CLI)"
70 } else {
71 "Rust workspace"
72 };
73
74 Some(Archetype {
75 kind: kind.to_string(),
76 evidence,
77 })
78}
79
80fn nextjs_app(files: &BTreeSet<String>) -> Option<Archetype> {
81 let has_package = files.contains("package.json");
82 let has_next_config = files.iter().any(|p| {
83 p.starts_with("next.config.")
84 || p.ends_with("/next.config.js")
85 || p.ends_with("/next.config.mjs")
86 || p.ends_with("/next.config.ts")
87 });
88 if has_package && has_next_config {
89 let mut evidence = vec!["package.json".to_string()];
90 if let Some(cfg) = files.iter().find(|p| {
91 p.ends_with("next.config.js")
92 || p.ends_with("next.config.mjs")
93 || p.ends_with("next.config.ts")
94 }) {
95 evidence.push(cfg.clone());
96 }
97 return Some(Archetype {
98 kind: "Next.js app".to_string(),
99 evidence,
100 });
101 }
102 None
103}
104
105fn containerized_service(files: &BTreeSet<String>) -> Option<Archetype> {
106 let has_docker = files.contains("Dockerfile");
107 let has_k8s = files
108 .iter()
109 .any(|p| p.starts_with("k8s/") || p.starts_with("kubernetes/"));
110 if has_docker && has_k8s {
111 return Some(Archetype {
112 kind: "Containerized service".to_string(),
113 evidence: vec!["Dockerfile".to_string()],
114 });
115 }
116 None
117}
118
119fn iac_project(files: &BTreeSet<String>) -> Option<Archetype> {
120 let has_tf = files
121 .iter()
122 .any(|p| p.ends_with(".tf") || p.starts_with("terraform/"));
123 if has_tf {
124 return Some(Archetype {
125 kind: "Infrastructure as code".to_string(),
126 evidence: vec!["terraform/".to_string()],
127 });
128 }
129 None
130}
131
132fn python_package(files: &BTreeSet<String>) -> Option<Archetype> {
133 if files.contains("pyproject.toml") {
134 return Some(Archetype {
135 kind: "Python package".to_string(),
136 evidence: vec!["pyproject.toml".to_string()],
137 });
138 }
139 None
140}
141
#[cfg(test)]
mod tests {
    use super::*;
    use tokmd_types::{ChildIncludeMode, ExportData, FileKind, FileRow};

    /// Builds an `ExportData` with one `FileKind::Parent` row per given path.
    /// All other row fields are fixed filler values.
    fn export_with_paths(paths: &[&str]) -> ExportData {
        let rows = paths
            .iter()
            .copied()
            .map(|path| FileRow {
                path: String::from(path),
                module: "(root)".to_string(),
                lang: "Rust".to_string(),
                kind: FileKind::Parent,
                code: 1,
                comments: 0,
                blanks: 0,
                lines: 1,
                bytes: 10,
                tokens: 2,
            })
            .collect();

        ExportData {
            rows,
            module_roots: vec!["crates".to_string()],
            module_depth: 2,
            children: ChildIncludeMode::Separate,
        }
    }

    /// Builds the normalised path set that the individual detectors take.
    fn files_set(paths: &[&str]) -> BTreeSet<String> {
        paths.iter().copied().map(String::from).collect()
    }

    // ---------------------------------------------------------------
    // Rust workspace
    // ---------------------------------------------------------------

    #[test]
    fn detects_rust_workspace_cli() {
        let paths = [
            "Cargo.toml",
            "crates/core/Cargo.toml",
            "crates/core/src/lib.rs",
            "src/main.rs",
        ];
        let detected = detect_archetype(&export_with_paths(&paths)).unwrap();

        assert!(detected.kind.contains("Rust workspace"));
        assert!(detected.kind.contains("CLI"));

        let cites_member_dir = detected
            .evidence
            .iter()
            .any(|entry| entry.starts_with("crates/") || entry.starts_with("packages/"));
        assert!(
            cites_member_dir,
            "evidence must contain workspace dir path: {:?}",
            detected.evidence
        );
    }

    #[test]
    fn rust_workspace_needs_cargo_toml() {
        let without_manifest = files_set(&["crates/core/src/lib.rs"]);
        assert!(rust_workspace(&without_manifest).is_none());
    }

    #[test]
    fn rust_workspace_needs_workspace_dir() {
        let without_members = files_set(&["Cargo.toml", "src/lib.rs"]);
        assert!(rust_workspace(&without_members).is_none());
    }

    #[test]
    fn rust_workspace_with_packages_dir() {
        let detected =
            rust_workspace(&files_set(&["Cargo.toml", "packages/foo/src/lib.rs"])).unwrap();

        assert_eq!(detected.kind, "Rust workspace");

        let cites_packages = detected
            .evidence
            .iter()
            .any(|entry| entry.starts_with("packages/"));
        assert!(
            cites_packages,
            "evidence must contain packages/ path: {:?}",
            detected.evidence
        );
    }

    #[test]
    fn rust_workspace_detects_cli_with_main_rs() {
        let paths = ["Cargo.toml", "crates/foo/src/lib.rs", "src/main.rs"];
        let detected = rust_workspace(&files_set(&paths)).unwrap();
        assert!(detected.kind.contains("CLI"));
    }

    #[test]
    fn rust_workspace_detects_cli_with_bin_dir() {
        let paths = [
            "Cargo.toml",
            "crates/foo/src/lib.rs",
            "crates/foo/src/bin/cli.rs",
        ];
        let detected = rust_workspace(&files_set(&paths)).unwrap();
        assert!(detected.kind.contains("CLI"));
    }

    #[test]
    fn rust_workspace_library_only() {
        let detected = rust_workspace(&files_set(&["Cargo.toml", "crates/foo/src/lib.rs"])).unwrap();
        assert_eq!(detected.kind, "Rust workspace");
        assert!(!detected.kind.contains("CLI"));
    }

    // ---------------------------------------------------------------
    // Next.js
    // ---------------------------------------------------------------

    #[test]
    fn detects_nextjs() {
        let paths = ["package.json", "next.config.js", "pages/index.tsx"];
        let detected = detect_archetype(&export_with_paths(&paths)).unwrap();

        assert_eq!(detected.kind, "Next.js app");

        let cites_config = detected
            .evidence
            .iter()
            .any(|entry| entry.ends_with("next.config.js"));
        assert!(
            cites_config,
            "evidence must contain next.config.js: {:?}",
            detected.evidence
        );
    }

    #[test]
    fn nextjs_needs_package_json() {
        let without_package = files_set(&["next.config.js", "pages/index.tsx"]);
        assert!(nextjs_app(&without_package).is_none());
    }

    #[test]
    fn nextjs_needs_next_config() {
        let without_config = files_set(&["package.json", "pages/index.tsx"]);
        assert!(nextjs_app(&without_config).is_none());
    }

    #[test]
    fn nextjs_with_mjs_config() {
        let detected = nextjs_app(&files_set(&["package.json", "next.config.mjs"])).unwrap();

        assert_eq!(detected.kind, "Next.js app");

        let cites_config = detected
            .evidence
            .iter()
            .any(|entry| entry.ends_with("next.config.mjs"));
        assert!(
            cites_config,
            "evidence must contain next.config.mjs: {:?}",
            detected.evidence
        );
    }

    #[test]
    fn nextjs_with_ts_config() {
        let detected = nextjs_app(&files_set(&["package.json", "next.config.ts"])).unwrap();

        assert_eq!(detected.kind, "Next.js app");

        let cites_config = detected
            .evidence
            .iter()
            .any(|entry| entry.ends_with("next.config.ts"));
        assert!(
            cites_config,
            "evidence must contain next.config.ts: {:?}",
            detected.evidence
        );
    }

    #[test]
    fn nextjs_with_subdir_next_config_mjs() {
        let detected =
            nextjs_app(&files_set(&["package.json", "apps/web/next.config.mjs"])).unwrap();

        assert_eq!(detected.kind, "Next.js app");

        let cites_config = detected
            .evidence
            .iter()
            .any(|entry| entry == "apps/web/next.config.mjs");
        assert!(
            cites_config,
            "evidence must contain apps/web/next.config.mjs: {:?}",
            detected.evidence
        );
    }

    #[test]
    fn nextjs_with_nested_config() {
        let detected = nextjs_app(&files_set(&["package.json", "app/next.config.js"])).unwrap();

        assert_eq!(detected.kind, "Next.js app");

        let cites_config = detected
            .evidence
            .iter()
            .any(|entry| entry == "app/next.config.js");
        assert!(
            cites_config,
            "evidence must contain app/next.config.js: {:?}",
            detected.evidence
        );
    }

    #[test]
    fn nextjs_with_subdir_next_config_ts() {
        let detected =
            nextjs_app(&files_set(&["package.json", "apps/web/next.config.ts"])).unwrap();

        assert_eq!(detected.kind, "Next.js app");

        let cites_config = detected
            .evidence
            .iter()
            .any(|entry| entry == "apps/web/next.config.ts");
        assert!(
            cites_config,
            "evidence must contain apps/web/next.config.ts: {:?}",
            detected.evidence
        );
    }

    // ---------------------------------------------------------------
    // Containerized service
    // ---------------------------------------------------------------

    #[test]
    fn containerized_service_needs_dockerfile() {
        let without_dockerfile = files_set(&["k8s/deployment.yaml"]);
        assert!(containerized_service(&without_dockerfile).is_none());
    }

    #[test]
    fn containerized_service_needs_k8s() {
        let without_manifests = files_set(&["Dockerfile", "src/main.rs"]);
        assert!(containerized_service(&without_manifests).is_none());
    }

    #[test]
    fn containerized_service_detected() {
        let detected =
            containerized_service(&files_set(&["Dockerfile", "k8s/deployment.yaml"])).unwrap();
        assert_eq!(detected.kind, "Containerized service");
    }

    #[test]
    fn containerized_service_with_kubernetes_dir() {
        let detected =
            containerized_service(&files_set(&["Dockerfile", "kubernetes/deployment.yaml"]))
                .unwrap();
        assert_eq!(detected.kind, "Containerized service");
    }

    // ---------------------------------------------------------------
    // Infrastructure as code
    // ---------------------------------------------------------------

    #[test]
    fn iac_project_with_tf_file() {
        let detected = iac_project(&files_set(&["main.tf"])).unwrap();
        assert_eq!(detected.kind, "Infrastructure as code");
    }

    #[test]
    fn iac_project_with_terraform_dir() {
        let detected = iac_project(&files_set(&["terraform/main.tf"])).unwrap();
        assert_eq!(detected.kind, "Infrastructure as code");
    }

    #[test]
    fn iac_project_not_detected_without_tf() {
        let unrelated = files_set(&["src/main.rs", "Cargo.toml"]);
        assert!(iac_project(&unrelated).is_none());
    }

    // ---------------------------------------------------------------
    // Python package
    // ---------------------------------------------------------------

    #[test]
    fn python_package_detected() {
        let detected = python_package(&files_set(&["pyproject.toml", "src/main.py"])).unwrap();
        assert_eq!(detected.kind, "Python package");
    }

    #[test]
    fn python_package_not_detected_without_pyproject() {
        let legacy_layout = files_set(&["setup.py", "src/main.py"]);
        assert!(python_package(&legacy_layout).is_none());
    }

    // ---------------------------------------------------------------
    // Node package fallback
    // ---------------------------------------------------------------

    #[test]
    fn node_package_detected() {
        let detected =
            detect_archetype(&export_with_paths(&["package.json", "src/index.js"])).unwrap();
        assert_eq!(detected.kind, "Node package");
    }

    // ---------------------------------------------------------------
    // Detector priority and negative cases
    // ---------------------------------------------------------------

    #[test]
    fn rust_workspace_takes_priority_over_node() {
        let paths = ["Cargo.toml", "crates/foo/src/lib.rs", "package.json"];
        let detected = detect_archetype(&export_with_paths(&paths)).unwrap();
        assert!(detected.kind.contains("Rust workspace"));
    }

    #[test]
    fn nextjs_takes_priority_over_node() {
        let detected =
            detect_archetype(&export_with_paths(&["package.json", "next.config.js"])).unwrap();
        assert_eq!(detected.kind, "Next.js app");
    }

    #[test]
    fn no_archetype_for_empty() {
        let empty = export_with_paths(&[]);
        assert!(detect_archetype(&empty).is_none());
    }

    #[test]
    fn no_archetype_for_generic_files() {
        let generic = export_with_paths(&["README.md", "src/lib.rs"]);
        assert!(detect_archetype(&generic).is_none());
    }
}