1use std::path::Path;
14use std::time::Instant;
15
16use super::helpers::{apply_check_outcome, CheckOutcome};
17use super::types::{CheckItem, CheckStatus, Evidence, EvidenceType, Severity};
18
19fn scan_src_files<F>(project_path: &Path, detect: F) -> Vec<&'static str>
22where
23 F: Fn(&str, &mut Vec<&'static str>),
24{
25 let mut indicators = Vec::new();
26 let Ok(entries) = glob::glob(&format!("{}/src/**/*.rs", project_path.display())) else {
27 return indicators;
28 };
29 for entry in entries.flatten() {
30 let Ok(content) = std::fs::read_to_string(&entry) else {
31 continue;
32 };
33 detect(&content, &mut indicators);
34 }
35 indicators.sort_unstable();
36 indicators.dedup();
37 indicators
38}
39
/// Returns `true` when at least one entry of `paths`, resolved relative to
/// `base`, exists on disk. An empty `paths` slice yields `false`.
fn path_exists_any(base: &Path, paths: &[&str]) -> bool {
    for relative in paths {
        if base.join(relative).exists() {
            return true;
        }
    }
    false
}
44
/// Returns `true` when the file at `path` can be read as text and contains at
/// least one of `patterns` as a substring. Unreadable files yield `false`.
fn file_contains_any(path: &Path, patterns: &[&str]) -> bool {
    match std::fs::read_to_string(path) {
        Ok(text) => patterns.iter().any(|needle| text.contains(*needle)),
        Err(_) => false,
    }
}
49
/// Returns `true` when the file at `path` can be read as text and, for every
/// group in `pattern_groups`, contains at least one of that group's
/// substrings. Unreadable files yield `false`.
fn file_contains_all(path: &Path, pattern_groups: &[&[&str]]) -> bool {
    let Ok(text) = std::fs::read_to_string(path) else {
        return false;
    };
    for group in pattern_groups {
        let group_matched = group.iter().any(|needle| text.contains(*needle));
        if !group_matched {
            return false;
        }
    }
    true
}
57
58pub fn evaluate_all(project_path: &Path) -> Vec<CheckItem> {
60 vec![
61 check_entanglement_detection(project_path),
62 check_correction_cascade_prevention(project_path),
63 check_undeclared_consumer_detection(project_path),
64 check_data_dependency_freshness(project_path),
65 check_pipeline_glue_code(project_path),
66 check_configuration_debt(project_path),
67 check_dead_code_elimination(project_path),
68 check_abstraction_boundaries(project_path),
69 check_feedback_loop_detection(project_path),
70 check_technical_debt_quantification(project_path),
71 ]
72}
73
/// Inspects one source file's text and returns the labels of the isolation
/// patterns it appears to use, based on plain substring matches.
///
/// Possible labels, always in this order: `feature_flags`,
/// `generic_abstractions`, `trait_abstractions`, `visibility_control`.
fn classify_isolation_patterns(content: &str) -> Vec<&'static str> {
    let has = |needle: &str| content.contains(needle);

    // (label, whether the file text shows that pattern)
    let findings = [
        (
            "feature_flags",
            has("#[cfg(feature =") || has("feature_enabled!") || has("Feature::"),
        ),
        ("generic_abstractions", has("impl<T>") && has("T:")),
        ("trait_abstractions", has("trait ") && has("impl ")),
        ("visibility_control", has("pub(crate)") || has("pub(super)")),
    ];

    findings
        .iter()
        .filter(|(_, present)| *present)
        .map(|(label, _)| *label)
        .collect()
}
94
95fn scan_isolation_indicators(project_path: &Path) -> Vec<&'static str> {
97 scan_src_files(project_path, |content, indicators| {
98 indicators.extend(classify_isolation_patterns(content));
99 })
100}
101
102pub fn check_entanglement_detection(project_path: &Path) -> CheckItem {
109 let start = Instant::now();
110 let mut item = CheckItem::new(
111 "MTD-01",
112 "Entanglement (CACE) Detection",
113 "Feature changes are isolated; changing one doesn't silently affect others",
114 )
115 .with_severity(Severity::Major)
116 .with_tps("Kaizen — root cause analysis");
117
118 let isolation_indicators = scan_isolation_indicators(project_path);
119
120 let tests_dir = project_path.join("tests");
122 let has_test_structure = tests_dir.exists();
123
124 item = item.with_evidence(Evidence {
125 evidence_type: EvidenceType::StaticAnalysis,
126 description: format!(
127 "Isolation indicators: {:?}, test_structure={}",
128 isolation_indicators, has_test_structure
129 ),
130 data: None,
131 files: Vec::new(),
132 });
133
134 item = apply_check_outcome(
135 item,
136 &[
137 (isolation_indicators.len() >= 3, CheckOutcome::Pass),
138 (
139 !isolation_indicators.is_empty(),
140 CheckOutcome::Partial("Partial feature isolation patterns detected"),
141 ),
142 (true, CheckOutcome::Partial("Consider adding feature isolation patterns")),
143 ],
144 );
145
146 item.finish_timed(start)
147}
148
149fn scan_cascade_indicators(project_path: &Path) -> Vec<&'static str> {
151 scan_src_files(project_path, |content, indicators| {
152 if content.contains("post_process") && content.contains("model") {
153 indicators.push("post_processing");
154 }
155 if content.contains("correction") || content.contains("fix_output") {
156 indicators.push("correction_code");
157 }
158 if content.contains("ensemble") {
159 indicators.push("ensemble (intentional)");
160 }
161 })
162}
163
164pub fn check_correction_cascade_prevention(project_path: &Path) -> CheckItem {
171 let start = Instant::now();
172 let mut item = CheckItem::new(
173 "MTD-02",
174 "Correction Cascade Prevention",
175 "No model exists solely to correct another model's errors",
176 )
177 .with_severity(Severity::Major)
178 .with_tps("Kaizen — fix root cause in Model A");
179
180 let cascade_indicators = scan_cascade_indicators(project_path);
181
182 let has_architecture_doc = path_exists_any(
184 project_path,
185 &["docs/architecture.md", "ARCHITECTURE.md", "docs/pipeline.md"],
186 );
187
188 item = item.with_evidence(Evidence {
189 evidence_type: EvidenceType::StaticAnalysis,
190 description: format!(
191 "Cascade indicators: {:?}, architecture_doc={}",
192 cascade_indicators, has_architecture_doc
193 ),
194 data: None,
195 files: Vec::new(),
196 });
197
198 let no_cascades =
199 cascade_indicators.is_empty() || cascade_indicators.iter().all(|s| s.contains("ensemble"));
200 item = apply_check_outcome(
201 item,
202 &[
203 (no_cascades, CheckOutcome::Pass),
204 (
205 has_architecture_doc,
206 CheckOutcome::Partial(
207 "Potential cascades - verify intentional in architecture doc",
208 ),
209 ),
210 (true, CheckOutcome::Partial("Review for correction cascades")),
211 ],
212 );
213
214 item.finish_timed(start)
215}
216
217pub fn check_undeclared_consumer_detection(project_path: &Path) -> CheckItem {
224 let start = Instant::now();
225 let mut item = CheckItem::new(
226 "MTD-03",
227 "Undeclared Consumer Detection",
228 "All model consumers are documented and access-controlled",
229 )
230 .with_severity(Severity::Major)
231 .with_tps("Visibility across downstream supply chain");
232
233 let has_api_docs = path_exists_any(project_path, &["docs/api.md", "API.md"]);
235
236 let mut pub_items = 0;
238 let mut pub_crate_items = 0;
239
240 if let Ok(entries) = glob::glob(&format!("{}/src/**/*.rs", project_path.display())) {
241 for entry in entries.flatten() {
242 if let Ok(content) = std::fs::read_to_string(&entry) {
243 pub_items += content.matches("pub fn ").count();
244 pub_items += content.matches("pub struct ").count();
245 pub_items += content.matches("pub enum ").count();
246 pub_crate_items += content.matches("pub(crate)").count();
247 }
248 }
249 }
250
251 let lib_rs = project_path.join("src/lib.rs");
253 let has_controlled_exports = file_contains_any(&lib_rs, &["pub use ", "pub mod "]);
254
255 item = item.with_evidence(Evidence {
256 evidence_type: EvidenceType::StaticAnalysis,
257 description: format!(
258 "Consumer control: api_docs={}, pub_items={}, pub_crate={}, controlled_exports={}",
259 has_api_docs, pub_items, pub_crate_items, has_controlled_exports
260 ),
261 data: None,
262 files: Vec::new(),
263 });
264
265 item = apply_check_outcome(
266 item,
267 &[
268 (has_controlled_exports && pub_crate_items > 0, CheckOutcome::Pass),
269 (
270 has_controlled_exports,
271 CheckOutcome::Partial(
272 "Controlled exports but consider pub(crate) for internal items",
273 ),
274 ),
275 (true, CheckOutcome::Partial("Add explicit API boundary control")),
276 ],
277 );
278
279 item.finish_timed(start)
280}
281
282pub fn check_data_dependency_freshness(project_path: &Path) -> CheckItem {
289 let start = Instant::now();
290 let mut item = CheckItem::new(
291 "MTD-04",
292 "Data Dependency Freshness",
293 "Training data dependencies are current and maintained",
294 )
295 .with_severity(Severity::Major)
296 .with_tps("Muda (Inventory) — prevent data staleness");
297
298 let has_dvc = path_exists_any(project_path, &[".dvc", "dvc.yaml"]);
300 let has_data_dir = project_path.join("data").exists();
301
302 let cargo_toml = project_path.join("Cargo.toml");
304 let has_data_deps = file_contains_any(&cargo_toml, &["alimentar", "parquet", "arrow", "csv"]);
305
306 let has_data_docs =
308 path_exists_any(project_path, &["docs/data.md", "DATA.md", "data/README.md"]);
309
310 item = item.with_evidence(Evidence {
311 evidence_type: EvidenceType::StaticAnalysis,
312 description: format!(
313 "Data freshness: dvc={}, data_dir={}, data_deps={}, data_docs={}",
314 has_dvc, has_data_dir, has_data_deps, has_data_docs
315 ),
316 data: None,
317 files: Vec::new(),
318 });
319
320 item = apply_check_outcome(
321 item,
322 &[
323 (has_dvc && has_data_docs, CheckOutcome::Pass),
324 (has_dvc || has_data_docs, CheckOutcome::Partial("Partial data management setup")),
325 (!has_data_dir && !has_data_deps, CheckOutcome::Pass),
326 (true, CheckOutcome::Partial("Consider adding data versioning (DVC or similar)")),
327 ],
328 );
329
330 item.finish_timed(start)
331}
332
333fn scan_standardization_indicators(project_path: &Path) -> Vec<&'static str> {
335 scan_src_files(project_path, |content, indicators| {
336 if content.contains("trait Pipeline") || content.contains("impl Pipeline") {
337 indicators.push("pipeline_trait");
338 }
339 if content.contains("Stage") || content.contains("Step") {
340 indicators.push("stage_abstraction");
341 }
342 if content.contains("Builder") {
343 indicators.push("builder_pattern");
344 }
345 if content.contains("impl From<") || content.contains("impl Into<") {
346 indicators.push("type_conversions");
347 }
348 })
349}
350
351pub fn check_pipeline_glue_code(project_path: &Path) -> CheckItem {
358 let start = Instant::now();
359 let mut item = CheckItem::new(
360 "MTD-05",
361 "Pipeline Glue Code Minimization",
362 "Pipeline code uses standardized connectors, not ad-hoc scripts",
363 )
364 .with_severity(Severity::Major)
365 .with_tps("Muda (Motion) — standardization");
366
367 let has_pipeline_module =
368 path_exists_any(project_path, &["src/pipeline.rs", "src/pipeline/mod.rs"]);
369
370 let standardization_indicators = scan_standardization_indicators(project_path);
371
372 item = item.with_evidence(Evidence {
373 evidence_type: EvidenceType::StaticAnalysis,
374 description: format!(
375 "Standardization: pipeline_module={}, indicators={:?}",
376 has_pipeline_module, standardization_indicators
377 ),
378 data: None,
379 files: Vec::new(),
380 });
381
382 item = apply_check_outcome(
383 item,
384 &[
385 (has_pipeline_module && standardization_indicators.len() >= 2, CheckOutcome::Pass),
386 (
387 has_pipeline_module || !standardization_indicators.is_empty(),
388 CheckOutcome::Partial("Partial pipeline standardization"),
389 ),
390 (true, CheckOutcome::Partial("Consider standardized pipeline abstractions")),
391 ],
392 );
393
394 item.finish_timed(start)
395}
396
397pub fn check_configuration_debt(project_path: &Path) -> CheckItem {
404 let start = Instant::now();
405 let mut item = CheckItem::new(
406 "MTD-06",
407 "Configuration Debt Prevention",
408 "All hyperparameters and configurations are version-controlled",
409 )
410 .with_severity(Severity::Major)
411 .with_tps("Reproducibility requirement");
412
413 let config_files = [
415 project_path.join("config"),
416 project_path.join("configs"),
417 project_path.join("batuta.toml"),
418 project_path.join("config.toml"),
419 project_path.join("settings.toml"),
420 ];
421
422 let mut config_found = Vec::new();
423 for path in &config_files {
424 if path.exists() {
425 config_found.push(path.file_name().unwrap_or_default().to_string_lossy().to_string());
426 }
427 }
428
429 let has_config_struct = glob::glob(&format!("{}/src/**/*.rs", project_path.display()))
431 .ok()
432 .map(|entries| {
433 entries.flatten().any(|p| {
434 std::fs::read_to_string(&p)
435 .ok()
436 .map(|c| {
437 (c.contains("struct") && c.to_lowercase().contains("config"))
438 || c.contains("Deserialize")
439 })
440 .unwrap_or(false)
441 })
442 })
443 .unwrap_or(false);
444
445 let has_env_docs = path_exists_any(project_path, &[".env.example", ".env.template"]);
447
448 item = item.with_evidence(Evidence {
449 evidence_type: EvidenceType::StaticAnalysis,
450 description: format!(
451 "Config: files={:?}, typed_struct={}, env_docs={}",
452 config_found, has_config_struct, has_env_docs
453 ),
454 data: None,
455 files: Vec::new(),
456 });
457
458 item = apply_check_outcome(
459 item,
460 &[
461 (has_config_struct && !config_found.is_empty(), CheckOutcome::Pass),
462 (
463 has_config_struct || !config_found.is_empty(),
464 CheckOutcome::Partial("Configuration exists but consider typed structs"),
465 ),
466 (true, CheckOutcome::Partial("Add explicit configuration management")),
467 ],
468 );
469
470 item.finish_timed(start)
471}
472
473pub fn check_dead_code_elimination(project_path: &Path) -> CheckItem {
480 let start = Instant::now();
481 let mut item = CheckItem::new(
482 "MTD-07",
483 "Dead Code Elimination",
484 "No unused model code paths exist in production",
485 )
486 .with_severity(Severity::Major)
487 .with_tps("Muda (Inventory) — code hygiene");
488
489 let lib_rs = project_path.join("src/lib.rs");
491 let main_rs = project_path.join("src/main.rs");
492 let root_files = [&lib_rs, &main_rs];
493
494 let allows_dead_code =
495 root_files.iter().any(|p| file_contains_any(p, &["#![allow(dead_code)]"]));
496
497 let denies_dead_code = root_files
498 .iter()
499 .any(|p| file_contains_any(p, &["#![deny(dead_code)]", "#![warn(dead_code)]"]));
500
501 let has_udeps_ci = check_ci_for_content(project_path, "udeps");
503
504 let makefile = project_path.join("Makefile");
506 let has_cleanup = file_contains_any(&makefile, &["clean", "udeps"]);
507
508 item = item.with_evidence(Evidence {
509 evidence_type: EvidenceType::StaticAnalysis,
510 description: format!(
511 "Dead code: allows={}, denies={}, udeps_ci={}, cleanup={}",
512 allows_dead_code, denies_dead_code, has_udeps_ci, has_cleanup
513 ),
514 data: None,
515 files: Vec::new(),
516 });
517
518 item = apply_check_outcome(
519 item,
520 &[
521 (denies_dead_code || has_udeps_ci, CheckOutcome::Pass),
522 (
523 !allows_dead_code,
524 CheckOutcome::Partial("Default dead code warnings (consider explicit deny)"),
525 ),
526 (
527 has_cleanup,
528 CheckOutcome::Partial(
529 "Dead code allowed (development phase), cleanup targets available",
530 ),
531 ),
532 (
533 true,
534 CheckOutcome::Partial(
535 "Dead code warnings suppressed - verify intentional for development",
536 ),
537 ),
538 ],
539 );
540
541 item.finish_timed(start)
542}
543
544pub fn check_abstraction_boundaries(project_path: &Path) -> CheckItem {
551 let start = Instant::now();
552 let mut item = CheckItem::new(
553 "MTD-08",
554 "Abstraction Boundary Verification",
555 "ML code respects clean abstraction boundaries",
556 )
557 .with_severity(Severity::Major)
558 .with_tps("Clean Architecture principle");
559
560 let src_dir = project_path.join("src");
562 let mut module_count = 0;
563 let mut has_mod_files = false;
564
565 if src_dir.exists() {
566 if let Ok(entries) = std::fs::read_dir(&src_dir) {
567 for entry in entries.flatten() {
568 let path = entry.path();
569 if path.is_dir() {
570 module_count += 1;
571 } else if path.file_name().map(|n| n == "mod.rs").unwrap_or(false) {
572 has_mod_files = true;
573 }
574 }
575 }
576 }
577
578 let common_layers = ["api", "domain", "service", "repository", "model", "types"];
580 let layer_dirs: Vec<_> = common_layers
581 .iter()
582 .filter(|layer| src_dir.join(layer).exists())
583 .map(|s| (*s).to_string())
584 .collect();
585
586 let has_trait_boundaries = glob::glob(&format!("{}/src/**/*.rs", project_path.display()))
588 .ok()
589 .map(|entries| {
590 entries
591 .flatten()
592 .filter(|p| {
593 std::fs::read_to_string(p)
594 .ok()
595 .map(|c| c.contains("pub trait "))
596 .unwrap_or(false)
597 })
598 .count()
599 })
600 .unwrap_or(0);
601
602 item = item.with_evidence(Evidence {
603 evidence_type: EvidenceType::StaticAnalysis,
604 description: format!(
605 "Boundaries: modules={}, mod_files={}, layers={:?}, traits={}",
606 module_count, has_mod_files, layer_dirs, has_trait_boundaries
607 ),
608 data: None,
609 files: Vec::new(),
610 });
611
612 item = apply_check_outcome(
613 item,
614 &[
615 (module_count >= 3 && has_trait_boundaries >= 2, CheckOutcome::Pass),
616 (
617 module_count >= 2 || has_trait_boundaries > 0,
618 CheckOutcome::Partial("Partial abstraction boundaries"),
619 ),
620 (true, CheckOutcome::Partial("Consider module-based architecture")),
621 ],
622 );
623
624 item.finish_timed(start)
625}
626
627pub fn check_feedback_loop_detection(project_path: &Path) -> CheckItem {
634 let start = Instant::now();
635 let mut item = CheckItem::new(
636 "MTD-09",
637 "Feedback Loop Detection",
638 "No hidden feedback loops where model outputs influence future training",
639 )
640 .with_severity(Severity::Major)
641 .with_tps("Entanglement prevention");
642
643 let has_training_module =
645 path_exists_any(project_path, &["src/training.rs", "src/train.rs", "src/training/mod.rs"]);
646
647 let has_inference_module = path_exists_any(
648 project_path,
649 &["src/inference.rs", "src/infer.rs", "src/serve.rs", "src/inference/mod.rs"],
650 );
651
652 let has_feedback_docs = glob::glob(&format!("{}/docs/**/*.md", project_path.display()))
654 .ok()
655 .map(|entries| {
656 entries.flatten().any(|p| {
657 std::fs::read_to_string(&p)
658 .ok()
659 .map(|c| c.contains("feedback") || c.contains("loop"))
660 .unwrap_or(false)
661 })
662 })
663 .unwrap_or(false);
664
665 let cargo_toml = project_path.join("Cargo.toml");
667 let has_feature_separation =
668 file_contains_all(&cargo_toml, &[&["training", "train"], &["inference", "serve"]]);
669
670 item = item.with_evidence(Evidence {
671 evidence_type: EvidenceType::StaticAnalysis,
672 description: format!(
673 "Feedback loops: training_module={}, inference_module={}, docs={}, features={}",
674 has_training_module, has_inference_module, has_feedback_docs, has_feature_separation
675 ),
676 data: None,
677 files: Vec::new(),
678 });
679
680 item = apply_check_outcome(
681 item,
682 &[
683 (has_training_module && has_inference_module, CheckOutcome::Pass),
684 (
685 !has_training_module && !has_inference_module,
686 CheckOutcome::Partial("No explicit training/inference separation (verify N/A)"),
687 ),
688 (true, CheckOutcome::Partial("Consider separating training and inference paths")),
689 ],
690 );
691
692 item.finish_timed(start)
693}
694
695pub fn check_technical_debt_quantification(project_path: &Path) -> CheckItem {
703 let start = Instant::now();
704 let mut item = CheckItem::new(
705 "MTD-10",
706 "Technical Debt Quantification",
707 "ML technical debt is measured and trending downward",
708 )
709 .with_severity(Severity::Major)
710 .with_tps("Kaizen — continuous measurement");
711
712 let has_pmat_ci = check_ci_for_content(project_path, "pmat");
714 let has_tdg_tracking = path_exists_any(project_path, &["tdg_history.json", "metrics/tdg.json"]);
715
716 let makefile = project_path.join("Makefile");
718 let has_quality_targets = file_contains_any(&makefile, &["quality", "metrics", "pmat", "tdg"]);
719
720 let has_quality_ci = check_ci_for_content(project_path, "quality")
722 || check_ci_for_content(project_path, "lint")
723 || check_ci_for_content(project_path, "clippy");
724
725 item = item.with_evidence(Evidence {
726 evidence_type: EvidenceType::StaticAnalysis,
727 description: format!(
728 "Debt tracking: pmat_ci={}, tdg_history={}, quality_targets={}, quality_ci={}",
729 has_pmat_ci, has_tdg_tracking, has_quality_targets, has_quality_ci
730 ),
731 data: None,
732 files: Vec::new(),
733 });
734
735 item = apply_check_outcome(
736 item,
737 &[
738 (has_pmat_ci || has_tdg_tracking, CheckOutcome::Pass),
739 (
740 has_quality_targets && has_quality_ci,
741 CheckOutcome::Partial("Quality checks exist, consider TDG tracking"),
742 ),
743 (
744 has_quality_ci,
745 CheckOutcome::Partial("CI quality checks, consider formal debt tracking"),
746 ),
747 (true, CheckOutcome::Fail("No technical debt quantification")),
748 ],
749 );
750
751 item.finish_timed(start)
752}
753
/// Returns `true` when any of the four known GitHub Actions workflow files
/// (`ci.yml`, `test.yml`, `rust.yml`, `quality.yml` under
/// `.github/workflows`) exists, is readable as text and contains `content`
/// as a substring. Other workflow file names are not inspected.
fn check_ci_for_content(project_path: &Path, content: &str) -> bool {
    const WORKFLOW_FILES: [&str; 4] = [
        ".github/workflows/ci.yml",
        ".github/workflows/test.yml",
        ".github/workflows/rust.yml",
        ".github/workflows/quality.yml",
    ];

    WORKFLOW_FILES.iter().any(|relative| {
        let workflow = project_path.join(relative);
        workflow.exists()
            && matches!(
                std::fs::read_to_string(&workflow),
                Ok(text) if text.contains(content)
            )
    })
}
774
775#[cfg(test)]
776#[path = "technical_debt_tests.rs"]
777mod tests;