tokmd_sensor/
substrate_builder.rs1use std::collections::BTreeMap;
4use std::path::PathBuf;
5
6use anyhow::Result;
7use tokmd_settings::ScanOptions;
8use tokmd_substrate::{DiffRange, LangSummary, RepoSubstrate, SubstrateFile};
9use tokmd_types::ChildIncludeMode;
10
11pub fn build_substrate(
16 repo_root: &str,
17 scan_options: &ScanOptions,
18 module_roots: &[String],
19 module_depth: usize,
20 diff_range: Option<DiffRange>,
21) -> Result<RepoSubstrate> {
22 let paths = vec![PathBuf::from(repo_root)];
23
24 let languages = tokmd_scan::scan(&paths, scan_options)?;
26
27 let file_rows = tokmd_model::collect_file_rows(
29 &languages,
30 module_roots,
31 module_depth,
32 ChildIncludeMode::ParentsOnly,
33 Some(std::path::Path::new(repo_root)),
34 );
35
36 let strip_prefix = std::path::Path::new(repo_root);
39 let normalized_changed: Vec<String> = diff_range
40 .as_ref()
41 .map(|dr| {
42 dr.changed_files
43 .iter()
44 .map(|s| tokmd_model::normalize_path(std::path::Path::new(s), Some(strip_prefix)))
45 .collect()
46 })
47 .unwrap_or_default();
48 let changed_set: std::collections::BTreeSet<&str> =
49 normalized_changed.iter().map(|s| s.as_str()).collect();
50
51 let files: Vec<SubstrateFile> = file_rows
53 .iter()
54 .map(|row| SubstrateFile {
55 path: row.path.clone(),
56 lang: row.lang.clone(),
57 code: row.code,
58 lines: row.lines,
59 bytes: row.bytes,
60 tokens: row.tokens,
61 module: row.module.clone(),
62 in_diff: changed_set.contains(row.path.as_str()),
63 })
64 .collect();
65
66 let mut lang_summary: BTreeMap<String, LangSummary> = BTreeMap::new();
68 for f in &files {
69 let entry = lang_summary.entry(f.lang.clone()).or_insert(LangSummary {
70 files: 0,
71 code: 0,
72 lines: 0,
73 bytes: 0,
74 tokens: 0,
75 });
76 entry.files += 1;
77 entry.code += f.code;
78 entry.lines += f.lines;
79 entry.bytes += f.bytes;
80 entry.tokens += f.tokens;
81 }
82
83 let total_tokens: usize = files.iter().map(|f| f.tokens).sum();
85 let total_bytes: usize = files.iter().map(|f| f.bytes).sum();
86 let total_code_lines: usize = files.iter().map(|f| f.code).sum();
87
88 Ok(RepoSubstrate {
89 repo_root: repo_root.to_string(),
90 files,
91 lang_summary,
92 diff_range,
93 total_tokens,
94 total_bytes,
95 total_code_lines,
96 })
97}
98
#[cfg(test)]
mod tests {
    use super::*;
    use tokmd_settings::ScanOptions;

    /// Scanning this crate's own `src` directory without a diff range finds
    /// Rust code and leaves `diff_range` unset.
    #[test]
    fn build_substrate_scans_self() {
        let src_dir = format!("{}/src", env!("CARGO_MANIFEST_DIR"));
        let options = ScanOptions::default();

        let substrate = build_substrate(&src_dir, &options, &[], 2, None).unwrap();

        assert!(!substrate.files.is_empty());
        assert!(substrate.lang_summary.contains_key("Rust"));
        assert!(substrate.total_code_lines > 0);
        assert!(substrate.diff_range.is_none());
    }

    /// A diff range naming `src/lib.rs` flags that file as `in_diff` while
    /// leaving the substrate builder source unflagged.
    #[test]
    fn build_substrate_with_diff_range() {
        let range = DiffRange {
            base: String::from("main"),
            head: String::from("HEAD"),
            changed_files: vec![String::from("src/lib.rs")],
            commit_count: 1,
            insertions: 5,
            deletions: 2,
        };

        let substrate = build_substrate(
            env!("CARGO_MANIFEST_DIR"),
            &ScanOptions::default(),
            &[],
            2,
            Some(range),
        )
        .unwrap();

        assert!(substrate.diff_range.is_some());

        // Split the scanned files in one pass: everything flagged as changed,
        // and the unflagged files whose path mentions the builder.
        let mut flagged: Vec<&str> = Vec::new();
        let mut unflagged_builder: Vec<&str> = Vec::new();
        for file in substrate.files.iter() {
            if file.in_diff {
                flagged.push(file.path.as_str());
            } else if file.path.contains("substrate_builder") {
                unflagged_builder.push(file.path.as_str());
            }
        }

        assert!(!flagged.is_empty());
        assert!(flagged.contains(&"src/lib.rs"));
        assert!(!unflagged_builder.is_empty());
    }

    /// Pointing the builder at a path that was never created yields an error.
    #[test]
    fn build_substrate_errors_on_missing_root() {
        let scratch = tempfile::tempdir().expect("temp dir");
        let absent = scratch.path().join("definitely-not-created");
        let absent_root = absent.to_string_lossy();

        let outcome = build_substrate(&absent_root, &ScanOptions::default(), &[], 2, None);

        assert!(outcome.is_err());
    }
}