use std::collections::BTreeMap;
use std::path::PathBuf;
use crate::substrate::{DiffRange, LangSummary, RepoSubstrate, SubstrateFile};
use anyhow::Result;
use tokmd_settings::ScanOptions;
use tokmd_types::ChildIncludeMode;
pub fn build_substrate(
repo_root: &str,
scan_options: &ScanOptions,
module_roots: &[String],
module_depth: usize,
diff_range: Option<DiffRange>,
) -> Result<RepoSubstrate> {
let paths = vec![PathBuf::from(repo_root)];
let languages = tokmd_scan::scan(&paths, scan_options)?;
let file_rows = tokmd_model::collect_file_rows(
&languages,
module_roots,
module_depth,
ChildIncludeMode::ParentsOnly,
Some(std::path::Path::new(repo_root)),
);
let strip_prefix = std::path::Path::new(repo_root);
let normalized_changed: Vec<String> = diff_range
.as_ref()
.map(|dr| {
dr.changed_files
.iter()
.map(|s| tokmd_model::normalize_path(std::path::Path::new(s), Some(strip_prefix)))
.collect()
})
.unwrap_or_default();
let changed_set: std::collections::BTreeSet<&str> =
normalized_changed.iter().map(|s| s.as_str()).collect();
let files: Vec<SubstrateFile> = file_rows
.into_iter()
.map(|row| SubstrateFile {
in_diff: changed_set.contains(row.path.as_str()),
path: row.path,
lang: row.lang,
code: row.code,
lines: row.lines,
bytes: row.bytes,
tokens: row.tokens,
module: row.module,
})
.collect();
let mut lang_summary: BTreeMap<String, LangSummary> = BTreeMap::new();
for f in &files {
let entry = lang_summary.entry(f.lang.clone()).or_insert(LangSummary {
files: 0,
code: 0,
lines: 0,
bytes: 0,
tokens: 0,
});
entry.files += 1;
entry.code += f.code;
entry.lines += f.lines;
entry.bytes += f.bytes;
entry.tokens += f.tokens;
}
let total_tokens: usize = files.iter().map(|f| f.tokens).sum();
let total_bytes: usize = files.iter().map(|f| f.bytes).sum();
let total_code_lines: usize = files.iter().map(|f| f.code).sum();
Ok(RepoSubstrate {
repo_root: repo_root.to_string(),
files,
lang_summary,
diff_range,
total_tokens,
total_bytes,
total_code_lines,
})
}
#[cfg(test)]
mod tests {
use super::*;
use tokmd_settings::ScanOptions;
#[test]
fn build_substrate_scans_self() {
let manifest_dir = env!("CARGO_MANIFEST_DIR");
let substrate = build_substrate(
&format!("{}/src", manifest_dir),
&ScanOptions::default(),
&[],
2,
None,
)
.unwrap();
assert!(!substrate.files.is_empty());
assert!(substrate.lang_summary.contains_key("Rust"));
assert!(substrate.total_code_lines > 0);
assert!(substrate.diff_range.is_none());
}
#[test]
fn build_substrate_with_diff_range() {
let manifest_dir = env!("CARGO_MANIFEST_DIR");
let diff = DiffRange {
base: "main".to_string(),
head: "HEAD".to_string(),
changed_files: vec!["src/lib.rs".to_string()],
commit_count: 1,
insertions: 5,
deletions: 2,
};
let substrate =
build_substrate(manifest_dir, &ScanOptions::default(), &[], 2, Some(diff)).unwrap();
assert!(substrate.diff_range.is_some());
let diff_files: Vec<&str> = substrate
.files
.iter()
.filter(|f| f.in_diff)
.map(|f| f.path.as_str())
.collect();
assert!(!diff_files.is_empty());
assert!(diff_files.contains(&"src/lib.rs"));
let non_diff: Vec<&str> = substrate
.files
.iter()
.filter(|f| !f.in_diff && f.path.contains("substrate_builder"))
.map(|f| f.path.as_str())
.collect();
assert!(!non_diff.is_empty());
}
#[test]
fn build_substrate_errors_on_missing_root() {
let dir = tempfile::tempdir().expect("temp dir");
let missing = dir.path().join("definitely-not-created");
let result = build_substrate(
missing.to_string_lossy().as_ref(),
&ScanOptions::default(),
&[],
2,
None,
);
assert!(result.is_err());
}
}