use std::collections::BTreeMap;
use std::path::Path;
use crate::scanner::extensions::folder_key;
use crate::scanner::result::{
FileRecord, FolderRecord, LanguageStat, ProjectMetadata, SymbolKind, SymbolRecord,
};
/// Running totals gathered for a single folder by `build_folder_records`.
///
/// `Default` yields an all-empty aggregation, which is what a folder starts
/// from the first time one of its files is seen.
#[derive(Default)]
struct FolderAggregation {
/// Number of files counted for the folder.
files: usize,
/// Sum of `line_count` over the folder's files.
lines: usize,
/// Language name -> number of files in the folder written in that language.
languages: BTreeMap<String, usize>,
/// Names of the folder's top type-definition symbols (at most five),
/// most important first; empty when the folder has none.
symbol_names: Vec<String>,
}
/// Aggregates scanned files and symbols into one [`FolderRecord`] per folder.
///
/// Files are grouped by `folder_key(relative_path)`. For every folder this
/// computes the file count, the total line count, the dominant language (the
/// language with the most files) and the names of up to five type-definition
/// symbols, ordered from most to least important.
///
/// Symbols whose folder contains no scanned file are ignored, and symbols that
/// are not type definitions never contribute to `key_symbol_names`.
///
/// # Returns
///
/// Records sorted by `line_count` descending, then by `relative_path`
/// ascending. `id` and `relative_path` both hold the folder key.
pub fn build_folder_records(files: &[FileRecord], symbols: &[SymbolRecord]) -> Vec<FolderRecord> {
    // Maximum number of symbol names reported per folder.
    const KEY_SYMBOL_LIMIT: usize = 5;

    // Total order over symbols: higher importance first, NaN scores last,
    // ties broken by id. `sort_by` requires a total order and may panic or
    // produce unspecified results otherwise, which a bare
    // `partial_cmp(..).unwrap_or(Equal)` cannot guarantee once NaN appears.
    fn by_importance(a: &SymbolRecord, b: &SymbolRecord) -> std::cmp::Ordering {
        use std::cmp::Ordering;

        let by_score = match (a.importance_score.is_nan(), b.importance_score.is_nan()) {
            (true, true) => Ordering::Equal,
            (true, false) => Ordering::Greater,
            (false, true) => Ordering::Less,
            // Both are real numbers, so `partial_cmp` always yields `Some`.
            (false, false) => b
                .importance_score
                .partial_cmp(&a.importance_score)
                .unwrap_or(Ordering::Equal),
        };
        by_score.then_with(|| a.id.cmp(&b.id))
    }

    // Pass 1: per-folder file, line and language tallies.
    let mut by_folder: BTreeMap<String, FolderAggregation> = BTreeMap::new();
    for file in files {
        let key = folder_key(&file.relative_path).to_string();
        let agg = by_folder.entry(key).or_default();
        agg.files += 1;
        agg.lines += file.line_count;
        *agg.languages.entry(file.language.clone()).or_insert(0) += 1;
    }

    // Pass 2: bucket type-definition symbols by the folder of their file.
    let mut symbols_by_folder: BTreeMap<String, Vec<&SymbolRecord>> = BTreeMap::new();
    for sym in symbols.iter().filter(|s| s.kind.is_type_definition()) {
        let key = folder_key(&sym.file_path).to_string();
        symbols_by_folder.entry(key).or_default().push(sym);
    }

    // Pass 3: keep the top symbol names for folders that actually have files.
    for (folder, mut syms) in symbols_by_folder {
        if let Some(agg) = by_folder.get_mut(&folder) {
            syms.sort_by(|a, b| by_importance(a, b));
            agg.symbol_names = syms
                .iter()
                .take(KEY_SYMBOL_LIMIT)
                .map(|s| s.name.clone())
                .collect();
        }
    }

    let mut records: Vec<FolderRecord> = by_folder
        .into_iter()
        .map(|(folder, data)| {
            // `max_by_key` keeps the last maximum and the map iterates in key
            // order, so a tie resolves to the alphabetically last language.
            let dominant = data
                .languages
                .iter()
                .max_by_key(|(_, count)| *count)
                .map(|(lang, _)| lang.clone())
                .unwrap_or_default();
            FolderRecord {
                id: folder.clone(),
                relative_path: folder,
                file_count: data.files,
                line_count: data.lines,
                dominant_language: dominant,
                key_symbol_names: data.symbol_names,
            }
        })
        .collect();

    // Largest folders first; the path makes the order deterministic on ties.
    records.sort_by(|a, b| {
        b.line_count
            .cmp(&a.line_count)
            .then_with(|| a.relative_path.cmp(&b.relative_path))
    });
    records
}
/// Builds the project-level summary for a scan.
///
/// * `root_path` - project root; stored with backslashes replaced by `/`, and
///   its final path component (when it is valid UTF-8) becomes the project
///   name, otherwise the name is empty.
/// * `files` - scanned files; supply the totals and the per-language stats.
/// * `test_commands`, `code_patterns`, `last_scanned_at` - stored unchanged.
///
/// # Returns
///
/// A [`ProjectMetadata`] whose `languages` are sorted by line count
/// descending, then by name ascending. Each language's `percentage` is its
/// share of all lines (0–100), or `0.0` when the project has no lines at all.
pub fn build_project_metadata(
    root_path: &Path,
    files: &[FileRecord],
    test_commands: BTreeMap<String, String>,
    code_patterns: Vec<String>,
    last_scanned_at: String,
) -> ProjectMetadata {
    let name = match root_path.file_name().and_then(|component| component.to_str()) {
        Some(component) => component.to_string(),
        None => String::new(),
    };

    // Single pass: overall line total plus (file count, line count) per language.
    let mut total_lines: usize = 0;
    let mut per_language: BTreeMap<String, (usize, usize)> = BTreeMap::new();
    for record in files {
        total_lines += record.line_count;
        let (file_count, line_count) = per_language
            .entry(record.language.clone())
            .or_insert((0, 0));
        *file_count += 1;
        *line_count += record.line_count;
    }

    let mut languages = Vec::with_capacity(per_language.len());
    for (name, (file_count, line_count)) in per_language {
        // Guard the division so an all-empty project reports 0% everywhere.
        let percentage = if total_lines == 0 {
            0.0
        } else {
            (line_count as f64) / (total_lines as f64) * 100.0
        };
        languages.push(LanguageStat {
            name,
            file_count,
            line_count,
            percentage,
        });
    }

    // Biggest language first; equal line counts fall back to alphabetical order.
    languages.sort_by(|a, b| match b.line_count.cmp(&a.line_count) {
        std::cmp::Ordering::Equal => a.name.cmp(&b.name),
        unequal => unequal,
    });

    // Must run before `test_commands` is moved into the result.
    let detected_test_command =
        crate::scanner::commands::select_preferred_test_command(&test_commands);

    ProjectMetadata {
        name,
        root_path: root_path.to_string_lossy().replace('\\', "/"),
        languages,
        test_commands,
        detected_test_command,
        code_patterns,
        total_files: files.len(),
        total_lines,
        last_scanned_at,
    }
}
/// Renders a size-limited text outline of the most important symbols, grouped
/// by file.
///
/// Only type definitions, functions and methods are considered. Symbols are
/// ranked by `importance_score` descending (NaN scores last, ties broken by
/// `id`). Files appear in the order of their best-ranked symbol, and within a
/// file the symbols keep their rank order. Each file is written as
///
/// ```text
/// path/to/file:
///  <signature, or "<kind keyword> <name>" when the signature is empty>
/// ```
///
/// # Arguments
///
/// * `symbols` - candidate symbols.
/// * `_files` - currently unused.
/// * `token_budget` - output limit; the map may use at most
///   `token_budget * 4` bytes (saturating).
///
/// # Returns
///
/// The outline with trailing whitespace removed, or an empty string when the
/// budget is zero or nothing fits. Output stops at the first entry that does
/// not fit. A file header is only written together with its first symbol
/// line, so the map never ends in a header with nothing under it.
pub fn build_repo_map(
    symbols: &[SymbolRecord],
    _files: &[FileRecord],
    token_budget: usize,
) -> String {
    // Total order over symbols: higher importance first, NaN scores last,
    // ties broken by id. `sort_by` requires a total order and may panic or
    // produce unspecified results otherwise, which a bare
    // `partial_cmp(..).unwrap_or(Equal)` cannot guarantee once NaN appears.
    fn rank_order(a: &SymbolRecord, b: &SymbolRecord) -> std::cmp::Ordering {
        use std::cmp::Ordering;

        let by_score = match (a.importance_score.is_nan(), b.importance_score.is_nan()) {
            (true, true) => Ordering::Equal,
            (true, false) => Ordering::Greater,
            (false, true) => Ordering::Less,
            // Both are real numbers, so `partial_cmp` always yields `Some`.
            (false, false) => b
                .importance_score
                .partial_cmp(&a.importance_score)
                .unwrap_or(Ordering::Equal),
        };
        by_score.then_with(|| a.id.cmp(&b.id))
    }

    // One outline line per symbol: prefer the signature, else "<keyword> <name>".
    fn render_line(sym: &SymbolRecord) -> String {
        if sym.signature.is_empty() {
            format!(" {} {}\n", sym.kind.keyword(), sym.name)
        } else {
            format!(" {}\n", sym.signature)
        }
    }

    let char_budget = token_budget.saturating_mul(4);
    if char_budget == 0 {
        return String::new();
    }

    let mut ranked: Vec<&SymbolRecord> = symbols
        .iter()
        .filter(|s| {
            s.kind.is_type_definition()
                || matches!(s.kind, SymbolKind::Function | SymbolKind::Method)
        })
        .collect();
    ranked.sort_by(|a, b| rank_order(a, b));

    // Group by file while remembering the order in which files first appear
    // in the ranking; `slot_of` maps a path to its index in `groups`.
    let mut groups: Vec<(&str, Vec<&SymbolRecord>)> = Vec::new();
    let mut slot_of: BTreeMap<&str, usize> = BTreeMap::new();
    for sym in ranked {
        let path = sym.file_path.as_str();
        let slot = *slot_of.entry(path).or_insert_with(|| {
            groups.push((path, Vec::new()));
            groups.len() - 1
        });
        groups[slot].1.push(sym);
    }

    let mut output = String::new();
    let mut remaining = char_budget;
    'files: for (path, syms) in &groups {
        let header = format!("{path}:\n");
        // The header is charged together with the file's first symbol line so
        // that it is never emitted on its own.
        let mut header_pending = true;
        for sym in syms {
            let line = render_line(sym);
            let needed = if header_pending {
                header.len() + line.len()
            } else {
                line.len()
            };
            if needed > remaining {
                break 'files;
            }
            if header_pending {
                output.push_str(&header);
                header_pending = false;
            }
            output.push_str(&line);
            remaining -= needed;
        }
        if remaining == 0 {
            break;
        }
    }

    output.trim_end().to_string()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Minimal `FileRecord` fixture: only path, language and line count vary.
    fn source_file(path: &str, lang: &str, lines: usize) -> FileRecord {
        // `rsplit` always yields at least one piece, so this never panics.
        let file_name = path.rsplit('/').next().unwrap().to_string();
        FileRecord {
            id: path.to_string(),
            relative_path: path.to_string(),
            file_name,
            language: lang.to_string(),
            line_count: lines,
            size_bytes: 0,
            last_modified_unix_ms: 0,
            imports: Vec::new(),
            churn_score: 0.0,
            corresponding_test_file: None,
        }
    }

    /// Minimal `SymbolRecord` fixture on line 1 with an empty signature.
    fn scored_symbol(name: &str, kind: SymbolKind, file: &str, score: f64) -> SymbolRecord {
        SymbolRecord {
            id: format!("{file}:{name}:1"),
            name: name.to_string(),
            kind,
            file_path: file.to_string(),
            line: 1,
            signature: String::new(),
            container: None,
            reference_count: 0,
            importance_score: score,
        }
    }

    #[test]
    fn folder_records_sort_by_line_count_desc() {
        let files = [
            source_file("a/foo.rs", "rs", 10),
            source_file("b/bar.rs", "rs", 100),
            source_file("a/qux.rs", "rs", 5),
        ];

        let records = build_folder_records(&files, &[]);

        let (largest, runner_up) = (&records[0], &records[1]);
        assert_eq!(largest.relative_path, "b");
        assert_eq!(largest.line_count, 100);
        assert_eq!(runner_up.relative_path, "a");
        assert_eq!(runner_up.line_count, 15);
    }

    #[test]
    fn repo_map_includes_top_symbols_per_file() {
        let symbols = [
            scored_symbol("Foo", SymbolKind::StructDecl, "a.rs", 5.0),
            scored_symbol("bar", SymbolKind::Function, "a.rs", 2.0),
        ];

        let map = build_repo_map(&symbols, &[], 200);

        for expected in ["a.rs:", "struct Foo", "function bar"] {
            assert!(map.contains(expected));
        }
    }

    #[test]
    fn project_metadata_language_breakdown_sorted_by_lines() {
        let files = [source_file("a.rs", "rs", 10), source_file("b.ts", "ts", 50)];

        let proj = build_project_metadata(
            Path::new("/repo/proj"),
            &files,
            BTreeMap::new(),
            Vec::new(),
            String::from("2026-01-01T00:00:00Z"),
        );

        assert_eq!(proj.name, "proj");
        assert_eq!(proj.total_lines, 60);
        let order: Vec<&str> = proj.languages.iter().map(|l| l.name.as_str()).collect();
        assert_eq!(order[0], "ts");
        assert_eq!(order[1], "rs");
    }
}