pub mod call_edges;
pub mod load;
pub mod other_edges;
pub mod routes;
pub mod type_edges;
use crate::code_tree::models::ParseResult;
use crate::code_tree::parsers::{detect_languages, get_parser, language_for_path};
use crate::graph::dir_graph::DirGraph;
use std::collections::BTreeMap;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use walkdir::WalkDir;
/// Maps a parser-reported class `kind` string to the graph node type label.
///
/// `"struct"` becomes `"Struct"`, `"mixin"` becomes `"Mixin"`, and every
/// other value (including the empty string) falls back to `"Class"`.
/// The comparison is exact and case-sensitive.
pub(crate) fn class_node_type(kind: &str) -> &'static str {
    if kind == "struct" {
        "Struct"
    } else if kind == "mixin" {
        "Mixin"
    } else {
        "Class"
    }
}
pub fn run_with_options(
input: &Path,
verbose: bool,
include_tests: bool,
save_to: Option<&Path>,
max_loc_per_file: Option<usize>,
) -> Result<Arc<DirGraph>, String> {
let input = input.canonicalize().unwrap_or_else(|_| input.to_path_buf());
let (project_root, mut project_info) = if input.is_file() {
let project_root = input
.parent()
.map(PathBuf::from)
.unwrap_or_else(|| input.clone());
let info = crate::code_tree::manifest::read_manifest_file(&input, &project_root)
.ok_or_else(|| {
format!(
"Not a recognised manifest file: {}",
input.file_name().and_then(|o| o.to_str()).unwrap_or(""),
)
})?;
(project_root, Some(info))
} else if input.is_dir() {
let info = crate::code_tree::manifest::read_manifest(&input);
(input.clone(), info)
} else {
return Err(format!("Not a file or directory: {}", input.display()));
};
let mut combined = ParseResult::new();
let mut parsed_any = false;
if let Some(info) = &mut project_info {
if info.source_roots.is_empty() {
if verbose {
eprintln!(
"Manifest: {} ({}) — no source roots declared, scanning whole repo",
info.manifest_path,
info.build_system.as_deref().unwrap_or("")
);
}
} else {
let mut roots: Vec<_> = info.source_roots.clone();
if include_tests {
roots.extend(info.test_roots.iter().cloned());
}
if verbose {
eprintln!(
"Manifest: {} ({})",
info.manifest_path,
info.build_system.as_deref().unwrap_or("")
);
let labels: Vec<String> = roots
.iter()
.map(|r| {
r.path
.strip_prefix(&project_root)
.map(|p| p.display().to_string())
.unwrap_or_else(|_| r.path.display().to_string())
})
.collect();
eprintln!("Source roots: {}", labels.join(", "));
}
let t_parse = std::time::Instant::now();
for root in &roots {
if !root.path.is_dir() {
continue;
}
let result = parse_directory(&root.path, &project_root, verbose, max_loc_per_file);
combined.merge(result);
parsed_any = true;
}
if verbose && parsed_any {
eprintln!("[timing] parse: {:.3}s", t_parse.elapsed().as_secs_f64());
}
}
}
if !parsed_any {
if !project_root.is_dir() {
return Err(format!("Not a directory: {}", project_root.display()));
}
let t_parse = std::time::Instant::now();
let result = parse_directory(&project_root, &project_root, verbose, max_loc_per_file);
combined.merge(result);
if verbose {
eprintln!("[timing] parse: {:.3}s", t_parse.elapsed().as_secs_f64());
}
}
finalize_and_load(combined, project_info, verbose, save_to)
}
/// Walks `walk_dir`, groups the files found by language, and parses each
/// group with the parser registered for that language.
///
/// * `walk_dir` – directory to traverse. Entries are pruned with
///   `crate::code_tree::manifest::walk_filter`; entries the walker fails to
///   read are skipped, as are non-file entries and files for which
///   `language_for_path` returns `None`.
/// * `project_root` – forwarded to the parser and to `prefilter_oversized`.
/// * `verbose` – print detected languages, file counts, skips and timings
///   to stderr.
/// * `max_loc_per_file` – when `Some`, files are first passed through
///   `prefilter_oversized`; the files it rejects are not parsed, but the
///   `FileInfo` records it returns for them are appended to the result.
///
/// Languages without a parser (`get_parser` returns `None`) are ignored.
/// Returns the merged `ParseResult` of all languages, processed in the
/// sorted order of their names (the grouping map is a `BTreeMap`).
fn parse_directory(
    walk_dir: &Path,
    project_root: &Path,
    verbose: bool,
    max_loc_per_file: Option<usize>,
) -> ParseResult {
    let t_walk = std::time::Instant::now();
    let mut by_lang: BTreeMap<&'static str, Vec<PathBuf>> = BTreeMap::new();
    for entry in WalkDir::new(walk_dir)
        .into_iter()
        .filter_entry(crate::code_tree::manifest::walk_filter)
        .filter_map(Result::ok)
    {
        if !entry.file_type().is_file() {
            continue;
        }
        if let Some(lang) = language_for_path(entry.path()) {
            by_lang
                .entry(lang)
                .or_default()
                .push(entry.path().to_path_buf());
        }
    }
    if verbose {
        let langs: Vec<&'static str> = by_lang.keys().copied().collect();
        eprintln!(
            " Detected languages in {}: {:?}",
            walk_dir.display(),
            langs
        );
        for (lang, files) in &by_lang {
            eprintln!(" Found {} {} files", files.len(), lang);
        }
        eprintln!("[timing] walk: {:.3}s", t_walk.elapsed().as_secs_f64());
    }
    let mut combined = ParseResult::new();
    for (lang, files) in by_lang {
        let Some(parser) = get_parser(lang) else {
            continue;
        };
        let (to_parse, skipped) = match max_loc_per_file {
            Some(threshold) => prefilter_oversized(&files, threshold, project_root, lang),
            // No threshold: the loop owns `files`, so hand the list over
            // as-is instead of deep-copying every path.
            None => (files, Vec::new()),
        };
        if verbose && !skipped.is_empty() {
            eprintln!(
                " Skipped {} {} files over max_loc_per_file (threshold {})",
                skipped.len(),
                lang,
                max_loc_per_file.unwrap_or(0)
            );
        }
        let t_lang = std::time::Instant::now();
        let mut result = parser.parse_files(&to_parse, project_root);
        // Keep a record of the files that were skipped for being too large.
        result.files.extend(skipped);
        if verbose {
            eprintln!(
                "[timing] parse {}: {:.3}s ({} files)",
                lang,
                t_lang.elapsed().as_secs_f64(),
                to_parse.len()
            );
        }
        combined.merge(result);
    }
    combined
}
/// Splits `files` into those to parse and those with more than `threshold`
/// lines.
///
/// * `files` – candidate paths, processed in order.
/// * `threshold` – maximum number of lines a file may have and still be
///   parsed; a file with exactly `threshold` lines is kept.
/// * `src_root` – prefix stripped from the path stored in each skipped
///   file's record (the full path is stored if it is not under `src_root`).
/// * `language` – copied into each skipped file's record.
///
/// Returns `(to_parse, skipped)`. Each skipped file gets a `FileInfo` with
/// its line count in `loc`, `skip_reason` set to `"too_large"`, and every
/// other field left empty/default.
///
/// The check is best-effort: a file whose metadata cannot be read or that
/// cannot be opened is placed in `to_parse`, and a read error while
/// counting simply stops the count at the lines seen so far.
fn prefilter_oversized(
    files: &[PathBuf],
    threshold: usize,
    src_root: &Path,
    language: &str,
) -> (Vec<PathBuf>, Vec<crate::code_tree::models::FileInfo>) {
    use std::io::{BufRead, BufReader};
    let mut to_parse = Vec::with_capacity(files.len());
    let mut skipped = Vec::new();
    // Scratch buffer reused for every line of every file.
    let mut buf = Vec::new();
    for fp in files {
        // Fast path: every line occupies at least one byte, so a file of at
        // most `threshold` bytes cannot have more than `threshold` lines.
        // Compare as u64 so that very large sizes are not truncated on
        // targets where usize is narrower than u64.
        let size_bytes = std::fs::metadata(fp).map(|m| m.len()).unwrap_or(0);
        if size_bytes <= threshold as u64 {
            to_parse.push(fp.clone());
            continue;
        }
        let Ok(file) = std::fs::File::open(fp) else {
            to_parse.push(fp.clone());
            continue;
        };
        let mut reader = BufReader::new(file);
        let mut loc: usize = 0;
        buf.clear();
        // Count raw b'\n'-terminated chunks (a final unterminated line
        // counts too). Reading bytes rather than strings means invalid
        // UTF-8 does not interrupt the count.
        loop {
            match reader.read_until(b'\n', &mut buf) {
                Ok(0) | Err(_) => break,
                Ok(_) => {
                    loc += 1;
                    buf.clear();
                }
            }
        }
        if loc <= threshold {
            to_parse.push(fp.clone());
            continue;
        }
        let rel_path = fp.strip_prefix(src_root).unwrap_or(fp);
        let filename = fp
            .file_name()
            .and_then(|s| s.to_str())
            .unwrap_or("")
            .to_string();
        skipped.push(crate::code_tree::models::FileInfo {
            path: rel_path.display().to_string(),
            filename,
            // Saturate instead of wrapping if the count exceeds u32.
            loc: loc.min(u32::MAX as usize) as u32,
            module_path: String::new(),
            language: language.to_string(),
            submodule_declarations: Vec::new(),
            imports: Vec::new(),
            exports: Vec::new(),
            annotations: None,
            is_test: false,
            skip_reason: Some("too_large".into()),
        });
    }
    (to_parse, skipped)
}
/// Deduplicates a merged parse result, loads it into a graph, and
/// optionally writes the graph to disk.
///
/// * `combined` – merged parse output. Files are deduplicated by `path`;
///   functions, classes, enums, interfaces and constants by
///   `qualified_name`. Attributes are not deduplicated here.
/// * `project_info` – optional manifest information passed to the loader.
/// * `verbose` – print counts (taken before deduplication) and timings to
///   stderr.
/// * `save_to` – when `Some`, the graph is prepared for saving, switched to
///   columnar storage and written to this path; the path is converted with
///   `to_string_lossy`.
///
/// # Errors
///
/// Propagates the loader's error, or the writer's error rendered as a
/// string.
fn finalize_and_load(
    mut combined: ParseResult,
    project_info: Option<crate::code_tree::models::ProjectInfo>,
    verbose: bool,
    save_to: Option<&Path>,
) -> Result<Arc<DirGraph>, String> {
    if verbose {
        eprintln!(
            "Parsed: {} files, {} functions, {} classes, {} enums, {} interfaces, {} attributes, {} constants",
            combined.files.len(),
            combined.functions.len(),
            combined.classes.len(),
            combined.enums.len(),
            combined.interfaces.len(),
            combined.attributes.len(),
            combined.constants.len()
        );
    }

    let dedup_started = std::time::Instant::now();
    dedup_by_key(&mut combined.files, |file| file.path.clone());
    dedup_by_key(&mut combined.functions, |func| func.qualified_name.clone());
    dedup_by_key(&mut combined.classes, |class| class.qualified_name.clone());
    dedup_by_key(&mut combined.enums, |en| en.qualified_name.clone());
    dedup_by_key(&mut combined.interfaces, |iface| {
        iface.qualified_name.clone()
    });
    dedup_by_key(&mut combined.constants, |constant| {
        constant.qualified_name.clone()
    });
    if verbose {
        eprintln!(
            "[timing] dedup: {:.3}s",
            dedup_started.elapsed().as_secs_f64()
        );
    }

    let load_started = std::time::Instant::now();
    let mut graph = load::load_into_graph(&combined, project_info.as_ref())?;
    if verbose {
        eprintln!(
            "[timing] load: {:.3}s",
            load_started.elapsed().as_secs_f64()
        );
    }

    // Without a destination the freshly loaded graph is returned untouched.
    let Some(dest) = save_to else {
        return Ok(graph);
    };

    crate::graph::io::file::prepare_save(&mut graph);
    Arc::make_mut(&mut graph).enable_columnar();
    let dest_str = dest.to_string_lossy();
    crate::graph::io::file::write_graph_v3(&graph, &dest_str).map_err(|e| e.to_string())?;
    Ok(graph)
}
/// Parses `src_dir` with every language parser that applies and loads the
/// result into a graph, without any manifest information.
///
/// Languages come from `detect_languages`; a language for which
/// `get_parser` returns `None` is skipped. Each parser walks `src_dir`
/// itself via its `parse_directory` method. The merged result is
/// deduplicated (files by `path`; functions, classes, enums, interfaces and
/// constants by `qualified_name`) before loading.
///
/// With `verbose` set, progress lines and the post-deduplication counts are
/// written to stderr.
///
/// # Errors
///
/// Propagates any error returned by `load::load_into_graph`.
pub fn run(src_dir: &Path, verbose: bool) -> Result<Arc<DirGraph>, String> {
    let mut combined = ParseResult::new();
    let languages = detect_languages(src_dir);
    if verbose {
        eprintln!("Detected languages: {:?}", languages);
    }

    for lang in languages {
        match get_parser(lang) {
            Some(parser) => {
                if verbose {
                    eprintln!("Parsing {} files...", lang);
                }
                combined.merge(parser.parse_directory(src_dir, verbose));
            }
            None => {
                if verbose {
                    eprintln!(" (no Rust parser yet for {lang})");
                }
            }
        }
    }

    dedup_by_key(&mut combined.files, |file| file.path.clone());
    dedup_by_key(&mut combined.functions, |func| func.qualified_name.clone());
    dedup_by_key(&mut combined.classes, |class| class.qualified_name.clone());
    dedup_by_key(&mut combined.enums, |en| en.qualified_name.clone());
    dedup_by_key(&mut combined.interfaces, |iface| {
        iface.qualified_name.clone()
    });
    dedup_by_key(&mut combined.constants, |constant| {
        constant.qualified_name.clone()
    });

    if verbose {
        eprintln!(
            "Parsed: {} files, {} functions, {} classes, {} enums, {} interfaces, {} attributes, {} constants",
            combined.files.len(),
            combined.functions.len(),
            combined.classes.len(),
            combined.enums.len(),
            combined.interfaces.len(),
            combined.attributes.len(),
            combined.constants.len()
        );
    }

    load::load_into_graph(&combined, None)
}
/// Removes duplicate elements from `items`, where two elements are
/// duplicates if `key` returns equal values for them.
///
/// For each key the LAST element carrying it is the one kept; the kept
/// elements retain their original relative order. `key` is called exactly
/// once per element, in order. When there are no duplicates the vector is
/// left untouched.
fn dedup_by_key<T, K, F>(items: &mut Vec<T>, mut key: F)
where
    K: Eq + std::hash::Hash,
    F: FnMut(&T) -> K,
{
    // Later insertions overwrite earlier ones, so each key ends up mapped
    // to the position of its final occurrence.
    let mut last_position: std::collections::HashMap<K, usize> =
        std::collections::HashMap::new();
    for (position, element) in items.iter().enumerate() {
        last_position.insert(key(element), position);
    }

    let total = items.len();
    if last_position.len() == total {
        return;
    }

    // Mark the surviving positions, then filter in place.
    let mut survives = vec![false; total];
    for position in last_position.into_values() {
        survives[position] = true;
    }
    // `retain` visits elements once each in their original order, so the
    // flags line up with the elements one-to-one.
    let mut flags = survives.into_iter();
    items.retain(|_| flags.next().unwrap_or(false));
}